# Clear the workspace and load the required packages
rm(list = ls(all = TRUE))

library(readxl)
library(tm)
library(tidytext)
library(qdap)
library(dplyr)
library(tidyr)        # for pivot_wider() used below
library(wordcloud)
library(plotrix)
library(dendextend)
library(ggplot2)
library(ggthemes)
library(RWeka)
library(reshape2)
library(quanteda)
library(sentimentr)
library(syuzhet)
# install.packages("stringr")   # install once, outside the script, if needed
library(stringr)

# Read the positive and negative comments from the two sheets of the workbook
comments          <- read_excel("dyson comments.xlsx", sheet = "positive", col_names = FALSE)
negative_comments <- read_excel("dyson comments.xlsx", sheet = "negative", col_names = FALSE)

# Lower-case a text vector, returning NA instead of failing on odd encodings
tryTolower <- function(x){
  y <- NA
  try_error <- tryCatch(tolower(x), error = function(e) e)
  if (!inherits(try_error, "error")) y <- tolower(x)
  return(y)
}

custom.stopwords <- c(stopwords("english"), "dyson")

# Standard cleaning pipeline: lower-case, then drop stopwords, punctuation,
# extra whitespace and numbers
clean.vec <- function(text.vec){
  text.vec <- tryTolower(text.vec)
  text.vec <- removeWords(text.vec, custom.stopwords)
  text.vec <- removePunctuation(text.vec)
  text.vec <- stripWhitespace(text.vec)
  text.vec <- removeNumbers(text.vec)
  return(text.vec)
}

# read_excel() returns a tibble, so pass the text column (the first one) as a character vector
com.vec <- clean.vec(comments[[1]])
neg.vec <- clean.vec(negative_comments[[1]])

# Collapse each sheet into one document: document 1 = positive, document 2 = negative
com.vec <- paste(com.vec, collapse = " ")
neg.vec <- paste(neg.vec, collapse = " ")
all     <- c(com.vec, neg.vec)

corpus <- VCorpus(VectorSource(all))
dtm    <- TermDocumentMatrix(corpus)
dtm    <- removeSparseTerms(dtm, 0.999)        # remove sparse terms from the TDM

term_freq <- findFreqTerms(dtm, lowfreq = 30)  # terms appearing at least 30 times

# Term frequencies sorted in decreasing order
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Drop contraction fragments and product-specific words, then stem
corpus <- tm_map(corpus, removeWords, c("’re", "’ve", "dryer", "phon", "supersonic"))
corpus <- tm_map(corpus, stemDocument)

tdm   <- TermDocumentMatrix(corpus)
tdm.m <- as.matrix(tdm)
colnames(tdm.m) <- c("Positive Comments", "Negative Comments")

# Words that appear in both documents, ordered by how differently they are used
common.words <- subset(tdm.m, tdm.m[, 1] > 0 & tdm.m[, 2] > 0)
difference   <- abs(common.words[, 1] - common.words[, 2])
common.words <- cbind(common.words, difference)
common.words <- common.words[order(common.words[, 3], decreasing = TRUE), ]

top25.df <- data.frame(PositiveComments = common.words[1:25, 1],
                       NegativeComments = common.words[1:25, 2],
                       Terms            = rownames(common.words[1:25, ]))

pyramid.plot(top25.df$PositiveComments, top25.df$NegativeComments,
             labels = top25.df$Terms, gap = 1,
             top.labels = c("Positive Comments", "Terms", "Negative Comments"),
             main = "Words in Common",
             laxlab = NULL, raxlab = NULL, unit = NULL)
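# Optional: a comparison cloud is an alternative view of the same
# positive-vs-negative contrast shown in the pyramid plot. This is a minimal
# sketch assuming tdm.m as built above (two columns, "Positive Comments" and
# "Negative Comments"); comparison.cloud() comes from the wordcloud package
# already loaded and sizes each word by how much its frequency differs
# between the two documents.
comparison.cloud(tdm.m,
                 colors = c("darkgreen", "firebrick"),
                 max.words = 50,
                 title.size = 1)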
# -------- Sentiment analysis --------

# Split the text into sentences; the same steps can be run on com.vec
# (positive only) or neg.vec (negative only). Here the combined,
# unstemmed text in `all` is used.
# s_v <- get_sentences(com.vec)   # positive comments only
# s_v <- get_sentences(neg.vec)   # negative comments only
s_v <- get_sentences(all)

poa_word_v <- get_tokens(s_v, pattern = "\\W")

# syuzhet - custom lexicon on a continuous scale
syuzhet_vector <- get_sentiment(poa_word_v, method = "syuzhet")
head(syuzhet_vector)
plot(syuzhet_vector, type = "l",
     main = "Plot Trajectory",
     xlab = "Narrative Time", ylab = "Emotional Valence")

# bing - binary scale with -1 for negative scores and +1 for positive scores
bing_vector <- get_sentiment(s_v, method = "bing")
head(bing_vector)
summary(bing_vector)

# afinn - integer scale from -5 to +5
afinn_vector <- get_sentiment(s_v, method = "afinn")
head(afinn_vector)
summary(afinn_vector)

afinn_vector2 <- get_sentiment(poa_word_v, method = "afinn")
plot(afinn_vector2, type = "l",
     main = "Plot Trajectory",
     xlab = "Narrative Time", ylab = "Emotional Valence")

# Percentage-based means smooth the trajectory into 10 bins
percent_vals <- get_percentage_values(syuzhet_vector, bins = 10)
plot(percent_vals, type = "l",
     main = "Dyson Comments Using Percentage-Based Means",
     xlab = "Narrative Time", ylab = "Emotional Valence",
     col = "red")

# Join the term-frequency table d (built above) to the NRC lexicon;
# wt = freq weights each word by how often it occurs
sentiments <- get_sentiments("nrc")
sentiment <- d %>%
  inner_join(sentiments, by = "word") %>%
  count(word, sentiment, wt = freq, sort = TRUE) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0)

# Word clouds of joy- and anger-related terms
sentiment %>%
  with(wordcloud(word, joy, min.freq = 10,
                 random.color = TRUE, colors = brewer.pal(10, "PuOr")))
sentiment %>%
  with(wordcloud(word, anger, min.freq = 10,
                 random.color = FALSE, colors = brewer.pal(12, "Paired")))

# Combine the raw comments and flatten the tibble into one character vector
all_comments        <- rbind(comments, negative_comments)
all_comments_vector <- unlist(all_comments)   # unlist() turns the single column into a plain vector
s_v <- get_sentences(all_comments_vector)

# Another way to visualise sentiment: NRC emotion scores per sentence
nrc_data <- get_nrc_sentiment(s_v)

# Sentences with a strong anger signal
angry_items <- which(nrc_data$anger > 1)
s_v[angry_items]

# Sentences with a joy signal
joy_items <- which(nrc_data$joy > 0)
s_v[joy_items]

# Plot the share of each of the eight NRC emotions
barplot(sort(colSums(prop.table(nrc_data[, 1:8]))),
        horiz = TRUE, cex.names = 0.7, las = 1,
        main = "Emotions", xlab = "Percentage")

# Total score per NRC category as a bar chart
Sentimentscores <- data.frame(colSums(nrc_data))
names(Sentimentscores) <- "Scores"
Sentimentscores <- cbind("Sentiment" = row.names(Sentimentscores), Sentimentscores)
rownames(Sentimentscores) <- NULL

ggplot(data = Sentimentscores, aes(x = Sentiment, y = Scores)) +
  geom_bar(aes(fill = Sentiment), stat = "identity") +
  theme(legend.position = "none") +
  xlab("Sentiments") + ylab("Scores") +
  ggtitle("Sentiments behind the comments on the Dyson company")

# ggplot(Sentimentscores, aes(x = as.numeric(Sentiment), y = Scores)) +
#   geom_point() +
#   geom_smooth(method = "auto")   # pick a method & fit a model
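# A minimal sketch of a possible extension (not part of the analysis above):
# compute the NRC emotion profile separately for the positive and the negative
# sheet so the two profiles can be compared side by side. It assumes the text
# sits in the first column of `comments` and `negative_comments` as read at the
# top of the script; the first eight columns of get_nrc_sentiment() are the emotions.
nrc_pos <- colSums(get_nrc_sentiment(as.character(comments[[1]])))
nrc_neg <- colSums(get_nrc_sentiment(as.character(negative_comments[[1]])))

emotion_profile <- rbind(Positive = prop.table(nrc_pos[1:8]),
                         Negative = prop.table(nrc_neg[1:8]))
round(emotion_profile, 3)

barplot(emotion_profile, beside = TRUE, las = 2, legend.text = TRUE,
        main = "NRC emotions: positive vs negative comments")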