# Clear the workspace and load the required packages
rm(list = ls(all = TRUE))

library(readxl)
library(tm)
library(tidytext)
library(qdap)
library(dplyr)
library(tidyr)        # for pivot_wider() used below
library(wordcloud)
library(plotrix)
library(dendextend)
library(ggplot2)
library(ggthemes)
library(RWeka)
library(reshape2)
library(quanteda)
library(sentimentr)
library(syuzhet)
# install.packages("stringr")   # install once, outside the script, if needed
library(stringr)

# Read the positive and negative comments from the two sheets of the workbook
comments          <- read_excel("dyson comments.xlsx", sheet = "positive", col_names = FALSE)
negative_comments <- read_excel("dyson comments.xlsx", sheet = "negative", col_names = FALSE)

# Lower-case a text vector, returning NA instead of failing on odd encodings
tryTolower <- function(x){
  y <- NA
  try_error <- tryCatch(tolower(x), error = function(e) e)
  if (!inherits(try_error, "error")) y <- tolower(x)
  return(y)
}

custom.stopwords <- c(stopwords("english"), "dyson")

# Standard cleaning pipeline: lower-case, then drop stopwords, punctuation,
# extra whitespace and numbers
clean.vec <- function(text.vec){
  text.vec <- tryTolower(text.vec)
  text.vec <- removeWords(text.vec, custom.stopwords)
  text.vec <- removePunctuation(text.vec)
  text.vec <- stripWhitespace(text.vec)
  text.vec <- removeNumbers(text.vec)
  return(text.vec)
}

# read_excel() returns a tibble, so pass the text column (the first one) as a character vector
com.vec <- clean.vec(comments[[1]])
neg.vec <- clean.vec(negative_comments[[1]])

# Collapse each sheet into one document: document 1 = positive, document 2 = negative
com.vec <- paste(com.vec, collapse = " ")
neg.vec <- paste(neg.vec, collapse = " ")
all     <- c(com.vec, neg.vec)

corpus <- VCorpus(VectorSource(all))
dtm    <- TermDocumentMatrix(corpus)
dtm    <- removeSparseTerms(dtm, 0.999)        # remove sparse terms from the TDM

term_freq <- findFreqTerms(dtm, lowfreq = 30)  # terms appearing at least 30 times

# Term frequencies sorted in decreasing order
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Drop contraction fragments and product-specific words, then stem
corpus <- tm_map(corpus, removeWords, c("’re", "’ve", "dryer", "phon", "supersonic"))
corpus <- tm_map(corpus, stemDocument)

tdm   <- TermDocumentMatrix(corpus)
tdm.m <- as.matrix(tdm)
colnames(tdm.m) <- c("Positive Comments", "Negative Comments")

# Words that appear in both documents, ordered by how differently they are used
common.words <- subset(tdm.m, tdm.m[, 1] > 0 & tdm.m[, 2] > 0)
difference   <- abs(common.words[, 1] - common.words[, 2])
common.words <- cbind(common.words, difference)
common.words <- common.words[order(common.words[, 3], decreasing = TRUE), ]

top25.df <- data.frame(PositiveComments = common.words[1:25, 1],
                       NegativeComments = common.words[1:25, 2],
                       Terms            = rownames(common.words[1:25, ]))

pyramid.plot(top25.df$PositiveComments, top25.df$NegativeComments,
             labels = top25.df$Terms, gap = 1,
             top.labels = c("Positive Comments", "Terms", "Negative Comments"),
             main = "Words in Common",
             laxlab = NULL, raxlab = NULL, unit = NULL)
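# Optional: a comparison cloud is an alternative view of the same
# positive-vs-negative contrast shown in the pyramid plot. This is a minimal
# sketch assuming tdm.m as built above (two columns, "Positive Comments" and
# "Negative Comments"); comparison.cloud() comes from the wordcloud package
# already loaded and sizes each word by how much its frequency differs
# between the two documents.
comparison.cloud(tdm.m,
                 colors = c("darkgreen", "firebrick"),
                 max.words = 50,
                 title.size = 1)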
# -------- Sentiment analysis --------

# Split the text into sentences; the same steps can be run on com.vec
# (positive only) or neg.vec (negative only). Here the combined,
# unstemmed text in `all` is used.
# s_v <- get_sentences(com.vec)   # positive comments only
# s_v <- get_sentences(neg.vec)   # negative comments only
s_v <- get_sentences(all)

poa_word_v <- get_tokens(s_v, pattern = "\\W")

# syuzhet - custom lexicon on a continuous scale
syuzhet_vector <- get_sentiment(poa_word_v, method = "syuzhet")
head(syuzhet_vector)
plot(syuzhet_vector, type = "l",
     main = "Plot Trajectory",
     xlab = "Narrative Time", ylab = "Emotional Valence")

# bing - binary scale with -1 for negative scores and +1 for positive scores
bing_vector <- get_sentiment(s_v, method = "bing")
head(bing_vector)
summary(bing_vector)

# afinn - integer scale from -5 to +5
afinn_vector <- get_sentiment(s_v, method = "afinn")
head(afinn_vector)
summary(afinn_vector)

afinn_vector2 <- get_sentiment(poa_word_v, method = "afinn")
plot(afinn_vector2, type = "l",
     main = "Plot Trajectory",
     xlab = "Narrative Time", ylab = "Emotional Valence")

# Percentage-based means smooth the trajectory into 10 bins
percent_vals <- get_percentage_values(syuzhet_vector, bins = 10)
plot(percent_vals, type = "l",
     main = "Dyson Comments Using Percentage-Based Means",
     xlab = "Narrative Time", ylab = "Emotional Valence",
     col = "red")

# Join the term-frequency table d (built above) to the NRC lexicon;
# wt = freq weights each word by how often it occurs
sentiments <- get_sentiments("nrc")
sentiment <- d %>%
  inner_join(sentiments, by = "word") %>%
  count(word, sentiment, wt = freq, sort = TRUE) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0)

# Word clouds of joy- and anger-related terms
sentiment %>%
  with(wordcloud(word, joy, min.freq = 10,
                 random.color = TRUE, colors = brewer.pal(10, "PuOr")))
sentiment %>%
  with(wordcloud(word, anger, min.freq = 10,
                 random.color = FALSE, colors = brewer.pal(12, "Paired")))

# Combine the raw comments and flatten the tibble into one character vector
all_comments        <- rbind(comments, negative_comments)
all_comments_vector <- unlist(all_comments)   # unlist() turns the single column into a plain vector
s_v <- get_sentences(all_comments_vector)

# Another way to visualise sentiment: NRC emotion scores per sentence
nrc_data <- get_nrc_sentiment(s_v)

# Sentences with a strong anger signal
angry_items <- which(nrc_data$anger > 1)
s_v[angry_items]

# Sentences with a joy signal
joy_items <- which(nrc_data$joy > 0)
s_v[joy_items]

# Plot the share of each of the eight NRC emotions
barplot(sort(colSums(prop.table(nrc_data[, 1:8]))),
        horiz = TRUE, cex.names = 0.7, las = 1,
        main = "Emotions", xlab = "Percentage")

# Total score per NRC category as a bar chart
Sentimentscores <- data.frame(colSums(nrc_data))
names(Sentimentscores) <- "Scores"
Sentimentscores <- cbind("Sentiment" = row.names(Sentimentscores), Sentimentscores)
rownames(Sentimentscores) <- NULL

ggplot(data = Sentimentscores, aes(x = Sentiment, y = Scores)) +
  geom_bar(aes(fill = Sentiment), stat = "identity") +
  theme(legend.position = "none") +
  xlab("Sentiments") + ylab("Scores") +
  ggtitle("Sentiments behind the comments on the Dyson company")

# ggplot(Sentimentscores, aes(x = as.numeric(Sentiment), y = Scores)) +
#   geom_point() +
#   geom_smooth(method = "auto")   # pick a method & fit a model
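# A minimal sketch of a possible extension (not part of the analysis above):
# compute the NRC emotion profile separately for the positive and the negative
# sheet so the two profiles can be compared side by side. It assumes the text
# sits in the first column of `comments` and `negative_comments` as read at the
# top of the script; the first eight columns of get_nrc_sentiment() are the emotions.
nrc_pos <- colSums(get_nrc_sentiment(as.character(comments[[1]])))
nrc_neg <- colSums(get_nrc_sentiment(as.character(negative_comments[[1]])))

emotion_profile <- rbind(Positive = prop.table(nrc_pos[1:8]),
                         Negative = prop.table(nrc_neg[1:8]))
round(emotion_profile, 3)

barplot(emotion_profile, beside = TRUE, las = 2, legend.text = TRUE,
        main = "NRC emotions: positive vs negative comments")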