# Dyson comment mining: package setup, data load and corpus cleaning ----

# Install only what is missing so re-running the script does not reinstall
# every package each time. The list covers everything the script installs or
# loads from CRAN.
required_packages <- c(
  "tm", "qdap", "dplyr", "wordcloud", "plotrix", "dendextend", "ggplot2",
  "ggthemes", "RWeka", "reshape2", "quanteda", "readxl", "SnowballC",
  "RColorBrewer", "BiocManager", "tidyverse", "tidytext", "igraph", "ggraph",
  "sentimentr", "syuzhet"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Rgraphviz is distributed through Bioconductor, not CRAN.
if (!requireNamespace("Rgraphviz", quietly = TRUE)) {
  BiocManager::install("Rgraphviz")
}

# Load order is kept as in the original script because it determines which
# package masks which when function names collide.
library("tm")
library("qdap")
library("dplyr")
library("wordcloud")
library("plotrix")
library("dendextend")
library("ggplot2")
library("ggthemes")
library("RWeka")
library("reshape2")
library("quanteda")
library(readxl)
library("SnowballC")
library("RColorBrewer")
library("syuzhet")
library("Rgraphviz")
library(tidyverse)
library(tidytext)
library(igraph)
library(ggraph)

# Read the raw comments; the sheet has no header row.
comments <- read_excel("dyson comments.xlsx", col_names = FALSE)

# read_excel() returns a data frame. Passing it straight to VectorSource()
# turns each *column* into one document, so flatten it to a character vector
# first: one document per non-empty cell.
comment_text <- as.character(unlist(comments, use.names = FALSE))
comment_text <- comment_text[!is.na(comment_text)]
corpus_review <- Corpus(VectorSource(comment_text))

# Transformer that replaces every match of `pattern` with a single space.
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))

# Transformer that replaces every match of `pattern` with `replacement`.
replace_pattern <- content_transformer(
  function(x, pattern, replacement) gsub(pattern, replacement, x)
)

corpus_review <- tm_map(corpus_review, toSpace, "/")
# Wrap base functions in content_transformer() so they act on document content.
corpus_review <- tm_map(corpus_review, content_transformer(tolower))
corpus_review <- tm_map(corpus_review, toSpace, "@")
corpus_review <- tm_map(corpus_review, toSpace, "'")
corpus_review <- tm_map(corpus_review, toSpace, "°")
corpus_review <- tm_map(corpus_review, toSpace, "’")
corpus_review <- tm_map(corpus_review, removePunctuation)
corpus_review <- tm_map(corpus_review, removeNumbers)
# NOTE(review): the stopword list is Italian ("it") while the phrases handled
# below are English and stemDocument() is called with its default language;
# confirm the language of the comments.
corpus_review <- tm_map(corpus_review, removeWords, stopwords("it"))
corpus_review <- tm_map(
  corpus_review, removeWords, c("dyson", "dryer", "phon", "supersonic")
)

# Join multi-word phrases so they are counted as a single term. This runs
# after removePunctuation so the underscore survives. The word-boundary
# pattern keeps the following word separate and also matches at end of text.
corpus_review <- tm_map(
  corpus_review, replace_pattern, "\\bless heat\\b", "less_heat"
)
# corpus_review <- tm_map(
#   corpus_review, replace_pattern, "\\blong hair\\b", "long_hair"
# )

corpus_review <- tm_map(corpus_review, stripWhitespace)
corpus_review <- tm_map(corpus_review, stemDocument)
# Term frequencies, word cloud, associations and clustering ----

# Print the cleaned corpus for a visual check.
writeLines(as.character(corpus_review))

# Despite its name, `dtm` holds a term-document matrix: terms are rows and
# documents are columns, which is why rowSums() below yields per-term totals.
dtm <- TermDocumentMatrix(corpus_review)
# dtm <- DocumentTermMatrix(corpus_review)
dtm <- removeSparseTerms(dtm, 0.999) # Remove sparse terms in the dtm

# Terms that occur at least 30 times. The original called frequency(), which
# resolves to the stats time-series helper and does not count terms.
term_freq <- findFreqTerms(dtm, lowfreq = 30)

m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Bar plot of the most frequent words; head() avoids NA rows when the
# vocabulary has fewer than 10 terms.
top_words <- head(d, 10)
barplot(
  top_words$freq,
  las = 3,
  names.arg = top_words$word,
  col = rainbow(50),
  main = "Most frequent words",
  ylab = "Word frequencies"
)

# Fixed seed so the word cloud layout is reproducible.
set.seed(1234)
wordcloud(
  words = d$word,
  freq = d$freq,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Word association for positive comments, where dtm is the document term
# matrix of positive comments (file dyson).
findAssocs(dtm, terms = c("hair", "much", "use"), corlimit = 0.10)
a <- findAssocs(dtm, terms = findFreqTerms(dtm, lowfreq = 20), corlimit = 0.25)

# Correlation graph of the terms that occur at least 10 times. The argument
# is spelled `terms` in full instead of relying on partial matching.
freq.term <- findFreqTerms(dtm, lowfreq = 10)
plot(dtm, terms = freq.term, corThreshold = 0.25)

# Word cluster: hierarchical clustering of the terms occurring more than four
# times. `v` is already sorted in decreasing order above.
# NOTE(review): the distance is computed on the frequency totals alone, so
# words are grouped by how often they occur, not by co-occurrence; confirm
# this is the intended clustering.
v1 <- v[v > 4]
if (length(v1) >= 2) {
  hc <- hclust(d = dist(v1, method = "euclidean"), method = "complete")
  plot(hc)
} else {
  # hclust() needs at least two objects to cluster.
  message("Fewer than two terms occur more than four times; skipping cluster.")
}