I have a reactive function where a documentterm matrix is created.I need to use the dtm in another reactive function.
I have used dataframe from one reactive to another function using test()$dataframe. Would using dtm in similar way be apt?
make_tree <- reactive ({
validate(
need((input$text != "") || (!is.null(input$file)),
"Please give me some text to work upon!"
)
)
# If text input is not empty then get the corpus
# else load text from the text file uploaded.
# case of both text box and file uploader being empty was
# covered by the above validate function.
if (nchar(input$text) > 0){
docs <- Corpus(VectorSource(input$text))
}
else if (!is.null(input$file)){
filenames <- input$file$datapath
}
filenames<-input$file$datapath
#read files into a character vector
files <- lapply(filenames,readLines)
#create corpus from vector
docs <- Corpus(VectorSource(files))
#inspect a particular document in corpus - data import check
#writeLines(as.character(docs[[1]]))
#start preprocessing
#Transform to lower case
docs <-tm_map(docs,content_transformer(tolower))
#remove potentially problematic symbols
toSpace <- content_transformer(function(x, pattern) { return (gsub(pattern, " ", x))})
docs <- tm_map(docs, toSpace, "-")
docs <- tm_map(docs, toSpace, "'")
docs <- tm_map(docs, toSpace, "`")
docs <- tm_map(docs, toSpace, ":")
# replace other symbols and junk as necessary
#remove punctuation
docs <- tm_map(docs, removePunctuation)
#Strip digits
docs <- tm_map(docs, removeNumbers)
#remove stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
#remove whitespace
docs <- tm_map(docs, stripWhitespace)
#Check if replacements have been done
#writeLines(as.character(docs[[1]]))
#Stem document
docs <- tm_map(docs,stemDocument)
myStopwords1 <- c("can", "say","one","way","use",
"also","however","tell","will",
"much","need","take","tend","even",
"like","particular","rather","said",
"get","well","make","ask","come","end",
"first","two","help","often","may",
"might","see","something","thing","point",
"post","look","right","now","think","'ve ",
"'re ","another","put","set","new","good",
"want","sure","kind","yes,","day","etc",
"quit","since","attempt","lack","seen","aware",
"little","ever","moreover","though","found","able",
"enough","far","earlier","away","achieve","draw",
"last","never","brief","bit","entire","brief",
"great","lot")
docs <- tm_map(docs, removeWords, myStopwords1)
#Create document-term matrix
dtm <- DocumentTermMatrix(docs)
#convert rownames to filenames
rownames(dtm) <- input$file$datapath
#collapse matrix by summing over columns
freq <- colSums(as.matrix(dtm))
#length should be total number of terms
# length(freq)
#create sort order (descending)
ord <- order(freq,decreasing=TRUE)
#List all terms in decreasing order of freq and write to disk
sortedWordFreq<- as.data.frame(freq[ord])
sortedWordFreq$words <- row.names(sortedWordFreq)
colnames(sortedWordFreq) <- c("freq","words")
row.names(sortedWordFreq)<- NULL
sortedWordFreq<-sortedWordFreq[,c(2,1)]
#Set parameters for Gibbs sampling
burnin <- 4000
iter <- 2000
thin <- 500
seed <-list(2003,5,63,100001,765)
nstart <- 5
best <- TRUE
#Number of topics
numberOfTopics <- 11
#Run LDA using Gibbs sampling
ldaOut <-LDA(dtm,numberOfTopics, method="Gibbs", control=list(nstart=nstart, seed = seed, best=best, burnin = burnin, iter = iter, thin=thin))
#write out results
#docs to topics - use only if there are multiple text files and you want to assign a topic to each doc
ldaOut.topics <- as.matrix(topics(ldaOut))
#top x terms in each topic
topNtemsInTopic <- 5
ldaOut.terms <- as.matrix(terms(ldaOut,topNtemsInTopic))
##write.csv(freq[ord],"word_freq.csv")
wordfreq <-sortedWordFreq
topicwords<-data.frame(ldaOut.terms)
test <- data.frame(words=unlist(topicwords, use.names = T))
test<-setDT(test, keep.rownames = TRUE)[]
topicwords<-merge(x = test, y = wordfreq, by = "words", all.x = TRUE)
topicwords$topic<-substr(topicwords$rn,0,(stri_length(topicwords$rn)-1))
tree<-subset(topicwords[,c(4,1,3)])
png("treemap.png", width = 3, height = 3, units = "in", res = 500)
w <- treemap(tree, #Your data frame object
index=c("topic","words"), #A list of your categorical variables
vSize = "freq", #This is your quantitative variable
type="index", #Type sets the organization and color scheme of your treemap
palette = "RdYlGn", #Select your color palette from the RColorBrewer presets or make your own.
title="Topics", #Customize your title
fontsize.title = 5 #Change the font size of the title
)
dev.off()
list(dtm=dtm)
filename <- "treemap.png"
})
I have tried to output the dtm using list in the make_tree reactive function and used in the below reactive function.
topic_wordcloud<-observe({
req(input$file)
if(input$number != "NONE"){
numberOfTopics <- input$number
#Run LDA using Gibbs sampling
dtm<-make_tree$dtm
ldaOut <-LDA(dtm,numberOfTopics, method="Gibbs", control=list(nstart=nstart, seed = seed, best=best, burnin = burnin, iter = iter, thin=thin))
#write out results
#docs to topics - use only if there are multiple text files and you want to assign a topic to each doc
ldaOut.topics <- as.matrix(topics(ldaOut))
for (i in 1:input$number){
topic <- i
df <- data.frame(term = ldaOut@terms, p = exp(ldaOut@beta[i,]))
head(df[order(-df$p),])
mypath<-file.path("D:","Dropbox (eClerx Services Ltd.)","kaveri.malviya","My Documents","TM_Demo (2)","TM_Demo" ,paste("topic_",i, ".jpg", sep = ""))
jpeg(file=mypath)
wordcloud(words = df$term,
freq = df$p,
max.words = 100,
random.order = FALSE,
rot.per = 0.35,
colors=brewer.pal(8, "Dark2"))
dev.off()
}
}
})
input$number is the number of topics which the user wants.Below is the part of Ui.R
tabPanel(title = "Topic WordClouds",
box(width = 12,
height = 600,
background = NULL,
solidHeader = TRUE,
status = "primary",
title = tags$strong("Topic WordClouds"),
selectInput("number","Select Number of Topics",choices=c("NONE",seq(1,20,by=1)),multiple=FALSE,selected="NONE")
# imageOutput("treemap")
)
)
I get below error if i follow above steps Warning: Error in : $ operator is invalid for atomic vectors