0
votes

I have a reactive function where a documentterm matrix is created.I need to use the dtm in another reactive function.

I have used dataframe from one reactive to another function using test()$dataframe. Would using dtm in similar way be apt?

  make_tree <- reactive ({
    validate(
      need((input$text != "") || (!is.null(input$file)),
           "Please give me some text to work upon!"
      )
    )
    # If text input is not empty then get the corpus
    # else load text from the text file uploaded.
    # case of both text box and file uploader being empty was
    # covered by the above validate function.
    if (nchar(input$text) > 0){
      docs <- Corpus(VectorSource(input$text))
    }
    else if (!is.null(input$file)){
      filenames <- input$file$datapath
    }

    filenames<-input$file$datapath
    #read files into a character vector
    files <- lapply(filenames,readLines)

    #create corpus from vector
    docs <- Corpus(VectorSource(files))

    #inspect a particular document in corpus - data import check
    #writeLines(as.character(docs[[1]]))


    #start preprocessing
    #Transform to lower case
    docs <-tm_map(docs,content_transformer(tolower))


    #remove potentially problematic symbols
    toSpace <- content_transformer(function(x, pattern) { return (gsub(pattern, " ", x))})
    docs <- tm_map(docs, toSpace, "-")
    docs <- tm_map(docs, toSpace, "'")
    docs <- tm_map(docs, toSpace, "`")
    docs <- tm_map(docs, toSpace, ":")

    # replace other symbols and junk as necessary
    #remove punctuation
    docs <- tm_map(docs, removePunctuation)
    #Strip digits
    docs <- tm_map(docs, removeNumbers)
    #remove stopwords
    docs <- tm_map(docs, removeWords, stopwords("english"))
    #remove whitespace
    docs <- tm_map(docs, stripWhitespace)

    #Check if replacements have been done
    #writeLines(as.character(docs[[1]]))

    #Stem document
    docs <- tm_map(docs,stemDocument)

    myStopwords1 <- c("can", "say","one","way","use",
                      "also","however","tell","will",
                      "much","need","take","tend","even",
                      "like","particular","rather","said",
                      "get","well","make","ask","come","end",
                      "first","two","help","often","may",
                      "might","see","something","thing","point",
                      "post","look","right","now","think","'ve ",
                      "'re ","another","put","set","new","good",
                      "want","sure","kind","yes,","day","etc",
                      "quit","since","attempt","lack","seen","aware",
                      "little","ever","moreover","though","found","able",
                      "enough","far","earlier","away","achieve","draw",
                      "last","never","brief","bit","entire","brief",
                      "great","lot")
    docs <- tm_map(docs, removeWords, myStopwords1)


    #Create document-term matrix
    dtm <- DocumentTermMatrix(docs)

    #convert rownames to filenames
    rownames(dtm) <- input$file$datapath

    #collapse matrix by summing over columns
    freq <- colSums(as.matrix(dtm))

    #length should be total number of terms
    # length(freq)

    #create sort order (descending)
    ord <- order(freq,decreasing=TRUE)

    #List all terms in decreasing order of freq and write to disk

    sortedWordFreq<- as.data.frame(freq[ord])
    sortedWordFreq$words <- row.names(sortedWordFreq)
    colnames(sortedWordFreq) <- c("freq","words")
    row.names(sortedWordFreq)<- NULL
    sortedWordFreq<-sortedWordFreq[,c(2,1)]

    #Set parameters for Gibbs sampling
    burnin <- 4000
    iter <- 2000
    thin <- 500
    seed <-list(2003,5,63,100001,765)
    nstart <- 5
    best <- TRUE

    #Number of topics
    numberOfTopics <- 11

    #Run LDA using Gibbs sampling
    ldaOut <-LDA(dtm,numberOfTopics, method="Gibbs", control=list(nstart=nstart, seed = seed, best=best, burnin = burnin, iter = iter, thin=thin))

    #write out results
    #docs to topics - use only if there are multiple text files and you want to assign a topic to each doc
    ldaOut.topics <- as.matrix(topics(ldaOut))

    #top x terms in each topic
    topNtemsInTopic <- 5
    ldaOut.terms <- as.matrix(terms(ldaOut,topNtemsInTopic))

    ##write.csv(freq[ord],"word_freq.csv")
    wordfreq <-sortedWordFreq

    topicwords<-data.frame(ldaOut.terms)

    test <- data.frame(words=unlist(topicwords, use.names = T))

    test<-setDT(test, keep.rownames = TRUE)[]

    topicwords<-merge(x = test, y = wordfreq, by = "words", all.x = TRUE)

    topicwords$topic<-substr(topicwords$rn,0,(stri_length(topicwords$rn)-1))

    tree<-subset(topicwords[,c(4,1,3)])

    png("treemap.png", width = 3, height = 3, units = "in", res = 500)
    w <- treemap(tree, #Your data frame object
                 index=c("topic","words"),  #A list of your categorical variables
                 vSize = "freq",  #This is your quantitative variable
                 type="index", #Type sets the organization and color scheme of your treemap
                 palette = "RdYlGn",  #Select your color palette from the RColorBrewer presets or make your own.
                 title="Topics", #Customize your title
                 fontsize.title = 5 #Change the font size of the title

    )
    dev.off()
    list(dtm=dtm)
    filename <- "treemap.png"

  })

I have tried to output the dtm using list in the make_tree reactive function and used in the below reactive function.

    topic_wordcloud<-observe({
  req(input$file)
  if(input$number != "NONE"){

  numberOfTopics <- input$number

  #Run LDA using Gibbs sampling
  dtm<-make_tree$dtm
  ldaOut <-LDA(dtm,numberOfTopics, method="Gibbs", control=list(nstart=nstart, seed = seed, best=best, burnin = burnin, iter = iter, thin=thin))

  #write out results
  #docs to topics - use only if there are multiple text files and you want to assign a topic to each doc
  ldaOut.topics <- as.matrix(topics(ldaOut))

  for (i in 1:input$number){
    topic <- i
    df <- data.frame(term = ldaOut@terms, p = exp(ldaOut@beta[i,]))
    head(df[order(-df$p),])

    mypath<-file.path("D:","Dropbox (eClerx Services Ltd.)","kaveri.malviya","My Documents","TM_Demo (2)","TM_Demo" ,paste("topic_",i, ".jpg", sep = ""))

    jpeg(file=mypath)

    wordcloud(words = df$term,
              freq = df$p,
              max.words = 100,
              random.order = FALSE,
              rot.per = 0.35,
              colors=brewer.pal(8, "Dark2"))
    dev.off()
  }
}

})

input$number is the number of topics which the user wants.Below is the part of Ui.R

    tabPanel(title = "Topic WordClouds",
                                             box(width = 12,
                                                 height = 600,
                                                 background = NULL,
                                                 solidHeader = TRUE,
                                                 status = "primary",
                                                 title = tags$strong("Topic WordClouds"),
                                                 selectInput("number","Select Number of Topics",choices=c("NONE",seq(1,20,by=1)),multiple=FALSE,selected="NONE")
#                                                  imageOutput("treemap")
                                                ) 
                                             )

I get below error if i follow above steps Warning: Error in : $ operator is invalid for atomic vectors

1
I don't see why not. When asking for help, you should include a simple reproducible example with sample input and desired output that can be used to test and verify possible solutions.MrFlick

1 Answers

0
votes

If you use make_tree <- reactive({}) then you need to return an object from that reactive expression block. By default R returns the last value from a block. In your case that last line is

filename <- "treemap.png"

so they on;y thing returned from your function is "treemap.png". You need to switch around this line so

list(dtm=dtm)

is the last line in your block.