2
votes

GOAL: Create a histogram that accepts user input for bin count, and overlay it with a curve to fit the distribution. Plotted data is the amount of time it takes a person to cut a cookie.

KEY FUNCTIONS:

geom_histogram(aes(y = ..count..), bins = input$binCount) - This statement creates a frequency plot with a user-specified number of bins.

geom_density(aes(y = (..density..) * (N) * (binWidth))) - This statement is supposed to create a curve that fits the distribution. "N" is the total number of data points (20) and "binWidth" is the width of each bin (default = 5), which varies according to the number of bins specified by the user. A full explanation of the math behind this transformation can be found here.

PROBLEM: The "aes()" mapping statement within the "geom_density()" function is not recognizing the variables "N" or "binWidth", which were previously created in the "renderPlot" block.

CODE: The following code is immediately runnable. Line 84 will cause the error. If you'd like to see what the result should look like for David or Sharon (with the default binCount = 5), then you can uncomment Lines 85 or 86.

SIMILAR ISSUES: I've found some posts dealing with similar ggplot issues, such as this post, but they deal mostly with passing strings to the mapping statement using "aes_string()", and I'm using numeric variables.

Thanks for any help you can provide!

#
# Cookie Cutting Analytics
#
# Author: Cody
# Date:   10/16/2017
# Descr:  An application to analyze David and Sharon's cookie cutting efficiency.
#

# Libraries -----------------------------------------------------------
suppressWarnings(library(dplyr))
suppressWarnings(library(ggplot2))
suppressWarnings(library(shiny))


# User Interface ------------------------------------------------------
ui <- fluidPage(

  # Page heading
  titlePanel(title = "Cookie Cutting Analytics"),

  # Two-column layout: controls in the sidebar, results in the main area
  sidebarLayout(

    # Controls
    sidebarPanel = sidebarPanel(

      # Dropdown choosing whose cut times are analyzed
      selectInput(inputId = "cutterPerson", label = "Cookie Cutter:",
                  choices = c("David", "Sharon")),

      # Slider choosing how many histogram bins to draw (1 to 10, default 5)
      sliderInput(inputId = "binCount", label = "Number of Bins:",
                  min = 1,
                  max = 10,
                  value = 5)

    ),

    # Results, split across three tabs
    mainPanel = mainPanel(

      tabsetPanel(type = "tabs",
                  tabPanel(title = "Plots",
                           br(),
                           plotOutput(outputId = "histogram"),
                           br(),
                           plotOutput(outputId = "boxPlot")),
                  tabPanel(title = "Stats", verbatimTextOutput(outputId = "summary")),
                  tabPanel(title = "Data", tableOutput(outputId = "table"))
      )

    )

  )
)


# Server Logic --------------------------------------------------------
server <- function(input, output) {
  # Shiny server function: builds the reactive data sets and renders output$histogram.
  # Reactive Expression: Cookie Data (40 timed cuts, alternating David / Sharon)
  cookieData.df <- reactive({
    person <- rep(c("David", "Sharon"), times = 20)
    cutTime <- c(5,10,8,12,6,9,8,8,4,15,9,14,5,9,7,12,6,13,8,11,6,12,6,10,8,13,9,8,5,11,4,13,7,10,5,12,6,10,5,15)
    data.frame(person, cutTime)
  })

  # Reactive Expression: Person Cutting (value currently selected in the dropdown)
  cutterName <- reactive({
    input$cutterPerson
  })

  # Reactive Expression: Filtered Data (rows for the selected person only)
  filteredData.df <- reactive({
    cookieData.df() %>% select(person, cutTime) %>% filter(person == input$cutterPerson)
  })

  # Output: Histogram of counts, overlaid with a density curve rescaled by N * binWidth
  output$histogram <- renderPlot({
    N <- nrow(filteredData.df())
    binWidth <- diff(range(filteredData.df()$cutTime)) / input$binCount # (max - min) / binCount; the subtraction must happen before the division
    ggplot(filteredData.df(), aes(cutTime)) +
      geom_histogram(aes(y = ..count..), bins = input$binCount) +
      geom_density(aes(y = ..density..* N * binWidth), color = "red") + # Error: Does not recognize "N" or "binWidth"
      #geom_density(aes(y = ..density.. * 20 * 1), color = "red") + # David Curve: N = 20, binWidth = (max(cutTime)-min(cutTime))/binCount = (9-4)/5 = 1
      #geom_density(aes(y = ..density.. * 20 * 1.4), color = "red") + # Sharon Curve: N = 20, binWidth = (max(cutTime)-min(cutTime))/binCount = (15-8)/5 = 1.4
      labs(title = "Histogram of Cookie Cut Times", x = "Cut Duration (s)", y = "Frequency") +
      theme(plot.title = element_text(size = 25, face = "bold"),
            axis.title = element_text(size = 15, face = "bold"))
  })


  # Reactive Expression: (N) Filtered Data
  # Row count of the filtered data. The histogram above uses its own local `N`,
  # so this reactive is not read by it.
  N <- reactive({
    nrow(filteredData.df())
  })

  # Reactive Expression: (binWidth) Filtered Data
  # Range of the filtered cut times divided by the requested bin count; same
  # formula as the local `binWidth` inside the histogram block.
  binWidth <- reactive({
    (max(filteredData.df()$cutTime) - min(filteredData.df()$cutTime)) / input$binCount
  })

}


# Build the Shiny app from the `ui` and `server` objects defined above
shinyApp(ui, server)
2
This really isn't related to shiny; it is similar to the problem here. ggplot can't look across environments for mappings that use `..`-calculated variables. – MrFlick
Still having trouble getting it to work. I appreciate you linking me to another post, but a personal explanation would be much more helpful. Especially since I spent an hour putting together a well-written post with sample code. – Cody

2 Answers

4
votes

Consider the much simpler example

# Works: the scale factor is a literal written inside aes()
ggplot(iris, aes(x = Sepal.Width)) +
  geom_density(mapping = aes(y = ..density.. * 5))
# Doesn't work: the scale factor is a variable defined outside the layer
N <- 5
ggplot(iris, aes(x = Sepal.Width)) +
  geom_density(mapping = aes(y = ..density.. * N))

For the ggplot layers that do calculations for you, they need to create their own variables, and when they do, they can't access values that they didn't create (at least that's how it's currently implemented).

So you have two options I can think of: 1) calculate the density yourself, or 2) dynamically build the expression such that there are no other un-evaluated variables in it.

For option one, that might look like

# Option 1: compute the kernel density yourself and draw it as its own layer
N <- 5
dens <- density(iris$Sepal.Width, kernel = "gaussian")  # geom_density equivalent
ggplot(iris, aes(x = Sepal.Width)) +
  geom_histogram() +
  geom_area(
    mapping = aes(x, y * N),
    data = data.frame(x = dens$x, y = dens$y)
  )

For option 2, you could do

# Option 2: splice the current value of N into the mapping expression, so the
# aes() call contains a number instead of a variable name
N <- 5
dens_map <- eval(bquote(aes(y = ..density.. * .(N))))
ggplot(iris, aes(x = Sepal.Width)) +
  geom_histogram() +
  geom_density(mapping = dens_map)

which basically expands the variable name into its numeric value.

0
votes

While the previous answer is good enough, stat_density allows us to extract the density values, on which we can perform arithmetic operations and then build a layer. I just wanted to share this approach too.

if(interactive()){
  #
  # Cookie Cutting Analytics
  #
  # Author: Cody
  # Date:   10/16/2017
  # Descr:  An application to analyze David and Sharon's cookie cutting efficiency.
  #

  # Libraries -----------------------------------------------------------
  suppressWarnings(library(dplyr))
  suppressWarnings(library(ggplot2))
  suppressWarnings(library(shiny))


  # User Interface ------------------------------------------------------
  ui <- fluidPage(

    # Page heading
    titlePanel(title = "Cookie Cutting Analytics"),

    # Two-column layout: controls in the sidebar, results in the main area
    sidebarLayout(

      # Controls
      sidebarPanel = sidebarPanel(

        # Dropdown choosing whose cut times are analyzed
        selectInput(inputId = "cutterPerson", label = "Cookie Cutter:",
                    choices = c("David", "Sharon")),

        # Slider choosing how many histogram bins to draw (1 to 10, default 5)
        sliderInput(inputId = "binCount", label = "Number of Bins:",
                    min = 1,
                    max = 10,
                    value = 5)

      ),

      # Results, split across three tabs
      mainPanel = mainPanel(

        tabsetPanel(type = "tabs",
                    tabPanel(title = "Plots",
                             br(),
                             plotOutput(outputId = "histogram"),
                             br(),
                             plotOutput(outputId = "boxPlot")),
                    tabPanel(title = "Stats", verbatimTextOutput(outputId = "summary")),
                    tabPanel(title = "Data", tableOutput(outputId = "table"))
        )

      )

    )
  )


  # Server Logic --------------------------------------------------------
  server <- function(input, output) {

    # Full data set: 40 timed cuts, alternating between David and Sharon
    cookieData.df <- reactive({
      data.frame(
        person = rep(c("David", "Sharon"), times = 20),
        cutTime = c(5,10,8,12,6,9,8,8,4,15,9,14,5,9,7,12,6,13,8,11,6,12,6,10,8,13,9,8,5,11,4,13,7,10,5,12,6,10,5,15)
      )
    })

    # Name currently chosen in the "Cookie Cutter" dropdown
    cutterName <- reactive(input$cutterPerson)

    # Rows belonging to the selected person
    filteredData.df <- reactive({
      cookieData.df() %>%
        select(person, cutTime) %>%
        filter(person == input$cutterPerson)
    })

    # Number of observations for the selected person
    N <- reactive({
      nrow(filteredData.df())
    })

    # Width of one histogram bin: range of the cut times divided by the bin count
    binWidth <- reactive({
      cut_times <- filteredData.df()$cutTime
      (max(cut_times) - min(cut_times)) / input$binCount
    })

    # Histogram of counts with the density curve rescaled onto the count axis
    output$histogram <- renderPlot({
      base_plot <- ggplot(filteredData.df(), aes(cutTime))

      # Build a density-only version of the plot and read back the data
      # computed for its first (and only) layer
      density_plot <- base_plot + stat_density(aes(y = ..density..))
      curve_df <- ggplot_build(density_plot)[["data"]][[1]]

      # Rescale the density by N * binWidth outside of aes(), where the
      # reactive values are directly available
      curve_df$y <- curve_df$y * N() * binWidth()

      base_plot +
        geom_histogram(aes(y = ..count..), bins = input$binCount) +
        geom_line(data = curve_df, aes(x = x, y = y), color = "red") +
        labs(title = "Histogram of Cookie Cut Times", x = "Cut Duration (s)", y = "Frequency") +
        theme(plot.title = element_text(size = 25, face = "bold"),
              axis.title = element_text(size = 15, face = "bold"))
    })

  }


  # Build the Shiny app from the `ui` and `server` objects defined above
  shinyApp(ui, server)
}

Screenshot:

enter image description here