1
votes

I have written a shiny app that reads data from https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx that was working for a few months, but about a month or so ago it stopped working in shinyapps.io.

I found a recent post here that suggested the issue was due to a recently expired SSL certificate. The site green2.kingcounty.gov does have a certificate that expired on May 30, 2020

x <- openssl::download_ssl_cert("green2.kincounty.gov")
lapply(x, `[[`, "validity")

However, as noted by weizhang in the recent post mentioned above, the scrape (using GET in that case) works locally within RStudio, but not in the deployed version on shinyapps.io. The shinyapps.io log for my code includes a warning and then an error:

2020-07-17T16:09:23.073301+00:00 shinyapps[2571330]: Warning: Error in open.connection: SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077027+00:00 shinyapps[2571330]:   68: open.connection
2020-07-17T16:09:23.077213+00:00 shinyapps[2571330]: Error in open.connection(x, "rb") : 
2020-07-17T16:09:23.077028+00:00 shinyapps[2571330]:   66: read_xml.connection
2020-07-17T16:09:23.077214+00:00 shinyapps[2571330]:   SSL certificate problem: certificate has expired
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]:   65: read_xml.character
2020-07-17T16:09:23.077029+00:00 shinyapps[2571330]:   61: read_html.default
2020-07-17T16:09:23.077030+00:00 shinyapps[2571330]:   59: server [/srv/connect/apps/shiny_test/app.R#25]

It looks like the discussion in community.rstudio.com has been dormant since June 4. I hope I can find a solution to this issue here.

A simple version of my app is provided below.

library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# Define UI for application that gets data and creates a plot
ui <- fluidPage(

    # Application title
    titlePanel("Large Lakes Profile Plots"),

        # Show a plot of the data
        mainPanel(
           plotOutput("lakePlot")
        )
    )
# )

# Define server logic required to draw a histogram
server <- function(input, output) {
    
    mnths <- c("January","February","March","April","May","June","July","August","September","October","November","December")
    
    url <- paste("https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6")
    webpage <- read_html(url)
    tbls_ls <- webpage %>%
        html_nodes("table") %>%
        .[1:1] %>%
        html_table(fill = TRUE)
    data <- as.data.frame(tbls_ls)
    
    data$DateTime <- as.POSIXct(data$Date, format="%m/%d/%Y %H:%M:%S %p")
    data$Date <- as.Date(data$DateTime)
    data$Locator <- "Washington"
    data <- data %>% rename(Depth="Depth..m.",Temperature="Temperature...C.",
                                Conductance="Specific.Conductance..µS.cm.",`Dissolved Oxygen`="DO.Concentration..mg.l.",
                                `DO Saturation`="DO.Saturation....",`Chlorophyll, Field`="Chlorophyll..µg.l.",
                                Turbidity="Turbidity..NTU.",`Phycocyanin, Field`="Phycocyanin..µg.l.")
    nms <- names(data)
    data <- data %>% gather(nms[3:10],key="ParmDisplayName",value="Value")

    output$lakePlot <- renderPlot({

        xlabel <- "Temperature"
        tmp <- data %>% filter(ParmDisplayName==xlabel)
        title <- paste(tmp$Locator[1],xlabel,"in",mnths[as.numeric(month(tmp$Date[1]))],year(tmp$Date[1]),sep=" ")
        mrged2 <- tmp[1:days_in_month(as.numeric(month(tmp$Date[1]))),]
        mrged2$Date <- seq(as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),1,sep="-")), as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),days_in_month(as.numeric(month(tmp$Date[1]))),sep="-")), by = "days")
        mrged2$Depth <- NA
        mrged2$Value <- NA
        #  
        tmp <- rbind(tmp,mrged2)
        # 
        ggplot(tmp, aes(x=Value,y=Depth,color=Locator)) +
            geom_point() +  scale_y_reverse() + facet_wrap(~Date) +
            xlim(0,30) + xlab("") +
            ggtitle(title) 
    })
}

# Run the application 
shinyApp(ui = ui, server = server)
2

2 Answers

0
votes

A little eery that I made it to this page searching for a solution to the same problem, also while trying to scrape King County info. I'll keep hunting and will post back if I find something useful.

0
votes

A colleague provided a solution using curl (not an ideal solution because it disables the verification of the SSL certificate but it works). At least part of the solution was posted by Cole (RStudio Employee) here.

library(shiny)
library(tidyverse)
library(lubridate)
library(rvest)

# add this function
geturl <- function(url,handle) {
    curl::curl_fetch_memory(url, handle = handle)$content
}

# Define UI for application that gets data and creates a plot
ui <- fluidPage(

    # Application title
    titlePanel("Large Lakes Profile Plots"),

        # Show a plot of the data
        mainPanel(
           plotOutput("lakePlot")
        )
    )
# )

# Define server logic required to draw a histogram
server <- function(input, output) {

    mnths <- c("January","February","March","April","May","June","July","August","September","October","November","December")
    
    url <- paste("https://green2.kingcounty.gov/lake-buoy/DataScrape.aspx?type=profile&buoy=wa&year=2020&month=6")
    
    # add next two lines 
    h <- curl::new_handle()
    curl::handle_setopt(h, ssl_verifypeer = 0)

    # webpage <- read_html(url)
    # add next two lines to replace line above
    webpage <- read_html(geturl(url,h))
    rm(h)    
    #
    tbls_ls <- webpage %>%
        html_nodes("table") %>%
        .[1:1] %>%
        html_table(fill = TRUE)
    data <- as.data.frame(tbls_ls)
    
    data$DateTime <- as.POSIXct(data$Date, format="%m/%d/%Y %H:%M:%S %p")
    data$Date <- as.Date(data$DateTime)
    data$Locator <- "Washington"
    data <- data %>% rename(Depth="Depth..m.",Temperature="Temperature...C.",
                                Conductance="Specific.Conductance..µS.cm.",`Dissolved Oxygen`="DO.Concentration..mg.l.",
                                `DO Saturation`="DO.Saturation....",`Chlorophyll, Field`="Chlorophyll..µg.l.",
                                Turbidity="Turbidity..NTU.",`Phycocyanin, Field`="Phycocyanin..µg.l.")
    nms <- names(data)
    data <- data %>% gather(nms[3:10],key="ParmDisplayName",value="Value")

    output$lakePlot <- renderPlot({

        xlabel <- "Temperature"
        tmp <- data %>% filter(ParmDisplayName==xlabel)
        title <- paste(tmp$Locator[1],xlabel,"in",mnths[as.numeric(month(tmp$Date[1]))],year(tmp$Date[1]),sep=" ")
        mrged2 <- tmp[1:days_in_month(as.numeric(month(tmp$Date[1]))),]
        mrged2$Date <- seq(as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),1,sep="-")), as.Date(paste(year(tmp$Date[1]),month(tmp$Date[1]),days_in_month(as.numeric(month(tmp$Date[1]))),sep="-")), by = "days")
        mrged2$Depth <- NA
        mrged2$Value <- NA
        #  
        tmp <- rbind(tmp,mrged2)
        # 
        ggplot(tmp, aes(x=Value,y=Depth,color=Locator)) +
            geom_point() +  scale_y_reverse() + facet_wrap(~Date) +
            xlim(0,30) + xlab("") +
            ggtitle(title) 
    })
}

# Run the application 
shinyApp(ui = ui, server = server)