0
votes

I'm using ggplot to plot forecast data. When I try to show the legend, the colors in the legend don't reflect the colors specified in geom_line and geom_point. Also, the legend keys are drawn as square boxes, which I'm trying to get rid of. I have used geom_ribbon to show the upper and lower bounds of the prediction; however, the line of predicted values is not shown, and the confidence interval was added to the legend. I'm not sure what I'm missing. Any suggestions?

Here is my code:

library(ggplot2)
library(forecast)
# Example data: 95 rows, one per consecutive day.
# Dates are written as day numbers since 1970-01-01 (18316 through 18410).
df <- data.frame(
  Date = as.Date(18316:18410, origin = "1970-01-01"),
  Count = c(
    5L, 11L, 26L, 43L, 45L, 45L, 46L, 56L, 56L, 56L,
    57L, 57L, 60L, 63L, 63L, 67L, 67L, 75L, 95L, 97L,
    103L, 111L, 118L, 127L, 130L, 137L, 149L, 158L, 159L, 152L,
    152L, 159L, 168L, 171L, 188L, 194L, 216L, 237L, 261L, 335L,
    385L, 456L, 561L, 637L, 743L, 798L, 869L, 1020L, 1091L, 1148L,
    1176L, 1196L, 1296L, 1395L, 1465L, 1603L, 1619L, 1657L, 1792L, 1887L,
    1986L, 2217L, 2249L, 2254L, 2241L, 2327L, 2459L, 2745L, 2883L, 3169L,
    3291L, 3732L, 4028L, 4142L, 4695L, 4952L, 5901L, 6314L, 7101L, 7683L,
    8436L, 9124L, 9852L, 10645L, 11234L, 11962L, 12559L, 13275L, 13911L, 14569L,
    15029L, 15181L, 15097L, 15146L, 15229L
  )
)

# Fit an ARIMA model to the observed counts and build a plotting table of the
# point forecast with its interval bounds.

# NOTE(review): frequency = 365.25 declares a yearly cycle, but df holds only
# 95 daily rows -- confirm this is intended.
tm <- ts(df$Count, frequency = 365.25)

fit.xts <- auto.arima(tm)
forecast_length <- 40
fore.xts <- forecast(fit.xts, h = forecast_length)

# One date per forecast step, starting the day after the last observation.
# Derived from forecast_length so the horizon is defined in a single place.
Date <- seq(max(df$Date) + 1, by = "day", length.out = forecast_length)

# forecast() returns time-series objects; as.numeric() strips the ts class so
# the columns are plain doubles.
forecast_point <- as.numeric(fore.xts$mean)
# Column 2 of lower/upper is presumably the 95% bound (forecast()'s default
# levels are 80 and 95) -- verify if `level` is ever passed explicitly.
forecast_lower <- as.numeric(fore.xts$lower[, 2])
forecast_upper <- as.numeric(fore.xts$upper[, 2])

# Base data.frame() instead of tibble(): no attached package provides tibble().
forecast_df <- data.frame(Date, forecast_point, forecast_lower, forecast_upper)

# Plot from the question, kept exactly as asked about: observed counts as
# points and a line, plus a ribbon for the forecast interval.
#
# Why the legend looks wrong: a quoted string inside aes() ("Count", "blue")
# is a legend key, not a colour. The unnamed `values` and `labels` below are
# then assigned to those keys in their sorted order, not in the order the
# layers were written. The ribbon also joins the colour legend, and no layer
# draws forecast_point as a line.
ggplot(data = df, mapping = aes(x = Date, y = Count)) +
  geom_point(mapping = aes(colour = "Count")) +
  geom_line(mapping = aes(colour = "Count")) +
  geom_ribbon(
    mapping = aes(
      x = Date,
      y = forecast_point,
      ymin = forecast_lower,
      ymax = forecast_upper,
      colour = "blue"
    ),
    data = forecast_df,
    fill = "gray90",
    alpha = 0.5
  ) +
  scale_colour_manual(
    values = c("black", "blue"),
    labels = c("observed", "95% C.I.")
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b/%d") +
  scale_y_continuous(name = "Y") +
  labs(colour = NULL) +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical")
1

1 Answer

1
votes

This fixes the coloring, plots the line of predicted values, and gets rid of the boxes in the legend:

# Corrected plot: observed counts in black, the point forecast in blue, and
# the forecast interval as an unoutlined grey band that stays out of the legend.
#
# Inside aes(), a quoted string is a legend key, not a colour. Each key gets a
# descriptive name and is tied to its colour through a *named* `values`
# vector, so the pairing no longer depends on how the keys happen to sort.
ggplot(df, aes(x = Date, y = Count)) +
  geom_point(aes(colour = "observed")) +
  geom_line(aes(colour = "observed")) +
  geom_ribbon(
    data = forecast_df,
    aes(x = Date, ymin = forecast_lower, ymax = forecast_upper),
    # forecast_df has no Count column, so the plot-level y must not be inherited.
    inherit.aes = FALSE,
    fill = "gray90",
    alpha = 0.5,
    # Keeping the ribbon out of the legend removes the filled boxes from the keys.
    show.legend = FALSE
  ) +
  # The predicted values themselves; x = Date is inherited, y is overridden.
  geom_line(
    data = forecast_df,
    aes(y = forecast_point, colour = "forecast")
  ) +
  scale_colour_manual(
    breaks = c("observed", "forecast"),
    values = c(observed = "black", forecast = "blue")
  ) +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical") +
  labs(colour = NULL) +
  scale_y_continuous("Y") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b/%d")

enter image description here