2
votes

I am creating a graph using ggplot in R but the legend is not showing up properly. First, I am getting two legends, one for color and one for linetype. These are both showing up despite the fact that I have put the same items in scale_color_manual and scale_linetype_manual as suggested in several other posts on this topic. Additionally, the color legend is showing the same shape (a line with a dot and an x) for each of the three items when they should all be different (the first two should be a line with a dot while the third should be an x with no line).

Here is a reproducible example.

library(ggplot2)
library(dplyr)

# Colour palette: a named character vector of hex codes, one per colour,
# built from 0-255 red/green/blue channel values (one matrix row per colour).
b.navHexRGB <- local({
  channels <- rbind(
    green      = c(149, 214,   0),
    red        = c(229,  60,  46),
    gray       = c( 85,  87,  89),
    dark_green = c(100, 140,  26),
    yellow     = c(255, 183,  24),
    purple     = c(139,  24, 155),
    blue       = c(  0, 147, 201)
  )
  # rgb() reads the three columns of a matrix as red, green and blue.
  rgb(channels, names = rownames(channels), maxColorValue = 255)
})

# Create plot: mean_kwh per bill_yrmo as a line plus points for each
# treatment group, with an "x" marker (shape 4) at y = stat_sig wherever
# stat_sig is not NA.
ggplot(data = df, aes(x = as.character(bill_yrmo), y = mean_kwh)) +
  geom_line(aes(group = treatment, colour = treatment, linetype = treatment),
            size = .9) +
  geom_point(aes(group = treatment, colour = treatment),
             size = 1.5) +
  # Marker layer: every row is relabelled with the legend text so the markers
  # get a colour-legend entry of their own.
  geom_point(data = df %>% mutate(treatment = "Indicates the difference is statistically significant"),
             aes(y = stat_sig, colour = treatment),
             size = 2.5,
             shape = 4,
             na.rm = TRUE) +
  guides(colour = guide_legend(nrow = 3)) +
  # Pass the hex codes themselves: palette(x) *sets* the graphics palette and
  # returns the previous one, so the first run would plot with the old palette.
  # unname() drops the colour names so they are not treated as data values.
  scale_color_manual(name = "Variable",
                     values = unname(b.navHexRGB),
                     breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")) +
  scale_linetype_manual(name = "Variable",
                        values = c(1, 2),
                        breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")) +
  ylab("Average Daily Consumption (kWh)") +
  xlab("Year-Month") +
  theme_bw() +
  theme(legend.title = element_blank(),
        legend.justification = c(0, 0),
        legend.position = "bottom",
        legend.key = element_rect(fill = "white", colour = "white"),
        #legend.key.width = unit(1.1, "cm"),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.title.y = element_text(vjust = 1)
  )

Data

# Example data (dput() output): 24 rows, one "Control" and one "Recipient" row
# for each billing month from 201309 to 201408. Within a month both rows share
# the same p.value. In this data stat_sig is 19 on rows whose p.value is below
# 0.05 and NA on the others.
df <- structure(list(treatment = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
1L, 2L), .Label = c("Control", "Recipient"), class = "factor"), 
    bill_month = c(9, 9, 10, 10, 11, 11, 12, 12, 1, 1, 2, 2, 
    3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8), bill_year = c(2013, 
    2013, 2013, 2013, 2013, 2013, 2013, 2013, 2014, 2014, 2014, 
    2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 
    2014, 2014, 2014), bill_yrmo = c(201309, 201309, 201310, 
    201310, 201311, 201311, 201312, 201312, 201401, 201401, 201402, 
    201402, 201403, 201403, 201404, 201404, 201405, 201405, 201406, 
    201406, 201407, 201407, 201408, 201408), mean_kwh = c(34.1891698781763, 
    34.8263665605318, 22.998584869823, 23.6329516672246, 21.0428206185862, 
    21.7774153609304, 25.4992975653725, 25.8397296039854, 28.74368522348, 
    29.200670842288, 29.8474912589325, 30.373483172434, 26.7411627390396, 
    26.4600472396878, 21.628265542195, 21.3047667878863, 19.502019234349, 
    19.062337524723, 24.1381516068859, 24.3165665754673, 27.8915927136898, 
    28.3625761820341, 26.8570348685593, 27.1359185596385), p.value = c(9.36594553258583e-07, 
    9.36594553258583e-07, 1.76373182797948e-13, 1.76373182797948e-13, 
    2.12425701682086e-15, 2.12425701682086e-15, 0.00415203493379312, 
    0.00415203493379312, 0.00109178463449181, 0.00109178463449181, 
    0.00122110380638705, 0.00122110380638705, 0.0438138636035026, 
    0.0438138636035026, 0.00140538140516743, 0.00140538140516743, 
    5.74367939388898e-07, 5.74367939388898e-07, 0.100848768452669, 
    0.100848768452669, 0.000172505914392074, 0.000172505914392074, 
    0.145110211153141, 0.145110211153141), stat_sig = c(19, 19, 
    19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
    19, NA, NA, 19, 19, NA, NA)), .Names = c("treatment", "bill_month", 
"bill_year", "bill_yrmo", "mean_kwh", "p.value", "stat_sig"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -24L))
4
It's just a specified color palette. – carparminder

4 Answers

0
votes

Some of your calls to geom_line and geom_point can be simplified if the general aesthetic mappings for the plot are handled in the main ggplot call. But the main issue is the way you specify colors and line types in the scale calls. These functions are much less error prone if you provide a named vector to the values argument, which guarantees control over the mapping.

This code:

# Colour and linetype are mapped once in the main ggplot() call. Both manual
# scales share the same name and breaks and take *named* `values` vectors, so
# each legend entry is tied explicitly to its colour and line type.
ggplot(data = df, aes(x = as.character(bill_yrmo), y = mean_kwh, color = treatment, lty = treatment)) +
  geom_line(size = .9, aes(group = treatment)) +
  geom_point(size = 1.5) +
  # Marker layer: rows relabelled with the legend text, drawn as "x" (shape 4)
  # at y = stat_sig; rows with NA stat_sig are dropped silently.
  geom_point(data = df %>% mutate(treatment = "Indicates the difference is statistically significant"),
             aes(y = stat_sig, colour = treatment),
             size = 2.5,
             shape = 4,
             na.rm = TRUE) +
  scale_color_manual(
    name = "Variable",
    values = c(
      "Recipient" = b.navHexRGB[["gray"]],
      "Control" = b.navHexRGB[["green"]],
      "Indicates the difference is statistically significant" = b.navHexRGB[["red"]]
    ),
    breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")
  ) +
  scale_linetype_manual(
    name = "Variable",
    # Linetype 0 is "blank": the marker entry gets no line.
    values = c(
      "Recipient" = 2,
      "Control" = 1,
      "Indicates the difference is statistically significant" = 0
    ),
    breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")
  ) +
  labs(x = "Year-Month", y = "Average Daily Consumption (kWh)") +
  theme_bw() +
  theme(legend.title = element_blank(),
        legend.justification = c(0, 0),
        legend.position = "bottom",
        legend.key = element_rect(fill = "white", colour = "white"),
        legend.direction = "vertical",
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.title.y = element_text(vjust = 1)
  )

Produces this plot:

enter image description here

0
votes

I often find it useful to line up the data before calling ggplot. I rbind the "statistically significant" rows to the main dataframe, and align the "y" aesthetic for the "statistically significant" rows to be the same as the other data (mean_kwh = stat_sig):

# Stack a relabelled copy of df under the original rows: the copy carries the
# legend text as its treatment and uses stat_sig as its y value (mean_kwh).
dd <- rbind(
  df,
  mutate(
    df,
    treatment = "Indicates the difference is statistically significant",
    mean_kwh = stat_sig
  )
)

Then call ggplot. Note that the "statistically significant" group also has a linetype, but it is set to 0 (blank), so no line is drawn for it:

# Create plot from the stacked data: every group (including the
# "statistically significant" rows) is mapped to colour, shape and linetype.
ggplot(data = dd, aes(x = as.character(bill_yrmo), y = mean_kwh)) +
  geom_point(aes(group = treatment, colour = treatment, shape = treatment),
             size = 1.5) +
  geom_line(aes(group = treatment, colour = treatment, linetype = treatment),
            size = .9) +
  scale_shape_manual(values = c(1, 2, 4)) +
  # Pass the hex codes themselves: palette(x) *sets* the graphics palette and
  # returns the previous one, so the first run would plot with the old palette.
  # unname() drops the colour names so they are not treated as data values.
  scale_color_manual(values = unname(b.navHexRGB),
                     breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")) +
  # Linetype 0 is "blank": no line is drawn for the third group.
  scale_linetype_manual(values = c(1, 2, 0),
                        breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")) +
  labs(y = "Average Daily Consumption (kWh)",
       x = "Year-Month") +
  theme_bw() +
  theme(legend.title = element_blank(),
        legend.justification = c(0, 0),
        legend.position = "bottom",
        legend.key = element_rect(fill = "white", colour = "white"),
        axis.text.x = element_text(angle = 45, hjust = 1, color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.title.y = element_text(vjust = 1)
  )

Output:

enter image description here

0
votes

Thanks All. I combined the two responses above to get what I needed.

# Points and lines share the treatment grouping and colour, so those mappings
# sit in the main aes(); shape and linetype are mapped per layer. All three
# manual scales take named values and list the same breaks.
ggplot(
  data = avgkwh_pre2,
  aes(
    x = as.character(bill_yrmo),
    y = mean_kwh,
    group = treatment,
    colour = treatment
  )
) +
  geom_point(aes(shape = treatment), size = 2) +
  geom_line(aes(linetype = treatment), size = .9) +
  scale_shape_manual(
    values = c(
      "Recipient" = 16,
      "Control" = 16,
      "Indicates the difference is statistically significant" = 4
    )
  ) +
  scale_color_manual(
    values = c(
      "Recipient" = b.navHexRGB[["gray"]],
      "Control" = b.navHexRGB[["green"]],
      "Indicates the difference is statistically significant" = b.navHexRGB[["red"]]
    ),
    breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")
  ) +
  scale_linetype_manual(
    values = c(
      "Recipient" = 1,
      "Control" = 2,
      "Indicates the difference is statistically significant" = 0
    ),
    breaks = c("Control", "Recipient", "Indicates the difference is statistically significant")
  ) +
  labs(
    x = "Year-Month",
    y = "Average Daily Consumption (kWh)",
    title = paste("Group Starting", rct_start)
  ) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.justification = c(0, 0),
    legend.position = "bottom",
    legend.key = element_rect(fill = "white", colour = "white"),
    # legend.key.width = unit(1.1, "cm"),
    axis.text.x = element_text(angle = 45, hjust = 1, color = "black"),
    axis.text.y = element_text(color = "black"),
    axis.title.y = element_text(vjust = 1)
  )
0
votes

I had a similar problem. Specifying name="Variable" in scale_color_manual(), scale_shape_manual() and scale_linetype_manual() solved the problem. Passing a named vector to the values argument did not change the result for me.