0
votes

I can't seem to get my legend labels and colours to match correctly in ggplot2. I have some geom_segments that I don't want to include in the legend. I have tried various options and none work. None of the existing questions seem to deal with the issue of not labelling some elements on the plot, so maybe that is adding complexity. The code is below:

library("distr")
library("ggplot2")
# Lognormal parameters (meanlog = Dist1_parm1, sdlog = Dist1_parm2) derived
# from a target mean and coefficient of variation (CV).
Percent_values<-c(0.5,0.75,0.9,0.95,0.995,0.999)
Dist1_mean=20219
Dist1_CV=3235/20219
# standard deviation implied by mean and CV (not referenced again in the code shown)
Dist1_SDEV<-Dist1_mean*Dist1_CV
Dist1_parm2<-sqrt(log(1+Dist1_CV^2))
Dist1_parm1<-log(Dist1_mean)-(Dist1_parm2^2)/2
# lognormal quantiles, one per probability in Percent_values
Dist1_quant<-qlnorm(Percent_values,meanlog=Dist1_parm1,sdlog=Dist1_parm2)
# Draw the lognormal CDF; guide segments for the mean and the chosen
# percentiles are added to the plot further down.
# Note: colour="blue" is passed as a fixed argument here, outside aes().
a1<-stat_function(fun = 
plnorm,args=list(meanlog=Dist1_parm1,sdlog=Dist1_parm2),geom="line", 
colour="blue",size=1.25)
# x-axis range: 0 up to 1.1 times the last quantile (Percent_values[6] = 0.999)
lowerx<-0
upperx<-1.1*Dist1_quant[6]
plot1<-ggplot(data.frame(x = c(lowerx, upperx)), aes(x = x))+a1
# axis titles (the y-axis title string contains a line break as written)
plot1<-
plot1+scale_x_continuous(name="Value")+scale_y_continuous(name="Cumulative 
probability")
# Mean: solid vertical segment from the x-axis up to the CDF value at the mean,
# plus a dotted horizontal segment from the y-axis across to the same point.
mean_yvalue<-plnorm(Dist1_mean,meanlog=Dist1_parm1,sdlog=Dist1_parm2)
# colour="red" sits inside aes(), so it is a mapped value handled by the
# colour scale rather than a fixed colour setting
plot1<-plot1+geom_segment(aes(x=Dist1_mean,y=0,
xend=Dist1_mean,yend=mean_yvalue,colour="red"),size=1.25)
# dotted companion segment; show.legend = FALSE keeps this layer out of the legend
plot1<-
plot1+geom_segment(aes(x=0,y=mean_yvalue,xend=Dist1_mean,
yend=mean_yvalue,colour="red"),size=1.25,linetype="dotted",show.legend = 
FALSE)
# 75th percentile: same pair of segments, using Dist1_quant[2]
# (Percent_values[2] is 0.75)
perc<-0.75
p75<-Dist1_quant[2]
plot1<-
plot1+geom_segment(aes(x=p75,y=0,xend=p75,
yend=perc,colour="green"),size=1.25)
plot1<-plot1+geom_segment(aes(x=0,y=perc,xend=p75,
yend=perc,colour="green"),size=1.25,linetype="dotted",show.legend = FALSE)
#and 99.5th percentile
perc2<-0.995
# Percent_values[5] is 0.995, so the matching quantile is Dist1_quant[5];
# element 6 is the 99.9th percentile and would put the guide lines off the curve.
p995<-Dist1_quant[5]
# solid vertical segment from the x-axis up to the CDF at the 99.5th percentile
plot1<-plot1+geom_segment(aes(x=p995,y=0,xend=p995,yend=perc2,colour="orange"),
                          size=1.25)
# dotted horizontal segment from the y-axis across to the curve; kept out of the legend
plot1<-plot1+geom_segment(aes(x=0,y=perc2,xend=p995,yend=perc2,colour="orange"),
                          size=1.25,linetype="dotted",show.legend=FALSE)
# Add the title and relabel the colour legend, then print the plot.
# The four labels are supplied unnamed, so they are matched to the scale's
# breaks by position rather than by name.
# NOTE(review): only the geom_segment layers map colour through aes(), so the
# scale presumably sees three values ("green", "orange", "red"); that would
# explain the three-entry legend described in the question text.
plot1<-plot1+ggtitle("Cumulative density function of estimated future claims 
outgo")+
scale_colour_discrete(name="", labels=c("Lognormal", "Mean","75th 
%ile","99.5th %ile"))
plot1

This produces a chart (which I can't seem to upload for some reason) that has two problems: a) it only has three legend items ("Lognormal", "Mean" and "75th %ile") when I want four (with "99.5th %ile" added to these three), and b) the three items are coloured red, green and blue, respectively, whereas I want Lognormal as blue, Mean as red, 75th as green and 99.5th as orange. The dotted lines should remain on the plot, but not appear in the legend.

What am I doing wrong? Presumably it is something to do with aesthetics and "scale_colour_discrete", but I can't work out what to do. Any help would be much appreciated.

Thanks.

2

2 Answers

2
votes

To add to the answer by www, I find that it saves a lot of risk of confusion if you don't create aesthetic mappings to the names of colors. Instead use a meaningful label like "mean" for the mean line. That way if you change your mind about color selection later you don't end up with something absurd like a manual scale that maps "blue" to "orange".

Generally, aesthetic mappings should not map to colors directly, but rather should assign values that will be mapped to colors later (either manually or automatically).

EDIT:

At bdemarest's suggestion, I'm adding some more explanation and code. Here's what www's refactoring of the OP's code would look like:

library("distr")
library("ggplot2")

#calculations
# Lognormal parameters (meanlog, sdlog) derived from the target mean and
# coefficient of variation (CV).
Percent_values <- c(0.5, 0.75, 0.9, 0.95, 0.995, 0.999)
Dist1_mean <- 20219
Dist1_CV <- 3235 / 20219
Dist1_SDEV <- Dist1_mean * Dist1_CV
Dist1_parm2 <- sqrt(log(1 + Dist1_CV^2))
Dist1_parm1 <- log(Dist1_mean) - (Dist1_parm2^2) / 2

# quantiles of the lognormal, one per entry of Percent_values
Dist1_quant <- qlnorm(Percent_values, meanlog = Dist1_parm1,
                      sdlog = Dist1_parm2)

# x-axis range: 0 up to 10% beyond the largest tabulated quantile (99.9th)
lowerx <- 0
upperx <- 1.1 * Dist1_quant[6]

# CDF value at the mean (height of the "mean" guide segments)
mean_yvalue <- plnorm(Dist1_mean, meanlog = Dist1_parm1,
                      sdlog = Dist1_parm2)

# percentile guide lines: probability level and the matching quantile
perc <- 0.75
p75 <- Dist1_quant[2]   # Percent_values[2] is 0.75
perc2 <- 0.995
# Percent_values[5] is 0.995; element 6 is the 99.9th percentile, which would
# put the 99.5% guide segments off the curve.
p995 <- Dist1_quant[5]

#plot
# Colour is mapped inside aes() to descriptive keys ("logn", "mean", "75th",
# "995th"); the manual colour scale at the end turns each key into a colour
# and a legend label. Dotted segments use show.legend = FALSE so only the
# solid layers contribute legend keys.
ggplot(data.frame(x = c(lowerx, upperx)), aes(x = x)) +
  # lognormal CDF curve
  stat_function(fun = plnorm,
                args = list(meanlog = Dist1_parm1,
                            sdlog = Dist1_parm2),
                geom = "line",
                aes(colour = "logn"), # logn label
                size = 1.25) +
  scale_x_continuous(name = "Value") +
  scale_y_continuous(name = "Cumulative probability") +
  # mean: solid vertical segment up to the curve
  geom_segment(aes(x = Dist1_mean, y = 0,
                   xend = Dist1_mean,
                   yend = mean_yvalue,
                   colour = "mean"), # mean label
               size = 1.25) +
  # mean: dotted horizontal segment from the y-axis to the curve
  geom_segment(aes(x = 0, y = mean_yvalue,
                   xend = Dist1_mean,
                   yend = mean_yvalue,
                   colour = "mean"), # mean label
               size = 1.25, linetype = "dotted",
               show.legend = FALSE) +
  # 75th percentile: solid vertical segment
  geom_segment(aes(x = p75, y = 0,
                   xend = p75, yend = perc,
                   colour = "75th"), # 75th percentile label
               size = 1.25) +
  # 75th percentile: dotted horizontal segment
  geom_segment(aes(x = 0, y = perc, xend = p75,
                   yend = perc,
                   colour = "75th"), # 75th percentile label
               size = 1.25, linetype = "dotted",
               show.legend = FALSE) +
  # 99.5th percentile: solid vertical segment
  geom_segment(aes(x = p995, y = 0, xend = p995,
                   yend = perc2,
                   colour = "995th"), # 99.5th percentile label
               size = 1.25) +
  # 99.5th percentile: dotted horizontal segment
  geom_segment(aes(x = 0, y = perc2, xend = p995,
                   yend = perc2,
                   colour = "995th"), # 99.5th percentile label
               size = 1.25, linetype = "dotted",
               show.legend = FALSE) +
  ggtitle("Cumulative density function of estimated future claims outgo") +
  scale_colour_manual(name = "",
                      # keys map onto colors and pretty labels
                      values = c("logn" = "blue",
                                 "mean" = "red",
                                 "75th" = "green",
                                 "995th" = "orange"),
                      # explicit breaks fix the legend order; without them the
                      # keys are listed alphabetically (75th, 995th, logn, mean)
                      breaks = c("logn", "mean", "75th", "995th"),
                      labels = c("logn" = "Lognormal",
                                 "mean" = "Mean",
                                 "75th" = "75th %ile",
                                 "995th" = "99.5th %ile"))

Note that in the aes mappings, color names have been replaced with descriptive labels (e.g., "red" became "mean" and "blue" became "logn"), and the scale_colour_manual call has been changed to map these labels onto both colors and pretty labels for the legend. That way, if you later decide to make the 75th percentile segments purple instead of green, you just have to change the color mapping in the scale_colour_manual call at the bottom -- you don't have to dig through the code and find the right geom_segments to change, and, if you're too lazy to do that, you won't end up with something horrible, like:

scale_colour_manual(values=c("blue"="blue",
                             "red"="red",
                             "green"="purple", # !!!???
                             "orange"="orange"))

Instead, you (very intuitively) change the color of the 75th components to be purple:

scale_colour_manual(values=c("logn"="blue",
                             "mean"="red",
                             "75th"="purple", # makes sense
                             "995th"="orange"))

Changing the code to use meaningful labels in the aesthetic mappings simply makes it cleaner, more intuitive, and easier to read and maintain. It does not change the final output of the graph:

New output is same as old

But if you're going to be attempting a complicated graph with lots of components and an information-packed legend, this is a small change of habit that will likely save you a great deal of frustration down the road.

2
votes

Add aes() around your colour argument in the "a1" variable. Then specify colors and labels for the legend with scale_color_manual().

For future graphing, it may be easier to keep track of errors if calculations are done together in the first part of the script, then data is plotted after all calculations. Otherwise, it seems like troubleshooting might take a while longer if everything is mixed together and overlapping.

Try this:

library("distr")
library("ggplot2")

#calculations
# lognormal meanlog/sdlog derived from the target mean and coefficient of variation
Percent_values<-c(0.5,0.75,0.9,0.95,0.995,0.999)
Dist1_mean<-20219
Dist1_CV<-3235/20219
Dist1_SDEV<-Dist1_mean*Dist1_CV
Dist1_parm2<-sqrt(log(1+Dist1_CV^2))
Dist1_parm1<-log(Dist1_mean)-(Dist1_parm2^2)/2
# one quantile per probability in Percent_values
Dist1_quant<-qlnorm(Percent_values,meanlog=Dist1_parm1,sdlog=Dist1_parm2)
# x-axis limits: 0 to 1.1 times the 99.9th percentile (last element)
lowerx<-0
upperx<-1.1*Dist1_quant[6]
# CDF height at the mean
mean_yvalue<-plnorm(Dist1_mean,meanlog=Dist1_parm1,sdlog=Dist1_parm2)
# probability levels for the guide lines and their matching quantiles
perc<-0.75
p75<-Dist1_quant[2]
perc2<-0.995
# 0.995 is the 5th entry of Percent_values (the 6th is 0.999), so the 99.5th
# percentile is Dist1_quant[5]
p995<-Dist1_quant[5]

#plot
# Every layer maps colour inside aes() so that it is handled by the manual
# colour scale; the dotted segments are excluded from the legend.
ggplot(data.frame(x = c(lowerx, upperx)), aes(x = x))+
  # lognormal CDF
  stat_function(fun=plnorm,args=list(meanlog=Dist1_parm1,sdlog=Dist1_parm2),
                geom="line",aes(colour="blue"),size=1.25) +
  scale_x_continuous(name="Value")+
  scale_y_continuous(name="Cumulative probability") +
  # mean: solid vertical + dotted horizontal guide
  geom_segment(aes(x=Dist1_mean,y=0,
               xend=Dist1_mean,yend=mean_yvalue,colour="red"),size=1.25) +
  geom_segment(aes(x=0,y=mean_yvalue,xend=Dist1_mean,
               yend=mean_yvalue,colour="red"),size=1.25,linetype="dotted",
               show.legend = FALSE) +
  # 75th percentile guides
  geom_segment(aes(x=p75,y=0,xend=p75,
               yend=perc,colour="green"),size=1.25) +
  geom_segment(aes(x=0,y=perc,xend=p75,
               yend=perc,colour="green"),size=1.25,linetype="dotted",
               show.legend = FALSE) +
  # 99.5th percentile guides
  geom_segment(aes(x=p995,y=0,xend=p995,
               yend=perc2,colour="orange"),size=1.25) +
  geom_segment(aes(x=0,y=perc2,xend=p995,
               yend=perc2,colour="orange"),size=1.25,linetype="dotted",
               show.legend = FALSE) +
  ggtitle("Cumulative density function of estimated future claims outgo") +
  # The labels are unnamed, so they are matched to the breaks by position.
  # Without explicit breaks the keys are sorted (blue, green, orange, red) and
  # "Mean" would be attached to green, "75th %ile" to orange, and so on.
  # Listing the breaks in the same order as the labels keeps each label on
  # the intended colour and fixes the legend order.
  scale_colour_manual(name="", values=c("blue"="blue", "red"="red",
                                        "green"="green","orange"="orange"),
                      breaks=c("blue","red","green","orange"),
                      labels=c("Lognormal","Mean","75th %ile","99.5th %ile"))

Output:

enter image description here