1
votes

I need some help with the grammar of ggplot2. I have some data that looks like this:

> dput(SOData)
structure(list(Compound = c("cmpd1", "cmpd2", "cmpd3", "cmpd2", 
"cmpd3", "cmpd3", "cmpd4", "cmpd5", "cmpd6", "cmpd1", "cmpd5", 
"cmpd6", "cmpd1", "cmpd1", "cmpd1", "cmpd1", "cmpd2", "cmpd2", 
"cmpd1", "cmpd1", "cmpd1", "cmpd1", "cmpd2", "cmpd2", "cmpd2", 
"cmpd2", "cmpd2", "cmpd2", "cmpd4", "cmpd1", "cmpd1", "cmpd1", 
"cmpd1", "cmpd2", "cmpd2", "cmpd2", "cmpd2", "cmpd2", "cmpd2", 
"cmpd2", "cmpd1", "cmpd1", "cmpd1", "cmpd1", "cmpd5", "cmpd7", 
"cmpd7", "cmpd4", "cmpd4", "cmpd4", "cmpd6", "cmpd5", "cmpd6", 
"cmpd5", "cmpd6", "cmpd7", "cmpd3", "cmpd5", "cmpd3", "cmpd6", 
"cmpd5", "cmpd6", "cmpd5", "cmpd3", "cmpd5", "cmpd7", "cmpd3", 
"cmpd7", "cmpd8", "cmpd8", "cmpd8", "cmpd6", "cmpd5", "cmpd6", 
"cmpd7", "cmpd1", "cmpd2", "cmpd3", "cmpd2", "cmpd3", "cmpd3", 
"cmpd4", "cmpd5", "cmpd6", "cmpd1", "cmpd5", "cmpd6", "cmpd1", 
"cmpd1", "cmpd1", "cmpd1", "cmpd2", "cmpd2", "cmpd1", "cmpd1", 
"cmpd1", "cmpd1", "cmpd2", "cmpd2", "cmpd2", "cmpd2", "cmpd2", 
"cmpd2", "cmpd4", "cmpd1", "cmpd1", "cmpd1", "cmpd1", "cmpd2", 
"cmpd2", "cmpd2", "cmpd2", "cmpd2", "cmpd2", "cmpd2", "cmpd1", 
"cmpd1", "cmpd1", "cmpd1", "cmpd5", "cmpd7", "cmpd7", "cmpd4", 
"cmpd4", "cmpd4", "cmpd6", "cmpd5", "cmpd6", "cmpd5", "cmpd6", 
"cmpd7", "cmpd3", "cmpd5", "cmpd3", "cmpd6", "cmpd5", "cmpd6", 
"cmpd5", "cmpd3", "cmpd5", "cmpd7", "cmpd3", "cmpd7", "cmpd8", 
"cmpd8", "cmpd8", "cmpd6", "cmpd5", "cmpd6", "cmpd7"), variable = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L), class = "factor", .Label = c("Avg SS (pA)", 
"Min Peak (pA)")), value = c(274, 109, 175, 113, 86, 121, 80, 
112, 311, 110, 101, 312, 97, 419, 494, 454, 169, 80, 114, 119, 
105, 392, 207, 103, 84, 102, 100, 86, 96, 79, 339, 356, 394, 
317, 227, 158, 54, 136, 104, 107, 86, 58, 66, 84, 72, 90, 111, 
95, 134, 89, 285, 50, 69, 78, 89, 249, 90, 80, 62, 248, 72, 85, 
96, 97, 108, 85, 31, 53, 482, 551, 388, 323, 59, 74, 233, 193, 
206, 162, 79, 97, 21, 72, 170, 144, 57, 21, 68, 94, 310, 223, 
262, 191, 116, 107, 108, 116, 149, 185, 153, 76, 99, 111, 103, 
129, 119, 395, 181, 203, 293, 192, 340, 74, 130, 107, 132, 284, 
93, 72, 92, 140, 75, 57, 71, 63, 141, 154, 21, 52, 50, 106, 63, 
184, 369, 89, 223, 173, 120, 111, 191, 298, 62, 65, 72, 325, 
286, 194, 339, 128, 91, 110), Conc = c("10.0", "10.0", "10.0", 
"1.00", "1.00", "0.1", ".3", "10.0", "10.0", "1.00", "1.00", 
"1.00", "0.1", "10.0", "10.0", "10.0", "10.0", "1.00", "1.00", 
"1.00", "0.1", "10.0", "10.0", "10.0", "1.00", "1.00", "1.00", 
"0.1", ".3", "1.00", "10.0", "10.0", "10.0", "10.0", "10.0", 
"10.0", "1.00", "1.00", "0.1", "0.1", "1.00", "1.00", "0.1", 
"0.1", "10.0", "1.00", "0.1", ".3", ".3", ".3", "10.0", "1.00", 
"1.00", "0.1", "0.1", "10.0", "1.00", "10.0", "0.1", "10.0", 
"1.00", "1.00", "0.1", "10.0", "10.0", "1.00", "0.1", "0.1", 
"10.0", "1.00", "0.1", "10.0", "1.00", "0.1", "10.0", "10.0", 
"10.0", "10.0", "1.00", "1.00", "0.1", ".3", "10.0", "10.0", 
"1.00", "1.00", "1.00", "0.1", "10.0", "10.0", "10.0", "10.0", 
"1.00", "1.00", "1.00", "0.1", "10.0", "10.0", "10.0", "1.00", 
"1.00", "1.00", "0.1", ".3", "1.00", "10.0", "10.0", "10.0", 
"10.0", "10.0", "10.0", "1.00", "1.00", "0.1", "0.1", "1.00", 
"1.00", "0.1", "0.1", "10.0", "1.00", "0.1", ".3", ".3", ".3", 
"10.0", "1.00", "1.00", "0.1", "0.1", "10.0", "1.00", "10.0", 
"0.1", "10.0", "1.00", "1.00", "0.1", "10.0", "10.0", "1.00", 
"0.1", "0.1", "10.0", "1.00", "0.1", "10.0", "1.00", "0.1", "10.0"
)), .Names = c("Compound", "variable", "value", "Conc"), row.names = c(NA, 
-150L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x0000000000110788>)

My ggplot2 code looks like this:

    # Raw observations (small solid circles, shape 16) plus the mean of each
    # variable at every concentration (large symbols, shape 9), one facet per
    # compound.
    # Both shapes are fixed constants set outside aes(), so shape is never
    # mapped to data: scale_shape_manual() has nothing to act on and only the
    # colour legend is drawn.
    FinalPlot <- ggplot(data = SOData, aes(x = Conc, y = value, color = variable)) +
      # x, y and colour are inherited from ggplot(); no need to repeat them.
      geom_point(size = 2, shape = 16) +
      # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
      stat_summary(
        aes(group = variable),
        fun = mean, geom = "point", size = 4, shape = 9
      ) +
      scale_shape_manual(name = "Legend", values = c(9, 16)) +
      facet_wrap(~Compound) +
      ylab("Current (pA)") +
      xlab("Concentration (µM)") +
      theme(
        text = element_text(size = 14, face = "bold"),
        strip.text.x = element_text(size = 16, face = "bold")
      )

About the plot:

Each facet will be an individual compound. In each facet, I want all the values for a given condition (variable), plus the mean of each variable. Right now I have geom_point plotting the raw values and stat_summary computing the mean, each with a different shape number. This works fine, but the legend is uninformative. How can I split the legend so that the point color shows the variable and the point shape shows whether the point is Raw or Mean?

1

1 Answer

2
votes

If you want something to show up in the legend, you generally have to map it inside aes(). You can simply map the shape to an informative name, and then map that name to an actual shape in your scale_shape_manual() call.

# Mapping shape to a constant string inside aes() creates a shape scale with
# the levels "raw" and "mean", which is what makes a shape legend appear.
# scale_shape_manual() then assigns the actual plotting symbol to each level
# by name.
ggplot(data = SOData, aes(x = Conc, y = value, color = variable)) +
  # x, y and colour are inherited from ggplot(); only shape is added here.
  geom_point(aes(shape = "raw"), size = 2) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(
    aes(group = variable, shape = "mean"),
    fun = mean, geom = "point", size = 4
  ) +
  # The title is passed by name rather than positionally so it cannot be
  # matched to a different scale argument.
  scale_shape_manual(name = "Legend", values = c(mean = 9, raw = 16)) +
  facet_wrap(~Compound) +
  ylab("Current (pA)") +
  xlab("Concentration (µM)") +
  theme(
    text = element_text(size = 14, face = "bold"),
    strip.text.x = element_text(size = 16, face = "bold")
  )

enter image description here

(Please note that a minimal example could contain a much smaller data set, with a data.frame instead of a data.table, no facets, no labels and no theme calls.)