I am trying to plot data from two separate studies in different colors in a line plot using ggplot2. This data comes from two independent studies that used a within-group design (multiple siblings in the same family, but each in different treatment groups). Thus, I've used "group" aesthetic to group individuals by family and, thus, to draw lines between individuals within the same family. This part works well, now all I want is for the the data collected in two separate studies to be represented in different colors. I've played around a bunch, manually changing the color, etc. and I just can't get it to work. Everything that I've seen online about manually changing the line color depends on "group" aesthetic, and I need color and group to be independent of each other in this case.
The data is in long format, with Study as a factor. I then used the summarySEwithin function, which comes from the R-cookbook code for plotting means (Available here: http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_%28ggplot2%29/) to build the data frame copied below.
Similarity90SUM<- summarySEwithin(Day90, measurevar="Similarity", betweenvars=c("Condition","Study"),withinvars=c("Family"), idvar="Male_Num",na.rm=TRUE)
I then used ggplot to build the line plot. This code does everything I want, except plot the data from the two studies in separate colors.
ggplot(Similarity90SUM, aes(x=Condition, y=Similarity, group=Family),colour=Study) +
geom_line() +
geom_point(shape=19, size=2)+
scale_colour_manual(values=c("blue","darkgreen"))
Unfortunately, all of the lines in the plot are black, rather than blue and green (sorry, I can't post the image).
I think that the issue derives from the fact that the variables used for group and colour are different because if I include colour=Study in the aes(), then I get the error message: "Error: Aesthetics must either be length one, or the same length as the dataProblems:Condition, Similarity, Family". Any suggestions would be helpful! I'm sure there is some relatively simple work-around that I'm just not aware of. Thanks, in advance!
dput(Similarity90SUM)
structure(list(Condition= structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Control",
"AVT", "MC"), class = "factor"), Study = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Study 1", "Study 2"), class = "factor"), Family = structure(c(7L,
19L, 20L, 21L, 1L, 4L, 5L, 8L, 12L, 17L, 3L, 6L, 18L, 19L, 22L,
1L, 4L, 5L, 8L, 12L, 17L, 6L, 7L, 10L, 13L, 14L, 18L, 19L, 20L,
1L, 4L, 5L, 12L, 17L), .Label = c("1", "150-2", "150-8", "2",
"3", "34-4", "34-8", "4", "48-1", "48-2", "48-6", "5", "54-1",
"54-5", "54-8", "6", "7", "79-2", "79-4", "87-4", "87-6", "87-8"
), class = "factor"), N = c(1, 1, 1, 1, 10, 9, 10, 10, 10, 4,
1, 1, 1, 2, 1, 10, 4, 7, 7, 10, 10, 1, 1, 1, 1, 1, 1, 1, 1, 9,
10, 10, 10, 9), Similarity = c(73, 89, 80, 91, 93.3, 80.3333333333333,
46.1, 90.4, 81, 86.25, 62, 74, 75, 76.5, 66, 83.9, 79.75, 92.1428571428571,
68.7142857142857, 47.9, 91.2, 61, 76, 52, 41, 47, 60, 78, 61,
64.8888888888889, 49.1, 26.4, 44.3, 75), Similarity_norm = c(69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503, 69.1437125748503, 69.1437125748503, 69.1437125748503,
69.1437125748503), sd = c(NA, NA, NA, NA, 2.68860027311079, 5.29375193259087,
8.29349206197309, 3.34980059423901, 5.68856645911677, 4.12599194422499,
NA, NA, NA, 0, NA, 4.13515065015422, 4.37253331823185, 4.63154911077632,
5.5344724669591, 4.58880625622207, 3.47264441877645, NA, NA,
NA, NA, NA, NA, NA, NA, 10.5806302009727, 0.896200091036931,
7.30470602822113, 12.0629302291892, 3.02765035409749), se = c(NA,
NA, NA, NA, 0.850210058078086, 1.76458397753029, 2.62263246723613,
1.05929995851808, 1.79888266320481, 2.0629959721125, NA, NA,
NA, 0, NA, 1.30764945224135, 2.18626665911592, 1.75056101887093,
2.09183396935827, 1.4511079510892, 1.09814658672052, NA, NA,
NA, NA, NA, NA, NA, NA, 3.52687673365756, 0.283403352692695,
2.3099508687141, 3.81463347799347, 1.00921678469916), ci = c(NA,
NA, NA, NA, 1.92330877276442, 4.06913794909986, 5.93280682114534,
2.39630298871351, 4.06935530160228, 6.56537390926458, NA, NA,
NA, 0, NA, 2.95810857481691, 6.95767625166843, 4.28346850322768,
5.11853333025038, 3.28263424554985, 2.48418016695222, NA, NA,
NA, NA, NA, NA, NA, NA, 8.13299233216971, 0.641102924254807,
5.22547190337353, 8.62930044569276, 2.32725807883372)), .Names = c("Condition",
"Study", "Family", "N", "Similarity", "Similarity_norm", "sd",
"se", "ci"), row.names = c(NA, -34L), class = "data.frame")
dput
– jentjrcolour = Study
inside of youraes()
call. – Gregor Thomas