1
votes

I have a data frame that looks like this:

 genotype     DIV3     DIV4 ...
 WT           12.4     15.2
 WT           35.4     35.3
 HET          1.3      1.2
 HET          1.5      5.2

I calculate the means and standard deviations with the following calls:

  # Group the rows by genotype and average every other column.
  means <- aggregate(. ~ genotype, data = dat, FUN = mean)
  # Same grouping, this time taking the standard deviation of each column.
  errors <- aggregate(. ~ genotype, data = dat, FUN = sd)

I am using ggplot2 to plot the means as a scatter plot. I want to use the errors dataframe for error bars, but I am having trouble calculating ymin and ymax since I have two dataframes.

Is there a better way to do this?

EDIT: ggplot2 code:

 # Melt the wide table of means into long format. The result is stored as
 # x_melt because that is the object the ggplot() call below plots.
 x_melt <- melt(means)
 # One line per genotype across the measured variables, with a point at
 # each mean.
 ggplot(x_melt, aes(group=genotype, variable, value, col=genotype, shape = genotype)) + 
   geom_line() +
   geom_point(size=3)+
   theme(axis.text=element_text(size=14),
         axis.title.x=element_blank(),
         axis.text.x=element_text(angle = 45, vjust = 0.8, hjust = .9, color = "black"),
         axis.text.y=element_text(color="black"))
1
It is not clear what your expected result is. – akrun
@akrun Sorry, which part is unclear? – bdevil
You could have both the mean and sd in a single dataset: do.call(data.frame, aggregate(. ~ genotype, dat, FUN = function(x) c(Mean = mean(x), SD = sd(x)))) – akrun
Could you show your ggplot code? – akrun
@akrun Yes, I added it. – bdevil

1 Answer

1
votes

You can do this either by creating a single dataset in the aggregate step and reshaping it before plotting (shown first), or by melting and merging the two datasets (shown second).

# Compute the mean and SD in a single aggregate() call. Each measurement
# comes back as a two-column matrix; do.call(data.frame, ...) flattens these
# into ordinary columns named <variable>.Mean and <variable>.SD.
dat2 <- do.call(
  data.frame,
  aggregate(. ~ genotype, data = dat,
            FUN = function(x) c(Mean = mean(x), SD = sd(x)))
)

# Locate the Mean and SD columns by name instead of by hard-coded position,
# so the reshape works for any number of measurement columns.
mean_cols <- grep("\\.Mean$", names(dat2))
sd_cols <- grep("\\.SD$", names(dat2))

# Measurement names (e.g. "DIV3", "DIV4"), in the same order as the columns.
# Only the trailing ".Mean" is stripped, so names containing dots survive.
nm1 <- sub("\\.Mean$", "", names(dat2)[mean_cols])

# Long format: one row per genotype and measurement, with the measurement
# name in `time` and the statistics in `Mean` and `SD`.
datN <- reshape(dat2, direction = "long", idvar = "genotype",
                varying = list(mean_cols, sd_cols),
                v.names = c("Mean", "SD"),
                timevar = "time", times = nm1)

library(ggplot2)

# Per-genotype means joined by lines, with error bars spanning Mean -/+ SD.
ggplot(
  datN,
  aes(x = time, y = Mean, group = genotype, colour = genotype,
      shape = genotype)
) +
  geom_line() +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.1) +
  theme(
    axis.text = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = .9,
                               color = "black"),
    axis.text.y = element_text(color = "black")
  )

Or you can melt the means and errors datasets separately and merge the results:

   library(reshape2)
   # Melt both summary tables to long format. genotype is named explicitly
   # as the id column rather than leaving melt() to guess it.
   x_melt <- melt(means, id.vars = "genotype", value.name = "Mean")
   y_melt <- melt(errors, id.vars = "genotype", value.name = "SD")

   # Join on the explicit keys so each row carries both Mean and SD for one
   # genotype/variable combination.
   datN1 <- merge(x_melt, y_melt, by = c("genotype", "variable"))

  # Per-genotype means joined by lines, with error bars spanning Mean -/+ SD.
  ggplot(
    datN1,
    aes(x = variable, y = Mean, group = genotype, colour = genotype,
        shape = genotype)
  ) +
    geom_line() +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.1) +
    theme(
      axis.text = element_text(size = 14),
      axis.title.x = element_blank(),
      axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = .9,
                                 color = "black"),
      axis.text.y = element_text(color = "black")
    )

data

 # Example data: two WT rows and two HET rows, each with a DIV3 and a DIV4
 # measurement. genotype is kept as a character column.
 dat <- data.frame(
   genotype = c("WT", "WT", "HET", "HET"),
   DIV3 = c(12.4, 35.4, 1.3, 1.5),
   DIV4 = c(15.2, 35.3, 1.2, 5.2),
   stringsAsFactors = FALSE
 )