1
votes

I've created these split half violin plots using ggplot. However, instead of including the boxplot, which shows the median, I'd like to include a horizontal line with the mean. This means each colored half would have its own mean line: the gold half would have a mean line which would not exactly align with the mean line on the grey half. Importantly, I'd like the mean line to reside only inside the density plot. How can I achieve this? I can't figure it out and I'd appreciate any help!

Here's some example data:

# Reproducible example data: 4000 normal draws in four blocks of 1000 with
# means 0, 0.5, 1 and 1.5 (sd 1), in that order.
set.seed(20160229)

my_data <- data.frame(
  y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
  # x: first 2000 rows are "a", last 2000 are "b"
  x = c(rep("a", 2000), rep("b", 2000)),
  # m: rows 1-1000 "i", 1001-3000 "j", 3001-4000 "i", so each x level
  # contains one block of "i" and one block of "j"
  m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)

Here's the extension of geom_violin that creates geom_split_violin:

# GeomSplitViolin: a GeomViolin variant that draws only one half of each
# violin. Groups whose `group` index is odd are drawn on the left of their x
# position and even-indexed groups on the right, so two groups sharing an x
# value together form one split violin.
GeomSplitViolin <- ggproto(
  "GeomSplitViolin",
  GeomViolin,
  # Build the grob for a single group.
  #   data           - rows for one group; this code reads the columns x, xmin,
  #                    xmax, y, violinwidth and group
  #   ...            - forwarded to GeomPolygon$draw_panel() and
  #                    GeomPath$draw_panel()
  #   draw_quantiles - NULL (no lines) or probabilities in [0, 1] at which
  #                    quantile lines are drawn
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Horizontal extent of the density outline on either side of x.
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    is_left <- grp %% 2 == 1

    # Keep only one side of the outline and order the points along y
    # (ascending for the left half, descending for the right half).
    newdata <- data
    newdata$x <- if (is_left) data$xminv else data$xmaxv
    newdata <- newdata[order(if (is_left) newdata$y else -newdata$y), , drop = FALSE]
    rownames(newdata) <- NULL

    # Close the polygon along the centre line: pad with copies of the first
    # and last rows and move those to the rounded x position.
    # NOTE(review): round() presumes the violin centre sits at an integer x.
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])

    # Scalar condition, so use short-circuiting `&&` rather than elementwise `&`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # create_quantile_segment_frame() and ggname() are non-exported ggplot2
      # internals (hence `:::`), so they may change between ggplot2 releases.
      quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
      # Quantile lines reuse the first row's aesthetics, with alpha forced to 1.
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

# Layer constructor for split violins: forwards its arguments to layer() with
# GeomSplitViolin as the geom.
#   mapping, data, stat, position, show.legend, inherit.aes - passed to layer()
#   draw_quantiles, trim, scale, na.rm, ...                  - collected into
#                                                              layer()'s `params`
geom_split_violin <- function(mapping = NULL,
                              data = NULL,
                              stat = "ydensity",
                              position = "identity",
                              ...,
                              draw_quantiles = NULL,
                              trim = TRUE,
                              scale = "area",
                              na.rm = FALSE,
                              show.legend = NA,
                              inherit.aes = TRUE) {
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    data = data,
    mapping = mapping,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      trim = trim,
      scale = scale,
      draw_quantiles = draw_quantiles,
      na.rm = na.rm,
      ...
    )
  )
}

Here's the code for the graph:

library(ggplot2)

# Current plot: split violins with a narrow, whisker-less box (coef = 0,
# outliers hidden) drawn on top of each half.
ggplot(my_data, aes(x, y, fill = m)) +
  geom_split_violin(trim = TRUE) +
  geom_boxplot(
    width = 0.25,
    notch = FALSE,
    notchwidth = 0.4,
    outlier.shape = NA,
    coef = 0
  ) +
  labs(x = NULL, y = "GM Attitude Score") +
  theme_classic() +
  theme(text = element_text(size = 20)) +
  scale_x_discrete(
    labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")
  ) +
  scale_fill_manual(
    values = c("#E69F00", "#999999"),
    name = "Survey\nPart",
    breaks = c("1", "2"),
    labels = c("Time 1", "Time 5")
  )

enter image description here

1
Please edit your question to include sample data. – Maurits Evers
Please share a sample of your data using dput() (not str or head or picture/screenshot) so others can help. See more here stackoverflow.com/questions/5963269/… – Tung
Ok, I think I've added example data. At least it works on my console... – socialresearcher
Do you want a boxplot that uses mean instead of median? Or do you want just a mean line? – Gregor Thomas
Also, I can't find geom_split_violin in ggplot2 or any other CRAN package. Where is it from? – Gregor Thomas

1 Answer

3
votes

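You can use stat_summary with geom = "crossbar", setting all three summary functions to mean so that the crossbar collapses to a single line at the mean. These arguments are called fun, fun.min and fun.max in ggplot2 >= 3.3.0, and fun.y, fun.ymin and fun.ymax in older versions.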

library(ggplot2)

ggplot(my_data, aes(x, y, fill = m)) +
  geom_split_violin(trim = TRUE) +
  # Draw each group's mean as a crossbar whose middle, lower and upper values
  # are all the mean, so it collapses to a single horizontal line.
  # ggplot2 >= 3.3.0 names these arguments fun / fun.min / fun.max; they
  # replace the deprecated fun.y / fun.ymin / fun.ymax.
  stat_summary(
    fun = mean,
    fun.min = mean,
    fun.max = mean,
    geom = "crossbar",
    width = 0.25,
    # Dodge by the crossbar width so the two groups' lines sit side by side.
    position = position_dodge(width = 0.25)
  ) +
  labs(x = NULL, y = "GM Attitude Score") +
  theme_classic() +
  theme(text = element_text(size = 20)) +
  # NOTE(review): the label names "0"/"1" and the breaks "1"/"2" below look
  # like the levels of the asker's real data; the example data uses a/b and
  # i/j, so adjust them if the axis labels or legend keys do not show up.
  scale_x_discrete(labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
  scale_fill_manual(
    values = c("#E69F00", "#999999"),
    name = "Survey\nPart",
    breaks = c("1", "2"),
    labels = c("Time 1", "Time 5")
  )

Data & function used:

# Example data: four consecutive blocks of 1000 standard-deviation-1 normal
# draws whose means step through 0, 0.5, 1 and 1.5.
set.seed(20160229)

block_size <- 1000
block_means <- c(0, 0.5, 1, 1.5)

# lapply() visits the means in order, so the random draws are consumed in the
# same sequence as four back-to-back rnorm() calls.
y_values <- unlist(lapply(block_means, function(mu) rnorm(block_size, mean = mu)))

my_data <- data.frame(
  y = y_values,
  x = rep(c("a", "b"), each = 2 * block_size),
  m = rep(c("i", "j", "i"), times = c(block_size, 2 * block_size, block_size))
)

# GeomSplitViolin: a GeomViolin variant that draws only one half of each
# violin. Groups whose `group` index is odd are drawn on the left of their x
# position and even-indexed groups on the right, so two groups sharing an x
# value together form one split violin.
GeomSplitViolin <- ggproto(
  "GeomSplitViolin",
  GeomViolin,
  # Build the grob for a single group.
  #   data           - rows for one group; this code reads the columns x, xmin,
  #                    xmax, y, violinwidth and group
  #   ...            - forwarded to GeomPolygon$draw_panel() and
  #                    GeomPath$draw_panel()
  #   draw_quantiles - NULL (no lines) or probabilities in [0, 1] at which
  #                    quantile lines are drawn
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Horizontal extent of the density outline on either side of x.
    data <- transform(data,
                      xminv = x - violinwidth * (x - xmin),
                      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    left_half <- grp %% 2 == 1

    # Keep only one side of the outline and order the points along y
    # (ascending for the left half, descending for the right half).
    newdata <- data
    newdata$x <- if (left_half) data$xminv else data$xmaxv
    y_order <- order(if (left_half) newdata$y else -newdata$y)
    newdata <- newdata[y_order, , drop = FALSE]
    rownames(newdata) <- NULL

    # Close the polygon along the centre line: pad with copies of the first
    # and last rows and move those to the rounded x position.
    # NOTE(review): round() presumes the violin centre sits at an integer x.
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])

    # Scalar condition, so use short-circuiting `&&` rather than elementwise `&`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # create_quantile_segment_frame() and ggname() are non-exported ggplot2
      # internals (hence `:::`), so they may change between ggplot2 releases.
      quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
      # Quantile lines reuse the first row's aesthetics, with alpha forced to 1.
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

# Layer constructor for split violins: bundles the violin-specific settings
# into a parameter list and hands everything to layer() with GeomSplitViolin
# as the geom.
#   mapping, data, stat, position, show.legend, inherit.aes - passed to layer()
#   draw_quantiles, trim, scale, na.rm, ...                  - become layer()'s
#                                                              `params`
geom_split_violin <- function(mapping = NULL, data = NULL,
                              stat = "ydensity", position = "identity",
                              ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  violin_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    ...
  )

  layer(
    geom = GeomSplitViolin,
    stat = stat,
    data = data,
    mapping = mapping,
    position = position,
    params = violin_params,
    show.legend = show.legend,
    inherit.aes = inherit.aes
  )
}

Created on 2018-07-08 by the reprex package (v0.2.0.9000).