
I've created these split half violin plots using ggplot. However, instead of including the boxplot, which shows the median, I'd like to include a horizontal line with the mean. This means each colored half would have its own mean line: the gold half would have a mean line which would not exactly align with the mean line on the grey half. Importantly, I'd like the mean line to reside only inside the density plot. How can I achieve this? I can't figure it out and I'd appreciate any help!

Here's some example data:


# Example data: four blocks of 1000 normal draws with means 0, 0.5, 1 and 1.5.
# `x` is the violin position ("a"/"b"); `m` is the fill variable ("i"/"j")
# that decides which half of the split violin a row belongs to.
my_data <- data.frame(
  y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
  x = c(rep("a", 2000), rep("b", 2000)),
  m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)

Here's the extension of geom_violin that creates geom_split_violin:

# ggproto geom derived from GeomViolin that draws only one half of the violin
# per group: odd-numbered groups get the left half, even-numbered groups the
# right half. Expects ggplot2 to be attached (ggproto, GeomViolin, GeomPath,
# GeomPolygon); plyr, scales and grid are reached through `::`.
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Left (xminv) and right (xmaxv) outline of the density around centre x.
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Odd groups use the left outline ordered by increasing y; even groups use
    # the right outline ordered by decreasing y.
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Repeat the end points and snap them to the (rounded) centre position so
    # the polygon is closed along the violin's centre line.
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # Both operands are scalars, so use the short-circuiting `&&`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
      # Carry the group's aesthetics (everything except x/y) onto each
      # quantile segment row, forcing the segments to be fully opaque.
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

# Layer constructor for GeomSplitViolin. The signature mirrors the arguments
# shown here: `trim`, `scale`, `draw_quantiles`, `na.rm` and anything in `...`
# are forwarded to the layer as `params`; the remaining arguments are passed
# straight to ggplot2's layer().
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                              position = "identity", ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomSplitViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      trim = trim,
      scale = scale,
      draw_quantiles = draw_quantiles,
      na.rm = na.rm,
      ...
    )
  )
}

Here's the code for the graph:

# Split violins with a narrow, un-notched boxplot drawn on top.
# NOTE(review): the axis labels are keyed by "0"/"1" and the fill breaks by
# "1"/"2", while the example data uses "a"/"b" and "i"/"j" -- presumably these
# match the asker's real data coding; confirm before reusing.
ggplot(data = my_data, mapping = aes(x = x, y = y, fill = m)) +
  geom_split_violin(trim = TRUE) +
  geom_boxplot(
    width = 0.25,
    notch = FALSE,
    notchwidth = 0.4,
    outlier.shape = NA, # do not draw outlier points
    coef = 0
  ) +
  labs(x = NULL, y = "GM Attitude Score") +
  theme_classic() +
  theme(text = element_text(size = 20)) +
  scale_x_discrete(
    labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")
  ) +
  scale_fill_manual(
    values = c("#E69F00", "#999999"),
    breaks = c("1", "2"),
    labels = c("Time 1", "Time 5")
  )

enter image description here

Please edit your question to include sample data. – Maurits Evers
Please share a sample of your data using dput() (not str or head or picture/screenshot) so others can help. See more here: stackoverflow.com/questions/5963269/… – Tung
Ok, I think I've added example data. At least it works on my console... – socialresearcher
Do you want a boxplot that uses mean instead of median? Or do you want just a mean line? – Gregor Thomas
Also, I can't find geom_split_violin in ggplot2 or any other CRAN package. Where is it from? – Gregor Thomas

1 Answer


You can use stat_summary with geom_crossbar, setting fun.y, fun.ymin and fun.ymax all to mean, so that each crossbar collapses to a single horizontal line at the mean:


# Split violins with one mean line per half: stat_summary() computes the mean
# for y, ymin and ymax, so the "crossbar" geom is drawn as a single line.
ggplot(my_data, aes(x, y, fill = m)) +
  geom_split_violin(trim = TRUE) +
  # NOTE: ggplot2 >= 3.3.0 deprecates fun.y/fun.ymin/fun.ymax in favour of
  # fun/fun.min/fun.max; the old names are kept to match the original answer.
  stat_summary(
    fun.y = mean, fun.ymin = mean, fun.ymax = mean,
    geom = "crossbar",
    width = 0.25,
    # Dodge the two fill groups sideways so each bar sits over its own half.
    position = position_dodge(width = .25)
  ) +
  labs(x = NULL, y = "GM Attitude Score") +
  theme_classic() +
  theme(text = element_text(size = 20)) +
  scale_x_discrete(labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
  scale_fill_manual(
    values = c("#E69F00", "#999999"),
    name = "Survey\nPart",
    breaks = c("1", "2"),
    labels = c("Time 1", "Time 5")
  )

Data & function used:


# Example data: four blocks of 1000 normal draws with means 0, 0.5, 1 and 1.5.
# `x` is the violin position ("a"/"b"); `m` is the fill variable ("i"/"j").
my_data <- data.frame(
  y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
  x = c(rep("a", 2000), rep("b", 2000)),
  m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)

# ggproto geom derived from GeomViolin that draws only one half of the violin
# per group: odd-numbered groups get the left half, even-numbered groups the
# right half. Expects ggplot2 to be attached; plyr, scales and grid are
# reached through `::`.
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Left (xminv) and right (xmaxv) outline of the density around centre x.
    data <- transform(data,
                      xminv = x - violinwidth * (x - xmin),
                      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Odd groups use the left outline ordered by increasing y; even groups use
    # the right outline ordered by decreasing y.
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Repeat the end points and snap them to the (rounded) centre position so
    # the polygon is closed along the violin's centre line.
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # Both operands are scalars, so use the short-circuiting `&&`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
      # Carry the group's aesthetics (everything except x/y) onto each
      # quantile segment row, forcing the segments to be fully opaque.
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

# Layer constructor for GeomSplitViolin. `trim`, `scale`, `draw_quantiles`,
# `na.rm` and anything in `...` are forwarded to the layer as `params`; the
# remaining arguments are passed straight to ggplot2's layer().
geom_split_violin <- function(mapping = NULL,
                              data = NULL,
                              stat = "ydensity",
                              position = "identity", ...,
                              draw_quantiles = NULL,
                              trim = TRUE,
                              scale = "area",
                              na.rm = FALSE,
                              show.legend = NA,
                              inherit.aes = TRUE) {
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomSplitViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      trim = trim,
      scale = scale,
      draw_quantiles = draw_quantiles,
      na.rm = na.rm, ...
    )
  )
}

Created on 2018-07-08 by the reprex package (v0.2.0.9000).