2
votes

I just found out how to create a split violin plot here, and how to extend it to more than two groups here. But I do not understand how to transfer this to my own data.

Now, I am stuck in creating my own split violin plot.

I am aiming for something like this: each group should represent one cancer entity, with the reference incidence on the left and the cohort's incidence on the right.

Following the first link (see above), I used this code to create the function geom_split_violin:

# ggproto geom that draws only one half of each violin, so that two groups
# sharing an x position can be shown back to back. Groups with an odd `group`
# number are drawn on the left of their x position, even ones on the right.
#
# draw_group() arguments:
#   data            per-group data supplied by the layer; this code reads its
#                   x, xmin, xmax, y, violinwidth and group columns.
#   ...             forwarded unchanged to GeomPolygon / GeomPath draw_panel().
#   draw_quantiles  optional numeric vector of probabilities in [0, 1]; when
#                   given, quantile segments are drawn on top of the violin.
# Returns a grob named "geom_split_violin".
GeomSplitViolin <- ggproto(
  "GeomSplitViolin",
  GeomViolin,
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Horizontal extent of the violin outline on either side of x.
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    is_left <- grp %% 2 == 1

    # Keep only the outline of the relevant side, ordered so the polygon is
    # traced in one direction (ascending y on the left, descending on the right).
    newdata <- plyr::arrange(
      transform(data, x = if (is_left) xminv else xmaxv),
      if (is_left) y else -y
    )

    # Close the polygon: repeat the end points and snap the first and the last
    # two vertices to the rounded x position of the first outline point.
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])

    violin_grob <- GeomPolygon$draw_panel(newdata, ...)

    # Both operands are scalars, so use the short-circuiting `&&`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # create_quantile_segment_frame() is an unexported ggplot2 helper, so it
      # has to be reached with `:::` (as ggname() already is below).
      quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[
        rep(1, nrow(quantiles)),
        setdiff(names(data), c("x", "y")),
        drop = FALSE
      ]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      # grobTree() lives in grid, which library(ggplot2) does not attach.
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(violin_grob, quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", violin_grob)
    }
  }
)

# Layer constructor for GeomSplitViolin.
#
# Takes the usual layer arguments (mapping, data, stat, position, show.legend,
# inherit.aes) and passes them to layer(); trim, scale, draw_quantiles, na.rm
# and anything supplied through `...` are bundled into the layer's `params`.
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                              position = "identity", ..., draw_quantiles = NULL,
                              trim = TRUE, scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    ...
  )

  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomSplitViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = layer_params
  )
}

I do not fully understand the code above, and I guess that it will create a split violin plot with only two groups.

Hopefully my request is not too general, and you can show me a way to create my plot.

All I have managed so far is:

# First attempt: read the semicolon-separated file, then pull the age-group
# column and two sets of columns out by position and hand them to the plot.
my_data <- read.csv("Test2.csv", sep = ";")

# NOTE(review): `data_vp` is never created in the code shown here; the file was
# read into `my_data` above -- presumably that is the object meant. TODO confirm.
y <- data_vp$Age.groups
x <- cbind(data_vp[,3:8])
m <- cbind(data_vp[,3:5], data_vp[,6:8])

# NOTE(review): `x` and `m` each bundle six columns rather than holding a
# single vector, so they cannot serve as one x / fill value per row.
ggplot(my_data,
       aes(x = x,
           y = y,
           fill = m)) + 
  geom_split_violin()

After getting your answer @missuse I tried again with this code:

# Second attempt: build an explicit data frame with `y`, `groups` and `split`
# columns before plotting.
raw_df <- read.csv("Test2.csv", sep = ";")
View(raw_df)

# Column subsets by position: columns 2-7 together, then 2-4 and 5-7 separately.
inc_all <- raw_df[,2:7]
inc_oM <- raw_df[,2:4]
inc_mM <- raw_df[,5:7]


# NOTE(review): `inc_all` and `inc_oM + inc_mM` are multi-column selections,
# not single vectors, so as.factor() is unlikely to yield one grouping value
# per row -- presumably the data has to be reshaped to long format first.
dff <- data.frame(y = raw_df$Age.groups,
                  groups = as.factor(inc_all),
                  split = as.factor(inc_oM + inc_mM))

ggplot(dff, 
       aes(x = groups,
           y = y,
           fill = split)) + 
  geom_split_violin()

Does not work :(

1

1 Answers

1
votes

Here is an example on how to use geom_split_violin with an arbitrary number of groups:

First some data:

# Simulated example data with 1000 rows:
#   dens   - standard-normal draws (the variable whose density is plotted)
#   split  - factor with levels 1/2, assigned at random (which violin half)
#   groups - factor with levels 1..5, 200 consecutive rows each (x position)
df <- data.frame(
  dens = rnorm(1000),
  # TRUE rather than T: `T` is an ordinary variable that can be reassigned.
  split = as.factor(sample(1:2, 1000, replace = TRUE)),
  groups = as.factor(rep(1:5, each = 200))
)

It is quite intuitive:

library(ggplot2)    
# One x position per level of `groups`; the two levels of `split` select the
# fill colour, and semi-transparent halves are drawn by geom_split_violin().
ggplot(data = df, mapping = aes(x = groups, y = dens, fill = split)) +
  geom_split_violin(alpha = 0.7)

enter image description here

You were probably struggling with it because your groups are not factors; convert them to factors in the ggplot call or prior to it.

EDIT: after the OP supplied the data:

# The OP's data as an R literal (this looks like dput() output): 18 rows, one
# per age group, with columns Age.groups (factor), Magen, MH, NHL (numeric) and
# Magen_M, MH_M, NHL_M (integer). The expression is not assigned to a name
# here; the pipeline further down reads `my_data`.
structure(list(Age.groups = structure(1:18, .Label = c("0-04", 
"05-09", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", 
"40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", 
"75-79", "80-84", "85+"), class = "factor"), Magen = c(0, 0, 
0, 0.1, 0.2, 0.5, 1.4, 2.4, 4.4, 7.6, 13.3, 20.8, 30.3, 40.6, 
56.3, 76, 97, 113.3), MH = c(0.1, 0.5, 1.5, 3.7, 4.6, 4.1, 3.4, 
3.1, 2.6, 2.4, 2.4, 2.4, 2.8, 3.1, 3.5, 4.4, 4.1, 2.9), NHL = c(0.6, 
1, 1.2, 1.9, 2.2, 3, 3.7, 5.2, 7.8, 10.6, 16.1, 23.2, 33.5, 47, 
61.1, 73.6, 84.5, 75.7), Magen_M = c(0L, 0L, 0L, 20L, 0L, 20L, 
20L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), MH_M = c(0L, 
0L, 0L, 4L, 0L, 2L, 2L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), NHL_M = c(0L, 0L, 0L, 0L, 20L, 0L, 0L, 0L, 0L, 20L, 20L, 
0L, 20L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Age.groups", "Magen", 
"MH", "NHL", "Magen_M", "MH_M", "NHL_M"), class = "data.frame", row.names = c(NA, 
-18L))

It is obvious that age is binned, so a density estimate is not appropriate. I suggest plotting a geom_col graph that resembles the split density:

First the data should be transformed to long format with some adjustments to formatting:

library(tidyverse)
# Reshape the wide incidence table to long format and prepare it for a
# mirrored bar chart. Result `dat` has one row per age group and original
# column, with:
#   key    - column name with any trailing "_M" removed
#   value  - the original cell value
#   split  - factor: 1 if the original column name ended in "_M", else 0
#   value2 - `value` for split == 1, `-value` otherwise, so the two sets of
#            bars point in opposite directions (like a split violin)
# pivot_longer() replaces the superseded gather(); rows come out ordered by
# age group instead of by column, which the plot does not depend on.
dat <- my_data %>%
  pivot_longer(cols = 2:7, names_to = "key", values_to = "value") %>%
  mutate(
    # computed from the original `key`, before the suffix is stripped below
    split = as.factor(ifelse(grepl("_M$", key), 1, 0)),
    key = gsub("_M$", "", key),
    value2 = ifelse(split == 1, value, -value)
  )

# Mirrored bar chart: one panel per `key`, age groups along the flipped axis,
# bars coloured by `split`.
ggplot(
  data = dat,
  mapping = aes(x = Age.groups, y = value2, fill = split)
) +
  geom_col() +
  facet_wrap(~ key, scales = "free_x") +
  coord_flip() +
  # value2 is signed; label the axis with absolute values
  scale_y_continuous(labels = abs)

enter image description here