0
votes

I'd like to create several barplot where I would represent 4 different categories, themselves spread over 4 other categories.

I have this example df:

structure(list(type = c("NE18", "NE18", "NE18", "NE18", "NE18", 
"NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", 
"NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", 
"NE18", "NE18", "NE18", "NE18", "NE21", "NE21", "NE21", "NE21", 
"NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", 
"NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", 
"NE21", "NE21", "NE21", "NE21", "NE21", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "SA", 
"SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", 
"SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", 
"SA", "SA"), score = c("R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.9 > R score >= 0.8", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.9 > R score >= 0.8", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "0.8 > R score >= 0.7", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "0.9 > R score >= 0.8", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "0.8 > R score >= 0.7", "R score = 0.96", 
"0.8 > R score >= 0.7", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.8 > R score >= 0.7", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.8 > R score >= 0.7", "R score = 0.96", "R score = 0.96", 
"R score = 0.96")), row.names = c("1", "2", "3", "4", "5", "6", 
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", 
"18", "19", "20", "21", "22", "23", "24", "25", "11000", "2620", 
"3100", "4100", "5100", "6100", "787", "8100", "9100", "10100", 
"11100", "12100", "13100", "14100", "15100", "16100", "17100", 
"18100", "19100", "20100", "21100", "22100", "23100", "24100", 
"25100", "46", "2002", "2057", "2223", "2391", "2459", "2509", 
"2533", "2029", "2062", "2089", "2102", "2131", "2139", "2159", 
"2179", "2192", "2201", "2252", "2265", "2282", "2302", "2335", 
"2346", "2362", "1410", "1411", "1412", "1413", "1414", "1415", 
"1416", "1417", "1418", "1419", "1420", "1421", "1422", "1423", 
"1424", "1425", "1426", "1427", "1428", "1448", "1449", "1450", 
"1451", "1452", "1453"), class = "data.frame")

I have 2 columns. I want 4 individual barplot, 1 for each unique score from the col score(they don't have to be all represented in one figure). Each of these 4 barplot should be composed of 4 bar one for each different unique value I have in the col type And the barplot are supposed to represent the proportion of the data in the col type which mean that through my 4 barplot the total of type == NE18 should be 100% etc.

For a better understanding I summarized this information in a table like this :

      score
type   0.5 > R score >= 0.2 0.8 > R score >= 0.7 0.9 > R score >= 0.8 R score = 0.96
  NA                      0                    4                    4             92
  NE18                    8                    0                    4             88
  NE21                    8                    0                    4             88
  SA                      0                   16                    0             84

The thing is that I have no clue how to do this with ggplot2 using geom_bar() since I need to represent this in several independent barplot. I guess I need to give ggplot my data df and not the summarized table. I tried to transform the table to a df and use it to use ggplot but still I don't how to group my data

I haven't been able to find something like this in other post nor in the ggplot documentation.

By example if I do this:

list_score <- unique(df$score)

for (my_score in list_score){
my_plot <- ggplot(df[which(df$score == my_score),], aes(x=type)) + 
  geom_bar(aes(y = (..count..)/sum(..count..))) +
  ggtitle(my_score) +
  geom_text(stat='count', aes(group=type, label=..count../sum(..count..)), position = position_stack(vjust = 0.5))
print(my_plot)
}

I end up with percentage calculated on the figure, which mean that the sum of NE18 + NE21 + NA + SA = 1, which is not what I want

Hope i'm clear

Edit :

Here is my summary table

structure(list(type = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("NA", "NE18", 
"NE21", "SA"), class = "factor"), score = structure(c(1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("0.5 > R score >= 0.2", 
"0.8 > R score >= 0.7", "0.9 > R score >= 0.8", "R score = 0.96"
), class = "factor"), Freq = c(0, 8, 8, 0, 4, 0, 0, 16, 4, 4, 
4, 0, 92, 88, 88, 84)), class = "data.frame", row.names = c(NA, 
-16L))

I'm close to what I want here. But I can't deal with labels:

ggplot(df, aes(x = score)) + 
  geom_bar(aes(y = ..prop.., group = type, fill = type), position = position_dodge()) +
  geom_text(stat = "count", aes(group = type, label = ..prop..), size = 3, position = position_dodge(width = 1))

I need a mix between those 2 plots where the labels are not messing with my y-axis:

enter image description here

enter image description here

1
I belive is easier to plot the table you shown! Could you dput() that table and add to the post? I think I have a solution to your issue!Duck
I edited it. The figure I added is close to what I want but missing the percentage informationD Prat
But you have labels on the bars, so you want as label the cum percent until last bar, right? In the last bar should the value be 1?Duck
I think yes. I made a new edit. Which is even closer to what I want. But I have a problem with the labels now which are reducing my bar...D Prat
Could you try this on your df: first: df$Label <- round(df$Freq/max(df$Freq),2) and then ggplot(df, aes(x = score,y=Freq,group = type, fill = type)) + geom_bar(,stat='identity',position = position_dodge()) + geom_text(aes(label=ifelse(Label==0,NA,Label)),position = position_dodge(0.9),vjust=-0.5) and see if that is what you want?Duck

1 Answers

1
votes

Here a solution with most of the merit for the own user who tricked the code to make the plot:

library(tidyverse)
#Data
my_table <- structure(list(type = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("NA", "NE18", 
"NE21", "SA"), class = "factor"), score = structure(c(1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("0.5 > R score >= 0.2", 
"0.8 > R score >= 0.7", "0.9 > R score >= 0.8", "R score = 0.96"
), class = "factor"), Freq = c(0, 8, 8, 0, 4, 0, 0, 16, 4, 4, 
4, 0, 92, 88, 88, 84)), class = "data.frame", row.names = c(NA, 
-16L))

Next code:

my_tot <- c() 
type <- c() 
for (my_type in unique(my_table$type)){ 
  my_tot <- c(my_tot,sum(my_table[my_table$type == my_type,"Freq"]))
  type <- c(type, my_type) 
  } 

df_tot <- data.frame(type,my_tot) 
my_table <- merge(my_table,df_tot) 
my_table$Label <- my_table$Freq/my_table$my_tot 

ggplot(my_table, aes(x = score,y=Freq,group = type, fill = type)) +
  geom_bar(,stat='identity',position = position_dodge()) +
  geom_text(aes(label=ifelse(Label==0,NA,Label)),position = position_dodge(1),vjust=-0.5)

The output:

enter image description here