0
votes

I'd like to create several bar plots that represent 4 different categories, each of which is spread over 4 other categories.

I have this example df:

structure(list(type = c("NE18", "NE18", "NE18", "NE18", "NE18", 
"NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", 
"NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", "NE18", 
"NE18", "NE18", "NE18", "NE18", "NE21", "NE21", "NE21", "NE21", 
"NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", 
"NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", "NE21", 
"NE21", "NE21", "NE21", "NE21", "NE21", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "SA", 
"SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", 
"SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", 
"SA", "SA"), score = c("R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.9 > R score >= 0.8", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.9 > R score >= 0.8", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.5 > R score >= 0.2", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "0.8 > R score >= 0.7", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "0.9 > R score >= 0.8", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "0.8 > R score >= 0.7", "R score = 0.96", 
"0.8 > R score >= 0.7", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"0.8 > R score >= 0.7", "R score = 0.96", "R score = 0.96", "R score = 0.96", 
"R score = 0.96", "0.8 > R score >= 0.7", "R score = 0.96", "R score = 0.96", 
"R score = 0.96")), row.names = c("1", "2", "3", "4", "5", "6", 
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", 
"18", "19", "20", "21", "22", "23", "24", "25", "11000", "2620", 
"3100", "4100", "5100", "6100", "787", "8100", "9100", "10100", 
"11100", "12100", "13100", "14100", "15100", "16100", "17100", 
"18100", "19100", "20100", "21100", "22100", "23100", "24100", 
"25100", "46", "2002", "2057", "2223", "2391", "2459", "2509", 
"2533", "2029", "2062", "2089", "2102", "2131", "2139", "2159", 
"2179", "2192", "2201", "2252", "2265", "2282", "2302", "2335", 
"2346", "2362", "1410", "1411", "1412", "1413", "1414", "1415", 
"1416", "1417", "1418", "1419", "1420", "1421", "1422", "1423", 
"1424", "1425", "1426", "1427", "1428", "1448", "1449", "1450", 
"1451", "1452", "1453"), class = "data.frame")

I have 2 columns. I want 4 individual bar plots, one for each unique value in the score column (they don't all have to appear in one figure). Each of these 4 bar plots should contain 4 bars, one for each unique value in the type column. The bars should represent the proportion of the data within each type, which means that across my 4 bar plots the bars for type == NE18 should sum to 100%, and likewise for the other types.

For a better understanding I summarized this information in a table like this :

      score
type   0.5 > R score >= 0.2 0.8 > R score >= 0.7 0.9 > R score >= 0.8 R score = 0.96
  NA                      0                    4                    4             92
  NE18                    8                    0                    4             88
  NE21                    8                    0                    4             88
  SA                      0                   16                    0             84

The thing is that I have no clue how to do this with ggplot2 using geom_bar(), since I need to represent this in several independent bar plots. I guess I need to give ggplot my data df and not the summarized table. I tried transforming the table into a df and passing it to ggplot, but I still don't know how to group my data.

I haven't been able to find anything like this in other posts or in the ggplot documentation.

For example, if I do this:

# Draw one bar chart per distinct value of `score`.
list_score <- unique(df$score)

for (my_score in list_score) {
  my_plot <- ggplot(
    # Rows for the current score only; which() drops NA comparisons.
    data = df[which(df$score == my_score), ],
    mapping = aes(x = type)
  ) +
    # Bar height: each bar's count divided by the summed count of the subset.
    geom_bar(mapping = aes(y = (..count..) / sum(..count..))) +
    ggtitle(my_score) +
    # Same ratio printed as a label, placed at mid-height of each bar.
    geom_text(
      mapping = aes(group = type, label = ..count.. / sum(..count..)),
      stat = "count",
      position = position_stack(vjust = 0.5)
    )
  print(my_plot)
}

I end up with percentages calculated within each figure, which means that NE18 + NE21 + NA + SA sum to 1 in every plot, which is not what I want.

Hope i'm clear

Edit :

Here is my summary table

structure(list(type = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("NA", "NE18", 
"NE21", "SA"), class = "factor"), score = structure(c(1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("0.5 > R score >= 0.2", 
"0.8 > R score >= 0.7", "0.9 > R score >= 0.8", "R score = 0.96"
), class = "factor"), Freq = c(0, 8, 8, 0, 4, 0, 0, 16, 4, 4, 
4, 0, 92, 88, 88, 84)), class = "data.frame", row.names = c(NA, 
-16L))

I'm close to what I want here. But I can't deal with labels:

# Dodged bars per score, one bar per type. Bar heights and text labels both
# use the groupwise `prop` statistic of the count stat (grouped by type).
ggplot(data = df, mapping = aes(x = score)) +
  geom_bar(
    mapping = aes(y = ..prop.., group = type, fill = type),
    position = position_dodge()
  ) +
  geom_text(
    mapping = aes(group = type, label = ..prop..),
    stat = "count",
    size = 3,
    position = position_dodge(width = 1)
  )

I need a mix between those 2 plots where the labels are not messing with my y-axis:

enter image description here

enter image description here

1
I believe it is easier to plot the table you have shown! Could you dput() that table and add it to the post? I think I have a solution to your issue! - Duck
I edited it. The figure I added is close to what I want but missing the percentage information - D Prat
But you have labels on the bars, so you want as label the cum percent until last bar, right? In the last bar should the value be 1? - Duck
I think yes. I made a new edit. Which is even closer to what I want. But I have a problem with the labels now which are reducing my bar... - D Prat
Could you try this on your df: first: df$Label <- round(df$Freq/max(df$Freq),2) and then ggplot(df, aes(x = score,y=Freq,group = type, fill = type)) + geom_bar(,stat='identity',position = position_dodge()) + geom_text(aes(label=ifelse(Label==0,NA,Label)),position = position_dodge(0.9),vjust=-0.5) and see if that is what you want? - Duck

1 Answers

1
votes

Here is a solution; most of the credit goes to the asker, who worked out the trick to make the plot:

library(tidyverse)
#Data
# Summary table with one row per (type, score) pair: `type` cycles through its
# four levels fastest, `score` changes every four rows. The Freq values of
# each type add up to 100.
my_table <- data.frame(
  type = gl(4, 1, length = 16, labels = c("NA", "NE18", "NE21", "SA")),
  score = gl(
    4, 4,
    labels = c(
      "0.5 > R score >= 0.2",
      "0.8 > R score >= 0.7",
      "0.9 > R score >= 0.8",
      "R score = 0.96"
    )
  ),
  Freq = c(
    0, 8, 8, 0,
    4, 0, 0, 16,
    4, 4, 4, 0,
    92, 88, 88, 84
  )
)

Next code:

# Total Freq per type. This is the denominator that makes the labels of one
# type sum to 1 across all score classes (rather than within one score).
# tapply() replaces the original loop that grew two vectors with c().
tot_by_type <- tapply(my_table$Freq, my_table$type, sum)
df_tot <- data.frame(
  type = names(tot_by_type),
  my_tot = as.vector(tot_by_type),
  stringsAsFactors = FALSE
)

# Attach each row's type total (joined on the shared `type` column) and
# derive the share of the type that falls into each score class.
my_table <- merge(my_table, df_tot)
my_table$Label <- my_table$Freq / my_table$my_tot

# Bars and labels must be dodged by the same width, otherwise the labels
# drift away from the centre of their bars (bars default to 0.9).
dodge <- position_dodge(width = 0.9)

ggplot(my_table, aes(x = score, y = Freq, group = type, fill = type)) +
  # geom_col() is geom_bar(stat = "identity"): heights come straight from Freq.
  geom_col(position = dodge) +
  geom_text(
    # Empty bars get no label; the rest is rounded to keep the text short.
    aes(label = ifelse(Label == 0, NA, round(Label, 2))),
    position = dodge,
    vjust = -0.5,
    # The NA labels are intentional, so do not warn about dropping them.
    na.rm = TRUE
  )

The output:

enter image description here