Is there a way to plot a data frame with upper and lower confidence intervals as a correlation matrix in ggplot()?
I am able to force a correlation "matrix" of sorts using ggplot(), like so:
Specify the dataframe:
# One row per phenotype pair: the correlation estimate (`cors`) and the
# upper/lower bounds of its confidence interval.
phen1 <- c("Activity", "Aggression", "PC1", "PC2", "Activity", "Aggression")
phen2 <- c("Aggression", "PC1", "PC2", "Activity", "PC1", "PC2")
cors <- c(0.06, -0.003, -0.04, -0.001, -0.003, 0.004)
upper <- c(0.10, 0.01, 0.002, 0.02, 0.02, 0.02)
lower <- c(0.03, -0.01, -0.08, -0.02, -0.01, -0.02)
data <- data.frame(
  phen1 = phen1,
  phen2 = phen2,
  cors = cors,
  upper = upper,
  lower = lower
)
> data
phen1 phen2 cors upper lower
1 Activity Aggression 0.060 0.100 0.03
2 Aggression PC1 -0.003 0.010 -0.01
3 PC1 PC2 -0.040 0.002 -0.08
4 PC2 Activity -0.001 0.020 -0.02
5 Activity PC1 -0.003 0.020 -0.01
6 Aggression PC2 0.004 0.020 -0.02
Convert this to a matrix:
# Build a symmetric wide table: widen the pairs once with `phen1` as the
# columns and once with `phen2` as the columns, stack the two halves, then
# collapse to a single row per phenotype.
corrdata <- bind_rows(
  data %>%
    # exclude the CIs here because they cause problems when using spread()
    select(-upper, -lower) %>%
    spread(phen1, cors) %>%
    rename(phen = "phen2"),
  data %>%
    select(-upper, -lower) %>%
    spread(phen2, cors) %>%
    rename(phen = "phen1")
) %>%
  group_by(phen) %>%
  # Per column, keep the first non-missing value; a column with no value at
  # all for this phenotype becomes 1 (in the example data: the diagonal).
  summarise_all(~ if (all(is.na(.))) 1 else first(na.omit(.)))
> corrdata
# A tibble: 4 x 5
phen Activity Aggression PC1 PC2
<fct> <dbl> <dbl> <dbl> <dbl>
1 Activity 1 0.06 -0.003 -0.001
2 Aggression 0.06 1 -0.003 0.004
3 PC1 -0.003 -0.003 1 -0.04
4 PC2 -0.001 0.004 -0.04 1
Create a function to extract lower half of correlation matrix:
#' Blank out the upper triangle of a correlation table
#'
#' Every cell strictly above the main diagonal (row index < column index)
#' is replaced with `NA`; all other cells are returned unchanged.
#'
#' @param corrdata A matrix-like object accepted by `upper.tri()`.
#' @return `corrdata` with its upper-triangle cells set to `NA`.
get_lower_tri <- function(corrdata) {
  above_diagonal <- upper.tri(corrdata)
  corrdata[above_diagonal] <- NA
  corrdata
}
# Mask the upper triangle, then reshape to long format for plotting; the
# masked (NA) cells are dropped by `na.rm = TRUE`.
lower_tri <- get_lower_tri(corrdata)
melted_corr <- melt(data = lower_tri, na.rm = TRUE)
Plot the data:
# Tile plot of the lower-triangle correlations. Each tile is shaded by its
# value and labelled with it; confidence intervals and significance stars
# are overlaid as extra text.
ggplot(data = melted_corr, aes(x = phen, y = variable, fill = value)) +
  geom_tile(color = "white") +
  # add a colour gradient to specify which values are larger:
  # white at 0, the same grey towards either limit
  scale_fill_gradient2(
    low = "gray40", high = "gray40", mid = "white",
    midpoint = 0,
    # spelled out in full; `limit` only worked through partial matching
    limits = c(-0.10, 0.10),
    name = "Robust\ncorrelation"
  ) +
  theme_minimal() +
  coord_fixed() +
  scale_y_discrete(position = "right") +
  # x and y are inherited from ggplot(); only the label mapping is new
  geom_text(aes(label = value), color = "black", size = 7) +
  labs(y = "", x = "") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text = element_text(size = 15), # size of the axis tick labels
    axis.title = element_text(size = 15), # size of the axis titles
    text = element_text(size = 17),
    legend.position = c(0.15, 0.8), # move legend into plot
    legend.title = element_blank()
  ) +
  # CI values, hard-coded as "(lower, upper)" and drawn slightly below the
  # centre of each tile; one vectorised call instead of one call per label
  annotate(
    "text",
    x = c(1, 2, 2, 3, 3, 3),
    y = c(0.75, 0.75, 1.75, 0.75, 1.75, 2.75),
    label = c(
      "(0.03, 0.10)", "(-0.01, 0.02)", "(-0.01, 0.01)",
      "(-0.02, 0.02)", "(-0.02, 0.02)", "(-0.08, 0.002)"
    ),
    size = 5
  ) +
  # significance symbols, also placed manually
  # NOTE(review): the second star sits on the tile labelled (-0.08, 0.002),
  # an interval that spans zero - confirm this marker is intended.
  annotate("text", x = c(1.2, 3.22), y = c(1, 3), label = "*", size = 7)
This gives me what I need, but it isn't a very elegant solution and involves a lot of annotate() calls.
Does anyone have suggestions for how I could plot my data frame with upper and lower confidence intervals as a correlation matrix in ggplot()?