0
votes

Is there a way to plot a dataframe with upper and lower confidence intervals as a correlation matrix in ggplot()?

I am able to force a correlation "matrix" of sorts using ggplot() like so:

Specify the dataframe:

# Pairwise correlations between phenotypes, one row per pair, together with
# the upper and lower bound of each estimate's confidence interval.
phen1 <- c("Activity", "Aggression", "PC1", "PC2", "Activity", "Aggression")
phen2 <- c("Aggression", "PC1", "PC2", "Activity", "PC1", "PC2")
cors  <- c(0.06, -0.003, -0.04, -0.001, -0.003, 0.004)
upper <- c(0.10, 0.01, 0.002, 0.02, 0.02, 0.02)
lower <- c(0.03, -0.01, -0.08, -0.02, -0.01, -0.02)

data <- data.frame(
  phen1 = phen1,
  phen2 = phen2,
  cors  = cors,
  upper = upper,
  lower = lower
)

> data
       phen1      phen2   cors upper lower
1   Activity Aggression  0.060 0.100  0.03
2 Aggression        PC1 -0.003 0.010 -0.01
3        PC1        PC2 -0.040 0.002 -0.08
4        PC2   Activity -0.001 0.020 -0.02
5   Activity        PC1 -0.003 0.020 -0.01
6 Aggression        PC2  0.004 0.020 -0.02

Convert this to a matrix:

# Build a symmetric correlation table: widen the pairs once with phen1 as the
# column key and once with phen2 as the column key, stack the two results,
# then collapse to a single row per phenotype.
# The CI columns are dropped first because they cause problems in spread().
corrdata <- bind_rows(
  data %>%
    select(-upper, -lower) %>%
    spread(phen1, cors) %>%
    rename(phen = phen2),
  data %>%
    select(-upper, -lower) %>%
    spread(phen2, cors) %>%
    rename(phen = phen1)
) %>%
  group_by(phen) %>%
  # Per phenotype and column, keep the first non-missing value; a column with
  # no value at all for that phenotype (the diagonal in this data) becomes 1.
  summarise_all(~ ifelse(all(is.na(.)), 1, first(na.omit(.))))

> corrdata
# A tibble: 4 x 5
  phen       Activity Aggression    PC1    PC2
  <fct>         <dbl>      <dbl>  <dbl>  <dbl>
1 Activity      1          0.06  -0.003 -0.001
2 Aggression    0.06       1     -0.003  0.004
3 PC1          -0.003     -0.003  1     -0.04 
4 PC2          -0.001      0.004 -0.04   1    

Create a function to extract lower half of correlation matrix:

# Return `corrdata` with every cell above the main diagonal set to NA.
# The diagonal and everything below it are left as they are.
get_lower_tri <- function(corrdata) {
  above_diagonal <- upper.tri(corrdata)
  corrdata[above_diagonal] <- NA
  corrdata
}

# Blank the cells above the diagonal, then reshape to long format for
# plotting; na.rm = TRUE drops the blanked (NA) cells. The plot code below
# reads the columns phen, variable and value from the result.
# NOTE(review): melt() is presumably reshape2::melt -- confirm which package
# is attached.
lower_tri <- get_lower_tri(corrdata)
melted_corr <- melt(lower_tri, na.rm = TRUE)

Plot the data:

# Tile plot of the lower-triangle correlations; CI labels and significance
# stars are placed by hand at fixed coordinates.
ggplot(melted_corr, aes(x = phen, y = variable, fill = value)) +
  geom_tile(color = "white") +
  # Diverging grey fill: white at zero, the same grey towards both limits,
  # so shading tracks the size of the value rather than its sign.
  scale_fill_gradient2(
    low = "gray40", mid = "white", high = "gray40",
    midpoint = 0, limits = c(-0.10, 0.10),
    name = "Robust\ncorrelation"
  ) +
  theme_minimal() +
  coord_fixed() +
  scale_y_discrete(position = "right") +
  # Correlation value on each tile (x and y are inherited from ggplot()).
  geom_text(aes(label = value), color = "black", size = 7) +
  labs(y = "", x = "") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text = element_text(size = 15),  # size of the axis tick labels
    axis.title = element_text(size = 15), # size of the axis titles
    text = element_text(size = 17),
    legend.position = c(0.15, 0.8),       # move the legend inside the plot
    legend.title = element_blank()
  ) +
  # CI labels, positioned manually just below each tile's centre.
  annotate(
    "text",
    x = c(1, 2, 2, 3, 3, 3),
    y = c(0.75, 0.75, 1.75, 0.75, 1.75, 2.75),
    label = c(
      "(0.03, 0.10)", "(-0.01, 0.02)", "(-0.01, 0.01)",
      "(-0.02, 0.02)", "(-0.02, 0.02)", "(-0.08, 0.002)"
    ),
    size = 5
  ) +
  # Significance stars, also positioned manually.
  annotate(
    "text",
    x = c(1.2, 3.22),
    y = c(1, 3),
    label = c("*", "*"),
    size = 7
  )

This gives me what I need, but it isn't a very elegant solution and involves a lot of annotate().

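[Image: the resulting lower-triangle correlation plot, with the confidence intervals and significance stars added via annotate()]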

Does anyone have suggestions for how I could plot my dataframe with upper and lower confidence intervals as a correlation matrix in ggplot()?

2

2 Answers

4
votes

I think you can reshape your original data differently because it is leaving you too much work to do at the plotting stage. Instead of all the spreading and melting, you can do:

# Mirror the table: the same rows with phen1 and phen2 swapped, so that
# every pair is available in both orientations.
data2 <- data[c(2:1, 3:5)]
names(data2) <- names(data)

# Stack the original and the mirrored rows.
bigdata <- rbind(data, data2)

# Build the "(lower, upper)" confidence-interval label for each row.
bigdata$CI <- paste0("(", bigdata$lower, ", ", bigdata$upper, ")")

# bigdata now contains each possible pair apart from the diagonal, once per
# orientation. Keep just the lower triangle: the rows where phen2 ranks
# before phen1. Both columns are ranked against ONE shared set of levels;
# separate as.factor() calls would give integer codes that are not
# comparable when the two columns are factors with different level orders.
phen_levels <- levels(as.factor(bigdata$phen1))
phen2_rank <- match(as.character(bigdata$phen2), phen_levels)
phen1_rank <- match(as.character(bigdata$phen1), phen_levels)
bigdata <- bigdata[which(phen2_rank < phen1_rank), ]

Which simplifies your plot to:

library(ggplot2)

# One tile per phenotype pair. The correlation is nudged slightly above the
# tile centre and its CI label slightly below, so the two do not overlap.
ggplot(bigdata, aes(x = phen1, y = phen2, fill = cors)) +
  geom_tile(color = "white") +
  geom_text(aes(label = cors), size = 7, nudge_y = 0.1) +
  geom_text(aes(label = CI), size = 5, nudge_y = -0.1) +
  # Diverging grey fill: white at zero, the same grey towards both limits.
  scale_fill_gradient2(
    low = "gray40", mid = "white", high = "gray40",
    midpoint = 0, limits = c(-0.10, 0.10), name = ""
  ) +
  scale_y_discrete(position = "right", name = "") +
  labs(x = "") +
  coord_fixed() +
  theme_classic() +
  theme(
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 15),
    text = element_text(size = 17),
    legend.position = c(0.15, 0.8)
  )

Created on 2020-11-13 by the reprex package (v0.3.0)

3
votes

Perhaps you could add the confidence-interval strings to the melted_corr data frame as a new column and draw them with a second geom_text() layer, using the vjust argument to shift the CI labels below the correlation values?

# CI labels typed by hand, one per row of melted_corr and in its row order.
melted_corr$ci <- c(
  "(0.03, 0.10)", "(-0.01, 0.02)", "(-0.02, 0.02)",
  "(-0.01, 0.01)", "(-0.02, 0.02)", "(-0.08, 0.002)"
)

ggplot(melted_corr, aes(x = phen, y = variable, fill = value)) +
  geom_tile(color = "white") +
  # Diverging grey fill: white at zero, the same grey towards both limits,
  # so shading tracks the size of the value rather than its sign.
  scale_fill_gradient2(
    low = "gray40", mid = "white", high = "gray40",
    midpoint = 0, limits = c(-0.10, 0.10),
    name = "Robust\ncorrelation"
  ) +
  theme_minimal() +
  coord_fixed() +
  scale_y_discrete(position = "right") +
  # Correlation value on each tile (x and y are inherited from ggplot()).
  geom_text(aes(label = value), color = "black", size = 7) +
  # CI label on the same tile, pushed below the value by vjust.
  geom_text(aes(label = ci), color = "black", size = 5, vjust = 2.5) +
  labs(y = "", x = "") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text = element_text(size = 15),  # size of the axis tick labels
    axis.title = element_text(size = 15), # size of the axis titles
    text = element_text(size = 17),
    legend.position = c(0.15, 0.8),       # move the legend inside the plot
    legend.title = element_blank()
  ) +
  # Significance stars are still positioned manually.
  annotate(
    "text",
    x = c(1.2, 3.22),
    y = c(1, 3),
    label = c("*", "*"),
    size = 7
  )

[Image: the resulting plot, with the confidence intervals drawn by the second geom_text() layer]