0
votes

I'm making a heat map using geom_tile(). I would like to order the y-axis (sp) based on hierarchical clustering of the counts (the actual data has about 200 sp records).

 # ggplot2 supplies ggplot(), aes(), geom_tile() and scale_fill_gradient().
 library(ggplot2)

 # Toy data in long format: one row per (sp, category) pair with its count.
 sp <- c("sp1","sp1","sp1","sp2","sp2","sp2","sp3","sp3","sp3","sp4","sp4","sp4","sp5","sp5","sp5")
 category <- c("a","b","c","a","b","c","a","b","c","a","b","c","a","b","c")
 count <- c(1,2,1,1,4,2,3,1,3,1,4,5,2,5,1)
 # Build the data frame straight from the vectors. Going through cbind() first
 # would create a character matrix and silently turn `count` into strings.
 d <- data.frame(sp, category, count)

 # Heat map of count by category (x axis) and sp (y axis). The plot object is
 # deliberately not called `t`, which would mask base::t().
 heatmap_plot <- ggplot(d, aes(category, sp)) +
   geom_tile(aes(fill = count)) +
   scale_fill_gradient(low = "white", high = "red")

 print(heatmap_plot)

enter image description here

1

1 Answer

2
votes

Here is an example using the classic hclust approach:

library(ggplot2)

# Example data in long format: 5 species x 3 categories, one count per pair.
sp <- rep(paste0("sp", 1:5), each = 3)
category <- rep(c("a", "b", "c"), times = 5)
count <- c(1, 2, 1, 1, 4, 2, 3, 1, 3, 1, 4, 5, 2, 5, 1)
d <- data.frame(sp, category, count)

# Spread to wide form (one column per species), then drop the leading
# `category` column so only the numeric counts remain in the matrix.
m <- as.matrix(
  tidyr::pivot_wider(d, names_from = "sp", values_from = "count")[, -1]
)

# Hierarchical clustering of species: transpose so each species is a row,
# then cluster on the euclidean distances between those rows.
species_dist <- dist(t(m), method = "euclidean")
clust <- hclust(species_dist)

# Draw the heat map, listing the species on the y axis in dendrogram order.
ggplot(data = d, mapping = aes(x = category, y = sp)) +
  geom_tile(mapping = aes(fill = as.numeric(count))) +
  scale_fill_gradient(low = "white", high = "red") +
  scale_y_discrete(limits = clust$labels[clust$order])

Created on 2021-06-24 by the reprex package (v1.0.0)