6
votes

I want to modify the properties of the leaves in a dendrogram produced from plot of an hclust object. Minimally, I want to change the colors, but any help you can provide will be appreciated.

I did try to google the answer, but but every solution that I saw seemed alot harder than what I would have guessed.

3

3 Answers

16
votes

A while ago, Joris Meys kindly provided me with this snippet of code that changes the color of leaves. Modify it to reflect your attributes.

clusDendro <- as.dendrogram(Clustering)
labelColors <- c("red", "blue", "darkgreen", "darkgrey", "purple")

## function to get colorlabels
colLab <- function(n) {
   if(is.leaf(n)) {
       a <- attributes(n)
       # clusMember - a vector designating leaf grouping
       # labelColors - a vector of colors for the above grouping
       labCol <- labelColors[clusMember[which(names(clusMember) == a$label)]]
       attr(n, "nodePar") <- c(a$nodePar, lab.col = labCol)
   }
   n
}

## Graph
clusDendro <- dendrapply(clusDendro, colLab)
op <- par(mar = par("mar") + c(0,0,0,2))
plot(clusDendro,
     main = "Major title",
     horiz = T, type = "triangle", center = T)

par(op)
2
votes

Here is a solution for this question using a new package called "dendextend", built exactly for this sort of thing.

You can see many examples in the presentations and vignettes of the package, in the "usage" section in the following URL: https://github.com/talgalili/dendextend

Here is the solution for this question:

# define dendrogram object to play with:
dend <- as.dendrogram(hclust(dist(USArrests[1:3,]), "ave"))
# loading the package
install.packages('dendextend') # it is now on CRAN
library(dendextend)# let's add some color:
labels_colors(dend) <- 2:4
labels_colors(dend)
plot(dend)

enter image description here

1
votes

It is not clear what you want to use it for, but I often need to identify a branch in a dendrogram. I've hacked the rect.hclust method to add a density and label input.

You would call it like this:


k <- 3 # number of branches to identify
labels.to.identify <- c('1','2','3')
required.density <- 10 # the density of shading lines, in lines per inch 
rect.hclust.nice(tree, k, labels=labels.to.identify, density=density.required)

Here is the function



rect.hclust.nice = function (tree, k = NULL, which = NULL, x = NULL, h = NULL, border = 2, 
    cluster = NULL,  density = NULL,labels = NULL, ...) 
{
    if (length(h) > 1 | length(k) > 1) 
        stop("'k' and 'h' must be a scalar")
    if (!is.null(h)) {
        if (!is.null(k)) 
            stop("specify exactly one of 'k' and 'h'")
        k <- min(which(rev(tree$height) < h))
        k <- max(k, 2)
    }
    else if (is.null(k)) 
        stop("specify exactly one of 'k' and 'h'")
    if (k < 2 | k > length(tree$height)) 
        stop(gettextf("k must be between 2 and %d", length(tree$height)), 
            domain = NA)
    if (is.null(cluster)) 
        cluster <- cutree(tree, k = k)
    clustab <- table(cluster)[unique(cluster[tree$order])]
    m <- c(0, cumsum(clustab))
    if (!is.null(x)) {
        if (!is.null(which)) 
            stop("specify exactly one of 'which' and 'x'")
        which <- x
        for (n in 1L:length(x)) which[n] <- max(which(m < x[n]))
    }
    else if (is.null(which)) 
        which <- 1L:k
    if (any(which > k)) 
        stop(gettextf("all elements of 'which' must be between 1 and %d", 
            k), domain = NA)
    border <- rep(border, length.out = length(which))
    labels <- rep(labels, length.out = length(which))
    retval <- list()
    for (n in 1L:length(which)) {
        rect(m[which[n]] + 0.66, par("usr")[3L], m[which[n] + 
            1] + 0.33, mean(rev(tree$height)[(k - 1):k]), border = border[n], col = border[n], density = density, ...)
        text((m[which[n]] + m[which[n] + 1]+1)/2, grconvertY(grconvertY(par("usr")[3L],"user","ndc")+0.02,"ndc","user"),labels[n])
        retval[[n]] <- which(cluster == as.integer(names(clustab)[which[n]]))
    }
    invisible(retval)
}