if (FALSE) {
### Build the hclust object from the numeric iris columns
# NOTE(review): `%>%`, set() and branches_attr_by_clusters() are not defined
# in this snippet; presumably dendextend is already attached -- confirm.
iris_dist <- iris[, -5] %>% dist()
hc <- iris_dist %>% hclust()
# Baseline: the dendrogram without any colors
dend <- as.dendrogram(hc)
plot(dend)
# Both approaches below give the same outcome.
# Option 1: color the branches by k clusters through set()
dend_k_colored <- set(dend, "branches_k_color", k = 4)
plot(dend_k_colored)
# Option 2: cut into 4 clusters, put them in leaf order, color by cluster
leaf_order <- order.dendrogram(dend)
clusters <- cutree(dend, 4)[leaf_order]
dend_cluster_colored <- branches_attr_by_clusters(dend, clusters)
plot(dend_cluster_colored)
# ... and the second option is much slower:
system.time(set(dend, "branches_k_color", k = 4)) # 0.26 sec
system.time(branches_attr_by_clusters(dend, clusters)) # 1.61 sec
# BUT, it also allows us to do more flexible things!
#--------------------------
# Plotting dynamicTreeCut
#--------------------------
# Compute the dynamic-cut clusters and reorder them to the order of the
# dendrogram's leaves in a single step.
library(dynamicTreeCut)
dist_mat <- as.matrix(iris_dist)
clusters <- cutreeDynamic(hc, distM = dist_mat)[order.dendrogram(dend)]
# get some functions:
library(colorspace)
# Distinct values of `x`, in order of first appearance, with zeros dropped.
no0_unique <- function(x) {
  distinct_vals <- unique(x)
  is_nonzero <- distinct_vals != 0
  distinct_vals[is_nonzero]
}
# Non-zero cluster ids, in the order they first appear along the dendrogram.
clusters_numbers <- no0_unique(clusters)
n_clusters <- length(clusters_numbers)
# One color per cluster; cols[i] is handed to clusters_numbers[i].
cols <- rainbow_hcl(n_clusters)
dend2 <- branches_attr_by_clusters(dend, clusters, values = cols)
# dend2 <- branches_attr_by_clusters(dend, clusters)
plot(dend2)
# add colored bars:
# Leaves with cluster id 0 keep color 1; every other leaf gets the color of
# its own cluster. match() finds each leaf's position in clusters_numbers, so
# the lookup stays correct when the cluster ids are not ascending and when
# some leaves are 0. (The previous `ord_cols[clusters != 0][clusters]` applied
# a per-leaf mask to the per-cluster color vector, which shifted the colors
# or produced NA as soon as any leaf had cluster id 0.)
in_cluster <- clusters != 0
tmp_cols <- rep(1, length(clusters))
tmp_cols[in_cluster] <- cols[match(clusters[in_cluster], clusters_numbers)]
colored_bars(tmp_cols, y_shift = -1.1, rowLabels = "")
# How does this compare with the usual cutree?
dend3 <- color_branches(dend, k = n_clusters)
# Workaround (this needs fixing): the labels are not character, so coerce them
labels(dend2) <- as.character(labels(dend2))
# Well, both cluster solutions are not perfect, but at least they are
# interesting...
# The connecting lines are colored by the true Species of each iris, taken in
# dend2's leaf order.
# NOTE(review): indexing `cols` by the Species factor uses its integer codes,
# so this presumably needs n_clusters >= 3 to avoid NA colors -- confirm.
species_in_leaf_order <- iris[order.dendrogram(dend2), 5]
line_cols <- cols[species_in_leaf_order]
tanglegram(dend2, dend3,
  main_left = "cutreeDynamic", main_right = "cutree",
  columns_width = c(5, .5, 5),
  color_lines = line_cols
)
# The main difference is at the bottom.
}
# Run the code above in your browser using DataLab