if (FALSE) {
  # Example (not run): cutting trees with cutree() on hclust objects and on
  # dendrograms, including a dendrogram that cannot be converted to hclust.

  # Setup ----
  # Five rows of USArrests clustered with the "ave" (average) method, kept
  # as an hclust object, as a dendrogram, and as an unbranched dendrogram.
  hc <- hclust(dist(USArrests[c(1, 6, 13, 20, 23), ]), method = "ave")
  dend <- as.dendrogram(hc)
  unbranch_dend <- unbranch(dend, 2)

  # Cutting by number of clusters (k) ----
  cutree(hc, k = 2:4) # hclust input, several k values at once
  cutree(dend, k = 2:4) # dendrogram input, several k values at once
  cutree(hc, k = 2) # hclust input, single k
  cutree(dend, k = 2) # dendrogram input, single k

  # Cutting by height (h) ----
  cutree(dend, h = c(20, 25.5, 50, 170))
  cutree(hc, h = c(20, 25.5, 50, 170))

  # Ordering of the result ----
  # By default the result is ordered by the original data's order. With
  # order_clusters_as_data = FALSE it follows the order of the labels in
  # the dendrogram instead (compare with labels(dend)).
  cutree(dend, k = 2:3, order_clusters_as_data = FALSE)
  labels(dend)

  # Unbranched dendrogram ----
  # as.hclust(unbranch_dend) # ERROR - can not do this...
  cutree(unbranch_dend, k = 2) # all NA's
  cutree(unbranch_dend, k = 1:4)
  cutree(unbranch_dend, h = c(20, 25.5, 50, 170))
  cutree(dend, h = c(20, 25.5, 50, 170))

  # Timing ----
  library(microbenchmark)

  # as.hclust() is expensive, but going through it is still worthwhile
  # whenever the conversion is possible.
  microbenchmark(
    cutree(hc, k = 2:4),
    cutree(as.hclust(dend), k = 2:4),
    cutree(dend, k = 2:4),
    cutree(dend, k = 2:4, try_cutree_hclust = FALSE)
  )
  # The dendrogram is MUCH slower than the hclust object:
  # Unit: microseconds
  ##                          expr      min       lq    median        uq       max neval
  ##           cutree(hc, k = 2:4)   91.270   96.589   99.3885  107.5075   338.758   100
  ##       cutree(as.hclust(dend),
  ##                      k = 2:4) 1701.629 1767.700 1854.4895 2029.1875  8736.591   100
  ##         cutree(dend, k = 2:4) 1807.456 1869.887 1963.3960 2125.2155  5579.705   100
  ##         cutree(dend, k = 2:4,
  ##    try_cutree_hclust = FALSE) 8393.914 8570.852 8755.3490 9686.7930 14194.790   100

  # When the hclust conversion cannot succeed, attempting it first adds
  # very little overhead (which is nice...).
  microbenchmark(
    cutree_unbranch_dend = cutree(unbranch_dend, k = 2:4),
    cutree_unbranch_dend_not_trying_to_hclust =
      cutree(unbranch_dend, k = 2:4, try_cutree_hclust = FALSE)
  )
  ## Unit: milliseconds
  ##                      expr      min       lq   median       uq      max neval
  ##      cutree_unbranch_dend 7.309329 7.428314 7.494107 7.752234 17.59581   100
  ##  cutree_unbranch_dend_not
  ##         _trying_to_hclust 6.945375 7.079198 7.148629 7.577536 16.99780   100
  ## There were 50 or more warnings (use warnings() to see the first 50)

  # Invalid cuts ----
  # Notice that if cutree can't find clusters for the desired k/h on a
  # dendrogram, it will produce 0's instead (it will produce a warning
  # though...). This is a different behaviour than stats::cutree.
  # NOTE(review): the unbranched example above is annotated "all NA's";
  # confirm which placeholder the installed version actually returns.
  # For example:
  cutree(as.dendrogram(hclust(dist(c(1, 1, 1, 2, 2)))), k = 5)
}
# Run the code above in your browser using DataLab