if (FALSE) {
set.seed(23235)
ss <- sample(1:150, 10)
hc1 <- iris[ss, -5] %>%
dist() %>%
hclust("com")
hc2 <- iris[ss, -5] %>%
dist() %>%
hclust("single")
dend1 <- as.dendrogram(hc1)
dend2 <- as.dendrogram(hc2)
# cutree(dend1)
cophenetic(hc1)
cophenetic(hc2)
# notice how the dist matrix for the dendrograms have different orders:
cophenetic(dend1)
cophenetic(dend2)
cor(cophenetic(hc1), cophenetic(hc2)) # 0.874
cor(cophenetic(dend1), cophenetic(dend2)) # 0.16
# the difference is becasue the order of the distance table in the case of
# stats:::cophenetic.dendrogram will change between dendrograms!
# however, this is consistant (since I force-sort the rows/columns):
cor_cophenetic(hc1, hc2)
cor_cophenetic(dend1, dend2)
cor_cophenetic(dendlist(dend1, dend2))
# we can also use different cor methods (almost the same result though):
cor_cophenetic(hc1, hc2, method = "spearman") # 0.8456014
cor_cophenetic(dend1, dend2, method = "spearman") #
# cophenetic correlation is about 10 times (!) faster than bakers_gamma cor:
library(microbenchmark)
microbenchmark(
cor_bakers_gamma = cor_bakers_gamma(dend1, dend2, try_cutree_hclust = FALSE),
cor_cophenetic = cor_cophenetic(dend1, dend2),
times = 10
)
# but only because of the cutree for dendrogram. When allowing hclust cutree
# it is only about twice as fast:
microbenchmark(
cor_bakers_gamma = cor_bakers_gamma(dend1, dend2, try_cutree_hclust = TRUE),
cor_cophenetic = cor_cophenetic(dend1, dend2),
times = 10
)
}
Run the code above in your browser using DataLab