# Example walkthrough for FM_index() on two hierarchical clusterings of the
# iris measurements (complete vs. single linkage).
# Everything is wrapped in `if (FALSE)` so nothing runs when the file is
# sourced; execute the statements interactively instead.
# The numeric results quoted in trailing comments are the values recorded in
# the original notes and have not been re-verified here.
if (FALSE) {
  # The seed only matters if `ss` is switched to the sampled subset below.
  set.seed(23235)
  ss <- TRUE # sample(1:150, 10 )

  # Spell the linkage out in full instead of relying on partial matching
  # of "com".
  hc1 <- hclust(dist(iris[ss, -5]), method = "complete")
  hc2 <- hclust(dist(iris[ss, -5]), method = "single")
  # dend1 <- as.dendrogram(hc1)
  # dend2 <- as.dendrogram(hc2)
  # cutree(dend1)

  # Comparing a clustering with itself.
  FM_index(cutree(hc1, k = 3), cutree(hc1, k = 3)) # 1 with EV

  # checking speed gains
  # microbenchmark is optional: skip the timing when it is not installed and
  # call it through `::` so the package is not attached to the search path.
  # The `assume_sorted_vectors = TRUE` expression used to be listed twice;
  # one copy is enough.
  if (requireNamespace("microbenchmark", quietly = TRUE)) {
    microbenchmark::microbenchmark(
      FM_index(cutree(hc1, k = 3), cutree(hc1, k = 3)),
      FM_index(cutree(hc1, k = 3), cutree(hc1, k = 3),
        assume_sorted_vectors = TRUE
      )
    )
  }
  # Original note: C code is 1.2-1.3 times faster.
  # NOTE(review): the benchmark above only toggles assume_sorted_vectors, so
  # it does not show a C-vs-R comparison -- confirm what this note refers to.

  # Shuffling one of the label vectors: the result depends on whether
  # FM_index() is told to treat the two vectors as already aligned.
  set.seed(1341)
  FM_index(cutree(hc1, k = 3), sample(cutree(hc1, k = 3)),
    assume_sorted_vectors = TRUE
  ) # 0.38037
  FM_index(cutree(hc1, k = 3), sample(cutree(hc1, k = 3)),
    assume_sorted_vectors = FALSE
  ) # 1 again :)

  # Complete vs. single linkage at two cut levels.
  FM_index(cutree(hc1, k = 3), cutree(hc2, k = 3)) # 0.8059
  FM_index(cutree(hc1, k = 30), cutree(hc2, k = 30)) # 0.4529

  # FM_index between the two trees when both are cut into k clusters.
  fo <- function(k) FM_index(cutree(hc1, k), cutree(hc2, k))
  lapply(1:4, fo)

  # One k per clustered observation. Deriving the range from the tree (rather
  # than hard-coding 1:150) keeps this valid when `ss` selects a subsample.
  ks <- seq_along(hc1$order)
  # vapply() guarantees one numeric value per k.
  bk <- vapply(ks, fo, numeric(1))
  plot(bk ~ ks,
    type = "b",
    ylab = "FM_index",
    main = "Bk plot for the iris dataset"
  )
}
# Run the code above in your browser using DataLab