if (FALSE) {
set.seed(23235)
ss <- TRUE # sample(1:150, 10 )
hc1 <- hclust(dist(iris[ss, -5]), "com")
hc2 <- hclust(dist(iris[ss, -5]), "single")
# dend1 <- as.dendrogram(hc1)
# dend2 <- as.dendrogram(hc2)
# cutree(dend1)
# small k
A1_clusters <- cutree(hc1, k = 3) # will give a right tailed distribution
# large k
A1_clusters <- cutree(hc1, k = 50) # will give a discrete distribution
# "medium" k
A1_clusters <- cutree(hc1, k = 25) # gives almost the normal distribution!
A2_clusters <- A1_clusters
R <- 10000
set.seed(414130)
FM_index_H0 <- replicate(R, FM_index_permutation(A1_clusters, A2_clusters)) # can take 10 sec
plot(density(FM_index_H0), main = "FM Index distribution under H0\n (10000 permutation)")
abline(v = mean(FM_index_H0), col = 1, lty = 2)
# The permutation distribution is with a heavy right tail:
# Source of the skew functions is based on: library(psych)
skew <- function (x, na.rm = TRUE) {
x <- na.omit(x)
sum((x - mean(x))^3)/(length(x) * sd(x)^3)
}
skew(FM_index_H0) # 1.254
mean(FM_index_H0)
var(FM_index_H0)
the_FM_index <- FM_index(A1_clusters, A2_clusters)
the_FM_index
our_dnorm <- function(x) {
dnorm(x,
mean = attr(the_FM_index, "E_FM"),
sd = sqrt(attr(the_FM_index, "V_FM"))
)
}
# our_dnorm(0.35)
curve(our_dnorm,
col = 4,
from = -1, to = 1, n = R, add = TRUE
)
abline(v = attr(the_FM_index, "E_FM"), col = 4, lty = 2)
legend("topright", legend = c("asymptotic", "permutation"), fill = c(4, 1))
}
Run the code above in your browser using DataLab