# \donttest{
## ------------------------------------------------------------
## mtcars example
## ------------------------------------------------------------
## default SID method
o1 <- sidClustering(mtcars)
print(split(mtcars, o1$cl[, 10]))
## using artifical class approach
o1.sh <- sidClustering(mtcars, method = "sh")
print(split(mtcars, o1.sh$cl[, 10]))
## ------------------------------------------------------------
## glass data set
## ------------------------------------------------------------
if (library("mlbench", logical.return = TRUE)) {
## this is a supervised problem, so we first strip the class label
data(Glass)
glass <- Glass
y <- Glass$Type
glass$Type <- NULL
## default SID call
o2 <- sidClustering(glass, k = 6)
print(table(y, o2$cl))
print(sid.perf.metric(y, o2$cl))
## compare with Shi-Horvath mode 1
o2.sh <- sidClustering(glass, method = "sh1", k = 6)
print(table(y, o2.sh$cl))
print(sid.perf.metric(y, o2.sh$cl))
## plain-vanilla unsupervised analysis
o2.un <- sidClustering(glass, method = "unsupv", k = 6)
print(table(y, o2.un$cl))
print(sid.perf.metric(y, o2.un$cl))
}
## ------------------------------------------------------------
## vowel data set
## ------------------------------------------------------------
if (library("mlbench", logical.return = TRUE) &&
library("cluster", logical.return = TRUE)) {
## strip the class label
data(Vowel)
vowel <- Vowel
y <- Vowel$Class
vowel$Class <- NULL
## SID
o3 <- sidClustering(vowel, k = 11)
print(table(y, o3$cl))
print(sid.perf.metric(y, o3$cl))
## compare to Shi-Horvath which performs poorly in
## mixed variable settings
o3.sh <- sidClustering(vowel, method = "sh1", k = 11)
print(table(y, o3.sh$cl))
print(sid.perf.metric(y, o3.sh$cl))
## Shi-Horvath improves with PAM clustering
## but still not as good as SID
o3.sh.pam <- pam(o3.sh$dist, k = 11)$clustering
print(table(y, o3.sh.pam))
print(sid.perf.metric(y, o3.sh.pam))
## plain-vanilla unsupervised analysis
o3.un <- sidClustering(vowel, method = "unsupv", k = 11)
print(table(y, o3.un$cl))
print(sid.perf.metric(y, o3.un$cl))
}
## ------------------------------------------------------------
## two-d V-shaped cluster (y=x, y=-x) sitting in 12-dimensions
## illustrates superiority of SID to Breiman/Shi-Horvath
## ------------------------------------------------------------
p <- 10
m <- 250
n <- 2 * m
std <- .2
x <- runif(n, 0, 1)
noise <- matrix(runif(n * p, 0, 1), n)
y <- rep(NA, n)
y[1:m] <- x[1:m] + rnorm(m, sd = std)
y[(m+1):n] <- -x[(m+1):n] + rnorm(m, sd = std)
vclus <- data.frame(clus = c(rep(1, m), rep(2,m)), x = x, y = y, noise)
## SID
o4 <- sidClustering(vclus[, -1], k = 2)
print(table(vclus[, 1], o4$cl))
print(sid.perf.metric(vclus[, 1], o4$cl))
## Shi-Horvath
o4.sh <- sidClustering(vclus[, -1], method = "sh1", k = 2)
print(table(vclus[, 1], o4.sh$cl))
print(sid.perf.metric(vclus[, 1], o4.sh$cl))
## plain-vanilla unsupervised analysis
o4.un <- sidClustering(vclus[, -1], method = "unsupv", k = 2)
print(table(vclus[, 1], o4.un$cl))
print(sid.perf.metric(vclus[, 1], o4.un$cl))
## ------------------------------------------------------------
## two-d V-shaped cluster using fast random forests
## ------------------------------------------------------------
o5 <- sidClustering(vclus[, -1], k = 2, fast = TRUE)
print(table(vclus[, 1], o5$cl))
print(sid.perf.metric(vclus[, 1], o5$cl))
# }
Run the code above in your browser using DataLab