# NOT RUN {
# simple simulated example
A <- arsim(2000, c(11, 13), 3, 0.3)
fluctile(A)
fluctile(optile(A))
fluctile(optile(A, iter = 100))
fluctile(optile(A, fun = "CA"))
fluctile(optile(A, fun = "barysort", foreign = ".Call"))
# simulated mv example
A2 <- arsim(20000, c(6, 7, 8, 9), 3, 0.1)
scpcp(A2, sel = "data[,1]")
scpcp(A3 <- optile(A2, iter = 20), sel = "data[,1]")
dev.new()
fluctile(A3)
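# optile only reorders the categories, so the cell counts themselves are unchanged;
# a quick sanity check on the simulated array:
stopifnot(sum(A3) == sum(A2))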
# }
# NOT RUN {
############ ------------ EXAMPLE I ------------ ############
# ----- Cluster results from the Current Population Survey ----- #
data(CPScluster)
cpsX <- subtable(CPScluster, c(5, 26, 34, 38, 39), allfactor = TRUE)
# joint and stepwise optimization of BCC
ss <- optile(cpsX,presort=TRUE, return.data=TRUE, method="joint")
ss2 <- optile(cpsX,presort=TRUE, return.data=TRUE, method="sw")
# original cpcp plot
cpcp(cpsX)
# cpcp for joint algorithm
cpcp(ss)
# cpcp and fluctuation for the stepwise algorithm
# (should be good for pcp plots and hierarchical plots)
fluctile(xtabs(Freq~.,data=ss2[,-4]))
cpcp(ss2)
# The multivariate algorithm
ss3 <- optile(cpsX,presort=TRUE, return.data=TRUE, method=NULL)
cpcp(ss3)
# cpcp for casort algorithm
ssca <- optile(cpsX,presort=FALSE, fun = "casort", return.data=TRUE, method="joint")
cpcp(ssca)
# cpcp for the rmca algorithm results (works better for the dmc data)
ssR <- optile(cpsX,presort=FALSE, fun = "rmca", return.data=TRUE, method=NULL)
cpcp(ssR)
# cpcp for csvd algorithm
ssC <- optile(cpsX,presort=FALSE, fun = "csvd", return.data=TRUE, method=NULL)
fluctile(xtabs(Freq~.,data=ssC[,-4]))
cpcp(ssC)
# cpcp for the IBCC algorithm with 20 iterations
ssP <- optile(cpsX, presort = FALSE, fun = "IBCC",
              return.data = TRUE, method = NULL, foreign = ".Call", iter = 20)
cpcp(ssP)
############ ------------ EXAMPLE II ------------ ############
# ------------------------------- Italian wines ------------------------------ #
library(MMST)
data(wine)
swine <- scale(wine[,1:13])
kmd <- data.frame(wine$class, replicate(9, kmeans(swine, centers = 6)$cluster) )
kmd <- subtable(kmd, 1:10, allfactor = TRUE)
cpcp(kmd)
# there is a good joint order, hence the joint result is better than the stepwise one
kmd2 <- optile(kmd, method = "sw")
kmd3 <- optile(kmd, method = "joint")
cpcp(kmd2)
cpcp(kmd3)
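# A two-way fluctuation diagram of the wine classes vs. one of the k-means runs
# makes the agreement easier to judge; this assumes subtable() stored the counts
# in a column named "Freq" (as in the xtabs() calls above).
fluctile(xtabs(Freq ~ ., data = kmd3[, c(1, 2, which(names(kmd3) == "Freq"))]))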
############ ------------ EXAMPLE III ------------ ############
# ---------------- The BicatYeast microarray dataset ---------------- #
# ----- with different clusterings for the genes ----- #
library(biclust)
data(BicatYeast)
Dby <- dist(BicatYeast)
hc1 <- hclust(Dby, method = "ward.D")  # "ward" was renamed to "ward.D" in R >= 3.1.0
hc2 <- hclust(Dby, method = "average")
hc3 <- hclust(Dby, method = "complete")
hcc1 <- cutree(hc1, k = 6)
hcc2 <- cutree(hc2, k = 6)
hcc3 <- cutree(hc3, k = 6)
km1 <- kmeans(BicatYeast, centers = 6, nstart = 100, iter.max = 30)$cluster
library(mclust)
mc1 <- Mclust(BicatYeast, G = 6)$classification
clusterings <- data.frame(hcc1,hcc2,hcc3,km1,mc1)
clusterings <- subtable(clusterings, 1:5, allfactor = TRUE)
clusterings2 <- optile(clusterings, method = "joint")
clusterings3 <- optile(clusterings, fun = "casort")
cpcp(clusterings2)
# a fluctuation diagram of all but the avg. clustering
fluctile(xtabs(Freq~.,data=clusterings2[,-2]))
# compute agreement via Fleiss kappa in irr:
require(irr)
rawdata <- untableSet(clusterings2)
for(i in 1:5) levels(rawdata[,i]) <- 1:6
(kappam.fleiss(rawdata))
(kappam.fleiss(rawdata[,-2]))
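# An alternative pairwise agreement measure: the adjusted Rand index from
# mclust (already loaded above). A minimal sketch over all pairs of clusterings:
ari <- combn(5, 2, function(ij) adjustedRandIndex(rawdata[, ij[1]], rawdata[, ij[2]]))
round(ari, 2)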
## Let's have a look at kmeans with 2:12 clusters
library(biclust)
data(BicatYeast)
cs <- NULL
for(i in 2:12) cs <- cbind(cs, kmeans(BicatYeast, centers=i,nstart=100)$cluster)
cs <- as.data.frame(cs)
names(cs) <- paste("V",2:12,sep="")
ocs <- optile(cs,method="joint")
cpcp(ocs,sort.individual=TRUE)
# and set alpha-blending, show.dots = TRUE
# and with hierarchical clusterings
cs2 <- NULL
library(amap)
hc <- hcluster(BicatYeast)
for(i in 2:20) cs2 <- cbind(cs2, subtree(hc,k=i)$data)
cs2 <- as.data.frame(cs2)
names(cs2) <- paste("V",2:20,sep="")
cpcp(cs2,sort.individual=TRUE)
# and set alpha-blending to about 0.6, show.dots = TRUE, then
ss <- iset()
ibar(ss$V6)
# and VIEW >> Set color (rainbow)
# Ideally the axes would be spaced according to the heights of the cuts,
# e.g. for the first 12 clusters (after that several cuts are at about the same height).
# The complete dendrogram doesn't look too attractive:
plot(hc)
# and plotting only the top cuts omits the information
# on how many cases are in each node or leaf
xcoords <- rev(tail(hc$height,11))
xcoords <- xcoords/max(hc$height)
ycoords <- data.matrix(ss[,20:30])
ycoords <- apply(ycoords,2,function(s){
y <- s - min(s)
y <- y/max(y)
return(y)
})
ycoords <- cbind(ycoords, as.integer(as.matrix(ss[,5])))
library(colorspace)  # provides rainbow_hcl()
colv <- rainbow_hcl(6)
dev.new()
par(mfrow=c(1,2))
plot(1,pch="", xlim=c(0,1), ylim=c(min(xcoords)-0.007,1))
apply(ycoords, 1, function(s){
  points(x = s[-12], y = xcoords)
  points(x = s[-12], y = xcoords, pch = 19, col = colv[s[12]])
  lines(x = s[-12], y = xcoords, col = colv[s[12]])
})
hc$height <- hc$height/max(hc$height)
plot(subtree(hc, 12), hang = 0.02)  # plclust() is deprecated; plot() draws "hclust" objects
############ ------------ EXAMPLE IV ------------ ############
# ------------------------- The Eisen Yeast data ------------------------- #
library(biclust)
data(EisenYeast)
SEY <- scale(EisenYeast)
Dby2 <- dist(SEY)
hc1 <- hclust(Dby2, method = "ward.D")  # "ward" was renamed to "ward.D"
hc2 <- hclust(Dby2, method = "complete")
hcc1 <- cutree(hc1, k = 16)
km1 <- kmeans(scale(EisenYeast), centers = 16, nstart = 20, iter.max = 30)$cluster
optile( table(hcc1, km1) )
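# as in the examples above, the reordered cross-tabulation can be shown as a
# fluctuation diagram to compare the two 16-cluster solutions
fluctile(optile(table(hcc1, km1)))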
############ ------------ EXAMPLE V ------------ ############
# ------------------------- The Bicat Yeast data ------------------------- #
# how many clusters are a good choice for kmeans?
# one possible way to find out:
# compute kmeans for 100 random initial settings, sort the results (clusters)
# and compute their agreement
# e.g. via Fleiss' Kappa (available in package irr)
require(biclust)
data(BicatYeast)
require(irr)
st <- Sys.time()
fk <- NULL
for(k in 3:8){
test <- subtable(replicate(100,kmeans(BicatYeast, centers = k)$cluster),1:100)
test <- optile(test, fun = "casort")
test <- optile(test, method="joint")
test <- untableSet(test)
for(i in 1:100) levels(test[,i]) <- 1:k
fk <- c(fk,kappam.fleiss(test)$value)
}
Sys.time()-st
plot(x = 3:8, y = fk, type="l", lwd=2)
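# mark the number of clusters with the highest agreement
# (which.max() takes the first maximum in case of ties)
points(x = (3:8)[which.max(fk)], y = max(fk), pch = 19, cex = 1.5)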
############ ------------ EXAMPLE VI ------------ ############
# ------------------------- hierarchical clustering ------------------------- #
# A list with hierarchical clustering objects:
require(amap)
hc1 <- hcluster(t(plants[,-1]), method="manhattan", link = "ward")
hc2 <- hcluster(t(plants[,-1]), method="manhattan", link = "complete")
hclist <- list(hc1, hc2)
tfluctile( optile(hclist, k= c(8,8) ) )
# or a table with corresponding tree objects:
tt <- table( subtree(hc1, 12)$data, subtree(hc2, 8)$data )
tfluctile(optile(tt, tree = list(hc1, hc2)))
# only one tree object, the other variable is free:
kk <- kmeans(t(plants[,-1]), centers = 8)$cluster
tt <- table( subtree(hc1, 8)$data, kk )
tfluctile(optile(tt, tree = list(hc1, NA)))
# }