# Example 1
library(clusterSim)
data(data_ratio)
cl1<-pam(data_ratio,4)
cl2<-pam(data_ratio,5)
clall<-cbind(cl1$clustering,cl2$clustering)
g<-index.Gap(data_ratio, clall, reference.distribution="unif", B=10,
method="pam")
print(g)
# Example 2
library(clusterSim)
means <- matrix(c(0,2,4,0,3,6), 3, 2)
cov <- matrix(c(1,-0.9,-0.9,1), 2, 2)
x <- cluster.Gen(numObjects=40, means=means, cov=cov, model=2)
x <- x$data
md <- dist(x, method="euclidean")^2
# nc - number_of_clusters
min_nc=1
max_nc=5
min <- 0
clopt <- NULL
res <- array(0, c(max_nc-min_nc+1, 2))
res[,1] <- min_nc:max_nc
found <- FALSE
for (nc in min_nc:max_nc){
cl1 <- pam(md, nc, diss=TRUE)
cl2 <- pam(md, nc+1, diss=TRUE)
clall <- cbind(cl1$clustering, cl2$clustering)
gap <- index.Gap(x,clall,B=20,method="pam",centrotypes="centroids")
res[nc-min_nc+1, 2] <- diffu <- gap$diffu
if ((res[nc-min_nc+1, 2] >=0) && (!found)){
nc1 <- nc
min <- diffu
clopt <- cl1$cluster
found <- TRUE
}
}
if (found){
print(paste("Minimal nc where diffu>=0 is",nc1,"for diffu=",round(min,4)),quote=FALSE)
}else{
print("I have not found clustering with diffu>=0", quote=FALSE)
}
plot(res,type="p",pch=0,xlab="Number of clusters",ylab="diffu",xaxt="n")
abline(h=0, untf=FALSE)
axis(1, c(min_nc:max_nc))
# Example 3
library(clusterSim)
means <- matrix(c(0,2,4,0,3,6), 3, 2)
cov <- matrix(c(1,-0.9,-0.9,1), 2, 2)
x <- cluster.Gen(numObjects=40, means=means, cov=cov, model=2)
x <- x$data
md <- dist(x, method="euclidean")^2
# nc - number_of_clusters
min_nc=1
max_nc=5
min <- 0
clopt <- NULL
res <- array(0, c(max_nc-min_nc+1, 2))
res[,1] <- min_nc:max_nc
found <- FALSE
for (nc in min_nc:max_nc){
cl1 <- pam(md, nc, diss=TRUE)
cl2 <- pam(md, nc+1, diss=TRUE)
clall <- cbind(cl1$clustering, cl2$clustering)
gap <- index.Gap(x,clall,B=20,method="pam",d=md,centrotypes="medoids")
res[nc-min_nc+1, 2] <- diffu <- gap$diffu
if ((res[nc-min_nc+1, 2] >=0) && (!found)){
nc1 <- nc
min <- diffu
clopt <- cl1$cluster
found <- TRUE
}
}
if (found){
print(paste("Minimal nc where diffu>=0 is",nc1,"for diffu=",round(min,4)),quote=FALSE)
}else{
print("I have not found clustering with diffu>=0",quote=FALSE)
}
plot(res, type="p", pch=0, xlab="Number of clusters", ylab="diffu", xaxt="n")
abline(h=0, untf=FALSE)
axis(1, c(min_nc:max_nc))
Run the code above in your browser using DataLab