rm(list=ls())
#Number of data
n <- 2000
set.seed(123)
#set.seed(4321)
d <- 2
ncl <- 4
# Sample data
sdev <- array(dim=c(d,d,ncl))
xi <- matrix(nrow=d, ncol=ncl, c(-1.5, 1, 1.5, 1, 1.5, -2, -2, -2))
psi <- matrix(nrow=d, ncol=4, c(0.4, -0.6, 0.8, 0, 0.3, -0.7, -0.3, -0.8))
p <- c(0.2, 0.1, 0.4, 0.3) # frequence des clusters
sdev[, ,1] <- matrix(nrow=d, ncol=d, c(0.3, 0, 0, 0.3))
sdev[, ,2] <- matrix(nrow=d, ncol=d, c(0.1, 0, 0, 0.3))
sdev[, ,3] <- matrix(nrow=d, ncol=d, c(0.3, 0.15, 0.15, 0.3))
sdev[, ,4] <- .3*diag(2)
c <- rep(0,n)
z <- matrix(0, nrow=d, ncol=n)
for(k in 1:n){
c[k] = which(rmultinom(n=1, size=1, prob=p)!=0)
z[,k] <- (xi[, c[k]]
+ psi[, c[k]]*abs(rnorm(1))
+ sdev[, , c[k]]%*%matrix(rnorm(d, mean = 0, sd = 1), nrow=d, ncol=1))
#cat(k, "/", n, " observations simulated\n", sep="")
}
# Set parameters of G0
hyperG0 <- list()
hyperG0[["b_xi"]] <- rep(0,d)
hyperG0[["b_psi"]] <- rep(0,d)
hyperG0[["kappa"]] <- 0.001
hyperG0[["D_xi"]] <- 100
hyperG0[["D_psi"]] <- 100
hyperG0[["nu"]] <- d + 1
hyperG0[["lambda"]] <- diag(d)
# hyperprior on the Scale parameter of DPM
a <- 0.0001
b <- 0.0001
# do some plots
doPlot <- TRUE
nbclust_init <- 30
## Data
########
library(ggplot2)
p <- (ggplot(data.frame("X"=z[1,], "Y"=z[2,]), aes(x=X, y=Y))
+ geom_point()
+ ggtitle("Simple example in 2d data")
+xlab("D1")
+ylab("D2")
+theme_bw())
p
c2plot <- factor(c)
levels(c2plot) <- c("3", "2", "4", "1")
pp <- (ggplot(data.frame("X"=z[1,], "Y"=z[2,], "Cluster"=as.character(c2plot)))
+ geom_point(aes(x=X, y=Y, colour=Cluster, fill=Cluster))
+ ggtitle("Slightly overlapping skew-normal simulation\n")
+ xlab("D1")
+ ylab("D2")
+ theme_bw()
+ scale_colour_discrete(guide=guide_legend(override.aes = list(size = 6, shape=22))))
pp
## alpha priors plots
#####################
prioralpha <- data.frame("alpha"=rgamma(n=5000, shape=a, scale=1/b),
"distribution" =factor(rep("prior",5000),
levels=c("prior", "posterior")))
p <- (ggplot(prioralpha, aes(x=alpha))
+ geom_histogram(aes(y=..density..),
colour="black", fill="white")
+ geom_density(alpha=.2, fill="red")
+ ggtitle(paste("Prior distribution on alpha: Gamma(", a,
",", b, ")\n", sep=""))
)
p
if(interactive()){
# Gibbs sampler for Dirichlet Process Mixtures
##############################################
MCMCsample_st <- DPMGibbsSkewT(z, hyperG0, a, b, N=2000,
doPlot, nbclust_init, plotevery=100, gg.add=list(theme_bw(),
guides(shape=guide_legend(override.aes = list(fill="grey45")))),
diagVar=FALSE)
s <- summary(MCMCsample_st, burnin = 350)
print(s)
plot(s)
plot_ConvDPM(MCMCsample_st, from=2)
cluster_est_binder(MCMCsample_st$mcmc_partitions[1500:2000])
}
Run the code above in your browser using DataLab