# Keep a copy of the current (writable) graphical parameters in oldpar
oldpar <- par(no.readonly = TRUE)
# Use a margin of 7 on each of the four sides for the plots that follow
par(mar = c(7, 7, 7, 7))
## Example with 3 clusters

# Simulate the data (seed fixed for reproducibility)
set.seed(1)
simul <- SimulateClustering(n = c(10, 30, 15), nu_xc = 1, ev_xc = 0.5)
print(simul)
plot(simul)

# Check the proportion of explained variance: regress the first simulated
# variable on the membership in simul$theta and read off the R-squared
z <- as.factor(simul$theta)
x <- simul$data[, 1]
summary(lm(x ~ z)) # R-squared
## Example with 2 variables contributing to clustering

# Simulate the data: theta_xc flags the first two of the 10 variables
set.seed(1)
simul <- SimulateClustering(
  n = c(20, 10, 15),
  pk = 10,
  theta_xc = rep(c(1, 0), times = c(2, 8)),
  ev_xc = 0.8
)
print(simul)
plot(simul)

# Heatmap of the simulated data matrix
Heatmap(mat = simul$data, col = c("navy", "white", "red"))

# Marginal proportions of explained variance
simul$ev

# Scatter plot of the two contributing variables, coloured by simul$theta
plot(simul$data[, 1:2], col = simul$theta, pch = 19)
## Example with different levels of separation

# Simulate the data: same two flagged variables, but with different ev_xc
# values for each (0.99 for the first, 0.5 for the second)
set.seed(1)
simul <- SimulateClustering(
  n = c(20, 10, 15),
  pk = 10,
  theta_xc = rep(c(1, 0), times = c(2, 8)),
  ev_xc = c(0.99, 0.5, numeric(8))
)

# Scatter plot of the two contributing variables, coloured by simul$theta
plot(simul$data[, 1:2], col = simul$theta, pch = 19)
## Example with correlated contributors

# Adjacency matrix over pk variables with a single symmetric link,
# between variables 1 and 2
pk <- 10
adjacency <- matrix(0, nrow = pk, ncol = pk)
adjacency[2, 1] <- 1
adjacency[1, 2] <- 1

# Simulate sigma from that structure, then the clustered data using it.
# The seed is set once, before both simulation calls.
set.seed(1)
sigma <- SimulateCorrelation(
  pk = pk,
  theta = adjacency,
  pd_strategy = "min_eigenvalue",
  v_within = 0.6,
  v_sign = -1
)$sigma
simul <- SimulateClustering(
  n = c(200, 100, 150),
  pk = pk,
  sigma = sigma,
  theta_xc = rep(c(1, 0), times = c(2, 8)),
  ev_xc = c(0.9, 0.8, numeric(8))
)

# Scatter plot of the two contributing variables, coloured by simul$theta
plot(simul$data[, 1:2], col = simul$theta, pch = 19)

# Check the marginal proportions of explained variance: R-squared of each
# contributing variable regressed on the membership in simul$theta
mymodel <- lm(simul$data[, 1] ~ as.factor(simul$theta))
summary(mymodel)[["r.squared"]]
mymodel <- lm(simul$data[, 2] ~ as.factor(simul$theta))
summary(mymodel)[["r.squared"]]
# Restore the graphical parameters that were saved in oldpar
par(oldpar)
# Run the code above in your browser using DataLab