# NOT RUN {
library("MASS") # provides mvrnorm() for multivariate normal sampling

# Generate a synthetic, fully labeled data set: n Gaussian classes in 2D
# with k observations each, all sharing one covariance matrix.
set.seed(42)
k <- 100L # sample size of each class
n <- 3L # number of classes
N <- k * n # total sample size

# Shared covariance matrix. A covariance matrix must be symmetric; the
# off-diagonal entries used to be 1 and 2. mvrnorm() decomposes Sigma with
# eigen(symmetric = TRUE), which reads only the lower triangle, so 1 is the
# value that was effectively in use and the simulated draws are unchanged.
sigma <- matrix(c(15, 1, 1, 10), ncol = 2)

x1 <- mvrnorm(k, mu = c(-16, 8), Sigma = sigma)
x2 <- mvrnorm(k, mu = c(0, 0), Sigma = sigma)
x3 <- mvrnorm(k, mu = c(16, -8), Sigma = sigma)

x <- as.data.frame(rbind(x1, x2, x3)) # predictors, rows ordered by class
y <- gl(n, k) # response: class label of each row of x
# Scatter plot of the fully labeled 3-class data in the original space.
# Fill colour and point symbol are both looked up by class label.
# Separating the classes here takes sloped (non-axis-aligned) lines.
plot(
  x[, 1L], x[, 2L],
  pch = c(22, 21, 25)[y],
  bg = c("#E41A1C", "#377EB8", "#4DAF4A")[y]
)
# two dashed lines of slope 1 drawn between the class clusters
invisible(lapply(
  c(-10, 12),
  function(intercept) abline(a = intercept, b = 1, lty = 2)
))
# Generate synthetic chunklets: for each class, draw `chunks_per_class`
# random subsets of `chunk_size` row indices taken from that class's rows.
# Rows of x are ordered by class, so class `cls` occupies rows
# ((cls - 1) * k + 1):(cls * k).
chunks_per_class <- 100L
chunk_size <- 10L
chunk_class <- rep(seq_len(n), each = chunks_per_class)
chunks <- lapply(chunk_class, function(cls) {
  class_rows <- ((cls - 1L) * k + 1L):(cls * k)
  sample(class_rows, chunk_size)
})

# Rows of x belonging to the chunklets, in chunk order. Chunks overlap,
# so the same row of x can appear several times.
chks <- x[unlist(chunks), ]

# Make the "chunklet" vector to feed the chunks argument: one entry per row
# of x holding a chunk id, or -1 for rows that were never sampled.
# Because chunks overlap, a row drawn by several chunks keeps the id of the
# last chunk that contains it.
chunksvec <- rep(-1L, nrow(x))
for (i in seq_along(chunks)) {
  chunksvec[chunks[[i]]] <- i
}
# Relevant component analysis on the predictors and the chunklet vector.
# NOTE(review): rca() is not defined or loaded anywhere in this script;
# presumably it comes from the 'dml' package -- confirm and attach it
# before running.
rcs <- rca(x, chunksvec)

# learned transformation of the data
rcs$RCA

# learned Mahalanobis distance metric
rcs$B

# chunklet rows mapped through the learned transformation
chkTransformed <- as.matrix(chks) %*% rcs$RCA

# Original data after applying the RCA transformation.
# Easier to classify: horizontal lines are enough.
xnew <- rcs$newX
plot(
  xnew[, 1L], xnew[, 2L],
  pch = rep(c(22, 21, 25), each = k),
  bg = c("#E41A1C", "#377EB8", "#4DAF4A")[y]
)
# two dashed horizontal lines between the transformed class clusters
abline(h = c(-15, 16), lty = 2)
# }
# Run the code above in your browser using DataLab