# NOT RUN {
set.seed(-88106935)
data(microarray)
# Consider only four tumour classes (NOTE: "NORM" is not a class of tumour).
y <- microarray[, 2309]
train <- as.matrix(microarray[y != "NORM", -2309])
wtr <- factor(microarray[y != "NORM", 2309],
              levels = c("BL", "EWS", "NB", "RMS"))
n.kpc <- 6
# One fewer latent class than factor levels (reference-class coding).
n.class <- length(levels(wtr)) - 1
# Gaussian kernel matrix of the training data (default bandwidth).
K <- gaussKern(train)$K
# Fit the classifier, supplying starting values for the parameters and
# using the Gaussian kernel as input.
result <- bkpc(K, y = wtr, n.iter = 1000, thin = 10, n.kpc = n.kpc,
               initSigmasq = 0.001,
               initBeta = matrix(10, n.kpc * n.class, 1),
               initTau = matrix(10, n.kpc * n.class, 1),
               intercept = FALSE, rotate = TRUE)
# Predict on the training data, discarding the first 10 samples as burn-in.
out <- predict(result, n.burnin = 10)
# Cross-tabulate predicted class against the true class.
table(out$class, as.numeric(wtr))
# Plot the data projection on the kernel principal components.
pairs(result$kPCA$KPCs[, 1:n.kpc], col = as.numeric(wtr),
      main = paste("symbol = predicted class", "\n", "color = true class"),
      pch = out$class, upper.panel = NULL)
# Allow the legend outside the plot region; restore par() afterwards
# instead of leaving xpd = TRUE set globally.
op <- par(xpd = TRUE)
legend("topright", levels(wtr), pch = unique(out$class),
       text.col = as.numeric(unique(wtr)), bty = "n")
par(op)
# Another example: Iris data.
data(iris)
# Hold out 50 of the 150 observations as a test set.
testset <- sample(1:150, 50)
train <- as.matrix(iris[-testset, -5])
test <- as.matrix(iris[testset, -5])
wtr <- iris[-testset, 5]
wte <- iris[testset, 5]
# Use default starting values for parameters in the model.
result <- bkpc(train, y = wtr, n.iter = 1000, thin = 10, n.kpc = 2,
               intercept = FALSE, rotate = TRUE)
# Predict on the held-out test set, discarding 10 burn-in samples.
out <- predict(result, test, n.burnin = 10)
# Classification rate on the test set.
sum(out$class == as.numeric(wte)) / nrow(test)
table(out$class, as.numeric(wte))
# }
# NOT RUN {
# Another example: synthetic data from the MASS library.
library(MASS)
train <- as.matrix(synth.tr[, -3])
test <- as.matrix(synth.te[, -3])
wtr <- as.factor(synth.tr[, 3])
wte <- as.factor(synth.te[, 3])
# Make the training set kernel using kernelMatrix from the kernlab library.
library(kernlab)
kfunc <- laplacedot(sigma = 1)
Ktrain <- kernelMatrix(kfunc, train)
# Make the testing set kernel using kernelMatrix {kernlab}.
Ktest <- kernelMatrix(kfunc, test, train)
result <- bkpc(Ktrain, y = wtr, n.iter = 1000, thin = 10, n.kpc = 3,
               intercept = FALSE, rotate = TRUE)
# Predict on the test-set kernel, discarding 10 burn-in samples.
out <- predict(result, Ktest, n.burnin = 10)
# Classification rate on the test set.
sum(out$class == as.numeric(wte)) / nrow(test)
table(out$class, as.numeric(wte))
# Embed data from the testing set in the new feature space:
KPCtest <- predict(result$kPCA, Ktest)
# New data is linearly separable in the new feature space where
# classification takes place.
library(rgl)
plot3d(KPCtest[, 1:3], col = as.numeric(wte))
# Another model: do not project the data onto the principal axes of the
# feature space (rotate = FALSE).
# NOTE: Slow.
# Use a Gaussian kernel with the default bandwidth parameter; compute
# gaussKern(train) once and reuse its kernel and bandwidth, rather than
# calling it three times.
gk <- gaussKern(train)
Ktrain <- gk$K
Ktest <- gaussKern(train, test, theta = gk$theta)$K
resultBKMC <- bkpc(Ktrain, y = wtr, n.iter = 1000, thin = 10,
                   intercept = FALSE, rotate = FALSE)
# Predict on the test-set kernel, discarding 10 burn-in samples.
outBKMC <- predict(resultBKMC, Ktest, n.burnin = 10)
# Compare with the previous model.
table(outBKMC$class, as.numeric(wte))
# Another example: wine data from the gclus library.
library(gclus)
data(wine)
# Hold out 90 of the 178 observations as a test set.
testset <- sample(1:178, 90)
train <- as.matrix(wine[-testset, -1])
test <- as.matrix(wine[testset, -1])
wtr <- as.factor(wine[-testset, 1])
wte <- as.factor(wine[testset, 1])
# Make the training set kernel using kernelMatrix from the kernlab library.
kfunc <- anovadot(sigma = 1, degree = 1)
Ktrain <- kernelMatrix(kfunc, train)
# Make the testing set kernel using kernelMatrix {kernlab}.
Ktest <- kernelMatrix(kfunc, test, train)
result <- bkpc(Ktrain, y = wtr, n.iter = 1000, thin = 10, n.kpc = 3,
               intercept = FALSE, rotate = TRUE)
out <- predict(result, Ktest, n.burnin = 10)
# Classification rate in the test set.
sum(out$class == as.numeric(wte)) / nrow(test)
# Embed data from the testing set in the new feature space:
KPCtest <- predict(result$kPCA, Ktest)
# New data is linearly separable in the new feature space where
# classification takes place.
pairs(KPCtest[, 1:3], col = as.numeric(wte),
      main = paste("symbol = predicted class", "\n", "color = true class"),
      pch = out$class, upper.panel = NULL)
# Allow the legend outside the plot region; restore par() afterwards
# instead of leaving xpd = TRUE set globally.
op <- par(xpd = TRUE)
legend("topright", levels(wte), pch = unique(out$class),
       text.col = as.numeric(unique(wte)), bty = "n")
par(op)
# }
# Run the code above in your browser using DataLab