## Real apple data (small set): coefficient-based biomarker scores
data(spikedApples)
apple.coef <- get.biom(
  X = spikedApples$dataMatrix,
  Y = factor(rep(1:2, each = 10)),  # class 1 for the first 10 rows, class 2 for the next 10
  ncomp = 2:3,
  type = "coef"
)

## Pull out the coefficients and show the (min, max) range of each element
coef.sizes <- coef(apple.coef)
sapply(coef.sizes, range)
## Stability-based selection
## The seed is fixed so that repeated runs give the same selection.
set.seed(17)
apple.stab <- get.biom(
  X = spikedApples$dataMatrix,
  Y = factor(rep(1:2, each = 10)),
  ncomp = 2:3,
  type = "stab"
)
selected.variables <- selection(apple.stab)

## Number of selected variables, per method and per setting of that method.
## The inner sapply() is handed to the outer one as FUN, with `length` as
## its own FUN argument.
unlist(sapply(selected.variables, sapply, length))
## Ranging from more than 70 for pcr, approx 40 for pls and student t,
## to 0-29 for the lasso

## True positives: selected variables that are among the known spiked
## biomarkers in spikedApples$biom.
unlist(sapply(selected.variables, lapply,
              function(picked) sum(picked %in% spikedApples$biom)))
## TPs (stab): all find 5/5, except pcr.2 and the lasso with values for lambda
## larger than 0.0484

## False positives: selected variables that are NOT in spikedApples$biom.
unlist(sapply(selected.variables, lapply,
              function(picked) sum(!(picked %in% spikedApples$biom))))
## FPs (stab): PCR finds most FPs (approx. 60), other latent-variable
## methods approx 40, lasso allows for the optimal selection around
## lambda = 0.0702
## Regression example: stability-based selection with a numeric response
data(gasoline)  # data set shipped with the pls package
gasoline.stab <- get.biom(
  X = gasoline$NIR,
  Y = gasoline$octane,
  fmethod = c("pcr", "pls", "lasso"),
  type = "stab"
)
## Not run:
# ## Same for HC-based selection
# ## Warning: takes a long time!
# apple.HC <- get.biom(X = spikedApples$dataMatrix,
# Y = factor(rep(1:2, each = 10)),
# ncomp = 2:3, type = "HC")
# sapply(apple.HC[names(apple.HC) != "info"],
# function(x, y) sum(x$biom.indices %in% y),
# spikedApples$biom)
# sapply(apple.HC[names(apple.HC) != "info"],
# function(x, y) sum(!(x$biom.indices %in% y)),
# spikedApples$biom)
# ## End(Not run)
## Run the code above in your browser using DataLab