## --------------------------------------------------------------------
##
## 1) For illustration of use, a small data set with very few iterations
## of the algorithm. Escoufier's 'RV' criterion is used to select variable
## subsets of size 3 and 4.
##
## Search the correlation matrix of the 'swiss' data for 3- and 4-variable
## subsets under the RV criterion; the population size and number of
## generations are kept deliberately small for illustration.
data(swiss)
genetic(
  mat = cor(swiss), kmin = 3, kmax = 4,
  popsize = 10, nger = 5, criterion = "Rv"
)
## For cardinality k=
##[1] 4
## there is not enough genetic diversity in generation number
##[1] 3
## for acceptable levels of consanguinity (couples differing by at least 2 genes).
## Try reducing the maximum acceptable number of clones (maxclone) or
## increasing the population size (popsize)
## Best criterion value found so far:
##[1] 0.9557145
##$subsets
##, , Card.3
##
## Var.1 Var.2 Var.3 Var.4
##Solution 1 1 2 3 0
##Solution 2 1 2 3 0
##Solution 3 1 2 3 0
##Solution 4 3 4 6 0
##Solution 5 3 4 6 0
##Solution 6 3 4 5 0
##Solution 7 3 4 5 0
##Solution 8 1 3 6 0
##Solution 9 1 3 6 0
##Solution 10 1 3 6 0
##
##, , Card.4
##
## Var.1 Var.2 Var.3 Var.4
##Solution 1 2 4 5 6
##Solution 2 1 2 5 6
##Solution 3 1 2 3 5
##Solution 4 1 2 4 5
##Solution 5 1 2 4 5
##Solution 6 1 4 5 6
##Solution 7 1 4 5 6
##Solution 8 1 4 5 6
##Solution 9 1 3 4 5
##Solution 10 1 3 4 5
##
##
##$values
## card.3 card.4
##Solution 1 0.9141995 0.9557145
##Solution 2 0.9141995 0.9485699
##Solution 3 0.9141995 0.9455508
##Solution 4 0.9034868 0.9433203
##Solution 5 0.9034868 0.9433203
##Solution 6 0.9020271 0.9428967
##Solution 7 0.9020271 0.9428967
##Solution 8 0.8988192 0.9428967
##Solution 9 0.8988192 0.9357982
##Solution 10 0.8988192 0.9357982
##
##$bestvalues
## Card.3 Card.4
##0.9141995 0.9557145
##
##$bestsets
## Var.1 Var.2 Var.3 Var.4
##Card.3 1 2 3 0
##Card.4 2 4 5 6
##
##$call
##genetic(mat = cor(swiss), kmin = 3, kmax = 4, popsize = 10, nger = 5,
## criterion = "Rv")
## --------------------------------------------------------------------
##
## 2) An example of subset selection in the context of Multiple Linear
## Regression. Variable 5 (average car price) in the Cars93 MASS library
## data set is regressed on 13 other variables. The six-variable subsets
## of linear predictors are chosen using the "CCR1_2" criterion which,
## in the case of a Linear Regression, is merely the standard Coefficient
## of Determination, R^2 (as are the other three criteria for the
## multivariate linear hypothesis, "XI_2", "TAU_2" and "ZETA_2").
##
library(MASS)
data(Cars93)
## Response: column 5 of Cars93; predictors: the 13 columns selected below.
## The returned object is used below as a list with components 'mat' and 'H'.
CarsHmat <- lmHmat(Cars93[, c(7:8, 12:15, 17:22, 25)], Cars93[, 5])
names(Cars93[, 5, drop = FALSE])
## [1] "Price"
## The predictor names are attached to the 'mat' component; calling
## colnames() on the CarsHmat list itself would return NULL.
colnames(CarsHmat$mat)
## [1] "MPG.city" "MPG.highway" "EngineSize"
## [4] "Horsepower" "RPM" "Rev.per.mile"
## [7] "Fuel.tank.capacity" "Passengers" "Length"
## [10] "Wheelbase" "Width" "Turn.circle"
## [13] "Weight"
## 'criterion' is spelled out in full rather than relying on partial
## argument matching.
genetic(CarsHmat$mat, kmin = 6, H = CarsHmat$H, r = 1, criterion = "CCR12")
##
## (Partial results only)
##
## $subsets
## Var.1 Var.2 Var.3 Var.4 Var.5 Var.6
## Solution 1 4 5 9 10 11 12
## Solution 2 4 5 9 10 11 12
## Solution 3 4 5 9 10 11 12
## Solution 4 4 5 9 10 11 12
## Solution 5 4 5 9 10 11 12
## Solution 6 4 5 9 10 11 12
## Solution 7 4 5 8 10 11 12
##
## (...)
##
## Solution 94 1 4 5 6 10 11
## Solution 95 1 4 5 6 10 11
## Solution 96 1 4 5 6 10 11
## Solution 97 1 4 5 6 10 11
## Solution 98 1 4 5 6 10 11
## Solution 99 1 4 5 6 10 11
## Solution 100 1 4 5 6 10 11
##
## $values
## Solution 1 Solution 2 Solution 3 Solution 4 Solution 5 Solution 6
## 0.7310150 0.7310150 0.7310150 0.7310150 0.7310150 0.7310150
## Solution 7 Solution 8 Solution 9 Solution 10 Solution 11 Solution 12
## 0.7310150 0.7271056 0.7271056 0.7271056 0.7271056 0.7271056
## Solution 13 Solution 14 Solution 15 Solution 16 Solution 17 Solution 18
## 0.7271056 0.7270257 0.7270257 0.7270257 0.7270257 0.7270257
##
## (...)
##
## Solution 85 Solution 86 Solution 87 Solution 88 Solution 89 Solution 90
## 0.7228800 0.7228800 0.7228800 0.7228800 0.7228800 0.7228800
## Solution 91 Solution 92 Solution 93 Solution 94 Solution 95 Solution 96
## 0.7228463 0.7228463 0.7228463 0.7228463 0.7228463 0.7228463
## Solution 97 Solution 98 Solution 99 Solution 100
## 0.7228463 0.7228463 0.7228463 0.7228463
##
## $bestvalues
## Card.6
## 0.731015
##
## $bestsets
## Var.1 Var.2 Var.3 Var.4 Var.5 Var.6
## 4 5 9 10 11 12
##
## $call
## genetic(mat = CarsHmat$mat, kmin = 6, criterion = "CCR12", H = CarsHmat$H,
## r = 1)
## --------------------------------------------------------------------
## 3) An example of subset selection in the context of a Canonical
## Correlation Analysis. Two groups of variables within the Cars93
## MASS library data set are compared. The goal is to select 5- and
## 6-variable subsets of the 13-variable 'X' group that are optimal in
## terms of preserving the canonical correlations, according to the
## "ZETA_2" criterion (Warning: the 3-variable 'Y' group is kept
## intact; subset selection is carried out in the 'X'
## group only). The 'tolsym' parameter is used to relax the symmetry
## requirements on the effect matrix H which, for numerical reasons,
## is slightly asymmetric. Since corresponding off-diagonal entries of
## matrix H are different, but by less than tolsym, H is replaced
## by its symmetric part: (H+t(H))/2.
library(MASS)
data(Cars93)
## 'X' group: the 13 columns selected below; 'Y' group: columns 4 to 6.
CarsHmat <- lmHmat(Cars93[, c(7:8, 12:15, 17:22, 25)], Cars93[, 4:6])
names(Cars93[, 4:6])
## [1] "Min.Price" "Price" "Max.Price"
colnames(CarsHmat$mat)
## [1] "MPG.city" "MPG.highway" "EngineSize"
## [4] "Horsepower" "RPM" "Rev.per.mile"
## [7] "Fuel.tank.capacity" "Passengers" "Length"
## [10] "Wheelbase" "Width" "Turn.circle"
## [13] "Weight"
## 'criterion' is spelled out in full rather than relying on partial
## argument matching; 'tolsym' relaxes the symmetry check on H.
genetic(
  CarsHmat$mat, kmin = 5, kmax = 6, H = CarsHmat$H, r = 3,
  criterion = "zeta2", tolsym = 1e-9
)
## (PARTIAL RESULTS ONLY)
##
## $subsets
##
## Var.1 Var.2 Var.3 Var.4 Var.5 Var.6
## Solution 1 4 5 9 10 11 0
## Solution 2 4 5 9 10 11 0
## Solution 3 4 5 9 10 11 0
## Solution 4 4 5 9 10 11 0
## Solution 5 4 5 9 10 11 0
## Solution 6 4 5 9 10 11 0
## Solution 7 4 5 9 10 11 0
## Solution 8 3 4 9 10 11 0
## Solution 9 3 4 9 10 11 0
## Solution 10 3 4 9 10 11 0
##
## (...)
##
## Solution 87 3 4 6 9 10 11
## Solution 88 3 4 6 9 10 11
## Solution 89 3 4 6 9 10 11
## Solution 90 2 3 4 10 11 12
## Solution 91 2 3 4 10 11 12
## Solution 92 2 3 4 10 11 12
## Solution 93 2 3 4 10 11 12
## Solution 94 2 3 4 10 11 12
## Solution 95 2 3 4 10 11 12
## Solution 96 2 3 4 10 11 12
## Solution 97 1 3 4 6 10 11
## Solution 98 1 3 4 6 10 11
## Solution 99 1 3 4 6 10 11
## Solution 100 1 3 4 6 10 11
##
##
## $values
##
## card.5 card.6
## Solution 1 0.5018922 0.5168627
## Solution 2 0.5018922 0.5168627
## Solution 3 0.5018922 0.5168627
## Solution 4 0.5018922 0.5168627
## Solution 5 0.5018922 0.5168627
## Solution 6 0.5018922 0.5168627
## Solution 7 0.5018922 0.5096500
## Solution 8 0.4966191 0.5096500
## Solution 9 0.4966191 0.5096500
## Solution 10 0.4966191 0.5096500
##
## (...)
##
## Solution 87 0.4893824 0.5038649
## Solution 88 0.4893824 0.5038649
## Solution 89 0.4893824 0.5038649
## Solution 90 0.4893824 0.5035489
## Solution 91 0.4893824 0.5035489
## Solution 92 0.4893824 0.5035489
## Solution 93 0.4893824 0.5035489
## Solution 94 0.4893824 0.5035489
## Solution 95 0.4893824 0.5035489
## Solution 96 0.4893824 0.5035489
## Solution 97 0.4890986 0.5035386
## Solution 98 0.4890986 0.5035386
## Solution 99 0.4890986 0.5035386
## Solution 100 0.4890986 0.5035386
##
## $bestvalues
## Card.5 Card.6
## 0.5018922 0.5168627
##
## $bestsets
## Var.1 Var.2 Var.3 Var.4 Var.5 Var.6
## Card.5 4 5 9 10 11 0
## Card.6 4 5 9 10 11 12
##
## $call
## genetic(mat = CarsHmat$mat, kmin = 5, kmax = 6, criterion = "zeta2",
## H = CarsHmat$H, r = 3, tolsym = 1e-09)
##
## Warning message:
##
## The effect description matrix (H) supplied was slightly asymmetric:
## symmetric entries differed by up to 3.63797880709171e-12.
## (less than the 'tolsym' parameter).
## The H matrix has been replaced by its symmetric part.
## in: validnovcrit(mat, criterion, H, r, p, tolval, tolsym)
##
## The selected best variable subsets
## Names of the best 5-variable subset (columns 4, 5, 9, 10, 11 of 'mat').
colnames(CarsHmat$mat)[c(4, 5, 9:11)]
## [1] "Horsepower" "RPM" "Length" "Wheelbase" "Width"
## Names of the best 6-variable subset (columns 4, 5, 9, 10, 11, 12).
colnames(CarsHmat$mat)[c(4, 5, 9:12)]
## [1] "Horsepower" "RPM" "Length" "Wheelbase" "Width"
## [6] "Turn.circle"
## --------------------------------------------------------------------
## Run the code above in your browser using DataLab