# Examples of random under-sampling for classification with RandUnderClassif().
# The algae data from DMwR2 is used when that package is installed; otherwise
# the example falls back to the cats data from MASS and also compares a
# classification tree trained on the original data with one trained on
# under-sampled data.
if (requireNamespace("DMwR2", quietly = TRUE)) {
  data(algae, package = "DMwR2")
  # keep only the rows that have no missing values
  clean.algae <- data.frame(algae[complete.cases(algae), ])
  # per-class percentages; classes autumn and spring remain unchanged
  C.perc <- list(autumn = 1, summer = 0.9, winter = 0.4)
  myunder.algae <- RandUnderClassif(season ~ ., clean.algae, C.perc)
  undBalan.algae <- RandUnderClassif(season ~ ., clean.algae, "balance")
  undInvert.algae <- RandUnderClassif(season ~ ., clean.algae, "extreme")
} else {
  library(MASS)
  data(cats)
  myunder.cats <- RandUnderClassif(Sex ~ ., cats, list(M = 0.8))
  undBalan.cats <- RandUnderClassif(Sex ~ ., cats, "balance")
  undInvert.cats <- RandUnderClassif(Sex ~ ., cats, "extreme")

  # learn a model and check results with original and under-sampled data
  library(rpart)
  # random 70% / 30% train / test split (drawn once)
  idx <- sample(seq_len(nrow(cats)), as.integer(0.7 * nrow(cats)))
  tr <- cats[idx, ]
  ts <- cats[-idx, ]

  # tree learned on the original training data
  ctO <- rpart(Sex ~ ., tr)
  predsO <- predict(ctO, ts, type = "class")

  # tree learned on the under-sampled training data
  new.cats <- RandUnderClassif(Sex ~ ., tr, "balance")
  ct1 <- rpart(Sex ~ ., new.cats)
  preds1 <- predict(ct1, ts, type = "class")

  # print() is needed here: inside braces only the value of the last
  # expression would be auto-printed, so the first table would be lost.
  # The tables below are sample output; the exact counts depend on the
  # random split drawn above.
  print(table(predsO, ts$Sex))
  # predsO  F  M
  #      F  9  3
  #      M  7 25
  print(table(preds1, ts$Sex))
  # preds1  F  M
  #      F 13  4
  #      M  3 24
}
# Run the code above in your browser using DataLab