# Worked example for the UBL package: one classification and one regression
# task on the synthetic imbalanced data sets shipped with the package.
# The whole script is wrapped in `if (FALSE)` so it is never run automatically;
# copy the body into an interactive session to try it.
if (FALSE) {
  library(UBL)

  # ---- Classification: synthetic data set ImbC ----
  data(ImbC)
  plot(ImbC$X1, ImbC$X2, col = ImbC$Class, xlab = "X1", ylab = "X2")
  summary(ImbC)

  # randomly generate a 30-70% test and train partition
  # (seq_len() is safe even if the data set were empty, unlike 1:nrow())
  i.train <- sample(seq_len(nrow(ImbC)), as.integer(0.7 * nrow(ImbC)))
  trainD <- ImbC[i.train, ]
  testD <- ImbC[-i.train, ]

  # rpart is not attached by library(UBL), so call it through its namespace
  model <- rpart::rpart(Class ~ ., trainD)
  preds <- predict(model, testD, type = "class")
  table(preds, testD$Class)

  # apply random over-sampling approach to balance the data set
  newTrain <- RandOverClassif(Class ~ ., trainD)
  newModel <- rpart::rpart(Class ~ ., newTrain)
  newPreds <- predict(newModel, testD, type = "class")
  table(newPreds, testD$Class)

  # ---- Regression: synthetic data set ImbR ----
  data(ImbR)

  # ggplot2 is an optional dependency; only draw the scatter plot if available
  if (requireNamespace("ggplot2", quietly = TRUE)) {
    print(
      ggplot2::ggplot(ImbR, ggplot2::aes(x = X1, y = X2)) +
        ggplot2::geom_point(ggplot2::aes(colour = Tgt)) +
        ggplot2::scale_color_gradient(low = "red", high = "blue")
    )
  }
  boxplot(ImbR$Tgt)

  # relevance function automatically obtained (high extremes of Tgt)
  phiF.args <- phi.control(ImbR$Tgt, method = "extremes", extr.type = "high")
  sortedTgt <- sort(ImbR$Tgt)
  y.phi <- phi(sortedTgt, control.parms = phiF.args)
  plot(sortedTgt, y.phi, type = "l",
       xlab = "Tgt variable", ylab = "relevance value")

  # set the train and test data
  i.train <- sample(seq_len(nrow(ImbR)), as.integer(0.7 * nrow(ImbR)))
  trainD <- ImbR[i.train, ]
  testD <- ImbR[-i.train, ]

  # DMwR2 is an optional dependency; skip the model comparison without it
  if (requireNamespace("DMwR2", quietly = TRUE)) {
    # common axis limits used by the prediction plots below
    axisLim <- c(0, 55)

    # train a model on the original train data
    model <- DMwR2::rpartXse(Tgt ~ ., trainD, se = 0)
    # use the predict() generic: DMwR2 does not export a `predict` function
    preds <- predict(model, testD)
    plot(testD$Tgt, preds, xlim = axisLim, ylim = axisLim)
    abline(a = 0, b = 1)

    # obtain a new train using random under-sampling strategy
    newTrain <- RandUnderRegress(Tgt ~ ., trainD)
    newModel <- DMwR2::rpartXse(Tgt ~ ., newTrain, se = 0)
    newPreds <- predict(newModel, testD)

    # plot the predictions for the model obtained with
    # the original and the modified train data
    plot(testD$Tgt, preds, xlim = axisLim, ylim = axisLim) # black: original train
    abline(a = 0, b = 1, lty = 2, col = "grey")
    points(testD$Tgt, newPreds, col = "blue", pch = 2) # blue: changed train
    # dashed reference lines at 30 on both axes
    abline(h = 30, lty = 2, col = "grey")
    abline(v = 30, lty = 2, col = "grey")
  }
}
# Run the code above in your browser using DataLab