iris.d <- dist(iris[, -5])
cl1 <- iris$Species
sam <- c(rep(0, 4), 1) > 0
cl1[!sam] <- NA
table(cl1, useNA="ifany")
## based on neighbor number
iris.pred <- DNN(dst=iris.d, cl=cl1, k=5)
Misclass(iris$Species[is.na(cl1)], iris.pred)
## based on neighborhood size
iris.pred <- DNN(dst=iris.d, cl=cl1, d=0.05)
table(iris.pred, useNA="ifany")
Misclass(iris$Species[is.na(cl1)], iris.pred)
## protection against "all points relevant"
DNN(dst=iris.d, cl=cl1, d=1)[1:5]
## and all are ties:
DNN(dst=iris.d, cl=cl1, d=1, details=TRUE)[, 1:5]
## any distance works
iris.d2 <- Gower.dist(iris[, -5])
iris.pred <- DNN(dst=iris.d2, cl=cl1, k=5)
Misclass(iris$Species[is.na(cl1)], iris.pred)
## combined
cl2 <- cl1
iris.pred <- DNN(dst=iris.d, cl=cl2, d=0.05)
cl2[is.na(cl2)] <- iris.pred
table(cl2, useNA="ifany")
iris.pred2 <- DNN(dst=iris.d, cl=cl2, k=5)
cl2[is.na(cl2)] <- iris.pred2
table(cl2, useNA="ifany")
Misclass(iris$Species, cl2)
## self-training and class proximity surrogate
cl3 <- iris$Species
t(DNN(dst=iris.d, cl=cl3, k=5, details=TRUE, self=TRUE))/5
## Dnn() with more class::knn()-like interface
iris.trn <- iris[sam, ]
iris.tst <- iris[!sam, ]
Dnn(iris.trn[, -5], iris.tst[, -5], iris.trn[, 5], k=7)
## stepwise DNN, note the warning when no NAs left
cl4 <- cl1
for (d in (5:14)/100) {
iris.pred <- DNN(dst=iris.d, cl=cl4, d=d)
cl4[is.na(cl4)] <- iris.pred
}
table(cl4, useNA="ifany")
Misclass(iris$Species, cl4)
## rushing to d=14% gives much worse results
iris.pred <- DNN(dst=iris.d, cl=cl1, d=0.14)
table(iris.pred, useNA="ifany")
Misclass(iris$Species[is.na(cl1)], iris.pred)
Run the code above in your browser using DataLab