# NOT RUN {
# Continuous features with continuous target, categorical target,
# and neighbor ranking:
library(neighbr)
data(iris)
# Add an ID column to the data for neighbor ranking. seq_len(nrow(iris))
# adapts to the actual row count instead of hard-coding 150:
iris$ID <- seq_len(nrow(iris))
# Train set contains all predicted variables, features, and ID column:
train_set <- iris[1:140, ]
# Omit predicted variables (Petal.Width, Species) and the ID column from
# the test set. Select by name rather than by position so the code
# survives column reordering:
test_set <- iris[141:150, !names(iris) %in% c("Petal.Width", "Species", "ID")]
# Fit a 3-nearest-neighbor model that predicts both targets and returns
# the 3 nearest neighbor IDs for each test row:
fit <- knn(
  train_set = train_set, test_set = test_set,
  k = 3,
  categorical_target = "Species",
  continuous_target = "Petal.Width",
  comparison_measure = "squared_euclidean",
  return_ranked_neighbors = 3,
  id = "ID"
)
# Export the fitted model as PMML:
fit_pmml <- pmml(fit)
# Logical features with categorical target and neighbor ranking:
library(neighbr)
data("houseVotes84")
# Remove any rows with N/A elements:
dat <- houseVotes84[complete.cases(houseVotes84), ]
# Feature columns are everything except the target and (later-added) ID:
feature_names <- names(dat)[!names(dat) %in% c("Class", "ID")]
# Recode each {no, yes} factor to numeric {0, 1} in a single pass per
# column: relabel the levels, then convert the factor to its numeric
# level values (the columns are independent, so one loop suffices):
for (n in feature_names) {
  levels(dat[, n])[levels(dat[, n]) == "n"] <- 0
  levels(dat[, n])[levels(dat[, n]) == "y"] <- 1
  dat[, n] <- as.numeric(levels(dat[, n]))[dat[, n]]
}
# Add an ID column for neighbor ranking. seq_len() is safe even for
# zero-row data, unlike 1:nrow(dat):
dat$ID <- seq_len(nrow(dat))
# Train set contains features, predicted variable, and ID
# (complete.cases leaves 232 rows; the first 225 are used for training):
train_set <- dat[1:225, ]
# Test set contains features only:
test_set <- dat[226:232, !names(dat) %in% c("Class", "ID")]
# Fit a 5-nearest-neighbor classifier using the Jaccard measure, which
# is appropriate for the binary 0/1 features created above:
fit <- knn(
  train_set = train_set, test_set = test_set,
  k = 5,
  categorical_target = "Class",
  comparison_measure = "jaccard",
  return_ranked_neighbors = 3,
  id = "ID"
)
# Export the fitted model as PMML:
fit_pmml <- pmml(fit)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab