## Attach the dbscan package: kNNdistplot() and dbscan() used below come from
## it, and a standalone run fails with "could not find function" otherwise.
library(dbscan)

## Keep only the four numeric columns of the built-in iris data, as a matrix.
data(iris)
iris <- as.matrix(iris[, 1:4])

## find suitable eps parameter using a k-NN plot for k = dim + 1
## Look for the knee!
kNNdistplot(iris, k = 5)
## Mark the eps value used for the clustering below (red dashed line).
abline(h = 0.4, col = "red", lty = 2)

## Cluster with the eps marked on the plot and print the result.
res <- dbscan(iris, eps = 0.4, minPts = 5)
res

## Scatterplot matrix coloured by cluster id; the + 1L shift means noise
## points (cluster 0) are drawn with colour 1 instead of colour 0.
pairs(iris, col = res$cluster + 1L)
## example data from fpc
## Ten random centres per coordinate are repeated to cover all n points, and
## each point is then jittered with Gaussian noise (sd = 0.2).
set.seed(665544)
n <- 100
x <- cbind(
  x = rep_len(runif(10, min = 0, max = 10), n) + rnorm(n, mean = 0, sd = 0.2),
  y = rep_len(runif(10, min = 0, max = 10), n) + rnorm(n, mean = 0, sd = 0.2)
)
## Cluster the example points and print the result.
res <- dbscan(x, eps = 0.3, minPts = 3)
res

## plot clusters and add noise (cluster 0) as crosses.
plot(x, col = res$cluster)
## drop = FALSE keeps the selection a two-column matrix even when exactly one
## point is noise; without it the single row collapses to a plain vector and
## points() would plot its two values against the index 1:2.
points(x[res$cluster == 0, , drop = FALSE], pch = 3, col = "grey")
## predict cluster membership for new data points
## (Note: 0 means it is predicted as noise)
## New points: the first five rows of x, perturbed with Gaussian noise.
newdata <- x[1:5, ] + rnorm(10, 0, 0.2)
## Pass `newdata` and `data` by name: their positional order differs between
## dbscan releases, so naming them keeps the call correct for either signature.
predict(res, newdata = newdata, data = x)
## compare speed against fpc version (if microbenchmark is installed)
## Note: we use dbscan::dbscan to make sure that we do not run the
## implementation in fpc.
## Not run:
# if (requireNamespace("fpc", quietly = TRUE) &&
# requireNamespace("microbenchmark", quietly = TRUE)) {
# t_dbscan <- microbenchmark::microbenchmark(
# dbscan::dbscan(x, .3, 3), times = 10, unit = "ms")
# t_dbscan_linear <- microbenchmark::microbenchmark(
# dbscan::dbscan(x, .3, 3, search = "linear"), times = 10, unit = "ms")
# t_fpc <- microbenchmark::microbenchmark(
# fpc::dbscan(x, .3, 3), times = 10, unit = "ms")
#
# rbind(t_fpc, t_dbscan_linear, t_dbscan)
#
# boxplot(rbind(t_fpc, t_dbscan_linear, t_dbscan),
# names = c("fpc", "dbscan (linear)", "dbscan (kdtree)"),
# main = "Runtime comparison in ms")
#
# ## speedup of the kd-tree-based version compared to the fpc implementation
# median(t_fpc$time) / median(t_dbscan$time)
# }## End(Not run)
## Run the code above in your browser using DataLab