# Example 1: Static Evaluation
# Fix the random seed so the example is reproducible.
set.seed(0)
stream <- DSD_Gaussians(k = 3, d = 2)
dstream <- DSC_DStream(gridsize = 0.05, Cm = 1.5)
update(dstream, stream, 500)
plot(dstream, stream)

# Evaluate the micro-clusters in the clustering.
# Note: only n = 100 points are used for evaluation to speed up execution.
evaluate_static(dstream, stream, n = 100)

# Measures shared by the remaining evaluations of this example; keeping them
# in one place guarantees that all three calls report the same columns.
static_measures <- c("numMicro", "numMacro", "purity", "cRand", "SSQ")
evaluate_static(dstream, stream, measure = static_measures, n = 100)

# DStream also provides macro-clusters. Evaluate them with type = "macro".
# Note that SSQ and cRand increase.
plot(dstream, stream, type = "macro")
evaluate_static(dstream, stream,
  type = "macro",
  measure = static_measures,
  n = 100
)

# By default, points are assigned to micro-clusters using the method
# specified for the clustering algorithm. Points can instead be assigned to
# the closest macro-cluster with assign = "macro".
evaluate_static(dstream, stream,
  type = "macro", assign = "macro",
  measure = static_measures,
  n = 100
)
# Example 2: Evaluate with Noise/Outliers
stream <- DSD_Gaussians(k = 3, d = 2, noise = 0.05)
dstream <- DSC_DStream(gridsize = 0.05, Cm = 1.5)
update(dstream, stream, 500)
plot(dstream, stream, 500)

# Noise handling: cRand treats noise as a group of its own, whereas SSQ
# always leaves out the actual noise points.
noise_measures <- c(
  "numPoints", "noisePredicted", "noiseActual",
  "noisePrecision", "outlierJaccard", "cRand", "SSQ"
)
evaluate_static(dstream, stream, n = 100, measure = noise_measures)

# When noise is excluded, the number of points used is reduced.
evaluate_static(dstream, stream,
  n = 100,
  measure = noise_measures,
  excludeNoise = TRUE
)
# Example 3: Evaluate an evolving data stream
stream <- DSD_Benchmark(1)
dstream <- DSC_DStream(gridsize = 0.05, lambda = 0.1)
evaluate_stream(
  dstream, stream,
  type = "macro", assign = "micro",
  measure = c("numMicro", "numMacro", "purity", "cRand"),
  n = 600, horizon = 100
)

if (interactive()) {
  # Animate the clustering process: reset the stream and start again with a
  # newly created clusterer.
  reset_stream(stream)
  dstream <- DSC_DStream(gridsize = 0.05, lambda = 0.1)
  animate_cluster(
    dstream, stream,
    horizon = 100, n = 5000,
    measure = "cRand", type = "macro", assign = "micro",
    plot.args = list(type = "both", xlim = c(0, 1), ylim = c(0, 1))
  )
}
# Example 4: Add a custom measure as a callback
# Each callback takes (actual, predict, points, centers, dsc) and returns a
# single number. A label of 0L marks a noise point in `actual` and `predict`.
callbacks <- list(
  # Fraction of the evaluated points whose actual label is noise.
  noisePercentage = function(actual, predict, points, centers, dsc) {
    mean(actual == 0L)
  },
  # Actual noise points that were not predicted as noise.
  noiseFN = function(actual, predict, points, centers, dsc) {
    truly_noise <- actual == 0L
    flagged_noise <- predict == 0L
    sum(truly_noise & !flagged_noise)
  },
  # Non-noise points that were predicted as noise.
  noiseFP = function(actual, predict, points, centers, dsc) {
    truly_noise <- actual == 0L
    flagged_noise <- predict == 0L
    sum(!truly_noise & flagged_noise)
  }
)
# Build a noisy stream (noise = 0.2) and cluster 500 points from it.
stream <- DSD_Gaussians(k = 3, d = 2, noise = 0.2)
dstream <- DSC_DStream(gridsize = 0.05, Cm = 1.5)
update(dstream, stream, 500)

# Request built-in measures together with the callback measures by name.
evaluate_static(
  dstream, stream,
  measure = c(
    "numPoints", "noiseActual", "noisePredicted",
    "noisePercentage", "noiseFN", "noiseFP"
  ),
  callbacks = callbacks,
  n = 100
)

# Same evaluation, leaving `measure` and `n` at their defaults.
evaluate_static(dstream, stream, callbacks = callbacks)
# Run the code above in your browser using DataLab