# Reproducible toy data: n points scattered around 10 random centers in the
# square [0, 5] x [0, 5], with Gaussian jitter (sd = 0.4) on each coordinate.
set.seed(665544)
n <- 100
x <- local({
  # One coordinate axis: draw the 10 centers first, then the n jitter values
  # (same RNG order as adding the two draws directly), and repeat the centers
  # cyclically so that every point is assigned to one of them.
  draw_axis <- function() {
    centers <- runif(10, min = 0, max = 5)
    jitter <- rnorm(n, sd = 0.4)
    rep_len(centers, n) + jitter
  }
  x_coord <- draw_axis()
  y_coord <- draw_axis()
  cbind(x = x_coord, y = y_coord)
})
### Calculate the GLOSH outlier scores for the points in x, using k = 3.
### NOTE(review): glosh() is not defined in this script; presumably it comes
### from the dbscan package, which must already be attached -- confirm.
### The result is stored under the same name as the function. Later calls such
### as glosh(x_sl, k = 3) still resolve to the function, because R skips
### non-function bindings when it looks up a name in call position.
glosh <- glosh(x, k = 3)
### Distribution of the outlier scores: numeric summary, then a histogram.
summary(glosh)
hist(glosh, breaks = 10)
### Simple plotting helper: each point gets a red circle whose size is
### proportional to its GLOSH score; high-scoring points are labelled.
###
### Arguments:
###   x          matrix of 2-D point coordinates, one row per point
###   glosh      numeric vector of GLOSH scores, one per row of x
###   threshold  points with a score strictly above this value are labelled
###   main       plot title
###
### Returns NULL invisibly; the function is called for its plotting side
### effects only.
plot_glosh <- function(x, glosh, threshold = 0.80, main = "GLOSH (k = 3)") {
  plot(x, pch = ".", main = main)
  points(x, cex = glosh * 3, pch = 1, col = "red")

  # which() yields plain row indices and silently skips NA scores.
  flagged <- which(glosh > threshold)

  # text() raises an error when given zero-length labels, so only call it
  # when at least one point exceeds the threshold.
  if (length(flagged) > 0) {
    # drop = FALSE keeps a one-row matrix; without it a single flagged point
    # collapses to a length-2 vector that text() would read as two y-values.
    text(
      x[flagged, , drop = FALSE],
      labels = round(glosh[flagged], 3),
      pos = 3
    )
  }
  invisible(NULL)
}
### Plot the scores computed directly from the data.
plot_glosh(x, glosh)

### GLOSH with any hierarchy: build three hierarchical clusterings from the
### same distance matrix, differing only in the linkage criterion.
x_dist <- dist(x)
x_sl <- hclust(x_dist, method = "single")
x_upgma <- hclust(x_dist, method = "average")
x_ward <- hclust(x_dist, method = "ward.D2")

## Compare what the different linkage criteria consider as outliers
glosh_sl <- glosh(x_sl, k = 3)
plot_glosh(x, glosh_sl)

glosh_upgma <- glosh(x_upgma, k = 3)
plot_glosh(x, glosh_upgma)

glosh_ward <- glosh(x_ward, k = 3)
plot_glosh(x, glosh_ward)

## GLOSH is automatically computed with HDBSCAN.
## The scores are floating-point values, so compare them with a numeric
## tolerance rather than element-wise `==`; attributes (e.g. names) are
## ignored because only the score values are of interest here.
isTRUE(all.equal(
  hdbscan(x, minPts = 3)$outlier_scores,
  glosh(x, k = 3),
  check.attributes = FALSE
))
# Run the code above in your browser using DataLab