require(datanugget)
# Small 2-D example with visualization.
# Simulate two point clouds of 5000 rows x 2 columns each (sd = 0.3):
# the first uses rnorm()'s default mean, the second uses mean = 1.
cloud_first <- matrix(rnorm(10^4, sd = 0.3), ncol = 2)
cloud_second <- matrix(rnorm(10^4, mean = 1, sd = 0.3), ncol = 2)
# Stack both clouds into a single data frame of 10000 observations.
X <- rbind.data.frame(cloud_first, cloud_second)
# Create the initial data nuggets from the raw data frame X.
# See ?create.DN for the meaning of each tuning argument.
my.DN <- create.DN(
  x = X,
  R = 500,
  delete.percent = 0.1,
  DN.num1 = 500,
  DN.num2 = 250,
  no.cores = 0,
  make.pbs = FALSE
)
# Refine the data nuggets created above, using the same raw data X.
# See ?refine.DN for the meaning of each tuning argument.
my.DN2 <- refine.DN(
  x = X,
  DN = my.DN,
  EV.tol = 0.9,
  min.nugget.size = 2,
  max.splits = 5,
  no.cores = 0,
  make.pbs = FALSE
)
# Plot the raw large dataset.
plot(X)

# Transform the nugget weights into plot colours.
# The weights are first rescaled to sum to one, then divided by their
# 80th percentile; values are capped at 1 below, so every nugget at or
# above that percentile is drawn at full green intensity.
w_trans <- my.DN2$`Data Nuggets`[, "Weight"]
w_trans <- w_trans / sum(w_trans)
w_trans <- w_trans / quantile(w_trans, 0.8)
# rgb() is vectorised, so cap the intensities with pmin() in one call
# instead of looping over the weights one element at a time.
col <- rgb(0, pmin(w_trans, 1), 0)

# Plot the refined data nugget centers coloured by weight:
# a brighter green means a larger weight.
plot(my.DN2$`Data Nuggets`[, c("Center1", "Center2")],
     col = col, lty = 2, pch = 16, cex = 0.5)
# K-means clustering of the refined data nuggets
# (k = 2, num.init = 1, max.iterations = 5; see ?DN.Wkmeans).
DN.clus <- DN.Wkmeans(
  datanugget = my.DN2,
  k = 2,
  num.init = 1,
  max.iterations = 5
)

# Inspect the "Cluster Centers" and "WWCSS" elements of the result.
DN.clus$`Cluster Centers`
DN.clus$WWCSS
# Plot the clustering result for the data nuggets: nugget centers are
# coloured by their cluster assignment, and the cluster centers are
# overlaid as large asterisks (pch = 8).
nugget_centers <- my.DN2$`Data Nuggets`[, c("Center1", "Center2")]
nugget_cluster <- DN.clus$`Cluster Assignments for data nuggets`
plot(nugget_centers, col = nugget_cluster, lty = 2, pch = 16, cex = 0.5)
points(DN.clus$`Cluster Centers`, col = 1:2, pch = 8, cex = 5)

# Plot the clustering result for the raw large dataset.
plot(X, col = DN.clus$`Cluster Assignments for original dataset`)
# Run the code above in your browser using DataLab