require(datanugget)
# Small two-dimensional example data set.
# Each matrix() call reshapes 5000 normal draws into 2500 rows x 2 columns;
# the first group uses rnorm's default mean, the second uses mean = 1, and
# both use sd = 0.3. The two groups are stacked row-wise.
X <- rbind.data.frame(
  matrix(rnorm(5 * 10^3, sd = 0.3), ncol = 2),
  matrix(rnorm(5 * 10^3, mean = 1, sd = 0.3), ncol = 2)
)
# Build the initial set of data nuggets from X with create.DN().
# NOTE(review): make.pbs = FALSE presumably turns progress bars off --
# confirm against the datanugget documentation.
my.DN <- create.DN(
  x = X,
  R = 300,
  delete.percent = 0.1,
  DN.num1 = 300,
  DN.num2 = 150,
  no.cores = 0,
  make.pbs = FALSE
)
# Refine the data nuggets in my.DN with refine.DN(), again passing the raw
# data X alongside them.
my.DN2 <- refine.DN(
  x = X,
  DN = my.DN,
  EV.tol = 0.9,
  min.nugget.size = 2,
  max.splits = 5,
  no.cores = 0,
  make.pbs = FALSE
)
# K-means clustering of the refined data nuggets via DN.Wkmeans(),
# requesting k = 2 clusters.
DN.clus <- DN.Wkmeans(
  datanugget = my.DN2,
  k = 2,
  num.init = 1,
  max.iterations = 5
)
# New observations whose cluster assignments will be predicted:
# 100 normal draws (mean 0.5, sd 0.3) arranged as a 50 x 2 matrix.
newdata <- matrix(
  rnorm(10^2, mean = 0.5, sd = 0.3),
  ncol = 2
)
# Predict cluster assignments for the new observations, using the refined
# data nuggets and their cluster assignments from DN.clus. The observation
# matrix is converted to a data frame before being passed as newx.
DNcluster.predict(
  my.DN2,
  cl = DN.clus$`Cluster Assignments for data nuggets`,
  newx = as.data.frame(newdata)
)
# Second scenario: predict cluster assignments for new data nuggets that
# come from a new large data set.
# newdata is overwritten here with two stacked groups of 2500 rows x 2
# columns each (default mean and mean = 1, both with sd = 0.5).
newdata <- rbind.data.frame(
  matrix(rnorm(5 * 10^3, sd = 0.5), ncol = 2),
  matrix(rnorm(5 * 10^3, mean = 1, sd = 0.5), ncol = 2)
)
# Build data nuggets for the new data set with create.DN(), using the same
# settings as for the original data.
my.DN_new <- create.DN(
  x = newdata,
  R = 300,
  delete.percent = 0.1,
  DN.num1 = 300,
  DN.num2 = 150,
  no.cores = 0,
  make.pbs = FALSE
)
# Refine the new data nuggets with refine.DN(), using the same settings as
# for the original data.
my.DN2_new <- refine.DN(
  x = newdata,
  DN = my.DN_new,
  EV.tol = 0.9,
  min.nugget.size = 2,
  max.splits = 5,
  no.cores = 0,
  make.pbs = FALSE
)
# Predict cluster assignments for the new data nuggets: the refined nuggets
# of the new data set are passed as newx, together with the original refined
# nuggets and their cluster assignments.
DNcluster.predict(
  my.DN2,
  cl = DN.clus$`Cluster Assignments for data nuggets`,
  newx = my.DN2_new
)
# Run the code above in your browser using DataLab