## Make a couple of bivariate normal classes
rmvn <- function(n, mu=0, V = matrix(1))
{
p <- length(mu)
if(any(is.na(match(dim(V),p))))
stop("Dimension problem!")
D <- chol(V)
matrix(rnorm(n*p), ncol=p) %*% D + rep(mu,rep(n,p))
}
m <- 10000
## Class 1.
mu.1 <- c(20, 40)
V.1 <- matrix(c(-5,1,0,5),2,2); V.1 <- V.1%*%t(V.1)
c.1 <- cbind(rmvn(m, mu.1, V.1), rep(1, m))
## Class 2.
mu.2 <- c(30, 60)
V.2 <- matrix(c(4,2,0,2),2,2); V.2 <- V.2%*%t(V.2)
c.2 <- cbind(rmvn(m, mu.2, V.2), rep(2, m))
## Class 3.
mu.3 <- c(15, 60)
V.3 <- matrix(c(5,5,0,5),2,2); V.3 <- V.3%*%t(V.3)
c.3 <- cbind(rmvn(m, mu.3, V.3), rep(3, m))
c.all <- rbind(c.1, c.2, c.3)
max.x <- max(c.all[,1]); min.x <- min(c.all[,1])
max.y <- max(c.all[,2]); min.y <- min(c.all[,2])
## Check them out.
plot(c.1[,1], c.1[,2], xlim=c(min.x, max.x), ylim=c(min.y, max.y),
pch=19, cex=0.5,
col="blue", xlab="Variable 1", ylab="Variable 2")
points(c.2[,1], c.2[,2], pch=19, cex=0.5, col="green")
points(c.3[,1], c.3[,2], pch=19, cex=0.5, col="red")
## Take a reference sample.
n <- 2000
ref <- c.all[sample(1:nrow(c.all), n),]
## Compare search times
k <- 10
## Do a simple brute force search.
brute <- ann(ref=ref[,1:2], target=c.all[,1:2],
tree.type="brute", k=k, verbose=FALSE)
print(brute$searchTime)
## Do an exact kd-tree search.
kd.exact <- ann(ref=ref[,1:2], target=c.all[,1:2],
tree.type="kd", k=k, verbose=FALSE)
print(kd.exact$searchTime)
## Do an approximate kd-tree search.
kd.approx <- ann(ref=ref[,1:2], target=c.all[,1:2],
tree.type="kd", k=k, eps=100, verbose=FALSE)
print(kd.approx$searchTime)
## Takes too long to calculate for this many targets.
## Compare overall accuracy of the exact vs. approximate search
##knn.mode <- function(knn.indx, ref){
## x <- ref[knn.indx,]
## as.numeric(names(sort(as.matrix(table(x))[,1],
## decreasing=TRUE))[1])
##}
Run the code above in your browser using DataLab