d = 2
set.seed(1)
X1 = matrix(rnorm(100*d), nrow = 100, ncol = d)
X2 = matrix(rnorm(100*d,sd=sqrt(1.5)), nrow = 100, ncol = d)
X3 = matrix(rnorm(100*d,sd=sqrt(2)), nrow = 100, ncol = d)
X = rbind(X1,X2,X3)
Y = c(rep(1,100),rep(2,100),rep(3,100))
print(KMD_test(X, Y, M = 3, Knn = 1, Kernel = "discrete"))
# A small p-value since the three distributions are not the same.
print(KMD_test(X, Y, M = 3, Knn = 1, Kernel = "discrete", Permutation = FALSE))
# p-value of the asymptotic test is similar to that of the permutation test
print(KMD_test(X, Y, M = 3, Knn = 1, Kernel = diag(c(10,1,1))))
# p-value is improved by using a different kernel
print(KMD_test(X, Y, M = 3, Knn = 30, Kernel = "discrete"))
# The suggested choice Knn = 0.1n yields a very small p-value.
print(KMD_test(X, Y, M = 3, Knn = "MST", Kernel = "discrete"))
# One can also use the MST.
print(KMD_test(X, Y, M = 3, Knn = 2, Kernel = "discrete"))
# MST has similar performance as 2-NN, which is between 1-NN and 30-NN
# Check null distribution of the z values
ni = 100
n = 3*ni
d = 2
Null_KMD = function(id){
set.seed(id)
X = matrix(rnorm(n*d), nrow = n, ncol = d)
Y = c(rep(1,ni),rep(2,ni),rep(3,ni))
return(KMD_test(X, Y, M = 3, Knn = "MST", Kernel = "discrete", Permutation = FALSE)[1,1])
}
hist(sapply(1:500, Null_KMD), breaks = c(-Inf,seq(-5,5,length=50),Inf), freq = FALSE,
xlim = c(-4,4), ylim = c(0,0.5), main = expression(paste(n[i]," = 100")),
xlab = expression(paste("normalized ",hat(eta))))
lines(seq(-5,5,length=1000),dnorm(seq(-5,5,length=1000)),col="red")
# The histogram of the normalized KMD is close to that of a standard normal distribution.
Run the code above in your browser using DataLab