# NOT RUN {
x <- runif(200)
Lc <- LcKS(x, cdf = "pnorm", nreps = 999)
hist(Lc$D.sim)
abline(v = Lc$D.obs, lty = 2)
print(Lc, max = 50) # Print first 50 simulated statistics
# Approximate p-value (usually) << 0.05
# Confirmation uncorrected version has increased Type II error rate when
# using sample statistics to estimate parameters:
ks.test(x, "pnorm", mean(x), sd(x)) # p-value always larger, (usually) > 0.05
# Confirm critical values for normal distribution are correct
nreps <- 9999
x <- rnorm(25)
Lc <- LcKS(x, "pnorm", nreps = nreps)
sim.Ds <- sort(Lc$D.sim)
crit <- round(c(.8, .85, .9, .95, .99) * nreps, 0)
# Lilliefors' (1967) critical values, using improved values from
# Parsons & Wirsching (1982) (for n = 25):
# 0.141 0.148 0.157 0.172 0.201
round(sim.Ds[crit], 3) # Approximately the same critical values
# Confirm critical values for exponential are the same as reported by Lilliefors (1969)
nreps <- 9999
x <- rexp(25)
Lc <- LcKS(x, "pexp", nreps = nreps)
sim.Ds <- sort(Lc$D.sim)
crit <- round(c(.8, .85, .9, .95, .99) * nreps, 0)
# Lilliefors' (1969) critical values (for n = 25):
# 0.170 0.180 0.191 0.210 0.247
round(sim.Ds[crit], 3) # Approximately the same critical values
# }
# NOT RUN {
# Gamma and Weibull tests require functions from the 'MASS' package
# Takes time for maximum likelihood optimization of statistics
require(MASS)
x <- runif(100, min = 1, max = 100)
Lc <- LcKS(x, cdf = "pgamma", nreps = 499)
Lc$p.value
# Confirm critical values for Weibull the same as reported by Parsons & Wirsching (1982)
nreps <- 9999
x <- rweibull(25, shape = 1, scale = 1)
Lc <- LcKS(x, "pweibull", nreps = nreps)
sim.Ds <- sort(Lc$D.sim)
crit <- round(c(.8, .85, .9, .95, .99) * nreps, 0)
# Parsons & Wirsching (1982) critical values (for n = 25):
# 0.141 0.148 0.157 0.172 0.201
round(sim.Ds[crit], 3) # Approximately the same critical values
# Mixture test requires functions from the 'mclust' package
# Takes time to identify model parameters
require(mclust)
x <- rmixnorm(200, mean = c(10, 20), sd = 2, pro = c(1,3))
Lc <- LcKS(x, cdf = "pmixnorm", nreps = 499, G = 1:9) # Default G (1:9) takes long time
Lc$p.value
G <- Mclust(x)$parameters$variance$G # Optimal model has only two components
Lc <- LcKS(x, cdf = "pmixnorm", nreps = 499, G = G) # Restricting to likely G saves time
# But note changes null hypothesis: now testing against just two-component mixture
Lc$p.value
# Running 'in parallel'
require(doParallel)
set.seed(3124)
x <- rmixnorm(300, mean = c(110, 190, 200), sd = c(3, 15, .1), pro = c(1, 3, 1))
system.time(LcKS(x, "pgamma"))
system.time(LcKS(x, "pgamma", parallel = TRUE)) # Should be faster
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab