# Generate 20 observations from a gamma distribution with
# parameters shape = 2 and scale = 3 and:
#
# 1) Call distChoose using the Shapiro-Wilk method.
#
# 2) Call distChoose using the Shapiro-Wilk method and specify
# the bias-corrected method of estimating shape for the Gamma
# distribution.
#
# 3) Compare the results in 2) above with the results using the
# ProUCL method.
#
# Notes: The call to set.seed lets you reproduce this example.
#
# The ProUCL method chooses the Normal distribution, whereas the
# Shapiro-Wilk method chooses the Gamma distribution.
set.seed(47)
dat <- rgamma(20, shape = 2, scale = 3)
# 1) Call distChoose using the Shapiro-Wilk method.
#--------------------------------------------------
distChoose(dat)
#Results of Choosing Distribution
#--------------------------------
#
#Candidate Distributions: Normal
# Gamma
# Lognormal
#
#Choice Method: Shapiro-Wilk
#
#Type I Error per Test: 0.05
#
#Decision: Gamma
#
#Estimated Parameter(s): shape = 1.909462
# scale = 4.056819
#
#Estimation Method: MLE
#
#Data: dat
#
#Sample Size: 20
#
#Test Results:
#
# Normal
# Test Statistic: W = 0.9097488
# P-value: 0.06303695
#
# Gamma
# Test Statistic: W = 0.9834958
# P-value: 0.970903
#
# Lognormal
# Test Statistic: W = 0.9185006
# P-value: 0.09271768
#--------------------
# 2) Call distChoose using the Shapiro-Wilk method and specify
# the bias-corrected method of estimating shape for the Gamma
# distribution.
#---------------------------------------------------------------
distChoose(dat, method = "sw",
est.arg.list = list(gamma = list(method = "bcmle")))
#Results of Choosing Distribution
#--------------------------------
#
#Candidate Distributions: Normal
# Gamma
# Lognormal
#
#Choice Method: Shapiro-Wilk
#
#Type I Error per Test: 0.05
#
#Decision: Gamma
#
#Estimated Parameter(s): shape = 1.656376
# scale = 4.676680
#
#Estimation Method: Bias-Corrected MLE
#
#Data: dat
#
#Sample Size: 20
#
#Test Results:
#
# Normal
# Test Statistic: W = 0.9097488
# P-value: 0.06303695
#
# Gamma
# Test Statistic: W = 0.9834346
# P-value: 0.9704046
#
# Lognormal
# Test Statistic: W = 0.9185006
# P-value: 0.09271768
#--------------------
# 3) Compare the results in 2) above with the results using the
# ProUCL method.
#---------------------------------------------------------------
distChoose(dat, method = "proucl")
#Results of Choosing Distribution
#--------------------------------
#
#Candidate Distributions: Normal
# Gamma
# Lognormal
#
#Choice Method: ProUCL
#
#Type I Error per Test: 0.05
#
#Decision: Normal
#
#Estimated Parameter(s): mean = 7.746340
# sd = 5.432175
#
#Estimation Method: mvue
#
#Data: dat
#
#Sample Size: 20
#
#Test Results:
#
# Normal
# Shapiro-Wilk GOF
# Test Statistic: W = 0.9097488
# P-value: 0.06303695
# Lilliefors (Kolmogorov-Smirnov) GOF
# Test Statistic: D = 0.1547851
# P-value: 0.238092
#
# Gamma
# ProUCL Anderson-Darling Gamma GOF
# Test Statistic: A = 0.1853826
# P-value: >= 0.10
# ProUCL Kolmogorov-Smirnov Gamma GOF
# Test Statistic: D = 0.0988692
# P-value: >= 0.10
#
# Lognormal
# Shapiro-Wilk GOF
# Test Statistic: W = 0.9185006
# P-value: 0.09271768
# Lilliefors (Kolmogorov-Smirnov) GOF
# Test Statistic: D = 0.149317
# P-value: 0.2869177
#--------------------
# Clean up
#---------
rm(dat)
#====================================================================
# Example 10-2 of USEPA (2009, page 10-14) gives an example of
# using the Shapiro-Wilk test to test the assumption of normality
# for nickel concentrations (ppb) in groundwater collected over
# 4 years. The data for this example are stored in
# EPA.09.Ex.10.1.nickel.df.
EPA.09.Ex.10.1.nickel.df
# Month Well Nickel.ppb
#1 1 Well.1 58.8
#2 3 Well.1 1.0
#3 6 Well.1 262.0
#4 8 Well.1 56.0
#5 10 Well.1 8.7
#6 1 Well.2 19.0
#7 3 Well.2 81.5
#8 6 Well.2 331.0
#9 8 Well.2 14.0
#10 10 Well.2 64.4
#11 1 Well.3 39.0
#12 3 Well.3 151.0
#13 6 Well.3 27.0
#14 8 Well.3 21.4
#15 10 Well.3 578.0
#16 1 Well.4 3.1
#17 3 Well.4 942.0
#18 6 Well.4 85.6
#19 8 Well.4 10.0
#20 10 Well.4 637.0
# Use distChoose with the probability plot correlation method,
# and for the lognormal distribution specify the
# mean and CV parameterization:
#------------------------------------------------------------
distChoose(Nickel.ppb ~ 1, data = EPA.09.Ex.10.1.nickel.df,
choices = c("norm", "gamma", "lnormAlt"), method = "ppcc")
#Results of Choosing Distribution
#--------------------------------
#
#Candidate Distributions: Normal
# Gamma
# Lognormal
#
#Choice Method: PPCC
#
#Type I Error per Test: 0.05
#
#Decision: Lognormal
#
#Estimated Parameter(s): mean = 213.415628
# cv = 2.809377
#
#Estimation Method: mvue
#
#Data: Nickel.ppb
#
#Data Source: EPA.09.Ex.10.1.nickel.df
#
#Sample Size: 20
#
#Test Results:
#
# Normal
# Test Statistic: r = 0.8199825
# P-value: 5.753418e-05
#
# Gamma
# Test Statistic: r = 0.9749044
# P-value: 0.317334
#
# Lognormal
# Test Statistic: r = 0.9912528
# P-value: 0.9187852
#--------------------
# Repeat the above example using the ProUCL method.
#--------------------------------------------------
distChoose(Nickel.ppb ~ 1, data = EPA.09.Ex.10.1.nickel.df,
method = "proucl")
#Results of Choosing Distribution
#--------------------------------
#
#Candidate Distributions: Normal
# Gamma
# Lognormal
#
#Choice Method: ProUCL
#
#Type I Error per Test: 0.05
#
#Decision: Gamma
#
#Estimated Parameter(s): shape = 0.5198727
# scale = 326.0894272
#
#Estimation Method: MLE
#
#Data: Nickel.ppb
#
#Data Source: EPA.09.Ex.10.1.nickel.df
#
#Sample Size: 20
#
#Test Results:
#
# Normal
# Shapiro-Wilk GOF
# Test Statistic: W = 0.6788888
# P-value: 2.17927e-05
# Lilliefors (Kolmogorov-Smirnov) GOF
# Test Statistic: D = 0.3267052
# P-value: 5.032807e-06
#
# Gamma
# ProUCL Anderson-Darling Gamma GOF
# Test Statistic: A = 0.5076725
# P-value: >= 0.10
# ProUCL Kolmogorov-Smirnov Gamma GOF
# Test Statistic: D = 0.1842904
# P-value: >= 0.10
#
# Lognormal
# Shapiro-Wilk GOF
# Test Statistic: W = 0.978946
# P-value: 0.9197735
# Lilliefors (Kolmogorov-Smirnov) GOF
# Test Statistic: D = 0.08405167
# P-value: 0.9699648
#====================================================================
if (FALSE) {
# 1) Simulate 1000 trials where for each trial you:
# a) Generate 20 observations from a Gamma distribution with
# parameters mean = 10 and CV = 1.
# b) Use distChoose with the Shapiro-Wilk method.
# c) Use distChoose with the ProUCL method.
#
# 2) Compare the proportion of times the
# Normal vs. Gamma vs. Lognormal vs. Nonparametric distribution
# is chosen for b) and c) above.
#------------------------------------------------------------------
set.seed(58)
N <- 1000
Choose.fac <- factor(rep("", N), levels = c("Normal", "Gamma", "Lognormal", "Nonparametric"))
Choose.df <- data.frame(SW = Choose.fac, ProUCL = Choose.fac)
for(i in 1:N) {
dat <- rgammaAlt(20, mean = 10, cv = 1)
Choose.df[i, "SW"] <- distChoose(dat, method = "sw")$decision
Choose.df[i, "ProUCL"] <- distChoose(dat, method = "proucl")$decision
}
summaryStats(Choose.df, digits = 0)
# ProUCL(N) ProUCL(Pct) SW(N) SW(Pct)
#Normal 443 44 41 4
#Gamma 546 55 733 73
#Lognormal 9 1 215 22
#Nonparametric 2 0 11 1
#Combined 1000 100 1000 100
#--------------------
# Repeat above example for the Lognormal Distribution with mean=10 and CV = 1.
#-----------------------------------------------------------------------------
set.seed(297)
N <- 1000
Choose.fac <- factor(rep("", N), levels = c("Normal", "Gamma", "Lognormal", "Nonparametric"))
Choose.df <- data.frame(SW = Choose.fac, ProUCL = Choose.fac)
for(i in 1:N) {
dat <- rlnormAlt(20, mean = 10, cv = 1)
Choose.df[i, "SW"] <- distChoose(dat, method = "sw")$decision
Choose.df[i, "ProUCL"] <- distChoose(dat, method = "proucl")$decision
}
summaryStats(Choose.df, digits = 0)
# ProUCL(N) ProUCL(Pct) SW(N) SW(Pct)
#Normal 313 31 15 2
#Gamma 556 56 254 25
#Lognormal 121 12 706 71
#Nonparametric 10 1 25 2
#Combined 1000 100 1000 100
#--------------------
# Clean up
#---------
rm(N, Choose.fac, Choose.df, i, dat)
}
Run the code above in your browser using DataLab