# Generate 30 observations from a gamma distribution with
# parameters mean=10 and cv=1 and censor observations less than 5.
# Then test the hypothesis that these data came from a lognormal
# distribution (alternative parameterization) using the Shapiro-Wilk test.
#
# The p-value for the complete data is p = 0.056, while
# the p-value for the censored data is p = 0.11.
# (Note: the call to set.seed lets you reproduce this example.)
set.seed(598)
dat <- sort(rgammaAlt(30, mean = 10, cv = 1))
dat
# [1] 0.5313509 1.4741833 1.9936208 2.7980636 3.4509840
# [6] 3.7987348 4.5542952 5.5207531 5.5253596 5.7177872
#[11] 5.7513827 9.1086375 9.8444090 10.6247123 10.9304922
#[16] 11.7925398 13.3432689 13.9562777 14.6029065 15.0563342
#[21] 15.8730642 16.0039936 16.6910715 17.0288922 17.8507891
#[26] 19.1105522 20.2657141 26.3815970 30.2912797 42.8726101
dat.censored <- dat
censored <- dat.censored < 5
dat.censored[censored] <- 5
# Results for complete data:
#---------------------------
gofTest(dat, test = "sw", dist = "lnormAlt")
#Results of Goodness-of-Fit Test
#-------------------------------
#
#Test Method: Shapiro-Wilk GOF
#
#Hypothesized Distribution: Lognormal
#
#Estimated Parameter(s): mean = 13.757239
# cv = 1.148872
#
#Estimation Method: mvue
#
#Data: dat
#
#Sample Size: 30
#
#Test Statistic: W = 0.9322226
#
#Test Statistic Parameter: n = 30
#
#P-value: 0.05626823
#
#Alternative Hypothesis: True cdf does not equal the
# Lognormal Distribution.
# Results for censored data:
#---------------------------
gof.list <- gofTestCensored(dat.censored, censored, test = "sw",
distribution = "lnormAlt")
gof.list
#Results of Goodness-of-Fit Test
#Based on Type I Censored Data
#-------------------------------
#
#Test Method: Shapiro-Wilk GOF
# (Singly Censored Data)
#
#Hypothesized Distribution: Lognormal
#
#Censoring Side: left
#
#Censoring Level(s): 5
#
#Estimated Parameter(s): mean = 13.0382221
# cv = 0.9129512
#
#Estimation Method: MLE
#
#Data: dat.censored
#
#Censoring Variable: censored
#
#Sample Size: 30
#
#Percent Censored: 23.3%
#
#Test Statistic: W = 0.9292406
#
#Test Statistic Parameters: N = 30.0000000
# DELTA = 0.2333333
#
#P-value: 0.114511
#
#Alternative Hypothesis: True cdf does not equal the
# Lognormal Distribution.
# Plot the results for the censored data
#---------------------------------------
dev.new()
plot(gof.list)
#----------
# Redo the above example, but specify the quasi-minimum variance
# unbiased estimator of the mean. Note that the method of
# estimating the parameters has no effect on the goodness-of-fit
# test (see the DETAILS section above).
gofTestCensored(dat.censored, censored, test = "sw",
distribution = "lnormAlt", est.arg.list = list(method = "qmvue"))
#Results of Goodness-of-Fit Test
#Based on Type I Censored Data
#-------------------------------
#
#Test Method: Shapiro-Wilk GOF
# (Singly Censored Data)
#
#Hypothesized Distribution: Lognormal
#
#Censoring Side: left
#
#Censoring Level(s): 5
#
#Estimated Parameter(s): mean = 12.8722749
# cv = 0.8712549
#
#Estimation Method: Quasi-MVUE
#
#Data: dat.censored
#
#Censoring Variable: censored
#
#Sample Size: 30
#
#Percent Censored: 23.3%
#
#Test Statistic: W = 0.9292406
#
#Test Statistic Parameters: N = 30.0000000
# DELTA = 0.2333333
#
#P-value: 0.114511
#
#Alternative Hypothesis: True cdf does not equal the
# Lognormal Distribution.
#----------
# Clean up
rm(dat, dat.censored, censored, gof.list)
graphics.off()
#==========
# Check the assumption that the silver data stored in Helsel.Cohn.88.silver.df
# follows a lognormal distribution and plot the goodness-of-fit test results.
# Note that the the small p-value and the shape of the Q-Q plot
# (an inverted S-shape) suggests that the log transformation is not quite strong
# enough to "bring in" the tails (i.e., the log-transformed silver data has tails
# that are slightly too long relative to a normal distribution).
# Helsel and Cohn (1988, p.2002) note that the gross outlier of 560 mg/L tends to
# make the shape of the data resemble a gamma distribution.
dum.list <- with(Helsel.Cohn.88.silver.df,
gofTestCensored(Ag, Censored, test = "sf", dist = "lnorm"))
dum.list
#Results of Goodness-of-Fit Test
#Based on Type I Censored Data
#-------------------------------
#
#Test Method: Shapiro-Francia GOF
# (Multiply Censored Data)
#
#Hypothesized Distribution: Lognormal
#
#Censoring Side: left
#
#Censoring Level(s): 0.1 0.2 0.3 0.5 1.0 2.0 2.5 5.0
# 6.0 10.0 20.0 25.0
#
#Estimated Parameter(s): meanlog = -1.040572
# sdlog = 2.354847
#
#Estimation Method: MLE
#
#Data: Ag
#
#Censoring Variable: Censored
#
#Sample Size: 56
#
#Percent Censored: 60.7%
#
#Test Statistic: W = 0.8957198
#
#Test Statistic Parameters: N = 56.0000000
# DELTA = 0.6071429
#
#P-value: 0.03490314
#
#Alternative Hypothesis: True cdf does not equal the
# Lognormal Distribution.
dev.new()
plot(dum.list)
#----------
# Clean up
#---------
rm(dum.list)
graphics.off()
#==========
# Chapter 15 of USEPA (2009) gives several examples of looking
# at normal Q-Q plots and estimating the mean and standard deviation
# for manganese concentrations (ppb) in groundwater at five background wells.
# In EnvStats these data are stored in the data frame
# EPA.09.Ex.15.1.manganese.df.
# Here we will test whether the data appear to come from a normal
# distribution, then we will test to see whether they appear to come
# from a lognormal distribution.
#--------------------------------------------------------------------
# First look at the data:
#-----------------------
EPA.09.Ex.15.1.manganese.df
# Sample Well Manganese.Orig.ppb Manganese.ppb Censored
#1 1 Well.1 <5 5.0 TRUE
#2 2 Well.1 12.1 12.1 FALSE
#3 3 Well.1 16.9 16.9 FALSE
#...
#23 3 Well.5 3.3 3.3 FALSE
#24 4 Well.5 8.4 8.4 FALSE
#25 5 Well.5 <2 2.0 TRUE
longToWide(EPA.09.Ex.15.1.manganese.df,
"Manganese.Orig.ppb", "Sample", "Well",
paste.row.name = TRUE)
# Well.1 Well.2 Well.3 Well.4 Well.5
#Sample.1 <5 <5 <5 6.3 17.9
#Sample.2 12.1 7.7 5.3 11.9 22.7
#Sample.3 16.9 53.6 12.6 10 3.3
#Sample.4 21.6 9.5 106.3 <2 8.4
#Sample.5 <2 45.9 34.5 77.2 <2
# Now test whether the data appear to come from
# a normal distribution. Note that these data
# are multiply censored, so we'll use the
# Shapiro-Francia test.
#----------------------------------------------
gof.normal <- with(EPA.09.Ex.15.1.manganese.df,
gofTestCensored(Manganese.ppb, Censored, test = "sf"))
gof.normal
#Results of Goodness-of-Fit Test
#Based on Type I Censored Data
#-------------------------------
#
#Test Method: Shapiro-Francia GOF
# (Multiply Censored Data)
#
#Hypothesized Distribution: Normal
#
#Censoring Side: left
#
#Censoring Level(s): 2 5
#
#Estimated Parameter(s): mean = 15.23508
# sd = 30.62812
#
#Estimation Method: MLE
#
#Data: Manganese.ppb
#
#Censoring Variable: Censored
#
#Sample Size: 25
#
#Percent Censored: 24%
#
#Test Statistic: W = 0.8368016
#
#Test Statistic Parameters: N = 25.00
# DELTA = 0.24
#
#P-value: 0.004662658
#
#Alternative Hypothesis: True cdf does not equal the
# Normal Distribution.
# Plot the results:
#------------------
dev.new()
plot(gof.normal)
#----------
# Now test to see whether the data appear to come from
# a lognormal distribuiton.
#-----------------------------------------------------
gof.lognormal <- with(EPA.09.Ex.15.1.manganese.df,
gofTestCensored(Manganese.ppb, Censored, test = "sf",
distribution = "lnorm"))
gof.lognormal
#Results of Goodness-of-Fit Test
#Based on Type I Censored Data
#-------------------------------
#
#Test Method: Shapiro-Francia GOF
# (Multiply Censored Data)
#
#Hypothesized Distribution: Lognormal
#
#Censoring Side: left
#
#Censoring Level(s): 2 5
#
#Estimated Parameter(s): meanlog = 2.215905
# sdlog = 1.356291
#
#Estimation Method: MLE
#
#Data: Manganese.ppb
#
#Censoring Variable: Censored
#
#Sample Size: 25
#
#Percent Censored: 24%
#
#Test Statistic: W = 0.9864426
#
#Test Statistic Parameters: N = 25.00
# DELTA = 0.24
#
#P-value: 0.9767731
#
#Alternative Hypothesis: True cdf does not equal the
# Lognormal Distribution.
# Plot the results:
#------------------
dev.new()
plot(gof.lognormal)
#----------
# Clean up
#---------
rm(gof.normal, gof.lognormal)
graphics.off()
Run the code above in your browser using DataLab