# Basic examples
# BM vs POT
# Plotting options
# weighted mean based on Goodness of fit (GOF)
# Effect of data proportion used to estimate GOF
# compare extremeStat with other packages
library(lmomco)
library(berryFunctions)
data(annMax) # annual streamflow maxima in river in Austria
# Basic examples ---------------------------------------------------------------
dlf <- distLextreme(annMax)
plotLextreme(dlf, log=TRUE)
plotLextreme(dlf, log="xy")
plotLextreme(dlf)
# Object structure:
str(dlf, max.lev=2)
printL(dlf)
# discharge levels for default return periods:
dlf$returnlev
# Estimate discharge that could occur every 80 years (at least empirically):
Q80 <- distLextreme(dlf=dlf, RPs=80)$returnlev
round(sort(Q80[1:17,1]),1)
# 99 to 143 m^3/s can make a relevant difference in engineering!
# That's why the rows weighted by GOF are helpful. Weights are given as in
plotLweights(dlf) # See also section weighted mean below
# For confidence intervals see ?distLexBoot
# Return period of a given discharge value, say 120 m^3/s:
round0(sort(1/(1-sapply(dlf$parameter, plmomco, x=120) ) ),1)
# exponential: every 29 years
# gev (general extreme value dist): 59,
# Weibull: every 73 years only
# BM vs POT --------------------------------------------------------------------
# Return levels by Block Maxima approach vs Peak Over Threshold approach:
# BM distribution theoretically converges to GEV, POT to GPD
data(rain, package="ismev")
days <- seq(as.Date("1914-01-01"), as.Date("1961-12-30"), by="days")
BM <- tapply(rain, format(days,"%Y"), max) ; rm(days)
dlfBM <- plotLextreme(distLextreme(BM, emp=FALSE), ylim=lim0(100), log=TRUE, nbest=10)
plotLexBoot(distLexBoot(dlfBM, quiet=TRUE), ylim=lim0(100))
plotLextreme(dlfBM, log=TRUE, ylim=lim0(100))
dlfPOT99 <- distLextreme(rain, npy=365.24, trunc=0.99, emp=FALSE)
dlfPOT99 <- plotLextreme(dlfPOT99, ylim=lim0(100), log=TRUE, nbest=10, main="POT 99")
printL(dlfPOT99)
# using only nonzero values (normally yields better fits, but not here)
rainnz <- rain[rain>0]
dlfPOT99nz <- distLextreme(rainnz, npy=length(rainnz)/48, trunc=0.99, emp=FALSE)
dlfPOT99nz <- plotLextreme(dlfPOT99nz, ylim=lim0(100), log=TRUE, nbest=10,
main=paste("POT 99 x>0, npy =", round(dlfPOT99nz$npy,2)))
if (FALSE) ## Excluded from CRAN R CMD check because of computing time
dlfPOT99boot <- distLexBoot(dlfPOT99, prop=0.4)
printL(dlfPOT99boot)
plotLexBoot(dlfPOT99boot)
dlfPOT90 <- distLextreme(rain, npy=365.24, trunc=0.90, emp=FALSE)
dlfPOT90 <- plotLextreme(dlfPOT90, ylim=lim0(100), log=TRUE, nbest=10, main="POT 90")
dlfPOT50 <- distLextreme(rain, npy=365.24, trunc=0.50, emp=FALSE)
dlfPOT50 <- plotLextreme(dlfPOT50, ylim=lim0(100), log=TRUE, nbest=10, main="POT 50")
ig99 <- ismev::gpd.fit(rain, dlfPOT99$threshold)
ismev::gpd.diag(ig99); title(main=paste(99, ig99$threshold))
if (FALSE) {
ig90 <- ismev::gpd.fit(rain, dlfPOT90$threshold)
ismev::gpd.diag(ig90); title(main=paste(90, ig90$threshold))
ig50 <- ismev::gpd.fit(rain, dlfPOT50$threshold)
ismev::gpd.diag(ig50); title(main=paste(50, ig50$threshold))
}
# Plotting options -------------------------------------------------------------
plotLextreme(dlf=dlf)
# Line colors / select distributions to be plotted:
plotLextreme(dlf, nbest=17, distcols=heat.colors(17), lty=1:5) # lty is recycled
plotLextreme(dlf, selection=c("gev", "gam", "gum"), distcols=4:6, PPcol=3, lty=3:2)
plotLextreme(dlf, selection=c("gpa","glo","wei","exp"), pch=c(NA,NA,6,8),
order=TRUE, cex=c(1,0.6, 1,1), log=TRUE, PPpch=c(16,NA), n_pch=20)
# use n_pch to say how many points are drawn per line (important for linear axis)
plotLextreme(dlf, legarg=list(cex=0.5, x="bottom", box.col="red", col=3))
# col in legarg list is (correctly) ignored
if (FALSE) {
## Excluded from package R CMD check because it's time consuming
plotLextreme(dlf, PPpch=c(1,NA)) # only Weibull plotting positions
# add different dataset to existing plot:
distLextreme(Nile/15, add=TRUE, PPpch=NA, distcols=1, selection="wak", legend=FALSE)
# Logarithmic axis
plotLextreme(distLextreme(Nile), log=TRUE, nbest=8)
# weighted mean based on Goodness of fit (GOF) ---------------------------------
# Add discharge weighted average estimate continuously:
plotLextreme(dlf, nbest=17, legend=FALSE)
abline(h=115.6, v=50)
RP <- seq(1, 70, len=100)
DischargeEstimate <- distLextreme(dlf=dlf, RPs=RP, plot=FALSE)$returnlev
lines(RP, DischargeEstimate["weighted2",], lwd=3, col="orange")
# Or, on log scale:
plotLextreme(dlf, nbest=17, legend=FALSE, log=TRUE)
abline(h=115.9, v=50)
RP <- unique(round(logSpaced(min=1, max=70, n=200, plot=FALSE),2))
DischargeEstimate <- distLextreme(dlf=dlf, RPs=RP)$returnlev
lines(RP, DischargeEstimate["weighted2",], lwd=5)
# Minima -----------------------------------------------------------------------
browseURL("https://nrfa.ceh.ac.uk/data/station/meanflow/39072")
qfile <- system.file("extdata/discharge39072.csv", package="berryFunctions")
Q <- read.table(qfile, skip=19, header=TRUE, sep=",", fill=TRUE)[,1:2]
rm(qfile)
colnames(Q) <- c("date","discharge")
Q$date <- as.Date(Q$date)
plot(Q, type="l")
Qmax <- tapply(Q$discharge, format(Q$date,"%Y"), max)
plotLextreme(distLextreme(Qmax, quiet=TRUE))
Qmin <- tapply(Q$discharge, format(Q$date,"%Y"), min)
dlf <- distLextreme(-Qmin, quiet=TRUE, RPs=c(2,5,10,20,50,100,200,500))
plotLextreme(dlf, ylim=c(0,-31), yaxs="i", yaxt="n", ylab="Q annual minimum", nbest=14)
axis(2, -(0:3*10), 0:3*10, las=1)
-dlf$returnlev[c(1:14,21), ]
# Some distribution functions are an obvious bad choice for this, so I use
# weighted 3: Values weighted by GOF of dist only for the best half.
# For the Thames in Windsor, we will likely always have > 9 m^3/s streamflow
# compare extremeStat with other packages: ---------------------------------------
library(extRemes)
plot(fevd(annMax))
par(mfrow=c(1,1))
return.level(fevd(annMax, type="GEV")) # "GP", "PP", "Gumbel", "Exponential"
distLextreme(dlf=dlf, RPs=c(2,20,100))$returnlev["gev",]
# differences are small, but noticeable...
# if you have time for a more thorough control, please pass me the results!
# yet another dataset for testing purposes:
Dresden_AnnualMax <- c(403, 468, 497, 539, 542, 634, 662, 765, 834, 847, 851, 873,
885, 983, 996, 1020, 1028, 1090, 1096, 1110, 1173, 1180, 1180,
1220, 1270, 1285, 1329, 1360, 1360, 1387, 1401, 1410, 1410, 1456,
1556, 1580, 1610, 1630, 1680, 1734, 1740, 1748, 1780, 1800, 1820,
1896, 1962, 2000, 2010, 2238, 2270, 2860, 4500)
plotLextreme(distLextreme(Dresden_AnnualMax))
} # end dontrun
Run the code above in your browser using DataLab