# NOT RUN {
## see also examples in ?gam.models (e.g. 'by' variables,
## random effects and tricks for large binary datasets)
library(mgcv)
set.seed(2) ## simulate some data...
dat <- gamSim(1,n=400,dist="normal",scale=2)
b <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat)
summary(b)
plot(b,pages=1,residuals=TRUE) ## show partial residuals
plot(b,pages=1,seWithMean=TRUE) ## `with intercept' CIs
## run some basic model checks, including checking
## smoothing basis dimensions...
gam.check(b)
## same fit in two parts .....
G <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),fit=FALSE,data=dat)
b <- gam(G=G)
print(b)
## 2 part fit enabling manipulation of smoothing parameters...
G <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),fit=FALSE,data=dat,sp=b$sp)
G$lsp0 <- log(b$sp*10) ## provide log of required sp vec
gam(G=G) ## it's smoother
## change the smoothness selection method to REML
b0 <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat,method="REML")
## use alternative plotting scheme, and way intervals include
## smoothing parameter uncertainty...
plot(b0,pages=1,scheme=1,unconditional=TRUE)
## Would a smooth interaction of x0 and x1 be better?
## Use tensor product smooth of x0 and x1, basis
## dimension 49 (see ?te for details, also ?t2).
bt <- gam(y~te(x0,x1,k=7)+s(x2)+s(x3),data=dat,
method="REML")
plot(bt,pages=1)
plot(bt,pages=1,scheme=2) ## alternative visualization
AIC(b0,bt) ## interaction worse than additive
## Alternative: test for interaction with a smooth ANOVA
## decomposition (this time between x2 and x1)
bt <- gam(y~s(x0)+s(x1)+s(x2)+s(x3)+ti(x1,x2,k=6),
data=dat,method="REML")
summary(bt)
## If it is believed that x0 and x1 are naturally on
## the same scale, and should be treated isotropically
## then could try...
bs <- gam(y~s(x0,x1,k=40)+s(x2)+s(x3),data=dat,
method="REML")
plot(bs,pages=1)
AIC(b0,bt,bs) ## additive still better.
## Now do automatic terms selection as well
b1 <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat,
method="REML",select=TRUE)
plot(b1,pages=1)
## set the smoothing parameter for the first term, estimate rest ...
bp <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),sp=c(0.01,-1,-1,-1),data=dat)
plot(bp,pages=1,scheme=1)
## alternatively...
bp <- gam(y~s(x0,sp=.01)+s(x1)+s(x2)+s(x3),data=dat)
# set lower bounds on smoothing parameters ....
bp<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),
min.sp=c(0.001,0.01,0,10),data=dat)
print(b);print(bp)
# same with REML
bp<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),
min.sp=c(0.1,0.1,0,10),data=dat,method="REML")
print(b0);print(bp)
## now a GAM with 3df regression spline term & 2 penalized terms
b0 <- gam(y~s(x0,k=4,fx=TRUE,bs="tp")+s(x1,k=12)+s(x2,k=15),data=dat)
plot(b0,pages=1)
# }
# NOT RUN {
## now simulate poisson data...
set.seed(6)
dat <- gamSim(1,n=2000,dist="poisson",scale=.1)
## use "cr" basis to save time, with 2000 data...
b2<-gam(y~s(x0,bs="cr")+s(x1,bs="cr")+s(x2,bs="cr")+
s(x3,bs="cr"),family=poisson,data=dat,method="REML")
plot(b2,pages=1)
## drop x3, but initialize sp's from previous fit, to
## save more time...
b2a<-gam(y~s(x0,bs="cr")+s(x1,bs="cr")+s(x2,bs="cr"),
family=poisson,data=dat,method="REML",
in.out=list(sp=b2$sp[1:3],scale=1))
par(mfrow=c(2,2))
plot(b2a)
par(mfrow=c(1,1))
## similar example using GACV...
dat <- gamSim(1,n=400,dist="poisson",scale=.25)
b4<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=poisson,
data=dat,method="GACV.Cp",scale=-1)
plot(b4,pages=1)
## repeat using REML as in Wood 2011...
b5<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=poisson,
data=dat,method="REML")
plot(b5,pages=1)
## a binary example (see ?gam.models for large dataset version)...
dat <- gamSim(1,n=400,dist="binary",scale=.33)
lr.fit <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=binomial,
data=dat,method="REML")
## plot model components with truth overlaid in red
op <- par(mfrow=c(2,2))
fn <- c("f0","f1","f2","f3");xn <- c("x0","x1","x2","x3")
for (k in 1:4) {
plot(lr.fit,residuals=TRUE,select=k)
ff <- dat[[fn[k]]];xx <- dat[[xn[k]]]
ind <- sort.int(xx,index.return=TRUE)$ix
lines(xx[ind],(ff-mean(ff))[ind]*.33,col=2)
}
par(op)
anova(lr.fit)
lr.fit1 <- gam(y~s(x0)+s(x1)+s(x2),family=binomial,
data=dat,method="REML")
lr.fit2 <- gam(y~s(x1)+s(x2),family=binomial,
data=dat,method="REML")
AIC(lr.fit,lr.fit1,lr.fit2)
## For a Gamma example, see ?summary.gam...
## For inverse Gaussian, see ?rig
## now 2D smoothing...
eg <- gamSim(2,n=500,scale=.1)
attach(eg)
op <- par(mfrow=c(2,2),mar=c(4,4,1,1))
contour(truth$x,truth$z,truth$f) ## contour truth
b4 <- gam(y~s(x,z),data=data) ## fit model
fit1 <- matrix(predict.gam(b4,pr,se=FALSE),40,40)
contour(truth$x,truth$z,fit1) ## contour fit
persp(truth$x,truth$z,truth$f) ## persp truth
vis.gam(b4) ## persp fit
detach(eg)
par(op)
##################################################
## largish dataset example with user defined knots
##################################################
par(mfrow=c(2,2))
n <- 5000
eg <- gamSim(2,n=n,scale=.5)
attach(eg)
ind<-sample(1:n,200,replace=FALSE)
b5<-gam(y~s(x,z,k=40),data=data,
knots=list(x=data$x[ind],z=data$z[ind]))
## various visualizations
vis.gam(b5,theta=30,phi=30)
plot(b5)
plot(b5,scheme=1,theta=50,phi=20)
plot(b5,scheme=2)
par(mfrow=c(1,1))
## and a pure "knot based" spline of the same data
b6<-gam(y~s(x,z,k=64),data=data,knots=list(x= rep((1:8-0.5)/8,8),
z=rep((1:8-0.5)/8,rep(8,8))))
vis.gam(b6,color="heat",theta=30,phi=30)
## varying the default large dataset behaviour via `xt'
b7 <- gam(y~s(x,z,k=40,xt=list(max.knots=500,seed=2)),data=data)
vis.gam(b7,theta=30,phi=30)
detach(eg)
# }
Run the code above in your browser using DataLab