# NOT RUN {
## The example takes a couple of minutes to run...
# }
# NOT RUN {
require(mgcv)
par(mfrow=c(4,4),mar=c(4,4,1,1))
for (sim in c(1,7)) { ## cycle over uncorrelated and correlated covariates
n <- 350;set.seed(2)
## simulate data but randomly drop 300 covariate measurements
## leaving only 50 complete cases...
dat <- gamSim(sim,n=n,scale=3) ## 1 or 7
drop <- sample(1:n,300) ## to
for (i in 2:5) dat[drop[1:75+(i-2)*75],i] <- NA
## process data.frame producing binary indicators of missingness,
## mx0, mx1 etc. For each missing value create a level of a factor
## idx0, idx1, etc. So idx0 has as many levels as x0 has missing
## values. Replace the NA's in each variable by the mean of the
## non missing for that variable...
dname <- names(dat)[2:5]
dat1 <- dat
for (i in 1:4) {
by.name <- paste("m",dname[i],sep="")
dat1[[by.name]] <- is.na(dat1[[dname[i]]])
dat1[[dname[i]]][dat1[[by.name]]] <- mean(dat1[[dname[i]]],na.rm=TRUE)
lev <- rep(1,n);lev[dat1[[by.name]]] <- 1:sum(dat1[[by.name]])
id.name <- paste("id",dname[i],sep="")
dat1[[id.name]] <- factor(lev)
dat1[[by.name]] <- as.numeric(dat1[[by.name]])
}
## Fit a gam, in which any missing value contributes zero
## to the linear predictor from its smooth, but each
## missing has its own random effect, with the random effect
## variances being specific to the variable. e.g.
## for s(x0,by=ordered(!mx0)), declaring the `by' as an ordered
## factor ensures that the smooth is centred, but multiplied
## by zero when mx0 is one (indicating a missing x0). This means
## that any value (within range) can be put in place of the
## NA for x0. s(idx0,bs="re",by=mx0) produces a separate Gaussian
## random effect for each missing value of x0 (in place of s(x0),
## effectively). The `by' variable simply sets the random effect to
## zero when x0 is non-missing, so that we can set idx0 to any
## existing level for these cases.
b <- gam(y~s(x0,by=ordered(!mx0))+s(x1,by=ordered(!mx1))+
s(x2,by=ordered(!mx2))+s(x3,by=ordered(!mx3))+
s(idx0,bs="re",by=mx0)+s(idx1,bs="re",by=mx1)+
s(idx2,bs="re",by=mx2)+s(idx3,bs="re",by=mx3)
,data=dat1,method="REML")
for (i in 1:4) plot(b,select=i) ## plot the smooth effects from b
## fit the model to the `complete case' data...
b2 <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat,method="REML")
plot(b2) ## plot the complete case results
}
# }
Run the code above in your browser using DataLab