# NOT RUN {
# }
# NOT RUN {
library(JRM)
######################################################################
## Generate data
## Correlation between the two equations and covariate correlation 0.5
## Sample size 2000
######################################################################
set.seed(0)
n <- 2000
rh <- 0.5
sigmau <- matrix(c(1, rh, rh, 1), 2, 2)
u <- rMVN(n, rep(0,2), sigmau)
sigmac <- matrix(rh, 3, 3); diag(sigmac) <- 1
cov <- rMVN(n, rep(0,3), sigmac)
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + u[, 2]
yo <- y*(ys > 0)
dataSim <- data.frame(ys, yo, bi, x1, x2)
## CLASSIC SAMPLE SELECTION MODEL
## the first equation MUST be the selection equation
resp.check(yo[ys > 0], "N")
out <- copulaSampleSel(list(ys ~ bi + x1 + x2,
yo ~ bi + x1),
data = dataSim)
conv.check(out)
post.check(out)
summary(out)
AIC(out)
BIC(out)
## SEMIPARAMETRIC SAMPLE SELECTION MODEL
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
out <- copulaSampleSel(list(ys ~ bi + s(x1, bs = "tp", k = 10, m = 2) + s(x2),
yo ~ bi + s(x1)),
data = dataSim)
conv.check(out)
post.check(out)
AIC(out)
## compare the two summary outputs
## the second output produces a summary of the results obtained when only
## the outcome equation is fitted, i.e. selection bias is not accounted for
summary(out)
summary(out$gam2)
## estimated smooth function plots
## the red line is the true curve
## the blue line is the naive curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)] - mean(f21(x1.s))
plot(out, eq = 2, ylim = c(-1, 0.8)); lines(x1.s, f21.x1, col = "red")
par(new = TRUE)
plot(out$gam2, se = FALSE, lty = 3, lwd = 2, ylim = c(-1, 0.8),
ylab = "", rug = FALSE)
## IMPUTE MISSING VALUES
n.m <- 10
res <- imputeSS(out, n.m)
bet <- NA
for(i in 1:n.m){
dataSim$yo[dataSim$ys == 0] <- res[[i]]
outg <- gamlss(list(yo ~ bi + s(x1)), data = dataSim)
bet[i] <- coef(outg)["bi"]
print(i)
}
mean(bet)
##
## SEMIPARAMETRIC SAMPLE SELECTION MODEL with association
## and dispersion parameters
## depending on covariates as well
eq.mu.1 <- ys ~ bi + s(x1) + s(x2)
eq.mu.2 <- yo ~ bi + s(x1)
eq.sigma2 <- ~ bi
eq.theta <- ~ bi + x1
fl <- list(eq.mu.1, eq.mu.2, eq.sigma2, eq.theta)
out <- copulaSampleSel(fl, data = dataSim)
conv.check(out)
post.check(out)
summary(out)
out$sigma2
out$theta
jc.probs(out, 0, 0.3, intervals = TRUE)[1:4,]
outC0 <- copulaSampleSel(fl, data = dataSim, BivD = "C0")
conv.check(outC0)
post.check(outC0)
AIC(out, outC0)
BIC(out, outC0)
## IMPUTE MISSING VALUES
n.m <- 10
res <- imputeSS(outC0, n.m)
#
#
#######################################################
## example using Gumbel copula and normal-gamma margins
#######################################################
set.seed(1)
y <- exp(-0.68 - 1.5*bi + f21(x1) + u[, 2])
yo <- y*(ys > 0)
dataSim <- data.frame(ys, yo, bi, x1, x2)
out <- copulaSampleSel(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, BivD = "G0",
margins = c("probit", "GA"))
conv.check(out)
post.check(out)
summary(out)
ATE <- NA
n.m <- 10
res <- imputeSS(out, n.m)
for(i in 1:n.m){
dataSim$yo[dataSim$ys == 0] <- res[[i]]
outg <- gamlss(list(yo ~ bi + s(x1)), margin = "GA", data = dataSim)
out$gamlss <- outg
ATE[i] <- AT(out, nm.end = "bi", type = "univariate")$res[2]
print(i)
}
AT(out, nm.end = "bi")
mean(ATE)
#
#
# }
Run the code above in your browser using DataLab