library(SemiParBIVProbit)
############
## EXAMPLE 1
############
## Generate data
## Correlation between the two equations 0.5 - Sample size 400
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*x1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2, x3)
#
#
## CLASSIC BIVARIATE PROBIT
out <- SemiParBIVProbit(list(y1 ~ x1 + x2 + x3,
y2 ~ x1 + x2 + x3),
data = dataSim)
conv.check(out)
summary(out)
AIC(out)
BIC(out)
## SEMIPARAMETRIC BIVARIATE PROBIT
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
## For COPULA models use BivD argument
out <- SemiParBIVProbit(list(y1 ~ x1 + s(x2, bs = "tp", k = 10, m = 2) + s(x3),
y2 ~ x1 + s(x2) + s(x3)),
data = dataSim)
conv.check(out)
summary(out)
AIC(out)
## estimated smooth function plots - red lines are true curves
x2 <- sort(x2)
f1.x2 <- f1(x2)[order(x2)] - mean(f1(x2))
f2.x2 <- f2(x2)[order(x2)] - mean(f2(x2))
f3.x3 <- rep(0, length(x3))
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq = 1, select = 1, seWithMean = TRUE, scale = 0)
lines(x2, f1.x2, col = "red")
plot(out, eq = 1, select = 2, seWithMean = TRUE, scale = 0)
lines(x3, f3.x3, col = "red")
plot(out, eq = 2, select = 1, seWithMean = TRUE, scale = 0)
lines(x2, f2.x2, col = "red")
plot(out, eq = 2, select = 2, seWithMean = TRUE, scale = 0)
lines(x3, f3.x3, col = "red")
## p-values suggest to drop x3 from both equations, with a stronger
## evidence for eq. 2. This can be also achieved using shrinkage smoothers
outSS <- SemiParBIVProbit(list(y1 ~ x1 + s(x2, bs = "ts") + s(x3, bs = "cs"),
y2 ~ x1 + s(x2, bs = "cs") + s(x3, bs = "ts")),
data = dataSim)
conv.check(outSS)
plot(outSS, eq = 1, select = 1, scale = 0, shade = TRUE)
plot(outSS, eq = 1, select = 2, ylim = c(-0.1,0.1))
plot(outSS, eq = 2, select = 1, scale = 0, shade = TRUE)
plot(outSS, eq = 2, select = 2, ylim = c(-0.1,0.1))
## SEMIPARAMETRIC BIVARIATE PROBIT with association parameter
## depending on covariates as well
eq.mu.1 <- y1 ~ x1 + s(x2)
eq.mu.2 <- y2 ~ x1 + s(x2)
eq.theta <- ~ x1 + s(x2)
fl <- list(eq.mu.1, eq.mu.2, eq.theta)
outD <- SemiParBIVProbit(fl, data = dataSim)
conv.check(outD)
summary(outD)
outD$theta
plot(outD, eq = 1, seWithMean = TRUE)
plot(outD, eq = 2, seWithMean = TRUE)
plot(outD, eq = 3, seWithMean = TRUE)
graphics.off()
#
#
############
## EXAMPLE 2
############
## Generate data with one endogenous variable
## and exclusion restriction
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
cov <- rMVN(n, rep(0,2), Sigma)
cov <- pnorm(cov)
x1 <- round(cov[,1]); x2 <- cov[,2]
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*y1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2)
#
## Testing the hypothesis of absence of endogeneity...
LM.bpm(list(y1 ~ x1 + s(x2), y2 ~ y1 + s(x2)), dataSim, Model = "B")
# p-value suggests presence of endogeneity, hence fit a bivariate model
## CLASSIC RECURSIVE BIVARIATE PROBIT
out <- SemiParBIVProbit(list(y1 ~ x1 + x2,
y2 ~ y1 + x2),
data = dataSim)
conv.check(out)
summary(out)
AIC(out); BIC(out)
## SEMIPARAMETRIC RECURSIVE BIVARIATE PROBIT
out <- SemiParBIVProbit(list(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2)),
data = dataSim)
conv.check(out)
summary(out)
AIC(out); BIC(out)
#
## Testing the hypothesis of absence of endogeneity post estimation...
gt.bpm(out)
#
## reatment effect, risk ratio and odds ratio with CIs
mb(y1, y2, Model = "B")
AT(out, nm.end = "y1", hd.plot = TRUE)
RR(out, nm.end = "y1")
OR(out, nm.end = "y1")
AT(out, nm.end = "y1", type = "univariate")
## try a Clayton copula model...
outC <- SemiParBIVProbit(list(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2)),
data = dataSim, BivD = "C0")
conv.check(outC)
summary(outC)
AT(outC, nm.end = "y1")
## try a Joe copula model...
outJ <- SemiParBIVProbit(list(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2)),
data = dataSim, BivD = "J0")
conv.check(outJ)
summary(outJ)
AT(outJ, "y1")
VuongClarke(out, outJ)
#
## recursive bivariate probit modelling with unpenalized splines
## can be achieved as follows
outFP <- SemiParBIVProbit(list(y1 ~ x1 + s(x2, bs = "cr", k = 5),
y2 ~ y1 + s(x2, bs = "cr", k = 6)),
fp = TRUE, data = dataSim)
conv.check(outFP)
summary(outFP)
# in the above examples a third equation could be introduced
# as illustrated in Example 1
#
#################
## See also ?meps
#################
############
## EXAMPLE 3
############
## Generate data with a non-random sample selection mechanism
## and exclusion restriction
set.seed(0)
n <- 2000
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
SigmaC <- matrix(0.5, 3, 3); diag(SigmaC) <- 1
cov <- rMVN(n, rep(0,3), SigmaC)
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + + u[, 2] > 0
yo <- y*(ys > 0)
dataSim <- data.frame(y, ys, yo, bi, x1, x2)
## Testing the hypothesis of absence of non-random sample selection...
LM.bpm(list(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1)), dataSim, Model = "BSS")
# p-value suggests presence of sample selection, hence fit a bivariate model
#
## SEMIPARAMETRIC SAMPLE SELECTION BIVARIATE PROBIT
## the first equation MUST be the selection equation
out <- SemiParBIVProbit(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, Model = "BSS")
conv.check(out)
gt.bpm(out)
## compare the two summary outputs
## the second output produces a summary of the results obtained when
## selection bias is not accounted for
summary(out)
summary(out$gam2)
## corrected predicted probability that 'yo' is equal to 1
mb(ys, yo, Model = "BSS")
prev(out, hd.plot = TRUE)
prev(out, type = "univariate", hd.plot = TRUE)
## estimated smooth function plots
## the red line is the true curve
## the blue line is the univariate model curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)]-mean(f21(x1.s))
plot(out, eq = 2, ylim = c(-1.65,0.95)); lines(x1.s, f21.x1, col="red")
par(new = TRUE)
plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1.65,0.95),
ylab = "", rug = FALSE)
#
#
## try a Clayton copula model...
outC <- SemiParBIVProbit(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, Model = "BSS", BivD = "C0")
conv.check(outC)
summary(outC)
prev(outC)
# in the above examples a third equation could be introduced
# as illustrated in Example 1
#
################
## See also ?hiv
################
############
## EXAMPLE 4
############
## Generate data with partial observability
set.seed(0)
n <- 10000
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
y1 <- ifelse(-1.55 + 2*x1 + x2 + u[,1] > 0, 1, 0)
y2 <- ifelse( 0.45 - x3 + u[,2] > 0, 1, 0)
y <- y1*y2
dataSim <- data.frame(y, x1, x2, x3)
## BIVARIATE PROBIT with Partial Observability
out <- SemiParBIVProbit(list(y ~ x1 + x2,
y ~ x3),
data = dataSim, Model = "BPO")
conv.check(out)
summary(out)
# first ten estimated probabilities for the four events from object out
cbind(out$p11, out$p10, out$p00, out$p01)[1:10,]
# case with smooth function
# (more computationally intensive)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse( 0.45 - x3 + u[,2] > 0, 1, 0)
y <- y1*y2
dataSim <- data.frame(y, x1, x2, x3)
out <- SemiParBIVProbit(list(y ~ x1 + s(x2),
y ~ x3),
data = dataSim, Model = "BPO")
conv.check(out)
summary(out)
# plot estimated and true functions
x2 <- sort(x2); f1.x2 <- f1(x2)[order(x2)] - mean(f1(x2))
plot(out, eq = 1, scale = 0); lines(x2, f1.x2, col = "red")
#
################
## See also ?war
################
Run the code above in your browser using DataLab