# NOT RUN {
require( npsf )
# Cross-sectional examples begin ------------------------------------------
# Load Penn World Tables 5.6 dataset
data( pwt56 )
head( pwt56 )
# Create some missing values
pwt56 [4, "K"] <- NA
# Stochastic production frontier model with
# homoskedastic error components (half-normal)
# Use subset of observations - for year 1965
m1 <- sf(log(Y) ~ log(L) + log(K), data = pwt56,
subset = year == 1965, distribution = "h")
m1 <- sf(log(Y) ~ log(L) + log(K), data = pwt56,
subset = year == 1965, distribution = "e")
# Test CRS: 'car' package in required for that
## Not run: linearHypothesis(m1, "log(L) + log(K) = 1")
# Write efficiencies to the data frame using 'esample':
pwt56$BC[ m1$esample ] <- m1$efficiencies$BC
## Not run: View(pwt56)
# Computation using matrices
Y1 <- as.matrix(log(pwt56[pwt56$year == 1965,
c("Y"), drop = FALSE]))
X1 <- as.matrix(log(pwt56[pwt56$year == 1965,
c("K", "L"), drop = FALSE]))
X1 [51, 2] <- NA # create missing
X1 [49, 1] <- NA # create missing
m2 <- sf(Y1 ~ X1, distribution = "h")
# Load U.S. commercial banks dataset
data(banks05)
head(banks05)
# Doubly heteroskedastic stochastic cost frontier
# model (truncated normal)
# Print summaries of cost efficiencies' estimates
m3 <- sf(lnC ~ lnw1 + lnw2 + lny1 + lny2, ln.var.u.0i = ~ ER,
ln.var.v.0i = ~ LA, data = banks05, distribution = "t",
prod = FALSE, print.level = 3)
m3 <- sf(lnC ~ lnw1 + lnw2 + lny1 + lny2, ln.var.u.0i = ~ ER,
ln.var.v.0i = ~ LA, data = banks05, distribution = "e",
prod = FALSE, print.level = 3)
# Non-monotonic marginal effects of equity ratio on
# the mean of distribution of inefficiency term
m4 <- sf(lnC ~ lnw1 + lnw2 + lny1 + lny2, ln.var.u.0i = ~ ER,
mean.u.0i = ~ ER, data = banks05, distribution = "t",
prod = FALSE, marg.eff = TRUE)
summary(m4$marg.effects)
# Cross-sectional examples end --------------------------------------------
# }
# NOT RUN {
# Panel data examples begin -----------------------------------------------
# The only way to differentiate between cross-sectional and panel-data
# models is by specifying "it".
# If "it" is not specified, cross-sectional model will be estimated.
# Example is below.
# Read data ---------------------------------------------------------------
# Load U.S. commercial banks dataset
data(banks00_07)
head(banks00_07, 5)
banks00_07$trend <- banks00_07$year - min(banks00_07$year) + 1
# Model specification -----------------------------------------------------
my.prod <- FALSE
my.it <- c("id","year")
# my.model = "BC1992"
# my.model = "K1990modified"
# my.model = "K1990"
# translog ----------------------------------------------------------------
formu <- log(TC) ~ (log(Y1) + log(Y2) + log(W1) + log(W2) + trend)^2 +
I(0.5*log(Y1)^2) + I(0.5*log(Y2)^2) + I(0.5*log(W1)^2) + I(0.5*log(W2)^2) +
trend + I(0.5*trend^2)
# Cobb-Douglas ------------------------------------------------------------
# formu <- log(TC) ~ log(Y1) + log(Y2) + log(W1) + log(W2) + trend + I(trend^2)
ols <- lm(formu, data = banks00_07)
# just to mark the results of the OLS model
summary(ols)
# Components specifications -----------------------------------------------
ln.var.v.it <- ~ log(TA)
ln.var.u.0i <- ~ ER_ave
mean.u.0i_1 <- ~ LLP_ave + LA_ave
mean.u.0i_2 <- ~ LLP_ave + LA_ave - 1
# Suppose "it" is not specified
# Then it is a cross-sectional model
t0_1st_0 <- sf(formu, data = banks00_07, subset = year > 2000,
prod = my.prod,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
eff.time.invariant = TRUE,
mean.u.0i.zero = TRUE)
# 1st generation models ---------------------------------------------------
# normal-half normal ------------------------------------------------------
# the same as above but "it" is specified
t0_1st_0 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
eff.time.invariant = TRUE,
mean.u.0i.zero = TRUE)
# Note efficiencies are time-invariant
# confidence intervals for efficiencies -----------------------------------
head(t0_1st_0$efficiencies, 20)
# normal-truncated normal -------------------------------------------------
# truncation point is constant (for all ids) ------------------------------
t0_1st_1 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
eff.time.invariant = TRUE,
mean.u.0i.zero = FALSE,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
mean.u.0i = NULL,
cost.eff.less.one = TRUE)
# truncation point is determined by variables -----------------------------
t0_1st_2 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
eff.time.invariant = TRUE,
mean.u.0i.zero = FALSE,
mean.u.0i = mean.u.0i_1,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = TRUE)
# the same, but without intercept in mean.u.0i
t0_1st_3 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
eff.time.invariant = TRUE,
mean.u.0i.zero = FALSE,
mean.u.0i = mean.u.0i_2,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = TRUE)
# 2nd generation models ---------------------------------------------------
# normal-half normal ------------------------------------------------------
# Kumbhakar (1990) model --------------------------------------------------
t_nhn_K1990 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
eff.time.invariant = FALSE,
mean.u.0i.zero = TRUE,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# Kumbhakar (1990) modified model -----------------------------------------
t_nhn_K1990modified <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod, model = "K1990modified",
eff.time.invariant = FALSE,
mean.u.0i.zero = TRUE,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# Battese and Coelli (1992) model -----------------------------------------
t_nhn_BC1992 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod, model = "BC1992",
eff.time.invariant = FALSE,
mean.u.0i.zero = TRUE,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# normal-truncated normal -------------------------------------------------
# Kumbhakar (1990) model --------------------------------------------------
# truncation point is constant (for all ids) ------------------------------
t_ntn_K1990_0 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
eff.time.invariant = FALSE,
mean.u.0i.zero = FALSE,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# truncation point is determined by variables -----------------------------
t_ntn_K1990_1 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
eff.time.invariant = FALSE,
mean.u.0i.zero = FALSE,
mean.u.0i = mean.u.0i_1,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# Efficiencies are tiny, since empirically truncation points are quite big.
# Try withouth an intercept in conditional mean f-n
t_ntn_K1990_2 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod,
eff.time.invariant = FALSE,
mean.u.0i.zero = FALSE,
mean.u.0i = mean.u.0i_2,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# Kumbhakar (1990) modified model -----------------------------------------
t_ntn_K1990modified <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod, model = "K1990modified",
eff.time.invariant = FALSE,
mean.u.0i.zero = FALSE,
mean.u.0i = mean.u.0i_1,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# Battese and Coelli (1992) model -----------------------------------------
t_ntn_BC1992 <- sf(formu, data = banks00_07, it = my.it, subset = year > 2000,
prod = my.prod, model = "BC1992",
eff.time.invariant = FALSE,
mean.u.0i.zero = FALSE,
mean.u.0i = mean.u.0i_1,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# The next one (without "subset = year > 2000" option) converges OK
t_ntn_BC1992 <- sf(formu, data = banks00_07, it = my.it,
prod = my.prod, model = "BC1992",
eff.time.invariant = FALSE,
mean.u.0i.zero = FALSE,
mean.u.0i = mean.u.0i_1,
ln.var.v.it = ln.var.v.it,
ln.var.u.0i = ln.var.u.0i,
cost.eff.less.one = FALSE)
# 4 component model ------------------------------------------------------
# Note, R should better be more than 200, this is just for illustration.
# This is the model that takes long to be estimated.
# For the following example, 'mlmaximize' required 357 iterations and
# took 8 minutes.
# The time will increase with more data and more parameters.
formu <- log(TC) ~ log(Y1) + log(Y2) + log(W1) + log(W2) + trend
t_4comp <- sf(formu, data = banks00_07, it = my.it,
subset = year >= 2001 & year < 2006,
prod = my.prod, model = "4comp",
R = 500, initialize.halton = TRUE,
lmtol = 1e-5, maxit = 500, print.level = 4)
# With R = 500, 'mlmaximize' required 124 iterations and
# took 7 minutes.
# The time will increase with more data and more parameters.
formu <- log(TC) ~ log(Y1) + log(Y2) + log(W1) + log(W2) + trend
t_4comp_500 <- sf(formu, data = banks00_07, it = my.it,
subset = year >= 2001 & year < 2006,
prod = my.prod, model = "4comp",
R = 500, initialize.halton = TRUE,
lmtol = 1e-5, maxit = 500, print.level = 4)
# @e_i0, @e_it, and @e_over give efficiencies,
# where @ is either 'c' or 't' for cost or production function.
# e.g., t_ntn_4comp$ce_i0 from last model, gives persistent cost efficiencies
# Panel data examples end -------------------------------------------------
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab