# NOT RUN {
# }
# NOT RUN {
# Download data from the link in "Source"
# Load the raw panel (tab-delimited; see "Source" for the download link)
banks00_07 <- read.delim(file = "2b_QLH.txt")
# the bank identifier arrives as 'entity'; call it 'id' from here on
names(banks00_07)[names(banks00_07) == "entity"] <- "id"
# number of observations per year in the raw file
table(banks00_07$year)
# restrict the sample to 2000 -- 2007, i.e. drop anything outside the window
banks00_07 <-
  banks00_07[!(banks00_07$year < 2000 | banks00_07$year > 2007), ]
dim(banks00_07)
# keep observations whose TA lies between the first and third quartile
# (quartiles are computed on the 2000 -- 2007 sample, bounds inclusive)
q1q3 <- quantile(banks00_07$TA, probs = c(0.25, 0.75))
banks00_07 <-
  banks00_07[!(banks00_07$TA < q1q3[1] | banks00_07$TA > q1q3[2]), ]
dim(banks00_07)
# generate required variables
banks00_07$TC <- banks00_07$TOC                  # TC = TOC
banks00_07$ER <- banks00_07$Z / banks00_07$TA    # ER = Z / TA
banks00_07$LA <- banks00_07$Y2 / banks00_07$TA   # LA = Y2 / TA
# Keep only the columns used from here on. Names that are not present in
# the data are simply ignored by %in%; "Ti" is listed although it is only
# created further down (unless the raw file already carries such a column).
# drop = FALSE guarantees the result stays a data frame.
banks00_07 <-
  banks00_07[, colnames(banks00_07) %in%
               c("id", "year", "Ti", "TC", "Y1", "Y2", "W1", "W2",
                 "ER", "LA", "TA", "LLP"),
             drop = FALSE]
dim(banks00_07)
# Ti: number of observations available for each bank.
# ave() computes the group size and writes it back onto every row of that
# group, so the result is aligned with the rows whatever their order is
# (no sort by id has been applied yet at this point).
banks00_07$Ti <- ave(seq_len(nrow(banks00_07)), banks00_07$id, FUN = length)
# keep banks observed more than 4 times
banks00_07 <- banks00_07[banks00_07$Ti > 4, ]
# complete observations
banks00_07 <- banks00_07[complete.cases(banks00_07), ]
dim(banks00_07)
# Draw a reproducible random subsample of 500 banks.
id_names <- unique(banks00_07$id)
N_total <- length(id_names)
# fixed seed so that the same banks are selected on every run
set.seed(816376586)
# positions (within id_names) of the banks to keep; sampling is without
# replacement, so this stops with an error if fewer than 500 banks remain
ids_n2choose <- sample(seq_len(N_total), 500)
ids2choose <- id_names[ids_n2choose]
# keep every row that belongs to one of the selected banks
banks00_07 <- banks00_07[banks00_07$id %in% ids2choose, ]
dim(banks00_07)
# sort by bank and year first: repeating one value per bank with
# rep(..., times = t0) is only aligned with the rows when each bank's rows
# are contiguous and the banks appear in ascending id order
banks00_07 <- banks00_07[order(banks00_07$id, banks00_07$year), ]
# number of observations per sampled bank, in ascending id order
t0 <- as.vector(tapply(banks00_07$id, banks00_07$id, length))
# sanity check: the group sizes must add up to the number of rows
sum(t0)
banks00_07$Ti <- rep(t0, times = t0)
banks00_07[1:50, c("id", "year", "Ti")]
# keep if Ti > 4
banks00_07 <- banks00_07[banks00_07$Ti > 4, ]
dim(banks00_07)
# refresh the group sizes so that t0 has exactly one entry per bank that
# survived the filter; a stale t0 would no longer match the data
t0 <- as.vector(tapply(banks00_07$id, banks00_07$id, length))
# TC = TOC
#
# ER = Z / TA
# Gross total equity to gross total assets ratio.
#
# LA = Y2 / TA
# Total loans and leases to gross total assets ratio.
# For each of TA, LLP, ER and LA add two bank-level columns:
#   <var>_ave      mean of the variable over the bank's observations
#   <var>_initial  value in the bank's first row (the earliest year when
#                  the data are sorted by id and year)
# ave() broadcasts the per-bank result back onto that bank's rows, so the
# new columns are aligned with the data without relying on a separately
# maintained vector of group sizes.
for (v in c("TA", "LLP", "ER", "LA")) {
  banks00_07[[paste0(v, "_ave")]] <-
    ave(banks00_07[[v]], banks00_07$id, FUN = mean)
  banks00_07[[paste0(v, "_initial")]] <-
    ave(banks00_07[[v]], banks00_07$id, FUN = function(qq) qq[1])
}
# Column list for an export subset.
# NOTE(review): it is defined here but not passed to write.table() below,
# so every column of banks00_07 is written -- confirm this is intended.
cols2export <- c(
  "id", "year", "Ti",
  "TA", "TA_ave", "TA_initial",
  "LLP", "LLP_ave", "LLP_initial",
  "ER_ave", "ER_initial",
  "LA_ave", "LA_initial"
)
# write the data frame to a text file, without row names
write.table(banks00_07, file = "banks00_07.txt", row.names = FALSE)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab