data(quine, package="MASS") #loads quine from MASS
str(quine)
quine$c.Days <- cut(quine$Days, c(-1, seq(0,20,10),100))
table(quine$c.Days)
# split quine in two subsets
set.seed(124)
lab.A <- sample(nrow(quine), 70, replace=TRUE)
quine.A <- quine[lab.A, c("Eth","Sex","Age","Lrn")]
quine.B <- quine[-lab.A, c("Eth","Sex","Age","c.Days")]
# create svydesign objects
require(survey)
quine.A$f <- 70/nrow(quine) # sampling fraction
quine.B$f <- (nrow(quine)-70)/nrow(quine)
svy.qA <- svydesign(~1, fpc=~f, data=quine.A)
svy.qB <- svydesign(~1, fpc=~f, data=quine.B)
# Harmonizazion wrt the joint distribution
# of ('Sex' x 'Age' x 'Eth')
# vector of population total known
# estimated from the full data set
# note the formula!
tot.m <- colSums(model.matrix(~Eth:Sex:Age-1, data=quine))
tot.m
out.hz <- harmonize.x(svy.A=svy.qA, svy.B=svy.qB, x.tot=tot.m,
form.x=~Eth:Sex:Age-1, cal.method="linear")
# estimation of 'Lrn' vs. 'c.Days' under the CIA
svy.qA.h <- out.hz$cal.A
svy.qB.h <- out.hz$cal.B
out.1 <- comb.samples(svy.A=svy.qA.h, svy.B=svy.qB.h,
svy.C=NULL, y.lab="Lrn", z.lab="c.Days",
form.x=~Eth:Sex:Age-1)
out.1$yz.CIA
addmargins(out.1$yz.CIA)
#
# incomplete two-way stratification
# select a sample C from quine
# and define a survey object
set.seed(4321)
lab.C <- sample(nrow(quine), 50, replace=TRUE)
quine.C <- quine[lab.C, c("Lrn","c.Days")]
quine.C$f <- 50/nrow(quine) # sampling fraction
svy.qC <- svydesign(~1, fpc=~f, data=quine.C)
# call comb.samples
out.2 <- comb.samples(svy.A=svy.qA.h, svy.B=svy.qB.h,
svy.C=svy.qC, y.lab="Lrn", z.lab="c.Days",
form.x=~Eth:Sex:Age-1, estimation="incomplete",
calfun="linear", maxit=100)
summary(weights(out.2$cal.C))
out.2$yz.est # estimated table of 'Lrn' vs. 'c.Days'
# difference wrt the table 'Lrn' vs. 'c.Days' under CIA
addmargins(out.2$yz.est)-addmargins(out.2$yz.CIA)
# synthetic two-way stratification
# only macro estimation
quine.C <- quine[lab.C, ]
quine.C$f <- 50/nrow(quine) # sampling fraction
svy.qC <- svydesign(~1, fpc=~f, data=quine.C)
out.3 <- comb.samples(svy.A=svy.qA.h, svy.B=svy.qB.h,
svy.C=svy.qC, y.lab="Lrn", z.lab="c.Days",
form.x=~Eth:Sex:Age-1, estimation="synthetic",
calfun="linear",bounds=c(.5,Inf), maxit=100)
summary(weights(out.3$cal.C))
out.3$yz.est # estimated table of 'Lrn' vs. 'c.Days'
# difference wrt the table of 'Lrn' vs. 'c.Days' under CIA
addmargins(out.3$yz.est)-addmargins(out.3$yz.CIA)
# diff wrt the table of 'Lrn' vs. 'c.Days' under incomplete 2ws
addmargins(out.3$yz.est)-addmargins(out.2$yz.CIA)
# synthetic two-way stratification
# with micro predictions
out.4 <- comb.samples(svy.A=svy.qA.h, svy.B=svy.qB.h,
svy.C=svy.qC, y.lab="Lrn", z.lab="c.Days",
form.x=~Eth:Sex:Age-1, estimation="synthetic",
micro=TRUE, calfun="linear",bounds=c(.5,Inf),
maxit=100)
head(out.4$Z.A)
head(out.4$Y.B)
Run the code above in your browser using DataLab