# Example from An and Watts (New SAS procedures for Analysis of Sample Survey Data)
# generates artificial data (a 235 x 3 data frame with 3 columns: state, region, income).
# the variable "state" has 2 categories ('nc' and 'sc').
# the variable "region" has 3 categories (1, 2 and 3).
# the sampling frame is stratified by region within state.
# the income variable is randomly generated
# Build the artificial sampling frame: 235 units, one row per unit.
# Rows are laid out stratum by stratum: nc/1, nc/2, nc/3, sc/1, sc/2.
m <- data.frame(
  state = rep(c("nc", "sc"), times = c(165, 70)),
  region = rep(c(1, 2, 3, 1, 2), times = c(100, 50, 15, 30, 40)),
  income = 1000 * runif(235)
)
# Population stratum sizes (region in rows, state in columns).
table(m$region, m$state)
# not run
# nc sc
# 1 100 30
# 2 50 40
# 3 15 0
# there are 5 cells with non-zero values
# Example 1: draw one sample in each of the 5 non-empty strata,
# with sample stratum sizes 10, 5, 10, 4 and 6, respectively.
# Method 'srswor': equal probability, without replacement.
s <- Strata(
  m,
  stratanames = c("region", "state"),
  size = c(10, 5, 10, 4, 6),
  method = "srswor"
)
# Look up the income of every selected unit and show it next to the sample.
data.frame(income = m$income[s$id], s)
# Cross-tabulate the sample in a contingency table (region by state).
table(s$region, s$state)
# Second example, on the same data frame `m` as the first one.
# Method 'systematic': unequal probability, without replacement.
# The selection probabilities are computed using the variable 'income'.
s <- Strata(
  m,
  stratanames = c("region", "state"),
  size = c(10, 5, 10, 4, 6),
  method = "systematic",
  pik = m$income
)
# Look up the income of every selected unit and show it next to the sample.
data.frame(income = m$income[s$id], s)
# Cross-tabulate the sample in a contingency table (region by state).
table(s$region, s$state)
# Run the code above in your browser using DataLab