# NOT RUN {
## World Development Panel Data

# Simple Summaries -------------------------

# Simple summary of every column
qsu(wlddev)
# Same summary, displaying variable labels
qsu(wlddev, vlabels = TRUE)
# Add skewness and kurtosis
qsu(wlddev, higher = TRUE)

# Grouped Summaries ------------------------

# Statistics by World Bank region
qsu(wlddev, by = ~ region, vlabels = TRUE)
# Summarize GDP per capita and life expectancy by World Bank income level
qsu(wlddev, by = PCGDP + LIFEEX ~ income)
# Same variables (columns 9:10), by both region and income
stats <- qsu(
  wlddev,
  by = ~ region + income,
  cols = 9:10,
  higher = TRUE
)
# A different perspective on the same stats
aperm(stats)

# Panel Data Summaries ---------------------

# Adding between- and within-country statistics
qsu(wlddev, pid = ~ iso3c, vlabels = TRUE)
# -> They show, amongst other things, that year and decade are
#    individual-invariant, that we have GINI data on only 161 countries with
#    only 8.42 observations per country on average, and that GDP, LIFEEX and
#    GINI vary more between countries, but ODA received varies more within
#    countries over time.

# Let's do this manually for PCGDP:
x <- wlddev$PCGDP
g <- wlddev$iso3c

# This is the exact variance decomposition
all.equal(
  fvar(x),
  fvar(B(x, g)) + fvar(W(x, g))
)

# What qsu does is calculate
r <- rbind(
  Overall = qsu(x),
  # Aggregation instead of between-transform
  Between = qsu(fmean(x, g)),
  # Same as qsu(W(x, g) + fmean(x))
  Within = qsu(fwithin(x, g, mean = "overall.mean"))
)
# Replace the first statistic of the Within row by the Overall / Between ratio
r[3, 1] <- r[1, 1] / r[2, 1]
print.qsu(r)

# Proof:
qsu(x, pid = g)
# }
# NOT RUN {
# % No code relying on suggested package
# (The line above was an Rd/HTML marker left as bare text, which is not valid
#  R; it is kept as a comment so that the script parses.)

# Using plm:
# Creating a panel data frame from this data, indexed by country and year
pwlddev <- plm::pdata.frame(wlddev,
                            index = c("iso3c", "year"))
# Summary for pdata.frame -> same as qsu(wlddev, pid = ~ iso3c)
qsu(pwlddev)
# Default summary for a panel series (class pseries)
qsu(pwlddev$PCGDP)
# Summarizing GDP growth, see also ?G
qsu(G(pwlddev$PCGDP))

# Grouped Panel Data Summaries -------------

# Panel statistics by region
qsu(wlddev, ~ region, ~ iso3c, cols = 9:12)
# Same on the plm pdata.frame
psr <- qsu(pwlddev, ~ region, cols = 9:12)
# -> Gives a 4D array
psr
# Checking out the number of observations:
psr[, "N/T", , ]
# In North America we only have 3 countries; for the GINI we only have 3.91
# observations on average for 45 Sub-Saharan African countries, etc.

# Considering only standard deviations
psr[, "SD", , ]
# -> In all regions, variations in inequality (GINI) between countries are
#    greater than variations in inequality within countries. The opposite is
#    true for life expectancy in all regions apart from Europe, etc.
# }
# NOT RUN {
# Again let's do this manually for PCGDP
# (x and g are the PCGDP and iso3c vectors created earlier in this file):
d <- cbind(
  Overall = x,
  Between = fbetween(x, g),
  Within = fwithin(x, g, mean = "overall.mean")
)
r <- qsu(d, g = wlddev$region)
# Between N: distinct values of g per region, using only rows where x is
# not missing
r[, "N", "Between"] <- fndistinct(g[!is.na(x)], wlddev$region[!is.na(x)])
# Within N: ratio of the Overall N to the Between N
r[, "N", "Within"] <- r[, "N", "Overall"] / r[, "N", "Between"]
r

# Proof:
qsu(wlddev, by = PCGDP ~ region, pid = ~ iso3c)

# Same as above, but output as nested list
psrl <- qsu(
  wlddev,
  by = ~ region,
  pid = ~ iso3c,
  cols = 9:12,
  array = FALSE
)
psrl
# We can use unlist2d to create a tidy data.frame
head(
  unlist2d(psrl, idcols = c("Variable", "Trans"), row.names = "Region")
)

# Weighted Summaries -----------------------

n <- nrow(wlddev)
# Generate random weights
weights <- abs(rnorm(n))
# Compute weighted mean, SD, skewness and kurtosis
qsu(wlddev, w = weights, higher = TRUE)

# Weights may contain missing values: insert 1000 of them at random positions
weightsNA <- weights
weightsNA[sample.int(n, 1000)] <- NA
# But now these values are removed from all variables
qsu(wlddev, w = weightsNA, higher = TRUE)

# Grouped and panel summaries can also be weighted in the same manner
# }
# Run the code above in your browser using DataLab