if (FALSE) {
## In this script we will create 5km resolution pixellated grid over Kenya,
## and generate tables of estimated (both target and general) population
## totals at the area (e.g. Admin-1) and subarea (e.g. Admin-2) levels. Then
## we will use that to simulate populations of
# download Kenya GADM shapefiles from SUMMERdata github repository
githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
"kenyaMaps.rda?raw=true")
tempDirectory = "~/"
mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
if(!file.exists(mapsFilename)) {
download.file(githubURL,mapsFilename)
}
# load it in
out = load(mapsFilename)
out
kenyaMesh <- fmesher::fm_as_fm(kenyaMesh)
adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
# some Admin-2 areas have the same name
adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") &
(adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") &
(adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") &
(adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") &
(adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
# The spatial area of unknown 8 is so small, it causes problems unless its removed or
# unioned with another subarea. Union it with neighboring Kakeguria:
newadm2 = adm2
unknown8I = which(newadm2$NAME_2 == "unknown 8")
newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <-
"Kapenguria + unknown 8"
admin2.IDs <- newadm2$NAME_2
newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
newadm2@data$NAME_2OLD = newadm2@data$NAME_2
newadm2@data$NAME_2 = admin2.IDs
newadm2$NAME_2 = admin2.IDs
temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
library(sf)
temp <- sf::st_as_sf(temp)
temp <- sf::as_Spatial(temp)
tempData = newadm2@data[-unknown8I,]
tempData = tempData[order(tempData$NAME_2),]
newadm2 <- sp::SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
adm2 = newadm2
# download 2014 Kenya population density TIF file
githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
"Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
if(!file.exists(popTIFFilename)) {
download.file(githubURL,popTIFFilename)
}
# load it in
pop = terra::rast(popTIFFilename)
east.lim = c(-110.6405, 832.4544)
north.lim = c(-555.1739, 608.7130)
## Construct poppsubKenya, a table of urban/rural general population totals
## in each subarea. Technically, this is not necessary since we can load in
## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate
## the areas in km^2 of the areas and subareas
# use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
midLon = mean(adm1@bbox[1,])
midLat = mean(adm1@bbox[2,])
p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon,
" +lat_0=", midLat, " +units=km")
adm1_sf = st_as_sf(adm1)
adm1proj_sf = st_transform(adm1_sf, p4s)
adm1proj = as(adm1proj_sf, "Spatial")
adm2_sf = st_as_sf(adm2)
adm2proj_sf = st_transform(adm2_sf, p4s)
adm2proj = as(adm2proj_sf, "Spatial")
# now calculate spatial area in km^2
admin1Areas = as.numeric(st_area(adm1proj_sf))
admin2Areas = as.numeric(st_area(adm2proj_sf))
areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2,
spatialArea=admin2Areas)
# Calculate general population totals at the subarea (Admin-2) x urban/rural
# level and using 1km resolution population grid. Assign urbanicity by
# thresholding population density based on estimated proportion population
# urban/rural, making sure total area (Admin-1) urban/rural populations in
# each area matches poppaKenya.
# NOTE: the following function will typically take ~15-20 minutes. Can speed up
# by setting km.res to be higher, but we recommend fine resolution for
# this step, since it only needs to be done once. Instead of running
# the code in the following if(FALSE) section,
# you can simply run data(kenyaPopulationData)
if(FALSE){
system.time(poppsubKenya <- getPoppsub(
km.res=1, pop=pop, domain.map.dat=adm0,
east.lim=east.lim, north.lim=north.lim, map.projection=projKenya,
poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya,
area.map.dat=adm1, subarea.map.dat=adm2,
areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
}
data(kenyaPopulationData)
# Now generate a general population integration table at 5km resolution,
# based on subarea (Admin-2) x urban/rural population totals. This takes
# ~1 minute
pop.matKenya <- makePopIntegrationTab(
km.res=5, pop=pop, domain.map.dat=adm0,
east.lim=east.lim, north.lim=north.lim, map.projection=projKenya,
poppa = poppaKenya, poppsub=poppsubKenya,
area.map.dat = adm1, subarea.map.dat = adm2,
areaNameVar = "NAME_1", subareaNameVar="NAME_2")
## Adjust pop.mat to be target (neonatal) rather than general population
## density. First create the target population frame
## (these numbers are based on IPUMS microcensus data)
mothersPerHouseholdUrb = 0.3497151
childrenPerMotherUrb = 1.295917
mothersPerHouseholdRur = 0.4787696
childrenPerMotherRur = 1.455222
targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb *
childrenPerMotherUrb
targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur *
childrenPerMotherRur
easpaKenyaNeonatal = easpaKenya
easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
easpaKenyaNeonatal$popRur = targetPopPerStratumRural
easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb +
easpaKenyaNeonatal$popRur
easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb /
easpaKenyaNeonatal$popTotal
easpaKenyaNeonatal$pctTotal =
100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
# Generate the target population density by scaling the current
# population density grid at the Admin1 x urban/rural level
pop.matKenyaNeonatal = adjustPopMat(pop.matKenya, easpaKenyaNeonatal)
# Generate neonatal population table from the neonatal population integration
# matrix. This is technically not necessary for population simulation purposes,
# but is here for illustrative purposes
poppsubKenyaNeonatal = poppRegionFromPopMat(pop.matKenyaNeonatal,
pop.matKenyaNeonatal$subarea)
poppsubKenyaNeonatal =
cbind(subarea=poppsubKenyaNeonatal$region,
area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region, adm2@data$NAME_2)],
poppsubKenyaNeonatal[,-1])
print(head(poppsubKenyaNeonatal))
## Now we're ready to simulate neonatal populations along with neonatal
## mortality risks and prevalences
# use the following model to simulate the neonatal population based roughly
# on Paige et al. (2020) neonatal mortality modeling for Kenya.
beta0=-2.9 # intercept
gamma=-1 # urban effect
rho=(1/3)^2 # spatial variance
eff.range = 400 # effective spatial range in km
sigma.epsilon=sqrt(1/2.5) # cluster (nugget) effect standard deviation
# Run a simulation! This produces multiple dense nEA x nsim and nPixel x nsim
# matrices. In the future sparse matrices and chunk by chunk computations
# may be incorporated.
simPop = simPopSPDE(nsim=1, easpa=easpaKenyaNeonatal,
pop.mat=pop.matKenya, target.pop.mat=pop.matKenyaNeonatal,
poppsub=poppsubKenya, spde.mesh=kenyaMesh,
marg.var=rho, sigma.epsilon=sigma.epsilon,
gamma=gamma, eff.range=eff.range, beta0=beta0,
seed=12, inla.seed=12, n.HH.sampled=25,
stratify.by.urban=TRUE, subarea.level=TRUE,
do.fine.scale.risk=TRUE, do.smooth.risk=TRUE,
min1.per.subarea=TRUE)
# get average absolute percent error relative to fine scale prevalence at Admin-2 level
tempDat = simPop$subareaPop$aggregationResults[c("region", "pFineScalePrevalence",
"pFineScaleRisk", "pSmoothRisk")]
100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pFineScaleRisk) /
tempDat$pFineScalePrevalence)
100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pSmoothRisk) /
tempDat$pFineScalePrevalence)
100*mean(abs(tempDat$pFineScaleRisk - tempDat$pSmoothRisk) /
tempDat$pFineScalePrevalence)
# verify number of EAs per area and subarea
cbind(aggregate(simPop$eaPop$ea.samples[,1], by=list(area=pop.matKenya$area), FUN=sum),
trueNumEAs=easpaKenya$EATotal[order(easpaKenya$area)])
aggregate(simPop$eaPop$ea.samples[,1], by=list(area=pop.matKenya$subarea), FUN=sum)
## plot simulated population
# directory for plotting
# (mapPlot takes longer when it doesn't save to a file)
tempDirectory = "~/"
# pixel level plots. Some pixels have no simulated EAs, in which case they will be
# plotted as white. Expected noisy looking plots of fine scale risk and prevalence
# due to EAs being discrete, as compared to a very smooth plot of smooth risk.
zlim = c(0, quantile(probs=.995, c(simPop$pixelPop$pFineScalePrevalence,
simPop$pixelPop$pFineScaleRisk,
simPop$pixelPop$pSmoothRisk), na.rm=TRUE))
par(mfrow=c(2,2))
plot(adm2, asp=1)
points(simPop$eaPop$eaDatList[[1]]$lon, simPop$eaPop$eaDatList[[1]]$lat, pch=".", col="blue")
plot(adm2, asp=1)
fields::quilt.plot(pop.matKenya$lon, pop.matKenya$lat, simPop$pixelPop$pFineScalePrevalence,
zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
plot(adm2, asp=1)
fields::quilt.plot(pop.matKenya$lon, pop.matKenya$lat, simPop$pixelPop$pFineScaleRisk,
zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
fields::quilt.plot(pop.matKenya$lon, pop.matKenya$lat, simPop$pixelPop$pSmoothRisk,
zlim=zlim, FUN=function(x) {mean(x, na.rm=TRUE)}, asp=1)
plot(adm2, add=TRUE)
range(simPop$eaPop$eaDatList[[1]]$N)
# areal (Admin-1) level: these results should look essentially identical
tempDat = simPop$areaPop$aggregationResults[c("region", "pFineScalePrevalence",
"pFineScaleRisk", "pSmoothRisk")]
mapPlot(tempDat,
variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"),
geo=adm1, by.geo="NAME_1", by.data="region", is.long=FALSE)
# subareal (Admin-2) level: these results should look subtley different
# depending on the type of prevalence/risk considered
tempDat = simPop$subareaPop$aggregationResults[c("region", "pFineScalePrevalence",
"pFineScaleRisk", "pSmoothRisk")]
mapPlot(tempDat,
variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"),
geo=adm2, by.geo="NAME_2", by.data="region", is.long=FALSE)
}
Run the code above in your browser using DataLab