# fix the random seed so the simulated checklists are reproducible
set.seed(1)

# simulate example observations scattered around the origin
n_obs <- 10000
checklists <- data.frame(
  longitude = rnorm(n_obs, sd = 0.1),
  latitude = rnorm(n_obs, sd = 0.1),
  # day values are drawn uniformly from 1 to 28
  day_of_year = sample.int(28, n_obs, replace = TRUE),
  # placeholder column, filled in below
  year = NA_integer_,
  obs = rpois(n_obs, lambda = 0.1),
  forest_cover = runif(n_obs),
  # roughly 5% of rows are flagged as island 1, the rest are 0
  island = as.integer(runif(n_obs) > 0.95)
)

# fill in the year column, with sampling weights that increase towards
# recent years
checklists[["year"]] <- sample(
  seq(2016, 2020),
  size = n_obs,
  replace = TRUE,
  prob = seq(0.3, 0.7, length.out = 5)
)

# create several rare islands: nine randomly chosen rows are relabelled as
# islands 2 through 10, one row per island
rare_rows <- sample.int(nrow(checklists), 9)
checklists$island[rare_rows] <- 2:10
# basic spatiotemporal grid sampling, using the default settings of
# grid_sample()
sampled <- grid_sample(checklists)

# plot original data and grid sampled data
# the margins are removed for this figure only: par() returns the previous
# settings, which are restored after drawing so later plots are unaffected
old_par <- par(mar = c(0, 0, 0, 0))
# all checklists as semi-transparent black points
plot(checklists[, c("longitude", "latitude")],
     pch = 19, cex = 0.3, col = "#00000033",
     axes = FALSE)
# grid sampled checklists overlaid in red
points(sampled[, c("longitude", "latitude")],
       pch = 19, cex = 0.3, col = "red")
par(old_par)
# case control sampling stratified by year and island, returning a maximum of
# 1000 checklists
# NOTE(review): only the island stratification and the maximum sample size are
# set explicitly here; year stratification and case control sampling
# presumably come from the defaults of grid_sample_stratified() - confirm
sampled_cc <- grid_sample_stratified(
  checklists,
  sample_by = "island",
  maximum_ss = 1000
)

# case control sampling increases the prevalence of detections: compare the
# proportion of checklists with a non-zero count in the full data, the basic
# grid sample, and the stratified sample
mean(checklists[["obs"]] > 0)
mean(sampled[["obs"]] > 0)
mean(sampled_cc[["obs"]] > 0)

# stratifying by island ensures all levels are retained, even rare ones
# island counts in the full data
table(checklists[["island"]])
# normal grid sampling loses rare island levels
table(sampled[["island"]])
# stratified grid sampling retains at least one observation from each level
table(sampled_cc[["island"]])
# Run the code above in your browser using DataLab