########################################################################
# Example 1: Simulated data - Randomly-distributed training points
########################################################################
library(sf)
library(ggplot2)
# Simulate 1000 random training points in a 100x100 square
set.seed(1234)
simarea <- list(matrix(c(0,0,0,100,100,100,100,0,0,0), ncol=2, byrow=TRUE))
simarea <- sf::st_polygon(simarea)
train_points <- sf::st_sample(simarea, 1000, type = "random")
pred_points <- sf::st_sample(simarea, 1000, type = "regular")
plot(simarea)
plot(pred_points, add = TRUE, col = "blue")
plot(train_points, add = TRUE, col = "red")
# Run kNNDM for the whole domain, here the prediction points are known.
knndm_folds <- knndm(train_points, ppoints = pred_points, k = 5)
knndm_folds
plot(knndm_folds)
folds <- as.character(knndm_folds$clusters)
ggplot() +
  geom_sf(data = simarea, alpha = 0) +
  geom_sf(data = train_points, aes(col = folds))
########################################################################
# Example 2: Simulated data - Clustered training points
########################################################################
if (FALSE) {
library(sf)
library(ggplot2)
# Simulate 1000 clustered training points in a 100x100 square
set.seed(1234)
simarea <- list(matrix(c(0,0,0,100,100,100,100,0,0,0), ncol=2, byrow=TRUE))
simarea <- sf::st_polygon(simarea)
train_points <- clustered_sample(simarea, 1000, 50, 5)
pred_points <- sf::st_sample(simarea, 1000, type = "regular")
plot(simarea)
plot(pred_points, add = TRUE, col = "blue")
plot(train_points, add = TRUE, col = "red")
# Run kNNDM for the whole domain, here the prediction points are known.
knndm_folds <- knndm(train_points, ppoints = pred_points, k = 5)
knndm_folds
plot(knndm_folds)
folds <- as.character(knndm_folds$clusters)
ggplot() +
  geom_sf(data = simarea, alpha = 0) +
  geom_sf(data = train_points, aes(col = folds))
}
########################################################################
# Example 3: Real- world example; using a modeldomain instead of previously
# sampled prediction locations
########################################################################
if (FALSE) {
library(sf)
library(terra)
library(ggplot2)
### prepare sample data:
dat <- readRDS(system.file("extdata","Cookfarm.RDS",package="CAST"))
dat <- aggregate(dat[,c("DEM","TWI", "NDRE.M", "Easting", "Northing","VW")],
   by=list(as.character(dat$SOURCEID)),mean)
pts <- dat[,-1]
pts <- st_as_sf(pts,coords=c("Easting","Northing"))
st_crs(pts) <- 26911
studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST"))
studyArea[!is.na(studyArea)] <- 1
studyArea <- as.polygons(studyArea, values = FALSE, na.all = TRUE) |>
    st_as_sf() |>
    st_union()
pts <- st_transform(pts, crs = st_crs(studyArea))
plot(studyArea)
plot(st_geometry(pts), add = TRUE, col = "red")
knndm_folds <- knndm(pts, modeldomain=studyArea, k = 5)
knndm_folds
plot(knndm_folds)
folds <- as.character(knndm_folds$clusters)
ggplot() +
  geom_sf(data = pts, aes(col = folds))
#use for cross-validation:
library(caret)
ctrl <- trainControl(method="cv",
   index=knndm_folds$indx_train,
   savePredictions='final')
model_knndm <- train(dat[,c("DEM","TWI", "NDRE.M")],
   dat$VW,
   method="rf",
   trControl = ctrl)
global_validation(model_knndm)
}
Run the code above in your browser using DataLab