## Not run: ------------------------------------
# # Let's load a dummy dataset
# data(mtcars)
# setDT(mtcars) # Transform to data.table for easier manipulation
#
# # We train an xgboost model on all 32 observations (no row is held out here)
# set.seed(0)
# xgboost_model <- xgboost(data = data.matrix(mtcars[, -1]),
# label = mtcars$mpg,
# nrounds = 20)
#
# # Perform partial dependence grid prediction to analyze the behavior of all 32 observations
# # No column is specified below, so every feature is checked, including:
# # => horsepower (hp)
# # => number of cylinders (cyl)
# # => transmission (am)
# # => number of carburetors (carb)
# preds_partial <- partial_dep.obs_all(model = xgboost_model,
# predictor = predictor_xgb, # Default for xgboost
# data = mtcars[, -1], # train data
# observation = mtcars[, -1], # train data
# # when column is not specified => all columns
# accuracy = 20, # Up to 20 unique values per column
# exact_only = TRUE, # Not allowing approximations,
# label_name = "mpg", # Label is assumed to be "mpg"
# comparator_name = "evo") # Comparator +/-/eq for analysis
#
# # How many observations? 3360 (105 grid values x 32 rows), that's a lot coming from the original 32 observations.
# nrow(preds_partial$grid_exp)
#
# # How many observations analyzed per column?
# summary(preds_partial$grid_init)
# # Length Class Mode
# # cyl 3 -none- numeric
# # disp 19 -none- numeric
# # hp 16 -none- numeric
# # drat 16 -none- numeric
# # wt 19 -none- numeric
# # qsec 19 -none- numeric
# # vs 2 -none- numeric
# # am 2 -none- numeric
# # gear 3 -none- numeric
# # carb 6 -none- numeric
#
# # Great plotting skills!
# partial_dep.plot(preds_partial$grid_exp,
# backend = c("plotly", "line"),
# label_name = "mpg",
# comparator_name = "evo")
#
# # Get statistics to analyze fast
# partial_dep.feature(preds_partial$grid_exp, metric = "emp", in_depth = FALSE)
#
# # Get statistics to analyze in depth, but this is very slow on large data
# # Note: unreliable for a large number of observations due to asymptotic infinities
# partial_dep.feature(preds_partial$grid_exp, metric = "emp", in_depth = TRUE)
## ---------------------------------------------
Run the code above in your browser using DataLab