## Not run: ------------------------------------
# # Let's load a dummy dataset
# data(mtcars)
# setDT(mtcars) # Transform to data.table for easier manipulation
#
# # We train an xgboost model on the first 31 observations and keep the last one to analyze later
# set.seed(0)
# xgboost_model <- xgboost(data = data.matrix(mtcars[-32, -1]),
# label = mtcars$mpg[-32],
# nrounds = 20)
#
# # Perform partial dependence grid prediction to analyze the behavior of the 32nd observation
# # We want to check how it behaves with:
# # => horsepower (hp)
# # => number of cylinders (cyl)
# # => transmission (am)
# # => number of carburetors (carb)
# preds_partial <- partial_dep.obs(model = xgboost_model,
# predictor = predictor_xgb, # Default for xgboost
# data = mtcars[-32, -1], # train data = first 31 observations
# observation = mtcars[32, -1], # 32nd observation to analyze
# column = c("hp", "cyl", "am", "carb"),
# accuracy = 20, # Up to 20 unique values per column
# safeguard = TRUE, # Prevent high memory usage
# safeguard_val = 1048576, # No more than 1048576 observations,
# exact_only = TRUE, # Not allowing approximations,
# label_name = "mpg", # Label is assumed to be "mpg"
# comparator_name = "evo") # Comparator +/-/eq for analysis
#
# # How many observations? 300
# nrow(preds_partial$grid_exp)
#
# # How many observations analyzed per column? hp=10, cyl=3, am=2, carb=5
# summary(preds_partial$grid_init)
#
# # When cyl decreases, mpg increases!
# partial_dep.plot(grid_data = preds_partial$grid_exp,
# backend = "tableplot",
# label_name = "mpg",
# comparator_name = "evo")
#
# # Another way of plotting... hp/mpg relationship is not obvious
# partial_dep.plot(grid_data = preds_partial$grid_exp,
# backend = "car",
# label_name = "mpg",
# comparator_name = "evo")
#
# # Do NOT do this on >1k samples, this will kill RStudio
# # Histograms make it obvious when decrease/increase happens.
# partial_dep.plot(grid_data = preds_partial$grid_exp,
# backend = "plotly",
# label_name = "mpg",
# comparator_name = "evo")
#
# # Get statistics to analyze fast
# partial_dep.feature(preds_partial$grid_exp, metric = "emp", in_depth = FALSE)
#
# # Get in-depth statistics to analyze; this is very slow on large data
# # Note: unreliable for a large number of observations due to asymptotic infinities
# partial_dep.feature(preds_partial$grid_exp, metric = "emp", in_depth = TRUE)
## ---------------------------------------------
Run the code above in your browser using DataLab