# NOT RUN {
# Bring the "audit" example dataset, which is part of the pmml
# package, into the workspace.
data("audit")
# Wrap the raw data; the wrapped object is what the xform_*
# functions below operate on.
audit_box <- xform_wrap(audit)
# }
# NOT RUN {
# The variable "Sex" has 2 possible values: "Male" and "Female".
# To map these string values to numeric values, first create a
# file, say "map_audit.csv", whose content is, for example:
#
# Male,1
# Female,2
#
# Then transform the variable "Sex" into a variable "d_sex"
# such that:
#   if Sex = "Male"   then d_sex = "1"
#   if Sex = "Female" then d_sex = "2"
#
# "d_sex" is given the value 0 if the input variable value is
# missing.
audit_box <- xform_map(
  audit_box,
  xform_info = "[Sex -> d_sex][string->integer]",
  table = "map_audit.csv",
  map_missing_to = "0"
)
# }
# NOT RUN {
# Same as above, with an extra input variable, but supplying the
# map as a data frame instead of a file.
# The top 2 rows give the variable names and their data types;
# the remaining rows represent the map. For example, the third row
# indicates that when the input variable "Sex" has the value "Male"
# and the input variable "Employment" has the value "PSLocal", the
# output variable "d_sex" should have the value 1.
sex_map <- data.frame(
  c("Sex", "string", "Male", "Female"),
  c("Employment", "string", "PSLocal", "PSState"),
  c("d_sex", "integer", 1, 0),
  stringsAsFactors = TRUE
)
# The maps are passed as a list, one data frame per map. The list
# is deliberately not called `t`, which would mask base::t().
map_tables <- list(sex_map)
# Here the default value is given as a vector and the missing value
# as a string; this is only possible because a single map is
# defined. If a default value is not given, it will simply not
# appear in the PMML file either. In general, the default values
# and the missing values should be given as vectors, each element
# corresponding to the element at the same index in the list. If
# these values are not given as a vector, they will be used for the
# first list element only.
audit_box <- xform_map(
  audit_box,
  xform_info = map_tables,
  default_value = c(3),
  map_missing_to = "2"
)
# Fit a linear model on the transformed data and convert it to
# PMML, passing the transformations along, to check what the
# resulting PMML looks like.
fit <- lm(Adjusted ~ ., data = audit_box$data)
fit_pmml <- pmml(fit, transforms = audit_box)
# }
# Run the code above in your browser using DataLab