# NOT RUN {
# First wrap the data. The resulting object, iris_box, is what each
# xform_discretize() call below receives and is reassigned from.
iris_box <- xform_wrap(iris)
# }
# NOT RUN {
# Convert the continuous variable "Sepal.Length" to a discrete
# variable "dsl". The intervals to be used for this transformation are
# given in a file, "intervals.csv", whose content is, for example:
#
# 5],val1
# (5:6],22
# (6,val2
#
# This will be used to create a discrete variable named "dsl" of dataType
# "string" such that:
# if(Sepal.Length <= 5) then dsl = "val1"
# if((Sepal.Length > 5) and (Sepal.Length <= 6)) then dsl = "22"
# if(Sepal.Length > 6) then dsl = "val2"
#
# A missing "Sepal.Length" value is mapped to the "dsl" value "0".
iris_box <- xform_discretize(
  iris_box,
  xform_info = "[Sepal.Length -> dsl][double -> string]",
  table = "intervals.csv",
  map_missing_to = "0"
)
# }
# NOT RUN {
# Another way to specify the transformation: a list holding a single
# data frame.
t <- list()
# Row 1 names the fields, row 2 gives their data types and every further
# row describes one interval. All cells are text, so each column of the
# data frame becomes a factor.
m <- as.data.frame(
  matrix(
    c(
      "Petal.Length", "dis_pl", "leftInterval", "leftValue",
      "rightInterval", "rightValue",
      "double", "integer", "string", "double", "string", "double",
      "0)", "0", "open", NA, "Open", "0",
      NA, "1", "closed", "0", "Open", "1",
      NA, "2", "closed", "1", "Open", "2",
      NA, "3", "closed", "2", "Open", "3",
      NA, "4", "closed", "3", "Open", "4",
      "[4", "5", "closed", "4", "Open", NA
    ),
    ncol = 6, byrow = TRUE
  ),
  stringsAsFactors = TRUE
)
# The column names are purely cosmetic and may be omitted.
names(m) <- c(
  "Petal.Length", "dis_pl", "leftInterval", "leftValue",
  "rightInterval", "rightValue"
)
# A textual representation of the data frame is:
# Petal.Length dis_pl leftInterval leftValue rightInterval rightValue
# 1 Petal.Length dis_pl leftInterval leftValue rightInterval rightValue
# 2 double integer string double string double
# 3 0) 0 open <NA> Open 0
# 4 <NA> 1 closed 0 Open 1
# 5 <NA> 2 closed 1 Open 2
# 6 <NA> 3 closed 2 Open 3
# 7 <NA> 4 closed 3 Open 4
# 8 [4 5 closed 4 Open <NA>
#
# This is a transformation that defines a derived field 'dis_pl'
# which has the integer value '0' if the original field
# 'Petal.Length' has a value less than 0. The derived field has a
# value '1' if the input is greater than or equal to 0 and less
# than 1. Note that the values of the 1st column after row 2 have
# been deliberately given NA values in the middle. This is to
# show that that column is meant for a textual representation of
# the transformation as defined for the method involving external
# files; however, in this method their values are not used.
# Add the data frame to a list. The default values and the missing
# values should be given as a vector, each element of the vector
# corresponding to the element at the same index in the list. If
# these values are not given as a vector, they will be used for the
# first list element only.
# Store the data frame as the first (and only) list element and supply
# one default value and one missing-value replacement to go with it.
t[[1]] <- m
def <- 11
mis <- 22
iris_box <- xform_discretize(
  iris_box,
  xform_info = t,
  default_value = def,
  map_missing_to = mis
)
# Fit a simple linear model on the transformed data (column 5 dropped),
# then pass the fit and the transformations to pmml() to see the effect.
fit <- lm(Petal.Width ~ ., data = iris_box$data[, -5])
fit_pmml <- pmml(fit, transforms = iris_box)
# }
# Run the code above in your browser using DataLab