# NOT RUN {
# Example: fit an FP-growth model with SparkR, inspect it, predict with it,
# and persist it.

# Read the sample file as a single string column named "raw_items".
raw_data <- read.df(
  "data/mllib/sample_fpgrowth.txt",
  source = "csv",
  schema = structType(structField("raw_items", "string"))
)

# Split each raw string on spaces to obtain an array column named "items".
data <- selectExpr(raw_data, "split(raw_items, ' ') as items")
model <- spark.fpGrowth(data)

# Show frequent itemsets
frequent_itemsets <- spark.freqItemsets(model)
showDF(frequent_itemsets)

# Show association rules
association_rules <- spark.associationRules(model)
showDF(association_rules)

# Predict on new data
# The local data.frame holds comma-separated strings; split them on commas so
# the resulting column is named "items", like the training data.
new_itemsets <- data.frame(items = c("t", "t,s"))
new_data <- selectExpr(
  createDataFrame(new_itemsets),
  "split(items, ',') as items"
)
predict(model, new_data)

# Save and load model
path <- "/path/to/model"
write.ml(model, path)
read.ml(path)

# Optional arguments
# Build a dataset whose item column is named "baskets" (from the same local
# data.frame used above) and fit on that dataset, so that itemsCol = "baskets"
# refers to a column that actually exists in the input.
baskets_data <- selectExpr(
  createDataFrame(new_itemsets),
  "split(items, ',') as baskets"
)
another_model <- spark.fpGrowth(
  baskets_data,
  minSupport = 0.1,
  minConfidence = 0.5,
  itemsCol = "baskets",
  numPartitions = 10
)
# }
# Run the code above in your browser using DataLab