# NOT RUN {
# }
# NOT RUN {
## @test .port Database port number
## @test .dbname Database name
## Set up the database connection.
## `.port` and `.dbname` are placeholders: they are assumed to hold the port
## number and the database name of the database you want to connect to.
## `cid` identifies the connection and is passed to the calls below.
cid <- db.connect(port = .port, dbname = .dbname, verbose = FALSE)
## Create a db.table object pointing to a data table.
## A table named "abalone" is deleted on this connection first (presumably so
## that the creation below does not collide with a leftover table -- confirm),
## then `x` is created from the `abalone` data under the table name "abalone".
delete("abalone", conn.id = cid)
x <- as.db.data.frame(abalone, "abalone", conn.id = cid, verbose = FALSE)
## Example 1 --------
## Linear regression of `rings` on every column of x except `sex` and `id`.
fit <- madlib.lm(rings ~ . - sex - id, data = x)
fit
pred <- predict(fit, x) # predictions of the fitted model on x
content(pred) # inspect what `pred` holds (presumably the underlying query -- confirm)
ans <- x$rings # the actual value
lk((ans - pred)^2, 10) # squared error (the 10 presumably limits the rows shown)
lk(mean((ans - pred)^2)) # mean squared error
## Example 2 ---------
## Same kind of fit as before, but `sex` is kept as a predictor after being
## converted to a factor; only `id` is excluded from the formula.
y <- x
y$sex <- as.factor(y$sex) # mark sex as a factor before fitting
fit <- madlib.lm(rings ~ . - id, data = y)
lk(mean((y$rings - predict(fit, y))^2)) # mean squared error of this fit
## Example 3 ---------
## Fit with a grouping term: `| sex` in the formula groups the fit by `sex`,
## and the result holds sub-models that can be picked out with `fit[[idx]]`
## (see the end of this example).
fit <- madlib.lm(rings ~ . - id | sex, data = x)
fit
pred <- predict(fit, x) # predictions of the grouped fit on x
content(pred)
ans <- x$rings # the actual value
lk(mean((ans - pred)^2)) # mean squared error
## Predictions for one group of data where sex = I
idx <- which(groups(fit)[["sex"]] == "I") # which sub-model
pred1 <- predict(fit[[idx]], x[x$sex == "I",]) # predict on part of data
## Example 4 --------
## Plot the predicted values vs. the true values.
## Uses `ans` (true values) and `pred` (predictions) computed just above.
ap <- ans # true values
ap$pred <- pred # add a column which is the predicted values
## If the data set is very big, you do not want to load all the
## data points into R and plot. We can just plot a random sample.
random.sample <- lk(sort(ap, FALSE, NULL), 1000) # sort randomly, then look at 1000
plot(random.sample)
## ------------------------------------------------------------
## GLM prediction
## Poisson regression with a log link, grouped by `sex` (the `| sex` term),
## with `max.iter = 20` passed through the `control` list.
fit <- madlib.glm(rings ~ . - id | sex, data = x, family = poisson(log),
                  control = list(max.iter = 20))
## Predict with the model fitted just above. The fitted object is `fit`
## (no object named `f` exists in this script), and `x` is passed as the data
## to predict on, as in the earlier examples.
p <- predict(fit, x)
lk(p, 10)
## Close the connection identified by `cid`.
db.disconnect(cid, verbose = FALSE)
# }
## Run the code above in your browser using DataLab