# NOT RUN {
# }
# NOT RUN {
## @test .port Database port number
## @test .dbname Database name

## Set up the database connection.
## Assumes that .port is the port number and .dbname is the database name.
cid <- db.connect(port = .port, dbname = .dbname, verbose = FALSE)

## Copy the abalone data set into the database and preview 10 rows.
source_data <- as.db.data.frame(abalone, conn.id = cid, verbose = FALSE)
lk(source_data, 10)

## ------------------------------------------------------------
## 1st example: linear regression (default family) with grouping.
## The term after "|" is the grouping expression, so the model is
## fitted conditioned on the value of sex.
fit <- madlib.glm(rings ~ . - id | sex, data = source_data,
                  heteroskedasticity = TRUE)
fit

## ------------------------------------------------------------
## 2nd example: logistic regression.
## The dependent variable must be a logical variable.
## Here it is rings < 10.
fit <- madlib.glm(rings < 10 ~ . - id - 1, data = source_data,
                  family = binomial)

fit <- madlib.glm(rings < 10 ~ sex + length + diameter,
                  data = source_data, family = "logistic")

## ------------------------------------------------------------
## 3rd example: array-valued column in the formula.
## Add a column arr that packs every column of source_data except the
## first two into a single array, then materialize the result as a table.
## NOTE(review): the first two columns are presumably id and sex -- confirm.
dat <- source_data
dat$arr <- db.array(source_data[, -c(1, 2)])
array.data <- as.db.data.frame(dat)

## Fit using every element of arr.
## This does not work in R's glm, but works in madlib.glm.
fit <- madlib.glm(rings < 10 ~ arr, data = array.data, family = binomial)

## Drop selected array elements from the right-hand side.
fit <- madlib.glm(rings < 10 ~ arr - arr[1:2], data = array.data,
                  family = binomial)

## Combine array elements with an ordinary column and group by id %% 3
## (id modulo 3).
fit <- madlib.glm(rings < 10 ~ arr[1:7] + sex | id %% 3, data = array.data,
                  family = "binomial")

fit <- madlib.glm(rings < 10 ~ arr - arr[8] + sex | id %% 3,
                  data = array.data, family = "binomial")

## ------------------------------------------------------------
## 4th example: step-wise feature selection.
## Fit the starting model; the step() call is left commented out.
start <- madlib.glm(rings < 10 ~ . - id - sex, data = source_data,
                    family = "binomial")
## step(start)

## ------------------------------------------------------------
## Examples for using the GLM model with explicit family/link pairs.
## Each fit passes max.iter = 10 through the control list.
fit <- madlib.glm(rings < 10 ~ . - id - sex, data = source_data,
                  family = binomial(probit),
                  control = list(max.iter = 10))

fit <- madlib.glm(rings ~ . - id | sex, data = source_data,
                  family = poisson(log),
                  control = list(max.iter = 10))

fit <- madlib.glm(rings ~ . - id, data = source_data,
                  family = Gamma(inverse),
                  control = list(max.iter = 10))

## Release the database connection.
db.disconnect(cid, verbose = FALSE)
# }
# Run the code above in your browser using DataLab