Learn R Programming

superml (version 0.5.7)

LMTrainer: Linear Models Trainer

Description

Trains linear regression, lasso, and ridge models in R

Arguments

Public fields

family

type of regression to perform, values can be "gaussian", "binomial", "multinomial", "mgaussian"

weights

observation weights. Can be total counts if responses are proportion matrices. Default is 1 for each observation

alpha

The elasticnet mixing parameter, alpha=1 is the lasso penalty, alpha=0 the ridge penalty, alpha=NULL is simple regression

lambda

the number of lambda values - default is 100

standardize

normalise the features in the given data

standardize.response

normalise the dependent variable between 0 and 1, default = FALSE

model

internal use

cvmodel

internal use

Flag

internal use

is_lasso

internal use

iid_names

internal use

Methods


Method new()

Usage

LMTrainer$new(family, weights, alpha, lambda, standardize.response)

Arguments

family

character, type of regression to perform, values can be "gaussian", "binomial", "multinomial", "mgaussian"

weights

numeric, observation weights. Can be total counts if responses are proportion matrices. Default is 1 for each observation

alpha

integer, the elasticnet mixing parameter, alpha=1 is the lasso penalty, alpha=0 the ridge penalty, alpha=NULL is simple regression

lambda

integer, the number of lambda values - default is 100

standardize.response

logical, normalise the dependent variable between 0 and 1, default = FALSE

Details

Create a new `LMTrainer` object.

Returns

A `LMTrainer` object.

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
}


Method fit()

Usage

LMTrainer$fit(X, y)

Arguments

X

data.frame containing train features

y

character, name of target variable

Details

Fits the LMTrainer model on given data

Returns

NULL, trains the model and saves it internally

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
}


Method predict()

Usage

LMTrainer$predict(df, lambda = NULL)

Arguments

df

data.frame containing test features

lambda

integer, the number of lambda values - default is 100. By default it picks the best value from the model.

Details

Returns predictions for test data

Returns

vector, a vector containing predictions

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
predictions <- lf$predict(df = housing)
}


Method cv_model()

Usage

LMTrainer$cv_model(X, y, nfolds, parallel, type.measure = "deviance")

Arguments

X

data.frame containing train features

y

character, name of target variable

nfolds

integer, number of folds

parallel

logical, whether to run the computation in parallel. Default = FALSE

type.measure

character, evaluation metric type. Default = deviance

Details

Train regression model using cross validation

Returns

NULL, trains the model and saves it in memory

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
}


Method cv_predict()

Usage

LMTrainer$cv_predict(df, lambda = NULL)

Arguments

df

data.frame containing test features

lambda

integer, the number of lambda values - default is 100. By default it picks the best value from the model.

Details

Get predictions from the cross validated regression model

Returns

vector, a vector containing predicted values

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
}


Method get_importance()

Usage

LMTrainer$get_importance()

Details

Get feature importance using model coefficients

Returns

a matrix containing feature coefficients

Examples

\dontrun{
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
coefs <- lf$get_importance()
}


Method clone()

The objects of this class are cloneable with this method.

Usage

LMTrainer$clone(deep = FALSE)

Arguments

deep

Whether to make a deep clone.

Details

Trains linear models such as Logistic, Lasso or Ridge regression models. It is built on the glmnet R package. This class provides fit, predict, and cross-validation functions.

Examples

Run this code

## ------------------------------------------------
## Method `LMTrainer$new`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
}

## ------------------------------------------------
## Method `LMTrainer$fit`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
}

## ------------------------------------------------
## Method `LMTrainer$predict`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
predictions <- lf$predict(df = housing)
}

## ------------------------------------------------
## Method `LMTrainer$cv_model`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
}

## ------------------------------------------------
## Method `LMTrainer$cv_predict`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
}

## ------------------------------------------------
## Method `LMTrainer$get_importance`
## ------------------------------------------------

if (FALSE) {
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
           "RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing)  <-  names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
coefs <- lf$get_importance()
}

Run the code above in your browser using DataLab