#--------------------------------------------------------
# EXAMPLE 1 - A Simple Example
#--------------------------------------------------------
# These examples use gdescent() and the example datasets from the gettingtothebottom package
library(gettingtothebottom)

# Generate some data for a simple bivariate example
set.seed(12345)
x <- sample(seq(from = -1, to = 1, by = 0.1), size = 50, replace = TRUE)
y <- 2*x + rnorm(50)
plot(x,y)
# Set up the least squares problem for gradient descent:
# minimize f(b) = (1/2) * ||y - Xb||^2
X <- as.matrix(x)
y <- as.vector(y)
f <- function(X, y, b) {
  (1/2) * norm(y - X %*% b, "F")^2
}
# Gradient of f with respect to b
grad_f <- function(X, y, b) {
  t(X) %*% (X %*% b - y)
}
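# Optional sanity check (an aside, not part of the original example): compare
# grad_f against a central finite-difference approximation of f. X has a
# single column here, so b is a 1 x 1 matrix.
b0 <- matrix(0.5)
eps <- 1e-6
fd <- (f(X, y, b0 + eps) - f(X, y, b0 - eps)) / (2 * eps)
c(analytic = grad_f(X, y, b0), finite_diff = fd)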
# Run a simple gradient descent example
simple_ex <- gdescent(f, grad_f, X, y, alpha = 0.01)
# We can compare our gradient descent results with what we get if we use the lm function
lm(y ~ X)
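# Hedged comparison sketch (NOT RUN): extracting the fitted coefficients from
# simple_ex assumes gdescent returns them in a component named `b`, which is
# a guess rather than documented behavior.
# cbind(gdescent = simple_ex$b, lm = coef(lm(y ~ X)))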
# Note that the algorithm may diverge if the step size (alpha) is too large
# THE FOLLOWING NOT RUN
# simple_ex2 <- gdescent(f, grad_f, X, y, alpha = 0.05, liveupdates = TRUE)
# The live updates show the norm of the gradient at each iteration; when alpha
# is too large, that norm grows without bound instead of shrinking to zero.
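# A hedged aside (not in the original example): for this least-squares f, the
# gradient is Lipschitz with constant L = sigma_max(X)^2, so any fixed step
# size alpha < 2/L is guaranteed not to diverge. cbind(1, X) mirrors the
# intercept column that gdescent appears to add (see the step size
# computation in Example 4 below).
L <- svd(cbind(1, X))$d[1]^2
2 / L  # an upper bound on a provably safe step size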
#--------------------------------------------------------
# EXAMPLE 2 - Linear Regression & Feature Scaling
#--------------------------------------------------------
f <- function(X, y, b) {
  (1/2) * norm(y - X %*% b, "F")^2
}
grad_f <- function(X, y, b) {
  t(X) %*% (X %*% b - y)
}
data(moviebudgets)
X <- as.matrix(moviebudgets$budget)
y <- as.vector(moviebudgets$rating)
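# A quick look at the two columns (an aside, not in the original example):
# their scales differ by orders of magnitude, which is what makes feature
# scaling matter in this problem.
summary(moviebudgets$budget)
summary(moviebudgets$rating)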
# THE FOLLOWING NOT RUN
# movies1 <- gdescent(f, grad_f, X, y, alpha = 1e-4, iter = 5000)
# We can compare our gradient descent results with what we get if we use the lm function
# THE FOLLOWING NOT RUN
# lm(y ~ X)
# Compare the above result with what we get without feature scaling
# THE FOLLOWING NOT RUN
# movies2 <- gdescent(f, grad_f, X, y, alpha = 1e-19, iter = 10000, liveupdates = TRUE, autoscaling = FALSE)
# Note that running the gradient descent algorithm on unscaled column vectors
# requires a much smaller step size and many more iterations.
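# Illustration (an aside, not in the original example): standardizing the
# budget column by hand shows why autoscaling helps; the raw column's largest
# singular value, and with it the Lipschitz constant of the gradient, is
# enormous, which is what forces the tiny 1e-19 step size above.
Xs <- scale(X)               # center to mean 0 and scale to unit variance
svd(X)$d[1] / svd(Xs)$d[1]   # ratio of largest singular values, raw vs scaled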
#--------------------------------------------------------
# EXAMPLE 3 - Multivariate Linear Regression
#--------------------------------------------------------
f <- function(X, y, b) {
  (1/2) * norm(y - X %*% b, "F")^2
}
grad_f <- function(X, y, b) {
  t(X) %*% (X %*% b - y)
}
data(baltimoreyouth)
B <- baltimoreyouth
X <- cbind(B$farms11, B$susp11, B$sclemp11, B$abshs11)
y <- as.vector(B$compl11)
# THE FOLLOWING NOT RUN
# meals_graduations <- gdescent(f, grad_f, X, y, alpha = 0.01, iter = 12000)
# We can compare our gradient descent results with what we get if we use the lm function
# THE FOLLOWING NOT RUN
# lm(y ~ X)
#--------------------------------------------------------
# EXAMPLE 4 - Logistic Regression
#--------------------------------------------------------
# Simulate a logistic regression problem with n observations and p predictors
set.seed(12345)
n <- 100
p <- 10
X <- matrix(rnorm(n * p), n, p)
b <- matrix(rnorm(p), p, 1)
e <- 0.5 * matrix(rnorm(n), n, 1)
z <- X %*% b + e
# Draw labels so that y = 1 with probability plogis(z)
y <- as.vector((runif(n) <= plogis(z)) + 0)
# Negative log-likelihood for logistic regression:
# l(b) = -y'Xb + sum(log(1 + exp(Xb)))
l <- function(X, y, b) {
  -t(y) %*% (X %*% b) + sum(log(1 + exp(X %*% b)))
}
# Gradient of l; plogis(X %*% b) gives the fitted probabilities
grad_l <- function(X, y, b) {
  -t(X) %*% (y - plogis(X %*% b))
}
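# A hedged aside (not in the original example): log(1 + exp(z)) overflows for
# large z. A numerically stable drop-in replacement for l uses the identity
# log(1 + exp(z)) = max(z, 0) + log(1 + exp(-|z|)).
l_stable <- function(X, y, b) {
  z <- X %*% b
  -t(y) %*% z + sum(pmax(z, 0) + log1p(exp(-abs(z))))
}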
# Step size alpha = 1/L, where L = 0.25 * sigma_max(cbind(1, X))^2 bounds the
# Lipschitz constant of grad_l; cbind(1, X) matches the intercept column that
# gdescent appears to add
alpha <- 1 / (0.25 * svd(cbind(1, X))$d[1]^2)
# Use the gradient descent algorithm to solve the logistic regression problem
# THE FOLLOWING NOT RUN
# logistic_ex <- gdescent(l, grad_l, X, y, alpha = alpha, iter = 15000)
# Use the glm function to solve the same problem
# THE FOLLOWING NOT RUN
# glm(y ~ X, family = binomial)