rf.effectSize: Random Forest effect size

Description

Parameter effect size based on partial dependency (Cafri & Bailey, 2016)

Usage

rf.effectSize(x, y, ...)

Arguments

A randomForest model object

A vector represent the independent variable of intrests

...

Arguments passed to the partial dependency function, requires x.var, pred.data,

Value

A vector (single value) of the parameter effect size

References

Cafri, G., B.A. Bailey (2016) Understanding Variable Effects from Black Box Prediction: Quantifying Effects in Tree Ensembles Using Partial Dependence. Journal of Data Science 14:67-96

Examples

Run this code

# NOT RUN {
 library(randomForest)
   data(airquality)
   airquality <- na.omit(airquality)
     fit.reg <- randomForest(Ozone ~ ., data=airquality)
 
 # Parameter effect sizes	
 rf.effectSize(fit.reg, y = airquality$Solar.R, pred.data = airquality, x.var = Solar.R)
 rf.effectSize(fit.reg, y = airquality$Wind, pred.data = airquality, x.var = Wind)
 rf.effectSize(fit.reg, y = airquality$Temp, pred.data = airquality, x.var = Temp)
 rf.effectSize(fit.reg, y = airquality$Month, pred.data = airquality, x.var = Month)
 rf.effectSize(fit.reg, y = airquality$Day, pred.data = airquality, x.var = Day)
 	
# }
# NOT RUN {
 # Bootstrap of effect size for Wind and Temp
 B = 999
 n = nrow(airquality)
 es.boot.wind <- vector()
 es.boot.temp <- vector()
   for(i in 1:B) {
     boot.samples <- airquality[sample(1:nrow(airquality), n, replace = TRUE),]
       fmla <- stats::as.formula(paste(paste("Ozone", "~", sep=""), paste(".", collapse= "")))   
         fit <- randomForest(fmla, data = boot.samples)
     es.boot.wind <- append(es.boot.wind, rf.effectSize(fit, y = boot.samples$Wind, 
 	                       pred.data = boot.samples, x.var = Wind))
     es.boot.temp <- append(es.boot.temp, rf.effectSize(fit, y = boot.samples$Temp, 
 	                       pred.data = boot.samples,x.var = Temp))
   }        
  se <- function(x) sqrt(var(x, na.rm = TRUE) / length(na.omit(x)))
    cat("Bootstrap variance for Wind:", var(es.boot.wind), "\n")
      cat("Bootstrap standard error for Wind:", se(es.boot.wind), "\n","\n")
    cat("Bootstrap variance for Temp:", var(es.boot.temp), "\n")
  cat("Bootstrap standard error for Temp:", se(es.boot.temp), "\n")

 # Confidence intervals of Bootstrap of effect size for Wind
 p=0.95
 y <- sort(es.boot.wind) 
 x <- 1:length(y)
   plx <- stats::predict(stats::loess(y ~ x), se=TRUE)
   lci = plx$fit - stats::qt(p, plx$df) * plx$se.fit
   uci = plx$fit + stats::qt(p, plx$df) * plx$se.fit
       graphics::plot(x, y, type="n", main="Effect size Bootstrap CI for Wind", 
 	    sub=paste("confidence intervals at", p))
       graphics::polygon(c(x,rev(x)), c(lci, rev(uci)), col="grey86")
       graphics::points(x, y, pch=20, cex=0.70)
       graphics::lines(x, plx[["fit"]], lty=3)
 
 # Confidence intervals of Bootstrap of effect size for Temp
 p=0.95
 y <- sort(es.boot.temp) 
 x <- 1:length(y)
   plx <- stats::predict(stats::loess(y ~ x), se=TRUE)
   lci = plx$fit - stats::qt(p, plx$df) * plx$se.fit
   uci = plx$fit + stats::qt(p, plx$df) * plx$se.fit
       graphics::plot(x, y, type="n", main="Effect size Bootstrap CI for Temp", 
 	    sub=paste("confidence intervals at", p))
       graphics::polygon(c(x,rev(x)), c(lci, rev(uci)), col="grey86")
       graphics::points(x, y, pch=20, cex=0.70)
       graphics::lines(x, plx[["fit"]], lty=3)
 	  
 # Plot bootstrap of wind effect size
 pdf <- density(es.boot.wind)
 plot(pdf, type="n", main="Bootstrap of effect size wind (n=99)", 
      ylab="p", xlab="effect size")
   polygon(pdf, col="grey")
   abline(v=mean(es.boot.wind))
     abline(v=mean(es.boot.wind)-sd(es.boot.wind), col="blue", lty=3)
     abline(v=mean(es.boot.wind)+sd(es.boot.wind), col="blue", lty=3)	  
 
 # Plot bootstrap of temp effect size	
 pdf <- density(es.boot.temp)
 plot(pdf, type="n", main="Bootstrap of effect size temp (n=99)", 
      ylab="p", xlab="effect size")
   polygon(pdf, col="grey")	
   abline(v=mean(es.boot.temp))
     abline(v=mean(es.boot.temp)-sd(es.boot.temp), col="blue", lty=3)
     abline(v=mean(es.boot.temp)+sd(es.boot.temp), col="blue", lty=3)
# }
# NOT RUN {
# }