# NOT RUN {
##------------------------------------------------------------------------------
## Synthetic example: continuous response
##
## Setting: high correlation, quadratic time with a quadratic interaction,
## and a largish number of noisy variables.
##
## Illustrates how the modified gradient improves performance, and compares
## performance against the ideal (true) and a well specified linear model.
##------------------------------------------------------------------------------

## Simulate the data.
## Simulation 2: main effects (x1, x3, x4), quad-time-interaction (x2).
dtaO <- simLong(
  n = 100,
  ntest = 100,
  model = 2,
  family = "Continuous",
  q = 25
)

## Keep the simulated data in both data-frame and list form.
dta <- dtaO$dta
dtaL <- dtaO$dtaL

## Training-set selector; used below as an index into the list data.
trn <- dtaO$trn

## Formulas for the linear model comparisons reported further down.
f.linr <- "y~g( x1+x2+x3+x4+x1*time+x2*time+x3*time+x4*time )"
f.true <- dtaO$f.true

## Split the list data into training and test pieces once, so that every
## fit/predict call below is fed exactly the same subsets.
x.trn <- dtaL$features[trn, ]
tm.trn <- dtaL$time[trn]
id.trn <- dtaL$id[trn]
y.trn <- dtaL$y[trn]

x.tst <- dtaL$features[-trn, ]
tm.tst <- dtaL$time[-trn]
id.tst <- dtaL$id[-trn]
y.tst <- dtaL$y[-trn]

## Fit 1: modified tree gradient (the default).
o.1 <- boostmtree(
  x.trn, tm.trn, id.trn, y.trn,
  family = "Continuous",
  M = 350
)
p.1 <- predict(o.1, x.tst, tm.tst, id.tst, y.tst)

## Fit 2: non-modified tree gradient (nmtg).
o.2 <- boostmtree(
  x.trn, tm.trn, id.trn, y.trn,
  family = "Continuous",
  M = 350,
  mod.grad = FALSE
)
p.2 <- predict(o.2, x.tst, tm.tst, id.tst, y.tst)

## Fit 3: rho fixed at 0.
o.3 <- boostmtree(
  x.trn, tm.trn, id.trn, y.trn,
  family = "Continuous",
  M = 350,
  rho = 0
)
p.3 <- predict(o.3, x.tst, tm.tst, id.tst, y.tst)

## RMSE values, compared to generalized least squares (GLS) fits of the
## true model and of the well specified linear model (LM).
cat("true LM :", boostmtree:::gls.rmse(f.true, dta, trn), "\n")
cat("well specified LM :", boostmtree:::gls.rmse(f.linr, dta, trn), "\n")
cat("boostmtree :", p.1$rmse, "\n")
cat("boostmtree (nmtg):", p.2$rmse, "\n")
cat("boostmtree (rho=0):", p.3$rmse, "\n")

## Predicted value plots, one per fit.
plot(p.1)
plot(p.2)
plot(p.3)
##------------------------------------------------------------------------------
## Synthetic example: binary response
##
## Setting: high correlation, quadratic time with a quadratic interaction,
## and a largish number of noisy variables.
##------------------------------------------------------------------------------

## Simulate the data.
## Simulation 2: main effects (x1, x3, x4), quad-time-interaction (x2).
dtaO <- simLong(
  n = 100,
  ntest = 100,
  model = 2,
  family = "Binary",
  q = 25
)

## Keep the simulated data in both data-frame and list form.
dta <- dtaO$dta
dtaL <- dtaO$dtaL

## Training-set selector; used below as an index into the list data.
trn <- dtaO$trn

## Formulas for linear model comparisons.
## NOTE(review): neither formula is referenced again in the visible part of
## this example -- confirm whether a comparison step is missing.
f.linr <- "y~g( x1+x2+x3+x4+x1*time+x2*time+x3*time+x4*time )"
f.true <- dtaO$f.true

## Split the list data into training and test pieces.
x.trn <- dtaL$features[trn, ]
tm.trn <- dtaL$time[trn]
id.trn <- dtaL$id[trn]
y.trn <- dtaL$y[trn]

x.tst <- dtaL$features[-trn, ]
tm.tst <- dtaL$time[-trn]
id.tst <- dtaL$id[-trn]
y.tst <- dtaL$y[-trn]

## Modified tree gradient (the default), fitted on the training pieces and
## then applied to the held-out pieces.
o.1 <- boostmtree(
  x.trn, tm.trn, id.trn, y.trn,
  family = "Binary",
  M = 350
)
p.1 <- predict(o.1, x.tst, tm.tst, id.tst, y.tst)
# }
# Run the code above in your browser using DataLab