# NOT RUN {
# load care library
library("care")
## example with large number of samples and small dimension
## (using empirical estimates of regression coefficients)
# diabetes data
data(efron2004)
x = efron2004$x
y = efron2004$y
n = dim(x)[1]
d = dim(x)[2]
xnames = colnames(x)
# empirical regression coefficients
fit = slm(x, y, lambda=0, lambda.var=0)
fit
# note that in this example the regression coefficients
# and the standardized regression coefficients are identical
# as the input data have been standardized to mean zero and variance one
# compute corresponding t scores / partial correlations
df = n-d-1
pcor = pcor.shrink(cbind(y,x), lambda=0)[-1,1]
t = pcor * sqrt(df/(1-pcor^2))
t.pval = 2 - 2 * pt(abs(t), df)
b = fit$coefficients[1,-1]
cbind(b, pcor, t, t.pval)
# compare results with those from lm function
lm.out = lm(y ~ x)
summary(lm.out)
# prediction of fitted values at the position of the training data
lm.out$fitted.values
mu.hat = predict(fit, x) # precticted means
mu.hat
attr(mu.hat, "sd") # predictive error
sd(y-mu.hat)
# ordering of the variables using squared empirical CAR score
car = carscore(x, y, lambda=0)
ocar = order(car^2, decreasing=TRUE)
xnames[ocar]
# CAR regression models with 5, 7, 9 included predictors
car.predlist = make.predlist(ocar, numpred = c(5,7,9), name="CAR")
car.predlist
slm(x, y, car.predlist, lambda=0, lambda.var=0)
# plot regression coefficients for all possible CAR models
p=ncol(x)
car.predlist = make.predlist(ocar, numpred = 1:p, name="CAR")
cm = slm(x, y, car.predlist, lambda=0, lambda.var=0)
bmat = cm$coefficients[,-1]
bmat
par(mfrow=c(2,1))
plot(1:p, bmat[,1], type="l",
ylab="estimated regression coefficients",
xlab="number of included predictors",
main="CAR Regression Models for Diabetes Data",
xlim=c(1,p+1), ylim=c(min(bmat), max(bmat)))
for (i in 2:p) lines(1:p, bmat[,i], col=i, lty=i)
for (i in 1:p) points(1:p, bmat[,i], col=i)
for (i in 1:p) text(p+0.5, bmat[p,i], xnames[i])
plot(1:p, cm$R2, type="l",
ylab="estimated R2",
xlab="number of included predictors",
main="Proportion of Explained Variance",
ylim=c(0,0.6))
R2max = max(cm$R2)
lines(c(1,p), c(R2max, R2max), col=2)
par(mfrow=c(1,1))
## example with small number of samples and large dimension
## (using shrinkage estimates of regression coefficients)
data(lu2004)
dim(lu2004$x) # 30 403
fit = slm(lu2004$x, lu2004$y)
fit
# }
Run the code above in your browser using DataLab