if (FALSE) {
#############################################################################
# EXAMPLE 1: Two-level random intercept model
#############################################################################
#--------------------------------------------------------------
# Simulate data
#--------------------------------------------------------------
set.seed(976)
G <- 150 ; rg <- c(10,20) # 150 groups with group sizes ranging from 10 to 20
#* simulate group sizes
ng <- round( stats::runif( G, min=rg[1], max=rg[2] ) )
idcluster <- rep(1:G, ng )
#* simulate covariate
iccx <- .3
x <- rep( stats::rnorm( G, sd=sqrt( iccx) ), ng ) +
stats::rnorm( sum(ng), sd=sqrt( 1 - iccx) )
#* simulate outcome
b0 <- 1.5 ; b1 <- .4 ; iccy <- .2
y <- b0 + b1*x + rep( stats::rnorm( G, sd=sqrt( iccy) ), ng ) +
stats::rnorm( sum(ng), sd=sqrt( 1 - iccy) )
#-----------------------
#--- arrange input for mlnormal function
id <- idcluster # cluster is identifier
X <- cbind( 1, x ) # matrix of covariates
N <- length(id) # number of units (clusters), which is G
MD <- max(ng) # maximum number of persons in a group
NP <- 2 # number of covariance parameters theta
#* list of design matrix for covariance matrix
# In the case of the random intercept model, the covariance structure is
# tau^2 * J + sigma^2 * I, where J is a matrix of ones and I is the
# identity matrix
Z <- as.list(1:G)
for (gg in 1:G){
Ngg <- ng[gg]
Z[[gg]] <- as.list( 1:2 )
Z[[gg]][[1]] <- matrix( 1, nrow=Ngg, ncol=Ngg ) # level 2 variance
Z[[gg]][[2]] <- diag(1,Ngg) # level 1 variance
}
Z_list <- Z
#* parameter list containing the powers of parameters
Z_index <- array( 0, dim=c(G,2,2) )
Z_index[ 1:G, 1, 1] <- Z_index[ 1:G, 2, 2] <- 1
#** starting values and parameter names
beta <- c( 1, 0 )
names(beta) <- c("int", "x")
theta <- c( .05, 1 )
names(theta) <- c("tau2", "sig2" )
#** create dataset for lme4 for comparison
dat <- data.frame(y=y, x=x, id=id )
#--------------------------------------------------------------
# Model 1: Maximum likelihood estimation
#--------------------------------------------------------------
#** mlnormal function
mod1a <- LAM::mlnormal( y=y, X=X, id=id, Z_list=Z_list, Z_index=Z_index,
beta=beta, theta=theta, method="ML" )
summary(mod1a)
# lme4::lmer function
library(lme4)
mod1b <- lme4::lmer( y ~ x + (1 | id ), data=dat, REML=FALSE )
summary(mod1b)
#--------------------------------------------------------------
# Model 2: Restricted maximum likelihood estimation
#--------------------------------------------------------------
#** mlnormal function
mod2a <- LAM::mlnormal( y=y, X=X, id=id, Z_list=Z_list, Z_index=Z_index,
beta=beta, theta=theta, method="REML" )
summary(mod2a)
# lme4::lmer function
mod2b <- lme4::lmer( y ~ x + (1 | id ), data=dat, REML=TRUE )
summary(mod2b)
#--------------------------------------------------------------
# Model 3: Estimation of standard deviation instead of variances
#--------------------------------------------------------------
# The model is now parametrized in standard deviations
# Variances are then modeled as tau^2 and sigma^2, respectively.
Z_index2 <- 2*Z_index # change loading matrix
# estimate model
mod3 <- LAM::mlnormal( y=y, X=X, id=id, Z_list=Z_list, Z_index=Z_index2,
beta=beta, theta=theta )
summary(mod3)
#--------------------------------------------------------------
# Model 4: Maximum posterior estimation
#--------------------------------------------------------------
# specify prior distributions for parameters
prior <- "
tau2 ~ dgamma(NA, 2, .5 )
sig2 ~ dinvgamma(NA, .1, .1 )
x ~ dnorm( NA, .2, 1000 )
"
# estimate model in mlnormal
mod4 <- LAM::mlnormal( y=y, X=X, id=id, Z_list=Z_list, Z_index=Z_index,
beta=beta, theta=theta, method="REML", prior=prior, vcov=FALSE )
summary(mod4)
#--------------------------------------------------------------
# Model 5: Estimation with regularization on beta and theta parameters
#--------------------------------------------------------------
#*** penalty on theta parameter
lambda_theta <- 10
weights_theta <- 1 + 0 * theta
#*** penalty on beta parameter
lambda_beta <- 3
weights_beta <- c( 0, 1.8 )
# estimate model
mod5 <- LAM::mlnormal( y=y, X=X, id=id, Z_list=Z_list, Z_index=Z_index,
beta=beta, theta=theta, method="ML", maxit=maxit,
lambda_theta=lambda_theta, weights_theta=weights_theta,
lambda_beta=lambda_beta, weights_beta=weights_beta )
summary(mod5)
#############################################################################
# EXAMPLE 2: Latent covariate model, two-level regression
#############################################################################
# Yb=beta_0 + beta_b*Xb + eb (between level) and
# Yw=beta_w*Xw + ew (within level)
#--------------------------------------------------------------
# Simulate data from latent covariate model
#--------------------------------------------------------------
set.seed(865)
# regression parameters
beta_0 <- 1 ; beta_b <- .7 ; beta_w <- .3
G <- 200 # number of groups
n <- 15 # group size
iccx <- .2 # intra class correlation x
iccy <- .35 # (conditional) intra class correlation y
# simulate latent variables
xb <- stats::rnorm(G, sd=sqrt( iccx ) )
yb <- beta_0 + beta_b * xb + stats::rnorm(G, sd=sqrt( iccy ) )
xw <- stats::rnorm(G*n, sd=sqrt( 1-iccx ) )
yw <- beta_w * xw + stats::rnorm(G*n, sd=sqrt( 1-iccy ) )
group <- rep( 1:G, each=n )
x <- xw + xb[ group ]
y <- yw + yb[ group ]
# test results on true data
lm( yb ~ xb )
lm( yw ~ xw )
# create vector of outcomes in the form
# ( y_11, x_11, y_21, x_21, ... )
dat <- cbind( y, x )
dat
Y <- as.vector( t(dat) ) # outcome vector
ny <- length(Y)
X <- matrix( 0, nrow=ny, ncol=2 )
X[ seq(1,ny,2), 1 ] <- 1 # design vector for mean y
X[ seq(2,ny,2), 2 ] <- 1 # design vector for mean x
id <- rep( group, each=2 )
#--------------------------------------------------------------
# Model 1: Linear regression ignoring multilevel structure
#--------------------------------------------------------------
# y=beta_0 + beta_t *x + e
# Var(y)=beta_t^2 * var_x + var_e
# Cov(y,x)=beta_t * var_x
# Var(x)=var_x
#** initial parameter values
theta <- c( 0, 1, .5 )
names(theta) <- c( "beta_t", "var_x", "var_e")
beta <- c(0,0)
names(beta) <- c("mu_y","mu_x")
# The unit i is a cluster in this example.
#--- define design matrices | list Z_list
Hlist <- list( matrix( c(1,0,0,0), 2, 2 ), # var(y)
matrix( c(1,0,0,0), 2, 2 ), # var(y) (two terms)
matrix( c(0,1,1,0), 2, 2 ), # cov(x,y)
matrix( c(0,0,0,1), 2, 2 ) ) # var(x)
U0 <- matrix( 0, nrow=2*n,ncol=2*n )
Ulist <- list( U0, U0, U0, U0 )
M <- length(Hlist)
for (mm in 1:M){ # mm <- 1
for (nn in 1:n){ # nn <- 0
Ulist[[ mm ]][ 2*(nn-1) + 1:2, 2*(nn-1) + 1:2 ] <- Hlist[[ mm ]]
}
}
Z_list <- as.list(1:G)
for (gg in 1:G){
Z_list[[gg]] <- Ulist
}
#--- define index vectors
Z_index <- array( 0, dim=c(G, 4, 3 ) )
K0 <- matrix( 0, nrow=4, ncol=3 )
colnames(K0) <- names(theta)
# Var(y)=beta_t^2 * var_x + var_e (matrices withn indices 1 and 2)
K0[ 1, c("beta_t","var_x") ] <- c(2,1) # beta_t^2 * var_x
K0[ 2, c("var_e") ] <- c(1) # var_e
# Cov(y,x)=beta_t * var_x
K0[ 3, c("beta_t","var_x")] <- c(1,1)
# Var(x)=var_x
K0[ 4, c("var_x") ] <- c(1)
for (gg in 1:G){
Z_index[gg,,] <- K0
}
#*** estimate model with mlnormal
mod1a <- LAM::mlnormal( y=Y, X=X, id=id, Z_list=Z_list, Z_index=Z_index,
beta=beta, theta=theta, method="REML", vcov=FALSE )
summary(mod1a)
#*** estimate linear regression with stats::lm
mod1b <- stats::lm( y ~ x )
summary(mod1b)
#--------------------------------------------------------------
# Model 2: Latent covariate model
#--------------------------------------------------------------
#** initial parameters
theta <- c( 0.12, .6, .5, 0, .2, .2 )
names(theta) <- c( "beta_w", "var_xw", "var_ew",
"beta_b", "var_xb", "var_eb")
#--- define design matrices | list Z_list
Hlist <- list( matrix( c(1,0,0,0), 2, 2 ), # var(y)
matrix( c(1,0,0,0), 2, 2 ), # var(y) (two terms)
matrix( c(0,1,1,0), 2, 2 ), # cov(x,y)
matrix( c(0,0,0,1), 2, 2 ) ) # var(x)
U0 <- matrix( 0, nrow=2*n,ncol=2*n )
Ulist <- list( U0, U0, U0, U0, # within structure
U0, U0, U0, U0 ) # between structure
M <- length(Hlist)
#*** within structure
design_within <- diag(n) # design matrix within structure
for (mm in 1:M){ # mm <- 1
Ulist[[ mm ]] <- base::kronecker( design_within, Hlist[[mm]] )
}
#*** between structure
design_between <- matrix(1, nrow=n, ncol=n)
# matrix of ones corresponding to group size
for (mm in 1:M){ # mm <- 1
Ulist[[ mm + M ]] <- base::kronecker( design_between, Hlist[[ mm ]] )
}
Z_list <- as.list(1:G)
for (gg in 1:G){
Z_list[[gg]] <- Ulist
}
#--- define index vectors Z_index
Z_index <- array( 0, dim=c(G, 8, 6 ) )
K0 <- matrix( 0, nrow=8, ncol=6 )
colnames(K0) <- names(theta)
# Var(y)=beta^2 * var_x + var_e (matrices withn indices 1 and 2)
K0[ 1, c("beta_w","var_xw") ] <- c(2,1) # beta_t^2 * var_x
K0[ 2, c("var_ew") ] <- c(1) # var_e
K0[ 5, c("beta_b","var_xb") ] <- c(2,1) # beta_t^2 * var_x
K0[ 6, c("var_eb") ] <- c(1) # var_e
# Cov(y,x)=beta * var_x
K0[ 3, c("beta_w","var_xw")] <- c(1,1)
K0[ 7, c("beta_b","var_xb")] <- c(1,1)
# Var(x)=var_x
K0[ 4, c("var_xw") ] <- c(1)
K0[ 8, c("var_xb") ] <- c(1)
for (gg in 1:G){
Z_index[gg,,] <- K0
}
#--- estimate model with mlnormal
mod2a <- LAM::mlnormal( y=Y, X=X, id=id, Z_list=Z_list, Z_index=Z_index,
beta=beta, theta=theta, method="ML" )
summary(mod2a)
#############################################################################
# EXAMPLE 3: Simple linear regression, single level
#############################################################################
#--------------------------------------------------------------
# Simulate data
#--------------------------------------------------------------
set.seed(875)
N <- 300
x <- stats::rnorm( N, sd=1.3 )
y <- .4 + .7 * x + stats::rnorm( N, sd=.5 )
dat <- data.frame( x, y )
#--------------------------------------------------------------
# Model 1: Linear regression modelled with residual covariance structure
#--------------------------------------------------------------
# matrix of predictros
X <- cbind( 1, x )
# list with design matrices
Z <- as.list(1:N)
for (nn in 1:N){
Z[[nn]] <- as.list( 1 )
Z[[nn]][[1]] <- matrix( 1, nrow=1, ncol=1 ) # residual variance
}
#* loading matrix
Z_index <- array( 0, dim=c(N,1,1) )
Z_index[ 1:N, 1, 1] <- 2 # parametrize residual standard deviation
#** starting values and parameter names
beta <- c( 0, 0 )
names(beta) <- c("int", "x")
theta <- c(1)
names(theta) <- c("sig2" )
# id vector
id <- 1:N
#** mlnormal function
mod1a <- LAM::mlnormal( y=y, X=X, id=id, Z_list=Z, Z_index=Z_index,
beta=beta, theta=theta, method="ML" )
summary(mod1a)
# estimate linear regression with stats::lm
mod1b <- stats::lm( y ~ x )
summary(mod1b)
#--------------------------------------------------------------
# Model 2: Linear regression modelled with bivariate covariance structure
#--------------------------------------------------------------
#** define design matrix referring to mean structure
X <- matrix( 0, nrow=2*N, ncol=2 )
X[ seq(1,2*N,2), 1 ] <- X[ seq(2,2*N,2), 2 ] <- 1
#** create outcome vector
y1 <- dat[ cbind( rep(1:N, each=2), rep(1:2, N ) ) ]
#** list with design matrices
Z <- as.list(1:N)
Z0 <- 0*matrix( 0, nrow=2,ncol=2)
ZXY <- ZY <- ZX <- Z0
# design matrix Var(X)
ZX[1,1] <- 1
# design matrix Var(Y)
ZY[2,2] <- 1
# design matrix covariance
ZXY[1,2] <- ZXY[2,1] <- 1
# Var(X)=sigx^2
# Cov(X,Y)=beta * sigx^2
# Var(Y)=beta^2 * sigx^2 + sige^2
Z_list0 <- list( ZY, ZY, ZXY, ZX )
for (nn in 1:N){
Z[[nn]] <- Z_list0
}
#* parameter list containing the powers of parameters
theta <- c(1,0.3,1)
names(theta) <- c("sigx", "beta", "sige" )
Z_index <- array( 0, dim=c(N,4,3) )
for (nn in 1:N){
# Var(X)
Z_index[nn, 4, ] <- c(2,0,0)
# Cov(X,Y)
Z_index[nn, 3, ] <- c(2,1,0)
# Var(Y)
Z_index[nn,1,] <- c(2,2,0)
Z_index[nn,2,] <- c(0,0,2)
}
#** starting values and parameter names
beta <- c( 0, 0 )
names(beta) <- c("Mx", "My")
# id vector
id <- rep( 1:N, each=2 )
#** mlnormal function
mod2a <- LAM::mlnormal( y=y1, X=X, id=id, Z_list=Z, Z_index=Z_index,
beta=beta, theta=theta, method="ML" )
summary(mod2a)
#--------------------------------------------------------------
# Model 3: Bivariate normal distribution in (sigma_X, sigma_Y, sigma_XY) parameters
#--------------------------------------------------------------
# list with design matrices
Z <- as.list(1:N)
Z0 <- 0*matrix( 0, nrow=2,ncol=2)
ZXY <- ZY <- ZX <- Z0
# design matrix Var(X)
ZX[1,1] <- 1
# design matrix Var(Y)
ZY[2,2] <- 1
# design matrix covariance
ZXY[1,2] <- ZXY[2,1] <- 1
Z_list0 <- list( ZX, ZY, ZXY )
for (nn in 1:N){
Z[[nn]] <- Z_list0
}
#* parameter list
theta <- c(1,1,.3)
names(theta) <- c("sigx", "sigy", "sigxy" )
Z_index <- array( 0, dim=c(N,3,3) )
for (nn in 1:N){
# Var(X)
Z_index[nn, 1, ] <- c(2,0,0)
# Var(Y)
Z_index[nn, 2, ] <- c(0,2,0)
# Cov(X,Y)
Z_index[nn, 3, ] <- c(0,0,1)
}
#** starting values and parameter names
beta <- c( 0, 0 )
names(beta) <- c("Mx", "My")
#** mlnormal function
mod3a <- LAM::mlnormal( y=y1, X=X, id=id, Z_list=Z, Z_index=Z_index,
beta=beta, theta=theta, method="ML" )
summary(mod3a)
#--------------------------------------------------------------
# Model 4: Bivariate normal distribution in parameters of Cholesky decomposition
#--------------------------------------------------------------
# list with design matrices
Z <- as.list(1:N)
Z0 <- 0*matrix( 0, nrow=2,ncol=2)
ZXY <- ZY <- ZX <- Z0
# design matrix Var(X)
ZX[1,1] <- 1
# design matrix Var(Y)
ZY[2,2] <- 1
# design matrix covariance
ZXY[1,2] <- ZXY[2,1] <- 1
Z_list0 <- list( ZX, ZXY, ZY, ZY )
for (nn in 1:N){
Z[[nn]] <- Z_list0
}
#* parameter list containing the powers of parameters
theta <- c(1,0.3,1)
names(theta) <- c("L11", "L21", "L22" )
Z_index <- array( 0, dim=c(N,4,3) )
for (nn in 1:N){
Z_index[nn,1,] <- c(2,0,0)
Z_index[nn,2,] <- c(1,1,0)
Z_index[nn,3,] <- c(0,2,0)
Z_index[nn,4,] <- c(0,0,2)
}
#** starting values and parameter names
beta <- c( 0, 0 )
names(beta) <- c("Mx", "My")
# id vector
id <- rep( 1:N, each=2 )
#** mlnormal function
mod4a <- LAM::mlnormal( y=y1, X=X, id=id, Z_list=Z, Z_index=Z_index,
beta=beta, theta=theta, method="ML" )
# parameter with lower diagonal entries of Cholesky matrix
mod4a$theta
# fill-in parameters for Cholesky matrix
L <- matrix(0,2,2)
L[ ! upper.tri(L) ] <- mod4a$theta
#** reconstruct covariance matrix
L
stats::cov.wt(dat, method="ML")$cov
}
Run the code above in your browser using DataLab