#############################################################
### Running a model with an additive and dominance effect ###
#############################################################
# generate random data
rand_data(500,5000)
### compute the relationship matrices
G.A <- cgrm.A(M,lambda=0.01)
G.D <- cgrm.D(M,lambda=0.01)
### generate the list of design matrices for clmm
Z_list = list(t(chol(G.A)),t(chol(G.D)))
### specify options
par_random = list(list(method="ridge",scale=var(y)/2 ,df=5, name="add"),
list(method="ridge",scale=var(y)/10,df=5, name="dom"))
### run
set_num_threads(1)
fit <- clmm(y = y, Z = Z_list, par_random=par_random, niter=5000, burnin=2500)
### inspect results
str(fit)
########################
### Cross Validation ###
########################
### 4-fold cross-validation with one repetition:
# generate random data
rand_data(500,5000)
### compute the list of masked phenotype-vectors for CV
y_CV <- cCV(y, fold=4, reps=1)
### Cross Validation using GBLUP
G.A <- cgrm.A(M,lambda=0.01)
### generate the list of design matrices for clmm
Z_list = list(t(chol(G.A)))
### specify options
h2 = 0.3
scale = unlist(lapply(y_CV,function(x)var(x,na.rm=T))) * h2
df = rep(5,length(y_CV))
par_random = list(list(method="ridge",scale=scale,df=df, name="animal"))
### run model with 4 threads
set_num_threads(4)
fit <- clmm(y = y_CV, Z = Z_list, par_random=par_random, niter=5000, burnin=2500)
### inspect results
str(fit)
### obtain predictions
pred <- get_pred(fit)
### prediction accuracy
get_cor(pred,y_CV,y)
########################################################
### GWAS using Bayesian Regression on marker windows ###
########################################################
# generate random data
rand_data(500,5000)
### generate the list of design matrices for clmm
Z_list = list(M)
### specify options
h2 = 0.3
scale = var(y) * h2
df = 5
# specifying the model
# Here we use ridge regression on the marker covariates
# and define a window size of 100 and a threshold of 0.01
# which defines the proportion of genetic variance accounted
# for by a single window
par_random = list(list(method="ridge",scale=scale,df=df,
GWAS=list(window_size=100, threshold=0.01), name="markers"))
### run
set_num_threads(1)
fit <- clmm(y =y, Z = Z_list, par_random=par_random, niter=2000, burnin=1000)
### inspect results
str(fit)
### extract GWAS part
gwas <- fit$markers$GWAS
# plot window variance proportions
plot(gwas$window_variance_proportion)
##########################################################
### Sparse Animal Model using the pedigreemm milk data ###
##########################################################
# cpgen offers two ways of running models with random effects that
# are assumed to follow some covariance structure.
# 1) Construct the Covariance matrix and pass the cholesky of that
# as design matrix for that random effect
# 2) Construct the inverse of the covariance matrix (ginverse) and pass the design
# matrix 'Z' that relates observations to factors in ginverse in conjunction
# with the symmetric ginverse.
# This is approach 2) which is more convenient for pedigree based
# animal models, as ginverse (Inverse of numerator relationship matrix) is
# very sparse and can be obtained very efficiently
set_num_threads(1)
# load the data
data(milk)
# get Ainverse
# Ainv <- as(getAInv(pedCows),"dgCMatrix")
T_Inv <- as(pedCows, "sparseMatrix")
D_Inv <- Diagonal(x=1/Dmat(pedCows))
Ainv<-t(T_Inv)dimnames(Ainv)[[1]]<-dimnames(Ainv)[[2]] <-pedCows@label
Ainv <- as(Ainv, "dgCMatrix")
# We need to construct the design matrix.
# Therefore we create a second id column with factor levels
# equal to the animals in the pedigree
milk$id2 <- factor(as.character(milk$id), levels = pedCows@label)
# set up the design matrix
Z <- sparse.model.matrix(~ -1 + id2, data = milk, drop.unused.levels=FALSE)
# run the model
niter = 5000
burnin = 2500
modAinv <- clmm(y = as.numeric(milk$milk), Z = list(Z), ginverse = list(Ainv),
niter = niter, burnin = burnin)
# This is approach 1) run an equivalent model using the cholesky of A
# get L from A = LL'
L <- as(t(relfactor(pedCows)),"dgCMatrix")
# match with ids
ZL <- L[match(milk$id, pedCows@label),]
# run the model
modL <- clmm(as.numeric(milk$milk), Z= list(ZL),
niter = niter, burnin = burnin)
### a more advanced model
# y = Xb + Zu + a + e
#
# u = permanent environment of animal
# a = additive genetic effect of animal
Zpe <- sparse.model.matrix(~ -1 + id2, drop.unused.levels = TRUE, data = milk)
# make X and account for lactation and herd
X <- sparse.model.matrix(~ 1 + lact + herd, data = milk)
niter = 10000
burnin = 2500
mod2 <- clmm(as.numeric(milk$milk), X = X, Z = list(Zpe,Z), ginverse = list(NULL, Ainv),
niter = niter, burnin = burnin)
# run all phenotypes in the milk dataset at once in parallel
Y <- list(as.numeric(milk$milk),as.numeric(milk$fat),as.numeric(milk$prot),as.numeric(milk$scs))
set_num_threads(4)
# ginverse version
model <- clmm(Y, X = X, Z = list(Zpe,Z), ginverse = list(NULL, Ainv),
niter = niter, burnin = burnin)
# get heritabilities and repeatabilities with their standard deviations
heritabilities <- array(0, dim=c(length(Y),2))
colnames(heritabilities) <- c("h2","sd")
# only use post-burnin samples
range <- (burnin+1):niter
# h2
heritabilities[,1] <- unlist(lapply(model, function(x)
mean(
x$Effect_2$posterior$variance[range] /
(x$Effect_1$posterior$variance[range] +
x$Effect_2$posterior$variance[range] +
x$Residual_Variance$Posterior[range]))
)
)
# standard deviation of h2
heritabilities[,2] <- unlist(lapply(model, function(x)
sd(
x$Effect_2$posterior$variance[range] /
(x$Effect_1$posterior$variance[range] +
x$Effect_2$posterior$variance[range] +
x$Residual_Variance$Posterior[range]))
)
)
Run the code above in your browser using DataLab