# NOT RUN {
#############################################################################
# EXAMPLE 1: Examples based on dataset fractions.subtraction.data
#############################################################################
## dataset fractions.subtraction.data and corresponding Q-Matrix
head(fraction.subtraction.data)
fraction.subtraction.qmatrix
## Misspecification in parameter specification for method CDM::din()
## leads to warnings and terminates estimation procedure. E.g.,
# See Q-Matrix specification
fractions.dina.warning1 <- CDM::din(data=fraction.subtraction.data,
q.matrix=t(fraction.subtraction.qmatrix))
# See guess.init specification
fractions.dina.warning2 <- CDM::din(data=fraction.subtraction.data,
q.matrix=fraction.subtraction.qmatrix, guess.init=rep(1.2,
ncol(fraction.subtraction.data)))
# See rule specification
fractions.dina.warning3 <- CDM::din(data=fraction.subtraction.data,
q.matrix=fraction.subtraction.qmatrix, rule=c(rep("DINA",
10), rep("DINO", 9)))
## Parameter estimation of DINA model
# rule="DINA" is default
fractions.dina <- CDM::din(data=fraction.subtraction.data,
q.matrix=fraction.subtraction.qmatrix, rule="DINA")
attributes(fractions.dina)
str(fractions.dina)
## For instance assessing the guessing parameters through
## assignment
fractions.dina$guess
## corresponding summaries, including IDI,
## most frequent skill classes and information
## criteria AIC and BIC
summary(fractions.dina)
## In particular, assessing detailed summary through assignment
detailed.summary.fs <- summary(fractions.dina)
str(detailed.summary.fs)
## Item discrimination index of item 8 is too low. This is also
## visualized in the first plot
plot(fractions.dina)
## The reason therefore is a high guessing parameter
round(fractions.dina$guess[,1], 2)
## Estimate DINA model with different random initial parameters using seed=1345
fractions.dina1 <- CDM::din(data=fraction.subtraction.data,
q.matrix=fraction.subtraction.qmatrix, rule="DINA", seed=1345)
## Fix the guessing parameters of items 5, 8 and 9 equal to .20
# define a constraint.guess matrix
constraint.guess <- matrix(c(5,8,9, rep(0.2, 3)), ncol=2)
fractions.dina.fixed <- CDM::din(data=fraction.subtraction.data,
q.matrix=fraction.subtraction.qmatrix,
constraint.guess=constraint.guess)
## The second plot shows the expected (MAP) and observed skill
## probabilities. The third plot visualizes the skill class
## occurrence probabilities; Only the 'top.n.skill.classes' most frequent
## skill classes are labeled; it is obvious that the skill class '11111111'
## (all skills are mastered) is the most probable in this population.
## The fourth plot shows the skill probabilities conditional on response
## patterns; in this population the skills 3 and 6 seem to be
## mastered easier than the others. The fourth plot shows the
## skill probabilities conditional on a specified response
## pattern; it is shown whether a skill is mastered (above
## .5+'uncertainty') unclassifiable (within the boundaries) or
## not mastered (below .5-'uncertainty'). In this case, the
## 527th respondent was chosen; if no response pattern is
## specified, the plot will not be shown (of course)
pattern <- paste(fraction.subtraction.data[527, ], collapse="")
plot(fractions.dina, pattern=pattern, display.nr=4)
#uncertainty=0.1, top.n.skill.classes=6 are default
plot(fractions.dina.fixed, uncertainty=0.1, top.n.skill.classes=6,
pattern=pattern)
# }
# NOT RUN {
#############################################################################
# EXAMPLE 2: Examples based on dataset sim.dina
#############################################################################
# DINA Model
d1 <- CDM::din(sim.dina, q.matr=sim.qmatrix, rule="DINA",
conv.crit=0.01, maxit=500, progress=TRUE)
summary(d1)
# DINA model with hierarchical skill classes (Hierarchical DINA model)
# 1st step: estimate an initial full model to look at the indexing
# of skill classes
d0 <- CDM::din(sim.dina, q.matr=sim.qmatrix, maxit=1)
d0$attribute.patt.splitted
# [,1] [,2] [,3]
# [1,] 0 0 0
# [2,] 1 0 0
# [3,] 0 1 0
# [4,] 0 0 1
# [5,] 1 1 0
# [6,] 1 0 1
# [7,] 0 1 1
# [8,] 1 1 1
#
# In this example, following hierarchical skill classes are only allowed:
# 000, 001, 011, 111
# We define therefore a vector of indices for skill classes with
# zero probabilities (see entries in the rows of the matrix
# d0$attribute.patt.splitted above)
zeroprob.skillclasses <- c(2,3,5,6) # classes 100, 010, 110, 101
# estimate the hierarchical DINA model
d1a <- CDM::din(sim.dina, q.matr=sim.qmatrix,
zeroprob.skillclasses=zeroprob.skillclasses )
summary(d1a)
# Mixed DINA and DINO Model
d1b <- CDM::din(sim.dina, q.matr=sim.qmatrix, rule=
c(rep("DINA", 7), rep("DINO", 2)), conv.crit=0.01,
maxit=500, progress=FALSE)
summary(d1b)
# DINO Model
d2 <- CDM::din(sim.dina, q.matr=sim.qmatrix, rule="DINO",
conv.crit=0.01, maxit=500, progress=FALSE)
summary(d2)
# Comparison of DINA and DINO estimates
lapply(list("guessing"=rbind("DINA"=d1$guess[,1],
"DINO"=d2$guess[,1]), "slipping"=rbind("DINA"=
d1$slip[,1], "DINO"=d2$slip[,1])), round, 2)
# Comparison of the information criteria
c("DINA"=d1$AIC, "MIXED"=d1b$AIC, "DINO"=d2$AIC)
# following estimates:
d1$coef # guessing and slipping parameter
d1$guess # guessing parameter
d1$slip # slipping parameter
d1$skill.patt # probabilities for skills
d1$attribute.patt # skill classes with probabilities
d1$subj.pattern # pattern per subject
# posterior probabilities for every response pattern
d1$posterior
# Equal guessing parameters
d2a <- CDM::din( data=sim.dina, q.matrix=sim.qmatrix,
guess.equal=TRUE, slip.equal=FALSE )
d2a$coef
# Equal guessing and slipping parameters
d2b <- CDM::din( data=sim.dina, q.matrix=sim.qmatrix,
guess.equal=TRUE, slip.equal=TRUE )
d2b$coef
#############################################################################
# EXAMPLE 3: Examples based on dataset sim.dino
#############################################################################
# DINO Estimation
d3 <- CDM::din(sim.dino, q.matr=sim.qmatrix, rule="DINO",
conv.crit=0.005, progress=FALSE)
# Mixed DINA and DINO Model
d3b <- CDM::din(sim.dino, q.matr=sim.qmatrix,
rule=c(rep("DINA", 4), rep("DINO", 5)), conv.crit=0.001,
progress=FALSE)
# DINA Estimation
d4 <- CDM::din(sim.dino, q.matr=sim.qmatrix, rule="DINA",
conv.crit=0.005, progress=FALSE)
# Comparison of DINA and DINO estimates
lapply(list("guessing"=rbind("DINO"=d3$guess[,1], "DINA"=d4$guess[,1]),
"slipping"=rbind("DINO"=d3$slip[,1], "DINA"=d4$slip[,1])), round, 2)
# Comparison of the information criteria
c("DINO"=d3$AIC, "MIXED"=d3b$AIC, "DINA"=d4$AIC)
#############################################################################
# EXAMPLE 4: Example estimation with weights based on dataset sim.dina
#############################################################################
# Here, a weighted maximum likelihood estimation is used
# This could be useful for survey data.
# i.e. first 200 persons have weight 2, the other have weight 1
(weights <- c(rep(2, 200), rep(1, 200)))
d5 <- CDM::din(sim.dina, sim.qmatrix, rule="DINA", conv.crit=
0.005, weights=weights, progress=FALSE)
# Comparison of the information criteria
c("DINA"=d1$AIC, "WEIGHTS"=d5$AIC)
#############################################################################
# EXAMPLE 5: Example estimation within a balanced incomplete
## block (BIB) design generated on dataset sim.dina
#############################################################################
# generate BIB data
# The next example shows that the din function works for
# (relatively arbitrary) missing value pattern
# Here, a missing by design is generated in the dataset dinadat.bib
sim.dina.bib <- sim.dina
sim.dina.bib[1:100, 1:3] <- NA
sim.dina.bib[101:300, 4:8] <- NA
sim.dina.bib[301:400, c(1,2,9)] <- NA
d6 <- CDM::din(sim.dina.bib, sim.qmatrix, rule="DINA",
conv.crit=0.0005, weights=weights, maxit=200)
d7 <- CDM::din(sim.dina.bib, sim.qmatrix, rule="DINO",
conv.crit=0.005, weights=weights)
# Comparison of DINA and DINO estimates
lapply(list("guessing"=rbind("DINA"=d6$guess[,1],
"DINO"=d7$guess[,1]), "slipping"=rbind("DINA"=
d6$slip[,1], "DINO"=d7$slip[,1])), round, 2)
#############################################################################
# EXAMPLE 6: DINA model with attribute hierarchy
#############################################################################
set.seed(987)
# assumed skill distribution: P(000)=P(100)=P(110)=P(111)=.245 and
# "deviant pattern": P(010)=.02
K <- 3 # number of skills
# define alpha
alpha <- scan()
0 0 0
1 0 0
1 1 0
1 1 1
0 1 0
alpha <- matrix( alpha, length(alpha)/K, K, byrow=TRUE )
alpha <- alpha[ c( rep(1:4,each=245), rep(5,20) ), ]
# define Q-matrix
q.matrix <- scan()
1 0 0 1 0 0 1 0 0
0 1 0 0 1 0 0 1 0
0 0 1 0 1 0 0 0 1
1 1 0 1 0 1 0 1 1
q.matrix <- matrix( q.matrix, nrow=length(q.matrix)/K, ncol=K, byrow=TRUE )
# simulate DINA data
dat <- CDM::sim.din( alpha=alpha, q.matrix=q.matrix )$dat
#*** Model 1: estimate DINA model | no skill space restriction
mod1 <- CDM::din( dat, q.matrix )
#*** Model 2: DINA model | hierarchy A2 > A3
B <- "A2 > A3"
skill.names <- paste0("A",1:3)
skillspace <- CDM::skillspace.hierarchy( B, skill.names )$skillspace.reduced
mod2 <- CDM::din( dat, q.matrix, skillclasses=skillspace )
#*** Model 3: DINA model | linear hierarchy A1 > A2 > A3
# This is a misspecied model because due to P(010)=.02 the relation A1>A2
# does not hold.
B <- "A1 > A2
A2 > A3"
skill.names <- paste0("A",1:3)
skillspace <- CDM::skillspace.hierarchy( B, skill.names )$skillspace.reduced
mod3 <- CDM::din( dat, q.matrix, skillclasses=skillspace )
#*** Model 4: 2PL model in gdm
mod4 <- CDM::gdm( dat, theta.k=seq(-5,5,len=21),
decrease.increments=TRUE, skillspace="normal" )
summary(mod4)
anova(mod1,mod2)
## Model loglike Deviance Npars AIC BIC Chisq df p
## 2 Model 2 -7052.460 14104.92 29 14162.92 14305.24 0.9174 2 0.63211
## 1 Model 1 -7052.001 14104.00 31 14166.00 14318.14 NA NA NA
anova(mod2,mod3)
## Model loglike Deviance Npars AIC BIC Chisq df p
## 2 Model 2 -7059.058 14118.12 27 14172.12 14304.63 13.19618 2 0.00136
## 1 Model 1 -7052.460 14104.92 29 14162.92 14305.24 NA NA NA
anova(mod2,mod4)
## Model loglike Deviance Npars AIC BIC Chisq df p
## 2 Model 2 -7220.05 14440.10 24 14488.10 14605.89 335.1805 5 0
## 1 Model 1 -7052.46 14104.92 29 14162.92 14305.24 NA NA NA
# compare fit statistics
summary( CDM::modelfit.cor.din( mod2 ) )
summary( CDM::modelfit.cor.din( mod4 ) )
#############################################################################
# EXAMPLE 7: Fitting the basic local independence model (BLIM) with din
#############################################################################
library(pks)
data(DoignonFalmagne7, package="pks")
## str(DoignonFalmagne7)
## $ K : int [1:9, 1:5] 0 1 0 1 1 1 1 1 1 0 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:9] "00000" "10000" "01000" "11000" ...
## .. ..$ : chr [1:5] "a" "b" "c" "d" ...
## $ N.R: Named int [1:32] 80 92 89 3 2 1 89 16 18 10 ...
## ..- attr(*, "names")=chr [1:32] "00000" "10000" "01000" "00100" ...
# The idea is to fit the local independence model with the din function.
# This can be accomplished by specifying a DINO model with
# prespecified skill classes.
# extract dataset
dat <- as.numeric( unlist( sapply( names(DoignonFalmagne7$N.R),
FUN=function( ll){ strsplit( ll, split="") } ) ) )
dat <- matrix( dat, ncol=5, byrow=TRUE )
colnames(dat) <- colnames(DoignonFalmagne7$K)
rownames(dat) <- names(DoignonFalmagne7$N.R)
# sample weights
weights <- DoignonFalmagne7$N.R
# define Q-matrix
q.matrix <- t(DoignonFalmagne7$K)
v1 <- colnames(q.matrix) <- paste0("S", colnames(q.matrix))
q.matrix <- q.matrix[, - 1] # remove S00000
# define skill classes
SC <- ncol(q.matrix)
skillclasses <- matrix( 0, nrow=SC+1, ncol=SC)
colnames(skillclasses) <- colnames(q.matrix)
rownames(skillclasses) <- v1
skillclasses[ cbind( 2:(SC+1), 1:SC ) ] <- 1
# estimate BLIM with din function
mod1 <- CDM::din(data=dat, q.matrix=q.matrix, skillclasses=skillclasses,
rule="DINO", weights=weights )
summary(mod1)
## Item parameters
## item guess slip IDI rmsea
## a a 0.158 0.162 0.680 0.011
## b b 0.145 0.159 0.696 0.009
## c c 0.008 0.181 0.811 0.001
## d d 0.012 0.129 0.859 0.001
## e e 0.025 0.146 0.828 0.007
# estimate basic local independence model with pks package
mod2 <- pks::blim(K, N.R, method="ML") # maximum likelihood estimation by EM algorithm
mod2
## Error and guessing parameters
## beta eta
## a 0.164871 0.103065
## b 0.163113 0.095074
## c 0.188839 0.000004
## d 0.079835 0.000003
## e 0.088648 0.019910
# }
Run the code above in your browser using DataLab