simdata: Simulate response patterns

Description

Simulates response patterns for compensatory and noncompensatory MIRT models from multivariate normally distributed factor ($\theta$) scores, or from a user input matrix of $\theta$'s.

Usage

simdata(a, d, N, itemtype, sigma = NULL, mu = NULL, guess = 0,
  upper = 1, nominal = NULL, Theta = NULL, gpcm_mats = list(),
  returnList = FALSE, model = NULL, which.items = NULL, mins = 0)

Arguments

a matrix/vector of slope parameters. If slopes are to be constrained to zero then use NA or simply set them equal to 0

a matrix/vector of intercepts. The matrix should have as many columns as the item with the largest number of categories, and filled empty locations with NA. When a vector is used the test is assumed to consist only of dichotomous items (becau

sample size

itemtype

a character vector of length nrow(a) (or 1, if all the item types are the same) specifying the type of items to simulate. Inputs can either be the same as the inputs found in mirt or the inter

sigma

a covariance matrix of the underlying distribution. Default is the identity matrix. Used when Theta is not supplied

a mean vector of the underlying distribution. Default is a vector of zeros. Used when Theta is not supplied

guess

a vector of guessing parameters for each item; only applicable for dichotomous items. Must be either a scalar value that will affect all of the dichotomous items, or a vector with as many values as to be simulated items

upper

same as guess, but for upper bound parameters

nominal

a matrix of specific item category slopes for nominal models. Should be the dimensions as the intercept specification with one less column, with NA in locations where not applicable. Note that during estimation the first slope will be constra

Theta

a user specified matrix of the underlying ability parameters, where nrow(Theta) == N and ncol(Theta) == ncol(a). When this is supplied the N input is not required

gpcm_mats

a list of matricies specifying the scoring scheme for generalized partial credit models (see mirt for details)

returnList

logical; return a list containing the data, item objects defined by mirt containing the population parameters and item structure, and the latent trait matrix Theta? Default is FALSE

model

a single group object, typically returned by functions such as mirt or bfactor. Supplying this will render all other parameter elements (excluding the The

which.items

an integer vector used to indicate which items to simulate when a model input is included. Default simulates all items

mins

an integer vector (or single value to be used for each item) indicating what the lowest category should be. If model is supplied then this will be extracted from slot(mod, 'Data')$mins, otherwise the default is 0

Details

Returns a data matrix simulated from the parameters, or a list containing the data, item objects, and Theta matrix.

References

Reckase, M. D. (2009). Multidimensional Item Response Theory. New York: Springer.

Examples

Run this code

### Parameters from Reckase (2009), p. 153

set.seed(1234)

a <- matrix(c(
 .7471, .0250, .1428,
 .4595, .0097, .0692,
 .8613, .0067, .4040,
1.0141, .0080, .0470,
 .5521, .0204, .1482,
1.3547, .0064, .5362,
1.3761, .0861, .4676,
 .8525, .0383, .2574,
1.0113, .0055, .2024,
 .9212, .0119, .3044,
 .0026, .0119, .8036,
 .0008, .1905,1.1945,
 .0575, .0853, .7077,
 .0182, .3307,2.1414,
 .0256, .0478, .8551,
 .0246, .1496, .9348,
 .0262, .2872,1.3561,
 .0038, .2229, .8993,
 .0039, .4720, .7318,
 .0068, .0949, .6416,
 .3073, .9704, .0031,
 .1819, .4980, .0020,
 .4115,1.1136, .2008,
 .1536,1.7251, .0345,
 .1530, .6688, .0020,
 .2890,1.2419, .0220,
 .1341,1.4882, .0050,
 .0524, .4754, .0012,
 .2139, .4612, .0063,
 .1761,1.1200, .0870),30,3,byrow=TRUE)*1.702

d <- matrix(c(.1826,-.1924,-.4656,-.4336,-.4428,-.5845,-1.0403,
  .6431,.0122,.0912,.8082,-.1867,.4533,-1.8398,.4139,
  -.3004,-.1824,.5125,1.1342,.0230,.6172,-.1955,-.3668,
  -1.7590,-.2434,.4925,-.3410,.2896,.006,.0329),ncol=1)*1.702

mu <- c(-.4, -.7, .1)
sigma <- matrix(c(1.21,.297,1.232,.297,.81,.252,1.232,.252,1.96),3,3)

dataset1 <- simdata(a, d, 2000, itemtype = 'dich')
dataset2 <- simdata(a, d, 2000, itemtype = 'dich', mu = mu, sigma = sigma)

#mod <- mirt(dataset1, 3, method = 'MHRM')
#coef(mod)

### Unidimensional graded response model with 5 categories each

a <- matrix(rlnorm(20,.2,.3))

# for the graded model, ensure that there is enough space between the intercepts,
# otherwise closer categories will not be selected often (minimum distance of 0.3 here)
diffs <- t(apply(matrix(runif(20*4, .3, 1), 20), 1, cumsum))
diffs <- -(diffs - rowMeans(diffs))
d <- diffs + rnorm(20)

dat <- simdata(a, d, 500, itemtype = 'graded')
# mod <- mirt(dat, 1)

### An example of a mixed item, bifactor loadings pattern with correlated specific factors

a <- matrix(c(
.8,.4,NA,
.4,.4,NA,
.7,.4,NA,
.8,NA,.4,
.4,NA,.4,
.7,NA,.4),ncol=3,byrow=TRUE)

d <- matrix(c(
-1.0,NA,NA,
 1.5,NA,NA,
 0.0,NA,NA,
0.0,-1.0,1.5,  #the first 0 here is the recommended constraint for nominal
0.0,1.0,-1, #the first 0 here is the recommended constraint for gpcm
2.0,0.0,NA),ncol=3,byrow=TRUE)

nominal <- matrix(NA, nrow(d), ncol(d))
#the first 0 and last (ncat - 1) = 2 values are the recommended constraints
nominal[4, ] <- c(0,1.2,2)

sigma <- diag(3)
sigma[2,3] <- sigma[3,2] <- .25
items <- c('dich','dich','dich','nominal','gpcm','graded')

dataset <- simdata(a,d,2000,items,sigma=sigma,nominal=nominal)

#mod <- bfactor(dataset, c(1,1,1,2,2,2), itemtype=c(rep('2PL', 3), 'nominal', 'gpcm','graded'))
#coef(mod)

#### Convert standardized factor loadings to slopes

F2a <- function(F, D=1.702){
    h2 <- rowSums(F^2)
    a <- (F / sqrt(1 - h2)) * D
    a
}

(F <- matrix(c(rep(.7, 5), rep(.5,5))))
(a <- F2a(F))
d <- rnorm(10)

dat <- simdata(a, d, 5000, itemtype = 'dich')
mod <- mirt(dat, 1)
coef(mod, simplify=TRUE)$items
summary(mod)

mod2 <- mirt(dat, 'F1 = 1-10
                   CONSTRAIN = (1-5, a1), (6-10, a1)')
summary(mod2)
anova(mod, mod2)

#### Unidimensional nonlinear factor pattern

theta <- rnorm(2000)
Theta <- cbind(theta,theta^2)

a <- matrix(c(
.8,.4,
.4,.4,
.7,.4,
.8,NA,
.4,NA,
.7,NA),ncol=2,byrow=TRUE)
d <- matrix(rnorm(6))
itemtype <- rep('dich',6)

nonlindata <- simdata(a=a, d=d, itemtype=itemtype, Theta=Theta)

#model <- '
#F1 = 1-6
#(F1 * F1) = 1-3'
#mod <- mirt(nonlindata, model)
#coef(mod)

#### 2PLNRM model for item 4 (with 4 categories), 2PL otherwise

a <- matrix(rlnorm(4,0,.2))

#first column of item 4 is the intercept for the correct category of 2PL model,
#    otherwise nominal model configuration
d <- matrix(c(
-1.0,NA,NA,NA,
 1.5,NA,NA,NA,
 0.0,NA,NA,NA,
 1, 0.0,-0.5,0.5),ncol=4,byrow=TRUE)

nominal <- matrix(NA, nrow(d), ncol(d))
nominal[4, ] <- c(NA,0,.5,.6)

items <- c(rep('dich',3),'nestlogit')

dataset <- simdata(a,d,2000,items,nominal=nominal)

#mod <- mirt(dataset, 1, itemtype = c('2PL', '2PL', '2PL', '2PLNRM'), key=c(NA,NA,NA,1))
#coef(mod)
#itemplot(mod,4)

#return list of simulation parameters
listobj <- simdata(a,d,2000,items,nominal=nominal, returnList=TRUE)
str(listobj)

# generate dataset from converged model
mod <- mirt(Science, 1, itemtype = c(rep('gpcm', 3), 'nominal'))
sim <- simdata(model=mod, N=1000)
head(sim)

Theta <- matrix(rnorm(100))
sim <- simdata(model=mod, Theta=Theta)
head(sim)

# alternatively, define a suitable object with functions from the mirtCAT package
# help(generate.mirt_object)
library(mirtCAT)

nitems <- 50
a1 <- rlnorm(nitems, .2,.2)
d <- rnorm(nitems)
g <- rbeta(nitems, 20, 80)
pars <- data.frame(a1=a1, d=d, g=g)
head(pars)

obj <- generate.mirt_object(pars, '3PL')
dat <- simdata(N=200, model=obj)

Run the code above in your browser using DataLab