# Simulate observations from two classes
set.seed(123)
obs <- seqdef(rbind(
matrix(sample(letters[1:3], 500, TRUE, prob = c(0.1, 0.6, 0.3)), 50, 10),
matrix(sample(letters[1:3], 200, TRUE, prob = c(0.4, 0.4, 0.2)), 20, 10)
))
# Initialize the model
set.seed(9087)
model <- build_lcm(obs, n_clusters = 2)
# Estimate model parameters
fit <- fit_model(model)
# How many of the observations were correctly classified:
sum(summary(fit$model)$most_probable_cluster == rep(c("Class 2", "Class 1"), times = c(500, 200)))
############################################################
if (FALSE) {
# LCM for longitudinal data
# Define sequence data
data("mvad", package = "TraMineR")
mvad_alphabet <- c(
"employment", "FE", "HE", "joblessness", "school",
"training"
)
mvad_labels <- c(
"employment", "further education", "higher education",
"joblessness", "school", "training"
)
mvad_scodes <- c("EM", "FE", "HE", "JL", "SC", "TR")
mvad_seq <- seqdef(mvad, 17:86,
alphabet = mvad_alphabet, states = mvad_scodes,
labels = mvad_labels, xtstep = 6
)
# Initialize the LCM with random starting values
set.seed(7654)
init_lcm_mvad1 <- build_lcm(
observations = mvad_seq,
n_clusters = 2, formula = ~male, data = mvad
)
# Own starting values for emission probabilities
emiss <- rbind(rep(1 / 6, 6), rep(1 / 6, 6))
# LCM with own starting values
init_lcm_mvad2 <- build_lcm(
observations = mvad_seq,
emission_probs = emiss, formula = ~male, data = mvad
)
# Estimate model parameters (EM algorithm with random restarts)
lcm_mvad <- fit_model(init_lcm_mvad1,
control_em = list(restart = list(times = 5))
)$model
# Plot the LCM
plot(lcm_mvad, interactive = FALSE, ncol = 2)
###################################################################
# Binomial regression (comparison to glm)
require("MASS")
data("birthwt")
model <- build_lcm(
observations = seqdef(birthwt$low), emission_probs = diag(2),
formula = ~ age + lwt + smoke + ht, data = birthwt
)
fit <- fit_model(model)
summary(fit$model)
summary(glm(low ~ age + lwt + smoke + ht, binomial, data = birthwt))
# Multinomial regression (comparison to multinom)
require("nnet")
set.seed(123)
n <- 100
X <- cbind(1, x1 = runif(n, 0, 1), x2 = runif(n, 0, 1))
coefs <- cbind(0, c(-2, 5, -2), c(0, -2, 2))
pr <- exp(X %*% coefs) + rnorm(n * 3)
pr <- pr / rowSums(pr)
y <- apply(pr, 1, which.max)
table(y)
model <- build_lcm(
observations = seqdef(y), emission_probs = diag(3),
formula = ~ x1 + x2, data = data.frame(X[, -1])
)
fit <- fit_model(model)
summary(fit$model)
summary(multinom(y ~ x1 + x2, data = data.frame(X[, -1])))
}
Run the code above in your browser using DataLab