data(mvad)
mvad.seq <- seqdef(mvad, 17:86)
## For sake of simplicity we recode all "education" states to only one common state.
mvad.seq <- seqrecode(mvad.seq, list("education"=c("FE", "HE", "school", "training")))
## We now have three states
seqdplot(mvad.seq)
###########################################################################
## STEP I: Subsequence extraction
###########################################################################
## We start by extracting all subsequence of length 6
## We also add covariates from the mvad data frame
mvad.samm <- seqsamm(mvad.seq, 6, covar=mvad[, c("Grammar", "funemp", "gcse5eq")])
## Plotting the results to visualize the transitions out of each states.
plot(mvad.samm)
## Descriptive information on the seqsamm object
summary(mvad.samm)
###########################################################################
### STEP II: Typology of trajectory out of joblessness
###########################################################################
## We retrieve the subsequences following a transition out of a joblessness spell
jlseq <- seqsammseq(mvad.samm, "joblessness")
## Now we create a typology of these subsequences.
## Compute the clustering using LCS
jldist <- seqdist(jlseq, method="LCS")
## For sake of simplicity, use only 2 groups
library(cluster)
jlclust <- pam(jldist, diss=TRUE, k=2, cluster.only=TRUE)
## Specify the names of the types in the 2-cluster typology (here joblessness1 or joblessness2).
jltype <- paste0("joblessness", jlclust)
###########################################################################
### STEP III: Competing risks model of trajectories out of joblessness
###########################################################################
## Get the data to estimate competing risks models of the kind of trajectory
## out of jobjlessness
## We specify the SAMM object, the ending spell (joblessness) and our typology.
jleha <- seqsammeha(mvad.samm, "joblessness", jltype)
if (FALSE) {
## Now jleha stores the data in person period format for competing risks
## Discrete time model using multinomial regression
## SAMMtypology and spell.time are variables created and stored in the jleha dataset
library(nnet)
multinom(SAMMtypology~spell.time+Grammar+funemp+gcse5eq, data=jleha)
## We can also have only one line per ending spell
## Plot the results
library(survival)
jleha <- seqsammeha(mvad.samm, "joblessness", jltype, persper=FALSE)
plot(survfit(Surv(spell.time, SAMMjoblessness1)~1, data=jleha))
## Cox model
summary(coxph(Surv(spell.time, SAMMjoblessness1)~gcse5eq+Grammar+funemp, data=jleha))
## Most of the time methods for recurrent events should be used.
## See for instance the coxme library to do so.
library(coxme)
summary(coxme(Surv(spell.time, SAMMjoblessness1)~gcse5eq+Grammar+funemp+(1|id), data=jleha))
}
###########################################################################
### Now repeat steps II and III for employment and then education
### (Not shown here)
###########################################################################
Run the code above in your browser using DataLab