## Example: how many groups does a clustering need in order to account for
## the relationship between the mvad trajectories and the gcse5eq covariate?

## Attach the packages this script relies on so that it also runs in a fresh
## R session: TraMineR supplies the mvad data set, seqdef() and seqdist();
## WeightedCluster supplies as.clustrange() and clustassoc().
library(TraMineR)
library(WeightedCluster)

data(mvad)
## Small subsample (first 50 rows) to reduce computations
mvad <- mvad[1:50, ]
## Sequence object built from columns 17 to 86 of mvad
mvad.seq <- seqdef(mvad[, 17:86])
## Compute pairwise dissimilarities using the Hamming distance
diss <- seqdist(mvad.seq, method = "HAM")
## Ward clustering on the dissimilarity matrix
wardCluster <- hclust(as.dist(diss), method = "ward.D")
## Computing clustrange from Ward clustering up to 5 groups
wardRange <- as.clustrange(wardCluster, diss = diss, ncluster = 5)
## Compute clustassoc
## How many groups are required to account for the relationship
## between trajectories and the gcse5eq covariate
assoc <- clustassoc(wardRange, covar = mvad$gcse5eq, diss = diss)
## Plot unaccounted share of the association
## A value close to zero means that the relationship is accounted for.
## Here at least 2-4 groups are required
plot(assoc)
## Plot BIC
## A low value means that an association between trajectories and the
## covariate is identified.
## 2-3 groups show best results.
plot(assoc, stat = "BIC")
## Plot remaining share of the variability of the sequences not explained
## by clustering
## A value close to zero means that there is no association left (similar)
## Here at least 2-4 groups are required
plot(assoc, stat = "Remaining")
## Run the code above in your browser using DataLab