## First, run the algorithm without including time as a covariate.
orderedPair <- pairData[pairData$infectionDiffY > 0, ]
## Create a variable called snpClose that will define probable links
# (<3 SNPs) and nonlinks (>12 SNPs) all pairs with between 2-12 SNPs
# will not be used to train.
orderedPair$snpClose <- ifelse(orderedPair$snpDist < 3, TRUE,
ifelse(orderedPair$snpDist > 12, FALSE, NA))
table(orderedPair$snpClose)
## Running the algorithm
#NOTE should run with nReps > 1.
resGen <- nbProbabilities(orderedPair = orderedPair,
indIDVar = "individualID",
pairIDVar = "pairID",
goldStdVar = "snpClose",
covariates = c("Z1", "Z2", "Z3", "Z4"),
label = "SNPs", l = 1,
n = 10, m = 1, nReps = 1)
## Merging the probabilities back with the pair-level data
nbResultsNoT <- merge(resGen[[1]], orderedPair, by = "pairID", all = TRUE)
## Estimating the serial interval
# \donttest{
# Using all pairs and plotting the parameters
performPEM(nbResultsNoT, indIDVar = "individualID", timeDiffVar = "infectionDiffY",
pVar = "pScaled", initialPars = c(2, 2), shift = 0, plot = TRUE)
# }
# Clustering the probabilities first
allClust <- clusterInfectors(nbResultsNoT, indIDVar = "individualID", pVar = "pScaled",
clustMethod = "hc_absolute", cutoff = 0.05)
performPEM(allClust[allClust$cluster == 1, ], indIDVar = "individualID",
timeDiffVar = "infectionDiffY", pVar = "pScaled",
initialPars = c(2, 2), shift = 0, plot = TRUE)
# \donttest{
# The above is equivalent to the following code using the function estimateSI()
# though the plot will not be printed and more details will be added
estimateSI(nbResultsNoT, indIDVar = "individualID", timeDiffVar = "infectionDiffY",
pVar = "pScaled", clustMethod = "hc_absolute", cutoff = 0.05,
initialPars = c(2, 2))
# }
Run the code above in your browser using DataLab