## Use the pairData dataset which represents a TB-like outbreak
# First create a dataset of ordered pairs
orderedPair <- pairData[pairData$infectionDiffY >= 0, ]
## Create a variable called snpClose that will define probable links
# (<3 SNPs) and nonlinks (>12 SNPs) all pairs with between 2-12 SNPs
# will not be used to train.
orderedPair$snpClose <- ifelse(orderedPair$snpDist < 3, TRUE,
ifelse(orderedPair$snpDist > 12, FALSE, NA))
table(orderedPair$snpClose)
## Running the algorithm
#NOTE should run with nReps > 1.
resGen <- nbProbabilities(orderedPair = orderedPair,
indIDVar = "individualID",
pairIDVar = "pairID",
goldStdVar = "snpClose",
covariates = c("Z1", "Z2", "Z3", "Z4", "timeCat"),
label = "SNPs", l = 1,
n = 10, m = 1, nReps = 1)
## Merging the probabilities back with the pair-level data
nbResults <- merge(resGen[[1]], orderedPair, by = "pairID", all = TRUE)
Run the code above in your browser using DataLab