# NOT RUN {
# Example: replace missing categorical values with knncatimputeLarge, first
# using all rows and then restricting the imputation to groups of rows.
# NOTE(review): knncatimputeLarge is not defined in this script; presumably it
# is provided by an attached package (scrime?) -- confirm before running.

# Generate a data set consisting of 100 columns and 2000 rows (actually,
# knncatimputeLarge is made for much larger data sets), where the values
# are randomly drawn from the integers 1, 2, and 3.
# Afterwards, set 20 randomly chosen entries to NA.
n_rows <- 2000
n_cols <- 100
n_missing <- 20
mat <- matrix(sample(3, n_rows * n_cols, replace = TRUE), nrow = n_rows)
mat[sample(n_rows * n_cols, n_missing)] <- NA

# Apply knncatimputeLarge to mat to remove the missing values.
mat2 <- knncatimputeLarge(mat)

# Number of missing values before and after the imputation.
sum(is.na(mat))
sum(is.na(mat2))

# Now assume that the first 100 rows belong to SNPs from chromosome 1,
# the second 100 rows to SNPs from chromosome 2, and so on.
# The argument name `each` is spelled out in full rather than relying on
# partial argument matching.
chromosome <- rep(1:20, each = 100)

# Apply knncatimputeLarge to mat chromosomewise, i.e. only consider
# the SNPs that belong to the same chromosome when replacing missing
# genotypes.
mat4 <- knncatimputeLarge(mat, fac = chromosome)
# }
# Run the code above in your browser using DataLab