# Example, guarded by `if (FALSE)` so that sourcing this file never runs it:
# impute the genotypes of an example dataset with snp_fastImpute(), switch the
# genotype code so that the imputed values are used, then plot the per-SNP
# proportion of missing values against the estimated imputation error.
if (FALSE) {

  # ---- Imputation ----
  fake <- snp_attachExtdata("example-missing.bed")
  G <- fake$genotypes
  CHR <- fake$map$chromosome
  infos <- snp_fastImpute(G, CHR)
  infos[, 1:5]

  # Counts for the first 10 SNPs: missing values are still reported here
  big_counts(G, ind.col = 1:10)

  # The code of G has to be changed. Saving (modifying) the file on disk
  # is what makes that change permanent.
  fake$genotypes$code256 <- CODE_IMPUTE_PRED
  fake <- snp_save(fake)
  big_counts(fake$genotypes, ind.col = 1:10)

  # ---- Plots for post-checking ----
  ## Here there is no SNP with more than 1% error (estimated)
  pvals <- c(0.01, 0.005, 0.002, 0.001)
  colvals <- 2:5
  # Row 1 of `infos` is used as pNA and row 2 as pError (one entry per column)
  df <- data.frame(pNA = infos[1, ], pError = infos[2, ])

  # Base R version: scatter plot of the SNPs with pNA > 0.001, overlaid with
  # one curve pError = p / pNA for each level p in `pvals`.
  plot(subset(df, pNA > 0.001), pch = 20)
  # The result is assigned only to keep the list returned by curve() from
  # being printed.
  idc <- Map(
    function(p, shade) {
      curve(p / x, from = 0, lwd = 2, col = shade, add = TRUE)
    },
    pvals, colvals
  )
  legend("topright", legend = pvals, title = "p(NA & Error)",
         col = colvals, lty = 1, lwd = 2)

  # ggplot2 version: the same curves are added as a list of stat_function()
  # layers (before the points), and the y-axis is restricted to the observed
  # range of pError.
  library(ggplot2)
  ggplot(df, aes(pNA, pError)) +
    lapply(seq_along(pvals), function(i) {
      stat_function(fun = function(x) pvals[i] / x, color = colvals[i])
    }) +
    geom_point() +
    coord_cartesian(ylim = range(df$pError, na.rm = TRUE)) +
    theme_bigstatsr()
}
# Run the code above in your browser using DataLab