### Example of analyses using snmf ###
# creation of the genotype file, genotypes.geno.
# It contains 400 SNPs for 50 individuals.
data("tutorial")
write.geno(tutorial.R, "genotypes.geno")
################
# runs of snmf #
################
# main options, K: (the number of ancestral populations),
# entropy: calculate the cross-entropy criterion,
# CPU: the number of CPUs.
# Runs with K between 1 and 5 with cross-entropy and 2 repetitions.
project = NULL
project = snmf("genotypes.geno", K=1:10, entropy = TRUE, repetitions = 10,
project = "new")
# plot cross-entropy criterion of all runs of the project
plot(project, lwd = 5, col = "red", pch=1)
# get the cross-entropy of each run for K = 4
ce = cross.entropy(project, K = 4)
# select the run with the lowest cross-entropy
best = which.min(ce)
# plot the best run for K = 4 (ancestry coefficients).
barplot(t(Q(project, K = 4, run = best)))
###################
# Post-treatments #
###################
# show the project
show(project)
# summary of the project
summary(project)
# get the cross-entropy for all runs for K = 4
ce = cross.entropy(project, K = 4)
# get the cross-entropy for the 2nd run for K = 4
ce = cross.entropy(project, K = 4, run = 2)
# get the ancestral genotype frequency matrix, G, for the 2nd run for K = 4.
res = G(project, K = 4, run = 2)
#############################
# Advanced snmf run options #
#############################
# Q.input.file: init a run with a given ancestry coefficient matrix Q.
# Here, it is initialized with the Q matrix from the first run with K=4
project = snmf("genotypes.geno", K = 4,
Q.input.file = "./genotypes.snmf/K4/run1/genotypes_r1.4.Q")
# I: init the Q matrix of a run from a smaller run with 100 randomly chosen
# SNPs.
project = snmf("genotypes.geno", K = 4, I = 100)
# CPU: run snmf with 2 CPUs.
project = snmf("genotypes.geno", K = 4, CPU=2)
# percentage: run snmf and calculate the cross-entropy criterion with 10% of
# masked genotypes, instead of 5% of masked genotypes.
project = snmf("genotypes.geno", K = 4, entropy= TRUE, percentage = 0.1)
# seed: choose the seed to init the randomization.
project = snmf("genotypes.geno", K = 4, seed=42)
# alpha: choose the regularization parameter.
project = snmf("genotypes.geno", K = 4, alpha = 100)
# tolerance: choose the tolerance parameter.
project = snmf("genotypes.geno", K = 4, tolerance = 0.0001)
##########################
# Manage an snmf project #
##########################
# All the runs of snmf for a given file are
# automatically saved into a snmf project directory and a file.
# The name of the snmfProject file is the same name as
# the name of the input file with a .snmfProject extension
# ("genotypes.snmfProject").
# The name of the snmfProject directory is the same name as
# the name of the input file with a .snmf extension ("genotypes.snmf/")
# There is only one snmf Project for each input file including all the runs.
# An snmfProject can be load in a different session.
project = load.snmfProject("genotypes.snmfProject")
# An snmfProject can be exported to be imported in another directory
# or in another computer
export.snmfProject("genotypes.snmfProject")
windows
dir.create("test", showWarnings = TRUE)
#import
newProject = import.snmfProject("genotypes_snmfProject.zip", "test")
# combine projects
combinedProject = combine.snmfProject("genotypes.snmfProject", "test/genotypes.snmfProject")
# remove
remove.snmfProject("test/genotypes.snmfProject")
windows
# remove
remove.snmfProject("genotypes.snmfProject")
#import
newProject = import.snmfProject("genotypes_snmfProject.zip")
# An snmfProject can be erased.
# Caution: All the files associated with the project will be removed.
remove.snmfProject("genotypes.snmfProject")
Run the code above in your browser using DataLab