# the GDS file
(gds.fn <- seqExampleFileName("gds"))
# display
(f <- seqOpen(gds.fn))
# get 'sample.id
(samp.id <- seqGetData(f, "sample.id"))
# "NA06984" "NA06985" "NA06986" ...
# get 'variant.id'
head(variant.id <- seqGetData(f, "variant.id"))
# set sample and variant filters
set.seed(100)
seqSetFilter(f, sample.id=samp.id[c(2,4,6,8,10)],
variant.id=sample(variant.id, 10))
# read
seqApply(f, "genotype", FUN=print, margin="by.variant")
seqApply(f, "genotype", FUN=print, margin="by.variant", .useraw=TRUE)
seqApply(f, "genotype", FUN=print, margin="by.sample")
seqApply(f, "genotype", FUN=print, margin="by.sample", .useraw=TRUE)
# read multiple variables variant by variant
seqApply(f, c(geno="genotype", phase="phase", qual="annotation/id",
DP="annotation/format/DP"), FUN=print, as.is="none")
# get the numbers of alleles per variant
seqApply(f, "allele",
FUN=function(x) length(unlist(strsplit(x,","))), as.is="integer")
# output to a file
fl <- file("tmp.txt", "wt")
seqApply(f, "genotype", FUN=sum, na.rm=TRUE, as.is=fl)
close(fl)
readLines("tmp.txt")
seqApply(f, "genotype", FUN=sum, na.rm=TRUE, as.is=stdout())
seqApply(f, "genotype", FUN=sum, na.rm=TRUE, as.is="integer")
# should be identical
################################################################
# with an index of variant
seqApply(f, c(geno="genotype", phase="phase", qual="annotation/id"),
FUN=function(index, x) { print(index); print(x); index },
as.is="integer", var.index="relative")
# it is as the same as
which(seqGetFilter(f)$variant.sel)
################################################################
# reset sample and variant filters
seqResetFilter(f)
# calculate the frequency of reference allele,
# a faster version could be obtained by C coding
af <- seqApply(f, "genotype", FUN=function(x) mean(x==0L, na.rm=TRUE),
as.is="double")
length(af)
summary(af)
################################################################
# apply the user-defined function sample by sample
# reset sample and variant filters
seqResetFilter(f)
summary(seqApply(f, "genotype", FUN=function(x) { mean(is.na(x)) },
margin="by.sample", as.is="double"))
# set sample and variant filters
set.seed(100)
seqSetFilter(f, sample.id=samp.id[c(2,4,6,8,10)],
variant.id=sample(variant.id, 10))
seqApply(f, "genotype", FUN=print, margin="by.variant", as.is="none")
seqApply(f, "genotype", FUN=print, margin="by.sample", as.is="none")
seqApply(f, c(sample.id="sample.id", genotype="genotype"), FUN=print,
margin="by.sample", as.is="none")
# close the GDS file
seqClose(f)
# delete the temporary file
unlink("tmp.txt")
Run the code above in your browser using DataLab