## Example 1: simulate a random sequence for a non-model species that has no
## reference genome.
# Generate 1 Mb of DNA sequence with 44.4% GC content:
smsq <- sim.DNAseq(size = 1000000, GCfreq = 0.444)
# Length of the simulated sequence:
width(smsq)
# GC content, computed with seqinr's s2c() and GC().
# library() is used instead of require(): it stops immediately with a clear
# error when seqinr is not installed, whereas require() merely returns FALSE
# and the failure would only surface at the GC() call below.
library(seqinr)
GC(s2c(smsq))
## Example 2: simulate a random sequence whose parameters follow a known
## reference genome sequence.
# Build a FASTA file for the example: 10 simulated contigs, each with a length
# drawn from rpois(1, 1000) and a GC content of 44.4%.
# vapply() collects the contigs in one step instead of growing a vector with
# c() inside a loop; the random draws happen in the same order as before.
n_contigs <- 10L
contigs <- vapply(
  seq_len(n_contigs),
  function(i) sim.DNAseq(size = rpois(1, 1000), GCfreq = 0.444),
  character(1)
)
sq <- DNAStringSet(contigs)
# NOTE: the file is written to the current working directory.
writeFasta(sq, file = "SimRAD-exampleRefSeq-Fasta.fa", mode = "w")
# Import the FASTA file and sub-select 25% of the contigs:
rfsq <- ref.DNAseq(
  "SimRAD-exampleRefSeq-Fasta.fa",
  subselect.contigs = TRUE,
  prop.contigs = 0.25
)
# Length of the reference sequence (stored so it can be reused below):
ref_length <- width(rfsq)
ref_length
# GC content of the reference sequence, computed with seqinr's s2c() and GC().
# library() fails fast with a clear error if seqinr is missing; require()
# would only return FALSE and defer the failure to the GC() call.
library(seqinr)
ref_gc <- GC(s2c(rfsq))
ref_gc
# Simulate a randomly generated DNA sequence with the same length and GC
# content as the sub-selected reference genome, for comparison purposes:
smsq <- sim.DNAseq(size = ref_length, GCfreq = ref_gc)
# Run the code above in your browser using DataLab