#Simulate some fossil ranges with simFossilTaxa
set.seed(444)
taxa <- simFossilTaxa(p=0.1,q=0.1,nruns=1,mintaxa=100,maxtime=1000,maxExtant=0)
#simulate a fossil record with imperfect sampling with sampleRanges
rangesCont <- sampleRanges(taxa,r=0.1)
#Now let's use binTimeData to bin in intervals of 5 time units
rangesDisc <- binTimeData(rangesCont,int.length=5)
#now, get an estimate of the sampling rate (we set it to 0.5 above)
#for discrete data we can estimate the sampling probability per interval (R)
#i.e. this is not the same thing as the instantaneous sampling rate (r)
#can use sRate2sProb to see what we would expect
sRate2sProb(r=0.1,int.length=5)
#expect R = ~0.39
#now we can use maximum likelihood to taxon ranges to get sampling probability
SampProb <- freqRat(rangesDisc,plot=TRUE)
SampProb
#est. R = ~0.25
#################
#The following example code (which is not run by default) examines how
#the freqRat estimates vary with sample size, interval length
#and compare it to using getSampProbDisc
#how good is the freqRat at 20 sampled taxa on avg?
set.seed(444)
r<-runif(100)
int.length=1
R<-sapply(r,sRate2sProb,int.length=1) #estimate R from r, assuming stuff like p=q
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=20,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-get_freqRat(timeList)
}
plot(R,freqRats);abline(0,1)
#without the gigantic artifacts bigger than 1...
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#very worrisome lookin'!
#how good is it at 100 sampled taxa on average?
set.seed(444)
r<-runif(100)
int.length=1
R<-sapply(r,sRate2sProb,int.length=1)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=100,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-get_freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#not so hot, eh?
################
#LETS CHANGE THE TIME BIN LENGTH!
#how good is it at 100 sampled taxa on average, with longer time bins?
set.seed(444)
r<-runif(100)
int.length<-10
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=100,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-get_freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#things get more accurate as interval length increases... odd, eh?
#how good is it at 20 sampled taxa on average, with longer time bins?
set.seed(444)
r<-runif(100)
int.length<-10
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=20,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-get_freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#still not so hot at low sample sizes, even with longer bins
########################
#ML METHOD
#how good is the ML method at 20 taxa, 1 time-unit bins?
set.seed(444)
r<-runif(100)
int.length<-1
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-ML_sampProb<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=20,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
ML_sampProb[i]<-getSampProbDisc(timeList)[[2]][2]
}
plot(R,ML_sampProb);abline(0,1)
#Not so great due to likelihood surface ridges, but it returns values between 0-1
#how good is the ML method at 100 taxa, 1 time-unit bins?
set.seed(444)
r<-runif(100)
int.length<-1
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-ML_sampProb<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=100,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
ML_sampProb[i]<-getSampProbDisc(timeList)[[2]][2]
}
plot(R,ML_sampProb);abline(0,1)
#Oh, fairly nice, although still a biased uptick as R gets larger
Run the code above in your browser using DataLab