#Simulate some fossil ranges with simFossilTaxa
set.seed(444)
taxa <- simFossilTaxa(p=0.1,q=0.1,nruns=1,mintaxa=100,maxtime=1000,maxExtant=0)
#simulate a fossil record with imperfect sampling with sampleRanges
rangesCont <- sampleRanges(taxa,r=0.1)
#Now let's use binTimeData to bin in intervals of 5 time units
rangesDisc <- binTimeData(rangesCont,int.length=5)
#now, get an estimate of the sampling rate (we set it to 0.5 above)
#for discrete data we can estimate the sampling probability per interval (R)
#i.e. this is not the same thing as the instantaneous sampling rate (r)
#can use sRate2sProb to see what we would expect
sRate2sProb(r=0.1,int.length=5)
#expect R = ~0.39
#now we can apply freqRat to get sampling probability
SampProb <- freqRat(rangesDisc,plot=TRUE)
SampProb
#est. R = ~0.25
#Not wildly accurate, is it?
#can also calculate extinction rate per interval of time
freqRat(rangesDisc,calcExtinction=TRUE)
#est. ext rate = ~0.44 per interval
#5 time-unit intervals, so ~0.44 / 5 = ~0.08 per time-unite
#That's pretty close to the generating value of 0.01, used in simFossilTaxa
#################
#The following example code (which is not run by default) examines how
#the freqRat estimates vary with sample size, interval length
#and compare it to using getSampProbDisc
#how good is the freqRat at 20 sampled taxa on avg?
set.seed(444)
r<-runif(100)
int.length=1
R<-sapply(r,sRate2sProb,int.length=1) #estimate R from r, assuming stuff like p=q
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=20,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats);abline(0,1)
#without the gigantic artifacts bigger than 1...
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#very worrisome lookin'!
#how good is it at 100 sampled taxa on average?
set.seed(444)
r<-runif(100)
int.length=1
R<-sapply(r,sRate2sProb,int.length=1)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=100,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#not so hot, eh?
################
#LETS CHANGE THE TIME BIN LENGTH!
#how good is it at 100 sampled taxa on average, with longer time bins?
set.seed(444)
r<-runif(100)
int.length<-10
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=100,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#things get more accurate as interval length increases... odd, eh?
#how good is it at 20 sampled taxa on average, with longer time bins?
set.seed(444)
r<-runif(100)
int.length<-10
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=20,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#still not so hot at low sample sizes, even with longer bins
########################
#ML METHOD
#how good is the ML method at 20 taxa, 1 time-unit bins?
set.seed(444)
r<-runif(100)
int.length<-1
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-ML_sampProb<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=20,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
ML_sampProb[i]<-getSampProbDisc(timeList)[[2]][2]
}
plot(R,ML_sampProb);abline(0,1)
#Not so great due to likelihood surface ridges, but it returns values between 0-1
#how good is the ML method at 100 taxa, 1 time-unit bins?
set.seed(444)
r<-runif(100)
int.length<-1
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-ML_sampProb<-numeric()
for(i in 1:length(r)){
#assuming budding model
taxa<-simFossilTaxa_SRCond(r=r[i],avgtaxa=100,p=0.1,q=0.1,maxExtant=0)
ranges<-sampleRanges(taxa,r=r[i])
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
ML_sampProb[i]<-getSampProbDisc(timeList)[[2]][2]
}
plot(R,ML_sampProb);abline(0,1)
#Oh, fairly nice, although still a biased uptick as R gets larger
Run the code above in your browser using DataLab