# Simulate some fossil ranges with simFossilRecord
set.seed(444)
record <- simFossilRecord(p = 0.1,
q = 0.1,
nruns = 1,
nTotalTaxa = c(30,40),
nExtant = 0
)
taxa <- fossilRecord2fossilTaxa(record)
# simulate a fossil record with imperfect sampling with sampleRanges
rangesCont <- sampleRanges(taxa,r = 0.1)
# Now let's use binTimeData to bin in intervals of 5 time units
rangesDisc <- binTimeData(rangesCont,int.length = 5)
# now, get an estimate of the sampling rate (we set it to 0.5 above)
# for discrete data we can estimate the sampling probability per interval (R)
# i.e. this is not the same thing as the instantaneous sampling rate (r)
# can use sRate2sProb to see what we would expect
sRate2sProb(r = 0.1, int.length = 5)
# expect R = ~0.39
# now we can apply freqRat to get sampling probability
SampProb <- freqRat(rangesDisc, plot = TRUE)
SampProb
# I estimated R = ~0.25
# Not wildly accurate, is it?
# can also calculate extinction rate per interval of time
freqRat(rangesDisc, calcExtinction = TRUE)
# est. ext rate = ~0.44 per interval
# 5 time-unit intervals, so ~0.44 / 5 = ~0.08 per time-unit
# That's pretty close to the generating value of 0.01, used in sampleRanges
if (FALSE) {
#################
# The following example code (which is not run by default) examines how
# the freqRat estimates vary with sample size, interval length
# and compares it to using make_durationFreqDisc
# how good is the freqRat at 20 sampled taxa on avg?
set.seed(444)
r <- runif(100)
int.length = 1
# estimate R from r, assuming stuff like p = q
R <- sapply(r, sRate2sProb, int.length = 1)
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
# assuming budding model
record <- simFossilRecord(p = 0.1,
q = 0.1,
r = r[i],
nruns = 1,
nSamp = c(15,25),
nExtant = 0,
plot = TRUE
)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R,freqRats);abline(0,1)
# without the gigantic artifacts bigger than 1...
plot(R,freqRats,ylim = c(0,1));abline(0,1)
# very worrisome lookin'!
# how good is it at 100 sampled taxa on average?
set.seed(444)
r <- runif(100)
int.length = 1
R <- sapply(r,sRate2sProb,int.length = 1)
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
# assuming budding model
record <- simFossilRecord(p = 0.1,
q = 0.1,
r = r[i],
nruns = 1,
nSamp = c(80,150),
nExtant = 0,
plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,
int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R, freqRats,
ylim = c(0,1)
)
abline(0,1)
#not so hot, eh?
################
#LETS CHANGE THE TIME BIN LENGTH!
# how good is it at 100 sampled taxa on average, with longer time bins?
set.seed(444)
r <- runif(100)
int.length <- 10
R <- sapply(r, sRate2sProb, int.length = int.length)
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
# assuming budding model
record <- simFossilRecord(p = 0.1,
q = 0.1,
r = r[i],
nruns = 1,
nSamp = c(80,150),
nExtant = 0,
plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges, int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R, freqRats, ylim = c(0,1))
abline(0,1)
# things get more accurate as interval length increases... odd, eh?
# how good is it at 20 sampled taxa on average, with longer time bins?
set.seed(444)
r <- runif(100)
int.length <- 10
R <- sapply(r, sRate2sProb, int.length = int.length)
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
# assuming budding model
record <- simFossilRecord(p = 0.1,
q = 0.1,
r = r[i],
nruns = 1,
nSamp = c(15,25),
nExtant = 0,
plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges, int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R, freqRats, ylim = c(0,1))
abline(0,1)
# still not so hot at low sample sizes, even with longer bins
########################
# ML METHOD
# how good is the ML method at 20 taxa, 1 time-unit bins?
set.seed(444)
r <- runif(100)
int.length <- 1
R <- sapply(r,sRate2sProb,int.length = int.length)
ntaxa <- ML_sampProb <- numeric()
for(i in 1:length(r)){
# assuming budding model
record <- simFossilRecord(p = 0.1,
q = 0.1,
r = r[i],
nruns = 1,
nSamp = c(15,25),
nExtant = 0,
plot = TRUE
)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges, int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
likFun <- make_durationFreqDisc(timeList)
ML_sampProb[i] <- optim(
parInit(likFun), likFun,
lower = parLower(likFun),
upper = parUpper(likFun),
method = "L-BFGS-B",
control = list(maxit = 1000000)
)[[1]][2]
}
plot(R, ML_sampProb)
abline(0,1)
# Not so great due to likelihood surface ridges
# but it returns values between 0-1
# how good is the ML method at 100 taxa, 1 time-unit bins?
set.seed(444)
r <- runif(100)
int.length <- 1
R <- sapply(r, sRate2sProb,
int.length = int.length)
ntaxa <- ML_sampProb <- numeric()
for(i in 1:length(r)){
# assuming budding model
record <- simFossilRecord(p = 0.1,
q = 0.1,
r = r[i],
nruns = 1,
nSamp = c(80,150),
nExtant = 0,
plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
likFun <- make_durationFreqDisc(timeList)
ML_sampProb[i] <- optim(parInit(likFun),
likFun,
lower = parLower(likFun),
upper = parUpper(likFun),
method = "L-BFGS-B",
control = list(maxit = 1000000)
)[[1]][2]
}
plot(R,ML_sampProb)
abline(0,1)
# Oh, fairly nice, although still a biased uptick as R gets larger
}
Run the code above in your browser using DataLab