# NOT RUN {
#========================================================================================
# The 1-st example
#========================================================================================
#
#
# Making FROC Data and Fitting a Model to the data
#
# Notations
#
# h = hits = TP = True Positives
# f = False alarms = FP = False Positives
#
#
#========================================================================================
# 1) Build a data-set
#========================================================================================
BayesianFROC:::clearWorkspace()
# For a single reader and a single modality case.
dat <- list(c=c(3,2,1), # Confidence level. Note that c is ignored.
h=c(97,32,31), # Number of hits for each confidence level
f=c(1,14,74), # Number of false alarms for each confidence level
NL=259, # Number of lesions
NI=57, # Number of images
C=3) # Number of confidence level
if (interactive()){ viewdata(dat)}
# where,
# c denotes confidence level, i.e., rating of reader.
# 3 = Definitely diseased,
# 2 = subtle,.. diseased
# 1 = very subtle
# h denotes number of hits (True Positives: TP) for each confidence level,
# f denotes number of false alarms (False Positives: FP) for each confidence level,
# NL denotes number of lesions,
# NI denotes number of images,
# For example, in the above example data,
# the number of hits with confidence level 3 is 97,
# the number of hits with confidence level 2 is 32,
# the number of hits with confidence level 1 is 31,
# the number of false alarms with confidence level 3 is 1,
# the number of false alarms with confidence level 2 is 14,
# the number of false alarms with confidence level 1 is 74,
#========================================================================================
# 2) Fit an FROC model to the above dataset.
#========================================================================================
fit <- fit_Bayesian_FROC(
dat, # dataset
ite = 111, #To run in time <5s.
cha = 1, # number of chains, it is better more large.
summary = FALSE
)
# The return value "fit" is an S4 object of class "stanfitExtended" which is inherited
# from the S4 class "stanfit".
#========================================================================================
# 3) Change the S4 class of fitted model object
# Change the S4 class from "stanfitExtended" to "stanfit" to apply other packages.
# The fitted model object of class "stanfit" is widely available.
# For example the package ggmcmc, rstan, shinystan::launch_shinystan(stanfit_object)
# Thus, to use such packages, we get back the inherited class into "stanfit" as follows:
# Changing the class from stanfitExtended to stanfit,
# we can apply other pakcage's functions to the resulting object.
#========================================================================================
fit.stan <- methods::as(fit,"stanfit")
# Then, return value "fit.stan" is no longer an S4 object of class "stanfitExtended" but
# the S4 object of class "stanfit" which is widely adequate for many packages.
#========================================================================================
# 3.1) Apply the functions for the class stanfit
#========================================================================================
grDevices::dev.new();rstan::stan_hist(fit.stan, bins=33,pars = c("A"))
grDevices::dev.new();rstan::stan_hist(fit.stan, bins=22,pars = c("A"))
grDevices::dev.new();rstan::stan_hist(fit.stan, bins=11,pars = c("A"))
grDevices::dev.off()
# I am not sure why the above stan_hist also works for the new S4 class "stanfitExtended"
# Get pipe operator
# `%>%` <- utils::getFromNamespace("%>%", "magrittr")
# Plot about MCMC samples of parameter name "A", representing AUC
# The author does not think the inherited class "stanfitExtended" is good,
# cuz the size of object is very redundant and large,
# which caused by the fact that inherited class contains plot data for FROC curve.
# To show the difference of size for the fitted model object of class
# stanfitExtended and stanfit, we execute the following code;
size_of_return_value(fit) - size_of_return_value(methods::as(fit,"stanfit"))
#4) Using the S4 object fit, we can go further step, such as calculation of the
# Chisquare and the p value as the Bayesian sense for testing the goodness of fit.
# I think p value has problems that it relies on the sample size monotonically.
# But it is widely used, thus I hate it but I implement the p value.
#========================================================================================
# REMARK
#========================================================================================
#
# Should not write the above data as follows:
# MANNER (A) dat <- list(c=c(1,2,3),h=c(31,32,97),f=c(74,14,1),NL=259,NI=57,C=3)
# Even if user writes data in the above MANNER (A),
# the program interprets it as the following MANNER (B);
# MANNER (B) dat <- list(c=c(3,2,1),h=c(31,32,97),f=c(74,14,1),NL=259,NI=57,C=3)
# Because the vector c is ignored in the program,
# and it is generated by the code rep(C:1) automatically in the internal of the function.
# So, we can omit the vector c from the list.
#This package is very rigid format, so please be sure that data-format is
#exactly same to the format in this package.
#More precisely, the confidence level vector should be denoted rep(C:1) (Not rep(1:C)).
# Note that confidence level vector c should not be specified.
# If specified, will be ignored ,
# since it is created by c <-c(rep(C:1)) in the program and
# do not refer from user input confidence level vector,
# where C is the highest number of confidence levels.
# I regret this order, this order is made when I start, so I was very beginner,
# but it is too late to fix,...tooooooo late.
#========================================================================================
# The 2-nd example
#========================================================================================
#
# (1)First, we prepare the data from this package.
dat <- BayesianFROC::dataList.Chakra.1
# (2)Second, we run fit_Bayesian_FROC() in which the rstan::stan() is implemented.
# with data named "dat" and the author's Bayesian model.
fit <- fit_Bayesian_FROC(dat,
ite = 111 #To run in time <5s.
)
# Now, we get the object named "fit" which is an S4 object of class stanfitExtended.
# << Minor Comments>>
# More precisely, this is an S4 object of some inherited class (named stanfitExtended)
# which is extended using stan's S4 class named "stanfit".
fit.stan <- methods::as(fit,"stanfit")
# Using the output "fit.stan",
# we can use the functions in the "rstan" package, for example, as follows;
grDevices::dev.new();
rstan::stan_trace(fit.stan, pars = c("A"))# stochastic process of a posterior estimate
rstan::stan_hist(fit.stan, pars = c("A")) # Histogram of a posterior estimate
rstan::stan_rhat(fit.stan, pars = c("A")) # Histogram of rhat for all parameters
rstan::summary(fit.stan, pars = c("A")) # summary of fit.stan by rstan
grDevices::dev.off()
#========================================================================================
# The 3-rd example
#========================================================================================
# Fit a model to a hand made data
# 1) Build the data for a single reader and a single modality case.
dat <- list(
c=c(3,2,1), # Confidence level, which is ignored.
h=c(97,32,31), # Number of hits for each confidence level
f=c(1,14,74), # Number of false alarms for each confidence level
NL=259, # Number of lesions
NI=57, # Number of images
C=3) # Number of confidence level
# where,
# c denotes confidence level, , each components indicates that
# 3 = Definitely lesion,
# 2 = subtle,
# 1 = very subtle
# That is the high number indicates the high confidence level.
# h denotes number of hits
# (True Positives: TP) for each confidence level,
# f denotes number of false alarms
# (False Positives: FP) for each confidence level,
# NL denotes number of lesions,
# NI denotes number of images,
# 2) Fit and draw FROC and AFROC curves.
fit <- fit_Bayesian_FROC(dat, DrawCurve = TRUE)
# (( REMARK ))
# Changing the hits and false alarms denoted by h and f
# in the above dataset denoted by dat,
# user can fit a model to various datasets and draw corresponding FROC curves.
# Enjoy drawing the curves for various datasets in case of
# a single reader and a single modality data
#========================================================================================
# For Prior and Bayesian Update:
# Calculates a posterior mean and variance
# for each parameter
#========================================================================================
# Mean values of posterior samples are used as a point estimates, and
# Although the variance of posteriors receives less attention,
# but to make a prior, we will need the it.
# For, example, if we assume that model parameter m has prior distributed by
# Gaussian, then we have to know the mean and variance to characterize prior.
e <- rstan::extract(fit)
# model parameter m and v is a number,
# indicating the mean and variance of signal distribution, respectively.
stats::var(e$m)
mean(e$m)
stats::var(e$v)
mean(e$v)
# The model parameter z or dz is a vector, and thus we execute the following;
# z = ( z[1], z[2], z[3] )
# dz = ( z[2]-z[1], z[3]-z[2] )
# `Posterior mean of posterior MCMC samples for parameter z and dz
apply(e$dz, 2, mean)
apply(e$z, 2, mean)
# `Posterior variance of posterior MCMC samples for parameter z and dz
apply(e$dz, 2, var)
apply(e$z, 2, var)
apply(e$dl, 2, mean)
apply(e$l, 2, mean)
apply(e$p, 2, mean)
apply(e$p, 2, var)
# Revised 2019 Sept 6
#========================================================================================
# The 4-th example
#========================================================================================
#
## Only run examples in interactive R sessions
if (interactive()) {
# 1) Build the data interactively,
dataList <- create_dataset()
#Now, as as a return value of create_dataset(), we get the FROC data (list) named dataList.
# 2) Fit an MRMC or srsc FROC model.
fit <- fit_Bayesian_FROC(dataList)
}## Only run examples in interactive R sessions
#========================================================================================
# The 5-th example
#========================================================================================
# Comparison of the posterior probability for AUC
# In the following, we calculate the probability of the events that
# the AUC of some modality is greater than the AUC of another modality.
#========================================================================================
# Posterior Probability for some events of AUCs by using posterior MCMC samples
#========================================================================================
# This example shows how to use the stanfit (stanfit.Extended) object.
# Using stanfit object, we can extract posterior samples and using these samples,
# we can calculate the posterior probability of research questions.
fit <- fit_Bayesian_FROC(dataList.Chakra.Web.orderd,ite = 111,summary =FALSE)
# For example, we shall show the code to compute the posterior probability of the ever
# that the AUC of modality 1 is larger than that of modality 2:
e <- extract(fit)
# Then, the MCMC samples are extracted in the object "e" for all parameters.
# From this, e.g., AUC can be extracted by the code e$A that is a two dimensional array.
# The first component of e$A indicates the ID of MCMC samples and
# the second component indicates the modality ID.
# For example, the code e$A[,1] means the vector of MCMC samples of the 1 st modality.
# For example, the code e$A[,2] means the vector of MCMC samples of the 2 nd modality.
# For example, the code e$A[,3] means the vector of MCMC samples of the 3 rd modality.
# To calculate the posterior probability of the event
# that the AUC of modality 1 is larger than that of modality 2,
# we execute the following R script:
mean(e$A[,1] > e$A[,2])
# Similarly, to compute the posterior probability of the event that
# the AUC of modality 1 is larger than that of modality 3:
mean(e$A[,1] > e$A[,3])
# Similarly, to compute the posterior probability of the event that
# the AUC of modality 1 is larger than that of modality 4:
mean(e$A[,1] > e$A[,4])
# Similarly, to compute the posterior probability of the event that
# the AUC of modality 1 is larger than that of modality 5:
mean(e$A[,1] > e$A[,5])
# Similarly, to compute the posterior probability of the event that
# the AUC of modality 1 is larger than that of modality 5 at least 0.01
mean(e$A[,1] > e$A[,5]+0.01)
# Similarly,
mean( e$A[,1] > e$A[,5] + 0.01 )
mean( e$A[,1] > e$A[,5] + 0.02 )
mean( e$A[,1] > e$A[,5] + 0.03 )
mean( e$A[,1] > e$A[,5] + 0.04 )
mean( e$A[,1] > e$A[,5] + 0.05 )
mean( e$A[,1] > e$A[,5] + 0.06 )
mean( e$A[,1] > e$A[,5] + 0.07 )
mean( e$A[,1] > e$A[,5] + 0.08 )
# Since any posterior distribution tends to the Dirac measure whose center is
# true parameter under the assumption that the model is correct in the sense that the
# true distribution is belongs to a family of models.
# Thus using this procedure, we will get
# the true parameter if any more large sample size we can take.
# Close the graphic device to avoid errors in R CMD check.
Close_all_graphic_devices()
#========================================================================================
# The 6-th Example for MRMC data
#========================================================================================
# To draw FROC curves for each modality and each reader, the author provides codes.
# First, we make a fitted object of class stanfitExtended as following manner.
fit <- fit_Bayesian_FROC( ite = 1111,
cha = 1,
summary = FALSE,
Null.Hypothesis = FALSE,
dataList = dd # This is a MRMC dataset.
)
# Using this fitted model object called fit, we can draw FROC curves for the
# 1-st modality as following manner:
DrawCurves(
# This is a fitted model object
fit,
# Here, the modality is specified
modalityID = 1,
# Reader is specified as 1,2,3,4
readerID = 1:4,
# If TRUE, the new imaging device is created and curves are drawn on it.
new.imaging.device = TRUE
)
# The next codes are quite same, except modality ID and new.imaging.device
# The code that "new.imaging.device = F" means that the curves are drawn using
# the previous imaging device to plot the 1-st and 2-nd modality curves draw in the same
# Plot plain. Drawing in different curves in same plain, we can compare the curve
# of modality. Of course, the interpretation of FROC curve is the ordinal ROC curve,
# that is,
# if curve is upper then the observer performance with its modality is more greater.
# So, please enjoy drawing curves.
DrawCurves(fit,modalityID = 2,readerID = 1:4, new.imaging.device = FALSE)
DrawCurves(fit,modalityID = 3,readerID = 1:4, new.imaging.device = FALSE)
DrawCurves(fit,modalityID = 4,readerID = 1:4, new.imaging.device = FALSE)
DrawCurves(fit,modalityID = 5,readerID = 1:4, new.imaging.device = FALSE)
Close_all_graphic_devices()
#========================================================================================
# The 7-th example NON-CONVERGENT CASE 2019 OCT.
#========================================================================================
ff <- fit_Bayesian_FROC( ite = 1111, cha = 1, summary = TRUE, dataList = ddd )
#'
dat <- list(
c=c(3,2,1), #Confidence level
h=c(73703933,15661264,12360003), #Number of hits for each confidence level
f=c(1738825,53666125 , 254965774), #Number of false alarms for each confidence level
NL=100000000, #Number of lesions
NI=200000000, #Number of images
C=3) #Number of confidence level
# From the examples of the function mu_truth_creator_for_many_readers_MRMC_data()
#========================================================================================
# Large number of readers cause non-convergence
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=4,Q=6)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=4,Q=6)
d <-create_dataList_MRMC(mu.truth = m,v.truth = v)
#fit <- fit_Bayesian_FROC( ite = 111, cha = 1, summary = TRUE, dataList = d )
plot_FPF_and_TPF_from_a_dataset(d)
#========================================================================================
# convergence
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=2,Q=21)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=2,Q=21)
d <- create_dataList_MRMC(mu.truth = m,v.truth = v)
fit <- fit_Bayesian_FROC( ite = 200, cha = 1, summary = TRUE, dataList = d)
plot_FPF_TPF_via_dataframe_with_split_factor(d)
plot_empirical_FROC_curves(d,readerID = 1:21)
#========================================================================================
# non-convergence
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=5,Q=6)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=5,Q=6)
d <- create_dataList_MRMC(mu.truth = m,v.truth = v)
#fit <- fit_Bayesian_FROC( ite = 111, cha = 1, summary = TRUE, dataList = d)
#========================================================================================
# convergence
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=1,Q=36)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=1,Q=36)
d <- create_dataList_MRMC(mu.truth = m,v.truth = v)
#fit <- fit_Bayesian_FROC(ite = 111, cha = 1,summary = TRUE, dataList = d, see = 123)
#========================================================================================
# non-convergence
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=1,Q=37)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=1,Q=37)
d <- create_dataList_MRMC(mu.truth = m,v.truth = v)
#fit <- fit_Bayesian_FROC( ite = 111, cha = 1, summary = TRUE, dataList = d)
#========================================================================================
# convergence A single modality and 11 readers
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=1,Q=11)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=1,Q=11)
d <- create_dataList_MRMC(mu.truth = m,v.truth = v)
fit <- fit_Bayesian_FROC( ite = 111,
cha = 1,
summary = TRUE,
dataList = d,
see = 123455)
DrawCurves( summary = FALSE,
modalityID = c(1:fit@dataList$M),
readerID = c(1:fit@dataList$Q),
StanS4class = fit )
#========================================================================================
# convergence A single modality and 17 readers
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=1,Q=17)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=1,Q=17)
d <- create_dataList_MRMC(mu.truth = m,v.truth = v)
fit <- fit_Bayesian_FROC( ite = 1111, cha = 1, summary = TRUE, dataList = d,see = 123455)
DrawCurves( summary = FALSE, modalityID = c(1:fit@dataList$M),
readerID = c(1:fit@dataList$Q),fit )
DrawCurves( summary = FALSE, modalityID = 1,
readerID = c(8,9),fit )
#
## For readerID 8,9, this model is bad
#
Close_all_graphic_devices()
#========================================================================================
# convergence 37 readers, 1 modality
#========================================================================================
v <- v_truth_creator_for_many_readers_MRMC_data(M=1,Q=37)
m <- mu_truth_creator_for_many_readers_MRMC_data(M=1,Q=37)
d <- create_dataList_MRMC(mu.truth = m,v.truth = v)
fit <- fit_Bayesian_FROC(see = 2345678, ite = 1111, cha = 1, summary = TRUE, dataList = d)
DrawCurves( summary = FALSE, modalityID = c(1:fit@dataList$M),
readerID = c(1:fit@dataList$Q),fit )
DrawCurves( summary = FALSE, modalityID = 1,
readerID = c(8,9),fit )
# In the following, consider two readers whose ID are 8 and 15, respectively.
# Obviously, one of them will have high performamce than the other,
# however,
# Sometimes, the FROC curve does not reflect it,
# Namely, one of the FROC curve is upper than the other
# even if the FPF and TPF are not.... WHY???
DrawCurves( summary = FALSE, modalityID = 1,
readerID = c(8,15),fit )
Close_all_graphic_devices()
Close_all_graphic_devices()
# }
# NOT RUN {
# dontrun
# }
Run the code above in your browser using DataLab