SFA: Surrogate Function Approximation via the Generalized Linear Model

Description

Given a simulation that was executed with runSimulation, potentially with the argument store_results = TRUE to store the unsummarised analysis results, fit a surrogate function approximation (SFA) model to the results and (optionally) perform a root-solving step to solve a target quantity. See Schoemann et al. (2014) for details.

Usage

SFA(
  results,
  formula,
  family = "binomial",
  b = NULL,
  design = NULL,
  CI = 0.95,
  interval = NULL,
  ...
)
# S3 method for SFA
print(x, ...)

Arguments

results: data returned from runSimulation. This can be the original results object or the extracted results stored when using store_results = TRUE included to store the analysis results.
formula: formula to specify for the regression model
family: character vector indicating the family of GLMs to use (see family)
b: (optional) Target quantity to use for root solving given the fitted surrogate function (e.g., find sample size associated with SFA implied power of .80)
design: (optional) data.frame object containing all the information relevant for the surrogate model (passed to newdata in predict) with an NA value in the variable to be solved
CI: advertised confidence interval of SFA prediction around solved target
interval: interval to be passed to uniroot if not specified then the lowest and highest values from results for the respective variable will be used
...: additional arguments to pass to glm
x: an object of class SFA

Author

Phil Chalmers rphilip.chalmers@gmail.com

References

Chalmers, R. P., & Adkins, M. C. (2020). Writing Effective and Reliable Monte Carlo Simulations with the SimDesign Package. The Quantitative Methods for Psychology, 16(4), 248-280. tools:::Rd_expr_doi("10.20982/tqmp.16.4.p248")

Schoemann, A. M., Miller, P., Pornprasertmanit, S., and Wu, W. (2014). Using Monte Carlo simulations to determine power and sample size for planned missing designs. International Journal of Behavioral Development, SAGE Publications, 38, 471-479.

Examples

Run this code

if (FALSE) {

# create long Design object to fit surrogate over
Design <- createDesign(N = 100:500,
                       d = .2)
Design

#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions

Generate <- function(condition, fixed_objects) {
    Attach(condition)
    group1 <- rnorm(N)
    group2 <- rnorm(N, mean=d)
    dat <- data.frame(group = gl(2, N, labels=c('G1', 'G2')),
                      DV = c(group1, group2))
    dat
}

Analyse <- function(condition, dat, fixed_objects) {
    p <- c(p = t.test(DV ~ group, dat, var.equal=TRUE)$p.value)
    p
}

Summarise <- function(condition, results, fixed_objects) {
    ret <- EDR(results, alpha = .05)
    ret
}

#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Estimate power over N

# Use small number of replications given range of sample sizes
## note that due to the lower replications disabling the
## RAM printing will help reduce overhead

sim <- runSimulation(design=Design, replications=10,
                     generate=Generate, analyse=Analyse,
                     summarise=Summarise, store_results=TRUE, save=FALSE,
                     progress=FALSE, control=list(print_RAM=FALSE))
sim

# total of 4010 replication
sum(sim$REPLICATIONS)

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)

# plot the observed and SFA expected values
plot(p ~ N, sim, las=1, pch=16, main='Rejection rates with R=10')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
                b=.8, design=design)
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)


################
# example with smaller range but higher precision
Design <- createDesign(N = 375:425,
                       d = .2)
Design

sim2 <- runSimulation(design=Design, replications=100,
                     generate=Generate, analyse=Analyse,
                     summarise=Summarise, store_results=TRUE, save=FALSE,
                     progress=FALSE, control=list(print_RAM=FALSE))
sim2
sum(sim2$REPLICATIONS) # more replications in total

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim2)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)

# plot the observed and SFA expected values
plot(p ~ N, sim2, las=1, pch=16, main='Rejection rates with R=100')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)

###################
# vary multiple parameters (e.g., sample size + effect size) to fit
# multi-parameter surrogate

Design <- createDesign(N = seq(from=10, to=500, by=10),
                       d = seq(from=.1, to=.5, by=.1))
Design

sim3 <- runSimulation(design=Design, replications=50,
                      generate=Generate, analyse=Analyse,
                      summarise=Summarise, store_results=TRUE, save=FALSE,
                      progress=FALSE, control=list(print_RAM=FALSE))
sim3
sum(sim3$REPLICATIONS)

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim3)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# additive effects (logit(sig) ~ N + d)
sfa0 <- SFA(sim_results, formula = sig ~ N+d)
sfa0

# multiplicative effects (logit(sig) ~ N + d + N:d)
sfa <- SFA(sim_results, formula = sig ~ N*d)
sfa

# multiplicative better fit (sample size interacts with effect size)
anova(sfa0, sfa, test = "LRT")
summary(sfa)

# plot the observed and SFA expected values
library(ggplot2)
sim3$pred <- predict(sfa, type = 'response', newdata=sim3)
ggplot(sim3, aes(N, p, color = factor(d))) +
  geom_point() + geom_line(aes(y=pred)) +
  facet_wrap(~factor(d))

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)

# root prediction where d *not* used in original data
design <- data.frame(N=NA, d=.25)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.25)

}