#-------------------------------------------------------------------------------
# Example 1: Sampling distribution of mean
# This example demonstrate some of the simpler uses of SimDesign,
# particularly for classroom settings. The only factor varied in this simulation
# is sample size.
# skeleton functions to be saved and edited
SimFunctions()
#### Step 1 --- Define your conditions under study and create design data.frame
Design <- createDesign(N = c(10, 20, 30))
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions
# help(Generate)
Generate <- function(condition, fixed_objects) {
dat <- with(condition, rnorm(N, 10, 5)) # distributed N(10, 5)
dat
}
# help(Analyse)
Analyse <- function(condition, dat, fixed_objects) {
ret <- c(mean=mean(dat)) # mean of the sample data vector
ret
}
# help(Summarise)
Summarise <- function(condition, results, fixed_objects) {
# mean and SD summary of the sample means
ret <- c(mu=mean(results$mean), SE=sd(results$mean))
ret
}
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Collect results by looping over the rows in design
# run the simulation in testing mode (replications = 2)
Final <- runSimulation(design=Design, replications=2,
generate=Generate, analyse=Analyse, summarise=Summarise)
Final
SimResults(Final)
# reproduce exact simulation
Final_rep <- runSimulation(design=Design, replications=2, seed=Final$SEED,
generate=Generate, analyse=Analyse, summarise=Summarise)
Final_rep
SimResults(Final_rep)
if (FALSE) {
# run with more standard number of replications
Final <- runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise)
Final
SimResults(Final)
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Extras
# compare SEs estimates to the true SEs from the formula sigma/sqrt(N)
5 / sqrt(Design$N)
# To store the results from the analyse function either
# a) omit a definition of summarise() to return all results,
# b) use store_results = TRUE (default) to store results internally and later
# extract with SimResults(), or
# c) pass save_results = TRUE to runSimulation() and read the results in with SimResults()
#
# Note that method c) should be adopted for larger simulations, particularly
# if RAM storage could be an issue and error/warning message information is important.
# a) approach
res <- runSimulation(design=Design, replications=100,
generate=Generate, analyse=Analyse)
res
# b) approach (store_results = TRUE by default)
res <- runSimulation(design=Design, replications=100,
generate=Generate, analyse=Analyse, summarise=Summarise)
res
SimResults(res)
# c) approach
Final <- runSimulation(design=Design, replications=100, save_results=TRUE,
generate=Generate, analyse=Analyse, summarise=Summarise)
# read-in all conditions (can be memory heavy)
res <- SimResults(Final)
res
head(res[[1]]$results)
# just first condition
res <- SimResults(Final, which=1)
head(res$results)
dplyr::tibble(res$condition, res$results)
# obtain empirical bootstrapped CIs during an initial run
# the simulation was completed (necessarily requires save_results = TRUE)
res <- runSimulation(design=Design, replications=1000, boot_method = 'basic',
generate=Generate, analyse=Analyse, summarise=Summarise)
res
# alternative bootstrapped CIs that uses saved results via reSummarise().
# Default directory save to:
dirname <- paste0('SimDesign-results_', unname(Sys.info()['nodename']), "/")
res <- reSummarise(summarise=Summarise, dir=dirname, boot_method = 'basic')
res
# remove the saved results from the hard-drive if you no longer want them
SimClean(results = TRUE)
}
#-------------------------------------------------------------------------------
# Example 2: t-test and Welch test when varying sample size, group sizes, and SDs
# skeleton functions to be saved and edited
SimFunctions()
if (FALSE) {
# in real-world simulations it's often better/easier to save
# these functions directly to your hard-drive with
SimFunctions('my-simulation')
}
#### Step 1 --- Define your conditions under study and create design data.frame
Design <- createDesign(sample_size = c(30, 60, 90, 120),
group_size_ratio = c(1, 4, 8),
standard_deviation_ratio = c(.5, 1, 2))
Design
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions
Generate <- function(condition, fixed_objects) {
N <- condition$sample_size # could use Attach() to make objects available
grs <- condition$group_size_ratio
sd <- condition$standard_deviation_ratio
if(grs < 1){
N2 <- N / (1/grs + 1)
N1 <- N - N2
} else {
N1 <- N / (grs + 1)
N2 <- N - N1
}
group1 <- rnorm(N1)
group2 <- rnorm(N2, sd=sd)
dat <- data.frame(group = c(rep('g1', N1), rep('g2', N2)), DV = c(group1, group2))
dat
}
Analyse <- function(condition, dat, fixed_objects) {
welch <- t.test(DV ~ group, dat)$p.value
independent <- t.test(DV ~ group, dat, var.equal=TRUE)$p.value
# In this function the p values for the t-tests are returned,
# and make sure to name each element, for future reference
ret <- nc(welch, independent)
ret
}
Summarise <- function(condition, results, fixed_objects) {
#find results of interest here (e.g., alpha < .1, .05, .01)
ret <- EDR(results, alpha = .05)
ret
}
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Collect results by looping over the rows in design
# first, test to see if it works
res <- runSimulation(design=Design, replications=2,
generate=Generate, analyse=Analyse, summarise=Summarise)
res
if (FALSE) {
# complete run with 1000 replications per condition
res <- runSimulation(design=Design, replications=1000, parallel=TRUE,
generate=Generate, analyse=Analyse, summarise=Summarise)
res
View(res)
## save final results to a file upon completion, and play a beep when done
runSimulation(design=Design, replications=1000, parallel=TRUE, filename = 'mysim',
generate=Generate, analyse=Analyse, summarise=Summarise, beep=TRUE)
## same as above, but send a notification via Pushbullet upon completion
library(RPushbullet) # read-in default JSON file
runSimulation(design=Design, replications=1000, parallel=TRUE, filename = 'mysim',
generate=Generate, analyse=Analyse, summarise=Summarise,
notification = 'complete')
## Submit as RStudio job (requires job package and active RStudio session)
job::job({
res <- runSimulation(design=Design, replications=100,
generate=Generate, analyse=Analyse, summarise=Summarise)
}, title='t-test simulation')
res # object res returned to console when completed
## Debug the generate function. See ?browser for help on debugging
## Type help to see available commands (e.g., n, c, where, ...),
## ls() to see what has been defined, and type Q to quit the debugger
runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise,
parallel=TRUE, debug='generate')
## Alternatively, place a browser() within the desired function line to
## jump to a specific location
Summarise <- function(condition, results, fixed_objects) {
#find results of interest here (e.g., alpha < .1, .05, .01)
browser()
ret <- EDR(results[,nms], alpha = .05)
ret
}
## The following debugs the analyse function for the
## second row of the Design input
runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise,
parallel=TRUE, debug='analyse-2')
####################################
## EXTRA: To run the simulation on a user-define cluster, use the following setup (not run)
## Network linked via ssh (two way ssh key-paired connection must be
## possible between master and slave nodes)
##
## Define IP addresses, including primary IP
primary <- '192.168.2.20'
IPs <- list(
list(host=primary, user='phil', ncore=8),
list(host='192.168.2.17', user='phil', ncore=8)
)
spec <- lapply(IPs, function(IP)
rep(list(list(host=IP$host, user=IP$user)), IP$ncore))
spec <- unlist(spec, recursive=FALSE)
cl <- parallel::makeCluster(type='PSOCK', master=primary, spec=spec)
res <- runSimulation(design=Design, replications=1000, parallel = TRUE,
generate=Generate, analyse=Analyse, summarise=Summarise, cl=cl)
## Using parallel='future' to allow the future framework to be used instead
library(future) # future structure to be used internally
plan(multisession) # specify different plan (default is sequential)
res <- runSimulation(design=Design, replications=100, parallel='future',
generate=Generate, analyse=Analyse, summarise=Summarise)
head(res)
# The progressr package is used for progress reporting with futures. To redefine
# use progressr::handlers() (see below)
library(progressr)
with_progress(res <- runSimulation(design=Design, replications=100, parallel='future',
generate=Generate, analyse=Analyse, summarise=Summarise))
head(res)
# re-define progressr's bar (below requires cli)
handlers(handler_pbcol(
adjust = 1.0,
complete = function(s) cli::bg_red(cli::col_black(s)),
incomplete = function(s) cli::bg_cyan(cli::col_black(s))
))
with_progress(res <- runSimulation(design=Design, replications=100, parallel='future',
generate=Generate, analyse=Analyse, summarise=Summarise))
# reset future computing plan when complete (good practice)
plan(sequential)
####################################
###### Post-analysis: Analyze the results via functions like lm() or SimAnova(), and create
###### tables(dplyr) or plots (ggplot2) to help visualize the results.
###### This is where you get to be a data analyst!
library(dplyr)
res %>% summarise(mean(welch), mean(independent))
res %>% group_by(standard_deviation_ratio, group_size_ratio) %>%
summarise(mean(welch), mean(independent))
# quick ANOVA analysis method with all two-way interactions
SimAnova( ~ (sample_size + group_size_ratio + standard_deviation_ratio)^2, res,
rates = TRUE)
# or more specific ANOVAs
SimAnova(independent ~ (group_size_ratio + standard_deviation_ratio)^2,
res, rates = TRUE)
# make some plots
library(ggplot2)
library(tidyr)
dd <- res %>%
select(group_size_ratio, standard_deviation_ratio, welch, independent) %>%
pivot_longer(cols=c('welch', 'independent'), names_to = 'stats')
dd
ggplot(dd, aes(factor(group_size_ratio), value)) + geom_boxplot() +
geom_abline(intercept=0.05, slope=0, col = 'red') +
geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
facet_wrap(~stats)
ggplot(dd, aes(factor(group_size_ratio), value, fill = factor(standard_deviation_ratio))) +
geom_boxplot() + geom_abline(intercept=0.05, slope=0, col = 'red') +
geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
facet_grid(stats~standard_deviation_ratio) +
theme(legend.position = 'none')
}
Run the code above in your browser using DataLab