# NOT RUN {
#-------------------------------------------------------------------------------
# Example 1: Sampling distribution of mean
# This example demonstrate some of the simpler uses of SimDesign,
# particularly for classroom settings. The only factor varied in this simulation
# is sample size.
# skeleton functions to be saved and edited
SimFunctions()
#### Step 1 --- Define your conditions under study and create design data.frame
Design <- createDesign(N = c(10, 20, 30))
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions
# help(Generate)
Generate <- function(condition, fixed_objects = NULL) {
dat <- with(condition, rnorm(N, 10, 5)) # distributed N(10, 5)
dat
}
# help(Analyse)
Analyse <- function(condition, dat, fixed_objects = NULL) {
ret <- mean(dat) # mean of the sample data vector
ret
}
# help(Summarise)
Summarise <- function(condition, results, fixed_objects = NULL) {
ret <- c(mu=mean(results), SE=sd(results)) # mean and SD summary of the sample means
ret
}
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Collect results by looping over the rows in design
# run the simulation
Final <- runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise)
Final
# reproduce exact simulation
Final_rep <- runSimulation(design=Design, replications=1000, seed=Final$SEED,
generate=Generate, analyse=Analyse, summarise=Summarise)
Final_rep
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Extras
# }
# NOT RUN {
# compare SEs estimates to the true SEs from the formula sigma/sqrt(N)
5 / sqrt(Design$N)
# To store the results from the analyse function either
# a) omit a definition of of summarise(), or
# b) pass save_results = TRUE to runSimulation() and read the results in with SimResults()
# Note that the latter method should be adopted for longer simulations
# e.g., the a) approach
res <- runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse)
str(res)
head(res[[1]])
# or b) approach
Final <- runSimulation(design=Design, replications=1000, save_results=TRUE,
generate=Generate, analyse=Analyse, summarise=Summarise)
res <- SimResults(Final)
str(res)
head(res[[1]]$results)
# obtain empirical bootstrapped CIs during an initial run
# the simulation was completed (necessarily requires save_results = TRUE)
res <- runSimulation(design=Design, replications=1000, boot_method = 'basic',
generate=Generate, analyse=Analyse, summarise=Summarise)
res
# alternative bootstrapped CIs that uses saved results via reSummarise().
# Default directory save to:
dirname <- paste0('SimDesign-results_', unname(Sys.info()['nodename']), "/")
res <- reSummarise(summarise=Summarise, dir=dirname, boot_method = 'basic')
res
# remove the saved results from the hard-drive if you no longer want them
SimClean(results = TRUE)
# }
# NOT RUN {
#-------------------------------------------------------------------------------
# Example 2: t-test and Welch test when varying sample size, group sizes, and SDs
# skeleton functions to be saved and edited
SimFunctions()
# }
# NOT RUN {
# in real-world simulations it's often better/easier to save
# these functions directly to your hard-drive with
SimFunctions('my-simulation')
# }
# NOT RUN {
#### Step 1 --- Define your conditions under study and create design data.frame
Design <- createDesign(sample_size = c(30, 60, 90, 120),
group_size_ratio = c(1, 4, 8),
standard_deviation_ratio = c(.5, 1, 2))
Design
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions
Generate <- function(condition, fixed_objects = NULL) {
N <- condition$sample_size # alternatively, could use Attach() to make objects available
grs <- condition$group_size_ratio
sd <- condition$standard_deviation_ratio
if(grs < 1){
N2 <- N / (1/grs + 1)
N1 <- N - N2
} else {
N1 <- N / (grs + 1)
N2 <- N - N1
}
group1 <- rnorm(N1)
group2 <- rnorm(N2, sd=sd)
dat <- data.frame(group = c(rep('g1', N1), rep('g2', N2)), DV = c(group1, group2))
dat
}
Analyse <- function(condition, dat, fixed_objects = NULL) {
welch <- t.test(DV ~ group, dat)
ind <- t.test(DV ~ group, dat, var.equal=TRUE)
# In this function the p values for the t-tests are returned,
# and make sure to name each element, for future reference
ret <- c(welch = welch$p.value, independent = ind$p.value)
ret
}
Summarise <- function(condition, results, fixed_objects = NULL) {
#find results of interest here (e.g., alpha < .1, .05, .01)
ret <- EDR(results, alpha = .05)
ret
}
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Collect results by looping over the rows in design
# first, test to see if it works
res <- runSimulation(design=Design, replications=5,
generate=Generate, analyse=Analyse, summarise=Summarise)
res
# }
# NOT RUN {
# complete run with 1000 replications per condition
res <- runSimulation(design=Design, replications=1000, parallel=TRUE,
generate=Generate, analyse=Analyse, summarise=Summarise)
res
View(res)
## save final results to a file upon completion (not run)
runSimulation(design=Design, replications=1000, parallel=TRUE, filename = 'mysim',
generate=Generate, analyse=Analyse, summarise=Summarise)
## Debug the generate function. See ?browser for help on debugging
## Type help to see available commands (e.g., n, c, where, ...),
## ls() to see what has been defined, and type Q to quit the debugger
runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise,
parallel=TRUE, debug='generate')
## Alternatively, place a browser() within the desired function line to
## jump to a specific location
Summarise <- function(condition, results, fixed_objects = NULL) {
#find results of interest here (e.g., alpha < .1, .05, .01)
browser()
ret <- EDR(results[,nms], alpha = .05)
ret
}
runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise,
parallel=TRUE)
## EXTRA: To run the simulation on a MPI cluster, use the following setup on each node (not run)
# library(doMPI)
# cl <- startMPIcluster()
# registerDoMPI(cl)
# Final <- runSimulation(design=Design, replications=1000, MPI=TRUE,
# generate=Generate, analyse=Analyse, summarise=Summarise)
# saveRDS(Final, 'mysim.rds')
# closeCluster(cl)
# mpi.quit()
## Similarly, run simulation on a network linked via ssh
## (two way ssh key-paired connection must be possible between master and slave nodes)
##
## define IP addresses, including primary IP
# primary <- '192.168.2.20'
# IPs <- list(
# list(host=primary, user='phil', ncore=8),
# list(host='192.168.2.17', user='phil', ncore=8)
# )
# spec <- lapply(IPs, function(IP)
# rep(list(list(host=IP$host, user=IP$user)), IP$ncore))
# spec <- unlist(spec, recursive=FALSE)
#
# cl <- parallel::makeCluster(type='PSOCK', master=primary, spec=spec)
# res <- runSimulation(design=Design, replications=1000, parallel = TRUE,
# generate=Generate, analyse=Analyse, summarise=Summarise, cl=cl)
#~~~~~~~~~~~~~~~~~~~~~~~~
###### Post-analysis: Analyze the results via functions like lm() or SimAnova(), and create
###### tables(dplyr) or plots (ggplot2) to help visualize the results.
###### This is where you get to be a data analyst!
library(dplyr)
res %>% summarise(mean(welch), mean(independent))
res %>% group_by(standard_deviation_ratio, group_size_ratio) %>%
summarise(mean(welch), mean(independent))
# quick ANOVA analysis method with all two-way interactions
SimAnova( ~ (sample_size + group_size_ratio + standard_deviation_ratio)^2, res,
rates = TRUE)
# or more specific ANOVAs
SimAnova(independent ~ (group_size_ratio + standard_deviation_ratio)^2,
res, rates = TRUE)
# make some plots
library(ggplot2)
library(tidyr)
dd <- res %>%
select(group_size_ratio, standard_deviation_ratio, welch, independent) %>%
pivot_longer(cols=c('welch', 'independent'), names_to = 'stats')
dd
ggplot(dd, aes(factor(group_size_ratio), value)) + geom_boxplot() +
geom_abline(intercept=0.05, slope=0, col = 'red') +
geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
facet_wrap(~stats)
ggplot(dd, aes(factor(group_size_ratio), value, fill = factor(standard_deviation_ratio))) +
geom_boxplot() + geom_abline(intercept=0.05, slope=0, col = 'red') +
geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
facet_grid(stats~standard_deviation_ratio) +
theme(legend.position = 'none')
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab