Learn R Programming

misty (version 0.7.1)

ci.var: (Bootstrap) Confidence Intervals for Variances and Standard Deviations

Description

The function ci.var computes and plots confidence intervals for variances, and the function ci.sd computes confidence intervals for the standard deviations, optionally by a grouping and/or split variable. These functions also support three types of bootstrap confidence intervals (e.g., bias-corrected (BC) percentile bootstrap or bias-corrected and accelerated (BCa) bootstrap confidence intervals) and plot the bootstrap samples with histograms and density curves.

Usage

ci.var(data, ..., method = c("chisq", "bonett"),
       boot = c("none", "perc", "bc", "bca"), R = 1000, seed = NULL,
       alternative = c("two.sided", "less", "greater"),
       conf.level = 0.95, group = NULL, split = NULL, sort.var = FALSE,
       na.omit = FALSE, digits = 2, as.na = NULL,
       plot = c("none", "ci", "boot"), point.size = 2.5, point.shape = 19,
       errorbar.width = 0.3, dodge.width = 0.5, hist = TRUE,
       binwidth = NULL, bins = NULL, hist.alpha = 0.4, fill = "gray85", density = TRUE,
       density.col = "#0072B2", density.linewidth = 0.5, density.linetype = "solid",
       point = TRUE, point.col = "#CC79A7", point.linewidth = 0.6,
       point.linetype = "solid", ci = TRUE, ci.col = "black",
       ci.linewidth = 0.6, ci.linetype = "dashed", line = FALSE, intercept = 0,
       linetype = "solid", line.col = "gray65", xlab = NULL, ylab = NULL,
       xlim = NULL, ylim = NULL, xbreaks = ggplot2::waiver(), ybreaks = ggplot2::waiver(),
       axis.title.size = 11, axis.text.size = 10, strip.text.size = 11, title = NULL,
       subtitle = NULL, group.col = NULL, plot.margin = NA,  legend.title = "",
       legend.position = c("right", "top", "left", "bottom", "none"),
       legend.box.margin = c(-10, 0, 0, 0), facet.ncol = NULL, facet.nrow = NULL,
       facet.scales = "free", filename = NULL, width = NA, height = NA,
       units = c("in", "cm", "mm", "px"), dpi = 600, write = NULL, append = TRUE,
       check = TRUE, output = TRUE)

ci.sd(data, ..., method = c("chisq", "bonett"), boot = c("none", "perc", "bc", "bca"), R = 1000, seed = NULL, alternative = c("two.sided", "less", "greater"), conf.level = 0.95, group = NULL, split = NULL, sort.var = FALSE, na.omit = FALSE, digits = 2, as.na = NULL, plot = c("none", "ci", "boot"), point.size = 2.5, point.shape = 19, errorbar.width = 0.3, dodge.width = 0.5, hist = TRUE, binwidth = NULL, bins = NULL, hist.alpha = 0.4, fill = "gray85", density = TRUE, density.col = "#0072B2", density.linewidth = 0.5, density.linetype = "solid", point = TRUE, point.col = "#CC79A7", point.linewidth = 0.6, point.linetype = "solid", ci = TRUE, ci.col = "black", ci.linewidth = 0.6, ci.linetype = "dashed", line = FALSE, intercept = 0, linetype = "solid", line.col = "gray65", xlab = NULL, ylab = NULL, xlim = NULL, ylim = NULL, xbreaks = ggplot2::waiver(), ybreaks = ggplot2::waiver(), axis.title.size = 11, axis.text.size = 10, strip.text.size = 11, title = NULL, subtitle = NULL, group.col = NULL, plot.margin = NA, legend.title = "", legend.position = c("right", "top", "left", "bottom", "none"), legend.box.margin = c(-10, 0, 0, 0), facet.ncol = NULL, facet.nrow = NULL, facet.scales = "free", filename = NULL, width = NA, height = NA, units = c("in", "cm", "mm", "px"), dpi = 600, write = NULL, append = TRUE, check = TRUE, output = TRUE)

Value

Returns an object of class misty.object, which is a list with the following entries:

call

function call

type

type of analysis

data

list with the input specified in ..., data, group, and split

args

specification of function arguments

boot

data frame with bootstrap replicates of the variance or standard deviation when bootstrapping was requested

plot

ggplot2 object for plotting the results and the data frame used for plotting

result

result table

Arguments

data

a numeric vector or data frame with numeric variables, i.e., factors and character variables are excluded from data before conducting the analysis.

...

an expression indicating the variable names in data, e.g., ci.var(dat, x1, x2, x3). Note that the operators ., +, -, ~, :, ::, and ! can also be used to select variables, see 'Details' in the df.subset function.

method

a character string specifying the method for computing the confidence interval, must be one of "chisq" or "bonett" (default).

boot

a character string specifying the type of bootstrap confidence intervals (CI), i.e., "none" (default) for not conducting bootstrapping, "perc" for the percentile bootstrap CI, "bc" for the bias-corrected (BC) percentile bootstrap CI (without acceleration), and "bca" for the bias-corrected and accelerated (BCa) bootstrap CI, see 'Details' in the ci.cor function.

R

a numeric value indicating the number of bootstrap replicates (default is 1000).

seed

a numeric value specifying seeds of the pseudo-random numbers used in the bootstrap algorithm when conducting bootstrapping.

alternative

a character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less".

conf.level

a numeric value between 0 and 1 indicating the confidence level of the interval.

group

either a character string indicating the variable name of the grouping variable in data, or a vector representing the grouping variable.

split

either a character string indicating the variable name of the split variable in 'data', or a vector representing the split variable.

sort.var

logical: if TRUE, output table is sorted by variables when specifying group.

na.omit

logical: if TRUE, incomplete cases are removed before conducting the analysis (i.e., listwise deletion) when specifying more than one outcome variable.

digits

an integer value indicating the number of decimal places to be used.

as.na

a numeric vector indicating user-defined missing values, i.e. these values are converted to NA before conducting the analysis. Note that as.na() function is only applied to data, but not to group or split.

plot

a character string indicating the type of the plot to display, i.e., "none" (default) for not displaying any plots, "ci" for displaying confidence intervals for variances or standard deviations, "boot" for displaying bootstrap samples with histograms and density curves when the argument "boot" is other than "none".

point.size

a numeric value indicating the size argument in the geom_point function for controlling the size of points when plotting confidence intervals (plot = "ci").

point.shape

a numeric value between 0 and 25 or a character string as plotting symbol indicating the shape argument in the geom_point function for controlling the symbols of points when plotting confidence intervals (plot = "ci").

errorbar.width

a numeric value indicating the width argument in the geom_errorbar function for controlling the width of the whiskers in the geom_errorbar function when plotting confidence intervals (plot = "ci").

dodge.width

a numeric value indicating the width argument controlling the width of the geom elements to be dodged when specifying a grouping variable using the argument group and plotting confidence intervals (plot = "ci").

hist

logical: if TRUE (default), histograms are drawn when plotting bootstrap samples (plot = "boot").

binwidth

a numeric value or a function for specifying the binwidth argument in the geom_histogram function for controlling the width of the bins when plotting bootstrap samples (plot = "boot").

bins

a numeric value for specifying the bins argument in the geom_histogram function for controlling the number of bins when plotting bootstrap samples (plot = "boot").

hist.alpha

a numeric value between 0 and 1 for specifying the alpha argument in the geom_histogram function for controlling the opacity of the bars when plotting bootstrap samples (plot = "boot").

fill

a character string specifying the fill argument in the geom_histogram function controlling the fill aesthetic when plotting bootstrap samples (plot = "boot"). Note that this argument is applied only when no grouping variable is specified (group = NULL).

density

logical: if TRUE (default), density curves are drawn when plotting bootstrap samples (plot = "boot").

density.col

a character string specifying the color argument in the geom_density function controlling the color of the density curves when plotting bootstrap samples (plot = "boot"). Note that this argument is applied only when no grouping variable is specified (group = NULL).

density.linewidth

a numeric value specifying the linewidth argument in the geom_density function controlling the line width of the density curves when plotting bootstrap samples (plot = "boot").

density.linetype

a numeric value or character string specifying the linetype argument in the geom_density function controlling the line type of the density curves when plotting bootstrap samples (plot = "boot").

point

logical: if TRUE (default), vertical lines representing the point estimate of the variance or standard deviation are drawn when plotting bootstrap samples (plot = "boot").

point.col

a character string specifying the color argument in the geom_vline function for controlling the color of the vertical line displaying the variance or standard deviation when plotting bootstrap samples (plot = "boot"). Note that this argument is applied only when no grouping variable is specified (group = NULL).

point.linewidth

a numeric value specifying the linewidth argument in the geom_vline function for controlling the line width of the vertical line displaying the variance or standard deviation when plotting bootstrap samples (plot = "boot").

point.linetype

a numeric value or character string specifying the linetype argument in the geom_vline function controlling the line type of the vertical line displaying the variance or standard deviation when plotting bootstrap samples (plot = "boot").

ci

logical: if TRUE (default), vertical lines representing the bootstrap confidence intervals of the variance or standard deviation are drawn when plotting bootstrap samples (plot = "boot").

ci.col

a character string specifying the color argument in the geom_vline function for controlling the color of the vertical line displaying bootstrap confidence intervals when plotting bootstrap samples (plot = "boot"). Note that this argument is applied only when no grouping variable is specified (group = NULL).

ci.linewidth

a numeric value specifying the linewidth argument in the geom_vline function for controlling the line width of the vertical line displaying bootstrap confidence intervals when plotting bootstrap samples (plot = "boot").

ci.linetype

a numeric value or character string specifying the linetype argument in the geom_vline function controlling the line type of the vertical line displaying bootstrap confidence intervals when plotting bootstrap samples (plot = "boot").

line

logical: if TRUE, a horizontal line is drawn when plot = "ci" or a vertical line is drawn when plot = "boot".

intercept

a numeric value indicating the yintercept or xintercept argument in the geom_hline or geom_vline function controlling the position of the horizontal or vertical line when plot = "ci" and line = TRUE or when plot = "boot" and line = TRUE. By default, the horizontal or vertical line is drawn at 0.

linetype

a character string indicating the linetype argument in the geom_hline or geom_vline function controlling the line type of the horizontal or vertical line (default is linetype = "solid").

line.col

a character string indicating the color argument in the geom_hline or geom_vline function for controlling the color of the horizontal or vertical line.

xlab

a character string indicating the name argument in the scale_x_continuous function for labeling the x-axis. The default setting is xlab = NULL when plot = "ci" and xlab = "Variance" or xlab = "Standard Deviation" when plot = "boot".

ylab

a character string indicating the name argument in the scale_y_continuous function for labeling the y-axis. The default setting is ylab = "Variance" or ylab = "Standard Deviation" when plot = "ci" and ylab = "Probability Density, f(x)" when plot = "boot".

xlim

a numeric vector with two elements indicating the limits argument in the scale_x_continuous function for controlling the scale range of the x-axis. The default setting is xlim = NULL.

ylim

a numeric vector with two elements indicating the limits argument in the scale_y_continuous function for controlling the scale range of the y-axis. The default setting is ylim = NULL.

xbreaks

a numeric vector indicating the breaks argument in the scale_x_continuous function for controlling the x-axis breaks. The default setting is xbreaks = ggplot2::waiver().

ybreaks

a numeric vector indicating the breaks argument in the scale_y_continuous function for controlling the y-axis breaks. The default setting is ybreaks = ggplot2::waiver().

axis.title.size

a numeric value indicating the size argument in the element_text function for specifying the function controlling the font size of the axis title, i.e., theme(axis.title = element_text(size = axis.title.size)).

axis.text.size

a numeric value indicating the size argument in the element_text function for specifying the function controlling the font size of the axis text, i.e., theme(axis.text = element_text(size = axis.text.size)).

strip.text.size

a numeric value indicating the size argument in the element_text function for specifying the function controlling the font size of the strip text, i.e., theme(strip.text = element_text(size = strip.text.size)).

title

a character string indicating the title argument in the labs function for the title of the plot.

subtitle

a character string indicating the subtitle argument in the labs function for the subtitle of the plot.

group.col

a character vector indicating the color argument in the scale_color_manual and scale_fill_manual functions when specifying a grouping variable using the argument group.

plot.margin

a numeric vector with four elements indicating the plot.margin argument in the theme function controlling the plot margins. The default setting is c(5.5, 5.5, 5.5, 5.5), but switches to c(5.5, 5.5, -2.5, 5.5) when specifying a grouping variable using the argument group.

legend.title

a character string indicating the color argument in the labs function for specifying the legend title when specifying a grouping variable using the argument group.

legend.position

a character string indicating the legend.position argument in the theme function for controlling the position of the legend when specifying a grouping variable using the argument group. By default, the legend is placed at the bottom of the plot.

legend.box.margin

a numeric vector with four elements indicating the legend.box.margin argument in the theme function for controlling the margins around the full legend area when specifying a grouping variable using the argument group.

facet.ncol

a numeric value indicating the ncol argument in the facet_wrap function for controlling the number of columns when specifying a split variable using the argument split.

facet.nrow

a numeric value indicating the nrow argument in the facet_wrap function for controlling the number of rows when specifying a split variable using the argument split.

facet.scales

a character string indicating the scales argument in the facet_wrap function for controlling the scales shared across facets, i.e., "fixed", "free_x", "free_y", or "free" (default) when specifying a split variable using the argument split.

filename

a character string indicating the filename argument including the file extension in the ggsave function. Note that one of ".eps", ".ps", ".tex", ".pdf" (default), ".jpeg", ".tiff", ".png", ".bmp", ".svg" or ".wmf" needs to be specified as file extension in the filename argument. Note that plots can only be saved when plot = "ci" or plot = "boot".

width

a numeric value indicating the width argument (default is the size of the current graphics device) in the ggsave function.

height

a numeric value indicating the height argument (default is the size of the current graphics device) in the ggsave function.

units

a character string indicating the units argument (default is "in") in the ggsave function.

dpi

a numeric value indicating the dpi argument (default is 600) in the ggsave function.

write

a character string naming a file for writing the output into either a text file with file extension ".txt" (e.g., "Output.txt") or Excel file with file extension ".xlsx" (e.g., "Output.xlsx"). If the file name does not contain any file extension, an Excel file will be written.

append

logical: if TRUE (default), output will be appended to an existing text file with extension .txt specified in write, if FALSE existing text file will be overwritten.

check

logical: if TRUE (default), argument specification is checked.

output

logical: if TRUE (default), output is shown on the console.

Author

Takuya Yanagida takuya.yanagida@univie.ac.at

Details

The confidence interval based on the chi-square distribution is computed by specifying method = "chisq", while the Bonett (2006) confidence interval is requested by specifying method = "bonett". By default, the Bonett confidence interval is computed which performs well under moderate departure from normality, while the confidence interval based on the chi-square distribution is highly sensitive to minor violations of the normality assumption and its performance does not improve with increasing sample size. Note that at least four valid observations are needed to compute the Bonett confidence interval.

References

Rasch, D., Kubinger, K. D., & Yanagida, T. (2011). Statistics in psychology - Using R and SPSS. John Wiley & Sons.

Canty, A., & Ripley, B. (2024). boot: Bootstrap R (S-Plus) Functions. R package version 1.3-31.

Bonett, D. G. (2006). Approximate confidence interval for standard deviation of nonnormal distributions. Computational Statistics and Data Analysis, 50, 775-782. https://doi.org/10.1016/j.csda.2004.10.003

See Also

ci.mean, ci.mean.diff, ci.median, ci.prop, ci.prop.diff, ci.cor, descript

Examples

Run this code
#----------------------------------------------------------------------------
# Confidence Interval (CI) for the Variance

# Example 1a: Two-Sided 95% CI
ci.var(mtcars)

# Example 1b: One-Sided 95% CI based on the chi-square distribution
ci.var(mtcars, alternative = "less", method = "chisq")

#----------------------------------------------------------------------------
# Confidence Interval (CI) for the Standard Deviation

# Example 2a: Two-Sided 95% CI
ci.sd(mtcars)

# Example 2b: One-Sided 95% CI based on the chi-square distribution
ci.sd(mtcars, alternative = "less", method = "chisq")

if (FALSE) {
#----------------------------------------------------------------------------
# Bootstrap Confidence Interval (CI)

# Example 3a: Bias-corrected (BC) percentile bootstrap CI
ci.var(mtcars, boot = "bc")

# Example 3b: Bias-corrected and accelerated (BCa) bootstrap CI,
# 5000 bootstrap replications, set seed of the pseudo-random number generator
ci.var(mtcars, boot = "bca", R = 5000, seed = 123)

#----------------------------------------------------------------------------
# Grouping and Split Variable

# Example 4a: Grouping variable
ci.var(mtcars, mpg, cyl, disp, group = "vs")

# Alternative specification without using the '...' argument
ci.var(mtcars[, c("mpg", "cyl", "disp")], group = mtcars$vs)

# Example 4b: Split variable
ci.var(mtcars, mpg, cyl, disp, split = "am")

# Alternative specification without using the '...' argument
ci.var(mtcars[, c("mpg", "cyl", "disp")], split = mtcars$am)

# Example 4c: Grouping and split variable
ci.var(mtcars, mpg, cyl, disp, group = "vs", split = "am")

# Alternative specification without using the '...' argument
ci.var(mtcars[, c("mpg", "cyl", "disp")], group = mtcars$vs, split = mtcars$am)

#----------------------------------------------------------------------------
# Write Output

# Example 5a: Text file
ci.var(mtcars, write = "CI_Var_Text.txt")

# Example 5b: Excel file
ci.var(mtcars, write = "CI_Var_Excel.xlsx")

#----------------------------------------------------------------------------
# Plot Confidence Intervals

# Example 6a: Two-Sided 95% CI
ci.var(mtcars, plot = "ci")

# Example 6b: Grouping variable
ci.var(mtcars, disp, hp, group = "vs", plot = "ci")

# Example 6c: Split variable
ci.var(mtcars, disp, hp, split = "am", plot = "ci")

# Example 6d: Save plot as PDF file
ci.var(mtcars, disp, hp, plot = "ci", filename = "CI_Var.pdf",
       width = 9, height = 6)

# Example 6e: Save plot as PNG file
ci.var(mtcars, disp, hp, plot = "ci", filename = "CI_Var.png",
       width = 9, height = 6)

#----------------------------------------------------------------------------
# Plot Bootstrap Samples

# Example 7a: Two-Sided 95% CI
ci.var(mtcars, disp, hp, boot = "bc", plot = "boot")

# Example 7b: Grouping variable
ci.var(mtcars, disp, hp, group = "vs", boot = "bc", plot = "boot")

# Example 7c: Split variable
ci.var(mtcars, disp, hp, split = "am", boot = "bc", plot = "boot")

# Example 7d: Save plot as PDF file
ci.var(mtcars, disp, hp, boot = "bc", plot = "boot",
       filename = "CI_Var_Boot.pdf", width = 12, height = 7)

# Example 7e: Save plot as PNG file
ci.var(mtcars, disp, hp, boot = "bc", plot = "boot",
       filename = "CI_Var_Boot.png", width = 12, height = 7)
}

Run the code above in your browser using DataLab