# NOT RUN {
# First, load and attach the ggplot2 package.
#--------------------------------------------
library(ggplot2)
#==========
#---------------------
# 3 Independent Groups
#---------------------
# Example 1:
# Build a stripchart (one-dimensional scatterplot) of miles per gallon
# against number of cylinders from the built-in data frame mtcars,
# giving each cylinder level its own color.
#
# NOTE(review): only ggplot2 is attached above, and geom_stripchart() is
# not defined in this file -- presumably it comes from the EnvStats
# package, which would also need to be attached; confirm.
#------------------------------------------------------------------
p <- ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), y = mpg, colour = factor(cyl))
)
p +
  geom_stripchart() +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon")
#==========
# Example 2:
# Same plot as Example 1, now also displaying the results of the
# standard parametric analysis of variance (test.text = TRUE).
# The plot is drawn on a freshly opened graphics device.
#-------------------------------------------------
dev.new()
p +
  geom_stripchart(test.text = TRUE) +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon")
# }
# NOT RUN {
#==========
# Example 3:
# Using Example 2, show explicitly the layering
# process that geom_stripchart is using.
#
# This plot should look identical to the previous one.
#
# stat_summary() receives the summary function through `fun`; the
# older `fun.y` argument was deprecated in ggplot2 3.3.0 and produces
# a deprecation warning there.
# NOTE(review): `size` on the error bars is kept as-is for older
# ggplot2 versions; from ggplot2 3.4.0 on, line widths are set with
# `linewidth` -- confirm which ggplot2 version is targeted.
#-----------------------------------------------------
set.seed(47)  # fix the random jitter so the plot is reproducible
dev.new()
p + theme(legend.position = "none") +
  geom_jitter(pch = 1, width = 0.15, height = 0) +
  stat_summary(fun = "mean", geom = "point",
    size = 2, position = position_nudge(x = 0.3)) +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar",
    size = 0.75, width = 0.075, position = position_nudge(x = 0.3)) +
  stat_n_text() +
  stat_mean_sd_text() +
  stat_test_text() +
  labs(x = "Number of Cylinders", y = "Miles per Gallon")
#==========
# Example 4:
# Example 2 again, this time with all of the text placed
# inside a text box (text.box = TRUE).
#--------------------------------------------------
dev.new()
p +
  geom_stripchart(test.text = TRUE, text.box = TRUE) +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon")
#==========
# Example 5:
# Example 2 again, but only the test results are placed
# in a text box (test.text.box = TRUE).
#------------------------------------------------
dev.new()
p +
  geom_stripchart(test.text.box = TRUE, test.text = TRUE) +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon")
#==========
# Example 6:
# Example 2 again, with three changes:
#   1) plot the median and IQR instead of the mean and the 95% CI,
#   2) show text for the median and IQR, and
#   3) use the nonparametric test to compare groups.
#
# A note on the error bars: when the ggplot2 stat_summary function is
# asked for a "confidence interval" for the median (i.e., it is called
# with geom="errorbar" and fun.data="median_hilow"), the error bars it
# displays are intervals based on estimated quantiles.
# By default, stat_summary with those arguments uses the
# 2.5'th and 97.5'th percentiles.
# The function geom_stripchart, however, by default uses the
# 25'th and 75'th percentiles
# (see the explanation for the argument ci in the geom_stripchart help).
#------------------------------------------------------------
dev.new()
p +
  geom_stripchart(test.text = TRUE, location = "median") +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon")
#==========
# Clean up: drop the plot object and close all open graphics devices.
#---------
rm(p)
graphics.off()
#========================================
#---------------------
# 2 Independent Groups
#---------------------
# Example 7:
# Repeat Example 2, but use only the groups with
# 4 and 8 cylinders.
#
# The color aesthetic is mapped to factor(cyl), exactly as in
# Example 1, so each group gets its own discrete color; mapping the
# numeric cyl column instead would produce a continuous color scale.
#-----------------------------------------------
dev.new()
p <- ggplot(subset(mtcars, cyl %in% c(4, 8)),
  aes(x = factor(cyl), y = mpg, color = factor(cyl)))
p + geom_stripchart(test.text = TRUE) +
  labs(x = "Number of Cylinders", y = "Miles per Gallon")
#==========
# Example 8:
# Example 7 again, with three changes:
#   1) facet by transmission type (am),
#   2) make the text smaller (size = 3), and
#   3) put the text for the test results in a text box
#      and make it blue.
#-----------------------------------------------
dev.new()
p +
  geom_stripchart(
    test.text = TRUE,
    test.text.box = TRUE,
    n.text.params = list(size = 3),
    location.scale.text.params = list(size = 3),
    test.text.params = list(size = 3, color = "blue")
  ) +
  facet_wrap(~ am, labeller = label_both) +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon")
#==========
# Clean up: drop the plot object and close all open graphics devices.
#---------
rm(p)
graphics.off()
#========================================
#---------------------
# 2 Independent Groups
#---------------------
# Example 9:
# The guidance document USEPA (1994b, pp. 6.22--6.25)
# contains measures of 1,2,3,4-Tetrachlorobenzene (TcCB)
# concentrations (in parts per billion) from soil samples
# at a Reference area and a Cleanup area.  These data are stored
# in the data frame EPA.94b.tccb.df.
#
# Start with one-dimensional scatterplots comparing the TcCB
# concentrations between the two areas, and use a nonparametric
# test to test for a difference between areas.
#-----------------------------------------------------
dev.new()
p <- ggplot(
  data = EPA.94b.tccb.df,
  mapping = aes(x = Area, y = TcCB, colour = Area)
)
p +
  geom_stripchart(test.text = TRUE, location = "median") +
  ylab("TcCB (ppb)")
#==========
# Example 10:
# Plot the TcCB data on the log10 scale and compare the
# areas with a parametric test.
#-----------------------------------------------------
dev.new()
p <- ggplot(
  data = EPA.94b.tccb.df,
  mapping = aes(x = Area, y = log10(TcCB), colour = Area)
)
p +
  geom_stripchart(test.text = TRUE) +
  ylab("log10[ TcCB (ppb) ]")
#==========
# Example 11:
# Example 10 again, but let the variances differ between Areas
# by passing var.equal = FALSE through to the test.
#-----------------------------------------------------
dev.new()
p +
  geom_stripchart(
    test.text = TRUE,
    test.text.params = list(
      test.arg.list = list(var.equal = FALSE)
    )
  ) +
  ylab("log10[ TcCB (ppb) ]")
#==========
# Clean up: drop the plot object and close all open graphics devices.
#---------
rm(p)
graphics.off()
#========================================
#--------------------
# Paired Observations
#--------------------
# Example 12:
# The data frame ACE.13.TCE.df contains paired observations of
# trichloroethylene (TCE; mg/L) at 10 groundwater monitoring wells
# before and after remediation.
#
# Draw one-dimensional scatterplots comparing TCE concentrations
# before and after remediation, and use a paired t-test to
# test for a difference between periods.
#-----------------------------------------------------
# Print the data frame; abbreviated output is shown below.
ACE.13.TCE.df
#     TCE.mg.per.L Well Period
# 1         20.900    1 Before
# 2          9.170    2 Before
# 3          5.960    3 Before
# ...       ......   .. ......
# 18         0.520    8  After
# 19         3.060    9  After
# 20         1.900   10  After
dev.new()
p <- ggplot(
  data = ACE.13.TCE.df,
  mapping = aes(x = Period, y = TCE.mg.per.L, colour = Period)
)
p +
  geom_stripchart(test.text = TRUE, paired = TRUE, group = "Well") +
  ylab("TCE (mg/L)")
#==========
# Example 13:
# Repeat Example 12 (the paired comparison by Well), but use a
# one-sided alternative since remediation should decrease
# TCE concentration.
#---------------------------------------------------------
dev.new()
p +
  geom_stripchart(
    paired = TRUE,
    group = "Well",
    test.text = TRUE,
    test.text.params = list(
      test.arg.list = list(alternative = "less")
    )
  ) +
  ylab("TCE (mg/L)")
#==========
# Clean up: drop the plot object and close all open graphics devices.
#---------
rm(p)
graphics.off()
#========================================
#----------------------------------------
# Paired Observations, Nonparametric Test
#----------------------------------------
# Example 14:
# The data frame Helsel.Hirsch.02.Mayfly.df contains paired counts
# of mayfly nymphs above and below industrial outfalls in 12 streams.
#
# Draw one-dimensional scatterplots comparing the counts between
# locations, and use a nonparametric test to compare counts
# above and below the outfalls.
#-----------------------------------------------------
# Print the data frame; abbreviated output is shown below.
Helsel.Hirsch.02.Mayfly.df
#     Mayfly.Count Stream Location
# 1             12      1    Above
# 2             15      2    Above
# 3             11      3    Above
# ...          ...     ..    .....
# 22            60     10    Below
# 23            53     11    Below
# 24           124     12    Below
dev.new()
p <- ggplot(
  data = Helsel.Hirsch.02.Mayfly.df,
  mapping = aes(x = Location, y = Mayfly.Count, colour = Location)
)
p +
  geom_stripchart(
    test.text = TRUE,
    paired = TRUE,
    group = "Stream",
    location = "median"
  ) +
  ylab("Number of Mayfly Nymphs")
#==========
# Clean up: drop the plot object and close all open graphics devices.
#---------
rm(p)
graphics.off()
# }
# Run the code above in your browser using DataLab