# NOT RUN {
# read the Employee data (format "lessR") into the default data frame mydata
mydata <- rd(
  "Employee",
  format = "lessR",
  quiet = TRUE
)
# replace mydata with the result of Subset(random = 0.4) so that the
# examples below are less computationally intensive
mydata <- Subset(random = 0.4, quiet = TRUE)
# many examples commented out to reduce CPU time for the CRAN submission
#---------------------------------------------------
# traditional scatterplot with two numeric variables
#---------------------------------------------------
# scatterplot with all defaults
Plot(Years, Salary)
# or use abbreviation sp in place of Plot
# new shape and point size, no grid or background color
# NOTE(review): the grid argument was garbled in this copy of the example;
# grid.x.stroke/grid.y.stroke are presumed from the style() example
# further below -- confirm against the installed lessR version
# Plot(Years, Salary, size=2, shape="diamond", bg.fill="off",
#      grid.x.stroke="off", grid.y.stroke="off")
# bubble plot with size determined by the value of Pre
# display the value for the bubbles with values of min, median and max
# Plot(Years, Salary, size=Pre, size.cut=3)
# plot 0.95 data ellipse with the points identified that represent
# the 0.10 largest Mahalanobis distances (i.e., potential outliers)
# Plot(Years, Salary, ellipse=0.95, ID.cut=0.1)
# variables of interest are in a data frame not the default mydata
# plot 0.6 and 0.9 data ellipses
# change color theme to gold with black background
style("gold", sub.theme="black")
# both levels are listed with c(): seq(.6, .9) steps by 1 by default and
# therefore yields only 0.6, so the 0.9 ellipse would never be requested
Plot(eruptions, waiting, ellipse=c(.6, .9), data=faithful)
# translucent data ellipses without points or edges showing the
# idealized joint distribution assuming bivariate normality
# here seq() is given an explicit step of .10: levels .1, .2, ..., .9
Plot(Years, Salary, size=0, ellipse=seq(.1,.9,.10), ellipse.stroke="off")
# scatterplot with two x-variables, plotted against Salary
# define a completely new style, then back to default
# style(device.fill=rgb(247,242,230, maxColorValue=255),
# bg.fill="off", bg.stroke="off", pt.fill="black", trans=0,
# lab.stroke="black", values.stroke="black",
# axis.y.stroke="off", grid.x.stroke="off", grid.y.stroke="black",
# grid.lty="dotted", grid.lwd=1)
# Plot(c(Pre, Post), Salary)
# style("lightbronze")
# increase span (smoothing) from default of .7 to 1.25
# span is a loess parameter and generates a caution that can be
# ignored that it is not a graphical parameter -- we know that
# Plot(Years, Salary, fit="loess", span=1.25)
# 2-D kernel density (more useful for larger sample sizes)
# Plot(Years, Salary, smoothed=TRUE)
#------------------------------------------------------
# scatterplot matrix from a vector of numeric variables
#------------------------------------------------------
# Salary, Years and Pre together, with a least squares fit line
# (fit = "ls") and color options for the background fill and fit stroke
Plot(
  c("Salary", "Years", "Pre"),
  fit = "ls",
  bg.fill = "powderblue",
  fit.stroke = "red"
)
#--------------------------------------------------------------
# Trellis graphics and by for groups with two numeric variables
#--------------------------------------------------------------
# Trellis plot conditioned on one variable, Dept
Plot(Years, Salary, by1 = Dept)
# Trellis plot conditioned on two variables, Dept and Gender, with
# HealthPlan as the grouping variable and a least squares fit
Plot(
  Years, Salary,
  by1 = Dept,
  by2 = Gender,
  fit = "ls",
  by = HealthPlan
)
# vary both shape and color with a least-squares fit line for each group
# Plot(Years, Salary, by1=Gender, fit="ls",
# color=c("darkgreen", "brown"), shape=c("F","M"), size=.8)
# compare the men and women Salary according to Years worked
# Plot(Years, Salary, by=Gender, ellipse=.50)
#--------------------------------------------------
# analysis of a single numeric variable (or vector)
#--------------------------------------------------
# 1-variable scatterplots
# ------------------------
# scatterplot of one continuous variable, Salary,
# with custom colors supplied for the outliers via out15 and out30
Plot(
  Salary,
  out15 = "hotpink",
  out30 = "darkred"
)
# one variable scatterplot with added jitter of points and a boxplot
# Plot(Salary, method="jitter", boxplot=TRUE)
# binned values to plot counts
# ----------------------------
# bin the values of Salary to plot counts as a frequency polygon
# Plot(Salary, values="count") # bin the values
# time charts
#------------
# run chart of Salary (run = TRUE), with the area filled in steelblue
Plot(
  Salary,
  run = TRUE,
  area = "steelblue"
)
# two run charts in same plot
# or could do a multivariate time series
# Plot(c(Pre, Post), run=TRUE)
# Trellis graphics run chart with custom line width
# Plot(Salary, run=TRUE, by1=Gender, lwd=3)
# daily time series plot
# create the daily time series from R built-in data set airquality
# oz.ts <- ts(airquality$Ozone, start=c(1973, 121), frequency=365)
# Plot(oz.ts)
# multiple time series plotted from dates and stacked
# black background with translucent areas
# date <- seq(as.Date("2013/1/1"), as.Date("2016/1/1"), by="quarter")
# x1 <- rnorm(13, 100, 15)
# x2 <- rnorm(13, 100, 15)
# x3 <- rnorm(13, 100, 15)
# df <- data.frame(date, x1, x2, x3)
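# NOTE(review): the grid argument name was garbled in this copy of the example;
# grid.x.stroke/grid.y.stroke are presumed -- confirm against the lessR version
# Plot(date, x1:x3, data=df, area="steelblue3", stroke="steelblue2",
# trans=.55, bg.fill="gray10", grid.x.stroke="gray25", grid.y.stroke="gray25")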
#------------------------------------------
# analysis of a single categorical variable
#------------------------------------------
# with all defaults: a 1-D bubble plot of the frequencies,
# which replaces the bar chart
Plot(Dept)
# abbreviated category labels
# Plot(Dept, label.max=2)
# plot of frequencies for each category (level), replaces bar chart
# Plot(Dept, values="count")
#----------------------------------------------------
# scatterplot of numeric against categorical variable
#----------------------------------------------------
# chart of Salary by Dept with the plotted mean of each level;
# the x-axis labels are rotated 45 degrees and then offset to fit
Plot(
  Dept, Salary,
  rotate.x = 45,
  offset = 1
)
#-------------------
# Cleveland dot plot
#-------------------
# Salary on the x-axis, row.names on the y-axis
Plot(Salary, row.names)
# standard scatterplot
# NOTE(review): this example ended in a garbled `segments.y=FALSE="on"`;
# the lost argument was presumably a grid option set to "on"
# (grid.y.stroke is a guess) -- confirm against the lessR version
# Plot(Salary, row.names, sort.yx=FALSE, segments.y=FALSE, grid.y.stroke="on")
# Cleveland dot plot with two x-variables
# Plot(c(Pre, Post), row.names)
#------------
# annotations
#------------
# add text at the one location specified by x1 and y1
# Plot(Years, Salary, add="Hi There", x1=12, y1=80000)
# add text at three different specified locations
# Plot(Years, Salary, add="Hi There", x1=c(12, 16, 18), y1=c(80000, 100000, 60000))
# add three different text blocks at three different specified locations
# Plot(Years, Salary, add=c("Hi", "Bye", "Wow"), x1=c(12, 16, 18), y1=c(80000, 100000, 60000))
# overlay a 0.95 data ellipse plus a vertical and a horizontal line
# through the respective means
Plot(
  Years, Salary,
  ellipse = TRUE,
  add = c("v.line", "h.line"),
  x1 = "mean.x",
  y1 = "mean.y"
)
# can be done also with the following short-hand
# Plot(Years, Salary, ellipse=TRUE, add=c("means"))
# a rectangle requires two points, <x1,y1> and <x2,y2>
# Plot(Years, Salary, add="rect", x1=12, y1=80000, x2=16, y2=115000,
# add.trans=.8, add.fill="gold", add.stroke="gold4", add.lwd=0.5)
# the first object, a rectangle, requires all four coordinates
# the vertical line at x=2 requires only an x1 coordinate, listed 2nd
# Plot(Years, Salary, add=c("rect", "v.line"), x1=c(10, 2), y1=80000, x2=12, y2=115000)
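# two different rectangles with different locations, fill colors and translucence
# NOTE(review): the second x2 value, 75000, looks like a Salary-scale value
# placed on the Years axis -- confirm the intended x-coordinate
# Plot(Years, Salary, add=c("rect", "rect"),
# x1=c(10, 2), y1=c(60000, 45000), x2=c(12, 75000), y2=c(80000, 55000),
# add.fill=c("gold3", "green"), add.trans=c(.8,.4))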
#----------------------------------------------------
# analysis of two categorical variables (Likert data)
#----------------------------------------------------
# switch mydata to the Mach4 Likert data, 0 to 5
mydata <- rd("Mach4", format = "lessR", quiet = TRUE)
# subset again so the examples are less computationally intensive
mydata <- Subset(random = 0.4, quiet = TRUE)
# the size of each plotted point (bubble) depends on its joint frequency;
# triggered by default when < n.cat=8 unique values for each variable
Plot(m06, m07)
# value labels for the integer values, used in the examples below
LikertCats <- c(
  "Strongly Disagree", "Disagree", "Slightly Disagree",
  "Slightly Agree", "Agree", "Strongly Agree"
)
# use value labels for the integer values, modify color options
# Plot(m06, m07, value.labels=LikertCats,
# fill="powderblue", stroke="blue", bubble.text="darkred")
# get correlation analysis instead of cross-tab analysis:
# n.cat, the maximum number of categories of equally spaced integer
# values that defines a variable as categorical, is specified here as 0
Plot(m06, m07, n.cat = 0)
# proportions within each level of the other variable
# Plot(m06, m07, proportion=TRUE)
#-----------------------------
# Bubble Plot Frequency Matrix
#-----------------------------
Plot(
  c(m06, m07, m09, m10),
  value.labels = LikertCats
)
#---------------
# function curve
#---------------
# x <- seq(10,50,by=2)
# y1 <- sqrt(x)
# y2 <- x**.33
# x is sorted with equal intervals so run chart by default
# Plot(x, y1)
# custom function plot
# Plot(x, y1, ylab="My Y", xlab="My X", main="My Curve", stroke="blue",
# bg.fill="snow", area="lightsteelblue")
# multiple plots, need data frame
# mydata <- data.frame(x, y1, y2)
# Plot(x, c(y1, y2))
#-----------
# modern art
#-----------
# clr <- colors()
# clr <- clr[-(153:353)] # get rid of most of the grays
# n <- sample(2:30, size=1)
# x <- rnorm(n)
# y <- rnorm(n)
# color1 <- clr[sample(1:length(clr), size=1)]
# color2 <- clr[sample(1:length(clr), size=1)]
# Plot(x, y, run=TRUE, area=color1, stroke=color2,
# xy.ticks=FALSE, main="Modern Art", xlab="", ylab="",
# cex.main=2, col.main="lightsteelblue", n.cat=0)
# }
# Run the code above in your browser using DataLab