# Shared base plot of area by state; each example below adds one layer to it.
area_by_state <- ggplot(midwest, aes(x = state, y = area))
area_by_state + geom_point()

# Violin (and box) plots show the shape of the distribution but hide the
# number of samples; jittered points show the number of samples but not the
# shape.
area_by_state + geom_violin()
area_by_state + geom_jitter()

# A sina layer drawn over the violin shows both at once.
area_by_state + geom_violin() + geom_sina()

# Population density by state on a log10 y axis.
p <- ggplot(midwest, aes(x = state, y = popdensity)) + scale_y_log10()
p + geom_sina()

# Map the point colour to a column of the data set.
p + geom_sina(aes(colour = inmetro))

# Or set the colour from any vector computed outside the aesthetics: adding
# 1L turns the logical flags into the integer colour codes 1 and 2.
cols <- midwest$popdensity > 10000
p + geom_sina(colour = cols + 1L)

# Sina plot with a continuous x variable, binned with cut_width().
p <- ggplot(midwest, aes(x = cut_width(area, 0.02), y = popdensity)) +
  scale_y_log10()
p + geom_sina()
# Sample Gaussian distributions ----
# Each element is one simulated sample. The elements are listed in the order
# the groups should appear on the x axis, and the element names become the
# group labels.
samples <- list(
  "Unimodal 1" = rnorm(500, 6, 1),
  "Unimodal 2" = rnorm(400, 5, 1.5),
  "Bimodal" = c(rnorm(200, 3, 0.7), rnorm(50, 7, 0.4)),
  "Trimodal" = c(rnorm(200, 2, 0.7), rnorm(300, 5.5, 0.4), rnorm(100, 8, 0.4))
)

# Long-format data frame with one row per simulated value.
# The factor levels are given explicitly rather than by re-indexing
# levels(df$Distribution): data.frame() only creates factors from strings in
# R < 4.0, so on current R that lookup returns NULL and every label would
# become NA.
df <- data.frame(
  Distribution = factor(
    rep(names(samples), times = lengths(samples)),
    levels = names(samples)
  ),
  Value = unlist(samples, use.names = FALSE)
)
# Compare the simulated groups: first as boxplots, then as sina points drawn
# over violins.
p <- ggplot(df, aes(x = Distribution, y = Value))
p + geom_boxplot()
violins <- p + geom_violin()
violins + geom_sina()
# By default the sina plot scales the width of each class relative to the
# class with the highest density. Passing scale = FALSE turns this group-wise
# scaling off.
violins + geom_sina(scale = FALSE)
# Run the code above in your browser using DataLab