# Summarize a dataset by two variables
dfx <- data.frame(
group = c(rep('A', 8), rep('B', 15), rep('C', 6)),
sex = sample(c("M", "F"), size = 29, replace = TRUE),
age = runif(n = 29, min = 18, max = 54)
)
# Note the use of the '.' function to allow
# group and sex to be used without quoting
ddply(dfx, .(group, sex), summarize,
mean = round(mean(age), 2),
sd = round(sd(age), 2))
# An example using a formula for .variables
ddply(baseball[1:100,], ~ year, nrow)
# Applying two functions; nrow and ncol
ddply(baseball, .(lg), c("nrow", "ncol"))
# Calculate mean runs batted in for each year
rbi <- ddply(baseball, .(year), summarise,
mean_rbi = mean(rbi, na.rm = TRUE))
# Plot a line chart of the result
plot(mean_rbi ~ year, type = "l", data = rbi)
# make new variable career_year based on the
# start year for each player (id)
base2 <- ddply(baseball, .(id), mutate,
career_year = year - min(year) + 1
)
Run the code above in your browser using DataLab