## Attach doBy, which provides summaryBy() and the 'dietox' data used
## below; without it the calls fail with "could not find function".
library(doBy)

data(dietox)

## Keep only the records measured at Time 12.
dietox12 <- subset(dietox, Time == 12)

## Mean, variance and count of Weight and Feed, grouped by Evit and Cu.
summaryBy(Weight + Feed ~ Evit + Cu, data = dietox12,
          FUN = c(mean, var, length))

## The same variables, supplied as a list of two character vectors
## instead of a formula.
summaryBy(list(c("Weight", "Feed"), c("Evit", "Cu")), data = dietox12,
          FUN = c(mean, var, length))

## Additionally group by Time, using every record with Time > 1.
summaryBy(Weight + Feed ~ Evit + Cu + Time, data = subset(dietox, Time > 1),
          FUN = c(mean, var, length))
## Summaries can be requested for transformed data, here log(Weight):
summaryBy(log(Weight) + Feed ~ Evit + Cu, data = dietox12)

## A '.' on the left-hand side requests calculations on all numerical
## variables (not mentioned elsewhere):
summaryBy(
  . ~ Evit + Cu,
  data = dietox12,
  id = ~ Litter,
  FUN = mean
)
## The 'airquality' data contain missing values, so we pass 'na.rm=TRUE'
## to drop them when calculating the mean and variance. The length
## function, however, does not accept such an argument. We get around
## this by defining our own summary function in which mean and var
## receive the extra arguments while length does not (so the reported
## length still counts the missing values):
## Summary helper: returns the mean ('m'), variance ('v') and number of
## elements ('l') of x. Extra arguments in '...' (e.g. na.rm) are
## forwarded to mean() and var() only; length() always sees all of x.
sumfun <- function(x, ...) {
  center <- mean(x, ...)
  spread <- var(x, ...)
  n_elem <- length(x)
  c(m = center, v = spread, l = n_elem)
}
## Summarise Ozone and Solar.R by Month with sumfun, supplying
## na.rm = TRUE as an extra argument.
summaryBy(
  Ozone + Solar.R ~ Month,
  data = airquality,
  FUN = sumfun,
  na.rm = TRUE
)
## A '.' on the right-hand side of a formula means to stratify by all
## variables not used elsewhere. The last call below instead places the
## '.' on the left-hand side.
data(warpbreaks)
summaryBy(breaks ~ wool + tension, data = warpbreaks)
summaryBy(breaks ~ ., data = warpbreaks)
summaryBy(. ~ wool + tension, data = warpbreaks)
## keep.names = TRUE keeps the names of the variables in the result
## (works only if FUN returns a single value):
summaryBy(
  Ozone + Wind ~ Month,
  data = airquality,
  FUN = c(mean),
  na.rm = TRUE,
  keep.names = TRUE
)
## Using full.dimension=TRUE.
## Consider the ordinary summary of breaks by wool:
summaryBy(breaks ~ wool, data = warpbreaks)

## With full.dimension=TRUE the rows of that result are replicated:
summaryBy(breaks ~ wool, data = warpbreaks, full.dimension = TRUE)

## Notice: the previous result is effectively the same as
with(warpbreaks, ave(breaks, wool))

## A possible application of full.dimension=TRUE is standardizing
## (centering and scaling) data within groups:
ss <- summaryBy(
  breaks ~ wool,
  data = warpbreaks,
  full.dimension = TRUE,
  FUN = c(mean, sd)
)
(warpbreaks$breaks - ss$breaks.mean) / ss$breaks.sd
## Run the code above in your browser using DataLab