## EXAMPLE 1:
## A cross-sectional study of Australian thoroughbred race horses was
## carried out. The sampling frame for this study comprised all horses
## registered with Racing Australia in 2017 -- 2018. A random sample of horses
## was selected from the sampling frame and the owners of each horse
## invited to take part in the study. Counts of source population horses
## and study population horses are provided below. How well did the geographic
## distribution of study population horses match that of the source population?
## Jurisdiction labels, one entry per row of the comparison table:
state <- c(
  "NSW", "VIC", "QLD", "WA",
  "SA", "TAS", "NT", "Abroad"
)

## Horse counts by jurisdiction in the source population (sampling frame):
srcp <- c(11372, 10722, 7371, 4200, 2445, 1029, 510, 101)

## Horse counts by jurisdiction in the study population (sample):
stup <- c(622, 603, 259, 105, 102, 37, 22, 0)

## Labels in the first column, then one column of counts per population:
dat.df01 <- data.frame(state = state, srcp = srcp, stup = stup)

## Proportional similarity index between the two count distributions,
## reported with a 95% confidence interval:
epi.psi(dat.df01, itno = 99, conf.level = 0.95)
## The proportional similarity index for these data was 0.88 (95% CI 0.86 to
## 0.90). We conclude that the distribution of sampled horses by state
## was consistent with the distribution of the source population by state.
if (FALSE) {
  ## Plot the relative frequency of horses in each state for the source
  ## and study populations, one panel per population.
  library(ggplot2)

  ## Express each population's counts as proportions of its own total:
  dat.df01[["psrcp"]] <- prop.table(dat.df01[["srcp"]])
  dat.df01[["pstup"]] <- prop.table(dat.df01[["stup"]])

  ## Sort rows by ascending source proportion, then fix that row order as
  ## the factor level order so the x axis follows the same sequence:
  dat.df01 <- dat.df01[order(dat.df01[["psrcp"]]), ]
  dat.df01[["state"]] <- factor(
    dat.df01[["state"]],
    levels = dat.df01[["state"]]
  )

  ## Long-format data frame for ggplot2: the states are stacked twice, first
  ## with the source proportions and then with the study proportions.
  gdat.df01 <- data.frame(
    state = rep(dat.df01[["state"]], times = 2),
    pop = rep(c("Source", "Study"), each = nrow(dat.df01)),
    pfreq = c(dat.df01[["psrcp"]], dat.df01[["pstup"]])
  )
  gdat.df01[["state"]] <- factor(
    gdat.df01[["state"]],
    levels = dat.df01[["state"]]
  )

  ## Bar chart of relative frequencies by state, faceted by population:
  ggplot(data = gdat.df01, mapping = aes(x = state, y = pfreq)) +
    geom_col(position = position_dodge(), color = "grey") +
    facet_grid(~ pop) +
    scale_x_discrete(name = "State") +
    scale_y_continuous(name = "Proportion", limits = c(0, 0.50))
}
## Run the code above in your browser using DataLab