Learn R Programming

intubate (version 1.0.0)

survey: Interfaces for survey package for data science pipelines.

Description

Interfaces to survey functions that can be used in a pipeline implemented by magrittr.

Usage

ntbt_svyby(data, ...)        ## data <-> design
ntbt_svycoxph(data, ...)     ## data <-> design
ntbt_svydesign(data, ...)
ntbt_svyglm(data, ...)       ## data <-> design
ntbt_svymean(data, ...)      ## data <-> design
ntbt_svyquantile(data, ...)  ## data <-> design
ntbt_svyratio(data, ...)     ## data <-> design
ntbt_svytotal(data, ...)     ## data <-> design
ntbt_twophase(data, ...)

Arguments

data
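data frame, tibble, list, ... For the functions marked `data <-> design` in Usage, this is the survey design object passed as the `design` argument of the interfaced function.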
...
Other arguments passed to the corresponding interfaced function.

Value

Object returned by interfaced function.

Details

Interfaces call their corresponding interfaced function.

Examples

Run this code
## Not run: 
# library(intubate)
# library(magrittr)
# library(survey)
# 
# ## svydesign
# data(api)
# ## Original function to interface
# # stratified sample
# dstrat <- svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
# # one-stage cluster sample
# dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
# # two-stage cluster sample: weights computed from population sizes.
# dclus2 <- svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
# 
# ## The interface puts data as first parameter
# # stratified sample
# dstrat <- ntbt_svydesign(data=apistrat, id=~1,strata=~stype, weights=~pw, fpc=~fpc)
# # one-stage cluster sample
# dclus1 <- ntbt_svydesign(data=apiclus1, id=~dnum, weights=~pw, fpc=~fpc)
# # two-stage cluster sample: weights computed from population sizes.
# dclus2 <- ntbt_svydesign(data=apiclus2, id=~dnum+snum, fpc=~fpc1+fpc2)
# 
# ## so it can be used easily in a pipeline.
# dstrat <- apistrat %>%
#   ntbt_svydesign(id=~1,strata=~stype, weights=~pw, fpc=~fpc)
# # one-stage cluster sample
# dclus1 <- apiclus1 %>%
#   ntbt_svydesign(id=~dnum, weights=~pw, fpc=~fpc)
# # two-stage cluster sample: weights computed from population sizes.
# dclus2 <- apiclus2 %>%
#   ntbt_svydesign(id=~dnum+snum, fpc=~fpc1+fpc2)
# 
# ## twophase
# ## two-phase simple random sampling.
# data(pbc, package="survival")
# pbc$randomized <- with(pbc, !is.na(trt) & trt>0)
# pbc$id<-1:nrow(pbc)
# 
# ## Original function to interface
# d2pbc <- twophase(id=list(~id,~id), data=pbc, subset=~randomized)
# svymean(~bili, d2pbc)
# 
# ## The interface puts data as first parameter
# d2pbc <- ntbt_twophase(data=pbc, id=list(~id,~id), subset=~randomized)
# svymean(~bili, d2pbc)
# 
# ## so it can be used easily in a pipeline.
# d2pbc <- pbc %>%
#   ntbt_twophase(id=list(~id,~id), subset=~randomized)
# svymean(~bili, d2pbc)
# 
# 
# ## ntbt_svyby, ntbt_svyglm, ntbt_svymean,
# ## ntbt_svyquantile, ntbt_svyratio, ntbt_svytotal
# 
# ## From vignette of survey
# vars<-names(apiclus1)[c(12:13,16:23,27:37)] 
# 
# ## original
# dclus1 <- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
# summary(dclus1)
# ## direct call
# dclus1 <- apiclus1 %>%
#   ntbt(svydesign, id = ~dnum, weights = ~pw, fpc = ~fpc)
# summary(dclus1)
# ## interface
# dclus1 <- apiclus1 %>%
#   ntbt_svydesign(id = ~dnum, weights = ~pw, fpc = ~fpc)
# summary(dclus1)
# 
# ## original
# svymean(~api00, dclus1)
# ## direct call
# dclus1 %>%
#   ntbt(svymean, x=~api00)
# ## interface
# dclus1 %>%
#   ntbt_svymean(x=~api00)
# 
# ## original
# svyquantile(~api00, dclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
# ## direct call
# dclus1 %>%
#   ntbt(svyquantile, ~api00, quantile=c(0.25,0.5,0.75), ci=TRUE)
# ## interface
# dclus1 %>%
#   ntbt_svyquantile(~api00, quantile=c(0.25,0.5,0.75), ci=TRUE)
# 
# ## original
# svytotal(~stype, dclus1)
# svytotal(~enroll, dclus1)
# ## direct call
# dclus1 %>%
#   ntbt(svytotal, ~stype)
# dclus1 %>%
#   ntbt(svytotal,~enroll)
# ## interface
# dclus1 %>%
#   ntbt_svytotal(~stype)
# dclus1 %>%
#   ntbt_svytotal(~enroll)
# 
# ## original
# svyratio(~api.stu, ~enroll, dclus1)
# svyratio(~api.stu, ~enroll, design=subset(dclus1, stype=="H"))
# ## direct call
# dclus1 %>%
#   ntbt(svyratio, ~api.stu, ~enroll)
# dclus1 %>%
#   ntbt(svyratio, ~api.stu, ~enroll, design=subset("#", stype=="H"))
# ## interface
# dclus1 %>%
#   ntbt_svyratio(~api.stu, ~enroll)
# dclus1 %>%
#   ntbt_svyratio(~api.stu, ~enroll, design=subset("#", stype=="H"))
# 
# ## original
# svymean(make.formula(vars),dclus1,na.rm=TRUE)
# ## direct call
# dclus1 %>%
#   ntbt(svymean, make.formula(vars), na.rm=TRUE)
# ## interface
# dclus1 %>%
#   ntbt_svymean(make.formula(vars), na.rm=TRUE)
# 
# ## original
# svyby(~ell+meals, ~stype, design=dclus1, svymean)
# ## direct call
# dclus1 %>%
#   ntbt(svyby, ~ell+meals, ~stype, svymean)
# ## interface
# dclus1 %>%
#   ntbt_svyby(~ell+meals, ~stype, svymean)
# 
# ## original
# regmodel <- svyglm(api00~ell+meals, design=dclus1)
# summary(regmodel)
# logitmodel <- svyglm(I(sch.wide=="Yes")~ell+meals, design=dclus1,
#                      family=quasibinomial()) 
# summary(logitmodel)
# ## direct call
# dclus1 %>%
#   ntbt(svyglm, api00~ell+meals) %>%
#   summary()
# dclus1 %>%
#   ntbt(svyglm, I(sch.wide=="Yes")~ell+meals, family=quasibinomial()) %>%
#   summary()
# ## interface
# dclus1 %>%
#   ntbt_svyglm(api00~ell+meals) %>%
#   summary()
# dclus1 %>%
#   ntbt_svyglm(I(sch.wide=="Yes")~ell+meals, family=quasibinomial()) %>%
#   summary()
# 
# ## ntbt_svycoxph
# ## stratified on case status
# data(nwtco)
# ## original
# dcchs <- twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
#                   subset=~I(in.subcohort | rel), data=nwtco)
# svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12), design=dcchs)
# ## direct call
# nwtco %>%
#   ntbt(twophase,id = list(~seqno,~seqno), strata = list(NULL,~rel),
#        subset = ~I(in.subcohort | rel)) %>%
#   ntbt(svycoxph, Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12))
# ## interface
# nwtco %>%
#   ntbt_twophase(id = list(~seqno,~seqno), strata = list(NULL,~rel),
#        subset = ~I(in.subcohort | rel)) %>%
#   ntbt_svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12))
# 
# ## Involved example using `intubOrders`, transforming the code in:
# 
# ## https://cran.r-project.org/web/packages/survey/vignettes/survey.pdf
# 
# data(api)
# 
# ## First, the original code from the vignette
# vars<-names(apiclus1)[c(12:13,16:23,27:37)] 
# 
# dclus1 <- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
# summary(dclus1)
# svymean(~api00, dclus1)
# svyquantile(~api00, dclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
# svytotal(~stype, dclus1)
# svytotal(~enroll, dclus1)
# svyratio(~api.stu,~enroll, dclus1)
# svyratio(~api.stu, ~enroll, design=subset(dclus1, stype=="H"))
# svymean(make.formula(vars),dclus1,na.rm=TRUE)
# svyby(~ell+meals, ~stype, design=dclus1, svymean)
# regmodel <- svyglm(api00~ell+meals,design=dclus1)
# logitmodel <- svyglm(I(sch.wide=="Yes")~ell+meals, design=dclus1, family=quasibinomial()) 
# summary(regmodel)
# summary(logitmodel)
# 
# ## Now using intubOrders and ntbt.
# 
# ## Strategy 1: long pipeline, light use of intubOrders.
# 
# apiclus1 %>%
#   ntbt(svydesign, id = ~dnum, weights = ~ pw, fpc = ~ fpc, "<|| summary >") %>%
#   ntbt(svymean, ~ api00, "<|f| print >") %>%
#   ntbt(svyquantile, ~ api00, quantile = c(0.25,0.5,0.75), ci = TRUE, "<|f| print >") %>%
#   ntbt(svytotal, ~ stype, "<|f| print >") %>%
#   ntbt(svytotal, ~ enroll, "<|f| print >") %>%
#   ntbt(svyratio, ~ api.stu, ~ enroll, "<|f| print >") %>%
#   ntbt(svyratio, ~ api.stu, ~ enroll, design = subset("#", stype == "H"), "<|f| print >") %>%
#   ntbt(svymean, make.formula(vars), na.rm = TRUE, "<|f| print >") %>%
#   ntbt(svyby, ~ ell + meals, ~ stype, svymean, "<|f| print >") %>%
#   ntbt(svyglm, api00 ~ ell + meals, "<|f| summary >") %>%
#   ntbt(svyglm, I(sch.wide == "Yes") ~ ell + meals, family = quasibinomial(), "<|f| summary >") %>%
#   summary() ## We have forwarded the result from svydesign (line 2),
#             ## so we could still continue using it downstream.
# 
# ## Strategy 2: short pipeline, heavy use of *one* intubOrder.
# apiclus1 %>%
#   ntbt(svydesign, id = ~dnum, weights = ~pw, fpc = ~fpc,
#        "<|f|
#          summary;
#          svymean(~api00, #);
#          svyquantile(~api00, #, quantile = c(0.25, 0.5, 0.75), ci = TRUE);
#          svytotal(~stype, #);
#          svytotal(~enroll, #);
#          svyratio(~api.stu,~enroll, #);
#          svyratio(~api.stu, ~enroll, design = subset(#, stype == 'H'));
#          svymean(make.formula(vars), #, na.rm = TRUE);
#          svyby(~ell+meals, ~stype, #, svymean);
#          summary(svyglm(api00~ell+meals, #));
#          summary(svyglm(I(sch.wide == 'Yes')~ell+meals, #, family = quasibinomial())) >") %>%
#   head()  ## We have forwarded the original dataset,
#           ## so we could continue using it downstream.
# ## End(Not run)

Run the code above in your browser using DataLab