Learn R Programming

SOR (version 0.23.1)

sor: Sequentially Offsetted Regression

Description

Fits model for data which was sampled based on a variable associated with the outcome. This function works for binary, count, and continuous responses.

Usage

sor(y.formula,
              	w1.formula,
                w2.formula = ~1,
                id,
                waves = NULL,
                family = "binomial",
                y0 = 0,
                hfunc = identity, 
                support = c(0,1),
                pi1.pi0.ratio = 1,  
                data = parent.frame(),
                init.beta=NULL,
                init.sig.2 = 1,
                weights=NULL,
                est.var = TRUE,
                CORSTR="independence")

Arguments

y.formula

Regression formula for response

w1.formula

Formula for Z, not interacted with hfunc(Y). Of form Z~terms

w2.formula

Formula for Z, interacted with hfunc(Y). Of form ~terms

id

a vector identifying the clusters. By default, data are assumed to be sorted such that observations in a cluster are in consecutive rows and higher numbered rows in a cluster are assumed to be later. If NULL, then each observation is assigned its own cluster.

waves

an integer vector identifying components of a cluster. For example, this could be a time ordering. If integers are skipped within a cluster, then dummy rows with weight 0 are added in an attempt to preserve the correlation structure (except if corstr = "exchangeable" or "independent"). This can be skipped by setting nodummy=TRUE.

family

Character string representing reference distribution for the response. Can be one of "normal", "poisson", or "binomial".

y0

Representative value of response. Ignored if family="binomial".

hfunc

Function h, used with Y. Set to identity if family="binomial".

support

Values on which to evaluate the integrals. The lowest value should be less than the minimum response and the highest should be higher than the maximum response. If response is binary, support should be c(0,1). If response is count data, support should be an integer vector, for instance 0:50. If response is continuous, support should be a vector of points on which to integrate.

pi1.pi0.ratio

The referral ratio

data

Data frame or environment with all the data

init.beta

Initial values for parameters in y.formula. Convergence may depend heavily on the initial values used. If family="binomial", the default is recommended.

init.sig.2

Initial value for sigma^2. Only for family="normal".

weights

A vector of weights for each observation. If an observation has weight 0, it is excluded from the calculations of any parameters. Observations with a NA anywhere (even in variables not included in the model) will be assigned a weight of 0. This should normally be used to preserve the correlation structure.

est.var

Logical. Should the variance be estimated. Only for family="normal".

CORSTR

Correlation structure

Value

Returns a list with values from the fit.

References

This package relies heavily on code from geeM:

McDaniel, L. S., Henderson, N. C., & Rathouz, P. J. (2013). Fast pure R implementation of GEE: application of the matrix package. The R journal, 5(1), 181.

Examples

Run this code
# NOT RUN {
generatedata <- function(beta,alpha,X,ntime,nsubj, betat, betat1)  {
  
  mean.vec <- exp(crossprod(t(X), beta))
  y <- matrix(0,nrow=nsubj, ncol=ntime)
  y[,1] <- rpois(nsubj ,lambda = mean.vec)
  old.mean <- mean.vec
  new.mean <- old.mean*exp(betat + betat1*X[,2])
  for (t in 1:(ntime-1))  {
    lambda.t <- new.mean - alpha*sqrt(old.mean*new.mean)
    theta.t <- alpha*sqrt(new.mean/old.mean)
    I <- rpois(nsubj, lambda = lambda.t)
    W <- rbinom(nsubj, y[,t], theta.t)
    
    y[,t+1] = W + I
    old.mean <- new.mean
    new.mean <- old.mean*exp(betat + betat1*X[,2])
  }
  longform <- c(t(y))
  time <- rep(1:ntime,times=nsubj)
  subject <- rep(c(1:nsubj),each=ntime)
  
  simdata <- data.frame(count = longform, time = time, subject=subject)
  return(simdata)
}
logit <- function(p) log(p)-log(1-p)
expit <- function(x) exp(x)/(1+exp(x))
set.seed(1)

npop <- 10000
beta0 <- -1.4
beta1 <- 0.4
alpha <- 0.9
gam0 <- -3.15
gam1 <- 6.3
nsubj <- 200
ntime <- 8
betat <- -0.1; betat1 <- 0.1
thresh <- 1

x0 <- rep(1, npop)
x1 <- rbinom(npop, 1, 0.5)

Xmat <- cbind(x0, x1)
timevec <- 0:(ntime-1)

testdat <- generatedata(c(beta0, beta1), alpha, Xmat, ntime, npop, betat = betat, betat1 = betat1)
Y <- matrix(testdat$count, nrow=npop, ncol=ntime, byrow=TRUE)
lambdap <- expit(gam0 + gam1*as.numeric(Y[,1]>=thresh))
Z <- rbinom(npop, 1, lambdap)

casesamp <- rep(0, npop)
casesamp[Z==1] <- rbinom(sum(Z), 1, nsubj/(2*sum(Z)))
controlsamp <- rep(0, npop)
controlsamp[Z==0] <- rbinom(sum(1-Z), 1, nsubj/(2*sum(1-Z)))


case <- which(casesamp==1)
control <- which(controlsamp==1)
id <- sort(c(case, control))
nsubj <- length(control) + length(case)
Ysamp <- NULL
lamsamp <- NULL
zsamp <- NULL
x1samp <- NULL
idsamp <- NULL
time <- NULL
obspersubj <- sample(3:ntime, size=nsubj, replace=TRUE)
for(i in 1:nsubj){
  Ysamp <- c(Ysamp, Y[id[i],1:obspersubj[i]])
  zsamp <- c(zsamp, rep(as.numeric(Z[id[i]]), obspersubj[i]))
  x1samp <- c(x1samp, rep(x1[id[i]], obspersubj[i]))
  time <- c(time, 0:(obspersubj[i]-1))
  idsamp <- c(idsamp, rep(i, obspersubj[i]))
}
p1p0 <- sum((1-Z))/sum(Z)

timemax <- pmax(time-2, 0)
y0 <- 1
betas <- c(beta0, beta1, betat, betat1)
init <-  runif(4, betas -0.1, betas + 0.1)

y.formula <- y~x1+time + x1:time
w1 <- z ~ x1+ as.factor(time) +  x1:time + x1:timemax 
w2 <- ~x1 + time + timemax + x1:time + x1:timemax 

DAT.ods <- data.frame("x1"= x1samp, "time" = time,
                      "timemax" = timemax, "z" = zsamp, "y" = Ysamp, "id" = idsamp)

sor(y.formula, w1, w2, id, family="poisson",y0=1,
    support=0:25, pi1.pi0.ratio=p1p0, data=DAT.ods, init.beta=init, CORSTR="ar1")



# }

Run the code above in your browser using DataLab