Learn R Programming

sdcMicro (version 5.6.1)

sdcMicroObj-class: Class "sdcMicroObj"

Description

Class to save all information about the SDC process

Usage

createSdcObj(
  dat,
  keyVars,
  numVars = NULL,
  pramVars = NULL,
  ghostVars = NULL,
  weightVar = NULL,
  hhId = NULL,
  strataVar = NULL,
  sensibleVar = NULL,
  excludeVars = NULL,
  options = NULL,
  seed = NULL,
  randomizeRecords = FALSE,
  alpha = 1
)

undolast(object)

strataVar(object) <- value

# S4 method for sdcMicroObj,characterOrNULL
strataVar(object) <- value

Value

a sdcMicroObj-class object

an object of class sdcMicroObj with modified slot @strataVar

Arguments

dat

The microdata set. A numeric matrix or data frame containing the data.

keyVars

Indices or names of categorical key variables. They must, of course, match with the columns of ‘dat’.

numVars

Index or names of continuous key variables.

pramVars

Indices or names of categorical variables considered to be pramed.

ghostVars

If specified, a list in which each element is itself a list of exactly two elements. The first element must be a character vector specifying exactly one variable name that was also specified as a categorical key variable (keyVars), while the second element is a character vector of valid variable names (that must not be listed as keyVars). If localSuppression or kAnon was applied, the resulting suppression pattern for each key variable is transferred to the variables that depend on it.

weightVar

Indices or name determining the vector of sampling weights.

hhId

Index or name of the cluster ID (if available).

strataVar

Indices or names of stratification variables.

sensibleVar

Indices or names of sensitive variables (for l-diversity)

excludeVars

which variables of dat should not be included in result-object? Users may specify a vector of variable-names available in dat that were not specified in either keyVars, numVars, pramVars, ghostVars, hhId, strataVar or sensibleVar.

options

additional options (if specified, a list must be used as input)

seed

(numeric) number specifying the seed which will be set to allow for reproducibility. The number will be rounded and saved as element seed in slot options.

randomizeRecords

(logical) if TRUE, the order of observations in the input microdata set will be randomized.

alpha

numeric between 0 and 1 specifying how much keys containing NAs should contribute to the frequency calculation, which is also crucial for risk estimation.

object

a sdcMicroObj-class object

value

NULL or a character vector of length 1 specifying a valid variable name

Objects from the Class

Objects can be created by calls of the form new("sdcMicroObj", ...).

Author

Bernhard Meindl, Alexander Kowarik, Matthias Templ, Elias Rut

References

Templ, M. and Meindl, B. and Kowarik, A.: Statistical Disclosure Control for Micro-Data Using the R Package sdcMicro, Journal of Statistical Software, 67 (4), 1–36, 2015. doi:10.18637/jss.v067.i04

Examples

Run this code

# Print the formal definition of the class.
showClass("sdcMicroObj")

# Wrapped in `if (FALSE)`, so nothing in this block is executed when the
# examples are run. It applies several methods to one sdcMicroObj in turn,
# calling undolast() before most steps.
if (FALSE) {
  data(testdata)
  # Arguments are spelled out in full (`weightVar`, not `w`) so the call
  # does not depend on partial argument matching.
  sdc <- createSdcObj(testdata,
    keyVars = c("urbrur", "roof", "walls", "water",
                "electcon", "relat", "sex"),
    numVars = c("expend", "income", "savings"),
    weightVar = "sampling_weight")
  head(sdc@manipNumVars)

  ### Display Risks
  sdc@risk$global
  sdc <- dRisk(sdc)
  sdc@risk$numeric

  ### use addNoise without Parameters
  sdc <- addNoise(sdc, variables = c("expend", "income"))
  head(sdc@manipNumVars)
  sdc@risk$numeric

  ### undolast
  sdc <- undolast(sdc)
  head(sdc@manipNumVars)
  sdc@risk$numeric

  ### redo addNoise with Parameter
  sdc <- addNoise(sdc, noise = 0.2)
  head(sdc@manipNumVars)
  sdc@risk$numeric

  ### dataGen
  #sdc <- undolast(sdc)
  #head(sdc@risk$individual)
  #sdc@risk$global
  #sdc <- dataGen(sdc)
  #head(sdc@risk$individual)
  #sdc@risk$global

  ### LocalSuppression
  sdc <- undolast(sdc)
  head(sdc@risk$individual)
  sdc@risk$global
  sdc <- localSuppression(sdc)
  head(sdc@risk$individual)
  sdc@risk$global

  ### microaggregation
  sdc <- undolast(sdc)
  head(get.sdcMicroObj(sdc, type = "manipNumVars"))
  sdc <- microaggregation(sdc)
  head(get.sdcMicroObj(sdc, type = "manipNumVars"))

  ### pram
  sdc <- undolast(sdc)
  head(sdc@risk$individual)
  sdc@risk$global
  # NOTE(review): confirm that `keyVar` is still the argument name accepted
  # by pram(); it may have been renamed (e.g. to `variables`) -- verify
  # against the installed sdcMicro version.
  sdc <- pram(sdc, keyVar = "water")
  head(sdc@risk$individual)
  sdc@risk$global

  ### rankSwap
  sdc <- undolast(sdc)
  head(sdc@risk$individual)
  sdc@risk$global
  head(get.sdcMicroObj(sdc, type = "manipNumVars"))
  sdc <- rankSwap(sdc)
  head(get.sdcMicroObj(sdc, type = "manipNumVars"))
  head(sdc@risk$individual)
  sdc@risk$global

  ### suda2
  sdc <- suda2(sdc)
  sdc@risk$suda2

  ### topBotCoding
  head(get.sdcMicroObj(sdc, type = "manipNumVars"))
  sdc@risk$numeric
  sdc <- topBotCoding(sdc, value = 60000000, replacement = 62000000,
    column = "income")
  head(get.sdcMicroObj(sdc, type = "manipNumVars"))
  sdc@risk$numeric

  ### LocalRecProg
  data(testdata2)
  keyVars <- c("urbrur", "roof", "walls", "water", "sex")
  w <- "sampling_weight"
  sdc <- createSdcObj(testdata2,
    keyVars = keyVars,
    weightVar = w)
  sdc@risk$global
  sdc <- LocalRecProg(sdc)
  sdc@risk$global

  ### model-based risks
  # One-sided formula with all key variables joined by "+"; reused for
  # every modRisk() method below.
  form <- as.formula(paste("~", paste(keyVars, collapse = "+")))
  sdc <- modRisk(sdc, method = "default", formulaM = form)
  get.sdcMicroObj(sdc, "risk")$model
  sdc <- modRisk(sdc, method = "CE", formulaM = form)
  get.sdcMicroObj(sdc, "risk")$model
  sdc <- modRisk(sdc, method = "PLM", formulaM = form)
  get.sdcMicroObj(sdc, "risk")$model
  sdc <- modRisk(sdc, method = "weightedLLM", formulaM = form)
  get.sdcMicroObj(sdc, "risk")$model
  sdc <- modRisk(sdc, method = "IPF", formulaM = form)
  get.sdcMicroObj(sdc, "risk")$model
}
## Ghost (linked) variables can be specified as well. Each ghost variable
## is tied to a categorical key variable and gets the same suppression
## pattern as that key variable after localSuppression() has been applied.
data(testdata)
testdata[["electcon2"]] <- testdata[["electcon"]]
testdata[["electcon3"]] <- testdata[["electcon"]]
testdata[["water2"]] <- testdata[["water"]]

keyVars <- c("urbrur", "roof", "walls", "water", "electcon", "relat", "sex")
numVars <- c("expend", "income", "savings")
w <- "sampling_weight"

## Some variables that are not key variables should share the suppression
## pattern of selected key variables; 'ghost' variables express that link.
## Each entry is a two-element list: the key variable first, then the
## variables linked to it. Here 'electcon2' and 'electcon3' are linked to
## key variable 'electcon'.
ghostVars <- list(
  list("electcon", c("electcon2", "electcon3"))
)

# Wrapped in `if (FALSE)`, so this block is not executed when the examples
# are run. It uses `testdata`, `keyVars`, `numVars`, `w` and `ghostVars`
# defined earlier in the examples.
if (FALSE) {
  # dontrun because Examples with CPU time > 2.5 times elapsed time
  ## we want variable 'water2' to be linked to key-variable 'water'
  ghostVars[[2]] <- list()
  ghostVars[[2]][[1]] <- "water"
  ghostVars[[2]][[2]] <- "water2"

  ## create the sdcMicroObj
  ## (`weightVar` is spelled out rather than abbreviated to `w`, so the
  ## call does not rely on partial argument matching)
  obj <- createSdcObj(testdata,
    keyVars = keyVars,
    numVars = numVars,
    weightVar = w,
    ghostVars = ghostVars)

  ## apply 3-anonymity to selected key variables
  obj <- kAnon(obj, k = 3)
  obj

  ## check, if the suppression patterns are identical
  manipGhostVars <- get.sdcMicroObj(obj, "manipGhostVars")
  manipKeyVars <- get.sdcMicroObj(obj, "manipKeyVars")
  all(is.na(manipKeyVars$electcon) == is.na(manipGhostVars$electcon2))
  all(is.na(manipKeyVars$electcon) == is.na(manipGhostVars$electcon3))
  all(is.na(manipKeyVars$water) == is.na(manipGhostVars$water2))

  ## exclude some variables
  obj <- createSdcObj(testdata,
    keyVars = c("urbrur", "roof", "walls"),
    numVars = "savings",
    weightVar = w,
    excludeVars = c("relat", "electcon", "hhcivil", "ori_hid", "expend"))
  colnames(get.sdcMicroObj(obj, "origData"))
}

Run the code above in your browser using DataLab