# NOT RUN {
require(ff)
require(survival)
# Number of simulated rows.
n <- 1e6

# Simulated data frame with one column per kind of variable used below:
# a normal deviate, a 0/1 integer, a constant Date (recycled to n rows),
# a Surv object built from uniform deviates, and a three-level factor.
d <- data.frame(
  x = rnorm(n),
  y = sample(0:1, size = n, replace = TRUE),
  i = as.Date("2013-01-02"),
  S = Surv(runif(n)),
  z = factor(
    sample(1:3, size = n, replace = TRUE),
    levels = 1:3,
    labels = c("elephant", "giraffe", "dog")
  )
)

## Cannot have labels for variables; ff will reject as non-atomic vectors

# Storage mode of the 0/1 column and the in-memory size of the data frame.
storage.mode(d[["y"]])
object.size(d)

# Hand estimate in bytes for comparison with object.size(d): n rows times
# the summed per-row byte widths 8, 4, 4, 4 -- presumably the widths the
# author assigned to the columns; TODO confirm which columns are counted.
n * sum(c(8, 4, 4, 4))
# Convert the data frame to an ffdf, requesting one vmode per column
# (see the vmode definitions listed at the end of this file).
f <- as.ffdf(
  d,
  vmode = c("single", "quad", "integer", "single", "quad")
)
vmode(f)

# Hand estimate in bytes after conversion: n rows times the summed
# per-row widths 4, 0.25, 4, 0.25 (0.25 byte = 2 bits for a quad).
n * sum(c(4, 0.25, 4, 0.25))
object.size(as.data.frame(f))

# First ten rows of the ffdf.
f[1:10, ]

# Compare the original columns with their ffdf counterparts: differences
# in x as a histogram, and a cross-tabulation of z.
hist(d[, "x"] - f[, "x"], nclass = 100)
table(d[, "z"], f[, "z"])

# Time two ways of selecting the rows where z is 'dog': subset() directly,
# versus computing row positions with ffwhich() and then indexing.
system.time(subset(f, z == "dog"))
system.time({
  i <- ffwhich(f, z == "dog")
  f[i, ]
})

# Check the levels present in, and the class of, the subset result.
table(subset(f, z == "dog")[, "z"])
class(subset(f, z == "dog"))

ffsave(f, file = "/tmp/f")  # creates /tmp/f.ffData /tmp/f.RData
## To load: ffload('/tmp/f')
# Attach a label to y and a units string to z, then compress the labelled
# data frame with ffCompress() and show the vmodes it ends up with.
d <- upData(d, labels = c(y = "Y"), units = c(z = "units z"))
f <- ffCompress(d)
vmode(f)

# Same exercise on an external dataset.
load("ras.rda")  # dataset is not available
r <- ffCompress(ras)
vmode(r)

# Label and units attributes stored on the compressed object.
attr(r, "label")
attr(r, "units")

# Round-trip check: convert back to a data frame and compare with the
# original, then keep the converted copy for the column-wise comparison.
all.equal(ras, as.data.frame(r))
dr <- as.data.frame(r)
# Helper: names of the attributes attached to an object.
g <- function(x) names(attributes(x))

# Column-by-column comparison of the original data frame (ras) with the
# round-tripped copy (dr): print the column name, the attribute names on
# each side, and the largest absolute difference between the raw values.
nam <- names(dr)
# seq_len() yields an empty sequence for a zero-column data frame, whereas
# 1:ncol(dr) would iterate over c(1, 0).
for (i in seq_len(ncol(dr))) {
  a <- ras[[i]]
  b <- dr[[i]]
  cat(nam[i], '\n')
  cat(g(a), '\n', g(b), '\n')
  # unclass() strips the class so the underlying values are subtracted.
  w <- abs(unclass(a) - unclass(b))
  cat(max(w, na.rm = TRUE), '\n')
  if (nam[i] == 'ldl') {
    # For ldl, also show the pair of values with the largest discrepancy.
    # w is already non-negative, so no further abs() is needed.
    j <- which.max(w)
    cat(a[j], b[j], '\n')
  }
}
# Convert the compressed object r back to an ordinary data frame.
dr <- as.data.frame(r)
# Show contents() of the data frame, then of a row subset and of a column
# subset of r (first 10 rows, first 10 columns), each passed to xless()
# -- presumably a pager-style viewer; confirm against its documentation.
xless(contents(dr))
xless(contents(r[1:10,]))
xless(contents(r[,1:10]))
# Frequency table of the gender column extracted from r.
table(r[, 'gender'])
## subset() invokes `[`, so it uses the method from ffdflabel
m <- subset(r, gender == "Male")
class(m)
dim(m)

# Inspect what survived the first subset: the label attribute on the
# result and the attributes on one extracted column.
attr(m, "label")
attributes(m[, "age"])

# After conversion to a data frame, check the class and label of age.
df <- as.data.frame(m)
class(df[["age"]])
label(df[["age"]])

## But if we subset a second time, things are not OK
k <- subset(m, age < 3)
class(k)
contents(k[, "age", drop = FALSE])

# Save r under /tmp/r; invisible() suppresses printing of the return value.
invisible(ffsave(r, file = "/tmp/r"))
## w <- read.csv.ffdf(file='/tmp/data.csv', first.rows=10000)
## table(vmode(w))
## From ff manual: vmode definitions
# boolean 1 bit logical without NA
# logical 2 bit logical with NA
# quad 2 bit unsigned integer without NA
# nibble 4 bit unsigned integer without NA
# byte 8 bit signed integer with NA
# ubyte 8 bit unsigned integer without NA
# short 16 bit signed integer with NA
# ushort 16 bit unsigned integer without NA
# integer 32 bit signed integer with NA
# single 32 bit float
# double 64 bit float
# complex 2x64 bit float
# raw 8 bit unsigned char
# character character
# }
# Run the code above in your browser using DataLab