Learn R Programming

rpg (version 1.6)

cursor: Iterator support

Description

Construct a row iterator

Usage

cursor(sql, by = 1, pars = NULL)

Arguments

sql

any valid query returning rows

by

how many rows to return each iteration

pars

optional query parameters

Details

This function generates an interator object that can be used with the foreach-package.

It is possible to use the %dopar% operator as shown in the example below. You must establish a connection to the database on each node and in your current session because the call to cursor requires it. Note that the cursor's lifetime is the current transaction block, so if anything happens to the transaction or you call END or ROLLBACK, then the cursor will no longer function. Apparently a named SQL cursor is visible to any database session, as evidenced by the example below, even though it is declared within a transaction. This is not stated explicitely in the PostgreSQL documentation.

See Also

foreach, rollback, query

Examples

Run this code
# NOT RUN {
# example requires foreach
if (! require(foreach, quietly = TRUE))
 stop("This example requires the \'foreach\' package")

# connect using defaults
createdb("rpgtesting")
connect("rpgtesting")
begin()
 
# write data frame contents
data(mtcars)
write_table(mtcars, row_names = "id", pkey = "id", overwrite = TRUE)

# expand rows to columns 8 rows at a time
x = foreach(i = cursor("SELECT * FROM mtcars", by = 8),
            .combine = rbind) %do% { i$mpg }
print(x, digits = 2)
        
# parallel example
if (require(doParallel, quietly = TRUE))
{
 # make the cluster
 cl = makeCluster(2)
 
 # must connect to database on each node
 clusterEvalQ(cl, library(rpg))
 clusterEvalQ(cl, connect("rpgtesting"))
 clusterEvalQ(cl, begin())
 
 # setup the dopar call
 registerDoParallel(cl)
 
 # take column averages 4 rows at a time
 curs1 = cursor("SELECT * FROM mtcars", by = 4)
 x = foreach(i = curs1, .combine = rbind, .inorder = FALSE) %dopar%
 {
   rr = paste0(range(abbreviate(i$id)), collapse = "-")
   pid = get_conn_info("server.pid")
   j = names(i) != "id"
   mn = signif(apply(i[, j], 2, mean), 2)
   c(rows = rr, backend = pid, mn)
 }
 x = as.data.frame(x)
 row.names(x) = x$rows
 x$rows = NULL
 print(noquote(x))
 
 clusterEvalQ(cl, rollback())
 stopCluster(cl)
}

#cleanup
disconnect()
dropdb("rpgtesting")
# }
# NOT RUN {
# }

Run the code above in your browser using DataLab