# NOT RUN {
# Create the connection object used by every call below (printed on creation)
(con <- connect())

# Simplest form: no index given, so the scroll spans all indices
res <- Search(con, time_scroll = "1m")
scroll(con, res)$`_scroll_id`

# Limit the scroll to a single index and supply a query string
res <- Search(con, index = "shakespeare", q = "a*", time_scroll = "1m")
res$`_scroll_id`

# Sorting on "_doc" turns off result sorting, which is faster
res <- Search(
  con,
  index = "shakespeare",
  q = "a*",
  time_scroll = "1m",
  body = '{"sort": ["_doc"]}'
)
res$`_scroll_id`

# The scroll id from a search response is what scroll() takes as input
scroll(con, res$`_scroll_id`)
# Collect every hit: keep scrolling until a page comes back empty
res <- Search(
  con,
  index = "shakespeare",
  q = "a*",
  time_scroll = "5m",
  body = '{"sort": ["_doc"]}'
)
# Seed the accumulator with the hits from the initial search response
out <- res$hits$hits
repeat {
  res <- scroll(con, res$`_scroll_id`, time_scroll = "5m")
  hits <- length(res$hits$hits)
  if (hits == 0) {
    # an empty page means there is nothing left to append
    break
  }
  out <- c(out, res$hits$hits)
}
# Number of hits gathered, the total from the last response, and a sample hit
length(out)
res$hits$total
out[[1]]
# Clearing scroll contexts
## a single scroll id
res <- Search(
  con,
  index = "shakespeare",
  q = "a*",
  time_scroll = "5m",
  body = '{"sort": ["_doc"]}'
)
scroll_clear(con, res$`_scroll_id`)

## several scroll ids in one call
res1 <- Search(
  con,
  index = "shakespeare",
  q = "c*",
  time_scroll = "5m",
  body = '{"sort": ["_doc"]}'
)
res2 <- Search(
  con,
  index = "shakespeare",
  q = "d*",
  time_scroll = "5m",
  body = '{"sort": ["_doc"]}'
)
# Inspect the open search context count before and after clearing
nodes_stats(con, metric = "indices")$nodes[[1]]$indices$search$open_contexts
scroll_clear(con, c(res1$`_scroll_id`, res2$`_scroll_id`))
nodes_stats(con, metric = "indices")$nodes[[1]]$indices$search$open_contexts

## every scroll id at once
res1 <- Search(
  con,
  index = "shakespeare",
  q = "f*",
  time_scroll = "1m",
  body = '{"sort": ["_doc"]}'
)
res2 <- Search(
  con,
  index = "shakespeare",
  q = "g*",
  time_scroll = "1m",
  body = '{"sort": ["_doc"]}'
)
res3 <- Search(
  con,
  index = "shakespeare",
  q = "k*",
  time_scroll = "1m",
  body = '{"sort": ["_doc"]}'
)
scroll_clear(con, all = TRUE)
## sliced scrolling
# Two request bodies that differ only in the slice id (0 and 1 of max 2);
# each one is scrolled independently below and the results are combined.
body1 <- '{
"slice": {
"id": 0,
"max": 2
},
"query": {
"match" : {
"text_entry" : "a*"
}
}
}'
body2 <- '{
"slice": {
"id": 1,
"max": 2
},
"query": {
"match" : {
"text_entry" : "a*"
}
}
}'
res1 <- Search(con, index = "shakespeare", time_scroll = "1m", body = body1)
res2 <- Search(con, index = "shakespeare", time_scroll = "1m", body = body2)

# NOTE: every scroll() call advances the scroll, so a call whose result is
# not stored loses that page. All pages are therefore captured in the loops
# below instead of calling scroll() on its own first.

# Slice 0: start from the hits already returned by the initial search,
# then keep scrolling with the most recently returned scroll id until a
# page comes back empty.
out1 <- res1$hits$hits
tmp1 <- res1
repeat {
  tmp1 <- scroll(con, tmp1$`_scroll_id`)
  hits <- length(tmp1$hits$hits)
  if (hits == 0) {
    break
  }
  out1 <- c(out1, tmp1$hits$hits)
}

# Slice 1: same procedure
out2 <- res2$hits$hits
tmp2 <- res2
repeat {
  tmp2 <- scroll(con, tmp2$`_scroll_id`)
  hits <- length(tmp2$hits$hits)
  if (hits == 0) {
    break
  }
  out2 <- c(out2, tmp2$hits$hits)
}

# Combine the "_source" element of every hit from both slices
c(
  lapply(out1, "[[", "_source"),
  lapply(out2, "[[", "_source")
)
# Write scroll results to a file with jsonlite::stream_out, then read it back
res <- Search(con, time_scroll = "1m")
file <- tempfile()
scroll(con, x = res$`_scroll_id`, stream_opts = list(file = file))
# `file(file)` opens a connection on the temp path held in `file`
jsonlite::stream_in(file(file))
# remove the temp file once it has been read
unlink(file)
## stream_out and while loop
# Stream the initial search and every following scroll page into one temp
# file, then read the whole file back as a data.frame.
(file <- tempfile())
res <- Search(
  con,
  index = "shakespeare",
  time_scroll = "5m",
  size = 1000,
  stream_opts = list(file = file)
)
# A warning from scroll() is caught and stored in `res`, which ends the
# loop; presumably scroll() warns once no results remain (confirm against
# the scroll() documentation).
while (!inherits(res, "warning")) {
  res <- tryCatch(
    scroll(
      conn = con,
      x = res$`_scroll_id`,
      time_scroll = "5m",
      stream_opts = list(file = file)
    ),
    warning = function(w) w
  )
}
NROW(df <- jsonlite::stream_in(file(file)))
head(df)
# remove the temp file, as the previous streaming example does
unlink(file)
# }
# Run the code above in your browser using DataLab