# NOT RUN {
# From a data.frame
ff <- tempfile(fileext = ".json")
docs_bulk_prep(mtcars, index = "hello", path = ff)
readLines(ff)
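## each document becomes two newline-delimited JSON lines in the file: an
## action/metadata line followed by the document body. A sketch of inspecting
## them (assumes the jsonlite package is available):
jsonlite::fromJSON(readLines(ff)[1])  # the action line with the target index
jsonlite::fromJSON(readLines(ff)[2])  # the first mtcars row as a document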
## field names cannot contain dots
names(iris) <- gsub("\\.", "_", names(iris))
docs_bulk_prep(iris, "iris", path = tempfile(fileext = ".json"))
## type can be missing, but index cannot
docs_bulk_prep(iris, "flowers", path = tempfile(fileext = ".json"))
# From a list
docs_bulk_prep(apply(iris, 1, as.list), index = "iris",
  path = tempfile(fileext = ".json"))
docs_bulk_prep(apply(USArrests, 1, as.list), index = "arrests",
  path = tempfile(fileext = ".json"))
# when chunking
## multiple files created, one for each chunk
bigiris <- do.call("rbind", replicate(30, iris, simplify = FALSE))
docs_bulk_prep(bigiris, index = "big", path = tempfile(fileext = ".json"))
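## the return value is a character vector of file paths, one per chunk; a
## sketch of controlling how many documents go into each chunk (assumes the
## chunk_size argument, default 1000):
pp <- docs_bulk_prep(bigiris, index = "big",
  path = tempfile(fileext = ".json"), chunk_size = 1000)
length(pp)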
# When using in a loop
## The last _id counter is tracked internally so the next bulk insert knows
## where to start, but you need to sleep between docs_bulk_prep calls; the
## bigger the data, the longer the sleep
files <- c(system.file("examples", "test1.csv", package = "elastic"),
  system.file("examples", "test2.csv", package = "elastic"),
  system.file("examples", "test3.csv", package = "elastic"))
paths <- vector("list", length = length(files))
for (i in seq_along(files)) {
  d <- read.csv(files[[i]])
  paths[[i]] <- docs_bulk_prep(d, index = "stuff",
    path = tempfile(fileext = ".json"))
}
unlist(paths)
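## the prepared files can then be loaded with docs_bulk(); a sketch, assuming
## a live connection object x created with connect():
for (p in unlist(paths)) docs_bulk(x, p)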
# You can include your own document id numbers
## Either pass in as an argument
files <- c(system.file("examples", "test1.csv", package = "elastic"),
  system.file("examples", "test2.csv", package = "elastic"),
  system.file("examples", "test3.csv", package = "elastic"))
tt <- vapply(files, function(z) NROW(read.csv(z)), numeric(1))
ids <- list(1:tt[1],
  (tt[1] + 1):(tt[1] + tt[2]),
  (tt[1] + tt[2] + 1):sum(tt))
paths <- vector("list", length = length(files))
for (i in seq_along(files)) {
  d <- read.csv(files[[i]])
  paths[[i]] <- docs_bulk_prep(d, index = "testes",
    doc_ids = ids[[i]], path = tempfile(fileext = ".json"))
}
unlist(paths)
## or include in the input data
### from data.frames
files <- c(system.file("examples", "test1_id.csv", package = "elastic"),
  system.file("examples", "test2_id.csv", package = "elastic"),
  system.file("examples", "test3_id.csv", package = "elastic"))
paths <- vector("list", length = length(files))
for (i in seq_along(files)) {
  d <- read.csv(files[[i]])
  paths[[i]] <- docs_bulk_prep(d, index = "testes",
    path = tempfile(fileext = ".json"))
}
unlist(paths)
### from lists via file inputs
paths <- vector("list", length = length(files))
for (i in seq_along(files)) {
  d <- read.csv(files[[i]])
  d <- apply(d, 1, as.list)
  paths[[i]] <- docs_bulk_prep(d, index = "testes",
    path = tempfile(fileext = ".json"))
}
unlist(paths)
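## a sketch of checking that the _id metadata was taken from the input data
## (assuming the *_id.csv example files carry an 'id' column, as their names
## suggest); the first action line of the first file should show that id:
readLines(paths[[1]])[1]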
# A mix of actions
## make sure you use a column named 'es_action' or this won't work
## if you need to delete or update, you need document IDs
## (these calls assume a live connection object, e.g. x <- connect())
if (index_exists(x, "baz")) index_delete(x, "baz")
df <- data.frame(a = 1:5, b = 6:10, c = letters[1:5], stringsAsFactors = FALSE)
f <- tempfile(fileext = ".json")
invisible(docs_bulk_prep(df, "baz", f))
cat(readLines(f), sep = "\n")
docs_bulk(x, f)
Sys.sleep(2)
(res <- Search(x, 'baz', asdf=TRUE)$hits$hits)
df[1, "a"] <- 99
df[1, "c"] <- "aa"
df[3, "c"] <- 33
df[3, "c"] <- "cc"
df$es_action <- c('update', 'delete', 'update', 'delete', 'delete')
df$id <- res$`_id`
df
f <- tempfile(fileext = ".json")
invisible(docs_bulk_prep(df, "baz", path = f, doc_ids = df$id))
cat(readLines(f), sep = "\n")
docs_bulk(x, f)
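## a sketch of confirming the updates and deletes took effect, allowing a
## moment for the index to refresh:
Sys.sleep(2)
Search(x, "baz", asdf = TRUE)$hits$hits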
# suppress progress bar
docs_bulk_prep(mtcars, index = "hello",
  path = tempfile(fileext = ".json"), quiet = TRUE)
## vs.
docs_bulk_prep(mtcars, index = "hello",
  path = tempfile(fileext = ".json"), quiet = FALSE)
# }