## Load the example model file shipped inside the word2vec package
path <- system.file("models", "example.bin", package = "word2vec")
model <- read.word2vec(path)

## Texts reused by the examples below; the last entry is missing (NA)
texts <- c("there is no toilet. on the bus", "no tokens from dictionary", NA)

## Input as a data.frame with columns doc_id and text
x <- data.frame(
  doc_id = c("doc1", "doc2", "testmissingdata"),
  text = texts,
  stringsAsFactors = FALSE
)
emb <- doc2vec(model, x, type = "embedding")
emb

## Embed a new document and compare it with the embeddings computed above
newdoc <- doc2vec(model, "i like busses with a toilet")
word2vec_similarity(emb, newdoc)

## Same texts, now passed as a named character vector
x <- texts
names(x) <- c("a", "b", "c")
emb <- doc2vec(model, x, type = "embedding")
emb

## Same texts, now passed as a named list of tokens
## (each text is split on spaces and dots before embedding)
x <- strsplit(setNames(texts, c("a", "b", "c")), "[ .]")
emb <- doc2vec(model, x, type = "embedding")
emb

## Show behaviour for list elements that are NA or a zero-length character
x <- list(a = character(), b = c("bus", "toilet"), c = NA)
emb <- doc2vec(model, x, type = "embedding")
emb
## Run the code above in your browser using DataLab