if (FALSE) {
  ## Example: fit a StarSpace model on the questions in the dekamer dataset
  ## and inspect the resulting dictionary and embeddings.
  data(dekamer, package = "ruimtehol")

  ## Split every question on non-word characters, drop the empty tokens and
  ## collapse the tokens back into one space-separated line per question.
  x <- strsplit(dekamer$question, "\\W")
  x <- lapply(x, FUN = function(x) x[x != ""])
  ## vapply guarantees a character vector, also for empty input
  x <- vapply(x, FUN = function(x) paste(x, collapse = " "),
              FUN.VALUE = character(1))

  ## Seed the RNG *before* drawing the 70/30 split, otherwise the content of
  ## the training and validation files differs on every run.
  set.seed(123456789)
  idx <- sample.int(n = nrow(dekamer), size = round(nrow(dekamer) * 0.7))

  ## Write the scratch files to the session temp directory instead of the
  ## working directory, so no user file can be overwritten.
  train_file <- file.path(tempdir(), "traindata.txt")
  validation_file <- file.path(tempdir(), "validationdata.txt")
  writeLines(x[idx], con = train_file)
  writeLines(x[-idx], con = validation_file)

  ## Re-seed right before training so that training starts from a known RNG
  ## state, independent of the sampling done above.
  set.seed(123456789)
  m <- starspace(
    file = train_file, validationFile = validation_file,
    trainMode = 5, dim = 10,
    loss = "softmax", lr = 0.01, ngrams = 2, minCount = 5,
    similarity = "cosine", adagrad = TRUE, ws = 7, epoch = 3,
    maxTrainTime = 10
  )

  ## Inspect the dictionary of the fitted model
  str(starspace_dictionary(m))

  ## Matrix representation of the model and the embeddings of a few terms
  wordvectors <- as.matrix(m)
  wv <- starspace_embedding(
    m,
    x = c("Nationale Loterij", "migranten", "pensioen"),
    type = "ngram"
  )
  wv

  ## Similarity of every row of wordvectors to "pensioen"; show the top 10
  mostsimilar <- embedding_similarity(wordvectors, wv["pensioen", ])
  head(sort(mostsimilar[, 1], decreasing = TRUE), 10)

  ## Query the model with starspace_knn for the term "koning"
  starspace_knn(m, "koning")

  ## clean up for cran
  file.remove(c(train_file, validation_file))
}
## Run the code above in your browser using DataLab