The word2vec algorithms include skip-gram and CBOW models, using either hierarchical softmax or negative sampling.

model_word2vec(...)

load_word2vec(file)

Arguments

...

Any other options, forwarded to the underlying gensim model; see the official documentation for the full list. A sketch of common options follows the argument list below.

file

Path to a saved model.
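
The algorithm variants named in the description map onto gensim's usual keyword arguments, which would be passed through `...`. A minimal sketch, assuming `sg`, `hs`, and `negative` are forwarded unchanged to gensim:

# skip-gram with negative sampling (sg = 1L; negative > 0L)
sg_model <- model_word2vec(size = 100L, window = 5L, min_count = 1L, sg = 1L, negative = 5L)
# CBOW with hierarchical softmax (sg = 0L, hs = 1L, negative = 0L disables negative sampling)
cbow_model <- model_word2vec(size = 100L, window = 5L, min_count = 1L, sg = 0L, hs = 1L, negative = 0L)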

Examples

docs <- prepare_documents(corpus)
#> Preprocessing 9 documents
#> 9 documents after preprocessing
# initialise
word2vec <- model_word2vec(size = 100L, window = 5L, min_count = 1L)
word2vec$build_vocab(docs)
#> None
word2vec$train(docs, total_examples = word2vec$corpus_count, epochs = 20L)
#> (76, 580)
word2vec$init_sims(replace = TRUE)
#> None
# use
word2vec$wv$most_similar(positive = c("interface"))
#> [('user', 0.10656532645225525), ('minors', 0.08684408664703369), ('human', 0.08643960952758789), ('eps', 0.054633546620607376), ('trees', 0.024800226092338562), ('graph', 0.01699760928750038), ('response', 0.015698809176683426), ('computer', -0.0017682183533906937), ('survey', -0.031362384557724), ('time', -0.13173900544643402)]
word2vec$wv$doesnt_match(c("human", "interface", "trees"))
#> trees
word2vec$wv$similarity("human", "trees")
#> 0.016136127
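
A trained model saved to disk can be restored with `load_word2vec()`. A minimal sketch, assuming the model object exposes gensim's `save()` method:

# persist the trained model, then restore it
word2vec$save("word2vec.model")
word2vec <- load_word2vec("word2vec.model")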