Train word embeddings from a training corpus with the additional ability to obtain word vectors for out-of-vocabulary words.

model_fasttext(...)

load_fasttext(file)

Arguments

...

Any option from the official documentation of the underlying model.

file

Path to a saved model.

Examples

docs <- prepare_documents(corpus)
#> Preprocessing 9 documents
#> 9 documents after preprocessing
# fit model
ft <- model_fasttext(size = 4L, window = 3L, min_count = 1L)

# build vocabulary
ft$build_vocab(sentences = unname(docs))
#> None
# train
ft$train(sentences = unname(docs), total_examples = length(docs), epochs = 10L)
#> None
# most similar
ft$wv$most_similar(positive = c('computer', 'human'), negative = c('interface'))
#> [('system', 0.4660586714744568), ('user', 0.43439602851867676), ('trees', 0.37251460552215576), ('time', 0.25313329696655273), ('survey', 0.08980631828308105), ('graph', 0.08002881705760956), ('interface', -0.12998628616333008), ('response', -0.1862679272890091), ('minors', -0.2511548101902008), ('eps', -0.863673746585846)]
# odd one out
ft$wv$doesnt_match(c("human", "computer", "interface", "tree"))
#> tree
# similarity score
ft$wv$similarity('computer', 'human')
#> 0.51893425