API.Rmd
You can download external datasets to easily build models. External datasets can be found in the RaRe-Technologies/gensim-data repository.
library(gensimr)

# name of the external dataset used throughout this example
dataset <- "glove-twitter-25"

# print the "description" entry of the dataset's info
api_info(dataset)[["description"]]
#> [1] "Pre-trained vectors based on 2B tweets, 27B tokens, 1.2M vocab, uncased (https://nlp.stanford.edu/projects/glove/)."

# load the dataset as a model object
model <- api_load(dataset)

# query the words most similar to "cat" and convert the result to an R object
reticulate::py_to_r(model$most_similar("cat"))
#> [[1]]
#> [[1]][[1]]
#> [1] "dog"
#>
#> [[1]][[2]]
#> [1] 0.9590819
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] "monkey"
#>
#> [[2]][[2]]
#> [1] 0.9203578
#>
#>
#> [[3]]
#> [[3]][[1]]
#> [1] "bear"
#>
#> [[3]][[2]]
#> [1] 0.9143137
#>
#>
#> [[4]]
#> [[4]][[1]]
#> [1] "pet"
#>
#> [[4]][[2]]
#> [1] 0.9108031
#>
#>
#> [[5]]
#> [[5]][[1]]
#> [1] "girl"
#>
#> [[5]][[2]]
#> [1] 0.888063
#>
#>
#> [[6]]
#> [[6]][[1]]
#> [1] "horse"
#>
#> [[6]][[2]]
#> [1] 0.8872727
#>
#>
#> [[7]]
#> [[7]][[1]]
#> [1] "kitty"
#>
#> [[7]][[2]]
#> [1] 0.8870542
#>
#>
#> [[8]]
#> [[8]][[1]]
#> [1] "puppy"
#>
#> [[8]][[2]]
#> [1] 0.8867697
#>
#>
#> [[9]]
#> [[9]][[1]]
#> [1] "hot"
#>
#> [[9]][[2]]
#> [1] 0.8865255
#>
#>
#> [[10]]
#> [[10]][[1]]
#> [1] "lady"
#>
#> [[10]][[2]]
#> [1] 0.8845519