% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tr_utils.R
\name{transformer_vocab}
\alias{transformer_vocab}
\title{Returns the vocabulary of a model}
\usage{
transformer_vocab(
  model = getOption("pangoling.causal.default"),
  add_special_tokens = NULL,
  decode = FALSE,
  config_tokenizer = NULL
)
}
\arguments{
\item{model}{Name of a pre-trained model or folder. One should be able to use
models based on "gpt2". See the
\href{https://huggingface.co/models?other=gpt2}{Hugging Face website}.}

\item{add_special_tokens}{Whether to include special tokens. It has the
same default as the
\href{https://huggingface.co/docs/transformers/v4.25.1/en/model_doc/auto#transformers.AutoTokenizer}{AutoTokenizer}
method in Python.}

\item{decode}{Logical. If \code{TRUE}, decodes the tokens into human-readable
strings, handling special characters and diacritics. Default is
\code{FALSE}.}

\item{config_tokenizer}{List with other arguments that control how the
tokenizer from Hugging Face is accessed.}
}
\value{
A vector with the vocabulary of a model.
}
\description{
Returns the vocabulary of a model, optionally decoded into human-readable strings (see the \code{decode} argument).
}
\examples{
\dontshow{if (installed_py_pangoling()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
transformer_vocab(model = "gpt2") |>
 head()
\dontshow{\}) # examplesIf}
}
\seealso{
Other token-related functions: 
\code{\link{ntokens}()},
\code{\link{tokenize_lst}()}
}
\concept{token-related functions}
