% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/keyToEnglish.R
\name{corpora_to_word_list}
\alias{corpora_to_word_list}
\title{Corpora to Word List}
\usage{
corpora_to_word_list(
  paths,
  ascii_only = TRUE,
  custom_regex = NA,
  max_word_length = 20,
  stopword_fn = DEFAULT_STOPWORDS,
  min_word_count = 5,
  max_size = 16^3,
  min_word_length = 3,
  output_file = NA,
  json_path = NA
)
}
\arguments{
\item{paths}{Paths of plaintext documents}

\item{ascii_only}{If TRUE, non-ASCII characters will be omitted}

\item{custom_regex}{If not NA, overrides \code{ascii_only} and
determines what a valid word
consists of}

\item{max_word_length}{Maximum length of extracted words}

\item{stopword_fn}{Filename containing stopwords to use or a list of
stopwords (if length > 1)}

\item{min_word_count}{Minimum number of occurrences for a
word to be added to word list}

\item{max_size}{Maximum size of list}

\item{min_word_length}{Minimum length of words}

\item{output_file}{File to write list to}

\item{json_path}{If input text is JSON, then it will be parsed as such
if this is a character vector of JSON keys to follow}
}
\value{
A \code{character} vector of words
}
\description{
Converts a collection of documents to a word list
}
