% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/spacy_extract_nounphrases.R
\name{spacy_extract_nounphrases}
\alias{spacy_extract_nounphrases}
\title{Extract noun phrases from texts using spaCy}
\usage{
spacy_extract_nounphrases(x, output = c("data.frame", "list"),
  multithread = TRUE, ...)
}
\arguments{
\item{x}{a character object or a TIF-compliant corpus data.frame (see
\url{https://github.com/ropensci/tif})}

\item{output}{type of returned object, either \code{"data.frame"} or
\code{"list"}}

\item{multithread}{logical; if \code{TRUE}, the processing is parallelized
using spaCy's pipe functionality (\url{https://spacy.io/api/pipe}).}

\item{...}{unused}
}
\value{
either a \code{list} or \code{data.frame} of noun phrases
}
\description{
This function extracts noun phrases from documents, based on the
\code{noun_chunks} attribute of document objects parsed by spaCy (see
\url{https://spacy.io/usage/linguistic-features#noun-chunks}).
}
\details{
When the option \code{output = "data.frame"} is selected, the
  function returns a \code{data.frame} with the following fields.
\describe{\item{\code{text}}{contents of noun-phrase}
\item{\code{root_text}}{contents of root token}
\item{\code{start_id}}{serial number ID of starting token. This number
corresponds to the token numbering in the \code{data.frame} returned by
\code{spacy_tokenize(x)} with default options.}
\item{\code{root_id}}{serial number ID of root token}
\item{\code{length}}{number of words (tokens) included in a noun-phrase (e.g.
for the noun-phrase "individual car owners", \code{length = 3})}}
}
\examples{
\donttest{
spacy_initialize()

txt <- c(doc1 = "Natural language processing is a branch of computer science.",
         doc2 = "Paul earned a postgraduate degree from MIT.")
spacy_extract_nounphrases(txt)
spacy_extract_nounphrases(txt, output = "list")
}
}
