% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/btm.R
\name{terms.data.frame}
\alias{terms.data.frame}
\title{Get the set of Biterms from a tokenised data frame}
\usage{
\method{terms}{data.frame}(x, type = c("tokens", "biterms"), window = 15, ...)
}
\arguments{
\item{x}{a tokenised data frame containing one row per token with 2 columns 
\itemize{
\item the first column is a context identifier (e.g. a tweet id, a document id, a sentence id, an identifier of a survey answer, an identifier of a part of a text)
\item the second column is a column of type character containing the sequence of words occurring within the context identifier
}}

\item{type}{a character string, either 'tokens' or 'biterms'. Defaults to 'tokens'.}

\item{window}{integer with the window size for biterm extraction. Defaults to 15.}

\item{...}{not used}
}
\value{
Depending on whether \code{type} is set to 'tokens' or 'biterms', the following is returned:
\itemize{
\item{If \code{type='tokens'}: }{a list containing 2 elements: 
\itemize{
\item \code{n} which indicates the number of tokens
\item \code{tokens} which is a data.frame with columns id, token and freq, 
indicating for all tokens found in the data the frequency of occurrence
}
}
\item{If \code{type='biterms'}: }{a list containing 2 elements: 
\itemize{
\item \code{n} which indicates the number of biterms used to train the model
\item \code{biterms} which is a data.frame with columns term1 and term2, 
indicating all biterms found in the data. The same biterm combination can occur several times.
}
Note that a biterm is unordered; in the output of \code{type='biterms'}, term1 is always smaller than or equal to term2.}
}
}
\description{
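This extracts the tokens of a tokenised data frame or, with \code{type = 'biterms'}, the words occurring in the neighbourhood of one another within a certain window range.
With \code{type = 'biterms'} and the default \code{window}, it provides the biterms used when fitting \code{\link{BTM}} with the default window parameter.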
}
\note{
If \code{x} is a data.frame which has an attribute called 'terms', that \code{'terms'} attribute is returned as is.
}
\examples{
\dontshow{if(require(udpipe))\{}
library(udpipe)
data("brussels_reviews_anno", package = "udpipe")
x <- subset(brussels_reviews_anno, language == "nl")
x <- subset(x, xpos \%in\% c("NN", "NNP", "NNS"))
x <- x[, c("doc_id", "lemma")]
biterms <- terms(x, window = 15, type = "biterms")
str(biterms)
tokens <- terms(x, type = "tokens")
str(tokens)
\dontshow{\} # End of main if statement running only if the required packages are installed}
}
\seealso{
\code{\link{BTM}}, \code{\link{predict.BTM}}, \code{\link{logLik.BTM}}
}
