% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dfm-weighting.R
\docType{methods}
\name{docfreq}
\alias{df}
\alias{df,dfm-method}
\alias{docfreq}
\alias{docfreq,dfm-method}
\title{compute the (weighted) document frequency of a feature}
\usage{
docfreq(x, scheme = c("count", "inverse", "inversemax", "inverseprob",
  "unary"), smoothing = 0, k = 0, base = 10, threshold = 0,
  USE.NAMES = TRUE)

\S4method{docfreq}{dfm}(x, scheme = c("count", "inverse", "inversemax",
  "inverseprob", "unary"), smoothing = 0, k = 0, base = 10,
  threshold = 0, USE.NAMES = TRUE)

df(x, ...)

\S4method{df}{dfm}(x, ...)
}
\arguments{
\item{x}{a \link{dfm-class} document-feature matrix}

\item{scheme}{type of document frequency weighting}

\item{smoothing}{added to the quotient before taking the logarithm}

\item{k}{added to the denominator in the "inverse" weighting types, to 
prevent a zero document count for a term}

\item{base}{the base with respect to which logarithms in the inverse document
frequency weightings are computed; default is 10 (see Manning, 
  Raghavan, and Schutze 2008, p123).}

\item{threshold}{numeric value of the threshold \emph{above which} a feature 
will be considered in the computation of document frequency.  The default is 
0, meaning that a feature's document frequency will be the number of 
documents in which it occurs greater than zero times.}

\item{USE.NAMES}{logical; if \code{TRUE} attach feature labels as names of 
the resulting numeric vector}

\item{...}{not used}
}
\value{
a numeric vector of document frequencies for each feature
}
\description{
For a \link{dfm-class} object, returns a (weighted) document frequency for 
each term.  The default is a simple count of the number of documents in which
a feature occurs more than a given frequency threshold.  (The default 
threshold is zero, meaning that any feature occurring at least once in a 
document will be counted.)
}
\examples{
mydfm <- dfm(inaugTexts[1:2], verbose = FALSE)
docfreq(mydfm[, 1:20])

# replication of worked example from
# https://en.wikipedia.org/wiki/Tf-idf#Example_of_tf.E2.80.93idf
(wikiDfm <- new("dfmSparse", 
                Matrix::Matrix(c(1,1,2,1,0,0, 1,1,0,0,2,3),
                   byrow = TRUE, nrow = 2,  
                   dimnames = list(docs = c("document1", "document2"), 
                     features = c("this", "is", "a", "sample", "another",
                                  "example")), sparse = TRUE)))
docfreq(wikiDfm)
df(wikiDfm)
docfreq(wikiDfm, scheme = "inverse")
docfreq(wikiDfm, scheme = "inverse", k = 1, smoothing = 1)
docfreq(wikiDfm, scheme = "unary")
docfreq(wikiDfm, scheme = "inversemax")
docfreq(wikiDfm, scheme = "inverseprob")
}
\references{
Manning, C. D., Raghavan, P., & Schutze, H. (2008). 
  \emph{Introduction to Information Retrieval}. Cambridge University Press.
}

