% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/slice.R
\name{slice.corpus}
\alias{slice.corpus}
\alias{slice_head.corpus}
\alias{slice_tail.corpus}
\alias{slice_sample.corpus}
\alias{slice_min.corpus}
\alias{slice_max.corpus}
\title{Subset documents using their positions}
\usage{
\method{slice}{corpus}(.data, ..., .preserve = FALSE)

\method{slice_head}{corpus}(.data, ..., n, prop)

\method{slice_tail}{corpus}(.data, ..., n, prop)

\method{slice_sample}{corpus}(.data, ..., n, prop, weight_by = NULL, replace = FALSE)

\method{slice_min}{corpus}(.data, ..., n, prop, with_ties = TRUE)

\method{slice_max}{corpus}(.data, ..., n, prop, with_ties = TRUE)
}
\arguments{
\item{.data}{a \pkg{quanteda} corpus object}

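\item{...}{For \code{slice()}: integer document positions to select, e.g.
\code{2:5}. For the other functions: additional arguments passed to methods,
such as the document variable to order by in \code{slice_min()} and
\code{slice_max()}.}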

\item{.preserve}{Relevant when the \code{.data} input is grouped.
If \code{.preserve = FALSE} (the default), the grouping structure
is recalculated based on the resulting data, otherwise the grouping is kept as is.}

\item{n, prop}{Provide either \code{n}, the number of documents, or \code{prop}, the
proportion of documents to select. If neither is supplied, \code{n = 1} will be
used.

If \code{n} is greater than the number of documents in the group (or \code{prop > 1}),
the result will be silently truncated to the group size. If the
\code{prop}ortion of a group size is not an integer, it is rounded down.}

\item{weight_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Sampling
weights. This must evaluate to a vector of non-negative numbers the same
length as the input. Weights are automatically standardised to sum to 1.}

\item{replace}{Should sampling be performed with (\code{TRUE}) or without
(\code{FALSE}, the default) replacement.}

\item{with_ties}{Should ties be kept together? The default, \code{TRUE},
may return more documents than you request. Use \code{FALSE} to ignore ties,
and return the first \code{n} documents.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Each document may appear 0, 1, or many times in the output.
(If duplicated, then document names will be modified to remain unique.)
\item Document variables are not modified.
}
}
\description{
\code{slice()} lets you index documents by their (integer) locations. It allows you
to select, remove, and duplicate documents.  It is accompanied by a number of
helpers for common use cases:
\itemize{
\item \code{slice_head()} and \code{slice_tail()} select the first or last documents.
\item \code{slice_sample()} randomly selects documents.
\item \code{slice_min()} and \code{slice_max()} select documents with the lowest or
highest values of a document variable.
}
}
\examples{
slice(data_corpus_inaugural, 2:5)
slice(data_corpus_inaugural, 55:n())
slice_head(data_corpus_inaugural, n = 2)
slice_tail(data_corpus_inaugural, n = 3)
slice_tail(data_corpus_inaugural, prop = .05)

set.seed(42)
slice_sample(data_corpus_inaugural, n = 3)
slice_sample(data_corpus_inaugural, prop = .10, replace = TRUE)

data_corpus_inaugural <- data_corpus_inaugural \%>\%
    mutate(ntoks = ntoken(data_corpus_inaugural))
# shortest three texts
slice_min(data_corpus_inaugural, ntoks, n = 3)
# longest three texts
slice_max(data_corpus_inaugural, ntoks, n = 3)
}
