% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/multi_strata.R
\name{multi_strata}
\alias{multi_strata}
\title{Create Strata from Multiple Features}
\usage{
multi_strata(df, strategy = c("kmeans", "interaction"), k = 3L)
}
\arguments{
\item{df}{A \code{data.frame} used to form the stratification vector.}

\item{strategy}{A string (either \code{"kmeans"} or \code{"interaction"}) selecting
how the strata are computed; see the description.}

\item{k}{An integer. For \code{strategy = "kmeans"}, it is the desired number of strata,
while for \code{strategy = "interaction"}, it is the approximate number of bins per
numeric feature before forming all combinations.}
}
\value{
Factor with strata as levels.
}
\description{
Creates a stratification vector based on multiple columns of
a data.frame that can then be passed to the splitting functions.
Currently, the function offers two strategies: "kmeans" runs a k-means
cluster analysis on scaled input. (Ordered factors are integer-encoded first;
unordered factors and character columns are one-hot-encoded.)
The second \code{strategy = "interaction"} creates all feature value
combinations (after binning numeric columns into approximately \code{k} bins).
}
\examples{
y_multi <- data.frame(
  A = rep(c(letters[1:4]), each = 20),
  B = factor(sample(c(0, 1), 80, replace = TRUE)),
  c = rnorm(80)
)
y <- multi_strata(y_multi, k = 3)
folds <- create_folds(y, k = 5)
}
\seealso{
\code{\link{partition}}, \code{\link{create_folds}}.
}
