% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/partition.R
\name{create_grouped_data_partition}
\alias{create_grouped_data_partition}
\title{Split into train and test set while splitting by groups}
\usage{
create_grouped_data_partition(
  groups,
  group_partitions = NULL,
  training_frac = 0.8
)
}
\arguments{
\item{groups}{Vector of groups to keep together when splitting the data into
train and test sets. If the number of groups in the training set is larger
than \code{kfold}, the groups will also be kept together for cross-validation.
Length matches the number of rows in the dataset.}

\item{group_partitions}{Specify how to assign \code{groups} to the training and
testing partitions (default: \code{NULL}). If \code{groups} specifies that some
samples belong to group \code{"A"} and some belong to group \code{"B"}, then setting
\code{group_partitions = list(train = c("A", "B"), test = c("B"))} will result
in all samples from group \code{"A"} being placed in the training set, some
samples from \code{"B"} also in the training set, and the remaining samples from
\code{"B"} in the testing set. The partition sizes will be as close to
\code{training_frac} as possible. If the number of groups in the training set is
larger than \code{kfold}, the groups will also be kept together for
cross-validation.}

\item{training_frac}{Fraction of data for training set (default: \code{0.8}). Rows
from the dataset will be randomly selected for the training set, and all
remaining rows will be used in the testing set. Alternatively, if you
provide a vector of integers, these will be used as the row indices for the
training set. All remaining rows will be used in the testing set.}
}
\value{
Vector of row indices for the training set.
}
\description{
Split into train and test set while splitting by groups.
When \code{group_partitions} is \code{NULL}, all samples from each group will go into
either the training set or the testing set.
Otherwise, the groups will be split according to \code{group_partitions}.
}
\examples{
\dontrun{
groups <- c("A", "B", "A", "B", "C", "C", "A", "A", "D")
set.seed(0)
create_grouped_data_partition(groups, training_frac = 0.8)
groups <- rep.int(c("A", "B", "C"), 3)
create_grouped_data_partition(groups,
  group_partitions = list(train = c("A"), test = c("A", "B", "C"))
)
}
}
\author{
Zena Lapp, \email{zenalapp@umich.edu}

Kelly Sovacool, \email{sovacool@umich.edu}
}
\keyword{internal}
