% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulate_data.R
\name{simulate_data}
\alias{simulate_data}
\alias{simulate_data.default}
\alias{simulate_data.simdesign}
\title{Simulate design matrix}
\usage{
simulate_data(generator, ...)

\method{simulate_data}{default}(
  generator = function(n) matrix(rnorm(n)),
  n_obs = 1,
  transform_initial = base::identity,
  names_final = NULL,
  prefix_final = NULL,
  process_final = list(),
  seed = NULL,
  ...
)

\method{simulate_data}{simdesign}(
  generator,
  n_obs = 1,
  seed = NULL,
  apply_transformation = TRUE,
  apply_processing = TRUE,
  ...
)
}
\arguments{
\item{generator}{Function which generates data from the underlying base distribution. It is
assumed to take the number of simulated observations \code{n_obs} as first
argument, as all random generation functions in the \pkg{stats} and
\pkg{extraDistr} packages do. Furthermore, it is expected to return a two-dimensional
array as output (matrix or data.frame). Alternatively, an R object derived
from the \code{simdata::simdesign} class can be passed. See details.}

\item{...}{Further arguments passed to \code{generator} function.}

\item{n_obs}{Number of simulated observations.}

\item{transform_initial}{Function which specifies the transformation of the underlying
dataset \code{Z} to final dataset \code{X}. See details.}

\item{names_final}{NULL or character vector with variable names for final dataset \code{X}.
Length needs to equal the number of columns of \code{X}.
Overrides other naming options. See details.}

\item{prefix_final}{NULL or prefix attached to variables in final dataset \code{X}. Overridden
by \code{names_final} argument. Set to NULL if no prefixes should
be added. See details.}

\item{process_final}{List of lists specifying post-processing functions applied to final
datamatrix \code{X} before returning it. See \code{\link{do_processing}}.}

\item{seed}{Set random seed to ensure reproducibility of results.}

\item{apply_transformation}{This argument can be set to FALSE to override the information stored in the
passed \code{simdesign} object and not transform and process data.
Thus, the raw data from the design generator is returned. This can be useful
for debugging purposes.}

\item{apply_processing}{This argument can be set to FALSE to override the information stored in the
passed \code{simdesign} object and not post-process data after
the initial data is transformed. This can be useful for debugging purposes.}
}
\value{
Data.frame or matrix with \code{n_obs} rows for simulated dataset \code{X}.
}
\description{
Generate simulated dataset based on transformation of
an underlying base distribution.
}
\details{
Data is generated using the following procedure:
\enumerate{
\item An underlying dataset \code{Z} is sampled from some distribution. This is
done by a call to the \code{generator} function.
\item \code{Z} is then transformed into the final dataset \code{X} by applying the
transformation function (\code{transform_initial}) to \code{Z}.
\item \code{X} is post-processed if specified (e.g. truncation to avoid
outliers).
}
}
\section{Methods (by class)}{
\itemize{
\item \code{simulate_data(default)}: Function to be used if no \code{\link{simdesign}}
S3 class is used.

\item \code{simulate_data(simdesign)}: Function to be used with \code{\link{simdesign}}
S3 class.

}}
\note{
This function is best used in conjunction with the \code{\link{simdesign}}
S3 class or any template based upon it, which facilitates further data
visualization and conveniently stores information as a template for
simulation tasks.
}
\section{Generators}{

The \code{generator} function which is either passed directly, or via a
\code{simdata::simdesign} object, is assumed to provide the same interface
as the random generation functions in the R \pkg{stats} and \pkg{extraDistr}
packages. Specifically, that means it takes the number of observations as
first argument. All further arguments can be set via passing them as
named arguments to this function. It is expected to return a two-dimensional
array (matrix or data.frame) for which the number of columns can be
determined. Otherwise the \code{check_and_infer} step will fail.
}

\section{Transformations}{

Transformations should be applicable to the output of the \code{generator}
function (i.e. take a data.frame or matrix as input) and output another
data.frame or matrix. A convenience function \code{\link{function_list}} is
provided by this package to specify transformations as a list of functions,
which take the whole datamatrix \code{Z} as single argument and can be used to
apply specific transformations to the columns of that matrix. See the
documentation for \code{\link{function_list}} for details.
}

\section{Post-processing}{

Post-processing the datamatrix is based on \code{\link{do_processing}}.
}

\section{Naming of variables}{

Variables are named by \code{names_final} if not NULL and of correct length.
Otherwise, if \code{prefix_final} is not NULL, it is used as prefix for variable
numbers. Otherwise, variable names remain as returned by the \code{generator}
function.
}

\examples{
generator <- function(n) mvtnorm::rmvnorm(n, mean = 0)
simulate_data(generator, 10, seed = 24)

}
\seealso{
\code{\link{simdesign}},
\code{\link{simdesign_mvtnorm}},
\code{\link{simulate_data_conditional}},
\code{\link{do_processing}}
}
