% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mixture_generator.R
\name{mixture_generator}
\alias{mixture_generator}
\title{Gaussian mixtures dataset generator with regression between the covariates}
\usage{
mixture_generator(n = 130, p = 100, ratio = 0.4, max_compl = 1,
  valid = 1000, positive = 0.6, sigma_Y = 10, sigma_X = NULL,
  R2 = NULL, R2Y = 0.4, meanvar = NULL, sigmavar = NULL, lambda = 3,
  Amax = NULL, lambdapois = 10, gamma = FALSE, gammashape = 1,
  gammascale = 0.5, tp1 = 1, tp2 = 1, tp3 = 1, nonlin = 0,
  pnonlin = 2, scale = TRUE, Z = NULL)
}
\arguments{
\item{n}{the number of individuals in the learning dataset}

\item{p}{the number of covariates (without the response)}

\item{ratio}{the ratio of covariates generated by sub-regressions on others}

\item{max_compl}{the number of covariates in each sub-regression}

\item{valid}{the number of individuals in the validation sample}

\item{positive}{the ratio of positive coefficients in both the regression and the sub-regressions}

\item{sigma_Y}{the standard deviation for the noise of the regression}

\item{sigma_X}{the standard deviation for the noise of the sub-regressions (all). Ignored if \code{gamma=TRUE} or if \code{R2} is not NULL}

\item{R2}{the strength of the sub-regressions (coefficients will be chosen to obtain this value).}

\item{R2Y}{the strength of the main regression (coefficients will be chosen to obtain this value).}

\item{meanvar}{vector of means for the covariates.}

\item{sigmavar}{standard deviation of the covariates.}

\item{lambda}{parameter of the Poisson distribution that defines the number of components in the Gaussian Mixture models}

\item{Amax}{the maximum number of covariates with non-zero coefficients in the regression}

\item{lambdapois}{parameter of the Poisson distribution used to generate the coefficients in the sub-regressions.}

\item{gamma}{(boolean) whether to generate a p-sized, gamma-distributed vector \code{sigma_X}}

\item{gammashape}{shape parameter of the gamma distribution (if needed)}

\item{gammascale}{scale parameter of the gamma distribution (if needed)}

\item{tp1}{the ratio of right-side (explicative) covariates allowed to have a non-zero coefficient in the regression}

\item{tp2}{the ratio of left-side (redundant) covariates allowed to have a non-zero coefficient in the regression}

\item{tp3}{the ratio of strictly independent covariates allowed to have a non-zero coefficient in the regression}

\item{nonlin}{to use a non-linear structure (squared or log). If non-zero, it is the probability of using the power \code{pnonlin} instead of the log. The type is drawn for each link between covariates}

\item{pnonlin}{the power used when a non-linear structure is requested}

\item{scale}{(boolean) to scale X before computing Y}

\item{Z}{the binary square adjacency matrix (size p) to obtain. If NULL it is randomly generated, based on the \code{ratio} and \code{max_compl} parameters.}
}
\value{
a list that contains:
\item{X_appr}{matrix of the learning set. \code{p} covariates following Gaussian Mixtures with some of them generated by sub-regressions on others.}
\item{Y_appr}{Response variable vector (size \code{n}) generated by linear regression on \code{X_appr} with coefficients \code{A} and residual standard deviation \code{sigma_Y}.}
\item{A}{vector of the coefficients of the regression generating \code{Y_appr}}
\item{B}{Matrix of the coefficients of sub-regressions (first line : the intercepts) then \code{B[i-1,j]} is the coefficient associated to \code{X_appr[,i]} in the sub-regression that generates \code{X_appr[,j]}}
\item{Z}{Binary square adjacency matrix of size \code{p} that describes the structure of sub-regressions. \code{Z[i,j]}=1 if \code{X_appr[,i]} explains \code{X_appr[,j]}}
\item{X_test}{validation sample generated the same way as \code{X_appr}, with \code{valid} individuals.}
\item{Y_test}{Response vector associated to the validation sample}
\item{sigma_X}{Vector of the standard deviations of the residuals of the sub-regressions (one value for each sub-regression)}
\item{sigma_Y}{Standard deviation of the residual of the regression that generates \code{Y_appr} and \code{Y_test}.}
\item{nbcomp}{vector of the number of components for covariates that are not explained by others.}
}
\description{
Generates a dataset (with an additional validation sample) made of Gaussian mixtures with some of them generated by sub-regressions on others. A response variable is then added by linear regression. This function is used to generate datasets for simulations using CorReg, or just with Gaussian Mixtures.
}
\examples{
\dontrun{
require(CorReg)
   #dataset generation
   base=mixture_generator(n=1500,p=10,valid=0)
   X_appr=base$X_appr #learning sample
   Y_appr=base$Y_appr#response variable
   for(i in 1:ncol(X_appr)){
   hist(X_appr[,i])
   }
   }

}
