% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulations.R
\name{SimulateComponents}
\alias{SimulateComponents}
\title{Data simulation for sparse Principal Component Analysis}
\usage{
SimulateComponents(
  n = 100,
  pk = c(10, 10),
  adjacency = NULL,
  nu_within = 1,
  v_within = c(0.5, 1),
  v_sign = -1,
  continuous = TRUE,
  pd_strategy = "min_eigenvalue",
  ev_xx = 0.1,
  scale_ev = TRUE,
  u_list = c(1e-10, 1),
  tol = .Machine$double.eps^0.25,
  scale = TRUE,
  output_matrices = FALSE
)
}
\arguments{
\item{n}{number of observations in the simulated dataset.}

\item{pk}{vector of the number of variables per group in the simulated
dataset. The number of nodes in the simulated graph is \code{sum(pk)}. With
multiple groups, the simulated (partial) correlation matrix has a block
structure, where blocks arise from the integration of the \code{length(pk)}
groups. This argument is only used if \code{theta} is not provided.}

\item{adjacency}{optional binary and symmetric adjacency matrix encoding the
conditional graph structure between variables. The clusters encoded in
this argument must be in line with those indicated in \code{pk}. Edges in
off-diagonal blocks are not allowed to ensure that the simulated orthogonal
components are sparse. Corresponding entries in the precision matrix will
be set to zero.}

\item{nu_within}{probability of having an edge between two nodes belonging to
the same group, as defined in \code{pk}. If \code{length(pk)=1}, this is
the expected density of the graph. If \code{implementation=HugeAdjacency},
this argument is only used for \code{topology="random"}. Only used if
\code{nu_mat} is not provided.}

\item{v_within}{vector defining the (range of) nonzero entries in the
diagonal blocks of the precision matrix. These values must be between -1
and 1 if \code{pd_strategy="min_eigenvalue"}. If \code{continuous=FALSE},
\code{v_within} is the set of possible precision values. If
\code{continuous=TRUE}, \code{v_within} is the range of possible precision
values.}

\item{v_sign}{vector of possible signs for precision matrix entries. Possible
inputs are: \code{-1} for positive partial correlations, \code{1} for
negative partial correlations, or \code{c(-1, 1)} for both positive and
negative partial correlations.}

\item{continuous}{logical indicating whether to sample precision values from
a uniform distribution between the minimum and maximum values in
\code{v_within} (diagonal blocks) or \code{v_between} (off-diagonal blocks)
(if \code{continuous=TRUE}) or from proposed values in \code{v_within}
(diagonal blocks) or \code{v_between} (off-diagonal blocks) (if
\code{continuous=FALSE}).}

\item{pd_strategy}{method to ensure that the generated precision matrix is
positive definite (and hence can be a covariance matrix). If
\code{pd_strategy="diagonally_dominant"}, the precision matrix is made
diagonally dominant by setting the diagonal entries to the sum of absolute
values on the corresponding row and a constant u. If
\code{pd_strategy="min_eigenvalue"}, diagonal entries are set to the sum of
the absolute value of the smallest eigenvalue of the precision matrix with
zeros on the diagonal and a constant u.}

\item{ev_xx}{expected proportion of explained variance by the first Principal
Component (PC1) of a Principal Component Analysis. This is the largest
eigenvalue of the correlation (if \code{scale_ev=TRUE}) or covariance (if
\code{scale_ev=FALSE}) matrix divided by the sum of eigenvalues. If
\code{ev_xx=NULL}, the constant u is chosen by maximising the
contrast of the correlation matrix.}

\item{scale_ev}{logical indicating if the proportion of explained variance by
PC1 should be computed from the correlation (\code{scale_ev=TRUE}) or
covariance (\code{scale_ev=FALSE}) matrix. If \code{scale_ev=TRUE}, the
correlation matrix is used as parameter of the multivariate normal
distribution.}

\item{u_list}{vector with two numeric values defining the range of values to
explore for constant u.}

\item{tol}{accuracy for the search of parameter u as defined in
\code{\link[stats]{optimise}}.}

\item{scale}{logical indicating if the true mean is zero and true variance is
one for all simulated variables. The observed mean and variance may be
slightly off by chance.}

\item{output_matrices}{logical indicating if the true precision and (partial)
correlation matrices should be included in the output.}
}
\value{
A list with: \item{data}{simulated data with \code{n} observations and
  \code{sum(pk)} variables.} \item{loadings}{loadings coefficients of the
  orthogonal latent variables (principal components).} \item{theta}{support
  of the loadings coefficients.} \item{ev}{proportion of explained variance
  by each of the orthogonal latent variables.} \item{adjacency}{adjacency
  matrix of the simulated graph.} \item{omega}{simulated (true) precision
  matrix. Only returned if \code{output_matrices=TRUE}.} \item{phi}{simulated
  (true) partial correlation matrix. Only returned if
  \code{output_matrices=TRUE}.} \item{C}{simulated (true) correlation
  matrix. Only returned if \code{output_matrices=TRUE}.}
}
\description{
Simulates data with independent groups of variables.
}
\details{
The data is simulated from a centered multivariate Normal
  distribution with a block-diagonal covariance matrix. Independence between
  variables from the different blocks ensures that sparse orthogonal
  components can be generated.

  The block-diagonal partial correlation matrix is obtained using a graph
  structure encoding the conditional independence between variables. The
  orthogonal latent variables are obtained from eigendecomposition of the
  true correlation matrix. The sparse eigenvectors contain the weights of the
  linear combination of variables to construct the latent variable (loadings
  coefficients). The proportion of explained variance by each of the latent
  variables is computed from eigenvalues.

  As latent variables are defined from the true correlation matrix, the
  number of sparse orthogonal components is not limited by the number of
  observations and is equal to \code{sum(pk)}.
}
\examples{
\donttest{
# Example 1: three independent groups of 5, 3 and 4 variables, with a high
# expected proportion of variance explained by PC1 (ev_xx = 0.4)
set.seed(1)
simul <- SimulateComponents(pk = c(5, 3, 4), ev_xx = 0.4)
print(simul)
plot(simul)
# Cumulative proportion of explained variance across the latent variables
plot(cumsum(simul$ev), ylim = c(0, 1), las = 1)

# Example 2: same group sizes and same seed as above, with a moderate
# expected proportion of variance explained by PC1 (ev_xx = 0.25)
set.seed(1)
simul <- SimulateComponents(pk = c(5, 3, 4), ev_xx = 0.25)
print(simul)
plot(simul)
# Cumulative proportion of explained variance across the latent variables
plot(cumsum(simul$ev), ylim = c(0, 1), las = 1)

# Example 3: five groups with sizes drawn at random between 3 and 10, and a
# low expected proportion of variance explained by PC1 (ev_xx = 0.1).
# Note: the seed is not reset here, so the sampled group sizes depend on the
# state of the random number generator left by the previous example.
pk <- sample(3:10, size = 5, replace = TRUE)
simul <- SimulateComponents(
  pk = pk,
  # Within-group edge probability of 0.3; v_within is the range of precision
  # values (continuous = TRUE by default); v_sign = -1 gives positive partial
  # correlations
  nu_within = 0.3, v_within = c(0.8, 0.5), v_sign = -1, ev_xx = 0.1
)
plot(simul)
# Cumulative proportion of explained variance across the latent variables
plot(cumsum(simul$ev), ylim = c(0, 1), las = 1)
}
}
\references{
\insertRef{ourstabilityselection}{fake}
}
\seealso{
\code{\link{MakePositiveDefinite}}

Other simulation functions: 
\code{\link{SimulateAdjacency}()},
\code{\link{SimulateClustering}()},
\code{\link{SimulateCorrelation}()},
\code{\link{SimulateGraphical}()},
\code{\link{SimulateRegression}()},
\code{\link{SimulateStructural}()}
}
\concept{simulation functions}
