% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bootstrap.R
\name{bootstrap_pca}
\alias{bootstrap_pca}
\title{Fast, Exact Bootstrap for PCA Results from the \code{pca} Function}
\usage{
bootstrap_pca(
  x,
  nboot = 100,
  k = NULL,
  parallel = FALSE,
  cores = NULL,
  seed = NULL,
  epsilon = 1e-15,
  ...
)
}
\arguments{
\item{x}{An object of class 'pca' as returned by the provided \code{pca} function.
It's expected to contain loadings (\code{v}), scores (\code{s}), singular values (\code{sdev}),
left singular vectors (\code{u}), and pre-processing info (\code{preproc}).}

\item{nboot}{The number of bootstrap resamples to perform. Must be a positive
integer (default: 100).}

\item{k}{The number of principal components to bootstrap (default: all
components available in the fitted PCA model \code{x}). Must be less than or
equal to the number of components in \code{x}.}

\item{parallel}{Logical flag indicating whether to use parallel processing
via the \code{future} framework (default: FALSE). Requires the \code{future.apply} package
and a configured \code{future} backend (e.g., \code{future::plan(future::multisession)}).}

\item{cores}{The number of cores to use for parallel processing if \code{parallel = TRUE}
(default: \code{NULL}, in which case \code{future::availableCores()} is used). This value
is used if no \code{future} plan is set.}

\item{seed}{An integer value for the random number generator seed for
reproducibility (default: NULL, no seed is set).}

\item{epsilon}{A small positive value added to standard deviations before
division to prevent division by zero or instability (default: 1e-15).}

\item{...}{Additional arguments (currently ignored).}
}
\value{
A \code{list} object of class \code{bootstrap_pca_result} containing:
\item{E_Vb}{Matrix (p x k) of the estimated bootstrap means of the principal components (loadings, i.e. coefficients, V^b).}
\item{sd_Vb}{Matrix (p x k) of the estimated bootstrap standard deviations of the principal components (loadings V^b).}
\item{z_loadings}{Matrix (p x k) of the bootstrap Z-scores for the loadings, calculated as \code{E_Vb / sd_Vb}.}
\item{E_Scores}{Matrix (n x k) of the estimated bootstrap means of the principal component scores (Scores^b = R^b S^b; see Details).}
\item{sd_Scores}{Matrix (n x k) of the estimated bootstrap standard deviations of the principal component scores (Scores^b = R^b S^b; see Details).}
\item{z_scores}{Matrix (n x k) of the bootstrap Z-scores for the scores, calculated as \code{E_Scores / sd_Scores}.}
\item{E_Ab}{Matrix (k x k) of the estimated bootstrap means of the internal rotation matrices A^b.}
\item{Ab_array}{Array (k x k x nboot) containing all the bootstrap rotation matrices A^b.}
\item{Scores_array}{Array (n x k x nboot) containing all the bootstrap score matrices (Scores^b = R^b S^b, with NAs for non-sampled subjects).}
\item{nboot}{The number of successful bootstrap resamples actually used.}
\item{k}{The number of components bootstrapped.}
\item{call}{The matched call to the function.}
}
\description{
Performs bootstrap resampling for Principal Component Analysis (PCA) based on
the method described by Fisher et al. (2016), optimized for high-dimensional
data (p >> n). This version is specifically adapted to work with the output
object generated by the provided \code{pca} function (which returns a \code{bi_projector}
object of class 'pca').
}
\details{
This function implements the fast bootstrap PCA algorithm proposed by
Fisher et al. (2016), adapted for the output structure of the provided \code{pca} function.
The \code{pca} function returns an object containing:
\itemize{
\item \code{v}: Loadings (coefficients, p x k) - equivalent to V in SVD Y = U D V'. Note the transpose difference from \code{prcomp}.
\item \code{s}: Scores (n x k) - calculated as U \%*\% D.
\item \code{sdev}: Singular values (vector of length k) - equivalent to d.
\item \code{u}: Left singular vectors (n x k).
}

The bootstrap algorithm works by resampling the \emph{subjects} (rows) and recomputing
the SVD on a low-dimensional representation. Specifically, it computes the SVD
of the resampled matrix \verb{D U' P^b}, where \verb{Y = U D V'} is the SVD of the original
(pre-processed) data, and \verb{P^b} is a resampling matrix operating on the subjects (columns of \verb{U'}).

The SVD of the resampled low-dimensional matrix is \verb{svd(D U' P^b) = A^b S^b (R^b)'},
where \verb{S^b} is the diagonal matrix of bootstrap singular values (not the score matrix).
The bootstrap principal components (loadings) are then calculated as \verb{V^b = V A^b},
and the bootstrap scores are \verb{Scores^b = R^b S^b}.

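Z-scores are computed element-wise as \code{mean / sd}, with \code{epsilon} added to the
standard deviation before division to avoid division by zero.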

\strong{Important Note:} The algorithm assumes the data \code{Y} used for the \emph{original} SVD (\verb{Y = U D V'})
was appropriately centered (or pre-processed according to \code{x$preproc}). The bootstrap
samples are generated based on the components derived from this pre-processed data.
}
\examples{
# Simulate data (p=50, n=20)
set.seed(123)
p_dim <- 50
n_obs <- 20
Y_mat <- matrix(rnorm(p_dim * n_obs), nrow = p_dim, ncol = n_obs)
# Transpose for pca function input (n x p)
X_mat <- t(Y_mat)

# Perform PCA using the provided pca function
# Use center() pre-processing
pca_res <- pca(X_mat, ncomp = 5, preproc = center(), method = "fast")

# Run bootstrap on the pca result
boot_res <- bootstrap_pca(pca_res, nboot = 5, k = 5, seed = 456)

# Explore results
print(dim(boot_res$z_loadings)) # p x k Z-scores for loadings (coefficients)
print(dim(boot_res$z_scores))   # n x k Z-scores for scores

}
\references{
Fisher, Aaron, Brian Caffo, Brian Schwartz, and Vadim Zipunnikov. 2016.
"Fast, Exact Bootstrap Principal Component Analysis for P > 1 Million."
\emph{Journal of the American Statistical Association} 111 (514): 846--860.
\doi{10.1080/01621459.2015.1062383}.
}
\concept{pca bootstrap}
