% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/array.R
\name{seekerArray}
\alias{seekerArray}
\title{Process microarray data end to end}
\usage{
seekerArray(
  study,
  geneIdType,
  platform = NULL,
  parentDir = ".",
  metadataOnly = FALSE
)
}
\arguments{
\item{study}{String indicating the study accession and used to name the output
directory within \code{parentDir}. Must start with "GSE", "E-", or "LOCAL". If it
starts with "GSE", data are fetched using \code{\link[GEOquery:getGEO]{GEOquery::getGEO()}}. If it starts
with "E-", data are fetched using \code{\link[ArrayExpress:getAE]{ArrayExpress::getAE()}}. If it starts with
"LOCAL", data in the form of cel(.gz) files must be in the directory
\code{parentDir}/\code{study}/raw, and \code{parentDir}/\code{study} must contain
a file "sample_metadata.csv" that has a column \code{sample_id} containing the
names of the cel(.gz) files without the file extension.}

\item{geneIdType}{String indicating whether to map probes to gene IDs from
Ensembl ("ensembl") or Entrez ("entrez").}

\item{platform}{String indicating the GEO-based platform accession for the raw
data. See \url{https://www.ncbi.nlm.nih.gov/geo/browse/?view=platforms}.
Only necessary if \code{study} starts with "LOCAL", or starts with "GSE"
and the study uses multiple platforms.}

\item{parentDir}{Directory in which to store the output, which will be a
directory named according to \code{study}.}

\item{metadataOnly}{Logical indicating whether to only process the sample
metadata, and skip processing the expression data.}
}
\value{
Path to the output directory \code{parentDir}/\code{study}, invisibly.
}
\description{
This function fetches data and metadata from NCBI GEO and ArrayExpress,
processes raw Affymetrix data using RMA and custom CDFs from Brainarray, and
maps probes to genes. See also the vignettes:
\code{browseVignettes('seeker')}.
}
\details{
The standard output:
\itemize{
\item naive_expression_set.qs: Initial \code{ExpressionSet} generated by
\code{\link[GEOquery:getGEO]{GEOquery::getGEO()}} or \code{\link[ArrayExpress:ae2bioc]{ArrayExpress::ae2bioc()}}. Should generally \emph{not} be
used if sample_metadata.csv and gene_expression_matrix.qs are available.
\item sample_metadata.csv: Table of sample metadata. Column \code{sample_id} matches
colnames of the gene expression matrix.
\item gene_expression_matrix.qs: Rows correspond to genes, columns to samples.
Expression values are log2-transformed.
\item custom_cdf_name.txt: Name of custom CDF package used by \code{\link[affy:justrma]{affy::justRMA()}}
to process and normalize raw Affymetrix data and map probes to genes.
\item feature_metadata.qs: \code{GPL} object, if gene expression matrix was generated
from processed data.
\item probe_gene_mapping.csv.gz: Table of probes and genes, if gene expression
matrix was generated from processed data.
\item "raw" directory: Contains raw Affymetrix files.
\item params.yml: Parameters used to process the dataset.
\item session.log: R session information.
}

The output may include other files from NCBI GEO or ArrayExpress. Files with
extension "qs" can be read into R using \code{\link[qs:qread]{qs::qread()}}.
}
\examples{
\dontrun{
seekerArray('GSE25585', 'entrez')
}

}
\seealso{
\code{\link[=seeker]{seeker()}}
}
