% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preProcess.R
\name{preProcess}
\alias{preProcess}
\title{Download, Checksum, Extract files}
\usage{
preProcess(targetFile = NULL, url = NULL, archive = NULL,
  alsoExtract = NULL, destinationPath = ".", fun = NULL,
  quick = getOption("reproducible.quick"), overwrite = FALSE,
  purge = FALSE, useCache = getOption("reproducible.useCache", FALSE), ...)
}
\arguments{
\item{targetFile}{Character string giving the path to the eventual file
(raster, shapefile, csv, etc.) after downloading and extracting from a zip
or tar archive. This is the file \emph{before} it is passed to
\code{postProcess}. Currently, the internal checksumming does not checksum
the file after it is \code{postProcess}ed (e.g., cropped/reprojected/masked).
Using \code{Cache} around \code{prepInputs} will do a sufficient job in these cases.
See table in \code{\link{preProcess}}.}

\item{url}{Optional character string indicating the URL to download from.
If not specified, then no download will be attempted. If no entry
exists in the \code{CHECKSUMS.txt} (in \code{destinationPath}), an entry
will be created or appended to. This \code{CHECKSUMS.txt} entry will be used
in subsequent calls to
\code{prepInputs} or \code{preProcess}, comparing the file on hand with the ad hoc
\code{CHECKSUMS.txt}. See table in \code{\link{preProcess}}.}

\item{archive}{Optional character string giving the path of an archive
containing \code{targetFile}, or a vector giving a set of nested archives
(e.g., \code{c("xxx.tar", "inner.zip")}). If there is/are (an) inner
archive(s), but they are unknown, the function will try all until it finds
the \code{targetFile}. See table in \code{\link{preProcess}}.}

\item{alsoExtract}{Optional character string naming files other than
\code{targetFile} that must be extracted from the \code{archive}. If
\code{NULL}, the default, then it will extract all files. Other options:
\code{"similar"} will extract all files with the same filename without
file extension as \code{targetFile}. \code{NA} will extract nothing other
than \code{targetFile}. A character string of specific file names will cause
only those to be extracted. See table in \code{\link{preProcess}}.}

\item{destinationPath}{Character string of a directory in which to download
and save the file that comes from \code{url} and is also where the function
will look for \code{archive} or \code{targetFile}.}

\item{fun}{Character string indicating the function to use to load
\code{targetFile} into an \code{R} object.}

\item{quick}{Logical. This is passed internally to \code{\link{Checksums}}
(the quickCheck argument), and to
\code{\link{Cache}} (the quick argument). This results in faster, though
less robust checking of inputs. See the respective functions.}

\item{overwrite}{Logical. Should downloading and all the other actions occur
even if they pass the checksums or the files are all there.}

\item{purge}{Logical or Integer. \code{0/FALSE} (default) keeps existing
\code{CHECKSUMS.txt} file and
\code{prepInputs} will write or append to it. \code{1/TRUE} will delete the entire
\code{CHECKSUMS.txt} file. Other options, see details.}

\item{useCache}{Passed to \code{Cache} in various places. Default \code{FALSE}.}

\item{...}{Additional arguments passed to \code{fun} (i.e., user supplied),
 \code{\link{postProcess}} and \code{\link[reproducible]{Cache}}.
Since \code{...} is passed to \code{\link{postProcess}}, these
\code{...} will also be passed into the inner
functions, e.g., \code{\link{cropInputs}}. See details and examples.}
}
\value{
A list with 5 elements: \code{checkSums} (the result of a \code{Checksums} call
after downloading), \code{dots} (cleaned up ..., including deprecated argument checks),
\code{fun} (the function to be used to load the preProcessed object from disk),
\code{targetFilePath} (the fully qualified path to the \code{targetFile}),
and \code{tryRasterFn} (a logical indicating whether the \code{targetFilePath}
should be loaded with \code{\link[raster]{raster}}).
}
\description{
This does downloading (via \code{downloadFile}), checksumming (\code{Checksums}),
and extracting from archives (\code{extractFromArchive}), plus cleaning up of input
arguments (e.g., paths, function names).
This is the first stage of three used in \code{prepInputs}.
}
\section{Combinations of \code{targetFile}, \code{url}, \code{archive}, \code{alsoExtract}}{


  \tabular{ccccclll}{
 # Params \tab \code{url} \tab \code{targetFile} \tab \code{archive}\tab \code{alsoExtract} \tab Result \tab Checksum 1st time \tab Checksum 2nd time \cr
 ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------  \cr
\bold{1} \tab      char \tab NULL \tab NULL \tab NULL             \tab Download, extract all files if an archive, guess at \code{targetFile}, load into R \tab write or append all new files \tab same as 1st -- no \code{targetFile}* \cr
     \tab NULL \tab char \tab NULL \tab NULL             \tab load \code{targetFile} into R \tab write or append \code{targetFile} \tab no downloading, so no checksums use \cr
     \tab NULL \tab NULL \tab char \tab NULL             \tab extract all files, guess at \code{targetFile}, load into R \tab write or append all new files \tab no downloading, so no checksums use \cr
     \tab NULL \tab NULL \tab NULL \tab char             \tab guess at \code{targetFile} from files in \code{alsoExtract}, load into R \tab write or append all new files \tab no downloading, so no checksums use \cr
 ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \cr
\bold{2} \tab char \tab char \tab NULL \tab NULL             \tab Download, extract all files if an archive, load \code{targetFile} into R\tab write or append all new files \tab use Checksums, skip downloading \cr
     \tab char \tab NULL \tab char \tab NULL             \tab Download, extract all files, guess at \code{targetFile}, load into R\tab write or append all new files \tab same as 1st -- no \code{targetFile}* \cr
     \tab char \tab NULL \tab NULL \tab char             \tab Download, extract only named files in \code{alsoExtract}, guess at \code{targetFile}, load into R\tab write or append all new files \tab same as 1st -- no \code{targetFile}* \cr
     \tab NULL \tab char \tab NULL \tab char             \tab load \code{targetFile} into R \tab write or append all new files \tab no downloading, so no checksums use \cr
     \tab NULL \tab char \tab char \tab NULL             \tab Extract all files, load \code{targetFile} into R\tab write or append all new files \tab no downloading, so no checksums use \cr
     \tab NULL \tab NULL \tab char \tab char             \tab Extract only named files in \code{alsoExtract}, guess at \code{targetFile}, load into R\tab write or append all new files \tab no downloading, so no checksums use \cr
 ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \cr
\bold{3} \tab char \tab char \tab char \tab NULL             \tab Download, extract all files, load \code{targetFile} into R\tab write or append all new files \tab use Checksums, skip downloading \cr
     \tab char \tab NULL \tab char \tab char             \tab Download, extract files named in \code{alsoExtract}, guess at \code{targetFile}, load into R\tab write or append all new files \tab use Checksums, skip downloading \cr
     \tab char \tab NULL \tab char \tab \code{"similar"} \tab Download, extract all files (can't understand "similar"), guess at \code{targetFile}, load into R\tab write or append all new files \tab same as 1st -- no \code{targetFile}* \cr
     \tab char \tab char \tab NULL \tab char             \tab Download, if an archive, extract files named in \code{targetFile} and \code{alsoExtract}, load \code{targetFile} into R\tab write or append all new files \tab use Checksums, skip downloading \cr
     \tab char \tab char \tab NULL \tab \code{"similar"} \tab Download, if an archive, extract files with same base as \code{targetFile}, load \code{targetFile} into R\tab write or append all new files \tab use Checksums, skip downloading \cr
     \tab char \tab char \tab char \tab NULL             \tab Download, extract all files from archive, load \code{targetFile} into R\tab write or append all new files \tab use Checksums, skip downloading \cr
     \tab NULL \tab char \tab char \tab char             \tab Extract  files named in \code{alsoExtract} from archive, load \code{targetFile} into R\tab write or append all new files \tab no downloading, so no checksums use \cr
 ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \tab ------ \cr
\bold{4} \tab char \tab char \tab char \tab char             \tab Download, extract files named in \code{targetFile} and \code{alsoExtract}, load \code{targetFile} into R\tab write or append all new files \tab use Checksums, skip downloading \cr
     \tab char \tab char \tab char \tab \code{"similar"} \tab Download, extract all files with same base as \code{targetFile}, load \code{targetFile} into R\tab write or append all new files \tab use Checksums, skip downloading \cr
  }
 \code{*} If the \code{url} is a file on Google Drive, checksumming will work
 even without a \code{targetFile} specified because there is an initial attempt
 to get the remote file information (e.g., file name). With that, the connection
 between the \code{url} and the filename used in the CHECKSUMS.txt file can be made.
}

\author{
Eliot McIntire
}
