% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shiny_related_functions.R
\name{StrelkaIDVCFFilesToCatalog}
\alias{StrelkaIDVCFFilesToCatalog}
\title{Create ID (small insertion and deletion) catalog from Strelka ID VCF files}
\usage{
StrelkaIDVCFFilesToCatalog(
  files,
  ref.genome,
  region = "unknown",
  names.of.VCFs = NULL,
  flag.mismatches = 0,
  return.annotated.vcfs = FALSE,
  suppress.discarded.variants.warnings = TRUE
)
}
\arguments{
\item{files}{Character vector of file paths to the Strelka ID VCF files.}

\item{ref.genome}{A \code{ref.genome} argument as described in
\code{\link{ICAMS}}.}

\item{region}{A character string designating a genomic region;
see \code{\link{as.catalog}} and \code{\link{ICAMS}}.}

\item{names.of.VCFs}{Optional. Character vector of names of the VCF files.
The order of names in \code{names.of.VCFs} should match the order of VCF
file paths in \code{files}. If \code{NULL} (default), the names are derived
from \code{files}: everything up to and including the last path separator
(if any) is removed, and the remaining file name without its extension
(and the leading dot) is used as the name of each VCF file.}

\item{flag.mismatches}{Deprecated. If there are ID variants whose \code{REF}
does not match the extracted sequence from \code{ref.genome}, the function
will automatically discard these variants and an element
\code{discarded.variants} will appear in the return value. See
\code{\link{AnnotateIDVCF}} for more details.}

\item{return.annotated.vcfs}{Logical. Whether to return the annotated VCFs
with additional columns showing mutation class for each variant. Default is
FALSE.}

\item{suppress.discarded.variants.warnings}{Logical. Whether to suppress
warning messages showing information about the discarded variants. Default
is TRUE.}
}
\description{
Create ID (small insertion and deletion) catalog from the Strelka ID VCFs
specified by \code{files}.
}
\details{
This function calls \code{\link{VCFsToIDCatalogs}}.
}
\section{Value}{

A \strong{list} of elements:
\itemize{
\item \code{catalog}: The ID (small insertion and deletion) catalog with
attributes added. See \code{\link{as.catalog}} for more details.
\item \code{discarded.variants}: \strong{Non-NULL only if} there are variants
that were excluded from the analysis. See the added extra column
\code{discarded.reason} for more details.
\item \code{annotated.vcfs}:
\strong{Non-NULL only if} \code{return.annotated.vcfs} = TRUE. A list of
data frames which contain the original VCF's ID mutation rows with three
additional columns \code{seq.context.width}, \code{seq.context} and
\code{ID.class} added. The category assignment of each ID mutation in the VCF
can be obtained from the \code{ID.class} column.
}
}

\section{ID classification}{

See \url{https://github.com/steverozen/ICAMS/blob/master/data-raw/PCAWG7_indel_classification_2021_09_03.xlsx}
for additional information on ID (small insertion and deletion) mutation
classification.

See the documentation for \code{\link{Canonicalize1Del}} which first handles
deletions in homopolymers, then handles deletions in simple repeats with
longer repeat units (e.g. \code{CACACACA}, see
\code{\link{FindMaxRepeatDel}}), and if the deletion is not in a simple
repeat, looks for microhomology (see \code{\link{FindDelMH}}).

See the code for the unexported function \code{\link{CanonicalizeID}}
and the functions it calls for handling of insertions.
}

\section{Note}{

In ID (small insertion and deletion) catalogs, deletion repeat sizes range
from 0 to 5+, but for plotting and end-user documentation deletion repeat
sizes range from 1 to 6+.
}

\examples{
file <- c(system.file("extdata/Strelka-ID-vcf",
                      "Strelka.ID.GRCh37.s1.vcf",
                      package = "ICAMS"))
if (requireNamespace("BSgenome.Hsapiens.1000genomes.hs37d5", quietly = TRUE)) {
  catID <- StrelkaIDVCFFilesToCatalog(file, ref.genome = "hg19",
                                      region = "genome")}
}
