% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/contaminants.R
\name{remove_contaminants}
\alias{remove_contaminants}
\title{Identifying contaminant and false-positive taxa (cell line quantile test)}
\usage{
remove_contaminants(
  kraken_reports,
  study = "current study",
  taxon = c("d__Bacteria", "d__Fungi", "d__Viruses"),
  quantile = 0.95,
  alpha = 0.05,
  alternative = "greater",
  exclusive = FALSE
)
}
\arguments{
\item{kraken_reports}{A character vector of paths to the kraken report files.}

\item{study}{A string giving the study name, used to distinguish the study
from the cell line data.}

\item{taxon}{An atomic character vector specifying the names of the taxa
wanted. Each name should follow the kraken style: the rank code, two
underscores, and the scientific name of the taxon (e.g., "d__Viruses").}

\item{quantile}{Probabilities with values in \verb{[0, 1]} specifying the quantile
to calculate.}

\item{alpha}{Level of significance.}

\item{alternative}{A string specifying the alternative hypothesis, must be
one of "two.sided", "greater" (default) or "less". You can specify just the
initial letter.}

\item{exclusive}{A boolean value indicating whether taxa not found in the
cell line data should be regarded as true taxa (i.e., included in the
\code{truly} attribute). Default: \code{FALSE}.}
}
\value{
A polars \link[polars:DataFrame_class]{DataFrame} with the following
attributes:
\enumerate{
\item \code{pvalues}: Quantile test p-values.
\item \code{exclusive}: taxids in the current study but not found in the cell line data.
\item \code{significant}: significant taxids with \code{pvalues < alpha}.
\item \code{truly}: taxids regarded as true based on \code{alpha} and \code{exclusive}. If
\code{exclusive} is \code{TRUE}, this is the union of \code{exclusive} and \code{significant};
otherwise, it is the same as \code{significant}.
}
}
\description{
Identifying contaminant and false-positive taxa (cell line quantile test)
}
\examples{
\dontrun{
# `paths` should be the output directory for each sample from
# `blit::kraken2()`
truly_microbe <- remove_contaminants(
    kraken_reports = file.path(paths, "kraken_report.txt"),
    quantile = 0.99, exclusive = FALSE
)

# Order the taxids in the `truly` attribute by their p-values. The attribute
# is named `pvalues`; spell it out in full instead of relying on the partial
# name matching that `attr()` performs by default.
microbe_for_plot <- attr(truly_microbe, "truly")[
    order(attr(truly_microbe, "pvalues")[attr(truly_microbe, "truly")])
]

# Drop the taxids listed in the `exclusive` attribute
microbe_for_plot <- microbe_for_plot[
    !microbe_for_plot \%in\% attr(truly_microbe, "exclusive")
]

# Density of `rpmm` for the selected taxids, filled by `study`, with one
# panel per `taxa`
ggplot(
    truly_microbe$filter(pl$col("taxid")$is_in(microbe_for_plot))$
        to_data_frame(),
    aes(rpmm)
) +
    geom_density(aes(fill = study), alpha = 0.5) +
    scale_x_log10() +
    facet_wrap(facets = vars(taxa), scales = "free") +
    theme(
        strip.clip = "off",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(1, 0),
        legend.justification.inside = c(1, 0)
    )
}
}
