% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_size_dist.R
\name{plot_size_dist}
\alias{plot_size_dist}
\title{Plot distribution of size values}
\usage{
plot_size_dist(
  fastx_input,
  input_format = NULL,
  cutoff = NULL,
  y_breaks = NULL,
  plot_title = "Size distribution",
  log_scale_y = TRUE,
  n_bins = 30
)
}
\arguments{
\item{fastx_input}{(Required). A FASTA/FASTQ file path or FASTA/FASTQ object
containing reads with size values embedded in the \code{Header} column. See
\emph{Details}.}

\item{input_format}{(Optional). The format of the input file. Must be
\code{"fasta"} or \code{"fastq"} if \code{fastx_input} is a file path.
Defaults to \code{NULL}.}

\item{cutoff}{(Optional). A numeric value specifying a size threshold. Reads
with size greater than this value will be grouped into a single category
labeled \code{"> cutoff"} in the plot. Defaults to \code{NULL} (no cutoff
applied).}

\item{y_breaks}{(Optional). A numeric vector specifying the breakpoints for
the y-axis if log10 scaling is applied (\code{log_scale_y = TRUE}). Defaults
to \code{NULL}.}

\item{plot_title}{(Optional). The title of the plot. Defaults to
\code{"Size distribution"}. Set to \code{""} for no title.}

\item{log_scale_y}{(Optional). If \code{TRUE} (default), applies a log10
scale to the y-axis. If \code{FALSE}, the y-axis remains linear.}

\item{n_bins}{(Optional). Number of bins used in the histogram if
\code{cutoff} is unspecified. Defaults to \code{30}, which is the default
value in \code{ggplot2::geom_histogram()}.}
}
\value{
A ggplot2 object displaying a plot of the size distribution.
}
\description{
Generates a plot representing the distribution of size values from a FASTA or
FASTQ file/object.
}
\details{
\code{fastx_input} can be either a file path to a FASTA/FASTQ file or a
FASTA/FASTQ object. FASTA objects are tibbles that contain the
columns \code{Header} and \code{Sequence}, see
\code{\link[microseq]{readFasta}}. FASTQ objects are tibbles that contain the
columns \code{Header}, \code{Sequence}, and \code{Quality}, see
\code{\link[microseq]{readFastq}}.
The \code{Header} column must contain the size values for each read.

Each size annotation in the \code{Header} column must be formatted as
\code{;size=<int>}, where \code{<int>} is an integer size value.

The y-axis of the plot can be log10-transformed to handle variations in read
counts across different size values. If \code{y_breaks} is specified, the
given breakpoints will be used. If \code{y_breaks} is \code{NULL},
\code{ggplot2} will automatically determine suitable breaks.
}
\examples{
# Locate the example FASTA file shipped with the package
fasta_file <- system.file("extdata/small_derep_R1.fa", package = "Rsearch")

# Default settings: no cutoff, log10-scaled y-axis
p <- plot_size_dist(fasta_file, input_format = "fasta")
print(p)

# Group all reads with size greater than 100 into a single category
p <- plot_size_dist(fasta_file, input_format = "fasta", cutoff = 100)
print(p)

# Supply explicit breakpoints for the log10-scaled y-axis
p <- plot_size_dist(fasta_file,
                    input_format = "fasta",
                    y_breaks = c(1, 50, 500, 5000))
print(p)

# Keep the y-axis linear instead of log10-scaled
p <- plot_size_dist(fasta_file, input_format = "fasta", log_scale_y = FALSE)
print(p)

}
