% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/beta_div.r
\name{bdiv_functions}
\alias{bdiv_functions}
\alias{aitchison}
\alias{bhattacharyya}
\alias{bray}
\alias{canberra}
\alias{chebyshev}
\alias{chord}
\alias{clark}
\alias{divergence}
\alias{euclidean}
\alias{gower}
\alias{hellinger}
\alias{horn}
\alias{jensen}
\alias{jsd}
\alias{lorentzian}
\alias{manhattan}
\alias{matusita}
\alias{minkowski}
\alias{morisita}
\alias{motyka}
\alias{psym_chisq}
\alias{soergel}
\alias{squared_chisq}
\alias{squared_chord}
\alias{squared_euclidean}
\alias{topsoe}
\alias{wave_hedges}
\alias{hamming}
\alias{jaccard}
\alias{ochiai}
\alias{sorensen}
\alias{unweighted_unifrac}
\alias{weighted_unifrac}
\alias{normalized_unifrac}
\alias{generalized_unifrac}
\alias{variance_adjusted_unifrac}
\title{Beta Diversity Metrics}
\usage{
aitchison(
  counts,
  pseudocount = NULL,
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

bhattacharyya(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

bray(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

canberra(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

chebyshev(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

chord(counts, margin = 1L, pairs = NULL, cpus = n_cpus())

clark(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

divergence(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

euclidean(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

gower(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

hellinger(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

horn(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

jensen(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

jsd(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

lorentzian(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

manhattan(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

matusita(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

minkowski(
  counts,
  norm = "percent",
  power = 1.5,
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

morisita(counts, margin = 1L, pairs = NULL, cpus = n_cpus())

motyka(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

psym_chisq(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

soergel(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

squared_chisq(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

squared_chord(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

squared_euclidean(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

topsoe(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())

wave_hedges(
  counts,
  norm = "percent",
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

hamming(counts, margin = 1L, pairs = NULL, cpus = n_cpus())

jaccard(counts, margin = 1L, pairs = NULL, cpus = n_cpus())

ochiai(counts, margin = 1L, pairs = NULL, cpus = n_cpus())

sorensen(counts, margin = 1L, pairs = NULL, cpus = n_cpus())

unweighted_unifrac(
  counts,
  tree = NULL,
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

weighted_unifrac(
  counts,
  tree = NULL,
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

normalized_unifrac(
  counts,
  tree = NULL,
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

generalized_unifrac(
  counts,
  tree = NULL,
  alpha = 0.5,
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)

variance_adjusted_unifrac(
  counts,
  tree = NULL,
  margin = 1L,
  pairs = NULL,
  cpus = n_cpus()
)
}
\arguments{
\item{counts}{A numeric matrix of count data where each column is a
feature, and each row is a sample. Any object coercible with
\code{as.matrix()} can be given here, as well as \code{phyloseq}, \code{rbiom},
\code{SummarizedExperiment}, and \code{TreeSummarizedExperiment} objects. For
optimal performance with very large datasets, see the guide in
\code{vignette('performance')}.}

\item{pseudocount}{The value to add to all counts in \code{counts} to prevent
taking \code{log(0)} for unobserved features. The default, \code{NULL}, selects
the smallest non-zero value in \code{counts}.}

\item{margin}{If your samples are in the matrix's rows, set to \code{1L}. If
your samples are in columns, set to \code{2L}. Ignored when \code{counts} is a
\code{phyloseq}, \code{rbiom}, \code{SummarizedExperiment}, or
\code{TreeSummarizedExperiment} object. Default: \code{1L}}

\item{pairs}{Which combinations of samples should distances be
calculated for? The default value (\code{NULL}) calculates all-vs-all.
Provide a numeric or logical vector specifying positions in the
distance matrix to calculate. See examples.}

\item{cpus}{How many parallel processing threads should be used. The
default, \code{n_cpus()}, will use all logical CPU cores.}

\item{norm}{Normalize the incoming counts. Options are:
\describe{
\item{\code{norm = "percent"} - }{ Relative abundance (sample abundances sum to 1). }
\item{\code{norm = "binary"}  - }{ Unweighted presence/absence (each count is either 0 or 1). }
\item{\code{norm = "clr"}     - }{ Centered log ratio. }
\item{\code{norm = "none"}    - }{ No transformation. }
}
Default: \code{'percent'}, which is the expected input for these formulas.}

\item{power}{Scaling factor for the magnitude of differences between
communities (\eqn{p}). Default: \code{1.5}}

\item{tree}{A \code{phylo}-class object representing the phylogenetic tree for
the OTUs in \code{counts}. The OTU identifiers given by \code{colnames(counts)}
must be present in \code{tree}. Can be omitted if a tree is embedded with
the \code{counts} object or as \code{attr(counts, 'tree')}.}

\item{alpha}{How much weight to give to relative abundances; a value
between 0 and 1, inclusive. Setting \code{alpha=1} is equivalent to
\code{normalized_unifrac()}. Default: \code{0.5}}
}
\value{
A \code{dist} object.
}
\description{
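Pairwise beta diversity (between-sample distance and dissimilarity) metrics,
including abundance-based, presence/absence, and phylogenetic (UniFrac)
measures. Each function returns a \code{dist} object.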
}
\section{Formulas}{


Given:
\itemize{
\item \eqn{n} : The number of features.
\item \eqn{X_i}, \eqn{Y_i} : Absolute counts for the \eqn{i}-th feature in samples \eqn{X} and \eqn{Y}.
\item \eqn{X_T}, \eqn{Y_T} : Total counts in each sample. \eqn{X_T = \sum_{i=1}^{n} X_i}
\item \eqn{P_i}, \eqn{Q_i} : Proportional abundances of \eqn{X_i} and \eqn{Y_i}. \eqn{P_i = X_i / X_T}
\item \eqn{X_L}, \eqn{Y_L} : Mean log of abundances. \eqn{X_L = \frac{1}{n}\sum_{i=1}^{n} \ln{X_i}}
\item \eqn{R_i} : The range of the \eqn{i}-th feature across all samples (max - min).
}\tabular{ll}{
    \tab  \cr
   \strong{Aitchison distance}                            \if{html}{\out{<br>}} \code{aitchison()} \tab \eqn{\sqrt{\sum_{i=1}^{n} [(\ln{X_i} - X_L) - (\ln{Y_i} - Y_L)]^2}} \cr
   \strong{Bhattacharyya distance}                        \if{html}{\out{<br>}} \code{bhattacharyya()} \tab \eqn{-\ln{\sum_{i=1}^{n}\sqrt{P_{i}Q_{i}}}} \cr
   \strong{Bray-Curtis dissimilarity}                     \if{html}{\out{<br>}} \code{bray()} \tab \eqn{\displaystyle \frac{\sum_{i=1}^{n} |P_i - Q_i|}{\sum_{i=1}^{n} (P_i + Q_i)}} \cr
   \strong{Canberra distance}                             \if{html}{\out{<br>}} \code{canberra()} \tab \eqn{\displaystyle \sum_{i=1}^{n} \frac{|P_i - Q_i|}{P_i + Q_i}} \cr
   \strong{Chebyshev distance}                            \if{html}{\out{<br>}} \code{chebyshev()} \tab \eqn{\max(|P_i - Q_i|)} \cr
   \strong{Chord distance}                                \if{html}{\out{<br>}} \code{chord()} \tab \eqn{\displaystyle \sqrt{\sum_{i=1}^{n} \left(\frac{X_i}{\sqrt{\sum_{j=1}^{n} X_j^2}} - \frac{Y_i}{\sqrt{\sum_{j=1}^{n} Y_j^2}}\right)^2}} \cr
   \strong{Clark's divergence distance}                   \if{html}{\out{<br>}} \code{clark()} \tab \eqn{\displaystyle \sqrt{\sum_{i=1}^{n}\left(\frac{P_i - Q_i}{P_i + Q_i}\right)^{2}}} \cr
   \strong{Divergence}                                    \if{html}{\out{<br>}} \code{divergence()} \tab \eqn{\displaystyle 2\sum_{i=1}^{n} \frac{(P_i - Q_i)^2}{(P_i + Q_i)^2}} \cr
   \strong{Euclidean distance}                            \if{html}{\out{<br>}} \code{euclidean()} \tab \eqn{\sqrt{\sum_{i=1}^{n} (P_i - Q_i)^2}} \cr
   \strong{Gower distance}                                \if{html}{\out{<br>}} \code{gower()} \tab \eqn{\displaystyle \frac{1}{n}\sum_{i=1}^{n}\frac{|P_i - Q_i|}{R_i}} \cr
   \strong{Hellinger distance}                            \if{html}{\out{<br>}} \code{hellinger()} \tab \eqn{\sqrt{\sum_{i=1}^{n}(\sqrt{P_i} - \sqrt{Q_i})^{2}}} \cr
   \strong{Horn-Morisita dissimilarity}                   \if{html}{\out{<br>}} \code{horn()} \tab \eqn{\displaystyle 1 - \frac{2\sum_{i=1}^{n}P_{i}Q_{i}}{\sum_{i=1}^{n}P_i^2 + \sum_{i=1}^{n}Q_i^2}} \cr
   \strong{Jensen-Shannon distance}                       \if{html}{\out{<br>}} \code{jensen()} \tab \eqn{\displaystyle \sqrt{\frac{1}{2}\left[\sum_{i=1}^{n}P_i\ln\left(\frac{2P_i}{P_i + Q_i}\right) + \sum_{i=1}^{n}Q_i\ln\left(\frac{2Q_i}{P_i + Q_i}\right)\right]}} \cr
   \strong{Jensen-Shannon divergence (JSD)}               \if{html}{\out{<br>}} \code{jsd()} \tab \eqn{\displaystyle \frac{1}{2}\left[\sum_{i=1}^{n}P_i\ln\left(\frac{2P_i}{P_i + Q_i}\right) + \sum_{i=1}^{n}Q_i\ln\left(\frac{2Q_i}{P_i + Q_i}\right)\right]} \cr
   \strong{Lorentzian distance}                           \if{html}{\out{<br>}} \code{lorentzian()} \tab \eqn{\sum_{i=1}^{n}\ln{(1 + |P_i - Q_i|)}} \cr
   \strong{Manhattan distance}                            \if{html}{\out{<br>}} \code{manhattan()} \tab \eqn{\sum_{i=1}^{n} |P_i - Q_i|} \cr
   \strong{Matusita distance}                             \if{html}{\out{<br>}} \code{matusita()} \tab \eqn{\sqrt{\sum_{i=1}^{n}\left(\sqrt{P_i} - \sqrt{Q_i}\right)^2}} \cr
   \strong{Minkowski distance}                            \if{html}{\out{<br>}} \code{minkowski()} \tab \eqn{\sqrt[p]{\sum_{i=1}^{n} |P_i - Q_i|^p}} \if{html}{\out{<br>}} Where \eqn{p} is the \code{power} argument (the order of the norm). \cr
   \strong{Morisita dissimilarity} \if{html}{\out{<br>}} * Integers Only   \if{html}{\out{<br>}} \code{morisita()} \tab \eqn{\displaystyle 1 - \frac{2\sum_{i=1}^{n}X_{i}Y_{i}}{\displaystyle \left(\frac{\sum_{i=1}^{n}X_i(X_i - 1)}{X_T(X_T - 1)} + \frac{\sum_{i=1}^{n}Y_i(Y_i - 1)}{Y_T(Y_T - 1)}\right)X_{T}Y_{T}}} \cr
   \strong{Motyka dissimilarity}                          \if{html}{\out{<br>}} \code{motyka()} \tab \eqn{\displaystyle \frac{\sum_{i=1}^{n} \max(P_i, Q_i)}{\sum_{i=1}^{n} (P_i + Q_i)}} \cr
   \strong{Probabilistic Symmetric \eqn{\chi^2} distance} \if{html}{\out{<br>}} \code{psym_chisq()} \tab \eqn{\displaystyle 2\sum_{i=1}^{n}\frac{(P_i - Q_i)^2}{P_i + Q_i}} \cr
   \strong{Soergel distance}                              \if{html}{\out{<br>}} \code{soergel()} \tab \eqn{\displaystyle \frac{\sum_{i=1}^{n} |P_i - Q_i|}{\sum_{i=1}^{n} \max(P_i, Q_i)}} \cr
   \strong{Squared \eqn{\chi^2} distance}                 \if{html}{\out{<br>}} \code{squared_chisq()} \tab \eqn{\displaystyle \sum_{i=1}^{n}\frac{(P_i - Q_i)^2}{P_i + Q_i}} \cr
   \strong{Squared Chord distance}                        \if{html}{\out{<br>}} \code{squared_chord()} \tab \eqn{\sum_{i=1}^{n}\left(\sqrt{P_i} - \sqrt{Q_i}\right)^2} \cr
   \strong{Squared Euclidean distance}                    \if{html}{\out{<br>}} \code{squared_euclidean()} \tab \eqn{\sum_{i=1}^{n} (P_i - Q_i)^2} \cr
   \strong{Topsoe distance}                               \if{html}{\out{<br>}} \code{topsoe()} \tab \eqn{\displaystyle \sum_{i=1}^{n}P_i\ln\left(\frac{2P_i}{P_i + Q_i}\right) + \sum_{i=1}^{n}Q_i\ln\left(\frac{2Q_i}{P_i + Q_i}\right)} \cr
   \strong{Wave Hedges distance}                          \if{html}{\out{<br>}} \code{wave_hedges()} \tab \eqn{\displaystyle \sum_{i=1}^{n} \frac{|P_i - Q_i|}{\max(P_i, Q_i)}} \cr
}

\subsection{Presence / Absence}{

Given:
\itemize{
\item \eqn{A}, \eqn{B} : Number of features in each sample.
\item \eqn{J} : Number of features in common.
}\tabular{ll}{
    \tab  \cr
   \strong{Dice-Sorensen dissimilarity} \if{html}{\out{<br>}} \code{sorensen()} \tab \eqn{\displaystyle 1 - \frac{2J}{(A + B)}} \cr
   \strong{Hamming distance}            \if{html}{\out{<br>}} \code{hamming()} \tab \eqn{\displaystyle (A + B) - 2J} \cr
   \strong{Jaccard distance}            \if{html}{\out{<br>}} \code{jaccard()} \tab \eqn{\displaystyle 1 - \frac{J}{(A + B - J)}} \cr
   \strong{Otsuka-Ochiai dissimilarity} \if{html}{\out{<br>}} \code{ochiai()} \tab \eqn{\displaystyle 1 - \frac{J}{\sqrt{AB}}} \cr
}

}

\subsection{Phylogenetic}{

Given \eqn{n} branches with lengths \eqn{L} and a pair of samples' binary
(\eqn{A} and \eqn{B}) or proportional abundances (\eqn{P} and \eqn{Q}) on
each of those branches.\tabular{ll}{
    \tab  \cr
   \strong{Unweighted UniFrac}                  \if{html}{\out{<br>}} \code{unweighted_unifrac()} \tab \eqn{\displaystyle \frac{\sum_{i=1}^{n} L_i|A_i - B_i|}{\sum_{i=1}^{n} L_i\max(A_i, B_i)}} \cr
   \strong{Weighted UniFrac}                    \if{html}{\out{<br>}} \code{weighted_unifrac()} \tab \eqn{\displaystyle \sum_{i=1}^{n} L_i|P_i - Q_i|} \cr
   \strong{Normalized Weighted UniFrac}         \if{html}{\out{<br>}} \code{normalized_unifrac()} \tab \eqn{\displaystyle \frac{\sum_{i=1}^{n} L_i|P_i - Q_i|}{\sum_{i=1}^{n} L_i(P_i + Q_i)}} \cr
   \strong{Generalized UniFrac (GUniFrac)}      \if{html}{\out{<br>}} \code{generalized_unifrac()} \tab \eqn{\displaystyle \frac{\sum_{i=1}^{n} L_i(P_i + Q_i)^{\alpha}\left|\displaystyle \frac{P_i - Q_i}{P_i + Q_i}\right|}{\sum_{i=1}^{n} L_i(P_i + Q_i)^{\alpha}}} \if{html}{\out{<br>}} Where \eqn{\alpha} is a scalable weighting factor. \cr
   \strong{Variance-Adjusted Weighted UniFrac}  \if{html}{\out{<br>}} \code{variance_adjusted_unifrac()} \tab \eqn{\displaystyle \frac{\displaystyle \sum_{i=1}^{n} L_i\displaystyle \frac{|P_i - Q_i|}{\sqrt{(P_i + Q_i)(2 - P_i - Q_i)}} }{\displaystyle \sum_{i=1}^{n} L_i\displaystyle \frac{P_i + Q_i}{\sqrt{(P_i + Q_i)(2 - P_i - Q_i)}} }} \cr
}


See \code{vignette('unifrac')} for detailed example UniFrac calculations.
}
}

\examples{
    # Example counts matrix
    t(ex_counts)
    
    bray(ex_counts)
    
    jaccard(ex_counts)
    
    generalized_unifrac(ex_counts, tree = ex_tree)
    
    # Only calculate distances for Saliva vs all.
    bray(ex_counts, pairs = 1:3)
    
}
\references{
Levy, A., Shalom, B. R., & Chalamish, M. (2024). A guide to similarity
measures. \emph{arXiv}.

Cha, S.-H. (2007). Comprehensive survey on distance/similarity measures
between probability density functions. \emph{International Journal of Mathematical
Models and Methods in Applied Sciences}, 1(4), 300–307.
}
\concept{bdiv_functions}
