% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/higher_freq.R
\name{seg_multi}
\alias{seg_multi}
\title{Parallelized likelihood ratio test for segregation distortion for
arbitrary (even) ploidies.}
\usage{
seg_multi(
  g,
  p1_ploidy,
  p2_ploidy = p1_ploidy,
  p1 = NULL,
  p2 = NULL,
  model = c("seg", "auto", "auto_dr", "allo", "allo_pp", "auto_allo"),
  outlier = TRUE,
  ret_out = FALSE,
  ob = 0.03,
  db = c("ces", "prcs"),
  ntry = 3,
  df_tol = 0.001
)
}
\arguments{
\item{g}{One of two inputs
\itemize{
\item{A matrix of genotype counts. The rows index the loci and the columns index the genotypes.}
\item{An array of genotype log-likelihoods. The rows index the loci, the columns index the individuals, and the slices index the genotypes. Log-likelihoods are base e (natural log).}
}}

\item{p1_ploidy, p2_ploidy}{The ploidy of the first or second parent. Should be even.}

\item{p1}{One of three inputs
\itemize{
\item{A vector of parent 1's genotypes.}
\item{A matrix of parent 1's genotype log-likelihoods. The rows index the loci and the columns index the genotypes. Logs are in base e (natural log).}
\item{\code{NULL} (only supported when using genotype likelihoods for the offspring)}
}}

\item{p2}{One of three inputs
\itemize{
\item{A vector of parent 2's genotypes.}
\item{A matrix of parent 2's genotype log-likelihoods. The rows index the loci and the columns index the genotypes. Logs are in base e (natural log).}
\item{\code{NULL} (only supported when using genotype likelihoods for the offspring)}
}}

\item{model}{One of six forms:
\describe{
\item{\code{"seg"}}{Segmental allopolyploid. Allows for arbitrary levels of polysomic and disomic inheritance. This can account for partial preferential pairing. It also accounts for double reduction at simplex loci.}
\item{\code{"auto"}}{Autopolyploid. Allows only for polysomic inheritance. No double reduction.}
\item{\code{"auto_dr"}}{Autopolyploid, allowing for the effects of double reduction.}
\item{\code{"allo"}}{Allopolyploid. Only complete disomic inheritance is explored.}
\item{\code{"allo_pp"}}{Allopolyploid, allowing for the effects of partial preferential pairing. Note that the autopolyploid model (with complete bivalent pairing and no double reduction) is a special case of this model.}
\item{\code{"auto_allo"}}{Only complete disomic and complete polysomic inheritance is studied.}
}}

\item{outlier}{A logical. Should we allow for outliers (\code{TRUE}) or not (\code{FALSE})?}

\item{ret_out}{A logical. Should we return the probability that each individual is an outlier (\code{TRUE}) or not (\code{FALSE})?}

\item{ob}{The default upper bound on the outlier proportion.}

\item{db}{Should we use the complete equational segregation model (\code{"ces"}) or
the pure random chromatid segregation model (\code{"prcs"}) to determine the upper
bound(s) on the double reduction rate(s). See \code{\link{drbounds}()}
for details.}

\item{ntry}{The number of times to try the optimization.
You probably do not want to touch this.}

\item{df_tol}{Threshold for the rank of the Jacobian for the degrees of
freedom calculation. This accounts for weak identifiability in the
null model. You probably do not want to touch this.}
}
\value{
A data frame with the following elements:
\describe{
\item{\code{statistic}}{The likelihood ratio test statistic.}
\item{\code{p_value}}{The p-value of the likelihood ratio test.}
\item{\code{df}}{The (estimated) degrees of freedom of the test.}
\item{\code{null_bic}}{The BIC of the null model (no segregation distortion).}
\item{\code{df0}}{The (estimated) number of parameters under null.}
\item{\code{df1}}{The (estimated) number of parameters under the alternative.}
\item{\code{p1}}{The (estimated) genotype of parent 1.}
\item{\code{p2}}{The (estimated) genotype of parent 2.}
\item{\code{q0}}{The MLE of the genotype frequencies under the null.}
\item{\code{q1}}{The MLE of the genotype frequencies under the alternative.}
\item{\code{outprob}}{Outlier probabilities. Only returned if \code{ret_out = TRUE}.
\itemize{
\item{If using genotype counts, element \code{i} is the probability that an individual \emph{with genotype} \code{i-1} is an outlier. So the return vector has length ploidy plus 1.}
\item{If using genotype log-likelihoods, element \code{i} is the probability that individual \code{i} is an outlier. So the return vector has the same length as the number of individuals.}
}
These outlier probabilities are only valid if the null of no segregation distortion is true.
}
}
Note that since this data frame contains the list-columns \code{q0} and
\code{q1}, you cannot use \code{\link[utils]{write.csv}()} to save it.
You have to either remove those columns first or use something
like \code{\link[base]{saveRDS}()}.
}
\description{
Uses the future package to implement parallelization support for
the likelihood ratio tests for segregation distortion. Details of
this test are provided in the \code{\link{seg_lrt}()} function's
documentation. See Gerard et al. (2025) for details of the methods.
}
\section{Parallel Computation}{


The \code{seg_multi()} function supports parallel computing. It does
so through the \href{https://cran.r-project.org/package=future}{future}
package.

You first specify the evaluation plan with \code{\link[future]{plan}()}
from the \code{future} package. On a local machine, this is typically
just \code{future::plan(future::multisession, workers = nc)} where
\code{nc} is the number of workers you want. You can find the maximum
number of possible workers with \code{\link[future]{availableCores}()}.
You then run \code{seg_multi()}, then shut down the workers with
\code{future::plan(future::sequential)}. The pseudocode is
\preformatted{
  future::plan(future::multisession, workers = nc)
  seg_multi()
  future::plan(future::sequential)
}
}

\section{Null Model}{

The gamete frequencies under the null model can be calculated via
\code{\link{gamfreq}()}. The genotype frequencies, which are just
a discrete linear convolution (\code{\link[stats]{convolve}()}) of the
gamete frequencies, can be calculated via \code{\link{gf_freq}()}.

The null model's gamete frequencies for true autopolyploids
(\code{model = "auto"}) or
true allopolyploids (\code{model = "allo"}) are given in the \code{\link{seg}} data frame
that comes with this package. I only made that data frame go up to
ploidy 20, but let me know if you need it for higher ploidies.

The polyRAD folks test for full autopolyploid and full allopolyploid, so I
included that as an option (\code{model = "auto_allo"}).

We can account for arbitrary levels of double reduction in autopolyploids
(\code{model = "auto_dr"}) using the gamete frequencies from
Huang et al. (2019).

The null model for segmental allopolyploids (\code{model = "allo_pp"}) is the mixture model of
the possible allopolyploid gamete frequencies. The autopolyploid model
(without double reduction) is a subset of this mixture model.

In the above mixture model, we can account for double reduction for simplex
loci (\code{model = "seg"}) by just slightly reducing the
number of simplex gametes and increasing the number of duplex and
nullplex gametes. That is, the frequencies for (nullplex, simplex, duplex)
gametes go from \code{(0.5, 0.5, 0)} to
\code{(0.5 + b, 0.5 - 2 * b, b)}.

\code{model = "seg"} is the most general, so it is the default. But you
should use other models if you have more information on your species. E.g.
if you know you have an autopolyploid, use either \code{model = "auto"}
or \code{model = "auto_dr"}.
}

\section{Unidentified Parameters}{

Do NOT interpret the estimated parameters in the \code{null$gam} list.
These parameters are weakly identified (I had to do some fancy
spectral methods to account for this in the null distribution
of the tests). Even though they are technically identified, you would
need a massive data set to be able to estimate them accurately.
}

\examples{
\donttest{
## Assuming genotypes are known (typically a bad idea)
glist <- multidog_to_g(
  mout = ufit,
  ploidy = 4,
  type = "all_g",
  p1 = "indigocrisp",
  p2 = "sweetcrisp")
p1_1 <- glist$p1
p2_1 <- glist$p2
g_1 <- glist$g
s1 <- seg_multi(
  g = g_1,
  p1_ploidy = 4,
  p2_ploidy = 4,
  p1 = p1_1,
  p2 = p2_1)
s1[, c("snp", "p_value")]

## Put NULL if you have absolutely no information on the parents
s2 <- seg_multi(g = g_1, p1_ploidy = 4, p2_ploidy = 4, p1 = NULL, p2 = NULL)
s2[, c("snp", "p_value")]

## Using genotype likelihoods (typically a good idea)
## Also demonstrate parallelization through future package.
glist <- multidog_to_g(
  mout = ufit,
  ploidy = 4,
  type = "all_gl",
  p1 = "indigocrisp",
  p2 = "sweetcrisp")
p1_2 <- glist$p1
p2_2 <- glist$p2
g_2 <- glist$g

# future::plan(future::multisession, workers = 2)
# s3 <- seg_multi(
#   g = g_2,
#   p1_ploidy = 4,
#   p2_ploidy = 4,
#   p1 = p1_2,
#   p2 = p2_2,
#   ret_out = TRUE)
# future::plan(future::sequential)
# s3[, c("snp", "p_value")]

## Outlier probabilities are returned if `ret_out = TRUE`
# graphics::plot(s3$outprob[[6]], ylim = c(0, 1))
}

}
\references{
\itemize{
\item{Gerard, D, Ambrosano, GB, Pereira, GdS, & Garcia, AAF (2025). Tests for segregation distortion in higher ploidy F1 populations. \emph{bioRxiv}, p. 1-20. \doi{10.1101/2025.06.23.661114}}
}
}
\seealso{
\itemize{
\item \code{\link[=seg_lrt]{seg_lrt()}} Single locus LRT for segregation distortion.
\item \code{\link[=gamfreq]{gamfreq()}} Gamete frequencies under various models of meiosis.
\item \code{\link[=gf_freq]{gf_freq()}} F1 genotype frequencies under various models of meiosis.
\item \code{\link[=multidog_to_g]{multidog_to_g()}} Converts the output of \code{updog::multidog()} into something that you can input into \code{seg_multi()}.
}
}
\author{
David Gerard
}
