% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kmm.R
\name{kmm}
\alias{kmm}
\title{Kernel mean matching approach to density ratio estimation}
\usage{
kmm(
  df_numerator,
  df_denominator,
  scale = "numerator",
  constrained = FALSE,
  nsigma = 10,
  sigma_quantile = NULL,
  sigma = NULL,
  ncenters = 200,
  centers = NULL,
  cv = TRUE,
  nfold = 5,
  parallel = FALSE,
  nthreads = NULL,
  progressbar = TRUE,
  osqp_settings = NULL,
  cluster = NULL
)
}
\arguments{
\item{df_numerator}{\code{data.frame} with exclusively numeric variables with
the numerator samples}

\item{df_denominator}{\code{data.frame} with exclusively numeric variables
with the denominator samples (must have the same variables as
\code{df_numerator})}

\item{scale}{\code{"numerator"}, \code{"denominator"}, or \code{NULL},
indicating whether to standardize each numeric variable according to the
numerator means and standard deviations, the denominator means and standard
deviations, or apply no standardization at all.}

\item{constrained}{\code{logical} indicating which optimization to use:
\code{FALSE} for unconstrained optimization, \code{TRUE} for constrained
optimization. Defaults to \code{FALSE}.}

\item{nsigma}{Integer indicating the number of sigma values (bandwidth
parameter of the Gaussian kernel gram matrix) to use in cross-validation.}

\item{sigma_quantile}{\code{NULL} or numeric vector with probabilities to
calculate the quantiles of the distance matrix to obtain sigma values. If
\code{NULL}, \code{nsigma} values between \code{0.25} and \code{0.75} are
used.}

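\item{sigma}{\code{NULL} or a scalar value to determine the bandwidth of the
Gaussian kernel gram matrix. If \code{NULL}, the bandwidth values are taken
from quantiles of the distance matrix (see \code{sigma_quantile}); by
default, \code{nsigma} quantiles between \code{0.25} and \code{0.75} are
used.}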

\item{ncenters}{Maximum number of Gaussian centers in the kernel gram
matrix. Defaults to \code{200}.}

\item{centers}{Option to specify the Gaussian centers manually.}

\item{cv}{Logical indicating whether or not to do cross-validation.}

\item{nfold}{Number of cross-validation folds used in order to calculate the
optimal \code{sigma} value (default is 5-fold cv).}

\item{parallel}{Logical indicating whether to use parallel processing in the
cross-validation scheme.}

\item{nthreads}{\code{NULL} or integer indicating the number of threads to
use for parallel processing. If parallel processing is enabled, it defaults
to the number of available threads minus one.}

\item{progressbar}{Logical indicating whether or not to display a progress bar.}

\item{osqp_settings}{Optional: settings to pass to the \code{osqp} solver for
constrained optimization.}

\item{cluster}{Optional: a cluster object to use for parallel processing,
see \code{parallel::makeCluster}.}
}
\value{
\code{kmm}-object, containing all information to calculate the
density ratio using optimal sigma and optimal weights.
}
\description{
Kernel mean matching approach to density ratio estimation
}
\examples{
set.seed(123)
# Fit model
dr <- kmm(numerator_small, denominator_small)
# Inspect model object
dr
# Obtain summary of model object
summary(dr)
# Plot model object
plot(dr)
# Plot density ratio for each variable individually
plot_univariate(dr)
# Plot density ratio for each pair of variables
plot_bivariate(dr)
# Predict density ratio and inspect first 6 predictions
head(predict(dr))
# Fit model with custom parameters
kmm(numerator_small, denominator_small,
    nsigma = 5, ncenters = 100, nfold = 10,
    constrained = TRUE)

}
\references{
Huang, J., Smola, A. J., Gretton, A., Borgwardt, K. M., &
Schölkopf, B. (2006). Correcting sample selection bias by unlabeled data.
In \if{html}{\out{<i>}}Advances in Neural Information Processing Systems\if{html}{\out{</i>}}, edited by B.
Schölkopf, J. Platt and T. Hoffman. Available from
\url{https://proceedings.neurips.cc/paper/2006/hash/a2186aa7c086b46ad4e8bf81e2a3a19b-Abstract.html}.
}
