% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/explanatory_performance.R
\name{Incremental}
\alias{Incremental}
\title{Incremental prediction performance in regression}
\usage{
Incremental(
  xdata,
  ydata,
  new_xdata = NULL,
  new_ydata = NULL,
  stability = NULL,
  family = NULL,
  implementation = NULL,
  prediction = NULL,
  resampling = "subsampling",
  n_predictors = NULL,
  K = 100,
  tau = 0.8,
  seed = 1,
  n_thr = NULL,
  time = 1000,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{xdata}{matrix of predictors with observations as rows and variables as
columns.}

\item{ydata}{vector or matrix of outcome(s). If \code{family} is set
to \code{"binomial"} or \code{"multinomial"}, \code{ydata} can be a vector
with character/numeric values or a factor.}

\item{new_xdata}{optional test set (predictor data).}

\item{new_ydata}{optional test set (outcome data).}

\item{stability}{output of \code{\link{VariableSelection}}. If
\code{stability=NULL} (the default), a model including all variables in
\code{xdata} as predictors is fitted. Argument \code{family} must be
provided in this case.}

\item{family}{type of regression model. Possible values include
\code{"gaussian"} (linear regression), \code{"binomial"} (logistic
regression), and \code{"cox"} (survival analysis). If provided, this
argument must be consistent with input \code{stability}.}

\item{implementation}{optional function to refit the model. If
\code{implementation=NULL} and \code{stability} is the output of
\code{\link{VariableSelection}}, \code{\link[stats]{lm}} (linear
regression), \code{\link[survival]{coxph}} (Cox regression),
\code{\link[stats]{glm}} (logistic regression), or
\code{\link[nnet]{multinom}} (multinomial regression) is used.}

\item{prediction}{optional function to compute predicted values from the
model refitted with \code{implementation}.}

\item{resampling}{resampling approach to create the training set. The default
is \code{"subsampling"} for sampling without replacement of a proportion
\code{tau} of the observations. Alternatively, this argument can be a
function to use for resampling. This function must use arguments named
\code{data} and \code{tau} and return the IDs of observations to be
included in the resampled dataset.}

\item{n_predictors}{number of predictors to consider.}

\item{K}{number of training-test splits. Only used if \code{new_xdata} and
\code{new_ydata} are not provided.}

\item{tau}{proportion of observations used in the training set. Only used if
\code{new_xdata} and \code{new_ydata} are not provided.}

\item{seed}{value of the seed to ensure reproducibility of the results. Only
used if \code{new_xdata} and \code{new_ydata} are not provided.}

\item{n_thr}{number of thresholds to use to construct the ROC curve. If
\code{n_thr=NULL}, all predicted probability values are iteratively used as
thresholds. For faster computations on large data, fewer thresholds can be
used. Only applicable to logistic regression.}

\item{time}{numeric indicating the time for which the survival probabilities
are computed. Only applicable to Cox regression.}

\item{verbose}{logical indicating if a loading bar and messages should be
printed.}

\item{...}{additional parameters passed to the function provided in
\code{resampling}.}
}
\value{
An object of class \code{incremental}.

  For logistic regression, a list with: \item{FPR}{A list with, for each of
  the models (sequentially added predictors), the False Positive Rates for
  different thresholds (columns) and different data splits (rows).}
  \item{TPR}{A list with, for each of the models (sequentially added
  predictors), the True Positive Rates for different thresholds (columns) and
  different data splits (rows).} \item{AUC}{A list with, for each of the
  models (sequentially added predictors), a vector of Area Under the Curve
  (AUC) values obtained with different data splits.} \item{Beta}{Estimated
  regression coefficients from visited models.} \item{names}{Names of the
  predictors by order of inclusion.} \item{stable}{Binary vector indicating
  which predictors are stably selected. Only returned if \code{stability} is
  provided.}

  For Cox regression, a list with: \item{concordance}{A list with, for each
  of the models (sequentially added predictors), a vector of concordance
  indices obtained with different data splits.} \item{Beta}{Estimated
  regression coefficients from visited models.} \item{names}{Names of the
  predictors by order of inclusion.} \item{stable}{Binary vector indicating
  which predictors are stably selected. Only returned if \code{stability} is
  provided.}

  For linear regression, a list with: \item{Q_squared}{A list with, for each
  of the models (sequentially added predictors), a vector of Q-squared values
  obtained with different data splits.} \item{Beta}{Estimated regression
  coefficients from visited models.} \item{names}{Names of the predictors by
  order of inclusion.} \item{stable}{Binary vector indicating which
  predictors are stably selected. Only returned if \code{stability} is
  provided.}
}
\description{
Computes the prediction performance of regression models where predictors are
sequentially added by order of decreasing selection proportion. This function
can be used to evaluate the marginal contribution of each of the selected
predictors over and above more stable predictors. Performance is evaluated
as in \code{\link{ExplanatoryPerformance}}.
}
\examples{
\donttest{
# Data simulation
set.seed(1)
simul <- SimulateRegression(
  n = 1000, pk = 20,
  family = "binomial", ev_xy = 0.8
)

# Data split: selection, training and test set
ids <- Split(
  data = simul$ydata,
  family = "binomial",
  tau = c(0.4, 0.3, 0.3)
)
xselect <- simul$xdata[ids[[1]], ]
yselect <- simul$ydata[ids[[1]], ]
xtrain <- simul$xdata[ids[[2]], ]
ytrain <- simul$ydata[ids[[2]], ]
xtest <- simul$xdata[ids[[3]], ]
ytest <- simul$ydata[ids[[3]], ]

# Stability selection
stab <- VariableSelection(
  xdata = xselect,
  ydata = yselect,
  family = "binomial"
)

# Performances in test set of model refitted in training set
incr <- Incremental(
  xdata = xtrain, ydata = ytrain,
  new_xdata = xtest, new_ydata = ytest,
  stability = stab, n_predictors = 10
)
plot(incr)

# Alternative with multiple training/test splits
incr <- Incremental(
  xdata = rbind(xtrain, xtest),
  ydata = c(ytrain, ytest),
  stability = stab, K = 10, n_predictors = 10
)
plot(incr)
}

}
\seealso{
\code{\link{VariableSelection}}, \code{\link{Refit}}

Other prediction performance functions: 
\code{\link{ExplanatoryPerformance}()}
}
\concept{prediction performance functions}
