% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PlotKaplanMeier.R
\name{plot_kaplan_meier}
\alias{plot_kaplan_meier}
\alias{plot_kaplan_meier,ANY-method}
\alias{plot_kaplan_meier,familiarCollection-method}
\title{Plot Kaplan-Meier survival curves.}
\usage{
plot_kaplan_meier(
  object,
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  linetype_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  combine_legend = TRUE,
  ggtheme = NULL,
  discrete_palette = NULL,
  x_label = "time",
  x_label_shared = "column",
  y_label = "survival probability",
  y_label_shared = "row",
  legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 5,
  x_breaks = NULL,
  y_range = c(0, 1),
  y_n_breaks = 5,
  y_breaks = NULL,
  confidence_level = NULL,
  conf_int_style = c("ribbon", "step", "none"),
  conf_int_alpha = 0.4,
  censoring = TRUE,
  censor_shape = "plus",
  show_logrank = TRUE,
  show_survival_table = TRUE,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)

\S4method{plot_kaplan_meier}{ANY}(
  object,
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  linetype_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  combine_legend = TRUE,
  ggtheme = NULL,
  discrete_palette = NULL,
  x_label = "time",
  x_label_shared = "column",
  y_label = "survival probability",
  y_label_shared = "row",
  legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 5,
  x_breaks = NULL,
  y_range = c(0, 1),
  y_n_breaks = 5,
  y_breaks = NULL,
  confidence_level = NULL,
  conf_int_style = c("ribbon", "step", "none"),
  conf_int_alpha = 0.4,
  censoring = TRUE,
  censor_shape = "plus",
  show_logrank = TRUE,
  show_survival_table = TRUE,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)

\S4method{plot_kaplan_meier}{familiarCollection}(
  object,
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  linetype_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  combine_legend = TRUE,
  ggtheme = NULL,
  discrete_palette = NULL,
  x_label = "time",
  x_label_shared = "column",
  y_label = "survival probability",
  y_label_shared = "row",
  legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 5,
  x_breaks = NULL,
  y_range = c(0, 1),
  y_n_breaks = 5,
  y_breaks = NULL,
  confidence_level = NULL,
  conf_int_style = c("ribbon", "step", "none"),
  conf_int_alpha = 0.4,
  censoring = TRUE,
  censor_shape = "plus",
  show_logrank = TRUE,
  show_survival_table = TRUE,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)
}
\arguments{
\item{object}{\code{familiarCollection} object, or one or more \code{familiarData}
objects, that will be internally converted to a \code{familiarCollection} object.
It is also possible to provide a \code{familiarEnsemble} or one or more
\code{familiarModel} objects together with the data from which the plotted data
are computed prior to export. Paths to such files can also be provided.}

\item{draw}{(\emph{optional}) Draws the plot if TRUE.}

\item{dir_path}{(\emph{optional}) Path to the directory where created figures are
saved to. Output is saved in the \code{stratification} subdirectory. If \code{NULL} no
figures are saved, but are returned instead.}

\item{split_by}{(\emph{optional}) Splitting variables. This refers to column names
on which datasets are split. A separate figure is created for each split.
See details for available variables.}

\item{color_by}{(\emph{optional}) Variables used to determine fill colour of plot
objects. The variables cannot overlap with those provided to the \code{split_by}
argument, but may overlap with other arguments. See details for available
variables.}

\item{linetype_by}{(\emph{optional}) Variables that are used to determine the
linetype of lines in a plot. The variables cannot overlap with those
provided to the \code{split_by} argument, but may overlap with other arguments.
See details for available variables.}

\item{facet_by}{(\emph{optional}) Variables used to determine how and if facets of
each figure appear. In case the \code{facet_wrap_cols} argument is \code{NULL}, the
first variable is used to define columns, and the remaining variables are
used to define rows of facets. The variables cannot overlap with those
provided to the \code{split_by} argument, but may overlap with other arguments.
See details for available variables.}

\item{facet_wrap_cols}{(\emph{optional}) Number of columns to generate when facet
wrapping. If NULL, a facet grid is produced instead.}

\item{combine_legend}{(\emph{optional}) Flag to indicate whether the same legend
is to be shared by multiple aesthetics, such as those specified by
\code{color_by} and \code{linetype_by} arguments.}

\item{ggtheme}{(\emph{optional}) \code{ggplot} theme to use for plotting.}

\item{discrete_palette}{(\emph{optional}) Palette to use to color the different
risk strata in case a non-singular variable was provided to the \code{color_by}
argument.}

\item{x_label}{(\emph{optional}) Label to provide to the x-axis. If NULL, no label
is shown.}

\item{x_label_shared}{(\emph{optional}) Sharing of x-axis labels between facets.
One of three values:
\itemize{
\item \code{overall}: A single label is placed at the bottom of the figure. Tick
text (but not the ticks themselves) is removed for all but the bottom facet
plot(s).
\item \code{column}: A label is placed at the bottom of each column. Tick text (but
not the ticks themselves) is removed for all but the bottom facet plot(s).
\item \code{individual}: A label is placed below each facet plot. Tick text is kept.
}}

\item{y_label}{(\emph{optional}) Label to provide to the y-axis. If NULL, no label
is shown.}

\item{y_label_shared}{(\emph{optional}) Sharing of y-axis labels between facets.
One of three values:
\itemize{
\item \code{overall}: A single label is placed to the left of the figure. Tick text
(but not the ticks themselves) is removed for all but the left-most facet
plot(s).
\item \code{row}: A label is placed to the left of each row. Tick text (but not the
ticks themselves) is removed for all but the left-most facet plot(s).
\item \code{individual}: A label is placed to the left of each facet plot. Tick text is kept.
}}

\item{legend_label}{(\emph{optional}) Label to provide to the legend. If NULL, the
legend will not have a name.}

\item{plot_title}{(\emph{optional}) Label to provide as figure title. If NULL, no
title is shown.}

\item{plot_sub_title}{(\emph{optional}) Label to provide as figure subtitle. If
NULL, no subtitle is shown.}

\item{caption}{(\emph{optional}) Label to provide as figure caption. If NULL, no
caption is shown.}

\item{x_range}{(\emph{optional}) Value range for the x-axis.}

\item{x_n_breaks}{(\emph{optional}) Number of breaks to show on the x-axis of the
plot. \code{x_n_breaks} is used to determine the \code{x_breaks} argument in case it
is unset.}

\item{x_breaks}{(\emph{optional}) Break points on the x-axis of the plot.}

\item{y_range}{(\emph{optional}) Value range for the y-axis.}

\item{y_n_breaks}{(\emph{optional}) Number of breaks to show on the y-axis of the
plot. \code{y_n_breaks} is used to determine the \code{y_breaks} argument in case it
is unset.}

\item{y_breaks}{(\emph{optional}) Break points on the y-axis of the plot.}

\item{confidence_level}{(\emph{optional}) Confidence level for the strata in the
plot.}

\item{conf_int_style}{(\emph{optional}) Confidence interval style. See details for
allowed styles.}

\item{conf_int_alpha}{(\emph{optional}) Alpha value to determine transparency of
confidence intervals or, alternatively, other plot elements with which the
confidence interval overlaps. Only values between 0.0 (fully transparent)
and 1.0 (fully opaque) are allowed.}

\item{censoring}{(\emph{optional}) Flag to indicate whether censored samples should
be indicated on the survival curve.}

\item{censor_shape}{(\emph{optional}) Shape used to indicate censored samples on
the survival curve. Available shapes are documented in the \code{ggplot2}
vignette \emph{Aesthetic specifications}. By default a plus shape is used.}

\item{show_logrank}{(\emph{optional}) Specifies whether the results of a logrank
test to assess differences between the risk strata are annotated in the plot.
A logrank test can only be shown when \code{color_by} and \code{linetype_by} are
either unset, or only contain \code{risk_group}.}

\item{show_survival_table}{(\emph{optional}) Specifies whether a survival table is
shown below the Kaplan-Meier survival curves. Survival in the risk strata is
assessed for each of the breaks in \code{x_breaks}.}

\item{width}{(\emph{optional}) Width of the plot. A default value is derived from
the number of facets.}

\item{height}{(\emph{optional}) Height of the plot. A default value is derived from
the number of facets and the inclusion of survival tables.}

\item{units}{(\emph{optional}) Plot size unit. Either \code{cm} (default), \code{mm} or \verb{in}.}

\item{export_collection}{(\emph{optional}) Exports the collection if TRUE.}

\item{...}{
  Arguments passed on to \code{\link[=as_familiar_collection]{as_familiar_collection}}, \code{\link[ggplot2:ggsave]{ggplot2::ggsave}}, \code{\link[=extract_risk_stratification_data]{extract_risk_stratification_data}}
  \describe{
    \item{\code{familiar_data_names}}{Names of the dataset(s). Only used if the \code{object} parameter
is one or more \code{familiarData} objects.}
    \item{\code{collection_name}}{Name of the collection.}
    \item{\code{filename}}{File name to create on disk.}
    \item{\code{plot}}{Plot to save, defaults to last plot displayed.}
    \item{\code{device}}{Device to use. Can either be a device function
(e.g. \link{png}), or one of "eps", "ps", "tex" (pictex),
"pdf", "jpeg", "tiff", "png", "bmp", "svg" or "wmf" (windows only).}
    \item{\code{path}}{Path of the directory to save plot to: \code{path} and \code{filename}
are combined to create the fully qualified file name. Defaults to the
working directory.}
    \item{\code{scale}}{Multiplicative scaling factor.}
    \item{\code{dpi}}{Plot resolution. Also accepts a string input: "retina" (320),
"print" (300), or "screen" (72). Applies only to raster output types.}
    \item{\code{limitsize}}{When \code{TRUE} (the default), \code{ggsave()} will not
save images larger than 50x50 inches, to prevent the common error of
specifying dimensions in pixels.}
    \item{\code{bg}}{Background colour. If \code{NULL}, uses the \code{plot.background} fill value
from the plot theme.}
    \item{\code{data}}{A \code{dataObject} object, \code{data.table} or \code{data.frame} that
constitutes the data that are assessed.}
    \item{\code{is_pre_processed}}{Flag that indicates whether the data was already
pre-processed externally, e.g. normalised and clustered. Only used if the
\code{data} argument is a \code{data.table} or \code{data.frame}.}
    \item{\code{cl}}{Cluster created using the \code{parallel} package. This cluster is then
used to speed up computation through parallelisation.}
    \item{\code{ensemble_method}}{Method for ensembling predictions from models for the
same sample. Available methods are:
\itemize{
\item \code{median} (default): Use the median of the predicted values as the ensemble
value for a sample.
\item \code{mean}: Use the mean of the predicted values as the ensemble value for a
sample.
}}
    \item{\code{verbose}}{Flag to indicate whether feedback should be provided on the
computation and extraction of various data elements.}
    \item{\code{message_indent}}{Number of indentation steps for messages shown during
computation and extraction of various data elements.}
    \item{\code{detail_level}}{(\emph{optional}) Sets the level at which results are computed
and aggregated.
\itemize{
\item \code{ensemble}: Results are computed at the ensemble level, i.e. over all
models in the ensemble. This means that, for example, bias-corrected
estimates of model performance are assessed by creating (at least) 20
bootstraps and computing the model performance of the ensemble model for
each bootstrap.
\item \code{hybrid} (default): Results are computed at the level of models in an
ensemble. This means that, for example, bias-corrected estimates of model
performance are directly computed using the models in the ensemble. If there
are at least 20 trained models in the ensemble, performance is computed for
each model, in contrast to \code{ensemble} where performance is computed for the
ensemble of models. If there are fewer than 20 trained models in the
ensemble, bootstraps are created so that at least 20 point estimates can be
made.
\item \code{model}: Results are computed at the model level. This means that, for
example, bias-corrected estimates of model performance are assessed by
creating (at least) 20 bootstraps and computing the performance of the model
for each bootstrap.
}

Note that each level of detail has a different interpretation for bootstrap
confidence intervals. For \code{ensemble} and \code{model} these are the confidence
intervals for the ensemble and an individual model, respectively. That is,
the confidence interval describes the range where an estimate produced by a
respective ensemble or model trained on a repeat of the experiment may be
found with the probability of the confidence level. For \code{hybrid}, it
represents the range where any single model trained on a repeat of the
experiment may be found with the probability of the confidence level. By
definition, confidence intervals obtained using \code{hybrid} are at least as
wide as those for \code{ensemble}. \code{hybrid} offers the correct interpretation if
the goal of the analysis is to assess the result of a single, unspecified,
model.

\code{hybrid} is generally computationally less expensive than \code{ensemble}, which
in turn is somewhat less expensive than \code{model}.

A non-default \code{detail_level} parameter can be specified for separate
evaluation steps by providing a parameter value in a named list with data
elements, e.g. \code{list("auc_data"="ensemble", "model_performance"="hybrid")}.
This parameter can be set for the following data elements: \code{auc_data},
\code{decision_curve_analyis}, \code{model_performance}, \code{permutation_vimp},
\code{ice_data}, \code{prediction_data} and \code{confusion_matrix}.}
  }}
}
\value{
\code{NULL}, or a list of plot objects if \code{dir_path} is \code{NULL}.
}
\description{
This function creates Kaplan-Meier survival curves from
stratification data stored in a familiarCollection object.
}
\details{
This function generates a Kaplan-Meier survival plot based on risk
group stratification by the learners.

\code{familiar} does not determine what units the x-axis has or what kind of
survival the y-axis represents. It is therefore recommended to provide
\code{x_label} and \code{y_label} arguments.

Available splitting variables are: \code{fs_method}, \code{learner}, \code{data_set},
\code{risk_group} and \code{stratification_method}. By default, separate figures are
created for each combination of \code{fs_method} and \code{learner}, with faceting by
\code{data_set} and colouring of the strata in each individual plot by \code{risk_group}.

Available palettes for \code{discrete_palette} are those listed by
\code{grDevices::palette.pals()} (requires R >= 4.0.0), \code{grDevices::hcl.pals()}
(requires R >= 3.6.0) and \code{rainbow}, \code{heat.colors}, \code{terrain.colors},
\code{topo.colors} and \code{cm.colors}, which correspond to the palettes of the same
name in \code{grDevices}. If not specified, a default palette based on palettes
in Tableau is used. You may also specify your own palette by using colour
names listed by \code{grDevices::colors()} or through hexadecimal RGB strings.

Greenwood confidence intervals of the Kaplan-Meier curve can be shown using
various styles set by \code{conf_int_style}:
\itemize{
\item \code{ribbon} (default): confidence intervals are shown as a ribbon with an
opacity of \code{conf_int_alpha} around the point estimate of the Kaplan-Meier
curve.
\item \code{step}: confidence intervals are shown as a step function around
the point estimate of the Kaplan-Meier curve.
\item \code{none}: confidence intervals are not shown. The point estimate of the
Kaplan-Meier curve is shown as usual.
}

Labelling methods such as \code{set_risk_group_names} or \code{set_data_set_names} can
be applied to the \code{familiarCollection} object to update labels, and order
the output in the figure.
}
