% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compute_intermediate_results.r
\name{compute_intermediate_results}
\alias{compute_intermediate_results}
\alias{compute_intermediate_results_dplyr}
\title{Compute intermediate set retrieval results per group}
\usage{
compute_intermediate_results(
  gold_vs_pred,
  grouping_var,
  propensity_scored = FALSE,
  cost_fp = NULL,
  drop_empty_groups = options::opt("drop_empty_groups"),
  check_group_names = options::opt("check_group_names")
)

compute_intermediate_results_dplyr(
  gold_vs_pred,
  grouping_var,
  propensity_scored = FALSE,
  cost_fp = NULL
)
}
\arguments{
\item{gold_vs_pred}{A data.frame with logical columns \code{"suggested",
"gold"} as produced by \code{create_comparison}.}

\item{grouping_var}{A character vector of grouping variables that must be
present in \code{gold_vs_pred}. The dplyr variant,
\code{compute_intermediate_results_dplyr()}, requires rlang symbols instead.}

\item{propensity_scored}{Logical, whether to use propensity scores as
weights.}

\item{cost_fp}{A numeric value > 0, defaults to \code{NULL}.}

\item{drop_empty_groups}{Should empty levels of factor variables be dropped in grouped set retrieval
computation? (Defaults to \code{TRUE}, overwritable using option 'casimir.drop_empty_groups' or environment variable 'R_CASIMIR_DROP_EMPTY_GROUPS')}

\item{check_group_names}{Perform replacement of dots in grouping columns. Disable for faster
computation if you can make sure that all columns used for grouping
("doc_id", "label_id", "doc_groups", "label_groups") do not contain
dots. (Defaults to \code{TRUE}, overwritable using option 'casimir.check_group_names' or environment variable 'R_CASIMIR_CHECK_GROUP_NAMES')}
}
\value{
A list of two elements:
\itemize{
\item \code{results_table}: A data.frame with columns \code{"n_gold",
      "n_suggested", "tp", "fp", "fn", "prec", "rprec", "rec", "f1"}.
\item \code{grouping_var}: The input vector \code{grouping_var}.
}
}
\description{
Compute intermediate set retrieval results per group, such as the number of
gold standard and predicted labels, the number of true positives, false
positives and false negatives, precision, R-precision, recall and F1 score.
}
\section{Functions}{
\itemize{
\item \code{compute_intermediate_results_dplyr()}: Variant with dplyr-based
internals rather than collapse-based internals.

}}
\examples{

library(casimir)

gold <- tibble::tribble(
  ~doc_id, ~label_id,
  "A", "a",
  "A", "b",
  "A", "c",
  "B", "a",
  "B", "d",
  "C", "a",
  "C", "b",
  "C", "d",
  "C", "f"
)

pred <- tibble::tribble(
  ~doc_id, ~label_id,
  "A", "a",
  "A", "d",
  "A", "f",
  "B", "a",
  "B", "e",
  "C", "f"
)

gold_vs_pred <- create_comparison(gold, pred)

compute_intermediate_results(gold_vs_pred, "doc_id")

}
