% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare_datasets.R
\name{compare_datasets}
\alias{compare_datasets}
\title{Compare Two Datasets}
\usage{
compare_datasets(df1, df2, tolerance = 0, vars = NULL, id_vars = NULL)
}
\arguments{
\item{df1}{A data frame (the \emph{base} dataset).}

\item{df2}{A data frame (the \emph{compare} dataset).}

\item{tolerance}{Numeric tolerance for floating-point comparisons (default
\code{0}). When \code{tolerance > 0}, numeric values are considered equal if
their absolute difference is within the tolerance. Character and factor
columns always use exact matching regardless of \code{tolerance}.}

\item{vars}{Optional character vector of variable names to compare. When
provided, only these columns are included in the observation-level
comparison. Structural comparison (extra columns, type mismatches) still
covers all columns. Default is \code{NULL}, which compares all common
columns.}

\item{id_vars}{Optional character vector of column names to use as matching
keys. When provided, rows are matched by these key columns instead of by
position. This allows comparison of datasets with different row counts or
different row orders. Rows that exist in only one dataset are reported in
\code{unmatched_rows}. Default is \code{NULL}, which uses positional
matching.}
}
\value{
A \code{dataset_comparison} list containing:
\item{nrow_df1, ncol_df1}{Dimensions of \code{df1}.}
\item{nrow_df2, ncol_df2}{Dimensions of \code{df2}.}
\item{common_columns}{Character vector of columns present in both datasets.}
\item{extra_in_df1}{Columns present only in \code{df1}.}
\item{extra_in_df2}{Columns present only in \code{df2}.}
\item{type_mismatches}{Data frame of columns whose class differs
(columns: \code{column}, \code{type_df1}, \code{type_df2}), or
\code{NULL} if none.}
\item{missing_values}{Data frame summarising \code{NA} counts per column per
dataset (columns: \code{column}, \code{na_df1}, \code{na_df2}), or
\code{NULL} if there are no missing values.}
\item{variable_comparison}{Output of \code{\link[=compare_variables]{compare_variables()}}.}
\item{observation_comparison}{Output of \code{\link[=compare_observations]{compare_observations()}}, or a
list with a \code{message} element when row counts differ and positional
matching is used (i.e. \code{id_vars} is \code{NULL}).}
\item{id_vars}{Character vector of key columns used for matching, or
\code{NULL} if positional matching was used.}
\item{unmatched_rows}{List with \code{df1_only} and \code{df2_only}
data frames holding the rows that have no match in the other dataset.
Returned for key-based matching only; \code{NULL} otherwise.}
}
\description{
Compares two datasets at three levels in a single call:

\enumerate{
\item \strong{Dataset level} -- dimensions, column overlap, missing-value
totals.
\item \strong{Variable level} -- column name discrepancies and data-type
mismatches (delegates to \code{\link[=compare_variables]{compare_variables()}}).
\item \strong{Observation level} -- row-by-row value differences on common
columns. Uses positional matching by default, or key-based matching when
\code{id_vars} is provided.
}

The return value is a list with class \code{"dataset_comparison"}, which has
a tidy \code{\link[=print]{print()}} method. The same object is accepted by
\code{\link[=generate_summary_report]{generate_summary_report()}}, \code{\link[=generate_detailed_report]{generate_detailed_report()}}, and
\code{\link[=compare_by_group]{compare_by_group()}}.
}
\examples{
\donttest{
# Positional matching (default)
df1 <- data.frame(id = 1:3, val = c(10, 20, 30))
df2 <- data.frame(id = 1:3, val = c(10, 25, 30))
result <- compare_datasets(df1, df2)
result

# Key-based matching (for different row counts or row orders)
df1 <- data.frame(id = c(1, 2, 3), val = c(10, 20, 30))
df2 <- data.frame(id = c(2, 3, 4), val = c(20, 35, 40))
result <- compare_datasets(df1, df2, id_vars = "id")
result
result$unmatched_rows
}
}
