% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tier_match.R
\name{tier_match}
\alias{tier_match}
\title{Perform an iterative match by tier}
\usage{
tier_match(
  data1,
  data2,
  by = NULL,
  by.x = NULL,
  by.y = NULL,
  suffixes = c("_1", "_2"),
  check_merge = TRUE,
  unique_key_1,
  unique_key_2,
  tiers = list(),
  takeout = "both",
  match_type = "exact",
  clean = FALSE,
  clean_settings = build_clean_settings(),
  score_settings = NULL,
  filter = NULL,
  filter.args = list(),
  evaluate = match_evaluate,
  evaluate.args = list(),
  allow.cartesian = TRUE,
  fuzzy_settings = build_fuzzy_settings(),
  multivar_settings = build_multivar_settings(),
  verbose = FALSE
)
}
\arguments{
\item{data1}{data.frame. First to-merge dataset.}

\item{data2}{data.frame. Second to-merge dataset.}

\item{by}{character string. Variables to merge on (common across data1 and data2). See \code{merge}}

\item{by.x}{character string. Variable to merge on in data1. See \code{merge}}

\item{by.y}{character string. Variable to merge on in data2. See \code{merge}}

\item{suffixes}{see \code{merge}}

\item{check_merge}{logical. Checks that your unique_keys are indeed unique, and prevents merge from running if merge would result in data.frames larger than 5 million rows}

\item{unique_key_1}{character vector. Primary key of data1 that uniquely identifies each row (can be multiple fields)}

\item{unique_key_2}{character vector. Primary key of data2 that uniquely identifies each row (can be multiple fields)}

\item{tiers}{list(). tiers is a list of lists, where each inner list holds the parameters for creating one tier. All arguments to tier_match listed after this argument can either be supplied directly to tier_match, or indirectly via tiers.}

\item{takeout}{character vector, either 'data1', 'data2', 'both', or 'neither'. Removes observations after each tier from the selected dataset.}

\item{match_type}{string. If 'exact', match is exact; if 'fuzzy', match is fuzzy; if 'multivar', the multivar match is used (see \code{multivar_settings}).}

\item{clean}{Boolean, T/F, whether or not to clean strings prior to the match.}

\item{clean_settings}{list. Settings for string cleaning. See \code{clean_strings} and \code{build_clean_settings}.}

\item{score_settings}{list. Settings for post-hoc matchscoring. See \code{build_score_settings}.}

\item{filter}{function or numeric. Filters a merged data1-data2 dataset. If a function, should take in
a data.frame (data1 and data2 merged by name1 and name2) and return a trimmed version
of the data.frame (fewer rows). Think of this function as applying other conditions
to matches, other than a match by name. The first argument of filter should be the data.frame.
If numeric, will drop all observations with a matchscore lower than or equal to filter.}

\item{filter.args}{list. Arguments passed to filter, if a function}

\item{evaluate}{Function to evaluate merge_plus output. See \code{match_evaluate}.}

\item{evaluate.args}{list. Arguments passed to function specified by evaluate}

\item{allow.cartesian}{whether or not to allow many-many matches, see data.table::merge()}

\item{fuzzy_settings}{additional arguments for amatch, to be used if match_type = 'fuzzy'. Suggested defaults provided. (see amatch, method='jw')}

\item{multivar_settings}{list of settings to go to the multivar match if
match_type == 'multivar'. See \code{multivar_match}.}

\item{verbose}{boolean, whether or not to print tier names and time to match each tier as the matching happens.}
}
\value{
list with matches, data1 and data2 minus matches, and match evaluation
}
\description{
Constructs a tier_match by running \code{merge_plus} with different parameters sequentially
on the same data. Allows for sequential removal of observations after each tier.
}
\details{
See the tier match vignette to get a clear understanding of the tier_match syntax.
}
\seealso{
\code{merge_plus}, \code{clean_strings}
}
