% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sum_lump.R
\name{sum_lump}
\alias{sum_lump}
\title{Sum and lump together small flows into an "other" category}
\usage{
sum_lump(
  m,
  threshold = 1,
  lump = "flow",
  other_level = "other",
  complete = FALSE,
  fill = 0,
  return_matrix = TRUE,
  orig = "orig",
  dest = "dest",
  flow = "flow"
)
}
\arguments{
\item{m}{A \code{matrix} or data frame of origin-destination flows. For \code{matrix} the first and second dimensions correspond to origin and destination respectively. For a data frame ensure the correct column names are passed to \code{orig}, \code{dest} and \code{flow}.}

\item{threshold}{Numeric value used to determine small flows, origins or destinations that will be grouped (lumped) together.}

\item{lump}{Character string to indicate where to apply the threshold. Choose from the \code{flow} values, \code{in} migration region and/or \code{out} migration region.}

\item{other_level}{Character string for the origin and/or destination label for the lumped values below the \code{threshold}. Default \code{"other"}.}

\item{complete}{Logical value to return a \code{tibble} with the complete set of origin-destination combinations.}

\item{fill}{Numeric value used to fill small cells below the \code{threshold} when \code{complete = TRUE}. Default of zero.}

\item{return_matrix}{Logical to return a matrix. Default \code{TRUE}.}

\item{orig}{Character string of the origin column name (when \code{m} is a data frame rather than a \code{matrix})}

\item{dest}{Character string of the destination column name (when \code{m} is a data frame rather than a \code{matrix})}

\item{flow}{Character string of the flow column name (when \code{m} is a data frame rather than a \code{matrix})}
}
\value{
A \code{tibble} with an additional \code{other} origin and/or destination region, formed by grouping together small values below the \code{threshold} argument, with the \code{lump} argument indicating where to apply the threshold.
}
\description{
Lump together regions/countries if their flows are below a given threshold.
}
\details{
The \code{lump} argument can take values \code{flow} or \code{bilat} to apply the threshold to the data values for between-region migration, \code{in} or \code{imm} to apply the threshold to the incoming region and \code{out} or \code{emi} to apply the threshold to the outgoing region.
}
\examples{
# origin-destination flow matrix for four regions
# (rows are origins, columns are destinations)
r <- LETTERS[1:4]
m <- matrix(data = c(0, 100, 30, 10, 50, 0, 50, 5, 10, 40, 0, 40, 20, 25, 20, 0),
            nrow = 4, ncol = 4, dimnames = list(orig = r, dest = r), byrow = TRUE)
m

# threshold on in and out region
sum_lump(m, threshold = 100, lump = c("in", "out"))

# threshold on flows (default)
sum_lump(m, threshold = 40)

# return a matrix (only possible when input is a matrix and
# complete = TRUE) with small values replaced by zeros
sum_lump(m, threshold = 50, complete = TRUE)

# return a data frame with small values replaced with zero
sum_lump(m, threshold = 80, complete = TRUE, return_matrix = FALSE)

\dontrun{
# data frame (tidy) format
library(tidyverse)

# download Abel and Cohen (2019) estimates
# (show_col_types = FALSE silences the readr column specification message)
f <- read_csv("https://ndownloader.figshare.com/files/38016762", show_col_types = FALSE)
f

# large 1990-1995 flow estimates
f \%>\%
  filter(year0 == 1990) \%>\%
  sum_lump(flow = "da_pb_closed", threshold = 1e5)

# large flow estimates for each year
f \%>\%
  group_by(year0) \%>\%
  sum_lump(flow = "da_pb_closed", threshold = 1e5)
}
}
