% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/map.r
\name{map}
\alias{map}
\alias{map.disk.frame}
\alias{map_dfr}
\alias{map_dfr.default}
\alias{map_dfr.disk.frame}
\alias{imap}
\alias{imap.default}
\alias{imap.disk.frame}
\alias{imap_dfr.disk.frame}
\alias{imap_dfr}
\alias{imap_dfr.default}
\alias{lazy}
\alias{lazy.disk.frame}
\alias{delayed}
\alias{delayed.disk.frame}
\alias{chunk_lapply}
\title{Apply the same function to all chunks}
\usage{
map(.x, .f, ...)

\method{map}{disk.frame}(.x, .f, ..., outdir = NULL, keep = NULL,
  chunks = nchunks(.x), compress = 50, lazy = TRUE,
  overwrite = FALSE, vars_and_pkgs = future::getGlobalsAndPackages(.f,
  envir = parent.frame()), .progress = TRUE)

map_dfr(.x, .f, ..., .id = NULL)

\method{map_dfr}{default}(.x, .f, ..., .id = NULL)

\method{map_dfr}{disk.frame}(.x, .f, ..., .id = NULL, use.names = fill,
  fill = FALSE, idcol = NULL)

imap(.x, .f, ...)

\method{imap}{default}(.x, .f, ...)

\method{imap}{disk.frame}(.x, .f, outdir = NULL, keep = NULL,
  chunks = nchunks(.x), compress = 50, lazy = TRUE,
  overwrite = FALSE, ...)

\method{imap_dfr}{disk.frame}(.x, .f, ..., .id = NULL,
  use.names = fill, fill = FALSE, idcol = NULL)

imap_dfr(.x, .f, ..., .id = NULL)

\method{imap_dfr}{default}(.x, .f, ..., .id = NULL)

lazy(.x, .f, ...)

\method{lazy}{disk.frame}(.x, .f, ...)

delayed(.x, .f, ...)

\method{delayed}{disk.frame}(.x, .f, ...)

chunk_lapply(...)
}
\arguments{
\item{.x}{a disk.frame}

\item{.f}{a function to apply to each of the chunks}

\item{...}{for compatibility with `purrr::map`}

\item{outdir}{the output directory}

\item{keep}{the columns to keep from the input}

\item{chunks}{The number of chunks to output}

\item{compress}{0-100 fst compression ratio}

\item{lazy}{if TRUE then do this lazily}

\item{overwrite}{if TRUE removes any existing chunks in the data}

\item{vars_and_pkgs}{variables and packages to send to a background session. This is typically automatically detected}

\item{.progress}{A logical, for whether or not to print a progress bar for multiprocess, multisession, and multicore plans. From {furrr}}

\item{.id}{not used}

\item{use.names}{for map_dfr's call to data.table::rbindlist. See data.table::rbindlist}

\item{fill}{for map_dfr's call to data.table::rbindlist. See data.table::rbindlist}

\item{idcol}{for map_dfr's call to data.table::rbindlist. See data.table::rbindlist}
}
\description{
Apply the same function to all chunks

`imap.disk.frame` accepts a two-argument function where the first argument is a data.frame and the
second is the chunk ID

`lazy` is a convenience function to apply `.f` to every chunk

`delayed` is an alias for lazy and is consistent with the naming in Dask and Dagger.jl
}
\examples{
# convert the built-in `cars` data set to a disk.frame
cars.df = as.disk.frame(cars)

# return the first row of each chunk lazily;
# nothing is computed until collect() is called
cars2 = map(cars.df, function(chunk) {
 chunk[1,]
})

collect(cars2)

# same as above but using a purrr-style formula
cars2 = map(cars.df, ~.x[1,])

collect(cars2)

# return the first row of each chunk eagerly as list
map(cars.df, ~.x[1,], lazy = FALSE)

# return the first row of each chunk eagerly as data.table/data.frame by row-binding
map_dfr(cars.df, ~.x[1,])

# lazy and delayed are just aliases for map(..., lazy = TRUE)
collect(lazy(cars.df, ~.x[1,]))
collect(delayed(cars.df, ~.x[1,]))

# clean up cars.df
delete(cars.df)
cars.df = as.disk.frame(cars)

# .x is the chunk and .y is the ID as an integer

# lazy = TRUE support is not available at the moment
imap(cars.df, ~.x[, id := .y], lazy = FALSE)

imap_dfr(cars.df, ~.x[, id := .y])

# clean up cars.df
delete(cars.df)
}
