% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read.R
\name{wb_to_df}
\alias{wb_to_df}
\alias{read_xlsx}
\alias{wb_read}
\title{Create a data frame from a Workbook}
\usage{
wb_to_df(
  file,
  sheet,
  start_row = NULL,
  start_col = NULL,
  row_names = FALSE,
  col_names = TRUE,
  skip_empty_rows = FALSE,
  skip_empty_cols = FALSE,
  skip_hidden_rows = FALSE,
  skip_hidden_cols = FALSE,
  rows = NULL,
  cols = NULL,
  detect_dates = TRUE,
  na = "#N/A",
  fill_merged_cells = FALSE,
  dims,
  show_formula = FALSE,
  convert = TRUE,
  types,
  named_region,
  keep_attributes = FALSE,
  check_names = FALSE,
  show_hyperlinks = FALSE,
  apply_numfmts = FALSE,
  ...
)

read_xlsx(
  file,
  sheet,
  start_row = NULL,
  start_col = NULL,
  row_names = FALSE,
  col_names = TRUE,
  skip_empty_rows = FALSE,
  skip_empty_cols = FALSE,
  rows = NULL,
  cols = NULL,
  detect_dates = TRUE,
  named_region,
  na = "#N/A",
  fill_merged_cells = FALSE,
  check_names = FALSE,
  show_hyperlinks = FALSE,
  ...
)

wb_read(
  file,
  sheet = 1,
  start_row = NULL,
  start_col = NULL,
  row_names = FALSE,
  col_names = TRUE,
  skip_empty_rows = FALSE,
  skip_empty_cols = FALSE,
  rows = NULL,
  cols = NULL,
  detect_dates = TRUE,
  named_region,
  na = "NA",
  check_names = FALSE,
  show_hyperlinks = FALSE,
  ...
)
}
\arguments{
\item{file}{A workbook file path, a \link{wbWorkbook} object, or a valid URL.}

\item{sheet}{The name or index of the worksheet to read. Defaults to the first sheet.}

\item{start_row, start_col}{Optional numeric values specifying the first row or column
to begin data discovery.}

\item{row_names}{Logical; if TRUE, uses the first column of the selection as row names.}

\item{col_names}{Logical; if TRUE, uses the first row of the selection as column headers.}

\item{skip_empty_rows, skip_empty_cols}{Logical; if TRUE, filters out rows or
columns containing only missing values.}

\item{skip_hidden_rows, skip_hidden_cols}{Logical; if TRUE, excludes rows or
columns marked as hidden in the worksheet metadata.}

\item{rows, cols}{Optional numeric vectors specifying the exact indices to read.}

\item{detect_dates}{Logical; if TRUE, identifies date and datetime styles for conversion.}

\item{na}{A character vector or a named list (e.g., \code{list(strings = "", numbers = -99)})
defining values to treat as \code{NA}.}

\item{fill_merged_cells}{Logical; if TRUE, propagates the top-left value of a
merged range to all cells in that range.}

\item{dims}{A character string defining the range. Supports wildcards
(e.g., "A1:++" or "A-:+5").}

\item{show_formula}{Logical; if TRUE, returns the formula strings instead of
calculated values.}

\item{convert}{Logical; if TRUE, attempts to coerce columns to appropriate R classes.}

\item{types}{A named vector (numeric or character) to explicitly define column types.}

\item{named_region}{A character string referring to a defined name or spreadsheet Table.}

\item{keep_attributes}{Logical; if TRUE, attaches metadata such as the internal
type table (tt) and types as attributes to the output.}

\item{check_names}{Logical; if TRUE, ensures column names are syntactically
valid R names via \code{\link[=make.names]{make.names()}}.}

\item{show_hyperlinks}{Logical; if TRUE, replaces cell values with their
underlying hyperlink targets.}

\item{apply_numfmts}{Logical; if TRUE, applies spreadsheet number formatting
and returns strings.}

\item{...}{Additional arguments passed to internal methods.}
}
\description{
The \code{wb_to_df()} function is the primary interface for extracting data from
spreadsheet files into R. It interprets the underlying XML structure of a
worksheet to reconstruct a data frame, handling cell types, dimensions, and
formatting according to user specification. While \code{read_xlsx()} and
\code{wb_read()} are available as streamlined internal wrappers for users
accustomed to other spreadsheet packages, wb_to_df() serves as the
foundational function and provides the most comprehensive access to the
package's data extraction and configuration parameters.
}
\details{
The function extracts data based on a defined range or the total data extent
of a worksheet. If \code{col_names = TRUE}, the first row of the selection is
treated as the header; otherwise, spreadsheet column letters are used. If
\code{row_names = TRUE}, the first column of the selected range is assigned to
the data frame's row names.

Dimension selection is highly flexible. The \code{dims} argument supports standard
"A1:B2" notation as well as dynamic wildcards for rows and columns. Using
\verb{++} or \verb{--} allows ranges to adapt to the spreadsheet's content. For
instance, \code{dims = "A2:C+"} reads from A2 to the last available row in
column C, while \code{dims = "A-:+9"} reads from the first populated row in
column A to the last column in row 9. If neither \code{dims} nor \code{named_region}
is provided, the function automatically calculates the range based on the
minimum and maximum populated cells, modified by \code{start_row} and \code{start_col}.

Type conversion is governed by an internal guessing engine. If \code{detect_dates}
is enabled, serial dates are converted to R Date or POSIXct objects. All
datetimes are standardized to UTC. The function's handling of time variables
depends on the presence of the \code{hms} package; if loaded, \code{wb_to_df()} returns
\code{hms} variables. Otherwise, they are returned as string variables in
\code{hh:mm:ss} format. Users can provide explicit column types via the \code{types}
argument using numeric codes: 0 (character), 1 (numeric), 2 (Date), 3 (POSIXct),
4 (logical), 5 (hms), and 6 (formula).

Regarding formulas, it is important to note that \code{wb_to_df()} will not
automatically evaluate formulas added to a workbook object via
\code{\link[=wb_add_formula]{wb_add_formula()}}. In the underlying spreadsheet XML, only the formula
expression is written; the resulting value is typically generated by the
spreadsheet software's calculation engine when the file is opened and saved.
Consequently, reading a newly added formula cell without prior evaluation in
external software will result in an empty value unless \code{show_formula = TRUE}
is used to retrieve the formula string itself.

If \code{keep_attributes} is TRUE, the data frame is returned with additional
metadata. This includes the internal type-guessing table (\code{tt}), which
identifies the derived type for every cell in the range, and the specific
\code{types} vector used for conversion. These attributes are useful for
debugging or for applications requiring precise knowledge of the
spreadsheet's original cell metadata.

Specialized spreadsheet features include the ability to extract hyperlink
targets (\code{show_hyperlinks = TRUE}) instead of display text. For complex
layouts, \code{fill_merged_cells} propagates the value of a top-left merged cell
to all cells within the merge range. The \code{na} argument supports sophisticated
missing value definitions, accepting either a character vector or a named
list to differentiate between string and numeric \code{NA} types.
}
\section{Notes}{

Recent versions of \code{openxlsx2} have introduced several changes to the
\code{wb_to_df()} API:
\itemize{
\item Legacy arguments such as \code{na.strings} and \code{na.numbers} are no longer part
of the public API and have been consolidated into the \code{na} argument.
\item As of version 1.15, all datetime variables are imported with the
timezone set to "UTC" to prevent system-specific local timezone shifts.
\item The function now supports reverse-order or specific-order imports when
a numeric vector is passed to the \code{rows} argument.
}

For extensive real-world examples and advanced usage patterns, consult
the package vignettes—specifically "openxlsx2 read to data frame"—and
the dedicated chapter in the \code{openxlsx2} book for real-life case studies.
}

\examples{
###########################################################################
# numerics, dates, missings, bool and string
example_file <- system.file("extdata", "openxlsx2_example.xlsx", package = "openxlsx2")
wb1 <- wb_load(example_file)

# import workbook
wb_to_df(wb1)

# do not convert first row to column names
wb_to_df(wb1, col_names = FALSE)

# do not try to identify dates in the data
wb_to_df(wb1, detect_dates = FALSE)

# return the underlying spreadsheet formula instead of their values
wb_to_df(wb1, show_formula = TRUE)

# read dimension without colNames
wb_to_df(wb1, dims = "A2:C5", col_names = FALSE)

# read selected cols
wb_to_df(wb1, cols = c("A:B", "G"))

# read selected rows
wb_to_df(wb1, rows = c(2, 4, 6))

# convert characters to numerics and date (logical too?)
wb_to_df(wb1, convert = FALSE)

# erase empty rows from dataset
wb_to_df(wb1, skip_empty_rows = TRUE)

# erase empty columns from dataset
wb_to_df(wb1, skip_empty_cols = TRUE)

# convert first row to rownames
wb_to_df(wb1, sheet = 2, dims = "C6:G9", row_names = TRUE)

# define type of the data.frame
wb_to_df(wb1, cols = c(2, 5), types = c("Var1" = 0, "Var3" = 1))

# start in row 5
wb_to_df(wb1, start_row = 5, col_names = FALSE)

# na string
wb_to_df(wb1, na = "a")

# read names from row two and data starting from row 4
wb_to_df(wb1, dims = "B2:C2,B4:C+")

###########################################################################
# Named regions
file_named_region <- system.file("extdata", "namedRegions3.xlsx", package = "openxlsx2")
wb2 <- wb_load(file_named_region)

# read dataset with named_region (returns global first)
wb_to_df(wb2, named_region = "MyRange", col_names = FALSE)

# read named_region from sheet
wb_to_df(wb2, named_region = "MyRange", sheet = 4, col_names = FALSE)

# read_xlsx() and wb_read()
example_file <- system.file("extdata", "openxlsx2_example.xlsx", package = "openxlsx2")
read_xlsx(file = example_file)
df1 <- wb_read(file = example_file, sheet = 1)
df2 <- wb_read(file = example_file, sheet = 1, rows = c(1, 3, 5), cols = 1:3)
}
