% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getData.R
\name{getData}
\alias{getData}
\title{Read Data to a Data Frame}
\usage{
getData(data, varnames = NULL, drop = FALSE, dropUnusedLevels = TRUE,
  omittedLevels = TRUE, defaultConditions = TRUE, formula = NULL,
  recode = NULL, includeNaLabel = FALSE, addAttributes = FALSE,
  returnJKreplicates = TRUE)
}
\arguments{
\item{data}{an \code{edsurvey.data.frame} or
a \code{light.edsurvey.data.frame}}

\item{varnames}{a character vector of variable names that will be returned.
When both \code{varnames} and
a \code{formula} are specified, variables associated with both are
returned. Set to \code{NULL} by default.}

\item{drop}{a logical value. When set to the default value of \code{FALSE}
and a single column is returned, the result is still represented as a
\code{data.frame} and is not converted to a vector.}

\item{dropUnusedLevels}{a logical value. When set to the default value of
\code{TRUE}, drops unused levels of all factor
variables.}

\item{omittedLevels}{a logical value. When set to the default value of
\code{TRUE}, drops those levels of all factor variables
that are specified in an \code{edsurvey.data.frame}. Use
\code{print} on an \code{edsurvey.data.frame} to see
the omitted levels.}

\item{defaultConditions}{a logical value. When set to the default value of
\code{TRUE}, uses the default conditions stored in
 an \code{edsurvey.data.frame} to subset the data. Use
\code{print} on an \code{edsurvey.data.frame} to
see the default conditions.}

\item{formula}{a \ifelse{latex}{\code{formula}}{\code{\link[stats]{formula}}}.
When included, \code{getData} returns data associated with
all variables of the \code{formula}. When both \code{varnames} and a
\code{formula} are specified, the variables associated with both are
returned. Set to \code{NULL} by default.}

\item{recode}{a list of lists to recode variables. Defaults to \code{NULL}.
Can be set as \code{recode} \code{=} \code{list(var1}
\code{=} \code{list(from} \code{=} \code{c("a","b","c"), to}
\code{=} \code{"d"))}. See Examples.}

\item{includeNaLabel}{a logical value to indicate if \code{NA} (missing) values are
returned as literal \code{NA} values or as factor levels
coded as \code{NA}. Defaults to \code{FALSE}.}

\item{addAttributes}{a logical value. Set to \code{TRUE} to get a
\code{data.frame} that can be used in calls to
other functions that usually would take an
\code{edsurvey.data.frame}. This \code{data.frame} is also called a \code{light.edsurvey.data.frame}.
See the Details section in \code{\link{edsurvey.data.frame}} for
more information on \code{light.edsurvey.data.frame}. Defaults to \code{FALSE}.}

\item{returnJKreplicates}{a logical value indicating if JK replicate weights
should be returned. Defaults to \code{TRUE}.}
}
\value{
When \code{addAttributes} is \code{FALSE}, returns a
\code{data.frame} containing data associated with requested
variables. When \code{addAttributes} is \code{TRUE}, returns a
\code{light.edsurvey.data.frame}.
}
\description{
Reads in selected columns to a \code{data.frame} or a
             \code{light.edsurvey.data.frame}. On an \code{edsurvey.data.frame},
             the data are stored on disk.
}
\details{
By default, an \code{edsurvey.data.frame} does not have data read
into memory until \code{getData} is called and returns a data frame.
This structure allows \code{EdSurvey} to have a minimal memory footprint.
To keep the footprint small, you need to limit \code{varnames} to just
the necessary variables.

When \code{getData} is called, it returns a \code{data.frame}. When the
\code{addAttributes} argument is set to \code{TRUE}, that \code{data.frame}
has several attributes added to make it usable by the functions in 
the \code{EdSurvey} package (e.g., \code{lm.sdf}), and the class is a
\code{light.edsurvey.data.frame}.

Note that if both \code{formula} and \code{varnames} are populated, the
variables from both will be included.

See the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-getData.pdf}{getData}
for long-form documentation on this function.
}
\examples{
# read in the example data (generated, not real student data)
sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))

# get two variables, without weights, and cross-tabulate them
df <- getData(data=sdf, varnames=c("dsex", "b017451"))
table(df)

# example of using recode: the two affirmative levels of t088301 are
# collapsed to Yes and the negative level is relabeled No. Note that the
# same variable name appears twice in the recode list, once per recoding.
df2 <- getData(data=sdf, varnames=c("dsex", "t088301"),
               recode=list(t088301=list(from=c("Yes, available","Yes, I have access"),
                                        to=c("Yes")),
                           t088301=list(from=c("No, have no access"),
                                        to=c("No"))))
table(df2)

# When readNAEP is called on a data file, it appends default
# conditions to the edsurvey.data.frame. You can see these conditions
# by printing sdf
sdf

# By default, getData applies these default conditions, which restrict the
# data to the reporting sample only. Set defaultConditions = FALSE to turn
# this behavior off:
df2 <- getData(data=sdf, varnames=c("dsex", "b017451"), defaultConditions = FALSE)
table(df2)

# Similarly, the default behavior of omitting certain levels specified
# in the edsurvey.data.frame can be changed by setting omittedLevels = FALSE:
df2 <- getData(data=sdf, varnames=c("dsex", "b017451"), omittedLevels = FALSE)
table(df2)

# the variable "c052601" is from the school-level data file;
# merging is handled automatically.
# addAttributes = TRUE makes getData return a light.edsurvey.data.frame
gddat <- getData(data=sdf, 
                 varnames=c("composite", "dsex", "b017451","c052601"),
                 addAttributes = TRUE)
class(gddat)
# look at the first few rows
head(gddat)
}
\seealso{
\code{\link{subset.edsurvey.data.frame}} for how to remove
         rows from the output
}
\author{
Tom Fink, Paul Bailey, and Ahmad Emad
}
