% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{people}
\alias{people}
\title{Synthetic Sample of the US Population}
\format{A tibble with 20,000 rows and 40 variables:
\describe{
\item{id}{Sequential unique ID}
\item{fname}{Random first name, see details}
\item{lname}{Random last name, see details}
\item{gender}{Biological sex}
\item{age}{Age capped at 85}
\item{race}{Race and Ethnicity}
\item{edu}{Educational attainment}
\item{div}{Census regional division}
\item{married}{Marital status}
\item{house_size}{Household size}
\item{children}{Has children}
\item{us_citizen}{Is a US citizen}
\item{us_born}{Was born in the US}
\item{house_income}{Family income}
\item{emp_status}{Employment status}
\item{emp_sector}{Employment sector}
\item{hours_work}{Hours worked per week}
\item{hours_vary}{Hours vary week to week}
\item{mil}{Has served in the military}
\item{house_own}{Home ownership}
\item{metro}{Lives in metropolitan area}
\item{internet}{Household has internet access}
\item{foodstamp}{Receives food stamps}
\item{house_moved}{Moved in the last year}
\item{pub_contact}{Contacted or visited a public official}
\item{boycott}{Participated in a boycott}
\item{hood_group}{Participated in a community association}
\item{hood_talks}{Talked with neighbors}
\item{hood_trust}{Trusts neighbors}
\item{tablet}{Uses a tablet or e-reader}
\item{texting}{Uses text messaging}
\item{social}{Uses social media}
\item{volunteer}{Volunteered}
\item{register}{Is registered to vote}
\item{vote}{Voted in the 2014 midterm elections}
\item{party}{Political party}
\item{religion}{Religious (evangelical) affiliation}
\item{ideology}{Political ideology}
\item{govt}{Follows government and public affairs}
\item{guns}{Owns a gun}
}}
\source{
\dQuote{For Weighting Online Opt-In Samples, What Matters Most?} Pew
Research Center, Washington, D.C. (January 26, 2018)
\url{http://pewrsr.ch/2rNawC7}
}
\usage{
people
}
\description{
A statistically representative synthetic sample of 20,000 Americans. Each
record is a simulated survey respondent.
}
\details{
This dataset was originally produced by the Pew Research Center for their
paper entitled \href{http://pewrsr.ch/2rNawC7}{\emph{For Weighting Online Opt-In Samples, What Matters Most?}}
The synthetic population dataset was created to serve as a reference for
making online opt-in surveys more representative of the overall population.

See \href{https://www.pewresearch.org/methods/?p=85}{Appendix B: Synthetic population dataset} for a more detailed
description of the method for and rationale behind creating this dataset.

In short, the dataset was created to overcome the limitations of using large,
federal benchmark survey datasets such as the American Community Survey (ACS)
or Current Population Survey (CPS). These surveys often do not contain the
exact questions asked in online opt-in surveys, keeping them from being used
for proper adjustment.

This \emph{synthetic} dataset was created by combining nine separate benchmark
datasets. Each had a set of common demographic variables but many added
unique variables such as gun ownership or voter registration. The surveys
were stratified, sampled, combined, and imputed to fill missing
values from each. From this large dataset, the original 20,000 surveys from
the ACS were kept to ensure accurate demographic distribution.

The names were \emph{RANDOMLY} assigned to respondents to better simulate a
synthetic sample of the population. First names were taken from the
\code{babynames} dataset which contains the Social Security Administration's
record of baby names from 1880 to 2017 along with gender and proportion.
First names were proportionally randomly assigned by birth year and sex. Last
names were taken from the Census Bureau, which provides the 162,254 most common
last names in the 2010 Census, covering over 90\% of the population. For a
given surname, the proportion of that name belonging to members of each race
and ethnicity is provided. The last names were proportionally randomly
assigned by race.
}
\keyword{datasets}
