% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generate-data.R
\name{generate_qualitative_data_did}
\alias{generate_qualitative_data_did}
\title{Generate Qualitative Data (Difference-in-Differences)}
\usage{
generate_qualitative_data_did(n, assignment, outcome_type)
}
\arguments{
\item{n}{Sample size.}

\item{assignment}{String controlling treatment assignment. Must be either \code{"randomized"} (random assignment)
or \code{"observational"} (assignment based on covariates).}

\item{outcome_type}{String controlling the outcome type. Must be either \code{"multinomial"} or \code{"ordered"}.}
}
\value{
A list storing a data frame with the observed data, the true propensity score, and the true probabilities of shift on the treated.
}
\description{
Generate a synthetic data set with qualitative outcomes under a difference-in-differences design. The data include two time periods, a binary treatment indicator (applied only in the second period),
and a matrix of covariates. The probability time shifts of the treated and control groups evolve similarly across the two time periods (parallel trends on the probability mass functions).
}
\details{
\subsection{Outcome type}{

Potential outcomes are generated differently according to \code{outcome_type}. If \code{outcome_type == "multinomial"}, \code{\link{generate_qualitative_data_did}} computes linear predictors for each class using the covariates:

\deqn{\eta_{mi} (d, s) = \beta_{m1}^d X_{i1} + \beta_{m2}^d X_{i2} + \beta_{m3}^d X_{i3}, \quad d = 0, 1, \quad s = t-1, t,}

and then transforms \eqn{\eta_{mi} (d, s)} into valid probability distributions using the softmax function:

\deqn{P(Y_{is}(d) = m | X_i) = \frac{\exp(\eta_{mi} (d, s))}{\sum_{m'} \exp(\eta_{m'i}(d, s))}, \quad d = 0, 1, \quad s = t-1, t.}

It then generates potential outcomes \eqn{Y_{it-1}(1)}, \eqn{Y_{it}(1)}, \eqn{Y_{it-1}(0)}, and \eqn{Y_{it}(0)} by sampling from \{1, 2, 3\} using \eqn{P(Y_{is}(d) = m | X_i), \, d = 0, 1, \, s = t-1, t}.\cr

If instead \code{outcome_type == "ordered"}, \code{\link{generate_qualitative_data_did}} first generates latent potential outcomes:

\deqn{Y_i^* (d, s) = \tau d + X_{i1} + X_{i2} + X_{i3} + N (0, 1), \quad d = 0, 1, \quad s = t-1, t,}

with \eqn{\tau = 2}. It then constructs \eqn{Y_i (d, s)} by discretizing \eqn{Y_i^* (d, s)} using threshold parameters \eqn{\zeta_1 = 2} and \eqn{\zeta_2 = 4}. Then,

\deqn{P(Y_i(d, s) = m | X_i) = P(\zeta_{m-1} < Y_i^*(d, s) \leq \zeta_m | X_i) = \Phi (\zeta_m - \sum_j X_{ij} - \tau d) - \Phi (\zeta_{m-1} - \sum_j X_{ij} - \tau d), \quad d = 0, 1, \quad s = t-1, t,}

which allows us to analytically compute the probabilities of shift on the treated.
}

\subsection{Treatment assignment}{

Treatment is always assigned as \eqn{D_i \sim \text{Bernoulli}(\pi(X_i))}. If \code{assignment == "randomized"}, then the propensity score is specified as \eqn{\pi(X_i) = P ( D_i = 1 | X_i) = 0.5}.
If instead \code{assignment == "observational"}, then \eqn{\pi(X_i) = (X_{i1} + X_{i3}) / 2}.
}

\subsection{Other details}{

The function always generates three independent covariates from \eqn{U(0,1)}. Observed outcomes \eqn{Y_{is}} are always constructed using the usual observational rule.
}
}
\examples{
\donttest{## Generate synthetic data.
set.seed(1986)

data <- generate_qualitative_data_did(100,
                                      assignment = "observational",
                                      outcome_type = "ordered")

data$pshifts_treated}

}
\seealso{
\code{\link{generate_qualitative_data_soo}}, \code{\link{generate_qualitative_data_iv}}, \code{\link{generate_qualitative_data_rd}}
}
\author{
Riccardo Di Francesco
}
