## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  fig.width  = 6.5,
  fig.height = 4,
  out.width  = "100%"
)
library(RobustFlow)
library(ggplot2)

## ----make-data----------------------------------------------------------------
set.seed(2024)
n_children <- 200
n_waves    <- 5

# Simulate SES group with unequal sizes
ses <- sample(
  c("Low SES", "Mid SES", "High SES"),
  size    = n_children,
  replace = TRUE,
  prob    = c(0.35, 0.40, 0.25)
)

# Risk probability increases over waves for Low SES,
# decreases slightly for High SES
risk_prob <- function(ses_grp, wave) {
  base <- switch(ses_grp,
    "Low SES"  = 0.55,
    "Mid SES"  = 0.35,
    "High SES" = 0.20
  )
  trend <- switch(ses_grp,
    "Low SES"  =  0.04,
    "Mid SES"  =  0.01,
    "High SES" = -0.02
  )
  pmin(pmax(base + trend * (wave - 1), 0.02), 0.98)
}

rows <- lapply(seq_len(n_children), function(i) {
  data.frame(
    child_id  = i,
    wave      = seq_len(n_waves),
    ses_group = ses[i],
    school_id = sample(seq_len(20), 1),
    risk_math = rbinom(n_waves, 1,
                       vapply(seq_len(n_waves),
                              function(w) risk_prob(ses[i], w),
                              numeric(1)))
  )
})
ecls_demo <- do.call(rbind, rows)
head(ecls_demo, 8)

## ----validate-----------------------------------------------------------------
validated <- validate_panel_data(
  data     = ecls_demo,
  id       = "child_id",
  time     = "wave",
  decision = "risk_math",
  group    = "ses_group",
  cluster  = "school_id"
)

cat(sprintf(
  "Individuals: %d\nWaves:       %d\nBalanced:    %s\n",
  validated$n_ids,
  validated$n_times,
  validated$balanced
))

## ----paths--------------------------------------------------------------------
paths <- build_paths(
  data     = validated$data,
  id       = "child_id",
  time     = "wave",
  decision = "risk_math"
)

# Top 10 paths
head(paths$path_counts, 10)

## ----entropy------------------------------------------------------------------
cat(sprintf("Path entropy: %.3f bits\n", paths$path_entropy))

## ----trans-mat----------------------------------------------------------------
paths$transition_matrix

## ----path-plot, fig.cap="Top 10 most common five-wave risk trajectories."-----
df_paths <- head(paths$path_counts, 10)
df_paths$path <- factor(df_paths$path, levels = rev(df_paths$path))

ggplot(df_paths, aes(x = path, y = n)) +
  geom_col(fill = "#2c7bb6") +
  coord_flip() +
  labs(x = "Path", y = "Count",
       title = "Top 10 Decision Paths") +
  theme_minimal()

## ----dii----------------------------------------------------------------------
drift <- compute_drift(
  data      = validated$data,
  id        = "child_id",
  time      = "wave",
  decision  = "risk_math",
  normalize = TRUE
)

drift$summary
cat(sprintf("Mean DII:             %.4f\n", drift$mean_dii))
cat(sprintf("Period of max drift:  wave %s\n", drift$max_dii_period))

## ----dii-plot, fig.cap="DII over time. Higher values indicate greater structural change in risk transitions."----
ggplot(drift$summary, aes(x = time, y = DII)) +
  geom_line(color = "#1a9641", linewidth = 1) +
  geom_point(size = 2.5) +
  labs(x = "Wave", y = "DII",
       title = "Drift Intensity Index Over Time") +
  theme_minimal()

## ----gaps---------------------------------------------------------------------
gaps <- compute_group_gaps(
  data        = validated$data,
  time        = "wave",
  decision    = "risk_math",
  group       = "ses_group",
  focal_value = 1
)

gaps$gap_df

## ----traj-plot, fig.cap="At-risk rates by SES group across five waves."-------
ggplot(gaps$long_format,
       aes(x = time, y = rate, color = group, group = group)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2.5) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                     limits = c(0, 1)) +
  labs(x = "Wave", y = "At-Risk Rate", color = "SES Group",
       title = "Math Risk Trajectories by SES Group") +
  theme_minimal() +
  theme(legend.position = "bottom")

## ----gap-plot, fig.cap="Disparity gap (first two alphabetic SES groups) over time."----
ggplot(gaps$gap_df, aes(x = time, y = gap)) +
  geom_line(color = "#762a83", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
  geom_point(size = 2.5) +
  labs(x = "Wave", y = "Gap",
       title = "Disparity Gap Over Time") +
  theme_minimal()

## ----bai----------------------------------------------------------------------
bai <- compute_bai(gaps$gap)
cat(sprintf("BAI:       %.4f\n", bai$bai))
cat(sprintf("Direction: %s\n",   bai$direction))
cat(sprintf("Gap at t=1: %.4f  |  Gap at t=T: %.4f\n",
            bai$gap_start, bai$gap_end))

## ----bai-std------------------------------------------------------------------
bai_std <- compute_bai(gaps$gap, standardize = TRUE)
cat(sprintf("Standardized BAI: %.4f\n", bai_std$bai))

## ----tfi----------------------------------------------------------------------
dii_vals <- drift$summary$DII[!is.na(drift$summary$DII)]

tfi <- compute_tfi_simple(dii_vals)
tfi$summary_table

## ----tfi-plot, fig.cap="TFI sensitivity curve. The red dashed line marks the null effect threshold."----
sc <- tfi$sensitivity_curve

ggplot(sc, aes(x = perturbation, y = adjusted_effect)) +
  geom_line(color = "#4575b4", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "#d73027",
             linewidth = 0.8) +
  labs(x = "Hidden Bias Parameter (u)",
       y = "Adjusted Slope",
       title = "TFI Sensitivity Curve") +
  theme_minimal()

## ----rscript, eval=FALSE------------------------------------------------------
# generate_r_script(
#   id_var       = "child_id",
#   time_var     = "wave",
#   decision_var = "risk_math",
#   group_var    = "ses_group",
#   cluster_var  = "school_id",
#   focal_value  = 1,
#   output_file  = "my_robustflow_analysis.R"
# )

## ----app, eval=FALSE----------------------------------------------------------
# run_app()

## ----session------------------------------------------------------------------
sessionInfo()

