#' Create Table1 of group summary with stats for scientific publication
#'
#' The `FullTable1` function can be used to create a Table1 for
#' scientific publication. This is intended to summarize demographic
#' and other variables (`vars`) split by a grouping variable (`strata`)
#' from an input dataset (`data`).
#' Continuous variables will be summarized as mean (SD)
#' and tested across groups using t-test or ANOVA (for 3+ level `strata`).
#' Categorical variables will be summarized as N (%)
#' and tested across groups as chi-squared.
#' Effect sizes for group differences will be calculated as Cohen's d,
#'  partial eta-squared, Odds Ratio, Cramer's V depending on the test.
#' Requires `tidyverse` and `stats` libraries.
#' @param data The input dataset (will be converted to tibble).
#' @param strata The grouping variable of interest (converted to factor),
#'  if NULL will make one column table.
#' @param vars A list of variables to summarize, e.g. c("Age","sex","WASI").
#' @param var_names An optional list to rename the variable colnames in the
#' output table, e.g. c("Age (years)","Sex","IQ"). Must match `vars` in length.
#' If not supplied, `vars` will be printed as is.
#' @param factor_vars An optional list of variables from `vars` to use
#' as class factor, e.g. c("sex"). Note that any character, factor, or
#' logical class variables will be summarized as categorical by default.
#' @param round_n The number of decimal places to round output to (default=2).
#' @param es_col Include a column for effect size
#' of group difference? (default=TRUE).
#' @param p_col Include a column for p-value of
#'  group difference? (default=TRUE).
#' @param stars Where to include stars indicating
#' significance of group differences.
#' Options: "col"=separate column (default), "name"= append to variable name,
#' "stat"= append to group difference statistic, "none" for no stars.
#' @param html Format as html in viewer or not
#'  (default=FALSE, print in console),
#'  needs library(htmlTable) installed.
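#' @return If `html = FALSE`, a list with elements `table` (the Table 1
#'  data frame) and `caption` (a character string with the table note).
#'  If `html = TRUE`, the value of `htmlTable::htmlTable()` for that table.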
#' @import 	dplyr
#' @importFrom 	purrr map_dfc set_names
#' @importFrom 	stats anova aov chisq.test
#'  complete.cases fisher.test sd setNames t.test
#' @importFrom 	stringr str_c
#' @importFrom 	tibble add_row
#' @import 	tidyr
#' @importFrom 	tidyselect all_of
#' @export
#' @examples
#' \dontrun{
#' FullTable1(
#'   data = psydat,
#'   vars = c("Age", "Height", "depressT"), strata = "Sex"
#' )
#' # Single-column table for the whole sample (no `strata`)
#' FullTable1(
#'   data = psydat,
#'   vars = c("Age", "Height", "depressT")
#' )
#' FullTable1(
#'   data = psydat, vars = c("Age", "Sex", "Height", "depressT"),
#'   var_names = c("Age (months)", "Sex", "Height (inches)", "Depression T"),
#'   strata = "Income", stars = "name", p_col = FALSE
#' )
#' tmp <- FullTable1(
#'   data = psydat,
#'   vars = c("Age", "Height", "depressT"), strata = "Sex"
#' )
#' tmp$caption <- "Write your own caption"
#' # print(htmlTable::htmlTable(tmp$table,
#' #   useViewer = TRUE, rnames = FALSE,
#' #   caption = tmp$caption, pos.caption = "bottom"
#' # ))
#' }
#' @details
#' Variables are automatically classified as numeric or categorical. Character,
#' logical, ordered, and binary variables are converted to factors with warnings.
#'
#' Effect sizes: Cohen's d (t-test), partial eta-squared (ANOVA),
#' Odds Ratio (2x2 chi-squared), Cramer's V (larger chi-squared tables).
#'
#' Stars indicate significance: * p<.05, ** p<.01, *** p<.001
FullTable1 <- function(data,
                       strata = NULL,
                       vars = NULL,
                       var_names = vars,
                       factor_vars = NULL,
                       round_n = 2,
                       es_col = TRUE,
                       p_col = TRUE,
                       stars = "col",
                       html = FALSE) {

  # Validate inputs
  validate_table1_inputs(data, vars, var_names, strata)

  # Prepare data; the helper returns the cleaned data together with the
  # strata / variable / factor-variable names used from here on.
  prepared <- prepare_table1_data(data, vars, strata, factor_vars)
  data_clean <- prepared$data
  strata_var <- prepared$strata
  vars_final <- prepared$vars
  factor_vars_final <- prepared$factor_vars

  # Group tests only make sense with a real grouping variable;
  # ".dummy_strata" marks the single-column (no strata) table.
  include_tests <- strata_var != ".dummy_strata"
  # A "mixed" table holds both categorical and numeric variables, so the
  # statistic and effect-size cells need labels to say which test was run.
  is_mixed <- length(factor_vars_final) > 0 &&
    length(factor_vars_final) < length(vars_final)

  # Cell-level predicates. NA is checked first so that a missing cell can
  # never reach `if ()` as an NA condition.
  is_filled <- function(x) !is.na(x) && x != "-"
  is_set <- function(x) !is.null(x) && !is.na(x) && x != ""

  # Create group labels with N
  if (include_tests) {
    group_counts <- table(data_clean[[strata_var]])
    group_labels <- paste0(names(group_counts), " (N=", group_counts, ")")
  } else {
    # Single column for the whole sample, e.g. "Sample (N=120)"
    group_labels <- paste0("Sample (N=", nrow(data_clean), ")")
  }

  # Build table rows; all_metadata stays index-aligned with all_rows so the
  # test / effect-size type of every output row can be looked up later.
  all_rows <- list()
  all_metadata <- list()

  for (i in seq_along(vars_final)) {
    var <- vars_final[i]
    var_display <- var_names[i]

    if (var %in% factor_vars_final) {
      # Categorical variable: the helper returns one item per output row
      result <- create_categorical_rows(
        var_display,
        data_clean[[var]],
        data_clean[[strata_var]],
        round_n,
        include_tests
      )
      for (item in result) {
        all_rows[[length(all_rows) + 1]] <- item$row
        all_metadata[[length(all_metadata) + 1]] <- list(
          test_type = item$test_type,
          es_type = item$es_type
        )
      }
    } else {
      # Numeric variable: a single output row
      result <- create_numeric_row(
        var_display,
        data_clean[[var]],
        data_clean[[strata_var]],
        round_n,
        include_tests
      )
      all_rows[[length(all_rows) + 1]] <- result$row
      all_metadata[[length(all_metadata) + 1]] <- list(
        test_type = result$test_type,
        es_type = result$es_type
      )
    }
  }

  # Convert to data frame
  result_df <- as.data.frame(do.call(rbind, all_rows), stringsAsFactors = FALSE)

  if (include_tests && is_mixed) {
    # Mixed table: label each statistic / effect size with its type
    for (i in seq_len(nrow(result_df))) {
      test_type <- all_metadata[[i]]$test_type
      es_type <- all_metadata[[i]]$es_type

      if (is_set(test_type) && is_filled(result_df$Stat[i])) {
        stat_val <- suppressWarnings(as.numeric(result_df$Stat[i]))
        if (!is.na(stat_val)) {
          result_df$Stat[i] <- format_statistic(
            stat_val,
            test_type,
            round_n,
            include_label = TRUE
          )
        }
      }

      if (is_set(es_type) && is_filled(result_df$es[i])) {
        es_val <- suppressWarnings(as.numeric(result_df$es[i]))
        if (!is.na(es_val)) {
          result_df$es[i] <- format_effect_size(
            es_val,
            es_type,
            round_n,
            include_label = TRUE
          )
        }
      }
    }
  } else if (include_tests) {
    # Homogeneous table: plain rounded numbers, no labels. Cells that are
    # not numeric are passed through unchanged.
    format_plain <- function(x) {
      if (!is_filled(x)) {
        return("-")
      }
      val <- suppressWarnings(as.numeric(x))
      if (is.na(val)) {
        return(as.character(x))
      }
      format(round(val, round_n), nsmall = round_n)
    }
    result_df$Stat <- vapply(result_df$Stat, format_plain, character(1))
    result_df$es <- vapply(result_df$es, format_plain, character(1))
  }

  # Format p-values; missing or placeholder cells become "-"
  if (include_tests) {
    result_df$p <- vapply(
      result_df$p,
      function(x) {
        if (!is_filled(x)) {
          return("-")
        }
        as.character(format_p_value(as.numeric(x)))
      },
      character(1)
    )
  }

  # Apply star preferences; any value other than "name", "stat" or "none"
  # (including the default "col") keeps the separate sig column.
  if (stars == "name") {
    result_df$Variable <- paste(result_df$Variable, result_df$sig)
    result_df$sig <- NULL
  } else if (stars == "stat") {
    result_df$Stat <- paste(result_df$Stat, result_df$sig)
    result_df$sig <- NULL
  } else if (stars == "none") {
    result_df$sig <- NULL
  }

  # Remove columns based on preferences
  if (!es_col) {
    result_df$es <- NULL
  }
  if (!p_col) {
    result_df$p <- NULL
  }
  if (!include_tests) {
    result_df$Stat <- NULL
  }

  # The group columns directly follow the first (variable name) column
  colnames(result_df)[2:(1 + length(group_labels))] <- group_labels

  # Replace NA with -
  result_df[is.na(result_df)] <- "-"

  # Build caption: exclusion note, per-variable missingness, star legend
  missing_info <- check_missingness(data, vars_final, var_names)

  caption <- paste0(
    "Note. ",
    if (prepared$n_missing_strata > 0 && include_tests) {
      paste0("N=", prepared$n_missing_strata,
             " excluded for missing group variable. ")
    } else {
      ""
    },
    missing_info,
    if (include_tests) " * p<.05, ** p<.01, *** p<.001" else ""
  )

  # HTML output is optional and needs the suggested package htmlTable
  if (html) {
    if (!requireNamespace("htmlTable", quietly = TRUE)) {
      stop("Package 'htmlTable' required for HTML output. Install with: install.packages('htmlTable')",
           call. = FALSE)
    }

    return(htmlTable::htmlTable(result_df,
                                useViewer = TRUE,
                                rnames = FALSE,
                                caption = caption,
                                pos.caption = "bottom"))
  }

  list(table = result_df, caption = caption)
}

#' Summarise missing values per variable as caption text
#'
#' Counts the `NA` entries in each column of `data` named in `vars` and
#' builds one "N=<count> missing <name>" note per affected column, using the
#' matching entry of `var_names` as the name. Returns `""` when no column has
#' missing values.
#' @keywords internal
#' @noRd
check_missingness <- function(data, vars, var_names) {
  # Number of NA entries in every requested column
  na_per_var <- vapply(
    data[vars],
    function(column) sum(is.na(column)),
    integer(1)
  )
  is_affected <- na_per_var > 0

  # Nothing missing: contribute no text to the caption
  if (!any(is_affected)) {
    return("")
  }

  notes <- paste0(
    "N=", na_per_var[is_affected],
    " missing ", var_names[is_affected]
  )

  # Notes are joined by ". " and closed with a trailing ". "
  paste0(paste(notes, collapse = ". "), ". ")
}
