#' @title Panel Data Models for Value-Price Analysis
#' @name panel_models
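#' @description Functions for fitting two-way fixed effects, Mundlak CRE and
#'   Bayesian hierarchical panel models, together with a Mundlak specification
#'   test and panel Granger causality tests.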
NULL

#' Fit Two-Way Fixed Effects Panel Model
#'
#' Fits a two-way fixed effects model with sector and time effects,
#' regressing log production prices on log direct prices.
#'
#' @param panel_data Data frame in panel format with columns: year, sector,
#'   log_direct, log_production.
#' @param robust_se Logical. Compute robust standard errors. Default TRUE.
#' @param cluster_type Character. Type of cluster for robust SE.
#'   One of "group", "time", or "twoway". Default "group".
#'   "group" and "time" use \code{plm::vcovHC} (Arellano, HC1); "twoway"
#'   uses the double-clustering estimator \code{plm::vcovDC} (HC1).
#'
#' @return A list containing:
#' \describe{
#'   \item{model}{The fitted plm model object}
#'   \item{summary}{Model summary}
#'   \item{r2_within}{Within R-squared}
#'   \item{coeftest_robust}{Coefficient test with robust SE (if robust_se=TRUE),
#'     otherwise NULL}
#'   \item{metrics}{In-sample evaluation metrics}
#'   \item{pdata}{The pdata.frame (indexed by sector and year) used for fitting}
#' }
#'
#' @details
#' This function requires the 'plm' package. The model specification is:
#' log_production ~ log_direct with two-way (sector and time) fixed effects.
#'
#' In-sample metrics compare the model predictions with the response values
#' taken from the fitted model itself, so both vectors share the same
#' observations and row order even when \code{panel_data} is not sorted by
#' sector and year.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("plm", quietly = TRUE)) {
#'   set.seed(123)
#'   panel <- data.frame(
#'     year = rep(2000:2009, 5),
#'     sector = rep(LETTERS[1:5], each = 10),
#'     log_direct = rnorm(50, 5, 0.5),
#'     log_production = rnorm(50, 5, 0.5)
#'   )
#'   panel$log_production <- panel$log_direct * 0.95 + rnorm(50, 0, 0.1)
#'
#'   result <- fit_twoway_fe(panel)
#'   print(result$r2_within)
#' }
#' }
#'
#' @export
fit_twoway_fe <- function(panel_data,
                           robust_se = TRUE,
                           cluster_type = c("group", "time", "twoway")) {

    check_package("plm", "panel data models")

    validate_panel_data(panel_data, require_log = TRUE)

    cluster_type <- match.arg(cluster_type)

    pdata <- plm::pdata.frame(panel_data, index = c("sector", "year"))

    model <- plm::plm(
        log_production ~ log_direct,
        data = pdata,
        model = "within",
        effect = "twoways"
    )

    model_summary <- summary(model)
    r2_within <- as.numeric(model_summary$r.squared["rsq"])

    coeftest_robust <- NULL
    if (robust_se) {

        check_package("lmtest", "robust coefficient tests")
        check_package("sandwich", "robust variance estimation")

        # plm::vcovHC only accepts cluster = "group" or "time"; clustering on
        # both dimensions is provided by the separate estimator plm::vcovDC.
        vcov_robust <- if (cluster_type == "twoway") {
            plm::vcovDC(model, type = "HC1")
        } else {
            plm::vcovHC(
                model,
                method = "arellano",
                type = "HC1",
                cluster = cluster_type
            )
        }

        coeftest_robust <- lmtest::coeftest(model, vcov = vcov_robust)
    }

    predictions <- as.numeric(stats::predict(model))

    # Take the untransformed response from the fitted model rather than from
    # `panel_data`: the model works on the pdata.frame, whose row order (and
    # set of rows, if incomplete cases were dropped) can differ from the
    # caller's data frame.
    actual <- as.numeric(plm::pmodel.response(model, model = "pooling"))

    metrics <- evaluate_insample(predictions, actual)

    list(
        model = model,
        summary = model_summary,
        r2_within = r2_within,
        coeftest_robust = coeftest_robust,
        metrics = metrics,
        pdata = pdata
    )
}


#' Fit Mundlak Correlated Random Effects Model
#'
#' Fits a Mundlak (CRE) model that decomposes effects into within and
#' between components, allowing for correlation between unit effects
#' and regressors.
#'
#' @param panel_data Data frame in panel format.
#' @param include_time_fe Logical. Include time fixed effects. Default TRUE.
#' @param robust_se Logical. Compute robust standard errors. Default TRUE.
#'
#' @return A list containing:
#' \describe{
#'   \item{model}{The fitted plm model object}
#'   \item{summary}{Model summary}
#'   \item{panel_data_augmented}{Panel data with Mundlak transformations}
#'   \item{pdata}{The pdata.frame (indexed by sector and year) used for fitting}
#'   \item{coeftest_robust}{Robust coefficient tests (NULL if robust_se=FALSE)}
#'   \item{variance_components}{Random effects variance components}
#'   \item{metrics}{In-sample evaluation metrics}
#' }
#'
#' @details
#' The Mundlak transformation adds sector-level means of the regressors
#' to a random effects model, allowing consistent estimation even when
#' the random effects are correlated with the regressors.
#'
#' The model is estimated as an individual random effects model
#' (Swamy-Arora) on \code{x_within} and \code{x_mean_sector}, optionally with
#' year dummies. In-sample metrics compare the model predictions with the
#' response values taken from the fitted model itself, so both vectors share
#' the same observations and row order.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("plm", quietly = TRUE)) {
#'   set.seed(123)
#'   panel <- data.frame(
#'     year = rep(2000:2009, 5),
#'     sector = rep(LETTERS[1:5], each = 10),
#'     log_direct = rnorm(50, 5, 0.5),
#'     log_production = rnorm(50, 5, 0.5)
#'   )
#'   panel$log_production <- panel$log_direct * 0.95 + rnorm(50, 0, 0.1)
#'
#'   result <- fit_mundlak_cre(panel)
#'   print(result$variance_components)
#' }
#' }
#'
#' @export
fit_mundlak_cre <- function(panel_data,
                             include_time_fe = TRUE,
                             robust_se = TRUE) {

    check_package("plm", "panel data models")

    validate_panel_data(panel_data, require_log = TRUE)

    panel_augmented <- create_mundlak_data(panel_data, x_var = "log_direct")

    pdata <- plm::pdata.frame(panel_augmented, index = c("sector", "year"))

    if (include_time_fe) {
        formula_str <- "log_production ~ x_within + x_mean_sector + factor(year)"
    } else {
        formula_str <- "log_production ~ x_within + x_mean_sector"
    }

    model <- plm::plm(
        stats::as.formula(formula_str),
        data = pdata,
        model = "random",
        random.method = "swar",
        effect = "individual"
    )

    model_summary <- summary(model)

    variance_components <- extract_re_variances(model_summary)

    coeftest_robust <- NULL
    if (robust_se) {
        check_package("lmtest", "robust coefficient tests")

        vcov_robust <- plm::vcovHC(
            model,
            method = "arellano",
            type = "HC1",
            cluster = "group"
        )

        coeftest_robust <- lmtest::coeftest(model, vcov = vcov_robust)
    }

    predictions <- as.numeric(stats::predict(model))

    # Take the untransformed response from the fitted model rather than from
    # `panel_augmented`: the model works on the pdata.frame, whose row order
    # (and set of rows, if incomplete cases were dropped) can differ from the
    # augmented data frame.
    actual <- as.numeric(plm::pmodel.response(model, model = "pooling"))

    metrics <- evaluate_insample(predictions, actual)

    list(
        model = model,
        summary = model_summary,
        panel_data_augmented = panel_augmented,
        pdata = pdata,
        coeftest_robust = coeftest_robust,
        variance_components = variance_components,
        metrics = metrics
    )
}


#' Extract Random Effects Variance Components
#'
#' Internal function to extract variance components from plm summary.
#'
#' @param summary_obj Summary object from plm random effects model. The
#'   components are read from \code{summary_obj$ercomp$sigma2} when present,
#'   otherwise from \code{summary_obj$sigma2}.
#'
#' @return A list with two numeric elements: \code{sigma2_between} (the
#'   "id" component) and \code{sigma2_within} (the "idios" component). A
#'   component that cannot be found is returned as \code{NA_real_}.
#'
#' @keywords internal
extract_re_variances <- function(summary_obj) {

    # Locate the container holding the variance components, preferring the
    # nested `ercomp$sigma2` over a top-level `sigma2`.
    s2 <- NULL
    if (!is.null(summary_obj$ercomp) && !is.null(summary_obj$ercomp$sigma2)) {
        s2 <- summary_obj$ercomp$sigma2
    } else if (!is.null(summary_obj$sigma2)) {
        s2 <- summary_obj$sigma2
    }

    # `[[` with an absent name raises an error (not a warning), so check for
    # the name first and fall back to NA instead of aborting.
    pick_component <- function(component) {
        if (is.null(s2) || !(component %in% names(s2))) {
            return(NA_real_)
        }
        suppressWarnings(as.numeric(s2[[component]]))
    }

    list(
        sigma2_between = pick_component("id"),
        sigma2_within = pick_component("idios")
    )
}


#' Test Mundlak Specification
#'
#' Performs Wald test on the sector-mean coefficient to test whether
#' fixed effects would be preferred over random effects.
#'
#' @param mundlak_result Result from fit_mundlak_cre. Must contain a
#'   \code{model} element whose coefficients include \code{x_mean_sector}.
#'
#' @return A list with elements \code{statistic} (Wald statistic),
#'   \code{df} (degrees of freedom, always 1), \code{p_value} (upper-tail
#'   chi-squared p-value) and \code{interpretation} (a text verdict at the
#'   5\% level).
#'
#' @details
#' Under the null hypothesis that the sector means coefficient equals zero,
#' random effects would be appropriate. Rejection suggests fixed effects
#' should be used.
#'
#' The test uses a cluster-robust (Arellano, HC1, clustered by group)
#' covariance matrix from \code{plm::vcovHC}.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("plm", quietly = TRUE)) {
#'   set.seed(123)
#'   panel <- data.frame(
#'     year = rep(2000:2009, 5),
#'     sector = rep(LETTERS[1:5], each = 10),
#'     log_direct = rnorm(50, 5, 0.5),
#'     log_production = rnorm(50, 5, 0.5)
#'   )
#'   panel$log_production <- panel$log_direct * 0.95 + rnorm(50, 0, 0.1)
#'
#'   mundlak_fit <- fit_mundlak_cre(panel)
#'   test_result <- test_mundlak_specification(mundlak_fit)
#'   print(test_result)
#' }
#' }
#'
#' @export
test_mundlak_specification <- function(mundlak_result) {

    check_package("plm", "panel data models")

    model <- mundlak_result$model
    if (is.null(model)) {
        stop("'mundlak_result' does not contain a fitted 'model'.")
    }

    beta <- stats::coef(model)
    coef_names <- names(beta)

    if (!("x_mean_sector" %in% coef_names)) {
        stop("Coefficient 'x_mean_sector' not found in model.")
    }

    vcov_robust <- plm::vcovHC(
        model,
        method = "arellano",
        type = "HC1",
        cluster = "group"
    )

    # Single linear restriction R %*% beta = r selecting x_mean_sector.
    R <- matrix(
        0,
        nrow = 1L,
        ncol = length(beta),
        dimnames = list(NULL, coef_names)
    )
    R[1L, "x_mean_sector"] <- 1
    r <- 0

    Rb <- R %*% beta - r
    RVR <- R %*% vcov_robust %*% t(R)

    wald_stat <- as.numeric(t(Rb) %*% solve(RVR) %*% Rb)
    df <- 1L

    # Upper-tail probability computed directly: `1 - pchisq()` loses all
    # precision (returns exactly 0) once the statistic is large.
    p_value <- stats::pchisq(wald_stat, df = df, lower.tail = FALSE)

    list(
        statistic = wald_stat,
        df = df,
        p_value = p_value,
        interpretation = if (p_value < 0.05) {
            "Reject H0: FE preferred over RE"
        } else {
            "Fail to reject H0: RE may be appropriate"
        }
    )
}


#' Fit Bayesian Hierarchical Panel Model
#'
#' Fits a Bayesian mixed effects model with random slopes by sector.
#'
#' @param panel_data Data frame in panel format. Must provide
#'   \code{log_production}, \code{log_direct} and \code{sector}; a
#'   \code{time} column is derived from a numeric \code{year} column when
#'   absent.
#' @param include_time Logical. Include time trend. Default TRUE.
#' @param chains Number of MCMC chains. Default 4.
#' @param iter Number of iterations. Default 4000.
#' @param seed Random seed. Default 12345.
#'
#' @return A list containing:
#' \describe{
#'   \item{model}{The fitted rstanarm model object}
#'   \item{r2_bayes}{Bayesian R-squared (mean)}
#'   \item{summary}{Model summary for fixed effects}
#'   \item{metrics}{In-sample evaluation metrics}
#' }
#'
#' @details
#' The model has a random intercept and a random \code{log_direct} slope per
#' sector. Sampling uses at most one core per chain and leaves one detected
#' core free; if the core count cannot be detected a single core is used.
#'
#' @examples
#' \dontrun{
#' if (requireNamespace("rstanarm", quietly = TRUE)) {
#'   set.seed(123)
#'   panel <- data.frame(
#'     year = rep(2000:2009, 5),
#'     sector = rep(LETTERS[1:5], each = 10),
#'     time = rep(1:10, 5),
#'     log_direct = rnorm(50, 5, 0.5),
#'     log_production = rnorm(50, 5, 0.5)
#'   )
#'   panel$log_production <- panel$log_direct * 0.95 + rnorm(50, 0, 0.1)
#'
#'   result <- fit_bayesian_hierarchical(panel, chains = 2, iter = 1000)
#'   print(result$r2_bayes)
#' }
#' }
#'
#' @export
fit_bayesian_hierarchical <- function(panel_data,
                                      include_time = TRUE,
                                      chains = 4L,
                                      iter = 4000L,
                                      seed = 12345L) {

    check_package("rstanarm", "Bayesian hierarchical models")

    # Load the full rstanarm namespace before calling into it.
    requireNamespace("rstanarm", quietly = TRUE)

    # Derive a 1-based time index from `year` when the caller did not supply
    # one; na.rm keeps a single missing year from turning every value into NA.
    if (!("time" %in% names(panel_data))) {
        panel_data$time <- panel_data$year - min(panel_data$year, na.rm = TRUE) + 1L
    }

    if (include_time) {
        formula_obj <- log_production ~ log_direct + time + (1 + log_direct | sector)
    } else {
        formula_obj <- log_production ~ log_direct + (1 + log_direct | sector)
    }

    # detectCores() returns NA when the count is unknown; fall back to one
    # core in that case. More cores than chains cannot be put to use.
    detected_cores <- parallel::detectCores()
    if (is.na(detected_cores)) {
        detected_cores <- 1L
    }
    n_cores <- max(1L, min(detected_cores - 1L, chains))

    model <- rstanarm::stan_lmer(
        formula_obj,
        data = panel_data,
        prior = rstanarm::normal(0, 2.5),
        prior_intercept = rstanarm::normal(0, 5),
        chains = chains,
        iter = iter,
        seed = seed,
        cores = n_cores,
        refresh = 0
    )

    r2_draws <- rstanarm::bayes_R2(model)
    r2_bayes <- mean(r2_draws)

    fixed_pars <- c("(Intercept)", "log_direct")
    if (include_time) {
        fixed_pars <- c(fixed_pars, "time")
    }

    model_summary <- summary(
        model,
        pars = fixed_pars,
        probs = c(0.025, 0.975)
    )

    predictions <- as.numeric(stats::predict(model))

    # Compare predictions only against rows with complete model variables.
    # NOTE(review): assumes the fit drops incomplete rows (default na.omit
    # behaviour); with complete data this selects every row.
    model_vars <- intersect(all.vars(formula_obj), names(panel_data))
    complete_rows <- stats::complete.cases(panel_data[, model_vars, drop = FALSE])
    actual <- panel_data$log_production[complete_rows]

    metrics <- evaluate_insample(predictions, actual)

    list(
        model = model,
        r2_bayes = r2_bayes,
        summary = model_summary,
        metrics = metrics
    )
}


#' Panel Granger Causality Test (Dumitrescu-Hurlin)
#'
#' Performs panel Granger causality tests between direct and production prices.
#'
#' @param panel_data Data frame in panel format.
#' @param lags Integer vector of lag orders to test. Default c(1, 2).
#'
#' @return A data frame with one row per successful test and columns
#'   \code{direction}, \code{lag}, \code{statistic} and \code{p_value}.
#'   Returns \code{NULL} (with a warning) if every test failed.
#'
#' @details
#' Tests both directions: direct -> production and production -> direct.
#'
#' A test that raises an error is skipped and reported through a warning
#' that includes the underlying error message; the remaining tests are
#' still returned.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("plm", quietly = TRUE)) {
#'   set.seed(123)
#'   panel <- data.frame(
#'     year = rep(2000:2019, 5),
#'     sector = rep(LETTERS[1:5], each = 20),
#'     log_direct = rnorm(100, 5, 0.5),
#'     log_production = rnorm(100, 5, 0.5)
#'   )
#'
#'   granger_results <- panel_granger_test(panel)
#'   print(granger_results)
#' }
#' }
#'
#' @export
panel_granger_test <- function(panel_data, lags = c(1L, 2L)) {

    check_package("plm", "panel Granger causality tests")

    validate_panel_data(panel_data, require_log = TRUE)

    pdata <- plm::pdata.frame(panel_data, index = c("sector", "year"))

    # Each direction pairs its label with the formula handed to pgrangertest
    # (response on the left, hypothesised cause on the right).
    directions <- list(
        list(
            label = "direct -> production",
            formula = log_production ~ log_direct
        ),
        list(
            label = "production -> direct",
            formula = log_direct ~ log_production
        )
    )

    # Runs one test; returns a one-row data frame, or NULL after warning
    # about the failure.
    run_single_test <- function(direction, lag_order) {
        test <- tryCatch(
            plm::pgrangertest(
                direction$formula,
                data = pdata,
                order = lag_order
            ),
            error = function(e) {
                warning(
                    "Granger test failed for '", direction$label,
                    "' at lag ", lag_order, ": ", conditionMessage(e),
                    call. = FALSE
                )
                NULL
            }
        )

        if (is.null(test)) {
            return(NULL)
        }

        # as.numeric() strips names so they cannot leak into the row names.
        data.frame(
            direction = direction$label,
            lag = lag_order,
            statistic = as.numeric(test$statistic),
            p_value = as.numeric(test$p.value),
            stringsAsFactors = FALSE
        )
    }

    results <- list()

    for (lag_order in lags) {
        for (direction in directions) {
            row <- run_single_test(direction, lag_order)
            if (!is.null(row)) {
                results[[length(results) + 1L]] <- row
            }
        }
    }

    if (length(results) == 0L) {
        warning("All Granger tests failed.")
        return(NULL)
    }

    do.call(rbind, results)
}
