#' Multi-stage MCMC Bayesian Method for DMR Detection
#'
#' @description
#' This function implements a multistage MCMC Bayesian method for detecting differentially methylated
#' regions (DMRs) between two groups (typically cancer and normal). The method operates on methylation
#' measurements expressed as M-values.
#'  
#' For each candidate region and for each group, the function summarizes the region at the sample level
#' by averaging M-values across CpG sites within the region. These sample-wise means are modeled using an
#' alpha-skewed generalized normal (ASGN) distribution. A Bayes factor (BF) comparing the two groups is
#' then used within a multistage region-splitting scheme to identify the DMRs.
#'
#' @details
#' The inputs \code{cancer_data} and \code{normal_data} must have the same
#' set of CpG sites in the same order. Each row corresponds to a CpG site,
#' and the first two columns are required to be:
#' \itemize{
#'   \item \code{CpG_ID}: character CpG identifier.
#'   \item \code{Chromosome}: chromosome label (integer or character).
#' }
#' All remaining columns are assumed to be numeric M-values for individual
#' samples in the respective group (e.g., \code{"M_sample1"}, \code{"M_sample2"}, \dots).
#' 
#' For each group, sample-wise mean M-values are computed and passed to
#' \code{\link{asgn_func}} to obtain posterior means of the ASGN parameters.
#' A Bayes factor (BF) comparing the two groups is then computed for the
#' current region. If the BF exceeds a stage-specific threshold, the region
#' is either accepted as a DMR (at the final stage) or split into subregions
#' and analyzed at the next stage. This continues until either
#' \code{max_stages} is reached or no subregion passes the BF thresholds.
#' 
#' The values used in the examples are intentionally small to ensure fast
#' execution and are not intended as recommended settings for real analyses.
#' 
#' @param cancer_data A data frame of methylation data for the cancer group.
#'   Rows correspond to CpG sites and columns to variables. The first two
#'   columns must be \code{CpG_ID} and \code{Chromosome}, and the remaining
#'   columns must be numeric M-values for cancer samples.
#' @param normal_data A data frame of methylation data for the normal group
#'   in the same format and CpG ordering as \code{cancer_data}.
#' @param stage Integer indicating the starting stage for the multistage
#'   analysis. Usually left at the default \code{stage = 1}.
#' @param max_stages Integer giving the maximum number of stages in the
#'   splitting procedure (default \code{3}). Larger values allow deeper
#'   splitting of regions at the cost of additional computation.
#' @param num_splits Integer giving the number of subregions created when a region
#'   is split at each stage (default \code{50}). Increasing \code{num_splits}
#'   typically improves sensitivity but increases computation time.
#' @param mcmc A list of MCMC control parameters passed to
#'   \code{\link{asgn_func}}. Expected components are
#'   \code{nburn} (burn-in iterations),
#'   \code{niter} (total iterations), and
#'   \code{thin} (thinning interval).
#'   If \code{NULL}, default values
#'   \code{list(nburn = 5000, niter = 10000, thin = 1)} are used.
#' @param priors_cancer Optional list of prior hyperparameters for the ASGN
#'   model in the cancer group, passed to \code{\link{asgn_func}}. If
#'   \code{NULL}, default priors from \code{\link{asgn_func}} are used.
#' @param priors_normal Optional list of prior hyperparameters for the ASGN
#'   model in the normal group, passed to \code{\link{asgn_func}}. If
#'   \code{NULL}, default priors from \code{\link{asgn_func}} are used.
#' @param bf_thresholds Numeric vector of Bayes factor thresholds, one for
#'   each stage (e.g., \code{c(0.5, 0.8, 1.05)}). If the length of
#'   \code{bf_thresholds} is shorter than \code{max_stages}, the last value
#'   is recycled so that each stage has an associated threshold. If
#'   \code{NULL}, default thresholds \code{c(0.5, 0.8, 1.05)} are used.
#'   
#' @return 
#'  A data frame with one row per detected DMR and the following columns:
#' \itemize{
#'   \item \code{Chromosome}: chromosome label.
#'   \item \code{Start_CpG}: CpG ID where the region starts.
#'   \item \code{End_CpG}: CpG ID where the region ends.
#'   \item \code{CpG_Count}: number of CpG sites in the region.
#'   \item \code{Decision_Value}: final Bayes factor for the region.
#'   \item \code{Stage}: stage at which the region was detected.
#' }
#' If no regions pass the BF thresholds, \code{NULL} is returned.
#' 
#'
#' @examples
#' \donttest{
#' # Load the datasets
#' data(cancer_demo)
#' data(normal_demo)
#'
#' mcmc <- list(nburn = 1000, niter = 2000, thin = 1)
#'
#' set.seed(2021)
#'
#' rst <- mmcmcBayes(cancer_demo, normal_demo,
#'                  stage = 1,
#'                  max_stages = 2,
#'                  num_splits = 5,
#'                  mcmc = mcmc,
#'                  priors_cancer = NULL,
#'                  priors_normal = NULL,
#'                  bf_thresholds = c(0.5, 0.8, 1.05))
#'
#' print(rst)
#' 
#' }
#'
#' @author Zhexuan Yang, Duchwan Ryu, and Feng Luan
#'
#' @seealso
#' \code{\link{asgn_func}} for ASGN parameter estimation,
#' \code{\link{plot_dmr_region}} for visualizing individual DMR profiles,
#' \code{\link{summarize_dmrs}} for summarizing detected regions,
#' \code{\link{compare_dmrs}} for comparing DMR sets.
#' 
#' @importFrom utils tail
#' 
#' @export
mmcmcBayes <- function(cancer_data, normal_data,
                       stage = 1, max_stages = 3,
                       num_splits = 50,
                       mcmc = NULL,
                       priors_cancer = NULL,
                       priors_normal = NULL,
                       bf_thresholds = c(0.5, 0.8, 1.05)) {

  # --- 1. Validation & Setup ---
  if (!is.data.frame(cancer_data) || !is.data.frame(normal_data)) stop("Data must be data frames")
  if (is.null(mcmc)) mcmc <- list(nburn = 5000, niter = 10000, thin = 1)
  if (is.null(bf_thresholds)) bf_thresholds <- c(0.5, 0.8, 1.05)

  # Every stage that can be evaluated needs a threshold. Pad with the last
  # value up to the deepest reachable stage, which is `stage` itself when the
  # caller starts beyond `max_stages`; otherwise `bf_thresholds[stage]` is NA.
  stages_needed <- max(stage, max_stages)
  if (length(bf_thresholds) < stages_needed) {
    bf_thresholds <- c(
      bf_thresholds,
      rep(tail(bf_thresholds, 1), stages_needed - length(bf_thresholds))
    )
  }

  total_cpgs <- nrow(cancer_data)
  if (total_cpgs == 0) return(NULL)

  # --- 2. Calculate Means ---
  # One mean M-value per sample, averaged over the CpG sites of this region.
  ybar_cancer <- .cal_mean(cancer_data)
  ybar_normal <- .cal_mean(normal_data)

  if (is.null(ybar_cancer) || is.null(ybar_normal)) return(NULL)

  # --- 3. Run ASGN ---
  # Best effort: a failed fit makes this region "not detected" instead of
  # aborting the whole multistage search.
  posterior_cancer <- tryCatch(asgn_func(ybar_cancer, priors_cancer, mcmc), error = function(e) NULL)
  posterior_normal <- tryCatch(asgn_func(ybar_normal, priors_normal, mcmc), error = function(e) NULL)

  if (is.null(posterior_cancer) || is.null(posterior_normal)) return(NULL)

  # --- 4. Calculate BF ---
  BF <- .cal_bf(ybar_cancer, ybar_normal, posterior_cancer$posteriors, posterior_normal$posteriors)

  # FAIL-SAFE: an undefined BF (NA or NaN; is.na() covers both) is treated as
  # 0, i.e. not significant.
  if (is.na(BF)) {
    BF <- 0
  }

  # --- 5. Decision Logic ---
  is_significant <- (BF >= bf_thresholds[stage])
  if (!is_significant) return(NULL)

  # `>=` rather than `==`: a call that starts past `max_stages` must terminate
  # here instead of recursing with ever-growing stage numbers.
  if (stage >= max_stages) {
    detected_DMR <- data.frame(
      Chromosome     = cancer_data$Chromosome[1],
      Start_CpG      = cancer_data$CpG_ID[1],
      End_CpG        = cancer_data$CpG_ID[total_cpgs],
      CpG_Count      = total_cpgs,
      Decision_Value = BF,
      Stage          = stage,
      stringsAsFactors = FALSE
    )
    return(detected_DMR)
  }

  # --- 6. Recursion ---
  actual_splits <- min(num_splits, total_cpgs)
  cpg_index <- seq_len(total_cpgs)
  if (actual_splits < 2) {
    # cut() rejects fewer than two intervals. A region that cannot be divided
    # (a single CpG, or num_splits < 2) is carried to the next stage whole so
    # it is still tested against the later-stage thresholds.
    split_indices <- list("1" = cpg_index)
  } else {
    split_indices <- split(cpg_index, cut(cpg_index, breaks = actual_splits, labels = FALSE))
  }

  # Posterior estimates of this region are handed to the subregions as priors.
  new_priors_cancer <- list(
    alpha  = posterior_cancer$posteriors[1],
    mu     = posterior_cancer$posteriors[2],
    sigma2 = posterior_cancer$posteriors[3]
  )
  new_priors_normal <- list(
    alpha  = posterior_normal$posteriors[1],
    mu     = posterior_normal$posteriors[2],
    sigma2 = posterior_normal$posteriors[3]
  )

  child_results <- lapply(split_indices, function(idx) {
    mmcmcBayes(
      cancer_data[idx, , drop = FALSE],
      normal_data[idx, , drop = FALSE],
      stage         = stage + 1,
      max_stages    = max_stages,
      num_splits    = num_splits,
      mcmc          = mcmc,
      priors_cancer = new_priors_cancer,
      priors_normal = new_priors_normal,
      bf_thresholds = bf_thresholds
    )
  })

  # Subregions that were not detected come back as NULL and are dropped.
  valid_results <- Filter(Negate(is.null), child_results)
  if (length(valid_results) == 0) return(NULL)

  do.call(rbind, valid_results)
}

# --- Internal Helpers ---
.cal_mean <- function(data) {
  # Per-sample mean over all rows (CpG sites) of `data`.
  #
  # `data` must contain the identifier columns "CpG_ID" and "Chromosome";
  # every other column is treated as one sample. Returns a one-column matrix
  # with one row per sample column, or NULL when an identifier column is
  # missing or there are no sample columns.
  id_cols <- c("CpG_ID", "Chromosome")
  if (any(!(id_cols %in% colnames(data)))) {
    return(NULL)
  }

  is_sample <- !(colnames(data) %in% id_cols)
  if (!any(is_sample)) {
    return(NULL)
  }

  sample_means <- colMeans(data[, is_sample, drop = FALSE], na.rm = TRUE)
  # A sample with no observed values averages to NaN; report it as NA instead.
  sample_means <- replace(sample_means, is.nan(sample_means), NA)
  matrix(sample_means, ncol = 1)
}

.asgn_density <- function(x, alpha, mu, sigma2) {
  # Evaluate the ASGN density expression used for the Bayes factor at each
  # element of `x`, for skewness `alpha`, location `mu` and variance `sigma2`.
  # Returns a numeric vector the same length as `x`.

  # A non-positive variance has no valid density; contribute zero likelihood.
  if (sigma2 <= 0) {
    return(rep(0, length(x)))
  }

  gauss_kernel <- exp(-((x - mu)^2) / (2 * sigma2))
  skew_factor <- sqrt(2) * ((1 - alpha * x)^2 + 1)
  # The literals are numerically close to sqrt(pi) / 2 and sqrt(pi); they are
  # kept exactly as written so results stay bit-for-bit unchanged.
  scale_const <- 4 * 0.886227 * (alpha^2) + 4 * 1.77245

  (skew_factor / scale_const) * gauss_kernel
}

.cal_bf <- function(ybar_cancer, ybar_normal, post_c, post_n) {
  # Bayes-factor-style ratio comparing the two groups.
  #
  # ybar_cancer, ybar_normal: numeric sample-wise means for each group.
  # post_c, post_n: parameter estimates for each group, indexed as
  #   [1] alpha, [2] mu, [3] sigma2.
  #
  # Returns sum(density of cancer means) / sum(density of normal means),
  # with 0 when both sums are zero and Inf when only the normal sum is zero.

  # .asgn_density() is vectorised in `x`, so it is called once per group
  # rather than through sapply(). sapply() returns list() for zero-length
  # input, which made the following sum() fail.
  lik_cancer <- .asgn_density(as.numeric(ybar_cancer),
                              alpha = post_c[1], mu = post_c[2], sigma2 = post_c[3])
  lik_normal <- .asgn_density(as.numeric(ybar_normal),
                              alpha = post_n[1], mu = post_n[2], sigma2 = post_n[3])

  # Samples with a missing mean contribute nothing to either sum.
  sum_c <- sum(lik_cancer, na.rm = TRUE)
  sum_n <- sum(lik_normal, na.rm = TRUE)

  if (sum_n == 0) {
    # No support under the normal model: no evidence at all if the cancer
    # sum is also zero, otherwise unbounded evidence.
    if (sum_c == 0) return(0)
    return(Inf)
  }
  sum_c / sum_n
}