#' Compare Differentially Methylated Regions (DMRs) from Two Methods
#'
#' @description
#' \code{compare_dmrs()} identifies overlapping regions between two sets of
#' differentially methylated regions (DMRs), typically obtained from two
#' different detection methods. It reports pairwise overlaps and a simple
#' overlap percentage that can be used to assess consistency between methods.
#'
#' @param rst1 A data frame containing the first set of DMR results. Must contain
#'   at least the columns \code{Chromosome}, \code{Start_CpG}, and \code{End_CpG}.
#' @param rst2 A data frame containing the second set of DMR results, in the same
#'   format as \code{rst1}. The \code{Chromosome} values must be comparable to
#'   those in \code{rst1} (e.g., both using \code{"chr6"} or both using \code{"6"}).
#'
#' @details
#' This function compares genomic regions between two DMR result objects. For each
#' region in \code{rst1}, it searches for regions in \code{rst2} on the same
#' chromosome that have any overlap in CpG index range (partial or complete).
#'
#' CpG identifiers in \code{Start_CpG} and \code{End_CpG} are assumed to contain
#' an embedded numeric component that reflects their ordering along the genome
#' (e.g., \code{"cg00017002"}). Internally, these IDs are converted to numeric
#' values by stripping non-digit characters; rows for which this conversion fails
#' are removed before comparison.
#' 
#' For each overlapping pair of regions, the function computes
#' \itemize{
#'   \item the size of the overlapping segment (in CpG index units), and
#'   \item an overlap percentage, defined as
#'     \deqn{\text{overlap\_size} / \max(\text{length\_method1}, \text{length\_method2}) \times 100,}
#'     where \code{length_method1} and \code{length_method2} are the lengths of the
#'     two regions in CpG index units.
#' }
#'
#' The overlap percentage is therefore symmetric in the two methods and can be
#' interpreted as "how much of the larger region is covered by the overlap."
#'
#' @return
#' A data frame with one row per overlapping pair of regions and the columns:
#' \itemize{
#'   \item \code{Chromosome}: chromosome name of the overlapping regions.
#'   \item \code{Start_CpG_Method1}: start CpG ID from \code{rst1}.
#'   \item \code{End_CpG_Method1}: end CpG ID from \code{rst1}.
#'   \item \code{Start_CpG_Method2}: start CpG ID from \code{rst2}.
#'   \item \code{End_CpG_Method2}: end CpG ID from \code{rst2}.
#'   \item \code{Overlap_Percentage}: overlap percentage relative to the larger
#'         of the two regions (in CpG index units), rounded to two decimals.
#' }
#'
#' Returns \code{NULL} if no overlaps are found or if, after cleaning, one of
#' the inputs has no usable rows.
#'
#' @examples
#' \donttest{
#' # Create sample DMR results
#' dmr_method1 <- data.frame(
#'   Chromosome = c("chr1", "chr1", "chr2"),
#'   Start_CpG = c("cg0001", "cg0050", "cg0100"),
#'   End_CpG = c("cg0020", "cg0070", "cg0150")
#' )
#'
#' dmr_method2 <- data.frame(
#'   Chromosome = c("chr1", "chr2", "chr2"),
#'   Start_CpG = c("cg0005", "cg0120", "cg0090"),
#'   End_CpG = c("cg0025", "cg0160", "cg0110")
#' )
#'
#' # Compare overlapping regions
#' overlaps <- compare_dmrs(dmr_method1, dmr_method2)
#' }
#'
#' @author 
#' Zhexuan Yang, Duchwan Ryu, and Feng Luan
#'
#' @seealso
#' Related functions in this package:
#' \code{\link{mmcmcBayes}} for DMR detection using multi-stage MCMC,
#' \code{\link{asgn_func}} for parameter estimation with ASGN distribution
#'
#' @importFrom stats complete.cases
#'
#' @export
compare_dmrs <- function(rst1, rst2) {
  # Purpose: pair every region of `rst1` with every region of `rst2` that lies
  # on the same chromosome and overlaps it in CpG index range, and report the
  # overlap as a percentage of the larger of the two regions.
  #
  # Returns a data frame with one row per overlapping pair (columns
  # Chromosome, Start_CpG_Method1, End_CpG_Method1, Start_CpG_Method2,
  # End_CpG_Method2, Overlap_Percentage), ordered by `rst1` row and then by
  # `rst2` row, or NULL when there is nothing to report.
  # Raises an error when a required column is missing from either input.
  required_cols <- c("Chromosome", "Start_CpG", "End_CpG")

  if (!all(required_cols %in% colnames(rst1)) ||
      !all(required_cols %in% colnames(rst2))) {
    stop("Both result datasets must contain the columns: Chromosome, Start_CpG, and End_CpG.")
  }

  rst1 <- as.data.frame(rst1)
  rst2 <- as.data.frame(rst2)

  # Convert CpG identifiers to numeric positions.
  # Numeric input is used as-is: sending it through gsub() would first format
  # it as text, and a value such as 100000 is formatted as "1e+05", which
  # would then be stripped down to 105.
  # Anything else is treated as text and reduced to its digits; an ID without
  # digits becomes "" and therefore NA, which is filtered out below.
  clean_numeric <- function(x) {
    if (is.numeric(x)) {
      return(as.numeric(x))
    }
    as.numeric(gsub("[^0-9]", "", as.character(x)))
  }

  numeric_cols <- c("Start_CpG_numeric", "End_CpG_numeric")

  rst1$Start_CpG_numeric <- clean_numeric(rst1$Start_CpG)
  rst1$End_CpG_numeric <- clean_numeric(rst1$End_CpG)
  rst2$Start_CpG_numeric <- clean_numeric(rst2$Start_CpG)
  rst2$End_CpG_numeric <- clean_numeric(rst2$End_CpG)

  # Drop regions whose start or end could not be converted.
  rst1 <- rst1[complete.cases(rst1[, numeric_cols]), , drop = FALSE]
  rst2 <- rst2[complete.cases(rst2[, numeric_cols]), , drop = FALSE]

  if (nrow(rst1) == 0 || nrow(rst2) == 0) {
    return(NULL)
  }

  # Compare chromosomes as character: `==` on two factors with different
  # level sets is an error, and text comparison is what is intended here.
  chrom1 <- as.character(rst1$Chromosome)
  chrom2 <- as.character(rst2$Chromosome)

  start1 <- rst1$Start_CpG_numeric
  end1 <- rst1$End_CpG_numeric
  start2 <- rst2$Start_CpG_numeric
  end2 <- rst2$End_CpG_numeric

  # For each region of rst1, the row indices of the matching rst2 regions.
  # which() discards NA comparisons (missing chromosome), so such rows never
  # match instead of turning into all-NA result rows.
  hits_per_region <- lapply(seq_len(nrow(rst1)), function(i) {
    which(
      chrom2 == chrom1[i] &
        ((start2 <= end1[i] & end2 >= start1[i]) |
           (start2 >= start1[i] & end2 <= end1[i]))
    )
  })

  # Flatten to two parallel index vectors: pair k is (idx1[k], idx2[k]).
  idx1 <- rep(seq_along(hits_per_region), lengths(hits_per_region))
  idx2 <- unlist(hits_per_region, use.names = FALSE)

  if (length(idx2) == 0L) {
    return(NULL)
  }

  # Region lengths are inclusive, hence the "+ 1".
  overlap_size <- pmax(
    0,
    pmin(end1[idx1], end2[idx2]) - pmax(start1[idx1], start2[idx2]) + 1
  )
  larger_region_size <- pmax(
    end1[idx1] - start1[idx1] + 1,
    end2[idx2] - start2[idx2] + 1
  )
  overlap_percentage <- (overlap_size / larger_region_size) * 100

  # Report the identifiers exactly as supplied, not their numeric form.
  data.frame(
    Chromosome = rst1$Chromosome[idx1],
    Start_CpG_Method1 = rst1$Start_CpG[idx1],
    End_CpG_Method1 = rst1$End_CpG[idx1],
    Start_CpG_Method2 = rst2$Start_CpG[idx2],
    End_CpG_Method2 = rst2$End_CpG[idx2],
    Overlap_Percentage = round(overlap_percentage, 2),
    stringsAsFactors = FALSE
  )
}