#' @title DNA Methylation Prediction in Dicot Plants
#' @description Predicting sequences with DNA methylation sites like 4mC or 6mA based on Arabidopsis as reference model.
#' @param fasta_file_path Sequence file (.fasta format)
#' @param Reference Arabidopsis as Reference Model for Dicot plants
#' @return Methylation Status: Sequences with their probable DNA methylation state such as 4mC, 6mA or Non Methylated.
#' @importFrom Biostrings readDNAStringSet oligonucleotideFrequency
#' @importFrom seqinr read.fasta GC 
#' @importFrom stringr str_sub
#' @importFrom tibble tibble
#' @importFrom stats predict
#' @importFrom entropy mi.plugin
#' @importFrom ftrCOOL EIIP PSTNPss_DNA fa.read
#' @export
#' 
#' @examples
#' \donttest{
#' library(OpEnCAST)
#' data<-system.file("exdata/test.fasta", package = "OpEnCAST")
#' pred<-Dicot_MethPred(fasta_file_path=data, Reference="Arabidopsis")
#' }
#' 
#' @references Lv, H., Dao, F. Y., Zhang, D., Guan, Z. X., Yang, H., Su, W., ... & Lin, H. (2020). iDNA-MS: an integrated computational tool for detecting DNA modification sites in multiple genomes. Iscience, 23(4).
Dicot_MethPred<- function(fasta_file_path, Reference="Arabidopsis"){
  fasta_file <- fasta_file_path
  ################################Training##############################
  # Function to pad DNA sequence
  pad_dna_sequence <- function(sequence, desired_length, padding_char = "N") {
    # Right-pad a single sequence string with `padding_char` until it is
    # `desired_length` characters long.
    #
    # sequence:       one sequence as a character string.
    # desired_length: target number of characters.
    # padding_char:   string appended repeatedly to fill the gap.
    #
    # Returns the input unchanged when it is already at least
    # `desired_length` characters long, otherwise the padded string.
    shortfall <- desired_length - nchar(sequence)

    # Nothing to add: the sequence already reaches (or exceeds) the target.
    if (shortfall <= 0) {
      return(sequence)
    }

    paste0(sequence, strrep(padding_char, shortfall))
  }
  
  # Function to pad DNA sequences in a multifasta file
  pad_multifasta_file <- function(input_file, desired_length, padding_char = "N") {
    # Read every record of a FASTA file and right-pad each sequence to
    # `desired_length` using pad_dna_sequence().
    #
    # input_file:     path handed to readDNAStringSet(format = "fasta").
    # desired_length: target length passed on to pad_dna_sequence().
    # padding_char:   padding string passed on to pad_dna_sequence().
    #
    # Returns a data.frame with columns `ID` (record names) and `Sequence`
    # (padded sequence strings).
    records <- readDNAStringSet(input_file, format = "fasta")

    # The extra arguments are forwarded by sapply() to pad_dna_sequence().
    padded <- sapply(
      as.character(records),
      pad_dna_sequence,
      desired_length = desired_length,
      padding_char = padding_char
    )

    data.frame(ID = names(records), Sequence = padded, stringsAsFactors = FALSE)
  }
  
  
  ######################### Tabular format to Fasta format###############################
  
  #this is a function to convert tabular fasta into plain fasta file
  #first column should be sequence names
  #second column should be sequence
  
  #######################Fasta to Tabular format##################################
  
  FastaToTabular <- function (filename){
    # Parse a FASTA file into a two-column character matrix.
    #
    # filename: path (or anything readLines() accepts) of the FASTA file.
    #
    # Returns a character matrix with one row per record and the columns
    #   "name"     - the full header line, including the leading ">"
    #   "sequence" - all lines of the record pasted together
    # Stops with an error when the file contains no header line.
    #
    # This helper only reads; it neither writes nor deletes any file.
    #
    # NOTE(review): a second definition of FastaToTabular appears further
    # down in the enclosing function and shadows this one at run time;
    # one of the two should be removed.
    fasta_lines <- readLines(filename)

    # Header lines are the ones whose first character is ">".
    header_idx <- which(startsWith(fasta_lines, ">"))
    if (length(header_idx) == 0) {
      stop("No FASTA header line (starting with '>') found in: ", filename,
           call. = FALSE)
    }

    # A record's sequence spans from the line after its header up to the
    # line before the next header; the last record runs to the end of file.
    first_line <- header_idx + 1L
    last_line <- c(header_idx[-1] - 1L, length(fasta_lines))

    sequence <- vapply(seq_along(header_idx), function(i) {
      # Header directly followed by another header (or by end of file):
      # the record is empty. Without this guard first:last would count
      # downwards and pull header lines into the sequence.
      if (first_line[i] > last_line[i]) {
        return("")
      }
      paste(fasta_lines[first_line[i]:last_line[i]], collapse = "")
    }, character(1))

    cbind(name = fasta_lines[header_idx], sequence = sequence)
  }
  #########################alphabetcheck###########################
  alphabetCheck<-function (sequences, alphabet = "aa", label = c())
  {
    # Drop sequences containing characters outside the chosen alphabet.
    #
    # sequences: character vector (optionally named) of sequences.
    # alphabet:  one of "dna" (A,C,G,T), "rna" (A,C,G,U) or "aa"
    #            (the 20 amino-acid letters listed below).
    # label:     optional vector with one label per sequence; filtered in
    #            step with the sequences.
    #
    # Returns list(sequences = <kept sequences>, Lab = <kept labels>).
    # Unnamed sequences are named "1", "2", ... before filtering. The names
    # of removed sequences are reported through message().
    # Stops when `sequences` is empty, when the label length does not match,
    # when `alphabet` is not recognised, or when no sequence survives.
    #
    # NOTE(review): a second definition of alphabetCheck appears further
    # down in the enclosing function and shadows this one at run time;
    # one of the two should be removed.
    n_sequences <- length(sequences)
    n_labels <- length(label)

    if (n_sequences == 0) {
      stop("ERROR: sequence parameter is empty")
    }
    # Single label-length check; past this point labels are either absent
    # or exactly one per sequence.
    if (n_labels != 0 && n_labels != n_sequences) {
      stop("ERROR: The lenght of the label vector and the number of sequences do not match!")
    }

    alphabets <- list(
      rna = c("A", "C", "G", "U"),
      dna = c("A", "C", "G", "T"),
      aa = c("A", "C", "D", "E", "F", "G", "H", "I",
             "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V",
             "W", "Y")
    )
    if (!(is.character(alphabet) && length(alphabet) == 1 &&
          alphabet %in% names(alphabets))) {
      stop("ERROR: alphabet shoud be 'dna' or 'rna' or 'aa' ")
    }
    allowed <- alphabets[[alphabet]]

    # TRUE for sequences made up only of allowed characters.
    is_standard <- vapply(sequences, function(s) {
      all(strsplit(s, split = "")[[1]] %in% allowed)
    }, logical(1))

    has_labels <- n_labels == n_sequences
    if (has_labels) {
      label <- label[is_standard]
    }

    if (is.null(names(sequences))) {
      names(sequences) <- as.character(seq_along(sequences))
    }

    nonstanSeq <- names(sequences)[!is_standard]
    if (length(nonstanSeq) != 0) {
      nonstanSeq <- toString(nonstanSeq)
      warMessage <- paste("The sequences (", nonstanSeq, ") were deleted. They contained non-standard alphabets")
      message(warMessage)
    }

    sequences <- sequences[is_standard]
    if (length(sequences) == 0) {
      stop("All sequences contained non-standard alphabets. No sequences remained for analysis :) ")
    }
    if (has_labels) {
      names(label) <- names(sequences)
    }

    list(sequences = sequences, Lab = label)
  }
  #################################NCP_DNA############################
  
  ncp_dna<-function (seqs, binaryType = "numBin", outFormat = "mat", outputFileDist = "",
                     label = c())
  {
    # Encode DNA sequences position by position with a three-component
    # code per nucleotide (the tables below).
    #
    # seqs:           either the path of an existing FASTA file (read with
    #                 ftrCOOL::fa.read) or a vector of sequence strings
    #                 (upper-cased here).
    # binaryType:     "strBin"   - one string per position ("111", "001", ...)
    #                 "numBin"   - three numeric columns per position
    #                 "logicBin" - three logical columns per position
    # outFormat:      "mat" returns a matrix (all sequences must share one
    #                 length); "txt" appends one tab-separated line per
    #                 sequence to `outputFileDist`.
    # outputFileDist: target passed to write() in "txt" mode.
    # label:          optional labels, only forwarded to alphabetCheck().
    #
    # Returns, in "mat" mode, a matrix with one row per retained sequence
    # (row names = sequence names) and columns "ncp_pos<k>" for "strBin" or
    # "ncp_pos<k>-P/A/H" otherwise. "txt" mode returns NULL invisibly.
    # Sequences rejected by alphabetCheck() are dropped.
    if (length(seqs) == 1 && file.exists(seqs)) {
      seqs <- ftrCOOL::fa.read(seqs, alphabet = "dna")
    }
    else if (is.vector(seqs)) {
      seqs <- vapply(seqs, toupper, character(1))
    }
    else {
      stop("ERROR: Input sequence is not in the correct format. It should be a FASTA file or a string vector.")
    }
    seqs <- alphabetCheck(seqs, alphabet = "dna", label)[[1]]

    # Turn every sequence into one encoded row. Binding rows explicitly
    # keeps the sequences-by-columns orientation even for one sequence or
    # for sequences of length one.
    encode_rows <- function(codebook) {
      rows <- lapply(seqs, function(s) {
        unlist(codebook[strsplit(s, split = "")[[1]]])
      })
      do.call(rbind, unname(rows))
    }

    if (outFormat == "mat") {
      lenSeqs <- vapply(seqs, nchar, integer(1))
      if (length(unique(lenSeqs)) > 1) {
        stop("ERROR: All sequences should have the same length in 'mat' mode. For sequences with different lengths, please use 'txt' for outFormat parameter")
      }
      positions <- seq_len(lenSeqs[[1]])
      # Column names for the three-columns-per-position layouts.
      triplet_names <- paste0("ncp_pos", rep(positions, each = 3), "-",
                              rep(c("P", "A", "H"), length(positions)))

      if (binaryType == "strBin") {
        featureMatrix <- encode_rows(
          c(A = "111", C = "001", G = "100", T = "010", U = "010")
        )
        colnames(featureMatrix) <- paste0("ncp_pos", positions)
      }
      else if (binaryType == "logicBin") {
        # NOTE(review): this table assigns C and T differently from the
        # numeric/string tables; kept exactly as in the original code.
        featureMatrix <- encode_rows(
          list(A = c(TRUE, TRUE, TRUE), C = c(FALSE, TRUE, FALSE),
               G = c(TRUE, FALSE, FALSE), T = c(FALSE, FALSE, TRUE),
               U = c(FALSE, FALSE, TRUE))
        )
        colnames(featureMatrix) <- triplet_names
      }
      else if (binaryType == "numBin") {
        featureMatrix <- encode_rows(
          list(A = c(1, 1, 1), C = c(0, 0, 1), G = c(1, 0, 0),
               T = c(0, 1, 0), U = c(0, 1, 0))
        )
        colnames(featureMatrix) <- triplet_names
      }
      else {
        stop("ERROR! Choose one of 'strBin', 'logicBin', or 'numBin' for binaryFormat")
      }
      row.names(featureMatrix) <- names(seqs)
      return(featureMatrix)
    }
    else if (outFormat == "txt") {
      nucs <- c(A = "111", C = "001", G = "100", T = "010", U = "010")
      namesSeqs <- names(seqs)
      # Index-based loop so that each line is prefixed with its own
      # sequence name.
      for (i in seq_along(seqs)) {
        charList <- strsplit(seqs[[i]], split = "")[[1]]
        temp <- paste(c(namesSeqs[i], unname(nucs[charList])), collapse = "\t")
        write(temp, outputFileDist, append = TRUE)
      }
      return(invisible(NULL))
    }
    else {
      stop("ERROR: outFormat should be 'mat' or 'txt' ")
    }
  }
  
  
  
  ###########################GC_Content##########################
  GC.content <- function(fasta_file){
    # Compute, for every record of a FASTA file, the value returned by
    # seqinr::GC(), rounded to four decimals.
    #
    # fasta_file: path passed to seqinr::read.fasta().
    #
    # Returns a one-column matrix named "GC-content" with one row per
    # record (row names come from the data.frame built out of the records).
    records <- seqinr::read.fasta(file = fasta_file)

    gc_by_record <- lapply(records, function(bases) {
      round(seqinr::GC(bases), 4)
    })

    # One column per record at this point; label the single row, then
    # transpose so that records become rows.
    gc_frame <- data.frame(gc_by_record)
    rownames(gc_frame) <- c("GC-content")

    t(gc_frame)
  }
  ################################ONF#################################
  oligo.freq <- function(fasta_file,f){
    # Count oligonucleotides of width `f` in every record of a FASTA file.
    #
    # fasta_file: path passed to readDNAStringSet().
    # f:          oligonucleotide width handed to oligonucleotideFrequency().
    #
    # Returns a data.frame with one row per record (row names = record
    # names) and one column per oligonucleotide.
    dna_set <- readDNAStringSet(fasta_file)
    counts <- data.frame(oligonucleotideFrequency(dna_set, width = f))
    rownames(counts) <- names(dna_set)
    counts
  }
  
  #########################################AMIP################################
  
  AMIP<-function(fasta_file,n1=1,n2=4){
    # Mean mutual-information value per sequence over a range of step sizes.
    #
    # For every record and every step k in n1..n2, the single-nucleotide
    # counts taken with step 1 and with step k are stacked into a 2 x 4
    # table, which is scored with entropy::mi.plugin(). The per-step scores
    # are rounded to four decimals, averaged, and the mean is rounded again.
    #
    # fasta_file: path passed to readDNAStringSet().
    # n1, n2:     first and last step size (n2 must not be below n1).
    #
    # Returns a one-column numeric matrix named "Mean Of AMIP" with one row
    # per record (row names = record names).
    if (n2 < n1) {
      stop("'n2' must be greater than or equal to 'n1'", call. = FALSE)
    }
    x <- readDNAStringSet(fasta_file)
    steps <- seq(from = n1, to = n2, by = 1)

    mean_ami <- function(dna) {
      base_counts <- oligonucleotideFrequency(dna, width = 1, step = 1)
      scores <- vapply(steps, function(k) {
        stepped_counts <- oligonucleotideFrequency(dna, width = 1, step = k)
        entropy::mi.plugin(rbind(base_counts, stepped_counts))
      }, numeric(1))
      round(mean(round(scores, 4)), 4)
    }

    values <- as.numeric(unlist(lapply(x, mean_ami), use.names = FALSE))
    matrix(values, ncol = 1, dimnames = list(names(x), "Mean Of AMIP"))
  }
  
  #######################mononucleotide_binary_encoding##################################
  
  FastaToTabular <- function (filename){
    # Parse a FASTA file into a two-column character matrix.
    #
    # filename: path (or anything readLines() accepts) of the FASTA file.
    #
    # Returns a character matrix with one row per record and the columns
    #   "name"     - the full header line, including the leading ">"
    #   "sequence" - all lines of the record pasted together
    # Stops with an error when the file contains no header line.
    #
    # This helper only reads; it neither writes nor deletes any file.
    #
    # NOTE(review): FastaToTabular is also defined earlier in the enclosing
    # function; this later definition is the one in effect at run time, so
    # the earlier one should be removed.
    fasta_lines <- readLines(filename)

    # Header lines are the ones whose first character is ">".
    header_idx <- which(startsWith(fasta_lines, ">"))
    if (length(header_idx) == 0) {
      stop("No FASTA header line (starting with '>') found in: ", filename,
           call. = FALSE)
    }

    # A record's sequence spans from the line after its header up to the
    # line before the next header; the last record runs to the end of file.
    first_line <- header_idx + 1L
    last_line <- c(header_idx[-1] - 1L, length(fasta_lines))

    sequence <- vapply(seq_along(header_idx), function(i) {
      # Header directly followed by another header (or by end of file):
      # the record is empty. Without this guard first:last would count
      # downwards and pull header lines into the sequence.
      if (first_line[i] > last_line[i]) {
        return("")
      }
      paste(fasta_lines[first_line[i]:last_line[i]], collapse = "")
    }, character(1))

    cbind(name = fasta_lines[header_idx], sequence = sequence)
  }
  #########################alphabetcheck###########################
  alphabetCheck<-function (sequences, alphabet = "aa", label = c())
  {
    # Drop sequences containing characters outside the chosen alphabet.
    #
    # sequences: character vector (optionally named) of sequences.
    # alphabet:  one of "dna" (A,C,G,T), "rna" (A,C,G,U) or "aa"
    #            (the 20 amino-acid letters listed below).
    # label:     optional vector with one label per sequence; filtered in
    #            step with the sequences.
    #
    # Returns list(sequences = <kept sequences>, Lab = <kept labels>).
    # Unnamed sequences are named "1", "2", ... before filtering. The names
    # of removed sequences are reported through message().
    # Stops when `sequences` is empty, when the label length does not match,
    # when `alphabet` is not recognised, or when no sequence survives.
    #
    # NOTE(review): alphabetCheck is also defined earlier in the enclosing
    # function; this later definition is the one in effect at run time, so
    # the earlier one should be removed.
    n_sequences <- length(sequences)
    n_labels <- length(label)

    if (n_sequences == 0) {
      stop("ERROR: sequence parameter is empty")
    }
    # Single label-length check; past this point labels are either absent
    # or exactly one per sequence.
    if (n_labels != 0 && n_labels != n_sequences) {
      stop("ERROR: The lenght of the label vector and the number of sequences do not match!")
    }

    alphabets <- list(
      rna = c("A", "C", "G", "U"),
      dna = c("A", "C", "G", "T"),
      aa = c("A", "C", "D", "E", "F", "G", "H", "I",
             "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V",
             "W", "Y")
    )
    if (!(is.character(alphabet) && length(alphabet) == 1 &&
          alphabet %in% names(alphabets))) {
      stop("ERROR: alphabet shoud be 'dna' or 'rna' or 'aa' ")
    }
    allowed <- alphabets[[alphabet]]

    # TRUE for sequences made up only of allowed characters.
    is_standard <- vapply(sequences, function(s) {
      all(strsplit(s, split = "")[[1]] %in% allowed)
    }, logical(1))

    has_labels <- n_labels == n_sequences
    if (has_labels) {
      label <- label[is_standard]
    }

    if (is.null(names(sequences))) {
      names(sequences) <- as.character(seq_along(sequences))
    }

    nonstanSeq <- names(sequences)[!is_standard]
    if (length(nonstanSeq) != 0) {
      nonstanSeq <- toString(nonstanSeq)
      warMessage <- paste("The sequences (", nonstanSeq, ") were deleted. They contained non-standard alphabets")
      message(warMessage)
    }

    sequences <- sequences[is_standard]
    if (length(sequences) == 0) {
      stop("All sequences contained non-standard alphabets. No sequences remained for analysis :) ")
    }
    if (has_labels) {
      names(label) <- names(sequences)
    }

    list(sequences = sequences, Lab = label)
  }
  #################################MBE_DNA############################
  
  mbe_dna<-function (seqs, binaryType = "numBin", outFormat = "mat", outputFileDist = "",
                     label = c())
  {
    # Encode DNA sequences position by position with a four-component
    # one-hot code per nucleotide (A, C, G, T/U).
    #
    # seqs:           either the path of an existing FASTA file (read with
    #                 ftrCOOL::fa.read) or a vector of sequence strings
    #                 (upper-cased here).
    # binaryType:     "strBin"   - one string per position ("1000", ...)
    #                 "numBin"   - four numeric columns per position
    #                 "logicBin" - four logical columns per position
    # outFormat:      "mat" returns a matrix (all sequences must share one
    #                 length); "txt" appends one tab-separated line per
    #                 sequence to `outputFileDist`.
    # outputFileDist: target passed to write() in "txt" mode.
    # label:          optional labels, only forwarded to alphabetCheck().
    #
    # Returns, in "mat" mode, a matrix with one row per retained sequence
    # (row names = sequence names) and columns "pos_mbe<k>" for "strBin" or
    # "pos_mbe<k>-A/C/G/T" otherwise. "txt" mode returns NULL invisibly.
    # Sequences rejected by alphabetCheck() are dropped.
    if (length(seqs) == 1 && file.exists(seqs)) {
      seqs <- ftrCOOL::fa.read(seqs, alphabet = "dna")
    }
    else if (is.vector(seqs)) {
      seqs <- vapply(seqs, toupper, character(1))
    }
    else {
      stop("ERROR: Input sequence is not in the correct format. It should be a FASTA file or a string vector.")
    }
    seqs <- alphabetCheck(seqs, alphabet = "dna", label)[[1]]

    # Turn every sequence into one encoded row. Binding rows explicitly
    # keeps the sequences-by-columns orientation even for one sequence or
    # for sequences of length one.
    encode_rows <- function(codebook) {
      rows <- lapply(seqs, function(s) {
        unlist(codebook[strsplit(s, split = "")[[1]]])
      })
      do.call(rbind, unname(rows))
    }

    if (outFormat == "mat") {
      lenSeqs <- vapply(seqs, nchar, integer(1))
      if (length(unique(lenSeqs)) > 1) {
        stop("ERROR: All sequences should have the same length in 'mat' mode. For sequences with different lengths, please use 'txt' for outFormat parameter")
      }
      positions <- seq_len(lenSeqs[[1]])
      # Four columns per position, one per one-hot component, so that the
      # number of names matches the number of encoded columns.
      onehot_names <- paste0("pos_mbe", rep(positions, each = 4), "-",
                             rep(c("A", "C", "G", "T"), length(positions)))

      if (binaryType == "strBin") {
        featureMatrix <- encode_rows(
          c(A = "1000", C = "0100", G = "0010", T = "0001", U = "0001")
        )
        colnames(featureMatrix) <- paste0("pos_mbe", positions)
      }
      else if (binaryType == "logicBin") {
        # Logical counterpart of the numeric one-hot table below.
        featureMatrix <- encode_rows(
          list(A = c(TRUE, FALSE, FALSE, FALSE),
               C = c(FALSE, TRUE, FALSE, FALSE),
               G = c(FALSE, FALSE, TRUE, FALSE),
               T = c(FALSE, FALSE, FALSE, TRUE),
               U = c(FALSE, FALSE, FALSE, TRUE))
        )
        colnames(featureMatrix) <- onehot_names
      }
      else if (binaryType == "numBin") {
        featureMatrix <- encode_rows(
          list(A = c(1, 0, 0, 0), C = c(0, 1, 0, 0), G = c(0, 0, 1, 0),
               T = c(0, 0, 0, 1), U = c(0, 0, 0, 1))
        )
        colnames(featureMatrix) <- onehot_names
      }
      else {
        stop("ERROR! Choose one of 'strBin', 'logicBin', or 'numBin' for binaryFormat")
      }
      row.names(featureMatrix) <- names(seqs)
      return(featureMatrix)
    }
    else if (outFormat == "txt") {
      nucs <- c(A = "1000", C = "0100", G = "0010", T = "0001", U = "0001")
      namesSeqs <- names(seqs)
      # Index-based loop so that each line is prefixed with its own
      # sequence name.
      for (i in seq_along(seqs)) {
        charList <- strsplit(seqs[[i]], split = "")[[1]]
        temp <- paste(c(namesSeqs[i], unname(nucs[charList])), collapse = "\t")
        write(temp, outputFileDist, append = TRUE)
      }
      return(invisible(NULL))
    }
    else {
      stop("ERROR: outFormat should be 'mat' or 'txt' ")
    }
  }
  #########EIIP############
  
  
  # ---- Dependency check -------------------------------------------------
  # Done first so that a missing package is reported before any download
  # or feature extraction is attempted.
  .required_pkgs <- c(
    "caret",
    "kernlab",
    "ranger",
    "xgboost",
    "gbm"
  )

  missing_pkgs <- .required_pkgs[!vapply(
    .required_pkgs,
    requireNamespace,
    logical(1),
    quietly = TRUE
  )]

  if (length(missing_pkgs) > 0) {
    stop(
      "The following packages are required but not installed: ",
      paste(missing_pkgs, collapse = ", "),
      call. = FALSE
    )
  }

  # ---- EIIP features ----------------------------------------------------
  EIIP_final <- ftrCOOL::EIIP(fasta_file)

  # ---- PSTNPss features and trained models (downloaded) -----------------
  # Raise the download timeout for the remote files and restore the
  # previous option value when the enclosing function exits.
  oldopt <- options(timeout = 600)
  on.exit(options(oldopt), add = TRUE)

  # Positive / negative reference sequences handed to PSTNPss_DNA().
  posSeqs <- suppressWarnings(
    ftrCOOL::fa.read(url("https://zenodo.org/records/17934726/files/AT__Pos.fasta?download=1"))
  )

  negSeqs <- suppressWarnings(
    ftrCOOL::fa.read(url("https://zenodo.org/records/17934726/files/AT_Neg.fasta?download=1"))
  )

  seqs <- ftrCOOL::fa.read(file = fasta_file, alphabet = "dna")
  PSTNPss_Final <- ftrCOOL::PSTNPss_DNA(seqs = seqs, pos = posSeqs, neg = negSeqs)

  rf_model <- readRDS(url("https://zenodo.org/records/17934726/files/rf_AT.rds?download=1"))
  xgb_model <- readRDS(url("https://zenodo.org/records/17934726/files/xgb_AT.rds?download=1"))
  svm_model <- readRDS(url("https://zenodo.org/records/17934726/files/svm_AT.rds?download=1"))

  # ---- Feature table ----------------------------------------------------
  # Column 1 of the parsed table holds the header lines, column 2 the
  # sequences.
  res <- FastaToTabular(fasta_file)
  seq_id <- res[, 1]
  data <- as.vector(res[, 2])

  mat <- as.matrix(ncp_dna(seqs = data, binaryType = "strBin", outFormat = "mat"))
  # ncp_dna() drops sequences with characters outside A/C/G/T. The other
  # feature sets are computed on every record, so a dropped row would
  # misalign identifiers and features; stop instead.
  if (nrow(mat) != length(seq_id)) {
    stop(
      "Some input sequences contain characters other than A, C, G and T; ",
      "remove or correct them before prediction.",
      call. = FALSE
    )
  }

  # Row names of `mat` are the sequence strings themselves because `data`
  # is passed in unnamed.
  sequence <- rownames(mat)
  ncp <- cbind(seq_id, sequence, mat)
  rownames(ncp) <- seq_id
  # drop = FALSE keeps the matrix shape when the input has one sequence.
  ncp_temp <- data.frame(ncp[, -1, drop = FALSE], stringsAsFactors = FALSE)
  # Everything except the leading `sequence` column, converted to numeric
  # column by column (keeps one row per sequence even for a single row).
  ncp_final <- as.data.frame(lapply(ncp_temp[, -1, drop = FALSE], as.numeric))

  # Natural log of the GC value times 100; non-positive results (including
  # -Inf when the GC value is 0) are replaced by 0.
  log_gc_temp <- log(GC.content(fasta_file) * 100, base = exp(1))
  log_gc <- as.data.frame(as.numeric(ifelse(log_gc_temp > 0, log_gc_temp, 0)))

  onf <- oligo.freq(fasta_file, 2)

  res_mbe <- as.matrix(mbe_dna(seqs = data, binaryType = "strBin", outFormat = "mat"))
  # NOTE(review): the two steps below discard the first two encoded
  # positions (pos_mbe1, pos_mbe2), mirroring the NCP steps above where
  # the discarded columns are identifiers. Neither position is among the
  # selected model inputs, so the behaviour is kept as is.
  mbe_temp <- data.frame(res_mbe[, -1, drop = FALSE], stringsAsFactors = FALSE)
  mbe_final <- as.data.frame(lapply(mbe_temp[, -1, drop = FALSE], as.numeric))

  inputData <- as.data.frame(
    cbind(onf, gcc = log_gc[, 1], ncp_final, mbe_final, EIIP_final, PSTNPss_Final)
  )

  # Columns passed to the models, selected by name.
  selected_columns <- c("21", "20", "19", "ncp_pos21", "pos_mbe21",
                        "pos21", "22", "23", "24", "25", "26", "27",
                        "ncp_pos24", "pos24", "18", "pos27", "ncp_pos27",
                        "pos_mbe27", "3", "2", "pos25", "ncp_pos25", "pos_mbe25",
                        "17", "pos_mbe22", "pos22", "ncp_pos22", "16", "pos20",
                        "ncp_pos20", "pos_mbe20", "gcc", "15", "TC",
                        "TA", "TT", "ncp_pos23", "pos_mbe23")

  test_data <- inputData[, selected_columns]

  # ---- Prediction -------------------------------------------------------
  predicted_prob_svm <- predict(svm_model, newdata = test_data, type = "prob")
  predicted_prob_rf <- predict(rf_model, newdata = test_data, type = "prob")
  predicted_prob_xgb <- predict(xgb_model, newdata = test_data, type = "prob")
  # Assumes the combined table is a data frame whose nine columns are named
  # SVM.X1..X3, RF.X1..X3 and XGB.1..3, as referenced below - TODO confirm
  # against the stored models.
  test_res_en_prob <- cbind(SVM = predicted_prob_svm, RF = predicted_prob_rf, XGB = predicted_prob_xgb)

  # ---- Weighted ensemble ------------------------------------------------
  weights <- c(SVM = 0.19070627, RF = 0.78879226, XGB = 0.02050146)

  # Scale each model's three probability columns by that model's weight.
  weighted_data <- as.data.frame(test_res_en_prob)
  weighted_data[1:3] <- test_res_en_prob[1:3] * weights["SVM"]
  weighted_data[4:6] <- test_res_en_prob[4:6] * weights["RF"]
  weighted_data[7:9] <- test_res_en_prob[7:9] * weights["XGB"]

  # Per-class score = sum of the weighted probabilities of the three models.
  weighted_data$Class_1 <- rowSums(weighted_data[c("SVM.X1", "RF.X1", "XGB.1")])
  weighted_data$Class_2 <- rowSums(weighted_data[c("SVM.X2", "RF.X2", "XGB.2")])
  weighted_data$Class_3 <- rowSums(weighted_data[c("SVM.X3", "RF.X3", "XGB.3")])

  weighted_data$Predicted_Class <- apply(weighted_data[, c("Class_1", "Class_2", "Class_3")], 1, which.max)

  final_results <- weighted_data[, c("Class_1", "Class_2", "Class_3", "Predicted_Class")]

  # Class index -> label: 1 = no modification, 2 = 6mA, 3 = 4mC.
  class_labels <- c("No Modification", "DNA 6mA Methylation", "DNA 4mC Methylation")
  final_results$Predicted_Class_Label <- class_labels[final_results$Predicted_Class]

  # Identifiers are the FASTA header lines without the leading ">".
  Ids <- rownames(ncp_temp)
  Ids <- sub("^>", "", Ids)

  final_pred <- data.frame(Ids = Ids,
                           Sequence = ncp_temp[, 1],
                           Modification = final_results$Predicted_Class_Label,
                           Probability = round(apply(final_results[, c("Class_1", "Class_2", "Class_3")], 1, max), 2))
  rownames(final_pred) <- NULL
  return(final_pred)
}

