#' @title Function to simulate one-sample composite endpoint data under staggered entry.
#' @description Simulate one-sample composite endpoints data with recurrent events and a terminal event
#' under two time scales: event time \code{t} and calendar time \code{s}. A uniform recruitment period is assumed,
#' and the function returns all observed data available at a specified calendar time. Recurrent event occurrences
#' are generated from an underlying Poisson process with subject-specific Gamma frailty.
#' @param beta Regression coefficient vector for the proportional mean functions, the default is \code{c(0,0)}, corresponds to no covariates effects.
#' @param lambda_0 Rate parameter for the underlying Poisson process, default is \code{1.15} for a mean frequeny of 2 at \code{t=2}.
#' @param size Total sample size.
#' @param recruitment Length of the recruitment period (in years), default is \code{3}.
#' @param calendar Calendar time of the end of the trial (in years), default is \code{5}.
#' @param random.censor.rate Rate parameter for independent random right censoring.
#' @param seed Seed for reproducibility.
#'
#' @returns A data frame in long format containing simulated composite endpoint data.
#' Each subject may contributing multiple rows corresponding to recurrent events, a terminal event (death), or censoring. The data include:
#' \itemize{
#' \item \code{id}: Subject identifier.
#' \item \code{e}: Enrollment time on the calendar scale.
#' \item \code{event_time_cal}: Cumulative event time on the calendar scale.
#' \item \code{status}: Event indicator with values
#' \code{2}=recurrent event, \code{1}=death, and \code{0}=censoring.
#' \item \code{Z1}, \code{Z2}: Simulated covariates used in the proportional mean model.
#' \item \code{tau_star}: Subject-specific stopping time, the last event observed in \code{[0, tau_star]} is classified as death.
#' \item \code{death}: Binary indicator for death.
#' \item \code{recurrent}: Binary indicator for recurrent events.
#' \item \code{event}: Binary event indicator, \code{event = death + recurrent}.
#' \item \code{calendar}: Calendar time cutoff used to generate the returned data.
#' \item \code{lambda_0}: Baseline Poisson process rate parameter.
#' \item \code{lambda_star}: Rate parameter of an exponential distribution in generating \code{tau_star}.
#' \item \code{gamma_scale}, \code{gamma_shape}: Parameters of the Gamma distribution used to generate subject-specific frailty terms.
#' }
#' @export
#'
#' @importFrom dplyr mutate
#' @importFrom stats rgamma rexp rnorm rbinom runif
#' @importFrom pracma newtonRaphson
#' @references Mao L, Lin DY. Semiparametric regression for the weighted composite endpoint of recurrent and terminal events. \emph{Biostatistics}. 2016 Apr; \strong{17(2)} :390-403.
#'
#' @examples
#' # Generate one-sample composite endpoint data
#' df <- Onesample.generate.sequential(size = 200,
#' recruitment = 3, calendar = 5,
#' random.censor.rate = 0.05, seed = 1123)
Onesample.generate.sequential <- function(beta = c(0,0), lambda_0 = 1.15, size, recruitment = 3,
                                          calendar = 5, random.censor.rate, seed){
  # Set the default value of 'calendar' equals 5, which simulates the entire trial data
  study_end <- 5 # Administrative censoring time, assuming the trial ends at year 5
  admin_censoring <- min(study_end, calendar)

  lambda_star <- 0.1
  # lambda_0 <- 0.8
  lambda0 <- lambda_0
  gamma_shape = 2
  # gamma_shape = 0.25
  gamma_scale = 0.5
  # gamma_scale = 4

  set.seed(seed)
  df <- NULL

  # Step1: simulate a recruitment time 'e'
  enroll <- runif(size, 0, recruitment)

  # Qinghua 04/05/2025 Update: Assume certain percentage of random censoring per year
  if (random.censor.rate > 0){
    # Random censoring happened
    random.censor.lambda <- -log(1-random.censor.rate)
    random.censor.cal <- enroll + rexp(size, random.censor.lambda)
  }

  for (j in 1:size){
    # Step 2: Simulate recurrent events from a homogeneous Poisson process

    # assume that the frailty term xi follows a gamma distribution (k: shape, theta: scale)
    # gamma distribution: mean = shape*scale, variance = shape*scale^2
    xi <- rgamma(1, shape = gamma_shape, scale = gamma_scale)
    beta <- beta
    Z1 <- rbinom(1, 1, 0.5)
    Z2 <- rnorm(1)
    Z <- c(Z1,Z2)
    times <- rexp(100, rate = lambda_0*as.vector(exp(Z%*%beta))*xi)

    # Step 3: Simulate tau_star, the stopping time (unique for each patient), the last event in [0, tau_star] is labeled as death.
    # Step 3.1: simulate tau_tilde
    F_tau_tilde2 <- function(t){
      (1 - exp(-lambda_star*xi*t))/(1 - exp(-lambda_0*xi*as.vector(exp(Z%*%beta))*t))
    }

    u <- runif(1, 0, 1)
    if (u <= lambda_star/(lambda_0*exp(as.vector(Z%*%beta)))){
      tau_tilde <- 0
    }else{
      tau_tilde <- newtonRaphson(function(t) {F_tau_tilde2(t)-u}, 0.05)$root
      # Qinghua 04/12/2025 Update: changed the starting value for newton raphson to 0.05 to avoid
      # the root finding algorithm starting from a flat region (ie. when xi and lambda_0 are both large,
      # the denominator is very close to 1, and the derivative equals zero)
      # tau_tilde <- newton.raphson(function(t) {F_tau_tilde(t)-u}, 1e-3, 1000)
      # tau_tilde <- uniroot(function(t) {F_tau_tilde2(t)-u}, lower=1e-3, upper=2000,
      #                      extendInt = "yes", maxiter = 5000)$root
    }

    # Step 3.2: find tau_1 (the smallest event time)
    tau_1 <- times[1]
    tau_star <- max(tau_1, tau_tilde)
    event_times <- cumsum(times)
    event_times <- event_times[event_times <= tau_star] # a vector of all event times (recurrent and death)


    # Events starts happening after the patient is enrolled.
    # 'event_times_calendar' is a vector of event on the calendar scale
    event_times_calendar = enroll[j]+ event_times


    # Step 4: Apply the censoring at calendar time
    if (random.censor.rate > 0){
      censoring_time <- min(admin_censoring, random.censor.cal[j])
    } else {
      censoring_time <- admin_censoring
    }

    obs_time <- event_times_calendar[event_times_calendar <= censoring_time] # Observed times

    if (enroll[j] > censoring_time) {
      # patient has not been enrolled yet, should not contribute to the total sample size
      obs_time <- NA
      status <- NA
    } else if (length(obs_time) == 0) {
      # patient was enrolled and no events happened before given calendar time
      obs_time <- censoring_time
      status <- 0
    } else if (length(obs_time) < length(event_times_calendar)) {
      # patient was enrolled and experienced at least one event and censored at this given calendar time
      obs_time <- c(obs_time, censoring_time)
      status <- c(rep(2, length(obs_time) - 1),0)
    } else {
      # patient is enrolled and died before this given calendar time
      status <- c(rep(2, length(obs_time) - 1),1)
    }

    id <- rep(j, length(obs_time))
    e <- rep(enroll[j], length(obs_time))
    Z1 <- rep(Z1, length(obs_time))
    Z2 <- rep(Z2, length(obs_time))
    temp <- data.frame(id = id, e = e, event_time_cal = obs_time, status = status, Z1 = Z1,Z2 = Z2,
                       tau_star = tau_star)
    df <- rbind(df, temp)


    # Clear memory from large objects
    rm(times, obs_time, temp)
    # gc(verbose = FALSE)  # trigger garbage collection to free memory (Note: very time consuming)
  } # End of the "j" loop

  # df <- df %>%
  #   dplyr::mutate(death = ifelse(status == 1,1,0)) %>%
  #   mutate(recurrent = ifelse(status == 2,1,0)) %>%
  #   mutate(event = death + recurrent) %>%
  #   mutate(calendar = calendar, lambda_0 = lambda_0, lambda_star = lambda_star,
  #          gamma_scale = gamma_scale, gamma_shape = gamma_shape)

  df <- df %>%
    dplyr::mutate(
      death     = ifelse(.data$status == 1, 1, 0),
      recurrent = ifelse(.data$status == 2, 1, 0),
      event     = ifelse(.data$status %in% c(1, 2), 1, 0),
      calendar  = calendar,
      lambda_0  = lambda_0,
      lambda_star = lambda_star,
      gamma_scale = gamma_scale,
      gamma_shape = gamma_shape
    )

  return(df)
}
