#' Hierarchical Exponential Family Model
#'
#' Produces random samples from the posterior distribution of the parameters
#' of certain hierarchical exponential family models.
#'
#' @param n An integer scalar.  The size of the posterior sample required.
#' @param model A character string.  Abbreviated name for the
#'   response-population distribution combination.
#'   For a hierarchical normal model see \code{\link{hanova1}}
#'   (hierarchical one-way analysis of variance (ANOVA)).
#' @param data A numeric matrix.  The format depends on \code{model}.
#'   See \strong{Details}.
#' @param ... Optional further arguments to be passed to
#'   \code{\link[rust]{ru}}.
#' @param prior The log-prior for the parameters of the hyperprior
#'   distribution.  If the user wishes to specify their own prior then
#'   \code{prior} must be an object returned from a call to
#'   \code{\link{set_user_prior}}.
#'   Otherwise, \code{prior} is a character scalar giving the name of the
#'   required in-built prior.
#'   If \code{prior} is not supplied then a default prior is used.
#'   See \strong{Details}.
#' @param hpars A numeric vector.  Used to set parameters (if any) in
#'   an in-built prior.
#' @param param A character scalar.
#'   If \code{param = "trans"} (the default) then the marginal posterior
#'   of hyperparameter vector \eqn{\phi} is reparameterized in a way
#'   designed to improve the efficiency of sampling from this posterior.
#'   If \code{param = "original"} the original parameterization is used.
#'   The former tends to make the optimizations involved in the
#'   ratio-of-uniforms algorithm more stable and to increase the probability
#'   of acceptance, but at the expense of slower function evaluations.
#' @param init A numeric vector of length 2.  Optional initial estimates
#'   for the search for the mode of the posterior density of the
#'   hyperparameter vector \eqn{\phi}.
#' @param nrep A numeric scalar.  If \code{nrep} is not \code{NULL} then
#'   \code{nrep} gives the number of replications of the original dataset
#'   simulated from the posterior predictive distribution.
#'   Each replication is based on one of the samples from the posterior
#'   distribution.  Therefore, \code{nrep} must not be greater than \code{n}.
#'   In that event \code{nrep} is set equal to \code{n}.
#' @details Conditional on population-specific parameter vectors
#'   \eqn{\theta}1, ..., \eqn{\theta}\eqn{J}
#'   the observed \emph{response} data \eqn{y}1, ..., \eqn{y}J within each
#'   population are modelled as random samples from a distribution in an
#'   exponential family. The population parameters \eqn{\theta}1, ...,
#'   \eqn{\theta}\eqn{J} are modelled as random samples from a common
#'   \emph{population distribution}, chosen to be conditionally conjugate
#'   to the response distribution, with \emph{hyperparameter} vector
#'   \eqn{\phi}.  Conditionally on
#'   \eqn{\theta}1, ..., \eqn{\theta}\eqn{J}, \eqn{y}1, ..., \eqn{y}\eqn{J}
#'   are independent of each other and are independent of \eqn{\phi}.
#'   A \emph{hyperprior} is placed on \eqn{\phi}.  The user can either
#'   choose parameter values of a default hyperprior or specify their own
#'   hyperprior using \code{\link{set_user_prior}}.
#'
#'   The \code{\link[rust]{ru}} function in the \code{\link[rust]{rust}}
#'   package is used to draw a random sample
#'   from the marginal posterior of the hyperparameter vector \eqn{\phi}.
#'   Then, conditional on these values, population parameters are sampled
#'   directly from the conditional posterior density of
#'   \eqn{\theta}1, ..., \eqn{\theta}\eqn{J} given \eqn{\phi} and the data.
#'
#'   We outline each \code{model}, specify the format of the
#'   \code{data}, give the default (log-)priors (up to an additive constant)
#'   and detail the choices of ratio-of-uniforms parameterization
#'   \code{param}.
#'
#' \strong{Beta-binomial:} For \eqn{j = 1, ..., J},
#'   \eqn{Yj | pj} are i.i.d binomial\eqn{(nj, pj)},
#'   where \eqn{pj} is the probability of success in group \eqn{j}
#'   and \eqn{nj} is the number of trials in group \eqn{j}.
#'   \eqn{pj} are i.i.d. beta\eqn{(\alpha, \beta)}, so
#'   and \eqn{\phi = (\alpha, \beta)}.
#'   \code{data} is a 2-column matrix: the numbers of successes in column 1
#'   and the corresponding numbers of trials in column 2.
#'
#' \emph{Priors:}
#'
#' \code{prior = "bda"} (the default):
#' \eqn{log \pi(\alpha, \beta) = - 2.5 log(\alpha + \beta),
#'   \alpha > 0, \beta > 0.} [See Section 5.3 of Gelman et al. (2014).]
#'
#' \code{prior = "gamma"}: independent gamma priors on \eqn{\alpha}
#' and \eqn{\beta}, i.e.
#' \eqn{log \pi(\alpha, \beta) =
#'   (s1 - 1)log\alpha - r1 \alpha +
#'   (s2 - 1)log\beta - r2 \beta,  \alpha > 0, \beta > 0.}
#' where the respective shape (\eqn{s1}, \eqn{s2}) and rate
#' (\eqn{r1}, \eqn{r2}) parameters are specified using
#' \code{hpars} = \eqn{(s1, r1, s2, r2)}.  The default setting is
#' \code{hpars = c(1, 0.01, 1, 0.01).}
#'
#' \emph{Parameterizations for sampling:}
#'
#'  \code{param = "original"} is (\eqn{\alpha, \beta}),
#'  \code{param = "trans"} (the default) is
#' \eqn{\phi1 = logit(\alpha/(\alpha+\beta)) = log(\alpha/\beta),
#'   \phi2 = log(\alpha+\beta)}.
#' See Section 5.3 of Gelman et al. (2014).
#'
#' \strong{Gamma-Poisson:} For \eqn{j = 1, ..., J},
#'   \eqn{Yj | \lambda}j are i.i.d Poisson(\eqn{e}j\eqn{\lambda}j),
#'   where
#'   \eqn{ej} is the \emph{exposure} in group \eqn{j}, based on the
#'   total length of observation time and/or size of the population at
#'   risk of the event of interest and \eqn{\lambda}j is the mean number
#'   of events per unit of exposure.
#'   \eqn{\lambda}j are i.i.d. gamma\eqn{(\alpha, \beta)}, so
#'   \eqn{\phi = (\alpha, \beta)}.
#'   \code{data} is a 2-column matrix: the counts \eqn{yj} of the numbers of
#'   events in column 1 and the corresponding exposures \eqn{ej} in column 2.
#'
#' \emph{Priors:}
#'
#' \code{prior = "gamma"} (the default): independent gamma priors
#'   on \eqn{\alpha} and \eqn{\beta}, i.e.
#' \eqn{log \pi(\alpha, \beta) =
#'   (s1 - 1)log\alpha - r1 \alpha +
#'   (s2 - 1)log\beta - r2 \beta,  \alpha > 0, \beta > 0.}
#' where the respective shape (\eqn{s1}, \eqn{s2}) and rate
#' (\eqn{r1}, \eqn{r2}) parameters are specified using
#' \code{hpars} = \eqn{(s1, r1, s2, r2)}.  The default setting is
#' \code{hpars = c(1, 0.01, 1, 0.01).}
#'
#' \emph{Parameterizations for sampling:}
#'
#'  \code{param = "original"} is (\eqn{\alpha, \beta}),
#'  \code{param = "trans"} (the default) is
#' \eqn{\phi1 = log(\alpha/\beta), \phi2 = log(\beta).}
#' @return An object (list) of class \code{"hef"}, which has the same
#'   structure as an object of class "ru" returned from \code{\link[rust]{ru}}.
#'   In particular, the columns of the \code{n}-row matrix \code{sim_vals}
#'   contain the simulated values of \eqn{\phi}.
#'   In addition this list contains the arguments \code{model}, \code{data}
#'   and \code{prior} detailed above, an \code{n} by \eqn{J} matrix
#'   \code{theta_sim_vals}: column \eqn{j} contains the simulated values of
#'   \eqn{\theta}\eqn{j} and \code{call}: the matched call to \code{hef}.
#'
#'   If \code{nrep} is not \code{NULL} then this list also contains
#'   \code{data_rep}, a numerical matrix with \code{nrep} columns.
#'   Each column contains a replication of the first column of the original
#'   data \code{data[, 1]}, simulated from the posterior predictive
#'   distribution.
#' @references Gelman, A., Carlin, J. B., Stern, H. S. Dunson, D. B.,
#'  Vehtari, A. and Rubin, D. B. (2014) \emph{Bayesian Data Analysis}.
#'  Chapman & Hall / CRC.
#'   \url{http://www.stat.columbia.edu/~gelman/book}
#' @seealso The \code{\link[rust]{ru}} function in the \code{\link{rust}}
#'   package for details of the arguments that can be passed to \code{ru} via
#'   \code{hef}.
#' @seealso \code{\link{hanova1}} for hierarchical one-way analysis of
#'   variance (ANOVA).
#' @seealso \code{\link{set_user_prior}} to set a user-defined prior.
#' @examples
#' ############################ Beta-binomial #################################
#'
#' # ------------------------- Rat tumor data ------------------------------- #
#'
#' # Default prior, sampling on (rotated) (log(mean), log(alpha + beta)) scale
#' rat_res <- hef(model = "beta_binom", data = rat)
#' \dontrun{
#' # Hyperparameters alpha and beta
#' plot(rat_res)
#' # Parameterization used for sampling
#' plot(rat_res, ru_scale = TRUE)
#' }
#' summary(rat_res)
#'
#' # Choose rats with extreme sample probabilities
#' pops <- c(which.min(rat[, 1] / rat[, 2]), which.max(rat[, 1] / rat[, 2]))
#' # Population-specific posterior samples: separate plots
#' plot(rat_res, params = "pop", plot_type = "both", which_pop = pops)
#' # Population-specific posterior samples: one plot
#' plot(rat_res, params = "pop", plot_type = "dens", which_pop = pops,
#'      one_plot = TRUE, add_legend = TRUE)
#'
#' # Default prior, sampling on (rotated) (alpha, beta) scale
#' rat_res <- hef(model = "beta_binom", data = rat, param = "original")
#' \dontrun{
#' plot(rat_res)
#' plot(rat_res, ru_scale = TRUE)
#' }
#' summary(rat_res)
#'
#' # To produce a plot akin to Figure 5.3 of Gelman et al. (2014) we
#' # (a) Use the same prior for (alpha, beta)
#' # (b) Don't use axis rotation (rotate = FALSE)
#' # (c) Plot on the scale used for ratio-of-uniforms sampling (ru_scale = TRUE)
#' # (d) Note that the mode is relocated to (0, 0) in the plot
#' rat_res <- hef(model = "beta_binom", data = rat, rotate = FALSE)
#' \dontrun{
#' plot(rat_res, ru_scale = TRUE)
#' }
#' # This is the estimated location of the posterior mode
#' rat_res$f_mode
#'
#' # User-defined prior, passing parameters
#' # (equivalent to prior = "gamma" with hpars = c(1, 0.01, 1, 0.01))
#' user_prior <- function(x, hpars) {
#'   return(dexp(x[1], hpars[1], log = TRUE) + dexp(x[2], hpars[2], log = TRUE))
#' }
#' user_prior_fn <- set_user_prior(user_prior, hpars = c(0.01, 0.01))
#' rat_res <- hef(model = "beta_binom", data = rat, prior = user_prior_fn)
#' \dontrun{
#' plot(rat_res)
#' }
#' summary(rat_res)
#'
#' ############################ Gamma-Poisson #################################
#'
#' # ------------------------ Pump failure data ------------------------------ #
#'
#' pump_res <- hef(model = "gamma_pois", data = pump)
#' # Hyperparameters alpha and beta
#' \dontrun{
#' plot(pump_res)
#' }
#' # Parameterization used for sampling
#' plot(pump_res, ru_scale = TRUE)
#' summary(pump_res)
#'
#' # Choose pumps with extreme sample rates
#' pops <- c(which.min(pump[, 1] / pump[, 2]), which.max(pump[, 1] / pump[, 2]))
#' plot(pump_res, params = "pop", plot_type = "dens", which_pop = pops)
#' @export
hef <- function(n = 1000, model = c("beta_binom", "gamma_pois"),
                data, ..., prior = "default", hpars = NULL,
                param = c("trans", "original"), init = NULL, nrep = NULL) {
  model <- match.arg(model)
  param <- match.arg(param)
  the_call <- match.call()
  #
  # Calculate the data summaries required in the posterior
  # and check for posterior propriety, where possible
  ds <- switch(model,
               beta_binom = binomial_data(data, prior),
               gamma_pois = poisson_data(data))
  #
  # Create a list that defines the prior and any parameters in the prior
  prior <- check_prior(prior, model, hpars)
  #
  # Set the function to calculate the log-likelihood
  loglik_fn <- switch(model,
                      beta_binom = beta_binom_marginal_loglik,
                      gamma_pois = gamma_pois_marginal_loglik)
  #
  # Set a model-specific log-posterior
  logpost <- function(x, ds) {
    loglik <- do.call(loglik_fn, c(list(x = x), ds))
    if (is.infinite(loglik)) {
      return(loglik)
    }
    logprior <- do.call(prior$prior, c(list(x), prior[-1]))
    return(loglik + logprior)
  }
  # Extract arguments to be passed to ru() and set default values for
  # trans and rotate if they have not been supplied.
  ru_args <- list(...)
  if (is.null(ru_args$trans)) {
    if (param == "original") {
      ru_args$trans <- "none"
    } else {
      ru_args$trans <- "user"
    }
  }
  if (is.null(ru_args$rotate)) {
    ru_args$rotate <- TRUE
  }
  # Calculate initial estimates
  if (is.null(init)) {
    init <- switch(model,
                   beta_binom = beta_init_ests(data, param = param),
                   gamma_pois = gamma_init_ests(data, param = param))
  } else {
    if (length(init) != 2) {
      stop("init must have length 2")
    }
  }
  #
  # Create list of objects to send to function ru()
  fr_list <- list(model = model, trans = ru_args$trans,
                  rotate = ru_args$rotate, param = param)
  fr <- switch(model,
               beta_binom = do.call(beta_create_ru_list, fr_list),
               gamma_pois = do.call(gamma_create_ru_list, fr_list))
  #
  # Set ru_args$n_grid and ru_args$ep_bc to NULL just in case they have been
  # specified in ...
  ru_args$n_grid <- NULL
  ru_args$ep_bc <- NULL
  for_ru <- c(list(logf = logpost, ds = ds), fr, list(init = init, n = n),
              ru_args)
  #
  # If we use the transformed parameterization then set the transformation
  # and the log-Jacobian of this transformation.
  if (param == "trans") {
    phi_to_theta <- switch(model,
                           beta_binom = beta_phi_to_theta,
                           gamma_pois = gamma_phi_to_theta)
    log_j <- switch(model,
                    beta_binom = beta_log_j,
                    gamma_pois = gamma_log_j)
    for_ru <- c(for_ru, list(phi_to_theta = phi_to_theta, log_j = log_j))
  }
  res <- do.call(rust::ru, for_ru)
  #
  # Sample from the conditional posterior distribution of the population
  # parameters given the hyperparameters and the data
  temp <- switch(model,
                 beta_binom = beta_binom_cond_sim(res$sim_vals, data, n),
                 gamma_pois = gamma_pois_cond_sim(res$sim_vals, data, n))
  res <- c(res, temp)
  #
  # Add information about the model, the data and the prior
  res$model <- model
  res$data <- data
  res$prior <- prior
  res$ru <- 1:2
  #
  # Simulate nrep datasets from the posterior predictive distribution
  if (!is.null(nrep)) {
    nrep <- min(nrep, n)
    res$data_rep <- switch(model,
                           beta_binom = sim_pred_beta_binom(res$theta_sim_vals,
                                                            data, nrep),
                           gamma_pois = sim_pred_gamma_pois(res$theta_sim_vals,
                                                            data, nrep))
    # Transpose, so that the replicates are in the rows
    res$data_rep <- t(res$data_rep)
  }
  res$call <- the_call
  class(res) <- "hef"
  return(res)
}
