# -----------------------------------------------------------------------------
# File: glmstarma.R
# Purpose: Implement fitting of glmSTARMA models
# Author: Steffen Maletz
# Last modified: 2025-11-16
# -----------------------------------------------------------------------------


#' @rdname glmstarma
#' @title Fit STARMA Models based on generalized linear models
#' @description The function \code{glmstarma} estimates a multivariate time series model based on generalised linear models (GLM). The primary application is for spatio-temporal data, but different applications, like time-varying network data, can be tackled by this methodology.
#' The model framework links the mean of the time series conditional on the past, to a linear predictor. This linear predictor allows regression on past observations, past values of the linear predictor and covariates, as described in the details.
#' Various distributions with several link-functions are available.
#'
# There will be a paper available soon that describes the model and the fitting procedure in detail. (TODO: Update when paper is available)
#'
#' @param ts Multivariate time-series. Rows indicate the locations and columns the time.
#' @param model a named list specifying the model orders of the linear predictor, which can be of the following elements:
#' * \code{intercept} : (Optional) character
#'      - \code{'homogenous'} (default) for a homogenous model, i.e. the same intercept for all components
#'      - \code{'inhomogenous'} for inhomogenous models, i.e. fitting an individual intercept for each component
#' * \code{past_obs} :
#'      - Integer vector with the maximal spatial orders for the time lags in \code{past_obs_time_lags}. 
#'      - Alternatively: a binary matrix, with the entry in row \eqn{i} and column \eqn{j} indicating whether the \eqn{(i - 1)}-spatial lag for the \eqn{j}-th time lag is included in the model.
#' * \code{past_obs_time_lags} : (Optional) integer vector
#'      - indicates the time lags for \code{past_obs}. Defaults to \code{seq(length(past_obs))} (for vectors) and \code{seq(ncol(past_obs))} (for a matrix)
#' * \code{past_mean} : (Optional)
#'      - Spatial orders for the regression on past values of (latent) linear process values. 
#'      - Values can be entered in the same format as in \code{past_obs}. If not specified, no regression to the feedback process is performed.
#' * \code{past_mean_time_lags} : (Optional) integer vector
#'      - Time lags for the regression on the (latent) linear process. Values can be entered in the same format as in \code{past_obs_time_lags}.
#' * \code{covariates} : (Optional)
#'      - spatial orders for the covariate processes passed in the argument \code{covariates}. The values can be passed as in \code{past_obs} and \code{past_mean}, where the \eqn{j}-th entry or column represents the \eqn{j}-th covariate.
#'      - Default is spatial order 0 for all covariates, which corresponds to the first matrix in argument \code{wlist_covariates}.
#' @param wlist A list of quadratic matrices, with the same dimension as the time series has rows, which describe the spatial dependencies. Row-normalized matrices are recommended. See Details.
#' @param family A list generated by one of the family functions of this package \code{\link{stfamily}}. This argument specifies the marginal distributions and the type of model fitted.
#' @param covariates (Potentially named) list of covariates, containing matrices of same dimension as \code{ts} or returns of the covariate functions of this package (see \code{\link{TimeConstant}}, \code{\link{SpatialConstant}}).
#' @param wlist_past_mean (Optional) List of matrices, which describes spatial dependencies for the values of the linear predictor. If this is \code{NULL}, the matrices from \code{wlist} are used.
#' @param wlist_covariates (Optional) List of matrices, which describes spatial dependencies for the covariates. If this is \code{NULL}, the matrices from \code{wlist} are used.
#' @param control A list of parameters for controlling the fitting process. This list is passed to \code{\link{glmstarma.control}}.
#' @details For the time series \eqn{\{Y_t = (Y_{1,t}, \ldots, Y_{p,t})'\}}, we assume that the (marginal) conditional components \eqn{Y_{i,t} \mid \mathcal{F}_{t-1}} follow a distribution that is a member of the exponential family. The term \eqn{\mathcal{F}_{t-1}} denotes the history of the process up to time \eqn{t-1}.
#' The multivariate distribution of \eqn{Y_t \mid \mathcal{F}_{t-1}} is not necessarily identifiable. The conditional expected value \eqn{\mathbf{\mu}_t := \mathbb{E}(Y_t \mid \mathcal{F}_{t-1})} is connected to the linear process by the link-function, i.e. \eqn{g(\mathbf{\mu}_t) = \mathbf{\psi}_t}, which is applied elementwise.
#' The linear process has the following structure:
#' \deqn{\mathbf{\psi}_t = \mathbf{\delta} + \sum_{i = 1}^{q} \sum_{\ell = 0}^{a_i} \alpha_{i, \ell} W_{\alpha}^{(\ell)} h(\mathbf{\psi}_{t - i}) + \sum_{j = 1}^{r} \sum_{\ell = 0}^{b_j} \beta_{j, \ell} W_{\beta}^{(\ell)} \tilde{h}(\mathbf{Y}_{t - j}) + \sum_{k = 1}^{m} \sum_{\ell = 0}^{c_k} \gamma_{k, \ell} W_{\gamma}^{(\ell)} \mathbf{X}_{k, t},}
#' where the matrices \eqn{W_{\alpha}^{(\ell)}}, \eqn{W_{\beta}^{(\ell)}}, and \eqn{W_{\gamma}^{(\ell)}} are taken from the lists \code{wlist_past_mean}, \code{wlist}, and \code{wlist_covariates}, respectively, and \eqn{\ell} denotes the spatial order.
#' If \eqn{\delta = \delta_0 \mathbf{1}} with a scalar \eqn{\delta_0}, the model is called homogenous with respect to the intercept; otherwise, it is inhomogenous.
#' Spatial orders, intercept structure and deviating time lags are specified in the argument \code{model}. If \code{past_mean} is specified, it is also required that \code{past_obs} is specified for identifiability.
#'
#' The functions \eqn{h} and \eqn{\tilde{h}} are set internally with the \code{family} argument. In nearly all cases, \eqn{h} corresponds to the identity function, i.e. \eqn{h(\mathbf{\psi}_{t - i}) = \mathbf{\psi}_{t - i}}, and \eqn{\tilde{h}} is similar to the link-function.
#' Using count data as an example, \code{family = vpoisson("identity")} and \code{family = vpoisson("log")} result in the linear and log-linear Poisson STARMA models from Maletz et al. (2024), whereas \code{vpoisson("softplus")} results in the approximately linear model by Jahn et al. (2023).
#'
#' The unknown parameters \eqn{\delta}, \eqn{\alpha_{i, \ell}}, \eqn{\beta_{j, \ell}}, and \eqn{\gamma_{k, \ell}} are estimated by quasi-maximum likelihood estimation, assuming conditional independence of the components given the past for calculating the quasi-likelihood.
#' Parameter estimation is by default performed under stability constraints to ensure stability of the model. These constraints can be modified or deactivated via the \code{control} argument. See \code{\link{glmstarma.control}} for details.
#'
#' @return The function returns an object of class \code{glmstarma}, which contains, besides the (possibly revised) input to the function:
#'  * \code{target_dim} Number of locations. Corresponds to the number of rows in \code{ts}.
#'  * \code{n_obs_effective} Effective number of observation times. Corresponds to the number of columns in \code{ts} minus the maximum time lag of the model.
#'  * \code{max_time_lag} Maximum time lag in the model.
#'  * \code{log_likelihood} The (quasi)-log-likelihood of the fitted model, which is based on \code{n_obs_effective} observation times.
#'  * \code{score} The (quasi)-score vector at the quasi maximum likelihood estimation.
#'  * \code{information} The (quasi)-information matrix at the quasi maximum likelihood estimation.
#'  * \code{variance_estimation} The variance estimation of the parameter estimates. Calculated based on a sandwich estimator.
#'  * \code{aic} AIC of the model based on the quasi log-likelihood, see \link{information_criteria}.
#'  * \code{bic} BIC of the model based on the quasi log-likelihood, see \link{information_criteria}.
#'  * \code{qic} QIC of the model based on the quasi log-likelihood, see \link{QIC}.
#'  * \code{design_matrix} The final design matrix of the model
#'  * \code{derivatives} The derivatives of the linear predictor with respect to the parameters at each time point.
#'  * \code{fitted.values} The fitted values of the model, which can be extracted by the \code{\link{fitted}} method.
#'  * \code{link_values} Fitted values of the linear process, i.e. \eqn{\mathbf{\psi}_t}.
#'  * \code{algorithm} Information about the fitting method.
#'  * \code{convergence} A named list with information about the convergence of the optimization:
#'     - \code{start} The values used for the coefficients at the start of the estimation.
#'     - \code{fncount} Number of calls of the quasi-loglikelihood during optimization.
#'     - \code{grcount} Number of calls of the quasi-score during optimization.
#'     - \code{hecount} Number of calls of the quasi-information during optimization. In algorithms not using the information, this is 0 or the number of times the constraints are evaluated.
#'     - \code{fitting_time} The time in milliseconds it took to estimate the model.
#'     - \code{convergence} Logical value indicating the convergence of the algorithm.
#'     - \code{message} An optional message by the optimization algorithm.
#'  * \code{call} The function call.
#' @references 
# - TODO: Add reference to paper when available
#' - Cliff, A. D., & Ord, J. K. (1975). Space-Time Modelling with an Application to Regional Forecasting. Transactions of the Institute of British Geographers, 64, 119–128. \doi{10.2307/621469}
#' - Jahn, M., Weiß, C.H., & Kim, H., (2023), Approximately linear INGARCH models for spatio-temporal counts, \emph{Journal of the Royal Statistical Society Series C: Applied Statistics}, 72(2), 476–497, \doi{10.1093/jrsssc/qlad018}
#' - Maletz, S., Fokianos, K., & Fried, R. (2024). Spatio-Temporal Count Autoregression. \emph{Data Science in Science}, 3(1), \doi{10.1080/26941899.2024.2425171}
#' - Pfeifer, P. E., & Deutsch, S. J. (1980). A Three-Stage Iterative Procedure for Space-Time Modeling. Technometrics, 22(1), 35–47. \doi{10.2307/1268381}
#' @seealso \code{\link{stfamily}}, \code{\link{glmstarma.control}}, \code{\link{dglmstarma}}, \code{\link{TimeConstant}}, \code{\link{SpatialConstant}}
#' @examples
#' \donttest{
#' dat <- load_data("chickenpox", directory = tempdir())
#' chickenpox <- dat$chickenpox
#' population_hungary <- dat$population_hungary
#' W_hungary <- dat$W_hungary
#'
#' model_autoregressive <- list(intercept = "homogeneous", past_obs = rep(1, 7))
#' glmstarma(chickenpox, model_autoregressive, W_hungary, family = vpoisson("log"), 
#'           covariates = list(population = population_hungary), 
#'           control = list(parameter_init = "zero"))
#' }
#' @export
glmstarma <- function(ts, model = list(), wlist, family = NULL, covariates = NULL, wlist_past_mean = NULL, wlist_covariates = NULL, control = list()){
    # --- Input validation -----------------------------------------------------
    # stopifnot() evaluates its conditions in order and stops at the first
    # failure, so later checks (e.g. ncol(ts) / nrow(ts)) can rely on earlier
    # ones (ts being a numeric matrix). The argument names are the error
    # messages shown to the user.
    stopifnot("family must be specified" = !is.null(family),
              "family must be of class stfamily" = inherits(family, "stfamily"),
              "ts must be a numeric matrix" = is.matrix(ts) && is.numeric(ts),
              "ts must not contain NA values" = !anyNA(ts),
              "ts must not contain infinite values" = !any(is.infinite(ts)),
              "family does not match the data" = data_family_check(ts, family),
              "covariates must be submitted in a list" = covariate_check(covariates, ncol(ts), nrow(ts), family),
              "wlist must be a list of numeric matrices" = is.list(wlist),
              "wlist must not be an empty list" = length(wlist) > 0,
              # Scalar conditions: use the short-circuiting `||`, not the
              # elementwise `|`.
              "wlist_past_mean must be a list of matrices" = is.null(wlist_past_mean) || is.list(wlist_past_mean),
              "wlist_covariates must be a list of matrices" = is.null(wlist_covariates) || is.list(wlist_covariates),
              "control must be a list" = is.list(control))

    # --- Dimension of the neighbourhood matrices -------------------------------
    # Collect the dimension reported by wlist_check() for each of the three
    # lists and require a single common value. NA entries are dropped
    # (presumably what wlist_check() returns for a NULL list -- confirm there).
    spatial_dim <- c(wlist_check(wlist),
                     wlist_check(wlist_past_mean),
                     wlist_check(wlist_covariates))
    spatial_dim <- unique(spatial_dim[!is.na(spatial_dim)])
    if(length(spatial_dim) != 1){
        stop("All wlist matrices must have the same dimension.")
    }

    # Default: spatial order 0 for every covariate if no orders were given.
    if(is.null(model$covariates) && length(covariates) > 0){
        model$covariates <- rep(0, length(covariates))
    }
    if(nrow(ts) != spatial_dim){
        stop("The number of rows (p) in the time series must match the dimension of the wlist matrices (p x p).")
    }

    # --- Model orders vs. available neighbourhood matrices ---------------------
    model <- model_check(model)
    wlist_length <- length(wlist)
    check_length_of_wlist(nrow(model$past_obs), wlist_length, 0, "wlist")
    check_length_of_wlist(nrow(model$past_mean), length(wlist_past_mean), wlist_length, "wlist_past_mean")
    if(length(covariates) > 0){
        stopifnot("Model orders for covariates do not match the number of covariates" = ncol(model$covariates) == length(covariates))
        check_length_of_wlist(nrow(model$covariates), length(wlist_covariates), wlist_length, "wlist_covariates")

        # Make sure every covariate has a name: entries without a name get
        # "X<position>". The same names label the columns of the covariate
        # order matrix and the covariate list itself.
        covariate_names <- names(covariates)
        if(is.null(covariate_names)){
            covariate_names <- paste0("X", seq_along(covariates))
        } else {
            unnamed <- covariate_names == ""
            covariate_names[unnamed] <- paste0("X", which(unnamed))
        }
        colnames(model$covariates) <- covariate_names
        names(covariates) <- covariate_names
    }

    # The series must have more time points (columns) than the largest time lag.
    stopifnot("The maximum time lag is greater than the number of observations" = ncol(ts) > max(model$past_obs_time_lags, model$past_mean_time_lags, 0, na.rm = TRUE))

    # Complete the user-supplied control list via glmstarma.control().
    control <- do.call("glmstarma.control", control)

    # For the negative binomial family the dispersion estimation type is always
    # set to "pearson", overriding whatever was supplied through `control`.
    if(family$distribution == "negative_binomial"){
        control$dispersion_est_type <- "pearson"
    }

    # --- Fit and assemble the result ------------------------------------------
    result <- glmstarma_cpp(ts, covariates, model, wlist, wlist_past_mean, wlist_covariates, family, control)
    result$call <- match.call()

    # TODO: return the input parameters:
    result$ts <- ts
    result$wlist_ar <- wlist
    result$wlist_ma <- wlist_past_mean
    result$wlist_covariates <- wlist_covariates

    class(result) <- "glmstarma"
    result
}