% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/interface.R
\docType{class}
\name{HaldDP}
\alias{HaldDP}
\title{Builds a HaldDP source attribution model}
\format{
Object of \code{\link{R6Class}} with methods for creating a HaldDP model,
running the model, and accessing and plotting the results.
}
\usage{
HaldDP(y, x, k, priors, a_q, inits = NULL)
}
\arguments{
\item{y}{a \code{\link{Y}} object containing case observations}

\item{x}{an \code{\link{X}} object containing source observations}

\item{k}{a \code{\link{Prev}} object containing source prevalences}

\item{priors}{\code{priors} list with elements named \code{a_r}, \code{a_alpha}, \code{a_theta} and \code{b_theta},
  corresponding to the prior parameters for the \code{r}, \code{alpha}, and base
  distribution for the DP parameters respectively.

  \tabular{lll}{
  \emph{Parameter} \tab \emph{Prior Distribution} \tab \emph{Prior Parameters}\cr
  \code{a_r} \tab Dirichlet(concentration) \tab A single positive number or an \code{\link{X}} \cr
  \tab \tab  object containing the prior values for each source,\cr
  \tab \tab time and type. If a single number is supplied,\cr
  \tab \tab it will be used for all times, sources and types. \cr

  \code{a_alpha} \tab Dirichlet(concentration) \tab A single positive number or an \code{\link{Alpha}} \cr
  \tab \tab  object containing the prior values for each source,\cr
  \tab \tab time and location. If a single number is supplied,\cr
  \tab \tab it will be used for all times, sources and locations. \cr

  Type effects base \tab Gamma(shape, rate) \tab Single number for each of the shape (a_theta) and \cr
  distribution parameters \tab \tab rate (b_theta) of the Gamma base distribution.\cr
  }}

\item{a_q}{the Dirichlet Process concentration parameter.}

\item{inits}{initial values for the mcmc algorithm. This is an optional list
  that may contain any of the following items: \code{alpha}, \code{q}, and \code{r}.

  \tabular{ll}{
  \emph{Parameter} \tab \emph{Description} \cr
  \code{r}
  \tab An object of type \code{\link{X}} giving the initial values for the \eqn{R} matrix.\cr
  \tab If not specified defaults to the element-wise maximum likelihood\cr
  \tab estimates of \code{r} from the source matrix.\cr
  Source effects (\code{alpha})
  \tab An object of type \code{\link{Alpha}} specifying alpha value for each source/time/location.\cr
  \tab If not specified, default initial values\cr
  \tab for the source effects are drawn from the prior distribution. \cr
  Type effects (\code{q})
  \tab An object of type \code{\link{Q}} giving the initial clustering and values for \eqn{q}\cr
  \tab If not specified, defaults to a single group with a theta value calculated as \cr
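  \tab \eqn{\theta = \sum_{i,t,l} y_{itl} / \sum_{l=1}^{L}\sum_{t=1}^{T}\sum_{i=1}^{n}\sum_{j=1}^{m} \alpha_{jtl} r_{ijt} k_{jt}}{theta = sum(y_itl) / sum_l=1^L(sum_t=1^T(sum_i=1^n(sum_j=1^m(alpha_jtl * r_ijt * k_jt))))}, \cr
  \tab i.e. \eqn{\theta = \sum y_{itl} / \sum (\lambda_{ijtl} / \theta)}{theta = sum(y_itl) / sum(lambda_ijtl / theta)}.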
  }}
}
\value{
Object of \code{\link{HaldDP}} with methods for creating a HaldDP model,
running the model, and accessing and plotting the results.
}
\description{
Builds a HaldDP source attribution model
}
\section{Description}{

This function fits a non-parametric Poisson source attribution model for human cases of
disease. It supports multiple types, sources, times and locations. The number of
human cases for each type, time and location follows a Poisson likelihood.
}

\section{HaldDP Object Methods}{

\describe{
  \item{\code{mcmc_params(n_iter = 1000, burn_in = 0, thin = 1,
  n_r = ceiling(private$nTypes * 0.2), update_schema = c('q','alpha','r'))}}{when called, sets the mcmc
  parameters.

  \code{n_iter} sets the number of iterations returned (after removing
  \code{burn_in} and thinning results by \code{thin} i.e. a total of
  (n_iter * thin) + burn_in iterations are run)

  \code{n_r} is a positive
  integer that sets the total number of \code{r_{ijt}} parameters to be updated
  at each time-source combination (the default is 20 percent updated
  per iteration)

  \code{update_schema} a character vector containing the parameters to update
  (any of '\code{q}','\code{alpha}','\code{r}').
  }

  \item{\code{update(n_iter, append = TRUE)}}{when called, updates the \code{HaldDP}
  model by running \code{n_iter} iterations.

  If missing \code{n_iter}, the \code{n_iter} last set using \code{mcmc_params()}
  or \code{update()} is used.

  \code{append}
  is a logical value which determines whether the next \code{n_iter} iterations
  are appended to any previous iterations, or overwrite them. When
  \code{append = TRUE}, the starting values are the last iteration and no
  \code{burn_in} is removed. Running the model for the first time, or changing any
  model or fitting parameters will set \code{append = FALSE}. }

  \item{\code{get_data}}{returns a list containing the human data \code{y}
  (an array y[types, times, locations]), the source data \code{X} (an array X[types, sources, times]),
  the prevalence data (an array k[sources, times]), the type names, source names,
  time names, location names and number of different types, sources, times and locations.
  }

  \item{\code{get_priors}}{returns a list containing the DP concentration
  parameter \code{a_q}, and the priors (R6 class with members named \code{a_alpha}
  (members are array \code{a_alpha[sources, times, locations]}), \code{a_r} (an array
  \code{a_r[types, sources, times]}), \code{a_theta} and \code{b_theta}).}

  \item{\code{get_inits}}{returns an R6 class holding the initial values
  (members are \code{alpha} (an array \code{alpha[sources, times, locations]}),
  \code{theta} (an array \code{theta[types, iters]}), \code{s} (an array
  \code{s[types, iters]}), and \code{r} (an array \code{r[types, sources, times]})).}

  \item{\code{get_mcmc_params}}{returns a list of fitting parameters (\code{n_iter},
  \code{append}, \code{burn_in}, \code{thin}, \code{update_schema} (R6 class with members
  \code{alpha}, \code{q}, \code{r})).}

  \item{\code{get_acceptance}}{returns an R6 class containing the acceptance
  rates for each parameter (members are \code{alpha} (an array \code{alpha[sources, times, locations]}),
  and \code{r} (an array \code{r[types, sources, times]})).}

  \item{\code{extract(params = c("alpha", "q", "s", "r", "lambda_i", "xi", "xi_prop"),
  times = NULL, locations = NULL, sources = NULL, types = NULL, iters = NULL,
  flatten = FALSE, drop = TRUE)}}{returns a list containing a subset of the parameters
  (determined by the \code{params} vector, \code{times}, \code{locations}, \code{sources}, \code{types} and \code{iters}).

  If \code{flatten} is set to \code{TRUE}, it returns a dataframe with 1 column per
  parameter, otherwise it returns a list containing \code{params} containing a
  subset of the following arrays: \code{alpha[Sources, Times, Locations, iters]}, \code{q[Types, iters]},
  \code{s[Types, iters]}, \code{r[Types, Sources, Times, iters]},
  \code{lambda_i[Types, Times, Locations, iters]},
  \code{xi[Sources, Times, Locations, iters]}.

  \code{drop}
  determines whether to delete the dimensions of an array which have only one
  level when \code{flatten = FALSE}.}

  \item{\code{summary(alpha = 0.05, params = c("alpha", "q", "s", "r", "lambda_i",
  "xi" ,"xi_prop"), times = NULL, locations = NULL, sources = NULL,
  types = NULL, iters = NULL, flatten = FALSE, drop = TRUE, CI_type = "chen-shao")}}{
  returns a list containing the
  median and credible intervals for a subset of the parameters. The default credible
  interval type is Chen-Shao (\code{"chen-shao"}) highest posterior density intervals (alternatives
  are \code{"percentiles"} and \code{"spin"}).
  See \code{extract} for details on the subsetting. \code{xi_prop} returns the
  proportion of cases attributed to each source \code{j} and is calculated by dividing
  each iteration of \code{lambda_{jtl}} values by their sum within each time \code{t}
  and location \code{l}.}

  \item{\code{plot_heatmap(iters, cols = c("blue","white"), hclust_method = "complete")}}{
  Creates a dendrogram and heatmap for the type effect groupings (\code{s} parameter
  in the model). This uses the \code{heatmap.2} function from the \pkg{gplots} package.

  \code{iters} is a vector containing the iterations to be used in constructing
  the graph. Default is all iterations in posterior.

  \code{hclust_method} allows the user to select the method used by \code{stats::hclust} to
  cluster the type effect groupings \code{s}.

  \code{cols} gives the colours for completely dissimilar (dissimilarity value
  of 1), and identical (dissimilarity value of 0). All other values will be in
  between the two chosen colours. See \code{?colorRampPalette} for more details.}
}
}

\section{Details}{

\describe{
This function fits a source attribution model for human cases of disease.
It supports multiple types, sources, times and locations. The number of human cases
for each type, time and location follows a Poisson likelihood.
\emph{Model}
\deqn{y_{itl}\sim\textsf{Poisson}(\lambda_{itl})}
where
\deqn{\lambda_{itl}=\sum_{j=1}^{m}\lambda_{ijtl}=q_{s(i)}\sum_{j=1}^{m}(r_{ijt}\cdot k_{j}\cdot a_{jtl})}

The parameters are defined as follows:
\deqn{a_{jtl}} is the unknown source effect for source \eqn{j}, time \eqn{t}, location \eqn{l}
\deqn{q_{s(i)}} is the unknown type effect for type \eqn{i} in group \eqn{s}.
\deqn{x_{ij}} is the known number of positive samples for each source \eqn{j} type \eqn{i} combination
\deqn{n_{ij}} is the known total number of samples for each source \eqn{j} type \eqn{i} combination
\deqn{k_{j}} is the fixed prevalence in source \eqn{j} (i.e. the number of positive samples
divided by the total number of samples)
\deqn{r_{ijt}}  is the unknown relative occurrence of type \eqn{i} on source \eqn{j}.

\emph{Priors}
\deqn{r_{.jt}\sim \textsf{Dirichlet}(a\_r_{1jt},\ldots, a\_r_{njt})}
\deqn{a_{.tl}\sim \textsf{Dirichlet}(a\_alpha_{1tl},\ldots, a\_alpha_{mtl})}
\deqn{q\sim \textsf{DP}(a_q, \textsf{Gamma}(a_{\theta},b_{\theta}))}
}
}

\examples{

#### Format data using Y, X, and Prev functions #############################
## Input data must be in long format.
## NOTE(review): the string arguments below appear to name columns of the
## supplied data frame - confirm against the Y, X and Prev documentation.
y <- Y(                      # Cases
  data = sim_SA$cases,
  y = "Human",
  type = "Type",
  time = "Time",
  location = "Location"
)

x <- X(                      # Sources
  data = sim_SA$sources,
  x = "Count",
  type = "Type",
  time = "Time",
  source = "Source"
)

k <- Prev(                   # Prevalences
  data = sim_SA$prev,
  prev = "Value",
  time = "Time",
  source = "Source"
)

#### Create Dirichlet priors (alpha: 1, r: 0.1) #############################

## Create alpha prior data frame: one row per source/time/location
## combination, each with the prior value 1
prior_alpha_long <- expand.grid(
  Source   = unique(sim_SA$sources$Source),
  Time     = unique(sim_SA$sources$Time),
  Location = unique(sim_SA$cases$Location),
  Alpha    = 1
)
# Use the Alpha() constructor to specify alpha prior
prior_alpha <- Alpha(
  data     = prior_alpha_long,
  alpha    = 'Alpha',
  source   = 'Source',
  time     = 'Time',
  location = 'Location'
)

## Create r prior data frame: one row per type/source/time combination,
## each with the prior value 0.1
prior_r_long <- expand.grid(
  Type   = unique(sim_SA$sources$Type),
  Source = unique(sim_SA$sources$Source),
  Time   = unique(sim_SA$sources$Time),
  Value  = 0.1
)
# Use X() constructor to specify r prior
prior_r <- X(
  data   = prior_r_long,
  x      = 'Value',
  type   = 'Type',
  time   = 'Time',
  source = 'Source'
)

## Pack all priors into a list
## (a_theta and b_theta are the shape and rate of the Gamma base distribution)
priors <- list(
  a_theta = 0.01,
  b_theta = 0.00001,
  a_alpha = prior_alpha,
  a_r     = prior_r
)

## If all prior values are the same, they can be specified in shorthand:
## a single number is used for all times, sources and types / locations.
## Equivalent result to the longform priors specified above
priors <- list(
  a_theta = 0.01,
  b_theta = 0.00001,
  a_alpha = 1,
  a_r     = 0.1
)

#### Set initial values (optional) ##########################################
## Only q is initialised here; alpha and r are left out of the inits list
## and so take their documented defaults.
types  <- unique(sim_SA$cases$Type)
q_long <- data.frame(q=rep(15, length(types)), Type=types) # q = 15 for every type
init_q <- Q(q_long, q = 'q', type = 'Type')
inits <- list(q = init_q) # Pack starting values into a list

#### Construct model ########################################################
## a_q is the Dirichlet Process concentration parameter
my_model <- HaldDP(y = y, x = x, k = k, priors = priors, inits = inits, a_q = 0.1)

#### Set mcmc parameters ####################################################
## n_iter is the number of iterations returned, so a total of
## (n_iter * thin) + burn_in = 4 iterations are run
my_model$mcmc_params(n_iter = 2, burn_in = 2, thin = 1)

#### Update model ###########################################################
## n_iter is missing, so the value last set using mcmc_params() is used
my_model$update()
## Add an additional 2 iterations, appended to the previous ones
my_model$update(n_iter = 2, append = TRUE)

#### Extract posterior ######################################################
## returns the posterior for the alpha, q, s, r,
## lambda_i, xi and xi_prop parameters,
## for all times, locations, sources and types
## the posterior is returned as a list of arrays
\dontrun{str(my_model$extract())}

## returns the posterior for the r and alpha parameters,
## for time 1, location B, sources Source3, and Source4,
## types 5, 25, and 50, and iterations 5:15
## flatten = TRUE requests data frame output instead of arrays
## (the argument is spelled locations, matching the documented signature,
## rather than relying on partial argument matching)
\dontrun{
str(my_model$extract(params = c("r", "alpha"),
                 times = "1", locations = "B",
                 sources = c("Source3", "Source4"),
                 types = c("5", "25", "50"),
                 iters = 5:15,
                 flatten = TRUE))
}

#### Calculate medians and credible intervals ###############################
## alpha = 0.05 with Chen-Shao highest posterior density intervals
\dontrun{my_model$summary(alpha = 0.05, CI_type = "chen-shao")}
## subsetting is done in the same way as extract()
\dontrun{my_model$summary(alpha = 0.05, CI_type = "chen-shao",
                 params = c("r", "alpha"),
                 times = "1", locations = "B",
                 sources = c("Source3", "Source4"),
                 types = c("5", "25", "50"),
                 iters = 5:15,
                 flatten = TRUE)
}

#### Plot heatmap and dendrogram of the type effect grouping ################
## iters is not supplied, so all iterations in the posterior are used
my_model$plot_heatmap()

#### Extract data, initial values, prior values, acceptance
## rates for the mcmc algorithm and mcmc parameters
my_model$get_data()        # human, source and prevalence data plus names
my_model$get_inits()       # initial values
my_model$get_priors()      # a_q and the priors
my_model$get_acceptance()  # acceptance rates for alpha and r
my_model$get_mcmc_params() # fitting parameters



}
\references{
Chen, M.-H. and Shao, Q.-M. (1998). Monte Carlo estimation of Bayesian
credible and HPD intervals, \emph{Journal of Computational and Graphical Statistics}, 7.

Liu Y, Gelman A, Zheng T (2015). Simulation-efficient shortest probability
intervals, \emph{Statistics and Computing}.
}
\author{
Chris Jewell and Poppy Miller \email{p.miller at lancaster.ac.uk}
}
