\name{create.life}
\alias{create.life}
\alias{simulate.boral}
\title{Simulate a Multivariate Response Matrix}
\description{Simulate a multivariate response matrix, given parameters such as but not necessarily all of: family, number of latent variables and related coefficients, a matrix of explanatory variables and related coefficients, row effects, cutoffs for cumulative probit regression of ordinal responses.}

\usage{ 
create.life(true.lv = NULL, lv.coefs, X = NULL, X.coefs = NULL, 
     traits = NULL, traits.coefs = NULL, family, row.eff = "none", 
     row.params = NULL, trial.size = 1, cutoffs = NULL, 
     powerparam = NULL, manual.dim = NULL)

\method{simulate}{boral}(object, nsim = 1, seed = NULL, est = "median", ...)   
 }

\arguments{
  \item{object}{An object of class "boral".}

  \item{nsim}{Number of multivariate response matrices to simulate. Defaults to 1.}

 \item{seed}{Seed for dataset simulation. Defaults to NULL, in which case no seed is set.}

  \item{est}{A choice of either the posterior median (\code{est = "median"}) or posterior mean (\code{est = "mean"}), which are then treated as the true parameter values from which the response matrices are simulated. Default is posterior median.}

  \item{true.lv}{A matrix of true latent variables. With multivariate abundance data in ecology for instance, each row corresponds to the true site ordination coordinates. Defaults to \code{NULL}, in which case no latent variables are included.}

 \item{lv.coefs}{A matrix containing column-specific intercepts, latent variable coefficients relating to \code{true.lv}, and dispersion parameters.}

  \item{X}{A model matrix of covariates, which can be included as part of the data generation. Defaults to \code{NULL}, in which case no model matrix is used. No intercept column should be included in \code{X}.}  

  \item{X.coefs}{The coefficients relating to the model matrix \code{X}.}

  \item{traits}{A model matrix of species covariates, which can be included as part of the data generation. Defaults to \code{NULL}, in which case no matrix is used. An intercept column should be included in \code{traits} if appropriate (usually is).}  

  \item{traits.coefs}{A matrix of coefficients that are used to generate "new" column-specific intercepts and \code{X.coefs}. The number of rows should equal (\code{ncol(X)+1}) and the number of columns should equal (\code{ncol(traits)+1}). 
  
  How this argument works is as follows: when both \code{traits} and \code{traits.coefs} are supplied, then new column-specific intercepts (i.e. the first column of \code{lv.coefs} is overwritten) are generated by simulating from a normal distribution with mean equal to \code{traits*traits.coefs[1,-ncol(traits.coefs)]} and standard deviation \code{traits.coefs[1,ncol(traits.coefs)]}. In other words, the last column of \code{traits.coefs} provides the standard deviation of the normal distribution, with the other columns being the regression coefficients in the mean of the normal distribution. Analogously, new \code{X.coefs} are generated in the same manner using the remaining rows of \code{traits.coefs}. Please see the section on including species traits in the help file for \code{\link{boral}} for more information.
   
	It is important to highlight that within this data generation mechanism, the new column-specific intercepts and \code{X.coefs} are now random effects, being drawn from a normal distribution. 

	Defaults to \code{NULL}, in conjunction with \code{traits = NULL}.}  

  \item{family}{Either a single element, or a vector of length equal to the number of columns in \code{y}. The former assumes all columns of \code{y} come from this distribution. The latter option allows for different distributions for each column of \code{y}. Elements can be one of "binomial" (with probit link), "poisson" (with log link), "negative.binomial" (with log link), "normal" (with identity link), "lnormal" for lognormal (with log link), "tweedie" (with log link), "exponential" (with log link), "gamma" (with log link), "beta" (with logit link), "ordinal" (cumulative probit regression). 

  For the negative binomial distribution, the variance is parameterized as \eqn{Var(y) = \mu + \phi\mu^2}, where \eqn{\phi} is the column-specific dispersion parameter. For the normal distribution, the variance is parameterized as \eqn{Var(y) = \phi^2}, where \eqn{\phi} is the column-specific standard deviation. For the tweedie distribution, the variance is parameterized as \eqn{Var(y) = \phi \mu^p} where \eqn{\phi} is the column-specific dispersion parameter and \eqn{p} is a power parameter common to all columns assumed to be tweedie, with \eqn{1 < p < 2}. For the gamma distribution, the variance is parameterized as \eqn{Var(y) = \mu/\phi} where \eqn{\phi} is the column-specific rate (henceforth referred to also as dispersion parameter). For the beta distribution, the parameterization is in terms of the mean \eqn{\mu} and sample size \eqn{\phi} (henceforth referred to also as dispersion parameter), so that the two shape parameters are given by \eqn{a = \mu\phi} and \eqn{b = (1-\mu)\phi}.

 All columns assumed to have ordinal responses are constrained to have the same cutoff points, with a column-specific intercept to account for differences between the columns (please see \emph{Details} for formulation). 
}

  \item{row.eff}{Single element indicating whether row effects are included as fixed effects ("fixed"), random effects ("random") or not included ("none") in the boral model. If random effects, they are drawn from a normal distribution with mean zero and standard deviation given by \code{row.params}. Defaults to "none". } 

  \item{row.params}{Parameters corresponding to the row effect from the boral model. If \cr \code{row.eff = "fixed"}, then these are the fixed effects and should have length equal to the number of rows in \code{y}. If \code{row.eff = "random"}, then this is the standard deviation for the random effects normal distribution. If \code{row.eff = "none"}, then this argument is ignored.}

 \item{trial.size}{Either equal to a single element, or a vector of length equal to the number of columns in \code{y}. If a single element, then all columns assumed to be binomially distributed will have trial size set to this. If a vector, different trial sizes are allowed in each column of \code{y}. The argument is ignored for all columns not assumed to be binomially distributed. Defaults to 1, i.e. Bernoulli distribution.}

  \item{cutoffs}{A vector of common cutoffs for cumulative probit regression when any of \code{family} is ordinal. They should be in increasing order. Defaults to \code{NULL}.}

  \item{powerparam}{A common power parameter for tweedie regression when any of \code{family} is tweedie. Defaults to \code{NULL}.}

  \item{manual.dim}{A vector of length 2, containing the number of rows (\eqn{n}) and columns (\eqn{p}) for the multivariate response matrix. This is a "backup" argument only required when \code{create.life} can not determine how many rows or columns the multivariate response matrix should be.}

  \item{...}{Not used.}
}

\details{
\code{create.life} gives the user full capacity to control the true parameters of the model from which the multivariate response matrices are generated. 

\code{simulate} makes use of the generic function of the same name in \code{R}: it takes a fitted boral model, treats either the posterior median or the posterior mean estimates from the model as the true parameters, and generates response matrices based on them.
}

\value{
One or more multivariate response matrices of dimension \eqn{n} times \eqn{p}. If \code{simulate} is used, then an array is generated where the last dimension indexes the dataset number. 
}

\author{
Francis K.C. Hui \email{fhui28@gmail.com}
}

\seealso{
\code{\link{boral}} for the default function for fitting a boral model. 
}

\examples{
## Example 1 - Simulate a response matrix of normally distributed data
## rmvnorm comes from the mvtnorm package
library(mvtnorm)

## 30 rows (sites) with two latent variables 
## The first 15 sites are centred at (1,2) and the last 15 at (-3,-1)
true.lv <- rbind(rmvnorm(n=15,mean=c(1,2)),rmvnorm(n=15,mean=c(-3,-1))) 
## 30 columns (species)
## Each row holds a column-specific intercept, two latent variable 
## coefficients, and a dispersion parameter (here fixed at 1)
lv.coefs <- cbind(matrix(runif(30*3),30,3),1)

## Model matrix for the 30 sites; no intercept column is included
X <- matrix(rnorm(30*4),30,4) 
## 4 explanatory variables
## One row of coefficients per species, one column per explanatory variable
X.coefs <- matrix(rnorm(30*4),30,4)

## Arguments are matched by position: true.lv, lv.coefs, X, X.coefs
sim.y <- create.life(true.lv, lv.coefs, X, X.coefs, family = "normal")

\dontrun{
## Fit a boral model with two latent variables to the simulated responses
fit.boral <- boral(sim.y, X = X, family = "normal", num.lv = 2)

summary(fit.boral)
}


## Example 2 - Simulate a response matrix of ordinal data
## (uses rmvnorm from the mvtnorm package, loaded in Example 1)

## 30 rows (sites) with two latent variables 
true.lv <- rbind(rmvnorm(15,mean=c(-2,-2)),rmvnorm(15,mean=c(2,2)))
## 10 columns (species)
## Each row holds a column-specific intercept and two latent variable 
## coefficients
true.lv.coefs <- rmvnorm(10,mean = rep(0,3))
## Impose a sum-to-zero constraint on the column effects
## i.e. the last intercept is set to minus the sum of all the others
true.lv.coefs[nrow(true.lv.coefs),1] <- -sum(true.lv.coefs[-nrow(true.lv.coefs),1])
## Cutoffs for cumulative probit regression (must be in increasing order)
## These 9 cutoffs are common to all columns
true.ordinal.cutoffs <- seq(-2,10,length.out=10-1)

sim.y <- create.life(true.lv = true.lv, lv.coefs = true.lv.coefs, 
     family = "ordinal", cutoffs = true.ordinal.cutoffs) 

\dontrun{
## Fit a boral model with two latent variables to the simulated responses
fit.boral <- boral(y = sim.y, family = "ordinal", num.lv = 2)
}

\dontrun{
## Example 3 - Simulate a response matrix of count data based off
## a fitted boral model involving traits (ants data from mvabund)
library(mvabund)
data(antTraits)

y <- antTraits$abun
X <- as.matrix(antTraits$env)
## Include only traits 1, 2, and 5, plus an intercept
traits <- as.matrix(cbind(1,antTraits$traits[,c(1,2,5)]))
## Please see help file for boral regarding the use of which.traits
## Here the list has one element for the intercepts plus one for each 
## column of X, and every element selects all columns of traits
which.traits <- vector("list",ncol(X)+1)
for(i in seq_along(which.traits)) which.traits[[i]] <- seq_len(ncol(traits))

fit.traits <- boral(y, X = X, traits = traits, which.traits = which.traits, 
	family = "poisson", num.lv = 2)

## The hard way
## Pass the posterior medians from the fitted model to create.life by hand.
## Note that true.lv = NULL, so no latent variables are included here
sim.y <- create.life(true.lv = NULL, lv.coefs = fit.traits$lv.coefs.median, 
	X = X, X.coefs = fit.traits$X.coefs.median, 
	traits = traits, traits.coefs = fit.traits$traits.coefs.median, 
	family = "poisson")

## The easy way
## Uses the defaults nsim = 1 and est = "median"
sim.y <- simulate(object = fit.traits)

}


}