% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/covregrf.R
\name{covregrf}
\alias{covregrf}
\title{Covariance Regression with Random Forests}
\usage{
covregrf(
  formula,
  data,
  params.rfsrc = list(ntree = 1000, mtry = ceiling(px/3), nsplit = max(round(n/50),
    10)),
  nodesize.set = round(0.5^(1:100) * sampsize)[round(0.5^(1:100) * sampsize) > py],
  importance = FALSE
)
}
\arguments{
\item{formula}{Object of class \code{formula} or \code{character} describing
the model to fit. Interaction terms are not supported.}

\item{data}{The multivariate data set which has \eqn{n} observations and
\eqn{px+py} variables where \eqn{px} and \eqn{py} are the number of
covariates (\eqn{X}) and response variables (\eqn{Y}), respectively. Should
be a data.frame.}

\item{params.rfsrc}{List of parameters that should be passed to
\code{randomForestSRC}. In the default parameter set, \code{ntree} = 1000,
\code{mtry} = \eqn{px/3}  (rounded up), \code{nsplit} =
\eqn{max(round(n/50), 10)}. See \code{randomForestSRC} for possible
parameters.}

\item{nodesize.set}{The set of \code{nodesize} levels for tuning. The default
set consists of the sub-sample size (\eqn{.632n}) multiplied by successive
powers of one half (\eqn{0.5^k} for \eqn{k} from 1 to 100) and rounded,
keeping only the values greater than the number of response variables
(\eqn{py}). See below for details of the \code{nodesize} tuning.}

\item{importance}{Should variable importance of covariates be assessed? The
default is \code{FALSE}.}
}
\value{
An object of class \code{(covregrf, grow)} which is a list with the
  following components:

  \item{predicted.oob}{OOB predicted covariance matrices for training
    observations.}
  \item{importance}{Variable importance measures (VIMP) for covariates.}
  \item{best.nodesize}{Best \code{nodesize} value selected with the proposed
    tuning method.}
  \item{params.rfsrc}{List of parameters that were used to fit the random
    forest with \code{randomForestSRC}.}
  \item{n}{Sample size of the data (\code{NA}'s are omitted).}
  \item{xvar.names}{A character vector of the covariate names.}
  \item{yvar.names}{A character vector of the response variable names.}
  \item{xvar}{Data frame of covariates.}
  \item{yvar}{Data frame of responses.}
  \item{rf.grow}{Fitted random forest object. This object is used for
    prediction with training or new data.}
}
\description{
Estimates the covariance matrix of a multivariate response given a set of
  covariates using a random forest framework.
}
\section{Details}{

  For mean regression problems, random forests search for the optimal level
  of the \code{nodesize} parameter by using out-of-bag (OOB) prediction
  errors computed as the difference between the true responses and OOB
  predictions. The \code{nodesize} value having the smallest OOB prediction
  error is chosen. However, the covariance regression problem is
  unsupervised by nature. Therefore, we tune the \code{nodesize} parameter with a
  heuristic method. We use OOB covariance matrix estimates. The general idea
  of the proposed tuning method is to find the \code{nodesize} level where
  the OOB covariance matrix predictions converge. The steps are as follows.
  Firstly, we train separate random forests for a set of \code{nodesize}
  values. Secondly, we compute the OOB covariance matrix estimates for each
  random forest. Next, we compute the mean absolute difference (MAD) between
  the upper triangular OOB covariance matrix estimates of two consecutive
  \code{nodesize} levels over all observations. Finally, we take the pair of
  \code{nodesize} levels having the smallest MAD. Among these two
  \code{nodesize} levels, we select the smaller since in general deeper trees
  are desired in random forests.
}

\examples{
## limit the number of cores used by the example; keep the previous option
## values so that they can be restored at the end
old.opts <- options(rf.cores=2, mc.cores=2)

## load generated example data
data(data, package = "CovRegRF")
xvar.names <- colnames(data$X)
yvar.names <- colnames(data$Y)
data1 <- data.frame(data$X, data$Y)

## define train/test split (a 0.6 fraction of the rows is used for training);
## the test data keeps only the covariate columns
set.seed(2345)
smp <- sample(seq_len(nrow(data1)), size = round(nrow(data1)*0.6), replace = FALSE)
traindata <- data1[smp,,drop=FALSE]
testdata <- data1[-smp, xvar.names, drop=FALSE]

## formula object: all responses joined with "+" on the left-hand side,
## all remaining variables (".") on the right-hand side
formula <- as.formula(paste(paste(yvar.names, collapse="+"), ".", sep=" ~ "))

## train covregrf with 50 trees and request variable importance
covregrf.obj <- covregrf(formula, traindata, params.rfsrc = list(ntree = 50),
  importance = TRUE)

## get the OOB predictions
pred.oob <- covregrf.obj$predicted.oob

## predict with new test data
pred.obj <- predict(covregrf.obj, newdata = testdata)
pred <- pred.obj$predicted

## get the variable importance measures
vimp <- covregrf.obj$importance

## restore the option values that were in effect before the example
options(old.opts)

}
\seealso{
\code{\link{predict.covregrf}},
  \code{\link{significance.test}},
  \code{\link{vimp.covregrf}},
  \code{\link{print.covregrf}}
}
