% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sentomodel.R
\name{ctr_model}
\alias{ctr_model}
\title{Set up control for sentiment measures-based regression modelling}
\usage{
ctr_model(model = c("gaussian", "binomial", "multinomial"), type = c("BIC",
  "AIC", "Cp", "cv"), intercept = TRUE, do.iter = FALSE, h = 0,
  alphas = seq(0, 1, by = 0.2), nSample = NULL, trainWindow = NULL,
  testWindow = NULL, oos = 0, start = 1, do.progress = TRUE,
  do.parallel = FALSE)
}
\arguments{
\item{model}{a \code{character} vector with one of the following: "\code{gaussian}" (linear regression), "\code{binomial}"
(binomial logistic regression), or "\code{multinomial}" (multinomial logistic regression).}

\item{type}{a \code{character} vector indicating which model calibration approach to use. Supports "\code{BIC}",
"\code{AIC}" and "\code{Cp}" (Mallows's Cp) as sparse regression adapted information criteria (cf. ``On the `degrees of
freedom' of the LASSO''; Zou, Hastie and Tibshirani, 2007), and "\code{cv}" (cross-validation based on the
\code{\link[caret]{train}} function from the \pkg{caret} package). The adapted information criteria are currently
only available for linear regression.}

\item{intercept}{a \code{logical}, \code{TRUE} by default fits an intercept.}

\item{do.iter}{a \code{logical}, \code{TRUE} induces an iterative estimation of models at the given \code{nSample} size and
performs the associated one-step-ahead out-of-sample prediction exercise through time.}

\item{h}{an \code{integer} value that shifts the time series to have the desired prediction setup; \code{h = 0} means
no change to the input data (nowcasting assuming data is aligned properly), \code{h > 0} shifts the dependent variable by
\code{h} periods (i.e. rows) further in time (forecasting), \code{h < 0} shifts the independent variables by \code{h}
periods.}

\item{alphas}{a \code{numeric} vector of the different alphas to test for during calibration, between 0 and 1. A value of
0 pertains to Ridge regression, a value of 1 to LASSO regression; values in between are pure elastic net. The lambda
values tested for are chosen by the \code{\link[glmnet]{glmnet}} function or set to \code{10^seq(2, -2, length.out = 100)}
in case of cross-validation.}

\item{nSample}{a positive \code{integer} as the size of the sample for model estimation at every iteration (ignored if
\code{do.iter = FALSE}).}

\item{trainWindow}{a positive \code{integer} as the size of the training sample in cross-validation (ignored if
\code{type != "cv"}).}

\item{testWindow}{a positive \code{integer} as the size of the test sample in cross-validation (ignored if
\code{type != "cv"}).}

\item{oos}{a non-negative \code{integer} to indicate the number of periods to skip from the end of the cross-validation
training sample (out-of-sample) up to the test sample (ignored if \code{type != "cv"}).}

\item{start}{a positive \code{integer} to indicate at which point the iteration has to start (ignored if
\code{do.iter = FALSE}). For example, given 100 possible iterations, \code{start = 70} leads to model estimations
only for the last 31 samples.}

\item{do.progress}{a \code{logical}, if \code{TRUE} progress statements are displayed during model calibration.}

\item{do.parallel}{a \code{logical}, if \code{TRUE} the \code{\%dopar\%} construct from the \pkg{foreach} package is
applied for iterative model estimation. A proper parallel backend needs to be set up to make it work. No progress statements
are displayed whatsoever when \code{TRUE}. For cross-validation models, parallelization can also be carried out for
single-run models, whenever a parallel backend is set up. See the examples in \code{\link{sento_model}}.}
}
\value{
A \code{list} encapsulating the control parameters.
}
\description{
Sets up a control object for linear or nonlinear modelling of a response variable onto a large panel of
textual sentiment measures (and potentially other variables). See \code{\link{sento_model}} for details on the
estimation and calibration procedure.
}
\examples{
# information criterion based model control functions
ctrIC1 <- ctr_model(model = "gaussian", type = "BIC", do.iter = FALSE, h = 0,
                    alphas = seq(0, 1, by = 0.10))
ctrIC2 <- ctr_model(model = "gaussian", type = "AIC", do.iter = TRUE, h = 0, nSample = 100)

# cross-validation based model control functions
ctrCV1 <- ctr_model(model = "gaussian", type = "cv", do.iter = FALSE, h = 0, trainWindow = 250,
                    testWindow = 4, oos = 0, do.progress = TRUE)
ctrCV2 <- ctr_model(model = "binomial", type = "cv", h = 0, trainWindow = 250,
                    testWindow = 4, oos = 0, do.progress = TRUE)
ctrCV3 <- ctr_model(model = "multinomial", type = "cv", h = 0, trainWindow = 250,
                    testWindow = 4, oos = 0, do.progress = TRUE)
ctrCV4 <- ctr_model(model = "gaussian", type = "cv", do.iter = TRUE, h = 0, trainWindow = 45,
                    testWindow = 4, oos = 0, nSample = 70, do.progress = TRUE)

}
\seealso{
\code{\link{sento_model}}
}
\author{
Samuel Borms, Keven Bluteau
}
