% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/logis_firth.R
\name{logis_firth}
\alias{logis_firth}
\title{Main function for fitting the fixed effect logistic model using Firth correction}
\usage{
logis_firth(
  formula = NULL,
  data = NULL,
  Y.char = NULL,
  Z.char = NULL,
  ProvID.char = NULL,
  Y = NULL,
  Z = NULL,
  ProvID = NULL,
  max.iter = 1000,
  tol = 1e-05,
  bound = 10,
  cutoff = 10,
  threads = 1,
  message = TRUE
)
}
\arguments{
\item{formula}{a two-sided formula object describing the model to be fitted,
with the response variable on the left of a ~ operator and covariates on the right,
separated by + operators. The fixed effect of the provider identifier is specified using \code{id()}.}

\item{data}{a data frame containing the variables named in the \code{formula},
or the columns specified by \code{Y.char}, \code{Z.char}, and \code{ProvID.char}.}

\item{Y.char}{a character string specifying the column name of the response variable in the \code{data}.}

\item{Z.char}{a character vector specifying the column names of the covariates in the \code{data}.}

\item{ProvID.char}{a character string specifying the column name of the provider identifier in the \code{data}.}

\item{Y}{a numeric vector representing the response variable.}

\item{Z}{a matrix or data frame representing the covariates, which can include both numeric and categorical variables.}

\item{ProvID}{a numeric vector representing the provider identifier.}

\item{max.iter}{maximum number of iterations allowed if the stopping criterion determined by \code{tol} is not satisfied. The default value is 1,000.}

\item{tol}{tolerance used for stopping the algorithm. The default value is 1e-5.}

\item{bound}{a positive number to avoid inflation of provider effects. The default value is 10.}

\item{cutoff}{an integer specifying the minimum number of observations required for providers.
Providers with fewer observations than the cutoff will be labeled as \code{included = 0} and excluded from model fitting. The default is 10.}

\item{threads}{a positive integer specifying the number of threads to be used. The default value is 1.}

\item{message}{a Boolean indicating whether to print the progress of the fitting process. The default is TRUE.}
}
\value{
A list of objects with S3 class \code{"logis_fe"}:
\item{coefficient}{a list containing the estimated coefficients:
\code{beta}, the fixed effects for each predictor, and \code{gamma}, the effect for each provider.}
\item{variance}{a list containing the variance estimates:
\code{beta}, the variance-covariance matrix of the predictor coefficients, and \code{gamma}, the variance of the provider effects.}
\item{linear_pred}{the linear predictor of each individual.}
\item{fitted}{the predicted probability of each observation having a response of 1.}
\item{observation}{the original response of each individual.}
\item{Loglkd}{the log-likelihood.}
\item{AIC}{Akaike information criterion.}
\item{BIC}{Bayesian information criterion.}
\item{AUC}{area under the ROC curve.}
\item{char_list}{a list of the character vectors representing the column names for
the response variable, covariates, and provider identifier.
For categorical variables, the names reflect the dummy variables created for each category.}
\item{data_include}{the data used to fit the model, sorted by the provider identifier.
For categorical covariates, this includes the dummy variables created for
all categories except the reference level. Additionally, it contains three extra columns:
\code{included}, indicating whether the provider is included based on the \code{cutoff} argument;
\code{all.events}, indicating if all observations in the provider are 1;
\code{no.events}, indicating if all observations in the provider are 0.}
}
\description{
Fixed effects (FE) models suffer from separation issues when all outcomes in a cluster are the same,
leading to infinite estimates and unreliable inference.
Firth's corrected logistic regression (FLR) overcomes this limitation and
outperforms both FE and random effects (RE) models in terms of bias and RMSE.
}
\details{
The function accepts three different input formats:
a formula and dataset, where the formula is of the form \code{response ~ covariates + id(provider)}, with \code{provider} representing the provider identifier;
a dataset along with the column names of the response, covariates, and provider identifier;
or the binary outcome vector \eqn{\mathbf{Y}}, the covariate matrix or data frame \eqn{\mathbf{Z}}, and the provider identifier vector.

This function utilizes OpenMP for parallel processing. For macOS, to enable multi-threading,
users may need to install the OpenMP library (e.g., \code{brew install libomp}) or use a supported compiler such as GCC.
If OpenMP is not detected during installation, the function will transparently fall back to single-threaded execution.
}
\examples{
data(ExampleDataBinary)
outcome <- ExampleDataBinary$Y
covar <- ExampleDataBinary$Z
ProvID <- ExampleDataBinary$ProvID
data <- data.frame(outcome, ProvID, covar)
covar.char <- colnames(covar)
outcome.char <- colnames(data)[1]
ProvID.char <- colnames(data)[2]
formula <- as.formula(paste("outcome ~", paste(covar.char, collapse = " + "), "+ id(ProvID)"))

# Fit the fixed effect logistic model with Firth correction using three input formats
fit_fe1 <- logis_firth(Y = outcome, Z = covar, ProvID = ProvID)
fit_fe2 <- logis_firth(data = data, Y.char = outcome.char,
Z.char = covar.char, ProvID.char = ProvID.char)
fit_fe3 <- logis_firth(formula, data)

}
\references{
Firth, D. (1993) Bias reduction of maximum likelihood estimates.
\emph{Biometrika}, \strong{80(1)}: 27-38.
\cr
}
\seealso{
\code{\link{data_check}}
}
