% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DMCEM.R
\name{DMCEM}
\alias{DMCEM}
\title{Distributed Monte Carlo Expectation-Maximization (DMCEM) Algorithm}
\description{
  Implements a distributed version of the Monte Carlo EM algorithm for handling missing response variables in linear regression models.
  By running multiple simulations and averaging the results, it provides more stable parameter estimates compared to standard EM.
}
\usage{
DMCEM(data, R = 50, tol = 0.01, nb = 50)
}
\arguments{
  \item{data}{
    A data frame where the first column is the response variable (with missing values) and subsequent columns are predictors.
  }
  \item{R}{
    Integer specifying the number of Monte Carlo simulations. Larger values improve stability but increase computation time (default = 50).
  }
  \item{tol}{
    Numeric value indicating the convergence tolerance. The algorithm stops when the change in coefficients between iterations is below this threshold (default = 0.01).
  }
  \item{nb}{
    Integer specifying the maximum number of iterations per simulation. Prevents infinite loops if convergence is not achieved (default = 50).
  }
}
\value{
  A list containing:
  \describe{
    \item{\code{Yhat}}{A vector of imputed response values with missing data filled in.}
    \item{\code{betahat}}{A vector of final regression coefficients, averaged across simulations.}
  }
}
\details{
  The DMCEM algorithm works by:
  \enumerate{
    \item Splitting data into observed and missing response subsets.
    \item Running multiple MCEM simulations with random imputations.
    \item Averaging results across simulations to reduce variance.
    \item Using robust matrix inversion to handle near-singular designs.
  }
  This approach is particularly useful for datasets with a large proportion of missing responses or high variability in the data.
}
\examples{
% The percent sign below is escaped because Rd treats an unescaped percent
% sign as a comment start, even inside R example code.
# Generate data with 20\% missing responses
set.seed(123)
data <- data.frame(
  Y = c(rnorm(80), rep(NA, 20)),
  X1 = rnorm(100),
  X2 = runif(100)
)

# Run DMCEM with 50 simulations
result <- DMCEM(data, R = 50, tol = 0.001, nb = 100)

# View imputed values and coefficients
head(result$Yhat)
result$betahat

% NOTE(review): converged_ratio and sigma2 are not listed in the Value section
% of this page; confirm in R/DMCEM.R that the returned list contains them.
% Accessing a missing list element with the dollar operator yields NULL.
# Check convergence and variance
result$converged_ratio
result$sigma2
}




