% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/test.gen.R
\name{test.gen}
\alias{test.gen}
\title{Generate the Test Statistic or Null Distribution Using Permutation}
\usage{
test.gen(
  formula,
  data,
  method = "rf",
  metric,
  nperm = 60,
  subsample = 1,
  p = 0.8,
  poly = TRUE,
  interaction = TRUE,
  degree = 3,
  nrounds = 600,
  nthread = 1,
  permutation = FALSE,
  metricfunc = NULL,
  mlfunc = NULL,
  num_class = NULL,
  progress = TRUE,
  ...
)
}
\arguments{
\item{formula}{Formula specifying the relationship between dependent and independent variables.}

\item{data}{Data frame. The data containing the variables used.}

\item{method}{Character. The modeling method to be used. Options include "xgboost" for gradient boosting, "rf" for random forests, or "svm" for Support Vector Machine. Default is "rf".}

\item{metric}{Character. The type of metric: can be "RMSE", "Kappa", or "Custom". Default is "RMSE".}

\item{nperm}{Integer. The number of generated Monte Carlo samples. Default is 60.}

\item{subsample}{Numeric. The proportion of the data to be used for subsampling. Default is 1 (no subsampling).}

\item{p}{Numeric. The proportion of the data to be used for training. The remaining data will be used for testing. Default is 0.8.}

\item{poly}{Logical. Whether to include polynomial terms of the conditioning variables. Default is TRUE.}

\item{interaction}{Logical. Whether to include interaction terms of the conditioning variables. Default is TRUE.}

\item{degree}{Integer. The degree of polynomial terms to be included if \code{poly} is TRUE. Default is 3.}

\item{nrounds}{Integer. The number of rounds (trees) for methods like xgboost, ranger, and lightgbm. Default is 600.}

\item{nthread}{Integer. The number of threads to use for parallel processing. Default is 1.}

\item{permutation}{Logical. Whether to perform permutation to generate a null distribution. Default is FALSE.}

\item{metricfunc}{Function. A custom metric function provided by the user. The function must take arguments: \code{data}, \code{model}, \code{test_indices}, and \code{test_matrix}, and return a single value performance metric. Default is NULL.}

\item{mlfunc}{Function. A custom machine learning function provided by the user. The function must have the arguments: \code{formula}, \code{data}, \code{train_indices}, \code{test_indices}, and \code{...}, and return a single value performance metric. Default is NULL.}

\item{num_class}{Integer. The number of classes for categorical data (used in xgboost and lightgbm). Default is NULL.}

\item{progress}{Logical. Whether to show a progress bar during the permutation process. Default is TRUE.}

\item{...}{Additional arguments to pass to the machine learning wrapper functions \code{xgboost_wrapper}, \code{ranger_wrapper}, \code{lightgbm_wrapper}, or to a custom-built wrapper function.}
}
\value{
A list containing the test distribution.
}
\description{
This function generates the test statistic or a null distribution through permutation for conditional independence testing.
It supports various machine learning methods, including random forests and extreme gradient boosting, and allows for custom metric functions and model-fitting functions.
}
\examples{
set.seed(123)
data <- data.frame(x1 = rnorm(100),
x2 = rnorm(100),
x3 = rnorm(100),
x4 = rnorm(100),
y = rnorm(100))
result <- test.gen(formula = y ~ x1 | x2 + x3 + x4,
                   metric = "RMSE",
                   data = data)
hist(result$distribution)
}
