\name{data_org}
\alias{data_org}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Prepare Variables for Bayesian Mediation Analysis with BART
}
\description{
Read in exposure, mediators, outcome, and covariates, and transform them into formats fit for BART fitting.
}
\usage{
data_org(pred, m, y, refy = rep(NA, ncol(data.frame(y))), 
         predref = rep(NA, ncol(data.frame(pred))), deltap = NA, 
         deltam = NA, mref = rep(NA, ncol(data.frame(m))), cova = NULL, 
         cova.ref = list(), mcov = NULL, mcov.ref = list(), mclist = NULL, 
         complete = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{pred}{
The vector/matrix of the exposure/predictor variable(s).
}
  \item{m}{
The dataframe of all potential mediators
}
  \item{y}{
The vector/matrix of the outcome(s).
}
  \item{refy}{
The reference groups of y when the corresponding outcome is binary or categorical.
}
  \item{predref}{
The reference groups of pred when the corresponding predictor is binary or categorical.
}
  \item{deltap}{
A vector whose length equals the number of exposures. The difference in pred used when calculating the rate of change with respect to pred. If not set, the difference is 1 for a categorical predictor and one tenth of the standard deviation of the predictor if it is continuous.
}
  \item{deltam}{
A vector whose length equals the number of mediators. The ith item is the difference in the ith mediator used when calculating the rate of change with respect to that mediator. If not set, the difference is 1 for categorical mediators and one tenth of the standard deviation of the mediator if it is continuous.
}
  \item{mref}{
The reference groups of mediators when the corresponding mediator is binary or categorical.
}
  \item{cova}{
The covariate data for y.
}
  \item{cova.ref}{
The reference group for the binary or categorical covariates in cova.
}
  \item{mcov}{
The covariate data for mediators.
}
  \item{mcov.ref}{
The reference group if the mcovs are categorical or binary.
}
  \item{mclist}{
If mclist is NULL but mcov is not, mcov is applied to all mediators.
If neither mcov nor mclist is NULL, the first item of mclist lists all mediators that use a different set of mcov columns, and the following items give the mcov columns for those mediators in order, with NA if no mcov is to be used. For example, with mclist=list(c(1,2,4),l1=1,l2=NA,l4=c(1,3)), mediator 1, m[,1], uses mcov[,1]; mediator 2 uses no covariates; mediator 4 uses mcov[,c(1,3)]; and all other mediators use all of mcov. Variable names can also be used in place of column numbers in mclist.
}
  \item{complete}{
Set complete=TRUE to use only complete cases in the analysis.
}
}
\details{
The function helps organize input data into formats readable by the BART package for building BART models. It also recognizes the type of the response variable(s), so that the appropriate functions and methods are used for the mediation effect inferences.
}
\value{
Returns the cleaned-up dataset, organized by variable type, which is ready for the Bayesian mediation analysis.
\item{N }{The total number of observations.}
\item{y_type }{The format of the response variable(s): 1 for continuous, 2 binary, 3 categorical, and 4 time-to-event. It is the same length as the number of outcomes.}
\item{y }{The original y with observations of missing data removed, if complete=T.}
\item{y1}{The outcome variables where binary or categorical variables are replaced with dummy design matrix.}
\item{cova}{The covariates for y, where binary or categorical variables are replaced with dummy design matrix.}
\item{npred}{The number of predictors/exposures, where a categorical exposure of k levels has k-1 dummy predictors.}
\item{nm}{The number of original mediators, ncol(m).}
\item{mcov}{Reformated mcov.}
\item{mind}{If mcov is not NULL, mind is a matrix of (# of mediator)*ncol(mcov), cell (i,j) is the indicator of whether the jth column of mcov should be used for mediator i in m1.}
\item{pred1}{The original pred with observations of missing data removed, if complete=T.}
\item{pred2}{pred1 with all categorical or binary variables turned into dummy variables.}
%\item{pred3}{The pred2 with all continuous variables add a deltap.}
\item{binpred1}{The column numbers of binary predictors in pred1.} 
\item{binpred2}{The column numbers of binary predictors in pred2.} 
\item{catpred1}{The column numbers of categorical predictors in pred1.} 
\item{catpred2}{The column numbers of categorical predictors in pred2.} 
\item{contpred1}{The column numbers of continuous predictors in pred1.} 
\item{contpred2}{The column numbers of continuous predictors in pred2.} 
\item{m1}{The original m with observations of missing data removed, if complete=T.}
\item{m2}{m1 with all categorical or binary variables turned into dummy variables.}
\item{m3.1}{The m2 with all continuous variables minus a deltam[i]/2, where i is the ith mediator.}
\item{m3.2}{The m2 with all continuous variables add a deltam[i]/2, where i is the ith mediator.}
\item{p1}{The number of continuous mediators.}
\item{p2}{The number of binary mediators.}
\item{p3}{The number of categorical mediators.}
\item{binm1}{The column number of binary mediators in m1.}
\item{binm2}{The column number of binary mediators in m2.}
\item{catm1}{The column number of categorical mediators in m1.}
\item{catm2}{A matrix with one row per categorical mediator, in the order of catm1. Each row has the start (first column) and end (second column) column numbers of the categorical variable's design matrix in m2.}
\item{contm1}{The column number of continuous mediators in m1.}
\item{contm2}{The column number of continuous mediators in m2.}
\item{deltap}{
A vector whose length equals the number of exposures. The difference in pred used when calculating the rate of change with respect to pred. If not input, the difference is 1 for a categorical predictor and one tenth of the standard deviation of the predictor if it is continuous.
}
\item{deltam}{
A vector whose length equals the number of mediators. The ith item is the difference in the ith mediator used when calculating the rate of change with respect to that mediator. If not set, the difference is 1 for categorical mediators and one tenth of the standard deviation of the mediator if it is continuous.
}
}
\references{
Yu, Q., and Li, B. (2025). "Mediation Analysis with Bayesian Additive Regression Trees," submitted.
}
\author{
Qingzhao Yu and Bin Li
}
\note{
data_org is run within the bma.bart function. Users do not have to run data_org separately.
}
\examples{
# Example calls to data_org covering different predictor and outcome setups.
# try0 to try6 use columns of the weight_behavior data set; try7 uses cgd1.
data("weight_behavior")
#binary predictor
try0= data_org(pred=weight_behavior[,3], m=weight_behavior[,c(2,4:14)], 
               y=weight_behavior[,15], refy = 0, predref = "F")
#add covariate for mediators
# mclist lists mediators 1 to 10 and gives NA for each of them, so none of
# those ten uses mcov; the one remaining mediator (the 11th column of m)
# is not listed and therefore uses all of mcov.
try1= data_org(pred=weight_behavior[,3], m=weight_behavior[,c(2,4:13)], 
               mcov=weight_behavior[,14], mclist=append(list(var=1:10),rep(NA,10)), 
               #"sweater" is used as a cov for "excercises" only
               y=weight_behavior[,15], refy = 0, predref = "F")  #,complete=T
#multiple predictors
# try2 passes a single column as pred with reference group "OTHER"
try2= data_org(pred=weight_behavior[,4], m=weight_behavior[,c(2:3,5:14)], 
               y=weight_behavior[,15], refy = 0, predref = "OTHER")
# try3 passes two columns (1 and 4) as pred
try3= data_org(pred=weight_behavior[,c(1,4)], m=weight_behavior[,c(2:3,5:14)], 
               y=weight_behavior[,15], refy = 0, predref = "OTHER")
#continuous y
try4= data_org(pred=weight_behavior[,4], m=weight_behavior[,c(2:3,5:14)], 
               y=weight_behavior[,1], refy = 0, predref = "OTHER")
#categorical y
try5= data_org(pred=weight_behavior[,1], m=weight_behavior[,c(2:3,5:14)], 
               y=weight_behavior[,4], refy = "", predref = "OTHER")
#add covariates for y and for mediators
# mclist lists mediators 1 to 7: the first six get NA (no mcov) and the
# seventh uses mcov column 1; the unlisted 8th mediator uses all of mcov.
try6= data_org(pred=weight_behavior[,4], m=weight_behavior[,c(5:12)], 
               cova=weight_behavior[,2:3],mcov=weight_behavior[,13:14], 
               mclist=c(list(var=1:7),rep(NA,6),list(1)),
               y=weight_behavior[,1], refy = 0, predref = "OTHER")
#time-to-event outcome
# NOTE(review): Surv is not defined in this file; presumably it comes from
# the survival package, which must be attached for this example to run.
data(cgd1)       #a dataset in the survival package
x=cgd1[,c(4:5,7:12)]
pred=cgd1[,6]
# status is 0 when etime1 is missing and 1 otherwise
status<-ifelse(is.na(cgd1$etime1),0,1)
y=Surv(cgd1$futime,status)          
#for continuous predictor
try7<-data_org(pred=pred,m=x,y=y) 
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory (show via RShowDoc("KEYWORDS")):
% \keyword{ ~kwd1 }
% \keyword{ ~kwd2 }
% Use only one keyword per line.
% For non-standard keywords, use \concept instead of \keyword:
% \concept{ ~cpt1 }
% \concept{ ~cpt2 }
% Use only one concept per line.
