% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bcorsis.R
\name{bcorsis}
\alias{bcorsis}
\title{Ball Correlation based Sure Independence Screening (BCor-SIS)}
\usage{
bcorsis(
  x,
  y,
  d = "small",
  weight = c("constant", "probability", "chisquare"),
  method = "standard",
  distance = FALSE,
  category = FALSE,
  parms = list(d1 = 5, d2 = 5, df = 3),
  num.threads = 0
)
}
\arguments{
\item{x}{a numeric matrix or data.frame including \eqn{n} rows and \eqn{p} columns. 
Each row is an observation vector and each column corresponds to an explanatory variable, generally \eqn{p >> n}.}

\item{y}{a numeric vector, matrix, or data.frame.}

\item{d}{the hard cutoff rule suggests selecting \eqn{d} variables. Setting \code{d = "large"} or 
\code{d = "small"} means \code{n - 1} or \code{floor(n/log(n))} 
variables are selected. If \code{d} is an integer, \code{d} variables are selected. Default: \code{d = "small"}.}

\item{weight}{a logical or character string used to choose the weight form of the Ball Covariance statistic. 
If input is a character string, it must be one of \code{"constant"}, \code{"probability"}, or \code{"chisquare"}. 
Any unambiguous substring can be given. 
If input is a logical value, \code{weight = TRUE} is equivalent to \code{weight = "probability"} and 
\code{weight = FALSE} is equivalent to \code{weight = "constant"}.
Default: \code{weight = "constant"}.}

\item{method}{specific method for the BCor-SIS procedure. It must be one of \code{"standard"},
\code{"lm"}, \code{"gam"}, \code{"interaction"}, or \code{"survival"}.
Setting \code{method = "standard"} means performing standard SIS procedure 
while the options \code{"lm"} and \code{"gam"} mean carrying out iterative SIS procedure with ordinary 
linear regression and generalized additive models, respectively.
The options \code{"interaction"} and \code{"survival"} are designed for detecting variables 
with potential linear interaction and associated with left censored responses, respectively. 
Any unambiguous substring can be given. Default: \code{method = "standard"}.}

\item{distance}{if \code{distance = TRUE}, \code{y} will be considered as a distance matrix. 
This argument is only available when \code{method = "standard"} or \code{method = "interaction"}. Default: \code{distance = FALSE}.}

\item{category}{a logical value or integer vector indicating columns to be selected as categorical variables.
If \code{category} is an integer vector, the positive/negative integers select/discard the corresponding columns;
If \code{category} is a logical value, \code{category = TRUE} selects all columns and \code{category = FALSE} selects no columns.
Default: \code{category = FALSE}.}

\item{parms}{parameters list only available when \code{method = "lm"} or \code{"gam"}. 
It contains three parameters: \code{d1}, \code{d2}, and \code{df}. \code{d1} is the
number of initially selected variables, \code{d2} is the number of variables added in each iteration.
\code{df} is the degrees of freedom of the basis in generalized additive models, playing a role only when \code{method = "gam"}. 
Default: \code{parms = list(d1 = 5, d2 = 5, df = 3)}.}

\item{num.threads}{number of threads. If \code{num.threads = 0}, then all available cores will be used. Default: \code{num.threads = 0}.}
}
\value{
\item{\code{ix}}{the indices vector corresponding to variables selected by BCor-SIS.} 
\item{\code{method}}{the method used.} 
\item{\code{weight}}{the weight used.} 
\item{\code{complete.info}}{a \code{list} mainly containing a \eqn{p \times 3} matrix, 
where each row is a variable and each column is a weighted Ball Correlation statistic. 
If \code{method = "gam"} or \code{method = "lm"}, \code{complete.info} is an empty list.}
}
\description{
Generic non-parametric sure independence screening (SIS) procedure based on Ball Correlation.
Ball correlation is a generic measure of dependence in Banach spaces.
}
\details{
\code{bcorsis} performs a model-free generic sure independence screening procedure, 
BCor-SIS, to pick out variables from \code{x} which are potentially associated with \code{y}. 
BCor-SIS relies on Ball correlation, a universal dependence measure in Banach spaces.
Ball correlation (BCor) ranges from 0 to 1. A larger BCor implies that two variables are more likely to be associated, while 
a BCor equal to 0 implies that they are unassociated. (See \code{\link{bcor}} for details.)
Consequently, BCor-SIS picks out the variables with larger BCor values with \code{y}.

Theory and numerical results indicate that BCor-SIS has the following advantages:
\itemize{
\item BCor-SIS can retain the efficient variables even when the dimensionality (i.e., \code{ncol(x)}) is 
an exponential order of the sample size (i.e., \code{exp(nrow(x))});
\item It is distribution-free and model-free;
\item It is very robust;
\item It works well for complex data, such as shape and survival data.
}

If \code{x} is a matrix, the sample sizes of \code{x} and \code{y} must agree.
If \code{x} is a \code{\link{list}} object, each element of this \code{list} must have the same sample size.
\code{x} and \code{y} must not contain missing or infinite values. 

When \code{method = "survival"}, the matrix or data.frame passed to \code{y} must have exactly two columns, where the first column is 
the event (failure) time and the second column is a dichotomous censoring status.
}
\note{
\code{bcorsis} simultaneously computes Ball Correlation statistics with 
\code{"constant"}, \code{"probability"}, and \code{"chisquare"} weights.
Users can get the Ball Correlation statistics for the other weights from the \code{complete.info} element of the output. 
We give a quick example below to illustrate.
}
\examples{
\dontrun{

############### Quick Start for bcorsis function ###############
# Simulate n = 150 observations of p = 3000 standard-normal predictors.
# The response depends linearly on column 1 and quadratically on column 3.
set.seed(1)
n <- 150
p <- 3000
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
eps <- rnorm(n)
y <- 3 * x[, 1] + 5 * x[, 3]^2 + eps
# Screen with the default settings.
res <- bcorsis(x = x, y = y)
# Leading selected indices, then the first rows of the statistic matrix.
head(res$ix, n = 6)
head(res$complete.info$statistic, n = 6)

############### BCor-SIS: Censored Data Example ###############
# Load the bundled genlung data and screen its covariates against the
# survival response.
data("genlung")
result <- bcorsis(
  x = genlung$covariate,
  y = genlung$survival,
  method = "survival"
)
# Map the selected column indices back to column names and show the first one.
index <- result$ix
top_gene <- colnames(genlung$covariate)[index]
head(top_gene, 1)


############### BCor-SIS: Interaction Pursuing ###############
# The response is driven by a three-way product of columns 1, 5 and 10,
# plus standard-normal noise.
set.seed(1)
n <- 150
p <- 3000
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
eps <- rnorm(n)
y <- 3 * x[, 1] * x[, 5] * x[, 10] + eps
# Screen with the interaction-pursuing variant and list the leading indices.
res <- bcorsis(x = x, y = y, method = "interaction")
head(res[["ix"]], n = 6)

############### BCor-SIS: Iterative Method ###############
library(mvtnorm)
set.seed(1)
n <- 150
p <- 3000
# Equicorrelated predictors: unit variances, pairwise covariance 0.5.
sigma_mat <- matrix(0.5, nrow = p, ncol = p)
diag(sigma_mat) <- 1
x <- rmvnorm(n = n, sigma = sigma_mat)
eps <- rnorm(n)
# The p x p covariance matrix is no longer needed; drop it and run the
# garbage collector before screening.
rm(sigma_mat); gc(reset = TRUE)
y <- 3 * (x[, 1])^2 + 5 * (x[, 2])^2 + 5 * x[, 8] - 8 * x[, 16] + eps
# Iterative screening with ordinary linear regression; show the selection
# before `res` is reused for the next fit.
res <- bcorsis(y = y, x = x, method = "lm", d = 15)
res[["ix"]]
# Iterative screening with generalized additive models.
res <- bcorsis(y = y, x = x, method = "gam", d = 15)
res[["ix"]]

############### Weighted BCor-SIS: Probability weight ###############
# Linear signal in column 1 and quadratic signal in column 3, plus noise.
set.seed(1)
n <- 150
p <- 3000
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
eps <- rnorm(n)
y <- 3 * x[, 1] + 5 * x[, 3]^2 + eps
# "prob" is an unambiguous abbreviation of the "probability" weight.
res <- bcorsis(x = x, y = y, weight = "prob")
head(res[["ix"]], n = 6)
# Alternatively, use the chi-square weight ("chisq" abbreviates "chisquare"):
res <- bcorsis(x = x, y = y, weight = "chisq")
head(res[["ix"]], n = 6)

############### BCor-SIS: GWAS data ###############
# Simulate p genotype-like columns, each taking values in {0, 1, 2}.
set.seed(1)
n <- 150
p <- 3000
x <- vapply(
  seq_len(p),
  function(j) sample(0:2, size = n, replace = TRUE),
  integer(n)
)
eps <- rnorm(n)
y <- 6 * x[, 1] - 7 * x[, 2] + 5 * x[, 3] + eps
# Treat every column of x as categorical.
res <- bcorsis(x = x, y = y, category = TRUE)
head(res[["ix"]], n = 6)
head(res[["complete.info"]][["statistic"]], n = 6)

# Prepend two continuous columns to x.
x <- cbind(matrix(rnorm(n * 2), nrow = n, ncol = 2), x)
# Negative indices discard the first two columns from the categorical set
# (x itself is left unchanged):
res <- bcorsis(x = x, y = y, category = c(-1, -2))
head(res[["ix"]], n = 6)

# Put three of the categorical columns first, followed by p continuous ones,
# and mark only those three leading columns as categorical.
x <- cbind(x[, 3:5], matrix(rnorm(n * p), nrow = n, ncol = p))
res <- bcorsis(x = x, y = y, category = 1:3)
head(res[["ix"]], n = 10)
}
}
\references{
Wenliang Pan, Xueqin Wang, Weinan Xiao & Hongtu Zhu (2018) A Generic Sure Independence Screening Procedure, Journal of the American Statistical Association, DOI: 10.1080/01621459.2018.1462709
}
\seealso{
\code{\link{bcor}}
}
\author{
Wenliang Pan, Weinan Xiao, Xueqin Wang, Hongtu Zhu, Jin Zhu
}
