% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/isotree_po.R
\name{isotree_po}
\alias{isotree_po}
\title{Function to run extended isolation forest as SDM.}
\usage{
isotree_po(
  occ,
  occ_test = NULL,
  occ_crs = 4326,
  variables,
  categ_vars = NULL,
  ntrees = 100L,
  sample_size = NA,
  sample_rate = 1,
  ndim = 1L,
  seed = 10L,
  ...,
  offset = 0,
  response = TRUE,
  spatial_response = TRUE,
  check_variable = TRUE,
  visualize = FALSE
)
}
\arguments{
\item{occ}{(\code{data.frame}, \code{sf}, \code{SpatialPointsDataFrame})
The occurrence dataset for training.
There must be column \code{x} and \code{y} for coordinates if
it is a regular \code{data.frame}.}

\item{occ_test}{(\code{data.frame}, \code{sf}, \code{SpatialPointsDataFrame}, or \code{NULL})
The occurrence dataset for independent test. The same structure as \code{occ}.
If not \code{NULL}, there must be column \code{x} and \code{y} for coordinates when it is a
regular \code{data.frame}. If \code{NULL}, no independent test will be used.
The default is \code{NULL}.}

\item{occ_crs}{(\code{numeric} or \code{\link{crs}}) The EPSG number or
\code{\link{crs}} object of occurrence CRS.
The default value is \code{4326}, which is the geographic coordinate system.}

\item{variables}{(\code{RasterStack} or \code{stars}) The stack of environmental variables.}

\item{categ_vars}{(\code{vector} of \code{character} or \code{NULL}) The names of categorical
variables. Must be the same as the names in \code{variables}.}

\item{ntrees}{(\code{integer}) The number of trees for the isolation forest. It must
be integer, which you could use function \code{\link{as.integer}} to convert to.
The default is \code{100L}.}

\item{sample_size}{(\code{integer} or \code{NA}) Alternative argument for \code{sample_rate}.
If not \code{NA}, it should be a number for sampling size in \verb{[2, nrow(occ)]}.
It must be integer, which you could use function \code{\link{as.integer}} to
convert to. The default is \code{NA}.
Only set either \code{sample_size} or \code{sample_rate}.}

\item{sample_rate}{(\code{numeric} or \code{NULL}) Alternative argument for \code{sample_size}.
If not \code{NULL}, it should be a rate for sampling size in \verb{[0, 1]}.
The default is \code{1}. Only set either \code{sample_size} or \code{sample_rate}.}

\item{ndim}{(\code{integer}) ExtensionLevel for isolation forest. It must
be integer, which you could use function \code{\link{as.integer}} to convert
to. Also, it must be no larger than the dimension of environmental variables.
When it is 1, the model is a traditional isolation forest, otherwise the model
is an extended isolation forest. The default is \code{1L}.}

\item{seed}{(\code{integer}) The random seed used in the modeling. It should be an
integer. The default is \code{10L}.}

\item{...}{Other arguments that \code{\link{isolation.forest}} needs.}

\item{offset}{(\code{numeric}) The offset to adjust fitted suitability. The default
is zero. It is highly recommended to leave it as the default.}

\item{response}{(\code{logical}) If \code{TRUE}, generate response curves.
The default is \code{TRUE}.}

\item{spatial_response}{(\code{logical}) If \code{TRUE}, generate spatial response maps.
The default is \code{TRUE}, although it might be slow. NOTE that the SHAP-based map
is not generated here because it is slow. If you want it to be mapped, you could call
function \code{\link{spatial_response}} to make it.}

\item{check_variable}{(\code{logical}) If \code{TRUE}, check the variable importance.
The default is \code{TRUE}.}

\item{visualize}{(\code{logical}) If \code{TRUE}, generate the essential figures
related to the model. The default is \code{FALSE}.}
}
\value{
(\code{POIsotree}) A list of
\itemize{
\item{model (\code{\link{isolation.forest}}) The fitted isolation forest
model}
\item{variables (\code{stars}) The formatted image stack of
environmental variables}
\item{pts_occ (\code{\link{sf}}) A \code{\link{sf}} of training occurrence
dataset}
\item{pts_bg_occ (\code{\link{sf}}) A \code{\link{sf}} of background points
for training dataset evaluation or SHAP dependence plot}
\item{pts_occ_test (\code{\link{sf}} or \code{NULL}) A \code{\link{sf}} of test
occurrence dataset}
\item{pts_bg_occ_test (\code{\link{sf}} or \code{NULL}) A \code{\link{sf}} of
background points for test dataset evaluation or SHAP dependence plot}
\item{var_train (\code{\link{sf}}) A \code{\link{sf}} with values of each
environmental variables for training occurrence}
\item{pred_train (\code{\link{sf}}) A \code{\link{sf}} with values of
prediction for training occurrence}
\item{eval_train (\code{POEvaluation}) A list of presence-only evaluation metrics
based on training dataset. See details of \code{POEvaluation} in
\code{\link{evaluate_po}}}
\item{var_test (\code{\link{sf}} or \code{NULL}) A \code{\link{sf}} with values of each
environmental variables for test occurrence}
\item{pred_test (\code{\link{sf}} or \code{NULL}) A \code{\link{sf}} with values of
prediction for test occurrence}
\item{eval_test (\code{POEvaluation} or \code{NULL}) A list of presence-only evaluation metrics
based on test dataset.
See details of \code{POEvaluation} in \code{\link{evaluate_po}}}
\item{prediction (\code{stars}) The predicted environmental suitability}
\item{marginal_responses (\code{MarginalResponse} or \code{NULL}) A list of marginal response
values of each environmental variables.
See details in \code{\link{marginal_response}}}
\item{offset (\code{numeric}) The offset value set as inputs.}
\item{independent_responses (\code{IndependentResponse} or \code{NULL}) A list of independent
response values of each environmental variables.
See details in \code{\link{independent_response}}}
\item{shap_dependences (\code{ShapDependence} or \code{NULL}) A list of variable
dependence values of each environmental variables.
See details in \code{\link{shap_dependence}}}
\item{spatial_responses (\code{SpatialResponse} or \code{NULL}) A list of spatial variable
dependence values of each environmental variable.
See details in \code{\link{spatial_response}}}
\item{variable_analysis (\code{VariableAnalysis} or \code{NULL}) A list of variable importance
analysis based on multiple metrics.
See details in \code{\link{variable_analysis}}}}
}
\description{
Call isolation forest and its variations to do
species distribution modeling and optionally do model explanation.
}
\details{
Please read details of algorithm \code{\link{isolation.forest}} on
\url{https://github.com/david-cortes/isotree}, and
the R documentation of function \code{\link{isolation.forest}}.
}
\examples{
\donttest{
# Using a pseudo presence-only occurrence dataset of
# virtual species provided in this package
library(dplyr)
library(sf)
library(stars)
library(itsdm)

# Prepare data
data("occ_virtual_species")
occ_virtual_species <- occ_virtual_species \%>\%
  mutate(id = row_number())

set.seed(11)
occ <- occ_virtual_species \%>\% sample_frac(0.7)
occ_test <- occ_virtual_species \%>\% filter(! id \%in\% occ$id)
occ <- occ \%>\% select(-id)
occ_test <- occ_test \%>\% select(-id)

env_vars <- system.file(
  'extdata/bioclim_tanzania_10min.tif',
  package = 'itsdm') \%>\% read_stars() \%>\%
  slice('band', c(1, 5, 12))

# Modeling
mod_virtual_species <- isotree_po(
  occ = occ, occ_test = occ_test,
  variables = env_vars, ntrees = 10,
  sample_rate = 0.6, ndim = 1L,
  seed = 123L)

# Check results
## Evaluation based on training dataset
print(mod_virtual_species$eval_train)
plot(mod_virtual_species$eval_train)

## Response curves
plot(mod_virtual_species$marginal_responses)
plot(mod_virtual_species$independent_responses,
  target_var = c('bio1', 'bio5'))
plot(mod_virtual_species$shap_dependence)

## Relationships between target var and related var
plot(mod_virtual_species$shap_dependence,
  target_var = c('bio1', 'bio5'),
  related_var = 'bio12', smooth_span = 0)

# Variable importance
mod_virtual_species$variable_analysis
plot(mod_virtual_species$variable_analysis)
}

}
\references{
\itemize{
\item{Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou. "Isolation forest."
\emph{2008 Eighth IEEE International Conference on Data Mining}. IEEE, 2008.
\doi{10.1109/ICDM.2008.17}}
\item{Liu, Fei Tony, Kai Ming
Ting, and Zhi-Hua Zhou. "Isolation-based anomaly detection."
\emph{ACM Transactions on Knowledge Discovery from Data (TKDD)} 6.1 (2012): 1-39.
\doi{10.1145/2133360.2133363}}
\item{Liu, Fei Tony,
Kai Ming Ting, and Zhi-Hua Zhou. "On detecting clustered anomalies using
SCiForest." \emph{Joint European Conference on Machine Learning and
Knowledge Discovery in Databases}. Springer, Berlin, Heidelberg, 2010.
\doi{10.1007/978-3-642-15883-4_18}}
\item{Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner. "Extended
isolation forest." \emph{IEEE Transactions on Knowledge and Data Engineering} (2019).
\doi{10.1109/TKDE.2019.2947676}}
\item{\url{https://github.com/david-cortes/isotree}}
\item{References of related feature such as response curves and variable importance
will be listed under their own functions}
}
}
\seealso{
\code{\link{evaluate_po}}, \code{\link{marginal_response}},
\code{\link{independent_response}}, \code{\link{shap_dependence}},
\code{\link{spatial_response}}, \code{\link{variable_analysis}},
\code{\link{isolation.forest}}
}
