
##
##  defines some useful predicates for use with assert functions in assertr                      ##
##


#' Returns TRUE if value is not NA
#'
#' This is the inverse of \code{\link[base]{is.na}} if it is
#' used on a atomic element. This is a convenience function meant
#' to be used as a predicate in an \code{\link{assertr}} assertion.
#'
#' @param x A single atomic value
#' @param allow.NaN A logical indicating whether NaNs should be allowed
#'        (default FALSE)
#' @return TRUE if x is not NA, FALSE otherwise
#' @seealso \code{\link{is.na}}
#' @examples
#' not_na(NA)
#' not_na(2.8)
#' not_na("tree")
#'
#' @export
not_na <- function(x, allow.NaN=FALSE){
  if(length(x)>1)            stop("not_na must be called with single element")
  if(is.null(x))             stop("not_na must be called with single element")
  if(allow.NaN && is.nan(x)) return(TRUE)
  if(is.na(x))               return(FALSE)
  return(TRUE)
}


#' Creates bounds checking predicate
#'
#' This function returns a predicate function that will take a single
#' numeric value and return TRUE if the value is within the bounds set.
#' This does not actually check the bounds of anything--it only returns
#' a function that actually does the checking when called with a number.
#' This is a convenience function meant to return a predicate function to
#' be used in an \code{\link{assertr}} assertion.
#'
#' @param lower.bound The lowest permitted value
#' @param upper.bound The upper permitted value
#' @param include.lower A logical indicating whether lower bound
#'        should be inclusive (default TRUE)
#' @param include.upper A logical indicating whether upprt bound
#'        should be inclusive (default TRUE)
#' @param allow.na A logical indicating whether NAs (including NaNs)
#'        should be permitted (default TRUE)
#'
#' @return A function that takes one numeric and returns TRUE
#'         if the value is within the bounds defined by the
#'         arguments supplied by \code{within_bounds} and FALSE
#'         otherwise
#'
#' @examples
#' predicate <- within_bounds(3,4)
#' predicate(pi)
#'
#' ## is equivalent to
#'
#' within_bounds(3,4)(pi)
#'
#' # a correlation coefficient must always be between 0 and 1
#' coeff <- cor.test(c(1,2,3), c(.5, 2.4, 4))[["estimate"]]
#' within_bounds(0,1)(coeff)
#'
#' ## check for positive number
#' positivep <- within_bounds(0, Inf, include.lower=FALSE)
#'
#' ## this is meant to be used as a predicate in an assert statement
#' assert(mtcars, within_bounds(4,8), cyl)
#'
#' ## or in a pipeline, like this was meant for
#'
#' library(magrittr)
#'
#' mtcars %>%
#'   assert(within_bounds(4,8), cyl)
#'
#' @export
within_bounds <- function(lower.bound, upper.bound,
                          include.lower=TRUE, include.upper=TRUE,
                          allow.na=TRUE){
  if(!(is.numeric(lower.bound) && is.numeric(upper.bound)))
    stop("bounds must be numeric")
  if(lower.bound >= upper.bound)
    stop("lower bound must be strictly lower than upper bound")
  function(x){
    if(length(x)>1)      stop("bounds must be checked on a single element")
    if(is.null(x))       stop("bounds must be checked on a single element")
    if(!is.numeric(x))   stop("bounds must only be checked on numerics")
    if(is.na(x)){
      if(allow.na)    return(TRUE)
      if(!allow.na)   return(FALSE)
    }
    lower.operator <- `>=`
    if(!include.lower) lower.operator <- `>`
    upper.operator <- `<=`
    if(!include.upper) upper.operator <- `<`
    if(lower.operator(x, lower.bound) && upper.operator(x, upper.bound))
      return(TRUE)
    return(FALSE)
  }
}
# so, this function returns a function to be used as argument to another
# function


#' Returns TRUE if value in set
#'
#' This function returns a predicate function that will take a single
#' value and return TRUE if the value is a member of the set of objects
#' supplied. This doesn't actually check the membership of anything--it
#' only returns a function that actually does the checking when called
#' with a value. This is a convenience function meant to return a
#' predicate function to be used in an \code{\link{assertr}} assertion.
#'
#' @param ... objects that make up the set
#' @param allow.na A logical indicating whether NAs (including NaNs)
#'        should be permitted (default TRUE)
#' @return A function that takes one value and returns TRUE
#'         if the value is in the set defined by the
#'         arguments supplied by \code{in_set} and FALSE
#'         otherwise
#' @seealso \code{\link{\%in\%}}
#' @examples
#' predicate <- in_set(3,4)
#' predicate(4)
#'
#' ## is equivalent to
#'
#' in_set(3,4)(3)
#'
#' # the remainder of division by 2 is always 0 or 1
#' rem <- 10 %% 2
#' in_set(0,1)(rem)
#'
#' ## this is meant to be used as a predicate in an assert statement
#' assert(mtcars, in_set(3,4,5), gear)
#'
#' ## or in a pipeline, like this was meant for
#'
#' library(magrittr)
#'
#' mtcars %>%
#'   assert(in_set(3,4,5), gear) %>%
#'   assert(in_set(0,1), vs, am)
#'
#' @export
in_set <- function(..., allow.na=TRUE){
  set <- c(...)
  if(!length(set)) stop("can not test for membership in empty set")
  function(x){
    if(length(x)>1)      stop("bounds must be checked on a single element")
    if(is.null(x))       stop("bounds must be checked on a single element")
    if(x %in% set)
      return(TRUE)
    if(is.na(x))
      if(allow.na)
        return(TRUE)
    return(FALSE)
  }
}


#' Return a function to create z-score checking predicate
#'
#' This function takes one argument, the number of standard deviations
#' within which to accept a particular data point.
#'
#' As an example, if '2' is passed into this function, this will return
#' a function that takes a vector and figures out the bounds of two
#' standard deviations from the mean. That function will then return
#' a \code{\link{within_bounds}} function that can then be applied
#' to a single datum. If the datum is within two standard deviations of
#' the mean of the vector given to the function returned by this function,
#' it will return TRUE. If not, FALSE.
#'
#' This function isn't meant to be used on its own, although it can. Rather,
#' this function is meant to be used with the \code{\link{insist}} function to
#' search for potentially erroneous data points in a data set.
#'
#' @param n The number of standard deviations from the mean
#'        within which to accept a datum
#' @param ... Additional arguments to be passed to \code{\link{within_bounds}}
#'
#' @return A function that takes a vector and returns a
#'         \code{\link{within_bounds}} predicate based on the standard deviation
#'         of that vector.
#'
#' @examples
#' test.vector <- rnorm(100, mean=100, sd=20)
#'
#' within.one.sd <- within_n_sds(1)
#' custom.bounds.checker <- within.one.sd(test.vector)
#' custom.bounds.checker(105)     # returns TRUE
#' custom.bounds.checker(40)      # returns FALSE
#'
#' # same as
#' within_n_sds(1)(test.vector)(40)    # returns FALSE
#'
#' within_n_sds(2)(test.vector)(as.numeric(NA))  # returns TRUE
#' # because, by default, within_bounds() will accept
#' # NA values. If we want to reject NAs, we have to
#' # provide extra arguments to this function
#' within_n_sds(2, allow.na=FALSE)(test.vector)(as.numeric(NA))  # returns TRUE
#'
#' # or in a pipeline, like this was meant for
#'
#' library(magrittr)
#'
#' iris %>%
#'   insist(within_n_sds(5), Sepal.Length)
#'
#' @export
within_n_sds <- function(n, ...){
  if(!is.numeric(n) || length(n)!=1 || n<=0){
    stop("'n' must be a positive number")
  }
  function(a.vector){
    if(!is.vector(a.vector) || !is.numeric(a.vector))
      stop("argument must be a numeric vector")
    mu <- mean(a.vector, na.rm=TRUE)
    stdev <- sd(a.vector, na.rm=TRUE)
    if(is.na(mu)) stop("mean of vector is NA")
    if(is.na(stdev)) stop("standard deviations of vector is NA")
    within_bounds((mu-(n*stdev)), (mu+(n*stdev)), ...)
  }
}


