##
# ------------------------------------------------------------------------
#
# best.block.sub.size(X, func, PLT=TRUE, qq=0.75, ...) 
#
# Optimal Block Subsampling Size
#
# ------------------------------------------------------------------------
##
#' @aliases best.block.sub.size
#' @title Optimal Block Subsampling Size
#' @description This function determines the optimal block size for subsampling 
#' using a distance-based method. It applies the circular block method and 
#' calculates Kolmogorov distances between consecutive subsampling 
#' distributions to select the most suitable subsampling size.
#' @param X A numeric vector or time series data.
#' @param func A function applied to the blocks.
#' @param PLT Logical. If \code{TRUE} (default), plots the Kolmogorov distances 
#' versus subsampling sizes.
#' @param qq A single numeric value in the open interval \eqn{(0, 1)}. 
#' Determines the scaling factor for subsampling sizes. Higher values result 
#' in more subsampling distributions being computed. Default is \eqn{0.75}.
#' @param ... Optional additional arguments for the \code{func} function. 
#' They are handed to \code{\link{block.sub}} together with \code{func}.
#' @details The procedure relies on the method proposed by Bickel and Sakov (2008) 
#' for determining optimal subsampling sizes. Writing \eqn{n} for 
#' \code{length(X)} and \eqn{K = \lfloor -\log(n)/\log(qq) \rfloor}, the 
#' subsampling distribution is computed for each size 
#' \eqn{\lfloor (1/qq)^k \rfloor}, \eqn{k = 6, \ldots, K}, using 
#' \code{\link{block.sub}} with \code{method = "circular"}. The function 
#' then evaluates the Kolmogorov distance between the subsampling 
#' distributions of consecutive sizes and returns the size at which this 
#' distance is smallest. Tied statistics are handled by comparing the two 
#' empirical distribution functions at distinct values only, so that two 
#' identical distributions are at distance zero.
#' 
#' Because the grid of sizes starts at \eqn{k = 6}, at least two sizes are 
#' available only when \eqn{K \geq 7}. Ensure that \code{qq} is set such that 
#' \eqn{\lfloor -\log(length(X))/\log(qq) \rfloor \geq 7}; otherwise, the 
#' function stops with an error.
#' @return Returns the optimal block size for subsampling: the larger size of 
#' the pair of consecutive sizes whose subsampling distributions are closest 
#' in Kolmogorov distance (the smallest such size if the minimum is attained 
#' more than once).
#' If \code{PLT = TRUE}, a plot of Kolmogorov distances between consecutive subsampling distributions
#' versus subsampling sizes is also displayed. 
#' @references Bertail, P. and Dudek, A. (2025). \emph{Bootstrap for 
#' Dependent Data, with an R package} (by Bernard Desgraupes and Karolina Marek) - submitted.
#' 
#' Bickel, P., and Sakov, A. (2008). On the choice of m in the m out of n bootstrap and 
#' confidence bounds for extrema. \emph{Statistica Sinica}, \bold{18} 967--985. 
#' @seealso {\code{\link{block.sub}},
#' \code{\link{rate.sub}},
#' \code{\link{rate.block.sub}}}.
#' @keywords bootstrap
#' @export
#' @examples 
#'  set.seed(12345)
#'  n = 1000 # sample size
#'  # generating an AR(1) Gaussian process with variance 1
#'  ts = arima.sim(n=n,model=list(ar=c(0.4)))*sqrt(1-0.4^2)
#'  bopt1=best.block.sub.size(ts,mean)
##

best.block.sub.size <- function(X, func, PLT = TRUE, qq = 0.75, ...) {

  # log(qq) must be finite and strictly negative for the size grid below.
  if (!is.numeric(qq) || length(qq) != 1L || is.na(qq) ||
      qq <= 0 || qq >= 1) {
    stop("qq must be a single number in the interval (0, 1)")
  }

  N <- length(X)
  # Largest exponent k such that (1/qq)^k <= N.
  nb <- floor(-log(N) / log(qq))
  # The size grid starts at exponent 6, so exponents 6 and 7 must both be
  # available to compare at least one pair of subsampling distributions.
  # (With nb < 6 the sequence 6:nb would count DOWN and produce sizes > N.)
  if (nb < 7) {
    stop("choose an higher value for qq<1 (default=0.75)")
  }

  l_b <- floor((1 / qq)^(6:nb))  # increasing grid of subsampling sizes
  n_b <- length(l_b)
  series <- as.vector(X)

  # One row per subsampling size; each row holds the N statistics returned by
  # the circular block method (same number of replications for every size).
  T_table <- matrix(0, n_b, N)
  for (i in seq_len(n_b)) {
    # NOTE(review): forwarding `...` assumes block.sub() accepts extra
    # arguments and hands them to `func` -- confirm against its signature.
    T_table[i, ] <- block.sub(series, func, l_b[i], method = "circular", ...)
  }

  # Kolmogorov distance between two samples of N statistics each.
  ks_distance <- function(z_new, z_old) {
    pooled <- c(z_new, z_old)
    origin <- rep(c(1L, 2L), each = N)
    ord <- order(pooled)
    # Difference of the two empirical distribution functions along the
    # pooled, sorted sample.
    gap <- (cumsum(origin[ord] == 2L) - cumsum(origin[ord] == 1L)) / N
    # Inside a run of tied values the running difference is an artefact of
    # the ordering; only its value at the end of each run is an actual
    # ECDF difference.
    run_end <- !duplicated(pooled[ord], fromLast = TRUE)
    max(abs(gap[run_end]))
  }

  # Column 1: the larger size of each consecutive pair; column 2: distance.
  dist2 <- matrix(0, nrow = n_b - 1, ncol = 2)
  for (i in 2:n_b) {
    dist2[i - 1, ] <- c(l_b[i], ks_distance(T_table[i, ], T_table[i - 1, ]))
  }

  # Plot distance as a function of subsampling size
  if (PLT == TRUE) {
    plot(dist2[, 1], dist2[, 2],
         xlab = "Subsampling size", ylab = "Kolmogorov distance")
  }

  # which.min() returns the first minimum, i.e. the smallest qualifying size.
  dist2[which.min(dist2[, 2]), 1]
}
