% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/autotune_softImpute.R
\name{autotune_softImpute}
\alias{autotune_softImpute}
\title{Perform imputation using softImpute package}
\usage{
autotune_softImpute(
  df,
  percent_of_missing,
  col_type,
  col_0_1 = FALSE,
  cat_Fun = VIM::maxCat,
  lambda = 0,
  rank.max = 2,
  type = "als",
  thresh = 1e-05,
  maxit = 100,
  out_file = NULL
)
}
\arguments{
\item{df}{data.frame. Data frame to impute, with column names and without the target column.}

\item{percent_of_missing}{numeric vector. Vector containing the percent of missing data in each column, for example c(0, 1, 0, 0, 11.3, ...).}

\item{col_type}{Character vector with types of columns.}

\item{col_0_1}{Decides whether to add a bonus column indicating where imputation was done. 0 - value was in the dataset, 1 - value was imputed. Default FALSE. (Works only when returning one dataset.)}

\item{cat_Fun}{Function used to impute categorical features. Default maxCat (mode). Can be any function that takes a single character vector as input and returns an atomic object.}

\item{lambda}{Nuclear-norm regularization parameter. If lambda=0, the algorithm reverts to "hardImpute", for which convergence is typically slower. If NULL, lambda is set automatically to the highest possible value.}

\item{rank.max}{This restricts the rank of the solution. Default 2. If set to NULL, rank.max = min(dim(X)) - 1.}

\item{type}{Choice of algorithm: 'als' or 'svd'. Default 'als'.}

\item{thresh}{Threshold for convergence.}

\item{maxit}{Maximum number of iterations.}

\item{out_file}{Output log file location. If the file already exists, log messages will be appended to it. If NULL, no log will be produced.}
}
\value{
Return one data.frame with imputed values.
}
\description{
The function uses softImpute to impute missing data; it works only with numeric data. Columns with categorical values are imputed by a selected function.
}
\details{
The function uses a matrix-based algorithm, which means that if only one numeric column exists in the dataset, the imputation algorithm does not work. In that case, this column will be imputed using the function for categorical columns. Because of this, the algorithm works properly only with at least two numeric features in the dataset. The col_type argument is used to specify column types, so it is possible to force the use of, for example, numeric factors in the imputation. Doing so can lead to errors and is not recommended.
}
\examples{
{
  # Demo data: 1000 rows mixing factor, integer and numeric columns.
  # Columns a, c and f are sampled from pools that include NA.
  raw_data <- data.frame(
    a = factor(sample(c("red", "yellow", "blue", NA), 1000, replace = TRUE)),
    b = seq_len(1000),
    c = factor(sample(c("YES", "NO", NA), 1000, replace = TRUE)),
    d = runif(1000, 1, 10),
    e = factor(sample(c("YES", "NO"), 1000, replace = TRUE)),
    f = factor(sample(c("male", "female", "trans", "other", NA), 1000, replace = TRUE))
  )

  # One type label per column, in column order
  col_type <- c("factor", "integer", "factor", "numeric", "factor", "factor")

  # Share of missing values in each column, expressed on a 0-100 scale
  percent_of_missing <- vapply(
    seq_len(6),
    function(j) 100 * (sum(is.na(raw_data[, j])) / nrow(raw_data)),
    numeric(1)
  )

  imp_data <- autotune_softImpute(raw_data, percent_of_missing, col_type)

  # Confirm that no missing values remain after imputation
  sum(is.na(imp_data)) == 0
  # TRUE
}
}
\references{
Trevor Hastie and Rahul Mazumder (2015). softImpute: Matrix Completion via Iterative Soft-Thresholded SVD. R package version 1.4. \url{https://CRAN.R-project.org/package=softImpute}
}
\author{
Trevor Hastie and Rahul Mazumder (2015).
}
