% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clustering_functions.R
\name{Clara_Medoids}
\alias{Clara_Medoids}
\title{Clustering large applications}
\usage{
Clara_Medoids(data, clusters, samples, sample_size,
  distance_metric = "euclidean", minkowski_p = 1, threads = 1,
  swap_phase = TRUE, fuzzy = FALSE, verbose = FALSE, seed = 1)
}
\arguments{
\item{data}{matrix or data frame}

\item{clusters}{the number of clusters}

\item{samples}{number of samples to draw from the data set}

\item{sample_size}{fraction of data to draw in each sample iteration. It should be a float number greater than 0.0 and less than or equal to 1.0}

\item{distance_metric}{a string specifying the distance method. One of: \emph{euclidean}, \emph{manhattan}, \emph{chebyshev}, \emph{canberra}, \emph{braycurtis}, \emph{pearson_correlation}, \emph{simple_matching_coefficient}, \emph{minkowski}, \emph{hamming}, \emph{jaccard_coefficient}, \emph{Rao_coefficient}, \emph{mahalanobis}}

\item{minkowski_p}{a numeric value specifying the minkowski parameter, used only when distance_metric = "minkowski"}

\item{threads}{an integer specifying the number of cores to run in parallel. OpenMP will be utilized to process the different sample draws in parallel}

\item{swap_phase}{either TRUE or FALSE. If TRUE then both phases ('build' and 'swap') will take place. The 'swap_phase' is considered more computationally intensive.}

\item{fuzzy}{either TRUE or FALSE. If TRUE, then probabilities for each cluster will be returned based on the distance between observations and medoids}

\item{verbose}{either TRUE or FALSE, indicating whether progress is printed during clustering}

\item{seed}{integer value for random number generator (RNG)}
}
\value{
a list with the following attributes: medoids, medoid_indices, sample_indices, best_dissimilarity, clusters, fuzzy_probs (if fuzzy = TRUE), clustering_stats, dissimilarity_matrix, silhouette_matrix
}
\description{
Clustering large applications
}
\details{
The Clara_Medoids function is implemented in the same way as the 'clara' (clustering large applications) algorithm (Kaufman and Rousseeuw (1990)). In 'Clara_Medoids',
the 'Cluster_Medoids' function is applied to each sample draw.
}
\examples{

data(dietary_survey_IBS)

dat = dietary_survey_IBS[, -ncol(dietary_survey_IBS)]

dat = center_scale(dat)

clm = Clara_Medoids(dat, clusters = 3, samples = 5, sample_size = 0.2, swap_phase = TRUE)

}
\references{
Anja Struyf, Mia Hubert, Peter J. Rousseeuw, (Feb. 1997), Clustering in an Object-Oriented Environment, Journal of Statistical Software, Vol 1, Issue 4
}
\author{
Lampros Mouselimis
}
