% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kprototypes_gower.R
\name{kproto_gower}
\alias{kproto_gower}
\title{k-Prototypes Clustering using Gower Dissimilarity}
\usage{
kproto_gower(
  x,
  k,
  lambda = NULL,
  iter.max = 100,
  na.rm = "yes",
  keep.data = TRUE,
  verbose = TRUE
)
}
\arguments{
\item{x}{Data frame with both numerics and factors (also ordered factors are possible).}

\item{k}{Either the number of clusters, a vector specifying indices of initial prototypes, or a data frame of prototypes of the same columns as \code{x}.}

\item{lambda}{Parameter > 0 to trade off between Euclidean distance of numeric variables 
and simple matching coefficient between categorical variables. Also a vector of variable specific factors is possible where 
the order must correspond to the order of the variables in the data. In this case all variables' distances will be multiplied by 
their corresponding lambda value.}

\item{iter.max}{Maximum number of iterations if no convergence before.}

\item{na.rm}{Character; passed from \code{\link{kproto}}. For \code{"no"}, observations where all variables are missing are assigned cluster membership \code{NA}.}

\item{keep.data}{Logical whether the original data should be included in the returned object.}

\item{verbose}{Logical whether information about the cluster procedure should be given. Caution: If \code{verbose=FALSE}, the reduction of the number of clusters is not mentioned.}
}
\value{
\code{\link{kmeans}} like object of class \code{\link{kproto}}:

\item{cluster}{Vector of cluster memberships.}

\item{centers}{Data frame of cluster prototypes.}

\item{lambda}{Distance parameter lambda. For \code{type = "gower"} only a vector of variable specific weights is possible.}

\item{size}{Vector of cluster sizes.}

\item{withinss}{Vector of within cluster distances for each cluster, i.e. summed distances of all observations belonging to a cluster to their respective prototype.}

\item{tot.withinss}{Target function: sum of all observations' distances to their corresponding cluster prototype.}

\item{dists}{Matrix with distances of observations to all cluster prototypes.}

\item{iter}{Prespecified maximum number of iterations.}

\item{stdization}{List of standardized ranks for ordinal variables and an additional element \code{num_ranges} with ranges of all numeric variables. Used by \code{\link{predict.kproto}}.}

\item{trace}{List with two elements (vectors) tracing the iteration process: 
\code{tot.dists} and \code{moved} number of observations over all iterations.}
}
\description{
Internal function. Computes k-prototypes clustering for mixed-type data using Gower dissimilarity.
}
\details{
Internal function called by \code{\link{kproto}}. Note that there is no \code{nstart} argument.  
Higher values than \code{nstart = 1} can be specified within \code{kproto} which will call \code{kproto_gower} 
several times.
For Gower dissimilarity range-normalized absolute distances from the cluster median 
are computed for the numeric variables (and for the ranks of the ordered factors respectively). 
For factors simple matching distance is used as in the original k prototypes algorithm. 
The prototypes are given by the median for numeric variables, the mode for factors and, for ordered factors, the level with the closest rank 
to the median rank of the corresponding cluster.
In case of \code{na.rm = "no"}: for each observation variables with missings are ignored 
(i.e. only the remaining variables are considered for distance computation). 
In consequence, for observations with missings this might result in a change of the variables' weighting compared to the one specified
by \code{lambda}. Further note: For these observations distances to the prototypes will typically be smaller as they are based 
on fewer variables.
}
\examples{

# Simulate mixed-type data with four clusters of n observations each:
# one numeric variable x1 and one ordered factor x2.
#   n      number of observations per cluster
#   k.ord  number of levels of the ordered factor x2
#   muk    absolute value of the cluster means on the normal scale
# Returns a data frame with the columns x1 (numeric) and x2 (ordered factor).
datasim <- function(n = 100, k.ord = 2, muk = 1.5){
  lo <- rep(-muk, n)
  hi <- rep(muk, n)

  # numeric variable: clusters 1 and 2 are centered at -muk, clusters 3 and 4 at muk
  x1 <- rnorm(4 * n) + c(lo, lo, hi, hi)

  # latent numeric variable behind the ordered factor:
  # clusters 1 and 3 are centered at -muk, clusters 2 and 4 at muk
  latent <- rnorm(4 * n) + c(lo, hi, lo, hi)

  # cut the latent variable at k.ord + 1 equally spaced empirical quantiles;
  # the outer breaks are replaced by -Inf and Inf so the classes cover the whole real line
  breaks <- quantile(latent, seq(0, 1, length.out = k.ord + 1))
  breaks[1] <- -Inf
  breaks[length(breaks)] <- Inf

  data.frame(x1 = x1, x2 = as.ordered(cut(latent, breaks)))
}
 
 # simulate four clusters with n observations each (ordered factor with 10 levels)
 n     <- 100
 x     <- datasim(n = n, k.ord = 10, muk = 2)
 # true cluster labels: datasim() stacks the four clusters in blocks of n rows
 truth <- rep(1:4, each = n)
 
 # calling the internal kproto_gower() directly
 kgres <- kproto_gower(x, 4, verbose = FALSE)
 
 # calling kproto_gower() via kproto() with type = "gower";
 # nstart = 10 makes kproto() call kproto_gower() several times (see Details)
 kgres2 <- kproto(x, 4, verbose = FALSE, type = "gower", nstart = 10)
 
 # cross-tabulate the estimated cluster memberships against the true labels
 table(kgres$cluster, truth)
 # NOTE(review): presumably plots cluster-wise variable profiles; confirm in the clprofiles documentation
 clprofiles(kgres, x)
 
}
\references{
\itemize{
    \item Gower, J. C. (1971): A General Coefficient of Similarity and Some of Its Properties. {\emph{Biometrics, 27(4)}}, 857-871. 
          \doi{10.2307/2528823}. 
    \item Podani, J. (1999): Extending Gower's general coefficient of similarity to ordinal characters. {\emph{TAXON, 48}}, 331-340.
          \doi{10.2307/1224438}.
  }
}
\author{
\email{gero.szepannek@web.de}
}
\keyword{classif}
\keyword{cluster}
\keyword{multivariate}
