% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/comp.clustering_function.R
\name{comp_clustering}
\alias{comp_clustering}
\title{End-user-ready results for comparison dissimilarity and hierarchical clustering
(Comparisons' comparability for transitivity evaluation)}
\usage{
comp_clustering(
  input,
  weight,
  drug_names,
  threshold,
  informative = TRUE,
  ranged_values = FALSE,
  optimal_clusters,
  get_plots = "none",
  override = FALSE,
  label_size = 4,
  title_size = 14,
  axis_title_size = 14,
  axis_text_size = 14,
  axis_x_text_angle = 0,
  legend_text_size = 13,
  str_wrap_width = 10
)
}
\arguments{
\item{input}{A data-frame in the long arm-based format. Two-arm trials occupy
one row in the data-frame. Multi-arm trials occupy as many rows as the
number of possible comparisons among the interventions. The first three
columns refer to the trial name, first and second arm of the comparison,
respectively. The remaining columns refer to summary characteristics. See
'Details' for the specification of the columns.}

\item{weight}{A vector of non-negative numbers to define the weight
contribution of each characteristic. The default is a vector of 1s for all
characteristics.}

\item{drug_names}{A vector of labels with the name of the interventions
in the order they have been defined in the argument \code{input}.}

\item{threshold}{A positive scalar to indicate the cut-off of low
dissimilarity of two comparisons. The value must be low.}

\item{informative}{Logical with \code{TRUE} for evaluating only the
comparison dissimilarity and \code{FALSE} for performing hierarchical
agglomerative clustering, thus, allowing the user to define the number of
clusters via the argument \code{optimal_clusters}.
The default argument is \code{TRUE}.}

\item{ranged_values}{Whether to use a colour scale when creating the
heatmap of within-comparison and between-comparison dissimilarities
(\code{TRUE}) or colour the cells with green and orange, when below or
exceeding the specified \code{threshold}. Relevant only when
\code{informative = TRUE}. The default argument is \code{FALSE}.}

\item{optimal_clusters}{A positive integer for the optimal number of
clusters, ideally, decided after inspecting the profile plot with average
silhouette widths for a range of clusters, and the dendrogram. The user
\bold{must} define the value. It takes values from two to the number of
trials minus one.}

\item{get_plots}{Logical with values \code{TRUE} for returning all plots and
\code{FALSE} for concealing the plots. The default argument is
\code{FALSE}.}

\item{override}{Logical with values \code{TRUE} to run the function for a
pairwise meta-analysis and \code{FALSE} to stop the function in case of two
treatments. The default argument is \code{FALSE}.}

\item{label_size}{A positive integer for the font size of labels in the
violin plot for the study dissimilarities per comparison and comparison
between comparisons. \code{label_size} determines the size argument found
in the geom's aesthetic properties in the R-package
\href{https://CRAN.R-project.org/package=ggplot2}{ggplot2}.}

\item{title_size}{A positive integer for the font size of legend title in
the stacked barplot on the percentage studies of each comparison found in
the clusters. \code{title_size} determines the title argument
found in the theme's properties in the R-package
\href{https://CRAN.R-project.org/package=ggplot2}{ggplot2}.}

\item{axis_title_size}{A positive integer for the font size of axis title in
the violin plot for the study dissimilarities per comparison and comparison
between comparisons, and the barplot of percentage trials per comparison
and cluster. \code{axis_title_size} determines the axis.title
argument found in the theme's properties in the
R-package \href{https://CRAN.R-project.org/package=ggplot2}{ggplot2}.}

\item{axis_text_size}{A positive integer for the font size of axis text in
the violin plot for the study dissimilarities per comparison and comparison
between comparisons, the heatmap of comparison dissimilarity, and the
barplot of percentage trials per comparison and cluster.
\code{axis_text_size} determines the axis.text argument found in the
theme's properties in the R-package
\href{https://CRAN.R-project.org/package=ggplot2}{ggplot2}.}

\item{axis_x_text_angle}{A positive integer for the angle of axis text in
the violin plot for the study dissimilarities per comparison and comparison
between comparisons. \code{axis_x_text_angle} determines the axis.text.x
argument found in the theme's properties in the R-package
\href{https://CRAN.R-project.org/package=ggplot2}{ggplot2}.}

\item{legend_text_size}{A positive integer for the font size of legend text
in the barplot of percentage trials per comparison and cluster.
\code{legend_text_size} determines the legend.text argument found in the
theme's properties in the R-package
\href{https://CRAN.R-project.org/package=ggplot2}{ggplot2}.}

\item{str_wrap_width}{A positive integer for wrapping the axis labels in the
violin plot for the study dissimilarities per comparison between
comparisons. \code{str_wrap_width} determines the
\code{\link[stringr:str_wrap]{str_wrap}} function of the R-package
\href{https://CRAN.R-project.org/package=stringr}{stringr}.}
}
\value{
Initially, \code{comp_clustering} prints on the console the following
  messages: the number of observed comparisons (and number of single-study
  comparisons, if any); the number of dropped characteristics due to many
  missing data; the maximum value of the cophenetic correlation coefficient;
  and the optimal linkage method selected based on the cophenetic correlation
  coefficient. Then, the function returns the following list of elements:
  \item{Trials_diss_table}{A lower off-diagonal matrix of 'dist' class
  with the Gower dissimilarities of all pairs of studies in the network.}
  \item{Comparisons_diss_table}{A lower off-diagonal matrix of 'dist' class
  with the within-comparison dissimilarities at the main diagonal and the
  between-comparison dissimilarities of all pairs of observed
  intervention comparisons at the off-diagonal elements.}
  \item{Total_dissimilarity}{A data-frame on the observed comparisons and
  comparisons between comparisons, alongside the corresponding
  within-comparison and between-comparisons dissimilarity. The data-frame has
  been sorted in decreasing order within each dissimilarity 'type'.}
  \item{Types_used}{A data-frame with type mode (i.e., double or integer) of
  each characteristic.}
  \item{Total_missing}{The percentage of missing cases in the dataset,
  calculated as the ratio of total missing cases to the product of the number
  of studies with the number of characteristics.}
  \item{Cluster_comp}{A data-frame on the studies and the cluster they
  belong to (based on the argument \code{optimal_clusters}).}
  \item{Table_average_silhouette_width}{A data-frame with the average
  silhouette width for a range of 2 to P-1 clusters, with P being the number
  of trials.}
  \item{Table_cophenetic_coefficient}{A data-frame on the cophenetic
  correlation coefficient for eight linkage methods (Ward's two
  versions, single, complete, average, Mcquitty, median and centroid). The
  data-frame has been sorted in decreasing order of the cophenetic correlation
  coefficient.}
  \item{Optimal_link}{The optimal linkage method (ward.D, ward.D2, single,
  complete, average, mcquitty, median, or centroid) based on the cophenetic
  correlation coefficient.}

  If \code{get_plots = FALSE} only the list of elements mentioned above is
  returned. If \code{get_plots = TRUE}, \code{comp_clustering} returns a
  series of plots in addition to the list of elements mentioned above:
  \item{Within_comparison_dissimilarity}{A violin plot with integrated box
  plots and dots on the study dissimilarities per observed comparison
  (x-axis). Violins are sorted in descending order of the within-comparison
  dissimilarities (blue point).}
  \item{Between_comparison_dissimilarity}{A violin plot with integrated box
  plots and dots on the study dissimilarities per comparison between
  comparisons (x-axis). Violins are sorted in descending order of the
  between-comparison dissimilarities (blue point).}
  \item{Dissimilarity_heatmap}{A heatmap on within-comparison and
  between-comparison dissimilarities when \code{informative = TRUE}.
  Diagonal elements refer to within-comparison dissimilarity, and
  off-diagonal elements refer to between-comparisons dissimilarity. Using a
  threshold of high similarity (specified using the argument
  \code{threshold}), cells equal to or above this threshold are highlighted in
  orange; otherwise, in green. This heatmap aids in finding 'hot spots' of
  comparisons that may violate the plausibility of transitivity in the
  network. Single-study comparisons are indicated with white numbers.}
  \item{Profile_plot}{A profile plot on the average silhouette width for a
  range of 2 to P-1 clusters, with P being the number of trials. The
  candidate optimal number of clusters is indicated with a red point
  directly on the line.}
  \item{Silhouette_width_plot}{A silhouette plot illustrating the silhouette
  width for each trial, with the trials sorted in decreasing order within the
  cluster they belong. This output is obtained by calling the
  \code{\link[cluster:silhouette]{silhouette}} function in the R-package
  \href{https://CRAN.R-project.org/package=cluster}{cluster}.}
  \item{Barplot_comparisons_cluster}{A stacked barplot on the percentage
  trials of each comparison found in the clusters (based on the argument
  \code{optimal_clusters}).}
}
\description{
\code{comp_clustering} hosts a toolkit of functions that facilitates
  conducting, visualising and evaluating hierarchical agglomerative
  clustering of observed comparisons of interventions for a specific network
  and set of
  characteristics that act as effect modifiers. It also calculates the
  non-statistical heterogeneity within-comparisons and between-comparisons
  using the dissimilarities among all trials of the network.
}
\details{
The correct type mode of columns in \code{input} must be ensured to use
  the function \code{comp_clustering}. The first three columns referring to
  the trial name, first and second arm of the comparison, respectively, must
  be \strong{character}. The remaining columns referring to the
  characteristics must be \strong{double} or \strong{integer} depending on
  whether the corresponding characteristic refers to a quantitative or
  qualitative variable. The type mode of each column is assessed by
  \code{comp_clustering} using the base function \code{typeof}. Note that
  \code{comp_clustering} invites unordered and ordered variables; for the
  latter, add the argument \code{ordered = TRUE} in the base function
  \bold{factor()}.

  The interventions should be sorted in an ascending order of their
  identifier number within the trials so that the first intervention column
  (second column in \code{input}) is the control arm for every pairwise
  comparison. This is important to ensure consistency in the intervention
  order within the comparisons obtained from the other related functions.

  \code{comp_clustering} excludes from the dataset the following type of
  characteristics: (i) completely missing characteristics and
  (ii) characteristics with missing values in all but one study for at
  least one non-single-study comparison. Then it proceeds with the clustering
  process.

  The cophenetic correlation coefficient is calculated using the
  \code{\link[stats:cophenetic]{cophenetic}} function alongside the
  \code{\link[stats:hclust]{hclust}} function for selected linkage methods.

 \code{comp_clustering} can be used only for a network with at least three
  comparisons. Otherwise, the execution of the function will be stopped and
  an error message will be printed on the R console.
}
\examples{
\donttest{
# Fictional dataset
data_set <- data.frame(Trial_name = as.character(1:7),
                      arm1 = c("1", "1", "1", "1", "1", "2", "2"),
                      arm2 = c("2", "2", "2", "3", "3", "3", "3"),
                      sample = c(140, 145, 150, 40, 45, 75, 80),
                      age = c(18, 18, 18, 48, 48, 35, 35),
                      blinding = factor(c("yes", "yes", "yes", "no", "no", "no", "no")))

# Obtain comparison dissimilarities (informative = TRUE)
comp_clustering(input = data_set,
                drug_names = c("A", "B", "C"),
                threshold = 0.13,  # General research setting
                informative = TRUE,
                get_plots = TRUE)
}

}
\references{
Gower J. General Coefficient of Similarity and Some of Its Properties.
\emph{Biometrics} 1971;\bold{27}(4):857--71.
doi: 10.2307/2528823

Sokal R, Rohlf F. The Comparison of Dendrograms by Objective Methods.
\emph{Taxon} 1962;\bold{11}(2):33--40.
doi: 10.2307/1217208

Handl J, Knowles J, Kell DB. Computational cluster validation in post-genomic
data analysis. \emph{Bioinformatics} 2005;\bold{21}(15):3201--12.
doi: 10.1093/bioinformatics/bti517

Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and
validation of cluster analysis.
\emph{J Comput Appl Math} 1987;\bold{20}:53--65.
}
\seealso{
\code{\link[stats:cophenetic]{cophenetic}},
 \code{\link[stats:hclust]{hclust}}, \code{\link{internal_measures_plot}},
 \code{\link[cluster:silhouette]{silhouette}},
 \code{\link[stringr:str_wrap]{str_wrap}}
}
\author{
{Loukia M. Spineli}
}
