Using ggpicrust2

Introduction

ggpicrust2 provides a practical workflow for PICRUSt2 downstream analysis:

This vignette focuses on the general package workflow. For a deeper GSEA walkthrough, see the dedicated gsea_analysis vignette.

Installation

# install.packages("devtools")
devtools::install_github("cafferychen777/ggpicrust2")

library(ggpicrust2)
library(tibble)

One-command workflow

If you want a fast end-to-end run from abundance data to annotated differential abundance output, start with ggpicrust2():

data("ko_abundance")
data("metadata")

results <- ggpicrust2(
  data = ko_abundance,
  metadata = metadata,
  group = "Environment",
  pathway = "KO",
  daa_method = "LinDA",
  ko_to_kegg = TRUE,
  order = "pathway_class",
  p_values_bar = TRUE,
  x_lab = "pathway_name"
)

# Access the main outputs
results[[1]]$plot
head(results[[1]]$results)

Use this route when you want a fast default analysis. Use the stepwise workflow below when you need more control over data preparation or visualization.

Stepwise pathway workflow

Convert KO abundance to KEGG pathway abundance

kegg_pathway_abundance <- ko2kegg_abundance(data = ko_abundance)
head(kegg_pathway_abundance[, 1:3])

Run differential abundance analysis

daa_results <- pathway_daa(
  abundance = kegg_pathway_abundance,
  metadata = metadata,
  group = "Environment",
  daa_method = "ALDEx2"
)

head(daa_results)

Annotate pathway results

annotated_daa <- pathway_annotation(
  pathway = "KO",
  daa_results_df = daa_results,
  ko_to_kegg = TRUE
)

head(annotated_daa)

Visualize pathway-level results

pathway_errorbar(
  abundance = kegg_pathway_abundance,
  daa_results_df = annotated_daa,
  Group = "Environment"
)
sig_pathways <- annotated_daa$feature[annotated_daa$p_adjust < 0.05]

if (length(sig_pathways) > 0) {
  pathway_heatmap(
    abundance = kegg_pathway_abundance[sig_pathways, , drop = FALSE],
    metadata = metadata,
    group = "Environment"
  )
}

pathway_pca(
  abundance = kegg_pathway_abundance,
  metadata = metadata,
  group = "Environment"
)

Taxa contribution workflow

PICRUSt2 can output per-sequence contribution files that explain which taxa are driving predicted functional shifts. ggpicrust2 now supports a full contribution workflow.

Read PICRUSt2 contribution files

# For pred_metagenome_contrib.tsv
contrib_data <- read_contrib_file("pred_metagenome_contrib.tsv")

# For pred_metagenome_strat.tsv
strat_data <- read_strat_file("pred_metagenome_strat.tsv")

Aggregate to a taxonomic level

taxa_contrib <- aggregate_taxa_contributions(
  contrib_data = contrib_data,
  taxonomy = your_taxonomy_table,
  tax_level = "Genus",
  top_n = 10,
  daa_results_df = daa_results
)

head(taxa_contrib)

aggregate_taxa_contributions() accepts either:

Use daa_results_df or pathway_ids when you want to focus only on pathways that were significant in your pathway-level analysis.

Visualize taxa-level drivers

taxa_contribution_bar(
  contrib_agg = taxa_contrib,
  metadata = metadata,
  group = "Environment",
  facet_by = "function"
)

taxa_contribution_heatmap(
  contrib_agg = taxa_contrib,
  n_functions = 20
)

This step is useful when pathway-level significance is not enough and you need to identify which taxa are contributing to the change.

GSEA workflow

Use GSEA when you want pathway-set level inference from KO or EC abundance rather than testing each pathway independently.

gsea_results <- pathway_gsea(
  abundance = ko_abundance %>% column_to_rownames("#NAME"),
  metadata = metadata,
  group = "Environment",
  pathway_type = "KEGG",
  method = "camera"
)

annotated_gsea <- gsea_pathway_annotation(
  gsea_results = gsea_results,
  pathway_type = "KEGG"
)

visualize_gsea(
  gsea_results = annotated_gsea,
  plot_type = "barplot",
  n_pathways = 15
)

For a method-by-method GSEA explanation, covariate adjustment, and comparison with DAA, see the gsea_analysis vignette.

Summary

The package is easiest to use when you choose the shortest path that matches your question: