From 83e7adcbdc97ef95973a625fb7573780a3604602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAndreas?= Date: Fri, 11 Jul 2025 16:55:52 -0400 Subject: [PATCH 1/3] Add possibility for pairwise comparisons in betta test results --- R/betta_posthoc.R | 260 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 R/betta_posthoc.R diff --git a/R/betta_posthoc.R b/R/betta_posthoc.R new file mode 100644 index 0000000..9aba367 --- /dev/null +++ b/R/betta_posthoc.R @@ -0,0 +1,260 @@ +#' Helper function to generate compact letter display +#' +#' @param pvals Named vector of p-values for pairwise comparisons +#' @param alpha Significance threshold +#' @param letter_set Character vector of letters to use +#' @return Named vector of compact letter display codes +#' @keywords internal +generate_cld <- function(pvals, alpha = 0.05, letter_set = LETTERS) { + # Use multcompLetters with p-values + cld_letters <- multcompView::multcompLetters( + pvals, + threshold = alpha, + Letters = letter_set + )$Letters + + return(cld_letters) +} + +#' Post-hoc pairwise comparisons and compact letter display from diversity test object +#' +#' Performs pairwise post-hoc linear combination tests based on a fitted betta model object +#' (`test_obj`) that contains a formula of the form `estimate ~ ...`. The function +#' extracts the model formula and metadata variables, constructs all pairwise +#' comparisons between levels, runs `betta_lincom()` on each linear contrast, +#' adjusts p-values, computes significance codes, and returns compact letter display (CLD) +#' indicating significantly different groups. +#' +#' @param test_obj An object returned from the initial diversity test function (`betta()`). +#' This object must contain a formula accessible as `test_obj$function.args$formula`, +#' where the RHS defines the model terms used in `model.matrix`. +#' @param metadata A data frame containing the variables used in the model formula. +#' Each variable should correspond to a column used in `test_obj$function.args$formula`. +#' @param p_adjust_method Character string specifying the p-value adjustment method +#' (default `"BH"`). See `?p.adjust` for available methods. +#' @param alpha Numeric significance level used for compact letter display grouping (default `0.05`). +#' @param letter_set Character vector of letters or numbers used for compact letter display labels +#' (default `LETTERS`). +#' @param human_sep Character string used as separator for output group labels (default `"."`). +#' +#' @return A list with two components: +#' \describe{ +#' \item{results_df}{A data frame containing pairwise comparison results with columns: +#' `group_1`, `group_2`, `Estimates`, `Standard Errors`, `Lower CIs`, `Upper CIs`, +#' `p_value`, `p_adjusted`, and `significance`.} +#' \item{cld_df}{A data frame containing group-level compact letter display labels. +#' This data frame contains one row per group with columns corresponding to +#' the original variables from the formula, a `cld` column for the letter code, +#' and group-level estimates: `estimate`, `std_error`, `lower_ci`, `upper_ci`.} +#' } +#' +#' @importFrom dplyr select distinct arrange mutate across everything all_of left_join bind_rows +#' @importFrom purrr map_dfr +#' @importFrom tidyr separate +#' @importFrom stringr str_replace_all +#' @importFrom multcompView multcompLetters +#' @importFrom tibble tibble +#' +#' @export +betta_posthoc <- function( + test_obj, + metadata, + p_adjust_method = "BH", + alpha = 0.05, + letter_set = LETTERS, + human_sep = "." +) { + # Input validation + # Check for required components + if ( + !is.list(test_obj) || + is.null(test_obj$function.args) || + is.null(test_obj$function.args$formula) + ) { + stop( + "test_obj must be a fitted betta model with accessible formula in test_obj$function.args$formula" + ) + } + + # Additional check to ensure it has betta-like structure for betta_lincom + if (is.null(test_obj$table)) { + stop( + "test_obj must be a fitted betta model with a 'table' component (required for betta_lincom)" + ) + } + + if (!is.data.frame(metadata)) { + stop("metadata must be a data frame") + } + + # Internal separator for combining group names + internal_sep <- "__SEP__" + + # Extract the right-hand side of the formula + full_formula <- test_obj$function.args$formula + rhs_formula <- as.formula(paste("~", as.character(full_formula)[3])) + rhs_vars <- all.vars(rhs_formula) + + # Check that all variables in formula are in metadata + missing_vars <- setdiff(rhs_vars, names(metadata)) + if (length(missing_vars) > 0) { + stop(paste( + "Variables not found in metadata:", + paste(missing_vars, collapse = ", ") + )) + } + + # Create all unique combinations of factor levels + combos <- metadata %>% + dplyr::select(dplyr::all_of(rhs_vars)) %>% + dplyr::distinct() %>% + dplyr::arrange(dplyr::across(dplyr::everything())) + + # Create group labels + combos$group <- apply(combos, 1, paste, collapse = internal_sep) + + # Create model matrix for linear combinations + mm <- model.matrix(rhs_formula, data = combos) + + # Generate all pairwise comparisons + pairwise_indices <- combn(nrow(combos), 2) + + # Perform pairwise comparisons using betta_lincom + results_df <- purrr::map_dfr(seq_len(ncol(pairwise_indices)), function(i) { + idx1 <- pairwise_indices[1, i] + idx2 <- pairwise_indices[2, i] + + g1 <- combos$group[idx1] + g2 <- combos$group[idx2] + + # Linear combination: difference between groups + lin_com <- mm[idx1, ] - mm[idx2, ] + + # Perform linear combination test + res <- betta_lincom(test_obj, lin_com) + + # Extract results + res %>% + dplyr::mutate(p_value = as.numeric(`p-values`)) %>% + dplyr::select( + Estimates, + `Standard Errors`, + `Lower CIs`, + `Upper CIs`, + p_value + ) %>% + dplyr::mutate(group_1 = g1, group_2 = g2, .before = Estimates) + }) + + # Add adjusted p-values and significance codes + results_df <- results_df %>% + dplyr::mutate( + p_adjusted = p.adjust(p_value, method = p_adjust_method), + significance = dplyr::case_when( + p_adjusted <= 0.001 ~ "***", + p_adjusted <= 0.01 ~ "**", + p_adjusted <= 0.05 ~ "*", + p_adjusted <= 0.1 ~ ".", + TRUE ~ " " + ) + ) + + # Create p-value vector for compact letter display + # multcompLetters needs a named vector of p-values for all pairwise comparisons + pvals <- results_df$p_adjusted + names(pvals) <- paste(results_df$group_1, results_df$group_2, sep = "-") + + # Generate compact letter display using p-value vector approach + cld_letters <- generate_cld(pvals, alpha, letter_set) + + cld_df <- tibble::tibble( + group_label = names(cld_letters), + cld = as.character(cld_letters) + ) + + # Add group-level estimates and confidence intervals to CLD table + # Calculate predicted values for each group using the model matrix + group_estimates <- tibble::tibble() + + for (i in seq_len(nrow(combos))) { + group_name <- combos$group[i] + + # Get linear combination for this group (intercept + effects) + lin_com_group <- mm[i, ] + + # Calculate group estimate using betta_lincom + group_result <- betta_lincom(test_obj, lin_com_group) + + group_estimates <- dplyr::bind_rows( + group_estimates, + tibble::tibble( + group_label = group_name, + estimate = as.numeric(group_result$Estimates), + std_error = as.numeric(group_result$`Standard Errors`), + lower_ci = as.numeric(group_result$`Lower CIs`), + upper_ci = as.numeric(group_result$`Upper CIs`) + ) + ) + } + + # Join estimates with CLD letters + cld_df <- cld_df %>% + dplyr::left_join(group_estimates, by = "group_label") + + # Safely escape internal separator for tidyr::separate() + escaped_sep <- gsub( + "([\\^\\$\\.|\\?\\*\\+\\(\\)\\[\\]\\{\\}\\\\])", + "\\\\\\1", + internal_sep + ) + + cld_df <- cld_df %>% + tidyr::separate( + group_label, + into = rhs_vars, + sep = escaped_sep, + remove = TRUE + ) + + # Replace internal separator with readable one + results_df <- results_df %>% + dplyr::mutate( + group_1 = stringr::str_replace_all(group_1, internal_sep, human_sep), + group_2 = stringr::str_replace_all(group_2, internal_sep, human_sep) + ) + + cld_df <- cld_df %>% + dplyr::mutate( + dplyr::across( + dplyr::all_of(rhs_vars), + ~ stringr::str_replace_all(., internal_sep, human_sep) + ) + ) %>% + # Also clean up the group_label if it still exists + dplyr::mutate( + dplyr::across( + dplyr::any_of("group_label"), + ~ stringr::str_replace_all(., internal_sep, human_sep) + ) + ) + + # Return results as a proper betta_posthoc object + new_betta_posthoc(results_df, cld_df) +} + + +#' Create a betta_posthoc object with proper class +#' +#' @param results_df Pairwise comparison results +#' @param cld_df Compact letter display results +#' @return A betta_posthoc object +#' @keywords internal +new_betta_posthoc <- function(results_df, cld_df) { + structure( + list( + results_df = results_df, + cld_df = cld_df + ), + class = "betta_posthoc" + ) +} From beb99e1207a5beb7e72422d0e31aeda04b9b4dea Mon Sep 17 00:00:00 2001 From: Sarah Teichman Date: Thu, 17 Jul 2025 08:58:21 -0400 Subject: [PATCH 2/3] making small updates to betta_posthoc and running it --- DESCRIPTION | 5 ++++- R/betta_posthoc.R | 28 ++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1180aeb..baefbaf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,10 @@ Suggests: testthat, ggplot2, R.rsp, - speedyseq + speedyseq, + multcompView, + purrr, + stringr Additional_repositories: https://github.com/mikemc/speedyseq Encoding: UTF-8 LazyData: true diff --git a/R/betta_posthoc.R b/R/betta_posthoc.R index 9aba367..efe673c 100644 --- a/R/betta_posthoc.R +++ b/R/betta_posthoc.R @@ -64,6 +64,18 @@ betta_posthoc <- function( letter_set = LETTERS, human_sep = "." ) { + + # check that required packages are installed (since these are Suggests for this package) + if (!requireNamespace("multcompView", quietly = TRUE)) { + stop("Package 'multicompView' is required for this function. Please install it.") + } + if (!requireNamespace("purrr", quietly = TRUE)) { + stop("Package 'purrr' is required for this function. Please install it.") + } + if (!requireNamespace("stringr", quietly = TRUE)) { + stop("Package 'stringr' is required for this function. Please install it.") + } + # Input validation # Check for required components if ( @@ -149,14 +161,14 @@ betta_posthoc <- function( # Add adjusted p-values and significance codes results_df <- results_df %>% dplyr::mutate( - p_adjusted = p.adjust(p_value, method = p_adjust_method), - significance = dplyr::case_when( - p_adjusted <= 0.001 ~ "***", - p_adjusted <= 0.01 ~ "**", - p_adjusted <= 0.05 ~ "*", - p_adjusted <= 0.1 ~ ".", - TRUE ~ " " - ) + p_adjusted = p.adjust(p_value, method = p_adjust_method)#, + #significance = dplyr::case_when( + # p_adjusted <= 0.001 ~ "***", + # p_adjusted <= 0.01 ~ "**", + # p_adjusted <= 0.05 ~ "*", + # p_adjusted <= 0.1 ~ ".", + # TRUE ~ " " + #) ) # Create p-value vector for compact letter display From becd145153814861ef0aef9659b72a3626e9cc29 Mon Sep 17 00:00:00 2001 From: Sarah Teichman Date: Tue, 29 Jul 2025 10:24:40 -0700 Subject: [PATCH 3/3] small tweaks to pull request, including adding documentation and including necessary functions --- DESCRIPTION | 10 +++--- NAMESPACE | 12 +++++++ R/betta_posthoc.R | 13 +------ R/utility.R | 2 +- man/betta_posthoc.Rd | 53 +++++++++++++++++++++++++++++ man/generate_cld.Rd | 22 ++++++++++++ man/new_betta_posthoc.Rd | 20 +++++++++++ man/pick_base.Rd | 3 ++ tests/testthat/test-betta_posthoc.R | 17 +++++++++ 9 files changed, 134 insertions(+), 18 deletions(-) create mode 100644 man/betta_posthoc.Rd create mode 100644 man/generate_cld.Rd create mode 100644 man/new_betta_posthoc.Rd create mode 100644 tests/testthat/test-betta_posthoc.R diff --git a/DESCRIPTION b/DESCRIPTION index baefbaf..b1b45b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,10 @@ Imports: stats, tibble, tidyr, - utils + utils, + multcompView, + purrr, + stringr Suggests: doSNOW, glasso, @@ -36,11 +39,8 @@ Suggests: ggplot2, R.rsp, speedyseq, - multcompView, - purrr, - stringr Additional_repositories: https://github.com/mikemc/speedyseq Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.3.2 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 7ca087c..e3a47c3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(plot,diversityEstimates) S3method(print,diversityEstimates) export(MCmat) export(bc_fast) +export(betta_posthoc) export(bray_curtis_true) export(divnet) export(euc_fast) @@ -27,16 +28,27 @@ import(stats) importFrom(MASS,ginv) importFrom(MASS,mvrnorm) importFrom(breakaway,make_design_matrix) +importFrom(dplyr,across) +importFrom(dplyr,all_of) +importFrom(dplyr,arrange) +importFrom(dplyr,bind_rows) importFrom(dplyr,distinct) +importFrom(dplyr,everything) +importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(magrittr,"%>%") +importFrom(multcompView,multcompLetters) importFrom(mvnfast,rmvn) importFrom(phyloseq,get_variable) importFrom(phyloseq,otu_table) importFrom(phyloseq,sample_data) importFrom(phyloseq,sample_names) +importFrom(purrr,map_dfr) +importFrom(stringr,str_replace_all) importFrom(tibble,add_column) importFrom(tibble,rownames_to_column) +importFrom(tibble,tibble) importFrom(tidyr,gather) +importFrom(tidyr,separate) importFrom(utils,globalVariables) diff --git a/R/betta_posthoc.R b/R/betta_posthoc.R index efe673c..0db7f4b 100644 --- a/R/betta_posthoc.R +++ b/R/betta_posthoc.R @@ -65,17 +65,6 @@ betta_posthoc <- function( human_sep = "." ) { - # check that required packages are installed (since these are Suggests for this package) - if (!requireNamespace("multcompView", quietly = TRUE)) { - stop("Package 'multicompView' is required for this function. Please install it.") - } - if (!requireNamespace("purrr", quietly = TRUE)) { - stop("Package 'purrr' is required for this function. Please install it.") - } - if (!requireNamespace("stringr", quietly = TRUE)) { - stop("Package 'stringr' is required for this function. Please install it.") - } - # Input validation # Check for required components if ( @@ -129,7 +118,7 @@ betta_posthoc <- function( mm <- model.matrix(rhs_formula, data = combos) # Generate all pairwise comparisons - pairwise_indices <- combn(nrow(combos), 2) + pairwise_indices <- utils::combn(nrow(combos), 2) # Perform pairwise comparisons using betta_lincom results_df <- purrr::map_dfr(seq_len(ncol(pairwise_indices)), function(i) { diff --git a/R/utility.R b/R/utility.R index 852ad30..796be3d 100644 --- a/R/utility.R +++ b/R/utility.R @@ -51,7 +51,7 @@ acomb3 <- function(...) abind(..., along = 3) #' @param automatic_cutoff Choose detection cutoff automatically? Default is #' FALSE. If TRUE, detection_cutoff will be set equal to the maximum proportion #' of samples any taxon is detected in. -#' @value Index corresponding to taxon chosen as base taxon +#' @return Index corresponding to taxon chosen as base taxon #' @author Amy Willis #' @export pick_base <- function(W, diff --git a/man/betta_posthoc.Rd b/man/betta_posthoc.Rd new file mode 100644 index 0000000..8060acb --- /dev/null +++ b/man/betta_posthoc.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betta_posthoc.R +\name{betta_posthoc} +\alias{betta_posthoc} +\title{Post-hoc pairwise comparisons and compact letter display from diversity test object} +\usage{ +betta_posthoc( + test_obj, + metadata, + p_adjust_method = "BH", + alpha = 0.05, + letter_set = LETTERS, + human_sep = "." +) +} +\arguments{ +\item{test_obj}{An object returned from the initial diversity test function (`betta()`). +This object must contain a formula accessible as `test_obj$function.args$formula`, +where the RHS defines the model terms used in `model.matrix`.} + +\item{metadata}{A data frame containing the variables used in the model formula. +Each variable should correspond to a column used in `test_obj$function.args$formula`.} + +\item{p_adjust_method}{Character string specifying the p-value adjustment method +(default `"BH"`). See `?p.adjust` for available methods.} + +\item{alpha}{Numeric significance level used for compact letter display grouping (default `0.05`).} + +\item{letter_set}{Character vector of letters or numbers used for compact letter display labels +(default `LETTERS`).} + +\item{human_sep}{Character string used as separator for output group labels (default `"."`).} +} +\value{ +A list with two components: +\describe{ + \item{results_df}{A data frame containing pairwise comparison results with columns: + `group_1`, `group_2`, `Estimates`, `Standard Errors`, `Lower CIs`, `Upper CIs`, + `p_value`, `p_adjusted`, and `significance`.} + \item{cld_df}{A data frame containing group-level compact letter display labels. + This data frame contains one row per group with columns corresponding to + the original variables from the formula, a `cld` column for the letter code, + and group-level estimates: `estimate`, `std_error`, `lower_ci`, `upper_ci`.} +} +} +\description{ +Performs pairwise post-hoc linear combination tests based on a fitted betta model object +(`test_obj`) that contains a formula of the form `estimate ~ ...`. The function +extracts the model formula and metadata variables, constructs all pairwise +comparisons between levels, runs `betta_lincom()` on each linear contrast, +adjusts p-values, computes significance codes, and returns compact letter display (CLD) +indicating significantly different groups. +} diff --git a/man/generate_cld.Rd b/man/generate_cld.Rd new file mode 100644 index 0000000..0156c76 --- /dev/null +++ b/man/generate_cld.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betta_posthoc.R +\name{generate_cld} +\alias{generate_cld} +\title{Helper function to generate compact letter display} +\usage{ +generate_cld(pvals, alpha = 0.05, letter_set = LETTERS) +} +\arguments{ +\item{pvals}{Named vector of p-values for pairwise comparisons} + +\item{alpha}{Significance threshold} + +\item{letter_set}{Character vector of letters to use} +} +\value{ +Named vector of compact letter display codes +} +\description{ +Helper function to generate compact letter display +} +\keyword{internal} diff --git a/man/new_betta_posthoc.Rd b/man/new_betta_posthoc.Rd new file mode 100644 index 0000000..55cc9bf --- /dev/null +++ b/man/new_betta_posthoc.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betta_posthoc.R +\name{new_betta_posthoc} +\alias{new_betta_posthoc} +\title{Create a betta_posthoc object with proper class} +\usage{ +new_betta_posthoc(results_df, cld_df) +} +\arguments{ +\item{results_df}{Pairwise comparison results} + +\item{cld_df}{Compact letter display results} +} +\value{ +A betta_posthoc object +} +\description{ +Create a betta_posthoc object with proper class +} +\keyword{internal} diff --git a/man/pick_base.Rd b/man/pick_base.Rd index c780238..15a6aaf 100644 --- a/man/pick_base.Rd +++ b/man/pick_base.Rd @@ -17,6 +17,9 @@ unless automatic_cutoff is set to TRUE.} FALSE. If TRUE, detection_cutoff will be set equal to the maximum proportion of samples any taxon is detected in.} } +\value{ +Index corresponding to taxon chosen as base taxon +} \description{ Picks the base taxon to be used in divnet fit. If no taxon is detected in all samples, returns error; in this case, we recommend manually choosing diff --git a/tests/testthat/test-betta_posthoc.R b/tests/testthat/test-betta_posthoc.R new file mode 100644 index 0000000..b7e9a37 --- /dev/null +++ b/tests/testthat/test-betta_posthoc.R @@ -0,0 +1,17 @@ +data("Lee") +lee_phy <- phyloseq::tax_glom(Lee, taxrank = "Phylum") + +test_that("betta_posthoc function works as expected", { + div_obj <- divnet(lee_phy) + bett_obj <- betta(data = data.frame(sample_data(lee_phy), + chats = sapply(div_obj$shannon, function(x) x$estimate), + ses = sapply(div_obj$shannon, function(x) x$error)), + formula = chats ~ char, + ses = "ses") + bett_post <- betta_posthoc(test_obj = bett_obj, + metadata = data.frame(sample_data(lee_phy))) + expect_true(all.equal(bett_obj$table[-1, 1] %>% unname(), + -bett_post$results_df$Estimates[1:4] %>% unname())) + # we expect estimates to align when comparing baseline level to other levels, not + # p-values because of differences in how these are calculated in betta and betta_lincom +})