diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index a0d1703f..b9472d53 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -95,9 +95,9 @@ jobs: - {os: windows-latest, r: "release"} - {os: macos-15-intel, r: "release"} # Until Intel architecture retired 2027-11 - {os: macOS-latest, r: "release"} - - {os: ubuntu-24.04-arm, r: "release", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} - {os: ubuntu-24.04, r: "3.6", rspm: "https://packagemanager.posit.co/cran/2022-10-11"} - {os: ubuntu-24.04, r: "4.0", rspm: "https://packagemanager.posit.co/cran/2022-10-11"} + - {os: ubuntu-24.04-arm, r: "release", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} - {os: ubuntu-24.04, r: "devel", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} env: diff --git a/DESCRIPTION b/DESCRIPTION index 63aeef70..2129c215 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TreeTools Title: Create, Modify and Analyse Phylogenetic Trees -Version: 2.0.0.9006 +Version: 2.0.0.9007 Authors@R: c( person("Martin R.", 'Smith', role = c("aut", "cre", "cph"), email = "martin.smith@durham.ac.uk", diff --git a/NAMESPACE b/NAMESPACE index e3885c0f..2a974583 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -116,6 +116,7 @@ S3method(Pruningwise,list) S3method(Pruningwise,multiPhylo) S3method(Pruningwise,phylo) S3method(RenumberTips,"NULL") +S3method(RenumberTips,Splits) S3method(RenumberTips,list) S3method(RenumberTips,multiPhylo) S3method(RenumberTips,phylo) @@ -294,6 +295,7 @@ export(EdgeAncestry) export(EdgeDistances) export(EndSentence) export(ExtractTaxa) +export(FirstMatchingSplit) export(Hamming) export(IC1Spr) export(ImposeConstraint) diff --git a/NEWS.md b/NEWS.md index bceccceb..a6c0dad7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ -# TreeTools 2.0.0.9006 (development) # +# TreeTools 2.0.0.9007 (development) # - `FirstMatchingSplit()`... +- Add method `RenumberTips.Splits()`. - Support logical `pole` in `PolarizeSplits()`. - `RenumberTree()` supports numeric `tipOrder` input. diff --git a/R/RcppExports.R b/R/RcppExports.R index 65c8e627..af2c634a 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -41,6 +41,14 @@ descendant_tips <- function(parent, child, postorder) { .Call(`_TreeTools_descendant_tips`, parent, child, postorder) } +first_matching_split_pair <- function(x, table) { + .Call(`_TreeTools_first_matching_split_pair`, x, table) +} + +first_matching_split_index <- function(x, table) { + .Call(`_TreeTools_first_matching_split_index`, x, table) +} + num_to_parent <- function(n, nTip) { .Call(`_TreeTools_num_to_parent`, n, nTip) } diff --git a/R/match.R b/R/match.R index 840bec1d..66f8e07d 100644 --- a/R/match.R +++ b/R/match.R @@ -218,3 +218,31 @@ setMethod("%in%", signature(x = "phylo", table = "phylo"), function(x, table) all.equal(x, table)) +#' @rdname match.Splits +#' @param x,table Splits objects +#' @param return Which index to return: in `x`, in `table`, or both +#' @return `FirstMatchingSplit()` returns an integer +#' (or length-2 integer if `return = "both"`) specifying the first split in `x` +#' to have a match in `table` (`return = "x"`), +#' or the index of that match (`return = "table"`). +#' `nomatch` (default `0`) is returned in the absence of a match. +#' @export +FirstMatchingSplit <- function(x, table, nomatch, + return = c("x", "table", "both")) { + if (!inherits(x, "Splits")) { + x <- as.Splits(x) + } + table <- as.Splits(table, x) + ij <- first_matching_split_pair(x, table) + + if (!missing(nomatch)) { + ij[ij == 0] <- nomatch + } + + # Return: + return <- match.arg(return) + switch(return, + x = ij[[1L]], + table = ij[[2L]], + both = ij) +} diff --git a/R/tree_numbering.R b/R/tree_numbering.R index 3187ba93..49264c46 100644 --- a/R/tree_numbering.R +++ b/R/tree_numbering.R @@ -650,6 +650,16 @@ RenumberTips.phylo <- function(tree, tipOrder) { tree } +#' @rdname RenumberTips +#' @export +RenumberTips.Splits <- function(tree, tipOrder) { + if (is.character(tipOrder)) { + as.Splits(tree, tipOrder) + } else if (is.numeric(tipOrder)) { + as.Splits(tree, TipLabels(tree)[tipOrder]) + } +} + #' @rdname RenumberTips #' @export RenumberTips.multiPhylo <- function(tree, tipOrder) { diff --git a/man/RenumberTips.Rd b/man/RenumberTips.Rd index b439c690..6a0b1f86 100644 --- a/man/RenumberTips.Rd +++ b/man/RenumberTips.Rd @@ -3,6 +3,7 @@ \name{RenumberTips} \alias{RenumberTips} \alias{RenumberTips.phylo} +\alias{RenumberTips.Splits} \alias{RenumberTips.multiPhylo} \alias{RenumberTips.list} \alias{RenumberTips.NULL} @@ -12,6 +13,8 @@ RenumberTips(tree, tipOrder) \method{RenumberTips}{phylo}(tree, tipOrder) +\method{RenumberTips}{Splits}(tree, tipOrder) + \method{RenumberTips}{multiPhylo}(tree, tipOrder) \method{RenumberTips}{list}(tree, tipOrder) diff --git a/man/match.Splits.Rd b/man/match.Splits.Rd index fd4352e8..fa1012d8 100644 --- a/man/match.Splits.Rd +++ b/man/match.Splits.Rd @@ -4,6 +4,7 @@ \alias{match,Splits,Splits-method} \alias{match} \alias{\%in\%,Splits,Splits-method} +\alias{FirstMatchingSplit} \title{Split matching} \usage{ \S4method{match}{Splits,Splits}(x, table, nomatch = NA_integer_, incomparables = NULL) @@ -11,18 +12,28 @@ match(x, table, nomatch = NA_integer_, incomparables = NULL) \S4method{\%in\%}{Splits,Splits}(x, table) + +FirstMatchingSplit(x, table, nomatch, return = c("x", "table", "both")) } \arguments{ -\item{x, table}{Object of class \code{Splits}.} +\item{x, table}{Splits objects} \item{nomatch}{Integer value that will be used in place of \code{NA} in the case where no match is found.} \item{incomparables}{Ignored. (Included for consistency with generic.)} + +\item{return}{Which index to return: in \code{x}, in \code{table}, or both} } \value{ \code{match()} returns an integer vector specifying the position in \code{table} that matches each element in \code{x}, or \code{nomatch} if no match is found. + +\code{FirstMatchingSplit()} returns an integer +(or length-2 integer if \code{return = "both"}) specifying the first split in \code{x} +to have a match in \code{table} (\code{return = "x"}), +or the index of that match (\code{return = "table"}). +\code{nomatch} (default \code{0}) is returned in the absence of a match. } \description{ \code{match()} returns a vector of the positions of (first) matches of splits in diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 3431c07f..72e00596 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -138,6 +138,30 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// first_matching_split_pair +IntegerVector first_matching_split_pair(const RawMatrix x, const RawMatrix table); +RcppExport SEXP _TreeTools_first_matching_split_pair(SEXP xSEXP, SEXP tableSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP); + Rcpp::traits::input_parameter< const RawMatrix >::type table(tableSEXP); + rcpp_result_gen = Rcpp::wrap(first_matching_split_pair(x, table)); + return rcpp_result_gen; +END_RCPP +} +// first_matching_split_index +int first_matching_split_index(const RawMatrix x, const RawMatrix table); +RcppExport SEXP _TreeTools_first_matching_split_index(SEXP xSEXP, SEXP tableSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP); + Rcpp::traits::input_parameter< const RawMatrix >::type table(tableSEXP); + rcpp_result_gen = Rcpp::wrap(first_matching_split_index(x, table)); + return rcpp_result_gen; +END_RCPP +} // num_to_parent IntegerVector num_to_parent(const IntegerVector n, const IntegerVector nTip); RcppExport SEXP _TreeTools_num_to_parent(SEXP nSEXP, SEXP nTipSEXP) { @@ -458,6 +482,8 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeTools_descendant_edges", (DL_FUNC) &_TreeTools_descendant_edges, 3}, {"_TreeTools_descendant_edges_single", (DL_FUNC) &_TreeTools_descendant_edges_single, 5}, {"_TreeTools_descendant_tips", (DL_FUNC) &_TreeTools_descendant_tips, 3}, + {"_TreeTools_first_matching_split_pair", (DL_FUNC) &_TreeTools_first_matching_split_pair, 2}, + {"_TreeTools_first_matching_split_index", (DL_FUNC) &_TreeTools_first_matching_split_index, 2}, {"_TreeTools_num_to_parent", (DL_FUNC) &_TreeTools_num_to_parent, 2}, {"_TreeTools_random_parent", (DL_FUNC) &_TreeTools_random_parent, 2}, {"_TreeTools_edge_to_num", (DL_FUNC) &_TreeTools_edge_to_num, 3}, diff --git a/src/first_matching_split.cpp b/src/first_matching_split.cpp new file mode 100644 index 00000000..2d47c45f --- /dev/null +++ b/src/first_matching_split.cpp @@ -0,0 +1,121 @@ +#include +#include +#include + +using namespace Rcpp; + +// Helper: fill `key` with the canonicalized bytes for row `i` +// so that complements map to the same key. We follow the logic pattern +// in duplicated_splits(): choose an orientation based on a sentinel bit, +// and when inverting, fix the spare bits in the last used bin. +static inline void canonicalize_row_key(std::string &key, + const RawMatrix &M, + int i, + int check_bins, + int n_bin, + int n_spare) { + key.resize(check_bins); + + // mask with ones in the UNUSED bits of the last bin, when n_spare > 0. + unsigned char unused_mask = 0u; + if (n_spare > 0) { + unsigned int used_mask = (1u << n_spare) - 1u; // low n_spare bits set + unused_mask = static_cast(~used_mask); // high bits set + } + + if (n_spare == 0) { + // Decide orientation by LSB of first bin + const bool keep = (static_cast(M(i, 0)) & 0x01u) != 0u; + if (keep) { + for (int b = 0; b < check_bins; ++b) key[b] = static_cast(M(i, b)); + } else { + for (int b = 0; b < check_bins; ++b) + key[b] = static_cast(~static_cast(M(i, b))); + } + } else if (n_spare == 1) { + // Decide orientation by non-zero last bin + const bool keep = static_cast(M(i, n_bin - 1)) != 0u; + if (keep) { + for (int b = 0; b < check_bins; ++b) key[b] = static_cast(M(i, b)); + } else { + for (int b = 0; b < check_bins; ++b) + key[b] = static_cast(~static_cast(M(i, b))); + } + } else { + // Multiple spare bits: + // If LSB of first bin is 1, invert all bins; after inversion, + // fix the last bin's unused bits by XOR with `unused_mask`. + const bool invert = (static_cast(M(i, 0)) & 0x01u) != 0u; + if (!invert) { + for (int b = 0; b < check_bins; ++b) key[b] = static_cast(M(i, b)); + } else { + for (int b = 0; b < check_bins - 1; ++b) + key[b] = static_cast(~static_cast(M(i, b))); + unsigned char last = static_cast(~static_cast(M(i, check_bins - 1))); + last ^= unused_mask; // zero-out the unused bits after inversion + key[check_bins - 1] = static_cast(last); + } + } +} + +// [[Rcpp::export]] +IntegerVector first_matching_split_pair(const RawMatrix x, const RawMatrix table) { + // Validate attributes and shapes + if (!x.hasAttribute("nTip") || !table.hasAttribute("nTip")) + stop("Both `x` and `table` must have an `nTip` attribute."); + + const int n_tip_x = as(x.attr("nTip"))[0]; + const int n_tip_t = as(table.attr("nTip"))[0]; + if (n_tip_x != n_tip_t) + stop("`x` and `table` must have the same number of tips."); + + const int n_split_x = x.rows(); + const int n_split_t = table.rows(); + if (n_split_x == 0 || n_split_t == 0) return IntegerVector::create(0, 0); + + const int n_bin_x = x.cols(); + const int n_bin_t = table.cols(); + if (n_bin_x != n_bin_t) + stop("`x` and `table` have incompatible bin counts."); + const int n_bin = n_bin_x; + + // Compute bin layout + const int BIN_SIZE = 8; + const int expected_n_bin = ((n_tip_x - 1) / BIN_SIZE) + 1; + if (expected_n_bin != n_bin) + stop("`nTip` inconsistent with number of bins."); + + const int n_spare = n_tip_x % BIN_SIZE; + const int check_bins = n_bin - ((n_spare == 1) ? 1 : 0); + + // Build hash from table + std::unordered_map H; + H.reserve(static_cast(n_split_t) * 2u); + + std::string key; + key.reserve(static_cast(check_bins)); + + for (int j = 0; j < n_split_t; ++j) { + canonicalize_row_key(key, table, j, check_bins, n_bin, n_spare); + // keep first occurrence + H.emplace(key, j + 1); + } + + // Probe with x; return first hit (1-based indices) + for (int i = 0; i < n_split_x; ++i) { + canonicalize_row_key(key, x, i, check_bins, n_bin, n_spare); + auto it = H.find(key); + if (it != H.end()) { + return IntegerVector::create(i + 1, it->second); + } + } + + return IntegerVector::create(0, 0); +} + +// Convenience: only the index in `x` (0 if none) +// [[Rcpp::export]] +int first_matching_split_index(const RawMatrix x, const RawMatrix table) { + IntegerVector ij = first_matching_split_pair(x, table); + return ij[0]; +} diff --git a/tests/testthat/test-ClusterTable.R b/tests/testthat/test-ClusterTable.R index 5b2a8644..50f36b89 100644 --- a/tests/testthat/test-ClusterTable.R +++ b/tests/testthat/test-ClusterTable.R @@ -114,6 +114,7 @@ test_that("ClusterTable with multiple trees", { test_that("ClusterTable with complex trees", { skip_if_not_installed("TreeDist", "2.9.2.9000") + skip_on_cran() library("TreeDist") # Test exposes failures in C++ - constexpr not playing nicely with Rcpp diff --git a/tests/testthat/test-FirstMatchingSplit.R b/tests/testthat/test-FirstMatchingSplit.R new file mode 100644 index 00000000..95f65893 --- /dev/null +++ b/tests/testthat/test-FirstMatchingSplit.R @@ -0,0 +1,34 @@ +test_that("FirstMatchingSplit() fails gracefully", { + bal13 <- BalancedTree(13) + pec13 <- PectinateTree(13) + expect_error(FirstMatchingSplit(bal13, raw(13)), + "Splits") +}) + +test_that("FirstMatchingSplit() works", { + bal13 <- as.Splits(BalancedTree(13)) + pec13 <- as.Splits(PectinateTree(13)) + + expect_equal( + FirstMatchingSplit(BalancedTree(13), PectinateTree(13), return = "both"), + FirstMatchingSplit(bal13, pec13, return = "both") + ) + + + firstMatch <- which(bal13 %in% pec13)[[1]] + expect_equal(FirstMatchingSplit(bal13, pec13), firstMatch) + + expect_equal( + FirstMatchingSplit(bal13, pec13, return = "both"), + c(firstMatch, match(bal13[[firstMatch]], pec13)) + ) + expect_equal(FirstMatchingSplit(bal13, as.Splits(StarTree(13))), 0) + expect_equal(FirstMatchingSplit(as.Splits(StarTree(13)), bal13, nomatch = NA), + NA_integer_) + + # Check robustness to label order + ren13 <- RenumberTips(pec13, TipLabels(13:1))[[-1]] + # Split 1 is t1, t2, t3; not t11, t12, t13 + # So first match is split 2 (t1:4). + expect_equal(FirstMatchingSplit(ren13, bal13, return = "both"), c(2, 2)) +}) diff --git a/tests/testthat/test-tree_numbering.R b/tests/testthat/test-tree_numbering.R index 38de83e5..6aff8e2b 100644 --- a/tests/testthat/test-tree_numbering.R +++ b/tests/testthat/test-tree_numbering.R @@ -107,17 +107,43 @@ test_that("RenumberTips() works correctly", { abcd <- letters[1:4] dcba <- letters[4:1] bal7b <- BalancedTree(dcba) + bsp7b <- as.Splits(bal7b) bal7f <- BalancedTree(abcd) + bsp7f <- as.Splits(bal7f) pec7f <- PectinateTree(abcd) + psp7f <- as.Splits(bal7f) pec7b <- PectinateTree(dcba) + psp7b <- as.Splits(bal7b) l7 <- list("bal7b" = bal7b, "bal7f" = bal7f, "pec7f" = pec7f) f7 <- list(bal7f, bal7f, pec7f) b7 <- list(bal7b, bal7b, pec7b) mp7 <- structure(l7, class = "multiPhylo") - expect_true(all.equal(f7, unname(RenumberTips(l7, abcd)))) - expect_true(all.equal(b7, unname(RenumberTips(l7, dcba)))) + expect_true(all.equal(unname(RenumberTips(l7, dcba)), b7)) + expect_true(all.equal(unname(RenumberTips(l7, 4:1)), b7)) + expect_true(all.equal(unname(RenumberTips(l7, abcd)), f7)) + expect_true(all.equal(unname(RenumberTips(l7, 4:1)), f7)) + + sl7 <- list("bsp7b" = bsp7b, "bsp7f" = bsp7f, "psp7f" = psp7f) + sf7 <- list(bsp7f, bsp7f, psp7f) + sb7 <- list(bsp7b, bsp7b, psp7b) + + expect_splits_same <- function(x, y) { + if (is.list(x)) { + for (i in seq_along(x)) { + expect_splits_same(x[[i]], y[[i]]) + } + } else { + expect_equal(TipLabels(x), TipLabels(y)) + expect_equal(PolarizeSplits(x, 1), PolarizeSplits(y, 1)) + } + } + + expect_splits_same(RenumberTips(bsp7b, abcd), bsp7f) + expect_splits_same(RenumberTips(sl7, dcba), sb7) + expect_splits_same(RenumberTips(sl7, abcd), sf7) + expect_true(all.equal(structure(f7, class = "multiPhylo"), unname(RenumberTips(mp7, abcd))))