Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ jobs:
- {os: windows-latest, r: "release"}
- {os: macos-15-intel, r: "release"} # Until Intel architecture retired 2027-11
- {os: macOS-latest, r: "release"}
- {os: ubuntu-24.04-arm, r: "release", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}
- {os: ubuntu-24.04, r: "3.6", rspm: "https://packagemanager.posit.co/cran/2022-10-11"}
- {os: ubuntu-24.04, r: "4.0", rspm: "https://packagemanager.posit.co/cran/2022-10-11"}
- {os: ubuntu-24.04-arm, r: "release", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}
- {os: ubuntu-24.04, r: "devel", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}

env:
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: TreeTools
Title: Create, Modify and Analyse Phylogenetic Trees
Version: 2.0.0.9006
Version: 2.0.0.9007
Authors@R: c(
person("Martin R.", 'Smith', role = c("aut", "cre", "cph"),
email = "martin.smith@durham.ac.uk",
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ S3method(Pruningwise,list)
S3method(Pruningwise,multiPhylo)
S3method(Pruningwise,phylo)
S3method(RenumberTips,"NULL")
S3method(RenumberTips,Splits)
S3method(RenumberTips,list)
S3method(RenumberTips,multiPhylo)
S3method(RenumberTips,phylo)
Expand Down Expand Up @@ -294,6 +295,7 @@ export(EdgeAncestry)
export(EdgeDistances)
export(EndSentence)
export(ExtractTaxa)
export(FirstMatchingSplit)
export(Hamming)
export(IC1Spr)
export(ImposeConstraint)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# TreeTools 2.0.0.9006 (development) #
# TreeTools 2.0.0.9007 (development) #
- Add `FirstMatchingSplit()` to find the first split in one `Splits` object that also occurs in another.
- Add method `RenumberTips.Splits()`.
- Support logical `pole` in `PolarizeSplits()`.
- `RenumberTree()` supports numeric `tipOrder` input.

Expand Down
8 changes: 8 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ descendant_tips <- function(parent, child, postorder) {
.Call(`_TreeTools_descendant_tips`, parent, child, postorder)
}

# Thin wrapper: forwards `x` and `table` unchanged to the compiled routine
# `_TreeTools_first_matching_split_pair` via `.Call()` and returns its result.
# NOTE(review): RcppExports files are normally regenerated by
# Rcpp::compileAttributes(); confirm hand-written comments survive.
first_matching_split_pair <- function(x, table) {
.Call(`_TreeTools_first_matching_split_pair`, x, table)
}

# Thin wrapper: forwards `x` and `table` unchanged to the compiled routine
# `_TreeTools_first_matching_split_index` via `.Call()` and returns its result.
# NOTE(review): RcppExports files are normally regenerated by
# Rcpp::compileAttributes(); confirm hand-written comments survive.
first_matching_split_index <- function(x, table) {
.Call(`_TreeTools_first_matching_split_index`, x, table)
}

# Thin wrapper: forwards `n` and `nTip` unchanged to the compiled routine
# `_TreeTools_num_to_parent` via `.Call()` and returns its result.
num_to_parent <- function(n, nTip) {
.Call(`_TreeTools_num_to_parent`, n, nTip)
}
Expand Down
28 changes: 28 additions & 0 deletions R/match.R
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,31 @@ setMethod("%in%",
signature(x = "phylo", table = "phylo"),
function(x, table) all.equal(x, table))

#' @rdname match.Splits
#' @param x,table Splits objects
#' @param return Which index to return: in `x`, in `table`, or both
#' @return `FirstMatchingSplit()` returns an integer
#' (or length-2 integer if `return = "both"`) specifying the first split in `x`
#' to have a match in `table` (`return = "x"`),
#' or the index of that match (`return = "table"`).
#' With `return = "both"`, the index in `x` is given first, followed by the
#' index in `table`.
#' `nomatch` (default `0`) is returned in the absence of a match.
#' @export
FirstMatchingSplit <- function(x, table, nomatch = 0L,
                               return = c("x", "table", "both")) {
  # Reject an invalid `return` before any conversion or matching work is done
  return <- match.arg(return)

  if (!inherits(x, "Splits")) {
    x <- as.Splits(x)
  }
  # Express `table` using the tip labels of `x` so that rows are comparable
  table <- as.Splits(table, x)

  # Length-two integer: index in `x`, index in `table`; zeroes denote no match
  ij <- first_matching_split_pair(x, table)
  ij[ij == 0L] <- nomatch

  switch(return,
         x = ij[[1L]],
         table = ij[[2L]],
         both = ij)
}
10 changes: 10 additions & 0 deletions R/tree_numbering.R
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,16 @@ RenumberTips.phylo <- function(tree, tipOrder) {
tree
}

#' @rdname RenumberTips
#' @export
RenumberTips.Splits <- function(tree, tipOrder) {
  newLabels <- if (is.character(tipOrder)) {
    # Labels supplied directly, in the desired order
    tipOrder
  } else if (is.numeric(tipOrder)) {
    # Numeric input indexes the existing labels of `tree`
    TipLabels(tree)[tipOrder]
  } else {
    # Any other input is treated as an object bearing tip labels;
    # previously this case fell through and silently returned NULL
    TipLabels(tipOrder)
  }

  if (is.null(newLabels)) {
    stop("Could not determine tip labels from `tipOrder`.", call. = FALSE)
  }

  as.Splits(tree, newLabels)
}

#' @rdname RenumberTips
#' @export
RenumberTips.multiPhylo <- function(tree, tipOrder) {
Expand Down
3 changes: 3 additions & 0 deletions man/RenumberTips.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion man/match.Splits.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,30 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// first_matching_split_pair
// Forward declaration of the C++ implementation, followed by the entry point
// registered for `.Call()`: it converts both SEXP arguments to RawMatrix,
// invokes the implementation and wraps the result back into a SEXP.
// NOTE(review): this glue is normally regenerated by Rcpp::compileAttributes();
// confirm hand-written comments survive regeneration.
IntegerVector first_matching_split_pair(const RawMatrix x, const RawMatrix table);
RcppExport SEXP _TreeTools_first_matching_split_pair(SEXP xSEXP, SEXP tableSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP);
Rcpp::traits::input_parameter< const RawMatrix >::type table(tableSEXP);
rcpp_result_gen = Rcpp::wrap(first_matching_split_pair(x, table));
return rcpp_result_gen;
END_RCPP
}
// first_matching_split_index
// Forward declaration of the C++ implementation, followed by the entry point
// registered for `.Call()`: it converts both SEXP arguments to RawMatrix,
// invokes the implementation and wraps the integer result into a SEXP.
// NOTE(review): this glue is normally regenerated by Rcpp::compileAttributes();
// confirm hand-written comments survive regeneration.
int first_matching_split_index(const RawMatrix x, const RawMatrix table);
RcppExport SEXP _TreeTools_first_matching_split_index(SEXP xSEXP, SEXP tableSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP);
Rcpp::traits::input_parameter< const RawMatrix >::type table(tableSEXP);
rcpp_result_gen = Rcpp::wrap(first_matching_split_index(x, table));
return rcpp_result_gen;
END_RCPP
}
// num_to_parent
IntegerVector num_to_parent(const IntegerVector n, const IntegerVector nTip);
RcppExport SEXP _TreeTools_num_to_parent(SEXP nSEXP, SEXP nTipSEXP) {
Expand Down Expand Up @@ -458,6 +482,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_TreeTools_descendant_edges", (DL_FUNC) &_TreeTools_descendant_edges, 3},
{"_TreeTools_descendant_edges_single", (DL_FUNC) &_TreeTools_descendant_edges_single, 5},
{"_TreeTools_descendant_tips", (DL_FUNC) &_TreeTools_descendant_tips, 3},
{"_TreeTools_first_matching_split_pair", (DL_FUNC) &_TreeTools_first_matching_split_pair, 2},
{"_TreeTools_first_matching_split_index", (DL_FUNC) &_TreeTools_first_matching_split_index, 2},
{"_TreeTools_num_to_parent", (DL_FUNC) &_TreeTools_num_to_parent, 2},
{"_TreeTools_random_parent", (DL_FUNC) &_TreeTools_random_parent, 2},
{"_TreeTools_edge_to_num", (DL_FUNC) &_TreeTools_edge_to_num, 3},
Expand Down
121 changes: 121 additions & 0 deletions src/first_matching_split.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#include <Rcpp.h>
#include <unordered_map>
#include <string>

using namespace Rcpp;

// Writes into `key` a byte signature of row `i` of `M` that is identical for
// a split and its complement, so either orientation hashes to the same entry.
//
// check_bins: number of leading bins copied into the key.
// n_bin:      total number of bins (columns) in `M`.
// n_spare:    number of bits in use in the final bin (0 = final bin is full).
//
// Orientation rule:
//   n_spare == 0 : rows whose first bin has its lowest bit clear are inverted.
//   n_spare == 1 : rows whose final bin is zero are inverted.
//   otherwise    : rows whose first bin has its lowest bit set are inverted,
//                  and the padding bits of the last copied bin are flipped
//                  back after inversion.
static inline void canonicalize_row_key(std::string &key,
                                        const RawMatrix &M,
                                        int i,
                                        int check_bins,
                                        int n_bin,
                                        int n_spare) {
  key.resize(check_bins);

  // Step 1: decide whether this row is stored in its complemented form.
  bool flip;
  if (n_spare == 1) {
    flip = static_cast<unsigned char>(M(i, n_bin - 1)) == 0u;
  } else {
    const bool low_bit_set =
      (static_cast<unsigned char>(M(i, 0)) & 0x01u) != 0u;
    flip = (n_spare == 0) ? !low_bit_set : low_bit_set;
  }

  // Step 2: rows already in canonical orientation are copied verbatim.
  if (!flip) {
    for (int b = 0; b < check_bins; ++b) {
      key[b] = static_cast<char>(M(i, b));
    }
    return;
  }

  // Step 3: otherwise store the bitwise complement of every copied bin.
  for (int b = 0; b < check_bins; ++b) {
    const unsigned char byte = static_cast<unsigned char>(M(i, b));
    key[b] = static_cast<char>(static_cast<unsigned char>(~byte));
  }

  // Step 4: with several bits in use in the last bin, inversion has also
  // toggled its padding bits; toggle them again so they match the original.
  if (n_spare > 1) {
    const unsigned int in_use = (1u << n_spare) - 1u;  // low n_spare bits set
    const unsigned char padding = static_cast<unsigned char>(~in_use);
    const int last = check_bins - 1;
    key[last] = static_cast<char>(
      static_cast<unsigned char>(static_cast<unsigned char>(key[last]) ^ padding));
  }
}

// Finds the first row of `x` whose split (in either orientation) also occurs
// in `table`. Returns a length-two integer vector of 1-based indices:
// the matching row of `x`, then the first row of `table` holding that split.
// Returns (0, 0) when either input has no rows or when nothing matches.
// Raises an R error if the `nTip` attributes are missing or differ, if the
// column counts differ, or if the column count is inconsistent with `nTip`.
// [[Rcpp::export]]
IntegerVector first_matching_split_pair(const RawMatrix x, const RawMatrix table) {
  // Both inputs must state how many tips they describe
  if (!(x.hasAttribute("nTip") && table.hasAttribute("nTip"))) {
    stop("Both `x` and `table` must have an `nTip` attribute.");
  }

  const int n_tip = as<IntegerVector>(x.attr("nTip"))[0];
  const int table_tips = as<IntegerVector>(table.attr("nTip"))[0];
  if (n_tip != table_tips) {
    stop("`x` and `table` must have the same number of tips.");
  }

  // Nothing can match if either side is empty
  const int x_rows = x.rows();
  const int table_rows = table.rows();
  if (x_rows == 0 || table_rows == 0) {
    return IntegerVector::create(0, 0);
  }

  const int n_bin = x.cols();
  if (n_bin != table.cols()) {
    stop("`x` and `table` have incompatible bin counts.");
  }

  // Each column holds eight tips; the column count must agree with nTip
  const int bits_per_bin = 8;
  if (n_bin != ((n_tip - 1) / bits_per_bin) + 1) {
    stop("`nTip` inconsistent with number of bins.");
  }

  const int n_spare = n_tip % bits_per_bin;
  // A final bin holding a single tip is used only to orient the row,
  // so it is left out of the key
  const int check_bins = (n_spare == 1) ? n_bin - 1 : n_bin;

  // Index every split of `table` by its orientation-independent key
  std::unordered_map<std::string, int> first_seen;
  first_seen.reserve(static_cast<size_t>(table_rows) * 2u);

  std::string key;
  key.reserve(static_cast<size_t>(check_bins));

  for (int row = 0; row != table_rows; ++row) {
    canonicalize_row_key(key, table, row, check_bins, n_bin, n_spare);
    // emplace() leaves an existing entry untouched: the earliest row wins
    first_seen.emplace(key, row + 1);
  }

  // Scan `x` in order and stop at the first key present in the index
  for (int row = 0; row != x_rows; ++row) {
    canonicalize_row_key(key, x, row, check_bins, n_bin, n_spare);
    const auto hit = first_seen.find(key);
    if (hit == first_seen.end()) {
      continue;
    }
    return IntegerVector::create(row + 1, hit->second);
  }

  return IntegerVector::create(0, 0);
}

// Convenience: only the index in `x` (0 if none)
// [[Rcpp::export]]
int first_matching_split_index(const RawMatrix x, const RawMatrix table) {
IntegerVector ij = first_matching_split_pair(x, table);
return ij[0];
}
1 change: 1 addition & 0 deletions tests/testthat/test-ClusterTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ test_that("ClusterTable with multiple trees", {

test_that("ClusterTable with complex trees", {
skip_if_not_installed("TreeDist", "2.9.2.9000")
skip_on_cran()
library("TreeDist")

# Test exposes failures in C++ - constexpr not playing nicely with Rcpp
Expand Down
34 changes: 34 additions & 0 deletions tests/testthat/test-FirstMatchingSplit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
test_that("FirstMatchingSplit() fails gracefully", {
  # A bare raw vector supplied as `table` is expected to raise an error
  # whose message mentions "Splits"
  bal13 <- BalancedTree(13)
  expect_error(FirstMatchingSplit(bal13, raw(13)), "Splits")
})

test_that("FirstMatchingSplit() works", {
  balSplits <- as.Splits(BalancedTree(13))
  pecSplits <- as.Splits(PectinateTree(13))
  starSplits <- as.Splits(StarTree(13))

  # Tree input and Splits input must give the same answer
  bothIndices <- FirstMatchingSplit(balSplits, pecSplits, return = "both")
  expect_equal(
    FirstMatchingSplit(BalancedTree(13), PectinateTree(13), return = "both"),
    bothIndices
  )

  # Cross-check against the vectorized matching functions
  expectedX <- which(balSplits %in% pecSplits)[[1]]
  expect_equal(FirstMatchingSplit(balSplits, pecSplits), expectedX)
  expect_equal(
    bothIndices,
    c(expectedX, match(balSplits[[expectedX]], pecSplits))
  )

  # No shared split: default sentinel, then a user-supplied `nomatch`
  expect_equal(FirstMatchingSplit(balSplits, starSplits), 0)
  expect_equal(
    FirstMatchingSplit(starSplits, balSplits, nomatch = NA),
    NA_integer_
  )

  # Check robustness to label order
  reordered <- RenumberTips(pecSplits, TipLabels(13:1))[[-1]]
  # Split 1 is t1, t2, t3; not t11, t12, t13
  # So first match is split 2 (t1:4).
  expect_equal(
    FirstMatchingSplit(reordered, balSplits, return = "both"),
    c(2, 2)
  )
})
Loading
Loading