From b7428d533307260b5184b16a919a8e4fcce5ce66 Mon Sep 17 00:00:00 2001 From: faithokamoto Date: Wed, 22 Apr 2026 11:12:29 -0700 Subject: [PATCH 1/2] stub in missing transition checker --- src/algorithms/chain_items.cpp | 41 ++++++++++++++++++++++++++++++++++ src/algorithms/chain_items.hpp | 17 ++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/algorithms/chain_items.cpp b/src/algorithms/chain_items.cpp index c84e399ce1..0ff4784a5e 100644 --- a/src/algorithms/chain_items.cpp +++ b/src/algorithms/chain_items.cpp @@ -13,6 +13,7 @@ //#define debug_chaining //#define debug_transition +#define debug_missing_transition //#define debug_dp namespace vg { @@ -262,6 +263,23 @@ transition_iterator zip_tree_transition_iterator(const std::vector missing = \ + find_missing_zip_tree_transitions(seeds, zip_code_tree, max_graph_lookback_bases, + seed_to_starting, seed_to_ending, distance_index, + all_transitions); + if (missing.empty()) { + cerr << "No missing transitions" << endl; + } else { + cerr << "!!Missing transitions:" << endl; + for (const auto& miss : missing) { + cerr << "\t" << miss.from_anchor << " -> " << miss.to_anchor + << " dist " << miss.graph_distance << endl; + } + throw std::runtime_error("Zipcode tree iterator failed to output some transitions"); + } +#endif + std::vector filtered_transitions = calculate_transition_read_distances(all_transitions, to_chain, max_read_lookback_bases); @@ -385,6 +403,29 @@ std::vector generate_zip_tree_transitions( return all_transitions; } +std::vector find_missing_zip_tree_transitions( + const std::vector& seeds, + const ZipCodeTree& zip_code_tree, + size_t max_graph_lookback_bases, + const std::unordered_map& seed_to_starting, + const std::unordered_map& seed_to_ending, + const SnarlDistanceIndex& distance_index, + const std::vector& all_transitions) { + + // {source anchor : {dest anchor : dist}} + std::unordered_map> found; + for (const auto& transition : all_transitions) { + if (!found.count(transition.from_anchor)) { + found[transition.from_anchor] = std::unordered_map(); + } + found[transition.from_anchor][transition.to_anchor] = transition.graph_distance; + } + + std::vector missing; + + return missing; +} + std::vector calculate_transition_read_distances( const std::vector& all_transitions, const VectorView& to_chain, diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp index 9ac7b792e0..29ce1462bb 100644 --- a/src/algorithms/chain_items.hpp +++ b/src/algorithms/chain_items.hpp @@ -477,6 +477,23 @@ std::vector generate_zip_tree_transitions( const std::unordered_map& seed_to_starting, const std::unordered_map& seed_to_ending); +/** + * Check if all possible transitions were actually found. + * + * Iterates over all pairs of seeds and uses the distance index + * to determine if there SHOULD have been a transition. + * + * Returns failed transitions. + */ +std::vector find_missing_zip_tree_transitions( + const std::vector& seeds, + const ZipCodeTree& zip_code_tree, + size_t max_graph_lookback_bases, + const std::unordered_map& seed_to_starting, + const std::unordered_map& seed_to_ending, + const SnarlDistanceIndex& distance_index, + const std::vector& all_transitions); + /** * Calculate read distances for each of the zip tree's transitions. * Also filters out transitions that can't be used, From 02b4506bb19cd3ca8521ddb87b7eb09f9c4129da Mon Sep 17 00:00:00 2001 From: faithokamoto Date: Wed, 22 Apr 2026 15:42:32 -0700 Subject: [PATCH 2/2] fill in distance checker --- src/algorithms/chain_items.cpp | 95 +++++++++++++++++++++++++++++----- src/algorithms/chain_items.hpp | 4 +- 2 files changed, 84 insertions(+), 15 deletions(-) diff --git a/src/algorithms/chain_items.cpp b/src/algorithms/chain_items.cpp index 0ff4784a5e..8bd5a15d53 100644 --- a/src/algorithms/chain_items.cpp +++ b/src/algorithms/chain_items.cpp @@ -13,7 +13,7 @@ //#define debug_chaining //#define debug_transition -#define debug_missing_transition +//#define debug_missing_transition //#define debug_dp namespace vg { @@ -264,19 +264,14 @@ transition_iterator zip_tree_transition_iterator(const std::vector missing = \ + bool has_missing = \ find_missing_zip_tree_transitions(seeds, zip_code_tree, max_graph_lookback_bases, seed_to_starting, seed_to_ending, distance_index, all_transitions); - if (missing.empty()) { - cerr << "No missing transitions" << endl; - } else { - cerr << "!!Missing transitions:" << endl; - for (const auto& miss : missing) { - cerr << "\t" << miss.from_anchor << " -> " << miss.to_anchor - << " dist " << miss.graph_distance << endl; - } + if (has_missing) { throw std::runtime_error("Zipcode tree iterator failed to output some transitions"); + } else { + cerr << "No missing transitions" << endl; } #endif @@ -403,7 +398,7 @@ std::vector generate_zip_tree_transitions( return all_transitions; } -std::vector find_missing_zip_tree_transitions( +bool find_missing_zip_tree_transitions( const std::vector& seeds, const ZipCodeTree& zip_code_tree, size_t max_graph_lookback_bases, @@ -415,15 +410,89 @@ std::vector find_missing_zip_tree_transitions( // {source anchor : {dest anchor : dist}} std::unordered_map> found; for (const auto& transition : all_transitions) { + size_t dist_to_save = transition.graph_distance; if (!found.count(transition.from_anchor)) { found[transition.from_anchor] = std::unordered_map(); } + if (found[transition.from_anchor].count(transition.to_anchor)) { + // If a transition appears multiple times, remember the min + dist_to_save = std::min(transition.graph_distance, + found[transition.from_anchor][transition.to_anchor]); + } found[transition.from_anchor][transition.to_anchor] = transition.graph_distance; } - std::vector missing; + bool has_missing = false; + + // Helper function to check for a distance between two seeds + auto check_distance = [&] (const ZipCodeTree::oriented_seed_t& from_seed, bool rev_from, + const ZipCodeTree::oriented_seed_t& to_seed, bool rev_to) { + // XOR to get appropriate orientations + rev_from ^= from_seed.is_reversed; + rev_to ^= to_seed.is_reversed; + if (rev_from != rev_to) { + // Cannot be compared; incompatible orientations + return; + } + + // Look up appropriate anchors + auto from_anchor_itr = rev_from ? seed_to_starting.find(from_seed.seed) + : seed_to_ending.find(from_seed.seed); + if ((rev_from && from_anchor_itr == seed_to_starting.end()) + || (!rev_from && from_anchor_itr == seed_to_ending.end())) { + // No anchor exists + return; + } + auto to_anchor_itr = rev_to ? seed_to_ending.find(to_seed.seed) + : seed_to_starting.find(to_seed.seed); + if ((rev_to && to_anchor_itr == seed_to_ending.end()) + || (!rev_to && to_anchor_itr == seed_to_starting.end())) { + // No anchor exists + return; + } + + // Construct seed positions + pos_t from_pos = seeds.at(from_seed.seed).pos; + size_t from_length = distance_index.minimum_length(distance_index.get_node_net_handle(id(from_pos))); + from_pos = rev_from ? reverse(from_pos, from_length) + : from_pos; + pos_t to_pos = seeds.at(to_seed.seed).pos; + size_t to_length = distance_index.minimum_length(distance_index.get_node_net_handle(id(to_pos))); + to_pos = rev_to ? reverse(to_pos, to_length) + : to_pos; + + // Look up true minimum distance + size_t true_distance = minimum_nontrivial_distance(distance_index, from_pos, to_pos); + if (true_distance <= max_graph_lookback_bases) { + // We should've found this transition + auto from_anchor = from_anchor_itr->second; + auto to_anchor = to_anchor_itr->second; + if (!found.count(from_anchor) + || !found[from_anchor].count(to_anchor) + || found[from_anchor][to_anchor] != true_distance) { + has_missing = true; + cerr << "Missing transition " << from_pos << "->" + << to_pos << " dist " << true_distance << endl; + } + } + }; + + vector tree_seeds = zip_code_tree.get_all_seeds(); + for (size_t i = 0; i < tree_seeds.size(); i++) { + // Check self-loops + check_distance(tree_seeds[i], false, tree_seeds[i], false); + check_distance(tree_seeds[i], false, tree_seeds[i], true); + check_distance(tree_seeds[i], true, tree_seeds[i], false); + for (size_t j = i + 1; j < tree_seeds.size(); j++) { + // Check all orientation pairs + check_distance(tree_seeds[i], false, tree_seeds[j], false); + check_distance(tree_seeds[i], false, tree_seeds[j], true); + check_distance(tree_seeds[i], true, tree_seeds[j], false); + check_distance(tree_seeds[i], true, tree_seeds[j], true); + } + } - return missing; + return has_missing; } std::vector calculate_transition_read_distances( diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp index 29ce1462bb..f4531d4201 100644 --- a/src/algorithms/chain_items.hpp +++ b/src/algorithms/chain_items.hpp @@ -483,9 +483,9 @@ std::vector generate_zip_tree_transitions( * Iterates over all pairs of seeds and uses the distance index * to determine if there SHOULD have been a transition. * - * Returns failed transitions. + * Returns if any transitions were missing. */ -std::vector find_missing_zip_tree_transitions( +bool find_missing_zip_tree_transitions( const std::vector& seeds, const ZipCodeTree& zip_code_tree, size_t max_graph_lookback_bases,