Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions src/algorithms/chain_items.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

//#define debug_chaining
//#define debug_transition
//#define debug_missing_transition
//#define debug_dp

namespace vg {
Expand Down Expand Up @@ -262,6 +263,18 @@ transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistance
generate_zip_tree_transitions(seeds, zip_code_tree, max_graph_lookback_bases,
seed_to_starting, seed_to_ending);

#ifdef debug_missing_transition
bool has_missing = \
find_missing_zip_tree_transitions(seeds, zip_code_tree, max_graph_lookback_bases,
seed_to_starting, seed_to_ending, distance_index,
all_transitions);
if (has_missing) {
throw std::runtime_error("Zipcode tree iterator failed to output some transitions");
} else {
cerr << "No missing transitions" << endl;
}
#endif

std::vector<transition_info> filtered_transitions =
calculate_transition_read_distances(all_transitions, to_chain, max_read_lookback_bases);

Expand Down Expand Up @@ -385,6 +398,103 @@ std::vector<transition_info> generate_zip_tree_transitions(
return all_transitions;
}

bool find_missing_zip_tree_transitions(
const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds,
const ZipCodeTree& zip_code_tree,
size_t max_graph_lookback_bases,
const std::unordered_map<size_t, size_t>& seed_to_starting,
const std::unordered_map<size_t, size_t>& seed_to_ending,
const SnarlDistanceIndex& distance_index,
const std::vector<transition_info>& all_transitions) {

// {source anchor : {dest anchor : dist}}
std::unordered_map<size_t, std::unordered_map<size_t, size_t>> found;
for (const auto& transition : all_transitions) {
size_t dist_to_save = transition.graph_distance;
if (!found.count(transition.from_anchor)) {
found[transition.from_anchor] = std::unordered_map<size_t, size_t>();
}
if (found[transition.from_anchor].count(transition.to_anchor)) {
// If a transition appears multiple times, remember the min
dist_to_save = std::min(transition.graph_distance,
found[transition.from_anchor][transition.to_anchor]);
}
found[transition.from_anchor][transition.to_anchor] = transition.graph_distance;
}

bool has_missing = false;

// Helper function to check for a distance between two seeds
auto check_distance = [&] (const ZipCodeTree::oriented_seed_t& from_seed, bool rev_from,
const ZipCodeTree::oriented_seed_t& to_seed, bool rev_to) {
// XOR to get appropriate orientations
rev_from ^= from_seed.is_reversed;
rev_to ^= to_seed.is_reversed;
if (rev_from != rev_to) {
// Cannot be compared; incompatible orientations
return;
}

// Look up appropriate anchors
auto from_anchor_itr = rev_from ? seed_to_starting.find(from_seed.seed)
: seed_to_ending.find(from_seed.seed);
if ((rev_from && from_anchor_itr == seed_to_starting.end())
|| (!rev_from && from_anchor_itr == seed_to_ending.end())) {
// No anchor exists
return;
}
auto to_anchor_itr = rev_to ? seed_to_ending.find(to_seed.seed)
: seed_to_starting.find(to_seed.seed);
if ((rev_to && to_anchor_itr == seed_to_ending.end())
|| (!rev_to && to_anchor_itr == seed_to_starting.end())) {
// No anchor exists
return;
}

// Construct seed positions
pos_t from_pos = seeds.at(from_seed.seed).pos;
size_t from_length = distance_index.minimum_length(distance_index.get_node_net_handle(id(from_pos)));
from_pos = rev_from ? reverse(from_pos, from_length)
: from_pos;
pos_t to_pos = seeds.at(to_seed.seed).pos;
size_t to_length = distance_index.minimum_length(distance_index.get_node_net_handle(id(to_pos)));
to_pos = rev_to ? reverse(to_pos, to_length)
: to_pos;

// Look up true minimum distance
size_t true_distance = minimum_nontrivial_distance(distance_index, from_pos, to_pos);
if (true_distance <= max_graph_lookback_bases) {
// We should've found this transition
auto from_anchor = from_anchor_itr->second;
auto to_anchor = to_anchor_itr->second;
if (!found.count(from_anchor)
|| !found[from_anchor].count(to_anchor)
|| found[from_anchor][to_anchor] != true_distance) {
has_missing = true;
cerr << "Missing transition " << from_pos << "->"
<< to_pos << " dist " << true_distance << endl;
}
}
};

vector<ZipCodeTree::oriented_seed_t> tree_seeds = zip_code_tree.get_all_seeds();
for (size_t i = 0; i < tree_seeds.size(); i++) {
// Check self-loops
check_distance(tree_seeds[i], false, tree_seeds[i], false);
check_distance(tree_seeds[i], false, tree_seeds[i], true);
check_distance(tree_seeds[i], true, tree_seeds[i], false);
for (size_t j = i + 1; j < tree_seeds.size(); j++) {
// Check all orientation pairs
check_distance(tree_seeds[i], false, tree_seeds[j], false);
check_distance(tree_seeds[i], false, tree_seeds[j], true);
check_distance(tree_seeds[i], true, tree_seeds[j], false);
check_distance(tree_seeds[i], true, tree_seeds[j], true);
}
}

return has_missing;
}

std::vector<transition_info> calculate_transition_read_distances(
const std::vector<transition_info>& all_transitions,
const VectorView<Anchor>& to_chain,
Expand Down
17 changes: 17 additions & 0 deletions src/algorithms/chain_items.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,23 @@ std::vector<transition_info> generate_zip_tree_transitions(
const std::unordered_map<size_t, size_t>& seed_to_starting,
const std::unordered_map<size_t, size_t>& seed_to_ending);

/**
* Check if all possible transitions were actually found.
*
* Iterates over all pairs of seeds and uses the distance index
* to determine if there SHOULD have been a transition.
*
* Returns if any transitions were missing.
*/
bool find_missing_zip_tree_transitions(
const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds,
const ZipCodeTree& zip_code_tree,
size_t max_graph_lookback_bases,
const std::unordered_map<size_t, size_t>& seed_to_starting,
const std::unordered_map<size_t, size_t>& seed_to_ending,
const SnarlDistanceIndex& distance_index,
const std::vector<transition_info>& all_transitions);

/**
* Calculate read distances for each of the zip tree's transitions.
* Also filters out transitions that can't be used,
Expand Down
Loading