From b81e331a8d4d7b19286530e521614e587e17ec73 Mon Sep 17 00:00:00 2001
From: Zia <194475824+electricEpilith@users.noreply.github.com>
Date: Fri, 3 Apr 2026 09:39:37 -0700
Subject: [PATCH 1/8] add (a substantial amount of) instrumentation for vg
 giraffe planned out by Claude Opus 4.6

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/funnel.cpp                       |   4 +
 src/funnel.hpp                       |   4 +
 src/giraffe_stats.cpp                | 200 +++++++++++++++++++++++++++
 src/giraffe_stats.hpp                |  73 ++++++++++
 src/minimizer_mapper.cpp             |  17 ++-
 src/minimizer_mapper.hpp             |   7 +
 src/minimizer_mapper_from_chains.cpp |  17 ++-
 src/subcommand/giraffe_main.cpp      |  29 +++-
 8 files changed, 343 insertions(+), 8 deletions(-)
 create mode 100644 src/giraffe_stats.cpp
 create mode 100644 src/giraffe_stats.hpp
diff --git a/src/funnel.cpp b/src/funnel.cpp
index ebf796b3aaf..06cec8fe9d2 100644
--- a/src/funnel.cpp
+++ b/src/funnel.cpp
@@ -428,6 +428,10 @@ size_t Funnel::latest() const {
     return stages.back().items.size() - 1;
 }
 
+double Funnel::total_seconds() const {
+    return chrono::duration_cast<chrono::duration<double>>(stop_time - start_time).count();
+}
+
 void Funnel::for_each_stage(const function<void(const string&, const vector<size_t>&, const vector<double>&, const vector<double>&, const double&, const std::unordered_map<std::string, double>&)>& callback) const {
     for (auto& stage : stages) {
         // Make a vector of item sizes
diff --git a/src/funnel.hpp b/src/funnel.hpp
index 7c0add6a4df..7a08d524d28 100644
--- a/src/funnel.hpp
+++ b/src/funnel.hpp
@@ -223,6 +223,10 @@ class Funnel {
 
     /// Get the index of the most recent item created in the current stage.
     size_t latest() const;
+
+    /// Get the total elapsed seconds from start() to stop().
+    /// Only valid after stop() has been called.
+    double total_seconds() const;
     
     /// Call the given callback with stage name, a vector of result item sizes
     /// at that stage, a vector of correct item scores at that stage (if any),
diff --git a/src/giraffe_stats.cpp b/src/giraffe_stats.cpp
new file mode 100644
index 00000000000..3e0b5129bce
--- /dev/null
+++ b/src/giraffe_stats.cpp
@@ -0,0 +1,200 @@
+/**
+ * \file giraffe_stats.cpp
+ */
+
+#include "giraffe_stats.hpp"
+
+#include <omp.h>
+#include <algorithm>
+#include <iomanip>
+#include <numeric>
+#include <sstream>
+#include <iostream>
+
+namespace vg {
+
+GiraffeStats::GiraffeStats(size_t thread_count, double slow_threshold_s)
+    : per_thread(thread_count), slow_threshold_s(slow_threshold_s) {}
+
+void GiraffeStats::record_read(const Funnel& funnel, const std::string& read_name) {
+    int tid = omp_get_thread_num();
+    ThreadData& td = per_thread.at(tid);
+
+    double total = funnel.total_seconds();
+    td.read_durations.push_back(total);
+
+    bool is_slow = slow_threshold_s > 0.0 && total > slow_threshold_s;
+    if (is_slow) {
+        td.slow_read_count++;
+    }
+
+    // Accumulate per-stage data and optionally build the slow-read message.
+    std::ostringstream slow_msg;
+    if (is_slow) {
+        slow_msg << "warning[vg::Giraffe]: Slow read \"" << read_name
+                 << "\" took " << std::fixed << std::setprecision(3)
+                 << total << "s:\n";
+    }
+
+    funnel.for_each_stage([&](const std::string& stage,
+                               const std::vector<size_t>& result_sizes,
+                               const std::vector<double>& /*correct_scores*/,
+                               const std::vector<double>& /*noncorrect_scores*/,
+                               const double& duration,
+                               const std::unordered_map<std::string, double>& sub_durations) {
+        // Record into per-thread storage.
+        if (!td.stage_durations.count(stage)) {
+            td.stage_order.push_back(stage);
+        }
+        td.stage_durations[stage].push_back(duration);
+        td.stage_item_counts[stage].push_back(result_sizes.size());
+
+        for (auto& kv : sub_durations) {
+            std::string key = stage + "/" + kv.first;
+            td.substage_durations[key].push_back(kv.second);
+        }
+
+        if (is_slow) {
+            slow_msg << "  " << stage << ": "
+                     << std::fixed << std::setprecision(3) << duration << "s"
+                     << " (" << result_sizes.size() << " items)\n";
+            for (auto& kv : sub_durations) {
+                slow_msg << "    " << kv.first << ": "
+                         << std::fixed << std::setprecision(3) << kv.second << "s\n";
+            }
+        }
+    });
+
+    if (is_slow) {
+        #pragma omp critical (cerr)
+        std::cerr << slow_msg.str() << std::flush;
+    }
+}
+
+double GiraffeStats::percentile(const std::vector<double>& sorted, double p) {
+    if (sorted.empty()) return 0.0;
+    if (sorted.size() == 1) return sorted[0];
+    double idx = p * (sorted.size() - 1);
+    size_t lo = (size_t)idx;
+    size_t hi = lo + 1;
+    if (hi >= sorted.size()) return sorted.back();
+    double frac = idx - lo;
+    return sorted[lo] * (1.0 - frac) + sorted[hi] * frac;
+}
+
+void GiraffeStats::print_summary(std::ostream& out) const {
+    // Merge all thread data.
+    // Use the stage order from thread 0 (or whichever thread saw stages first),
+    // then append any stages seen by other threads.
+    std::vector<std::string> stage_order;
+    std::unordered_map<std::string, std::vector<double>> stage_durations;
+    std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
+    std::unordered_map<std::string, std::vector<double>> substage_durations;
+    std::vector<double> read_durations;
+    size_t slow_read_count = 0;
+    size_t total_reads = 0;
+
+    for (auto& td : per_thread) {
+        // Merge stage order (preserve first-seen ordering).
+        for (auto& s : td.stage_order) {
+            if (!stage_durations.count(s)) {
+                stage_order.push_back(s);
+            }
+        }
+        for (auto& kv : td.stage_durations) {
+            auto& dst = stage_durations[kv.first];
+            dst.insert(dst.end(), kv.second.begin(), kv.second.end());
+        }
+        for (auto& kv : td.stage_item_counts) {
+            auto& dst = stage_item_counts[kv.first];
+            dst.insert(dst.end(), kv.second.begin(), kv.second.end());
+        }
+        for (auto& kv : td.substage_durations) {
+            auto& dst = substage_durations[kv.first];
+            dst.insert(dst.end(), kv.second.begin(), kv.second.end());
+        }
+        read_durations.insert(read_durations.end(),
+                              td.read_durations.begin(), td.read_durations.end());
+        slow_read_count += td.slow_read_count;
+        total_reads += td.read_durations.size();
+    }
+
+    if (total_reads == 0) return;
+
+    // Sort per-read durations for percentiles.
+    std::vector<double> sorted_reads = read_durations;
+    std::sort(sorted_reads.begin(), sorted_reads.end());
+
+    // Print header.
+    out << "\n=== Giraffe Per-Stage Timing (" << total_reads << " reads) ===\n";
+    out << std::left
+        << std::setw(28) << "Stage"
+        << std::right
+        << std::setw(10) << "Mean(ms)"
+        << std::setw(10) << "P50(ms)"
+        << std::setw(10) << "P95(ms)"
+        << std::setw(10) << "P99(ms)"
+        << std::setw(10) << "Max(ms)"
+        << std::setw(12) << "MeanItems"
+        << std::setw(10) << "MaxItems"
+        << "\n";
+
+    // Helper to print one row.
+    auto print_row = [&](const std::string& label, std::vector<double>& durs,
+                         const std::vector<size_t>* items) {
+        std::sort(durs.begin(), durs.end());
+        double mean_ms = 1000.0 * std::accumulate(durs.begin(), durs.end(), 0.0) / durs.size();
+        double p50_ms  = 1000.0 * percentile(durs, 0.50);
+        double p95_ms  = 1000.0 * percentile(durs, 0.95);
+        double p99_ms  = 1000.0 * percentile(durs, 0.99);
+        double max_ms  = 1000.0 * durs.back();
+
+        out << std::left << std::setw(28) << label << std::right
+            << std::fixed << std::setprecision(2)
+            << std::setw(10) << mean_ms
+            << std::setw(10) << p50_ms
+            << std::setw(10) << p95_ms
+            << std::setw(10) << p99_ms
+            << std::setw(10) << max_ms;
+
+        if (items && !items->empty()) {
+            double mean_items = (double)std::accumulate(items->begin(), items->end(), (size_t)0) / items->size();
+            size_t max_items  = *std::max_element(items->begin(), items->end());
+            out << std::setw(12) << std::setprecision(1) << mean_items
+                << std::setw(10) << max_items;
+        }
+        out << "\n";
+    };
+
+    for (auto& stage : stage_order) {
+        print_row(stage, stage_durations[stage], &stage_item_counts[stage]);
+
+        // Print any substages for this stage, indented.
+        for (auto& kv : substage_durations) {
+            // Key format is "stage/substage".
+            size_t slash = kv.first.find('/');
+            if (slash == std::string::npos) continue;
+            if (kv.first.substr(0, slash) != stage) continue;
+            std::string substage_label = "  " + kv.first.substr(slash + 1);
+            print_row(substage_label, kv.second, nullptr);
+        }
+    }
+
+    // Total read timing.
+    double mean_ms = 1000.0 * std::accumulate(read_durations.begin(), read_durations.end(), 0.0) / total_reads;
+    double p99_ms  = 1000.0 * percentile(sorted_reads, 0.99);
+    double max_ms  = 1000.0 * sorted_reads.back();
+
+    out << "\nTotal per-read: mean=" << std::fixed << std::setprecision(2) << mean_ms
+        << "ms, P99=" << p99_ms << "ms, max=" << max_ms << "ms\n";
+
+    if (slow_threshold_s > 0.0) {
+        out << "Slow reads (>" << slow_threshold_s << "s): "
+            << slow_read_count << " / " << total_reads
+            << " (" << std::fixed << std::setprecision(3)
+            << 100.0 * slow_read_count / total_reads << "%)\n";
+    }
+    out << std::flush;
+}
+
+} // namespace vg
diff --git a/src/giraffe_stats.hpp b/src/giraffe_stats.hpp
new file mode 100644
index 00000000000..bb3316ed975
--- /dev/null
+++ b/src/giraffe_stats.hpp
@@ -0,0 +1,73 @@
+#ifndef VG_GIRAFFE_STATS_HPP_INCLUDED
+#define VG_GIRAFFE_STATS_HPP_INCLUDED
+
+/**
+ * \file giraffe_stats.hpp
+ * Aggregate per-stage timing statistics across reads for the Giraffe mapper.
+ */
+
+#include "funnel.hpp"
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <ostream>
+
+namespace vg {
+
+using namespace std;
+
+/**
+ * Thread-safe aggregate statistics collector for the Giraffe mapper.
+ *
+ * Each thread pushes data into its own ThreadData slot (no locking on the hot
+ * path). Summary statistics are computed and printed after all reads are
+ * mapped.
+ *
+ * Also logs a per-stage breakdown to stderr for any individual read whose
+ * total mapping time exceeds a configurable threshold.
+ */
+class GiraffeStats {
+public:
+    GiraffeStats(size_t thread_count, double slow_threshold_s);
+
+    /**
+     * Record timing/item data from one read's Funnel.
+     * Must be called after funnel.stop().
+     * Uses omp_get_thread_num() to select the per-thread slot.
+     */
+    void record_read(const Funnel& funnel, const std::string& read_name);
+
+    /**
+     * Print a summary table of per-stage timing percentiles to out.
+     * Thread-safe to call after all record_read() calls are done.
+     */
+    void print_summary(std::ostream& out) const;
+
+private:
+    struct ThreadData {
+        // Per-stage accumulated durations and item counts.
+        // Stage names appear in insertion order the first time they're seen.
+        std::vector<std::string> stage_order;
+        std::unordered_map<std::string, std::vector<double>> stage_durations;
+        std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
+
+        // Per-substage accumulated durations, keyed as "stage/substage".
+        std::unordered_map<std::string, std::vector<double>> substage_durations;
+
+        // Total per-read duration.
+        std::vector<double> read_durations;
+
+        size_t slow_read_count = 0;
+    };
+
+    std::vector<ThreadData> per_thread;
+    double slow_threshold_s;
+
+    // Compute percentile p in [0,1] from a sorted vector.
+    static double percentile(const std::vector<double>& sorted, double p);
+};
+
+} // namespace vg
+
+#endif // VG_GIRAFFE_STATS_HPP_INCLUDED
diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp
index c678d3ecdc7..353f823bd80 100644
--- a/src/minimizer_mapper.cpp
+++ b/src/minimizer_mapper.cpp
@@ -4,6 +4,7 @@
  */
 
 #include "minimizer_mapper.hpp"
+#include "giraffe_stats.hpp"
 
 #include "crash.hpp"
 #include "annotation.hpp"
@@ -634,11 +635,16 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
     // Cluster the seeds. Get sets of input seed indexes that go together.
     if (track_provenance) {
         funnel.stage("cluster");
+        funnel.substage("cluster_seeds");
     }
 
     // Find the clusters
     std::vector<Cluster> clusters = clusterer.cluster_seeds(seeds, get_distance_limit(aln.sequence().size()));
-    
+
+    if (track_provenance) {
+        funnel.substage_stop();
+    }
+
 #ifdef debug_validate_clusters
     vector<vector<Cluster>> all_clusters;
     all_clusters.emplace_back(clusters);
@@ -1224,10 +1230,15 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
     
     // Stop this alignment
     funnel.stop();
-    
+
+    // Record aggregate stats and log slow reads if instrumentation is enabled.
+    if (giraffe_stats) {
+        giraffe_stats->record_read(funnel, aln.name());
+    }
+
     // Annotate with whatever's in the funnel
     funnel.annotate_mapped_alignment(mappings[0], track_correctness);
-    
+
     if (track_provenance) {
         if (track_correctness) {
             annotate_with_minimizer_statistics(mappings[0], minimizers, seeds, seeds.size(), 0, funnel);
diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp
index 41a376069e6..cfa8aee47c3 100644
--- a/src/minimizer_mapper.hpp
+++ b/src/minimizer_mapper.hpp
@@ -31,6 +31,9 @@ namespace vg {
 using namespace std;
 using namespace vg::io;
 
+// Forward-declare GiraffeStats so the mapper can hold a pointer without a full include.
+class GiraffeStats;
+
 class MinimizerMapper : public AlignerClient {
     public:
     // Definitions used with minimizer indexes.
@@ -432,6 +435,10 @@ class MinimizerMapper : public AlignerClient {
     static constexpr bool default_show_work = false;
     bool show_work = default_show_work;
 
+    /// If set, collect per-stage aggregate timing statistics and log slow reads.
+    /// Not owned by this object; caller manages lifetime.
+    class GiraffeStats* giraffe_stats = nullptr;
+
     ////How many stdevs from fragment length distr mean do we cluster together?
     static constexpr double default_paired_distance_stdevs = 2.0;
     double paired_distance_stdevs = default_paired_distance_stdevs; 
diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp
index 26c86dce020..ccb0386b98d 100644
--- a/src/minimizer_mapper_from_chains.cpp
+++ b/src/minimizer_mapper_from_chains.cpp
@@ -5,6 +5,7 @@
  */
 
 #include "minimizer_mapper.hpp"
+#include "giraffe_stats.hpp"
 
 #include "annotation.hpp"
 #include "banded_global_aligner.hpp"
@@ -718,6 +719,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     
     if (this->track_provenance) {
         funnel.stage("tree");
+        funnel.substage("fill_in_forest");
     }
 
     // Make them into a zip code tree
@@ -725,6 +727,10 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     crash_unless(distance_index);
     zip_code_forest.fill_in_forest(seeds, *distance_index, aln.sequence().size() * zipcode_tree_scale);
 
+    if (this->track_provenance) {
+        funnel.substage_stop();
+    }
+
 #ifdef debug_print_forest
     if (show_work) {
         #pragma omp critical (cerr)
@@ -1004,9 +1010,14 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     // Stop this alignment
     funnel.stop();
 
+    // Record aggregate stats and log slow reads if instrumentation is enabled.
+    if (giraffe_stats) {
+        giraffe_stats->record_read(funnel, aln.name());
+    }
+
     // Annotate with whatever's in the funnel
     funnel.annotate_mapped_alignment(mappings[0], track_correctness);
-    
+
     if (track_provenance) {
         if (track_correctness) {
             annotate_with_minimizer_statistics(mappings[0], minimizers, seeds, seeds.size(), chains.size(), funnel);
@@ -1095,6 +1106,9 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
     bool do_gapless_extension = aln.sequence().size() <= gapless_extension_limit;
 
     // First score all the zip code trees in the forest by summing the scores of their involved minimizers.
+    if (track_provenance) {
+        funnel.substage("score_trees");
+    }
     vector<double> tree_scores;
     double best_tree_score = 0;
     double second_best_tree_score = 0;
@@ -1151,6 +1165,7 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
 
 
     if (track_provenance) {
+        funnel.substage_stop();
         funnel.stage("chain");
         funnel.substage("chain");
     }
diff --git a/src/subcommand/giraffe_main.cpp b/src/subcommand/giraffe_main.cpp
index f23fead27ff..b419f91054f 100644
--- a/src/subcommand/giraffe_main.cpp
+++ b/src/subcommand/giraffe_main.cpp
@@ -29,6 +29,7 @@
 #include "../index_registry.hpp"
 #include "../utility.hpp"
 #include "../watchdog.hpp"
+#include "../giraffe_stats.hpp"
 #include "../crash.hpp"
 #include <bdsg/overlays/overlay_helper.hpp>
 
@@ -765,6 +766,7 @@ int main_giraffe(int argc, char** argv) {
     constexpr int OPT_HAPLOTYPE_SAMPLING = 1104;
     constexpr int OPT_NUM_HAPLOTYPES = 1105;
     constexpr int OPT_NO_DIPLOID_SAMPLING = 1106;
+    constexpr int OPT_SLOW_READ_THRESHOLD = 1200;
 
 
     // initialize parameters with their default options
@@ -832,7 +834,11 @@ int main_giraffe(int argc, char** argv) {
     bool track_position = MinimizerMapper::default_track_position;
     // Should we log our mapping decision making?
     bool show_work = MinimizerMapper::default_show_work;
-    
+
+    // Reads longer than this (in seconds) get a per-stage breakdown logged to stderr.
+    // 0 disables per-read slow logging. The aggregate summary is always printed when > 0.
+    double slow_read_threshold = 0.0;
+
     // Should we throw out our alignments instead of outputting them?
     bool discard_alignments = false;
     
@@ -1109,6 +1115,7 @@ int main_giraffe(int argc, char** argv) {
         {"track-correctness", no_argument, 0, OPT_TRACK_CORRECTNESS},
         {"track-position", no_argument, 0, OPT_TRACK_POSITION},
         {"show-work", no_argument, 0, OPT_SHOW_WORK},
+        {"slow-read-threshold", required_argument, 0, OPT_SLOW_READ_THRESHOLD},
         {"threads", required_argument, 0, 't'},
     };
     parser->make_long_options(long_options);
@@ -1362,7 +1369,11 @@ int main_giraffe(int argc, char** argv) {
                 // Also turn on saving explanations
                 Explainer::save_explanations = true;
                 break;
-                
+
+            case OPT_SLOW_READ_THRESHOLD:
+                slow_read_threshold = parse<double>(optarg);
+                break;
+
             case 't':
                 set_thread_count(logger, optarg);
                 break;
@@ -1965,6 +1976,13 @@ int main_giraffe(int argc, char** argv) {
         // Work out the number of threads we will have
         size_t thread_count = omp_get_max_threads();
 
+        // Set up per-stage timing instrumentation if requested.
+        unique_ptr<GiraffeStats> giraffe_stats;
+        if (slow_read_threshold > 0.0) {
+            giraffe_stats.reset(new GiraffeStats(thread_count, slow_read_threshold));
+            minimizer_mapper.giraffe_stats = giraffe_stats.get();
+        }
+
         // Set up counters per-thread for total reads mapped
         vector<size_t> reads_mapped_by_thread(thread_count, 0);
         
@@ -2415,8 +2433,11 @@ int main_giraffe(int argc, char** argv) {
 
             logger.info() << "Memory footprint: " << gbwt::inGigabytes(gbwt::memoryUsage()) << " GB" << endl;
         }
-        
-        
+
+        if (giraffe_stats) {
+            giraffe_stats->print_summary(cerr);
+        }
+
         if (report) {
             // Log output filename and mapping speed in reads/second/thread to report TSV
             report << output_filename << "\t" << reads_per_second_per_thread << endl;

From 528ec4e5f306f8ab543056bc96ffb55a5bd43352 Mon Sep 17 00:00:00 2001
From: Zia <194475824+electricEpilith@users.noreply.github.com>
Date: Fri, 3 Apr 2026 14:50:41 -0700
Subject: [PATCH 2/8] fix abs() errors on Mac

Co-Authored-By: GitHub Copilot <noreply@github.com>
---
 src/multipath_alignment_graph.cpp   | 8 ++++----
 src/subcommand/gampcompare_main.cpp | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/multipath_alignment_graph.cpp b/src/multipath_alignment_graph.cpp
index 6cc2620f914..9038dff6384 100644
--- a/src/multipath_alignment_graph.cpp
+++ b/src/multipath_alignment_graph.cpp
@@ -6505,7 +6505,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap
         for (const auto& path_node : path_nodes) {
             for (const auto& edge : path_node.edges) {
                 const auto& next_node = path_nodes[edge.first];
-                shift = max<size_t>(shift, abs<int64_t>((next_node.begin - path_node.end) - edge.second));
+                shift = max<size_t>(shift, std::abs(static_cast<int64_t>((next_node.begin - path_node.end) - edge.second)));
             }
         }
         return shift;
@@ -6529,7 +6529,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap
                     ++in_degree[edge.first];
                     
                     const auto& next_node = path_nodes[edge.first];
-                    size_t shift = abs<int64_t>((next_node.begin - path_node.end) - edge.second);
+                    size_t shift = std::abs(static_cast<int64_t>((next_node.begin - path_node.end) - edge.second));
                     
 #ifdef debug_shift_pruning
                     cerr << "shift DP reverse " << i << " <- " << edge.first << " with shift " << shift << " for total " << min_shift_rev[edge.first] + shift << endl;
@@ -6555,7 +6555,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap
             else {
                 for (auto& edge : path_node.edges) {
                     const auto& next_node = path_nodes[edge.first];
-                    size_t shift = abs<int64_t>((next_node.begin - path_node.end) - edge.second);
+                    size_t shift = std::abs(static_cast<int64_t>((next_node.begin - path_node.end) - edge.second));
 #ifdef debug_shift_pruning
                     cerr << "shift DP forward " << i << " -> " << edge.first << " with shift " << shift << " for total " << min_shift_fwd[i] + shift << endl;
 #endif
@@ -6577,7 +6577,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap
             for (size_t j = 0; j < path_node.edges.size(); ++j) {
                 auto& edge = path_node.edges[j];
                 const auto& next_node = path_nodes[edge.first];
-                size_t shift = abs<int64_t>((next_node.begin - path_node.end) - edge.second);
+                size_t shift = std::abs(static_cast<int64_t>((next_node.begin - path_node.end) - edge.second));
                 
                 size_t min_edge_shift = min_shift_fwd[i] + shift + min_shift_rev[edge.first];
                 
diff --git a/src/subcommand/gampcompare_main.cpp b/src/subcommand/gampcompare_main.cpp
index 72c84c289e3..6730fc8d81a 100644
--- a/src/subcommand/gampcompare_main.cpp
+++ b/src/subcommand/gampcompare_main.cpp
@@ -216,7 +216,7 @@ int main_gampcompare(int argc, char** argv) {
                             if (path_true_positions[i].second == path_mapped_positions[j].second) {
                                 // there is a pair of positions on the same strand of the same path
                                 abs_dist = min<int64_t>(abs_dist,
-                                                        abs<int64_t>(path_true_positions[i].first - path_mapped_positions[j].first));
+                                                        std::abs(static_cast<int64_t>(path_true_positions[i].first - path_mapped_positions[j].first)));
                             }
                         }
                     }

From a3760d19cd29a1fff0624355902f73adb503cd59 Mon Sep 17 00:00:00 2001
From: Zia <194475824+electricEpilith@users.noreply.github.com>
Date: Fri, 3 Apr 2026 19:31:43 -0700
Subject: [PATCH 3/8] additional abs() fix

Co-Authored-By: GitHub Copilot <noreply@github.com>
---
 src/multipath_mapper.cpp | 2 +-
 src/recombinator.cpp     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/multipath_mapper.cpp b/src/multipath_mapper.cpp
index 4632edee8b2..4241fa12e67 100644
--- a/src/multipath_mapper.cpp
+++ b/src/multipath_mapper.cpp
@@ -2448,7 +2448,7 @@ namespace vg {
                     // in the left_idxs and right_idxs vectors
                     int64_t target_len = 2 * seq_len - left_side.clip_length - right_side.clip_length;
                     auto distance_diff = [&](size_t l, size_t r) {
-                        return abs<int64_t>(get<2>(left_sites[left_idxs[l]]) + get<2>(right_sites[right_idxs[r]]) - target_len);
+                        return std::abs(static_cast<int64_t>(get<2>(left_sites[left_idxs[l]]) + get<2>(right_sites[right_idxs[r]]) - target_len));
                     };
                     
                     // sweep to identify pairs that most nearly align
diff --git a/src/recombinator.cpp b/src/recombinator.cpp
index a9aaed4b100..07915118ed2 100644
--- a/src/recombinator.cpp
+++ b/src/recombinator.cpp
@@ -1585,7 +1585,7 @@ void add_path(const gbwt::GBWT& source, gbwt::size_type path_id, gbwt::GBWTBuild
     gbwt::PathName path_name = source.metadata.path(path_id);
     std::string sample_name = source.metadata.sample(path_name.sample);
     std::string contig_name = source.metadata.contig(path_name.contig);
-    if (sample_name == gbwtgraph::REFERENCE_PATH_SAMPLE_NAME) {
+    if (sample_name == gbwtgraph::GENERIC_PATH_SAMPLE_NAME) {
         metadata.add_generic_path(contig_name);
     } else {
         // Reference samples will be copied later.

From 7920c76d12b008882a72149e8b72ec6fcdec48ca Mon Sep 17 00:00:00 2001
From: Zia <194475824+electricEpilith@users.noreply.github.com>
Date: Fri, 3 Apr 2026 20:07:36 -0700
Subject: [PATCH 4/8] minor print changes

Co-Authored-By: GitHub Copilot <noreply@github.com>
---
 src/giraffe_stats.cpp | 15 ++++++---------
 src/giraffe_stats.hpp | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/giraffe_stats.cpp b/src/giraffe_stats.cpp
index 3e0b5129bce..88980960bb4 100644
--- a/src/giraffe_stats.cpp
+++ b/src/giraffe_stats.cpp
@@ -6,6 +6,7 @@
 
 #include <omp.h>
 #include <algorithm>
+#include <format>
 #include <iomanip>
 #include <numeric>
 #include <sstream>
@@ -31,9 +32,7 @@ void GiraffeStats::record_read(const Funnel& funnel, const std::string& read_nam
     // Accumulate per-stage data and optionally build the slow-read message.
     std::ostringstream slow_msg;
     if (is_slow) {
-        slow_msg << "warning[vg::Giraffe]: Slow read \"" << read_name
-                 << "\" took " << std::fixed << std::setprecision(3)
-                 << total << "s:\n";
+        slow_msg << std::format("warning[vg::Giraffe]: Slow read \"{}\" took {:.3f}s:\n", read_name, total);
     }
 
     funnel.for_each_stage([&](const std::string& stage,
@@ -55,9 +54,7 @@ void GiraffeStats::record_read(const Funnel& funnel, const std::string& read_nam
         }
 
         if (is_slow) {
-            slow_msg << "  " << stage << ": "
-                     << std::fixed << std::setprecision(3) << duration << "s"
-                     << " (" << result_sizes.size() << " items)\n";
+            slow_msg << std::format("  {}: {:.3f}s ({} items)\n", stage, duration, result_sizes.size());
             for (auto& kv : sub_durations) {
                 slow_msg << "    " << kv.first << ": "
                          << std::fixed << std::setprecision(3) << kv.second << "s\n";
@@ -87,9 +84,9 @@ void GiraffeStats::print_summary(std::ostream& out) const {
     // Use the stage order from thread 0 (or whichever thread saw stages first),
     // then append any stages seen by other threads.
     std::vector<std::string> stage_order;
-    std::unordered_map<std::string, std::vector<double>> stage_durations;
-    std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
-    std::unordered_map<std::string, std::vector<double>> substage_durations;
+    std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> stage_durations;
+    std::unordered_map<std::string, std::vector<size_t>, StringHash, StringEqual> stage_item_counts;
+    std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> substage_durations;
     std::vector<double> read_durations;
     size_t slow_read_count = 0;
     size_t total_reads = 0;
diff --git a/src/giraffe_stats.hpp b/src/giraffe_stats.hpp
index bb3316ed975..125ebdd8b2d 100644
--- a/src/giraffe_stats.hpp
+++ b/src/giraffe_stats.hpp
@@ -17,6 +17,33 @@ namespace vg {
 
 using namespace std;
 
+/**
+ * Transparent hasher for std::string keys in unordered_map.
+ * Allows heterogeneous lookup with string_view and other compatible types.
+ */
+struct StringHash {
+    using is_transparent = void;
+    size_t operator()(std::string_view str) const {
+        return std::hash<std::string_view>()(str);
+    }
+    size_t operator()(const std::string& str) const {
+        return std::hash<std::string>()(str);
+    }
+};
+
+/**
+ * Transparent equality for std::string keys in unordered_map.
+ */
+struct StringEqual {
+    using is_transparent = void;
+    bool operator()(std::string_view lhs, std::string_view rhs) const {
+        return lhs == rhs;
+    }
+    bool operator()(const std::string& lhs, const std::string& rhs) const {
+        return lhs == rhs;
+    }
+};
+
 /**
  * Thread-safe aggregate statistics collector for the Giraffe mapper.
  *
@@ -49,11 +76,11 @@ class GiraffeStats {
         // Per-stage accumulated durations and item counts.
         // Stage names appear in insertion order the first time they're seen.
         std::vector<std::string> stage_order;
-        std::unordered_map<std::string, std::vector<double>> stage_durations;
-        std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
+        std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> stage_durations;
+        std::unordered_map<std::string, std::vector<size_t>, StringHash, StringEqual> stage_item_counts;
 
         // Per-substage accumulated durations, keyed as "stage/substage".
-        std::unordered_map<std::string, std::vector<double>> substage_durations;
+        std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> substage_durations;
 
         // Total per-read duration.
         std::vector<double> read_durations;

From 190469a6f4eb57e1e9fe24850feb634c2cbc72b0 Mon Sep 17 00:00:00 2001
From: Zia Truong <194475824+electricEpilith@users.noreply.github.com>
Date: Fri, 3 Apr 2026 22:39:15 -0700
Subject: [PATCH 5/8] Revert "minor print changes"

This reverts commit 7920c76d12b008882a72149e8b72ec6fcdec48ca.
---
 src/giraffe_stats.cpp | 15 +++++++++------
 src/giraffe_stats.hpp | 33 +++------------------------------
 2 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/src/giraffe_stats.cpp b/src/giraffe_stats.cpp
index 88980960bb4..3e0b5129bce 100644
--- a/src/giraffe_stats.cpp
+++ b/src/giraffe_stats.cpp
@@ -6,7 +6,6 @@
 
 #include <omp.h>
 #include <algorithm>
-#include <format>
 #include <iomanip>
 #include <numeric>
 #include <sstream>
@@ -32,7 +31,9 @@ void GiraffeStats::record_read(const Funnel& funnel, const std::string& read_nam
     // Accumulate per-stage data and optionally build the slow-read message.
     std::ostringstream slow_msg;
     if (is_slow) {
-        slow_msg << std::format("warning[vg::Giraffe]: Slow read \"{}\" took {:.3f}s:\n", read_name, total);
+        slow_msg << "warning[vg::Giraffe]: Slow read \"" << read_name
+                 << "\" took " << std::fixed << std::setprecision(3)
+                 << total << "s:\n";
     }
 
     funnel.for_each_stage([&](const std::string& stage,
@@ -54,7 +55,9 @@ void GiraffeStats::record_read(const Funnel& funnel, const std::string& read_nam
         }
 
         if (is_slow) {
-            slow_msg << std::format("  {}: {:.3f}s ({} items)\n", stage, duration, result_sizes.size());
+            slow_msg << "  " << stage << ": "
+                     << std::fixed << std::setprecision(3) << duration << "s"
+                     << " (" << result_sizes.size() << " items)\n";
             for (auto& kv : sub_durations) {
                 slow_msg << "    " << kv.first << ": "
                          << std::fixed << std::setprecision(3) << kv.second << "s\n";
@@ -84,9 +87,9 @@ void GiraffeStats::print_summary(std::ostream& out) const {
     // Use the stage order from thread 0 (or whichever thread saw stages first),
     // then append any stages seen by other threads.
     std::vector<std::string> stage_order;
-    std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> stage_durations;
-    std::unordered_map<std::string, std::vector<size_t>, StringHash, StringEqual> stage_item_counts;
-    std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> substage_durations;
+    std::unordered_map<std::string, std::vector<double>> stage_durations;
+    std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
+    std::unordered_map<std::string, std::vector<double>> substage_durations;
     std::vector<double> read_durations;
     size_t slow_read_count = 0;
     size_t total_reads = 0;
diff --git a/src/giraffe_stats.hpp b/src/giraffe_stats.hpp
index 125ebdd8b2d..bb3316ed975 100644
--- a/src/giraffe_stats.hpp
+++ b/src/giraffe_stats.hpp
@@ -17,33 +17,6 @@ namespace vg {
 
 using namespace std;
 
-/**
- * Transparent hasher for std::string keys in unordered_map.
- * Allows heterogeneous lookup with string_view and other compatible types.
- */
-struct StringHash {
-    using is_transparent = void;
-    size_t operator()(std::string_view str) const {
-        return std::hash<std::string_view>()(str);
-    }
-    size_t operator()(const std::string& str) const {
-        return std::hash<std::string>()(str);
-    }
-};
-
-/**
- * Transparent equality for std::string keys in unordered_map.
- */
-struct StringEqual {
-    using is_transparent = void;
-    bool operator()(std::string_view lhs, std::string_view rhs) const {
-        return lhs == rhs;
-    }
-    bool operator()(const std::string& lhs, const std::string& rhs) const {
-        return lhs == rhs;
-    }
-};
-
 /**
  * Thread-safe aggregate statistics collector for the Giraffe mapper.
  *
@@ -76,11 +49,11 @@ class GiraffeStats {
         // Per-stage accumulated durations and item counts.
         // Stage names appear in insertion order the first time they're seen.
         std::vector<std::string> stage_order;
-        std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> stage_durations;
-        std::unordered_map<std::string, std::vector<size_t>, StringHash, StringEqual> stage_item_counts;
+        std::unordered_map<std::string, std::vector<double>> stage_durations;
+        std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
 
         // Per-substage accumulated durations, keyed as "stage/substage".
-        std::unordered_map<std::string, std::vector<double>, StringHash, StringEqual> substage_durations;
+        std::unordered_map<std::string, std::vector<double>> substage_durations;
 
         // Total per-read duration.
         std::vector<double> read_durations;

From 3e573bd54129801c6f7f1ff31c697e815500f887 Mon Sep 17 00:00:00 2001
From: Zia Truong <194475824+electricEpilith@users.noreply.github.com>
Date: Fri, 3 Apr 2026 22:40:54 -0700
Subject: [PATCH 6/8] Revert "add (a substantial amount of) instrumentation for
 vg giraffe"

This reverts commit b81e331a8d4d7b19286530e521614e587e17ec73.
---
 src/funnel.cpp                       |   4 -
 src/funnel.hpp                       |   4 -
 src/giraffe_stats.cpp                | 200 ---------------------------
 src/giraffe_stats.hpp                |  73 ----------
 src/minimizer_mapper.cpp             |  17 +--
 src/minimizer_mapper.hpp             |   7 -
 src/minimizer_mapper_from_chains.cpp |  17 +--
 src/subcommand/giraffe_main.cpp      |  29 +---
 8 files changed, 8 insertions(+), 343 deletions(-)
 delete mode 100644 src/giraffe_stats.cpp
 delete mode 100644 src/giraffe_stats.hpp

diff --git a/src/funnel.cpp b/src/funnel.cpp
index 06cec8fe9d2..ebf796b3aaf 100644
--- a/src/funnel.cpp
+++ b/src/funnel.cpp
@@ -428,10 +428,6 @@ size_t Funnel::latest() const {
     return stages.back().items.size() - 1;
 }
 
-double Funnel::total_seconds() const {
-    return chrono::duration_cast<chrono::duration<double>>(stop_time - start_time).count();
-}
-
 void Funnel::for_each_stage(const function<void(const string&, const vector<size_t>&, const vector<double>&, const vector<double>&, const double&, const std::unordered_map<std::string, double>&)>& callback) const {
     for (auto& stage : stages) {
         // Make a vector of item sizes
diff --git a/src/funnel.hpp b/src/funnel.hpp
index 7a08d524d28..7c0add6a4df 100644
--- a/src/funnel.hpp
+++ b/src/funnel.hpp
@@ -223,10 +223,6 @@ class Funnel {
 
     /// Get the index of the most recent item created in the current stage.
     size_t latest() const;
-
-    /// Get the total elapsed seconds from start() to stop().
-    /// Only valid after stop() has been called.
-    double total_seconds() const;
     
     /// Call the given callback with stage name, a vector of result item sizes
     /// at that stage, a vector of correct item scores at that stage (if any),
diff --git a/src/giraffe_stats.cpp b/src/giraffe_stats.cpp
deleted file mode 100644
index 3e0b5129bce..00000000000
--- a/src/giraffe_stats.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-/**
- * \file giraffe_stats.cpp
- */
-
-#include "giraffe_stats.hpp"
-
-#include <omp.h>
-#include <algorithm>
-#include <iomanip>
-#include <numeric>
-#include <sstream>
-#include <iostream>
-
-namespace vg {
-
-GiraffeStats::GiraffeStats(size_t thread_count, double slow_threshold_s)
-    : per_thread(thread_count), slow_threshold_s(slow_threshold_s) {}
-
-void GiraffeStats::record_read(const Funnel& funnel, const std::string& read_name) {
-    int tid = omp_get_thread_num();
-    ThreadData& td = per_thread.at(tid);
-
-    double total = funnel.total_seconds();
-    td.read_durations.push_back(total);
-
-    bool is_slow = slow_threshold_s > 0.0 && total > slow_threshold_s;
-    if (is_slow) {
-        td.slow_read_count++;
-    }
-
-    // Accumulate per-stage data and optionally build the slow-read message.
-    std::ostringstream slow_msg;
-    if (is_slow) {
-        slow_msg << "warning[vg::Giraffe]: Slow read \"" << read_name
-                 << "\" took " << std::fixed << std::setprecision(3)
-                 << total << "s:\n";
-    }
-
-    funnel.for_each_stage([&](const std::string& stage,
-                               const std::vector<size_t>& result_sizes,
-                               const std::vector<double>& /*correct_scores*/,
-                               const std::vector<double>& /*noncorrect_scores*/,
-                               const double& duration,
-                               const std::unordered_map<std::string, double>& sub_durations) {
-        // Record into per-thread storage.
-        if (!td.stage_durations.count(stage)) {
-            td.stage_order.push_back(stage);
-        }
-        td.stage_durations[stage].push_back(duration);
-        td.stage_item_counts[stage].push_back(result_sizes.size());
-
-        for (auto& kv : sub_durations) {
-            std::string key = stage + "/" + kv.first;
-            td.substage_durations[key].push_back(kv.second);
-        }
-
-        if (is_slow) {
-            slow_msg << "  " << stage << ": "
-                     << std::fixed << std::setprecision(3) << duration << "s"
-                     << " (" << result_sizes.size() << " items)\n";
-            for (auto& kv : sub_durations) {
-                slow_msg << "    " << kv.first << ": "
-                         << std::fixed << std::setprecision(3) << kv.second << "s\n";
-            }
-        }
-    });
-
-    if (is_slow) {
-        #pragma omp critical (cerr)
-        std::cerr << slow_msg.str() << std::flush;
-    }
-}
-
-double GiraffeStats::percentile(const std::vector<double>& sorted, double p) {
-    if (sorted.empty()) return 0.0;
-    if (sorted.size() == 1) return sorted[0];
-    double idx = p * (sorted.size() - 1);
-    size_t lo = (size_t)idx;
-    size_t hi = lo + 1;
-    if (hi >= sorted.size()) return sorted.back();
-    double frac = idx - lo;
-    return sorted[lo] * (1.0 - frac) + sorted[hi] * frac;
-}
-
-void GiraffeStats::print_summary(std::ostream& out) const {
-    // Merge all thread data.
-    // Use the stage order from thread 0 (or whichever thread saw stages first),
-    // then append any stages seen by other threads.
-    std::vector<std::string> stage_order;
-    std::unordered_map<std::string, std::vector<double>> stage_durations;
-    std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
-    std::unordered_map<std::string, std::vector<double>> substage_durations;
-    std::vector<double> read_durations;
-    size_t slow_read_count = 0;
-    size_t total_reads = 0;
-
-    for (auto& td : per_thread) {
-        // Merge stage order (preserve first-seen ordering).
-        for (auto& s : td.stage_order) {
-            if (!stage_durations.count(s)) {
-                stage_order.push_back(s);
-            }
-        }
-        for (auto& kv : td.stage_durations) {
-            auto& dst = stage_durations[kv.first];
-            dst.insert(dst.end(), kv.second.begin(), kv.second.end());
-        }
-        for (auto& kv : td.stage_item_counts) {
-            auto& dst = stage_item_counts[kv.first];
-            dst.insert(dst.end(), kv.second.begin(), kv.second.end());
-        }
-        for (auto& kv : td.substage_durations) {
-            auto& dst = substage_durations[kv.first];
-            dst.insert(dst.end(), kv.second.begin(), kv.second.end());
-        }
-        read_durations.insert(read_durations.end(),
-                              td.read_durations.begin(), td.read_durations.end());
-        slow_read_count += td.slow_read_count;
-        total_reads += td.read_durations.size();
-    }
-
-    if (total_reads == 0) return;
-
-    // Sort per-read durations for percentiles.
-    std::vector<double> sorted_reads = read_durations;
-    std::sort(sorted_reads.begin(), sorted_reads.end());
-
-    // Print header.
-    out << "\n=== Giraffe Per-Stage Timing (" << total_reads << " reads) ===\n";
-    out << std::left
-        << std::setw(28) << "Stage"
-        << std::right
-        << std::setw(10) << "Mean(ms)"
-        << std::setw(10) << "P50(ms)"
-        << std::setw(10) << "P95(ms)"
-        << std::setw(10) << "P99(ms)"
-        << std::setw(10) << "Max(ms)"
-        << std::setw(12) << "MeanItems"
-        << std::setw(10) << "MaxItems"
-        << "\n";
-
-    // Helper to print one row.
-    auto print_row = [&](const std::string& label, std::vector<double>& durs,
-                         const std::vector<size_t>* items) {
-        std::sort(durs.begin(), durs.end());
-        double mean_ms = 1000.0 * std::accumulate(durs.begin(), durs.end(), 0.0) / durs.size();
-        double p50_ms  = 1000.0 * percentile(durs, 0.50);
-        double p95_ms  = 1000.0 * percentile(durs, 0.95);
-        double p99_ms  = 1000.0 * percentile(durs, 0.99);
-        double max_ms  = 1000.0 * durs.back();
-
-        out << std::left << std::setw(28) << label << std::right
-            << std::fixed << std::setprecision(2)
-            << std::setw(10) << mean_ms
-            << std::setw(10) << p50_ms
-            << std::setw(10) << p95_ms
-            << std::setw(10) << p99_ms
-            << std::setw(10) << max_ms;
-
-        if (items && !items->empty()) {
-            double mean_items = (double)std::accumulate(items->begin(), items->end(), (size_t)0) / items->size();
-            size_t max_items  = *std::max_element(items->begin(), items->end());
-            out << std::setw(12) << std::setprecision(1) << mean_items
-                << std::setw(10) << max_items;
-        }
-        out << "\n";
-    };
-
-    for (auto& stage : stage_order) {
-        print_row(stage, stage_durations[stage], &stage_item_counts[stage]);
-
-        // Print any substages for this stage, indented.
-        for (auto& kv : substage_durations) {
-            // Key format is "stage/substage".
-            size_t slash = kv.first.find('/');
-            if (slash == std::string::npos) continue;
-            if (kv.first.substr(0, slash) != stage) continue;
-            std::string substage_label = "  " + kv.first.substr(slash + 1);
-            print_row(substage_label, kv.second, nullptr);
-        }
-    }
-
-    // Total read timing.
-    double mean_ms = 1000.0 * std::accumulate(read_durations.begin(), read_durations.end(), 0.0) / total_reads;
-    double p99_ms  = 1000.0 * percentile(sorted_reads, 0.99);
-    double max_ms  = 1000.0 * sorted_reads.back();
-
-    out << "\nTotal per-read: mean=" << std::fixed << std::setprecision(2) << mean_ms
-        << "ms, P99=" << p99_ms << "ms, max=" << max_ms << "ms\n";
-
-    if (slow_threshold_s > 0.0) {
-        out << "Slow reads (>" << slow_threshold_s << "s): "
-            << slow_read_count << " / " << total_reads
-            << " (" << std::fixed << std::setprecision(3)
-            << 100.0 * slow_read_count / total_reads << "%)\n";
-    }
-    out << std::flush;
-}
-
-} // namespace vg
diff --git a/src/giraffe_stats.hpp b/src/giraffe_stats.hpp
deleted file mode 100644
index bb3316ed975..00000000000
--- a/src/giraffe_stats.hpp
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef VG_GIRAFFE_STATS_HPP_INCLUDED
-#define VG_GIRAFFE_STATS_HPP_INCLUDED
-
-/**
- * \file giraffe_stats.hpp
- * Aggregate per-stage timing statistics across reads for the Giraffe mapper.
- */
-
-#include "funnel.hpp"
-
-#include <string>
-#include <vector>
-#include <unordered_map>
-#include <ostream>
-
-namespace vg {
-
-using namespace std;
-
-/**
- * Thread-safe aggregate statistics collector for the Giraffe mapper.
- *
- * Each thread pushes data into its own ThreadData slot (no locking on the hot
- * path). Summary statistics are computed and printed after all reads are
- * mapped.
- *
- * Also logs a per-stage breakdown to stderr for any individual read whose
- * total mapping time exceeds a configurable threshold.
- */
-class GiraffeStats {
-public:
-    GiraffeStats(size_t thread_count, double slow_threshold_s);
-
-    /**
-     * Record timing/item data from one read's Funnel.
-     * Must be called after funnel.stop().
-     * Uses omp_get_thread_num() to select the per-thread slot.
-     */
-    void record_read(const Funnel& funnel, const std::string& read_name);
-
-    /**
-     * Print a summary table of per-stage timing percentiles to out.
-     * Thread-safe to call after all record_read() calls are done.
-     */
-    void print_summary(std::ostream& out) const;
-
-private:
-    struct ThreadData {
-        // Per-stage accumulated durations and item counts.
-        // Stage names appear in insertion order the first time they're seen.
-        std::vector<std::string> stage_order;
-        std::unordered_map<std::string, std::vector<double>> stage_durations;
-        std::unordered_map<std::string, std::vector<size_t>> stage_item_counts;
-
-        // Per-substage accumulated durations, keyed as "stage/substage".
-        std::unordered_map<std::string, std::vector<double>> substage_durations;
-
-        // Total per-read duration.
-        std::vector<double> read_durations;
-
-        size_t slow_read_count = 0;
-    };
-
-    std::vector<ThreadData> per_thread;
-    double slow_threshold_s;
-
-    // Compute percentile p in [0,1] from a sorted vector.
-    static double percentile(const std::vector<double>& sorted, double p);
-};
-
-} // namespace vg
-
-#endif // VG_GIRAFFE_STATS_HPP_INCLUDED
diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp
index 353f823bd80..c678d3ecdc7 100644
--- a/src/minimizer_mapper.cpp
+++ b/src/minimizer_mapper.cpp
@@ -4,7 +4,6 @@
  */
 
 #include "minimizer_mapper.hpp"
-#include "giraffe_stats.hpp"
 
 #include "crash.hpp"
 #include "annotation.hpp"
@@ -635,16 +634,11 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
     // Cluster the seeds. Get sets of input seed indexes that go together.
     if (track_provenance) {
         funnel.stage("cluster");
-        funnel.substage("cluster_seeds");
     }
 
     // Find the clusters
     std::vector<Cluster> clusters = clusterer.cluster_seeds(seeds, get_distance_limit(aln.sequence().size()));
-
-    if (track_provenance) {
-        funnel.substage_stop();
-    }
-
+    
 #ifdef debug_validate_clusters
     vector<vector<Cluster>> all_clusters;
     all_clusters.emplace_back(clusters);
@@ -1230,15 +1224,10 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
     
     // Stop this alignment
     funnel.stop();
-
-    // Record aggregate stats and log slow reads if instrumentation is enabled.
-    if (giraffe_stats) {
-        giraffe_stats->record_read(funnel, aln.name());
-    }
-
+    
     // Annotate with whatever's in the funnel
     funnel.annotate_mapped_alignment(mappings[0], track_correctness);
-
+    
     if (track_provenance) {
         if (track_correctness) {
             annotate_with_minimizer_statistics(mappings[0], minimizers, seeds, seeds.size(), 0, funnel);
diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp
index cfa8aee47c3..41a376069e6 100644
--- a/src/minimizer_mapper.hpp
+++ b/src/minimizer_mapper.hpp
@@ -31,9 +31,6 @@ namespace vg {
 using namespace std;
 using namespace vg::io;
 
-// Forward-declare GiraffeStats so the mapper can hold a pointer without a full include.
-class GiraffeStats;
-
 class MinimizerMapper : public AlignerClient {
     public:
     // Definitions used with minimizer indexes.
@@ -435,10 +432,6 @@ class MinimizerMapper : public AlignerClient {
     static constexpr bool default_show_work = false;
     bool show_work = default_show_work;
 
-    /// If set, collect per-stage aggregate timing statistics and log slow reads.
-    /// Not owned by this object; caller manages lifetime.
-    class GiraffeStats* giraffe_stats = nullptr;
-
     ////How many stdevs from fragment length distr mean do we cluster together?
     static constexpr double default_paired_distance_stdevs = 2.0;
     double paired_distance_stdevs = default_paired_distance_stdevs; 
diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp
index ccb0386b98d..26c86dce020 100644
--- a/src/minimizer_mapper_from_chains.cpp
+++ b/src/minimizer_mapper_from_chains.cpp
@@ -5,7 +5,6 @@
  */
 
 #include "minimizer_mapper.hpp"
-#include "giraffe_stats.hpp"
 
 #include "annotation.hpp"
 #include "banded_global_aligner.hpp"
@@ -719,7 +718,6 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     
     if (this->track_provenance) {
         funnel.stage("tree");
-        funnel.substage("fill_in_forest");
     }
 
     // Make them into a zip code tree
@@ -727,10 +725,6 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     crash_unless(distance_index);
     zip_code_forest.fill_in_forest(seeds, *distance_index, aln.sequence().size() * zipcode_tree_scale);
 
-    if (this->track_provenance) {
-        funnel.substage_stop();
-    }
-
 #ifdef debug_print_forest
     if (show_work) {
         #pragma omp critical (cerr)
@@ -1010,14 +1004,9 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     // Stop this alignment
     funnel.stop();
 
-    // Record aggregate stats and log slow reads if instrumentation is enabled.
-    if (giraffe_stats) {
-        giraffe_stats->record_read(funnel, aln.name());
-    }
-
     // Annotate with whatever's in the funnel
     funnel.annotate_mapped_alignment(mappings[0], track_correctness);
-
+    
     if (track_provenance) {
         if (track_correctness) {
             annotate_with_minimizer_statistics(mappings[0], minimizers, seeds, seeds.size(), chains.size(), funnel);
@@ -1106,9 +1095,6 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
     bool do_gapless_extension = aln.sequence().size() <= gapless_extension_limit;
 
     // First score all the zip code trees in the forest by summing the scores of their involved minimizers.
-    if (track_provenance) {
-        funnel.substage("score_trees");
-    }
     vector<double> tree_scores;
     double best_tree_score = 0;
     double second_best_tree_score = 0;
@@ -1165,7 +1151,6 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
 
 
     if (track_provenance) {
-        funnel.substage_stop();
         funnel.stage("chain");
         funnel.substage("chain");
     }
diff --git a/src/subcommand/giraffe_main.cpp b/src/subcommand/giraffe_main.cpp
index b419f91054f..f23fead27ff 100644
--- a/src/subcommand/giraffe_main.cpp
+++ b/src/subcommand/giraffe_main.cpp
@@ -29,7 +29,6 @@
 #include "../index_registry.hpp"
 #include "../utility.hpp"
 #include "../watchdog.hpp"
-#include "../giraffe_stats.hpp"
 #include "../crash.hpp"
 #include <bdsg/overlays/overlay_helper.hpp>
 
@@ -766,7 +765,6 @@ int main_giraffe(int argc, char** argv) {
     constexpr int OPT_HAPLOTYPE_SAMPLING = 1104;
     constexpr int OPT_NUM_HAPLOTYPES = 1105;
     constexpr int OPT_NO_DIPLOID_SAMPLING = 1106;
-    constexpr int OPT_SLOW_READ_THRESHOLD = 1200;
 
 
     // initialize parameters with their default options
@@ -834,11 +832,7 @@ int main_giraffe(int argc, char** argv) {
     bool track_position = MinimizerMapper::default_track_position;
     // Should we log our mapping decision making?
     bool show_work = MinimizerMapper::default_show_work;
-
-    // Reads longer than this (in seconds) get a per-stage breakdown logged to stderr.
-    // 0 disables per-read slow logging. The aggregate summary is always printed when > 0.
-    double slow_read_threshold = 0.0;
-
+    
     // Should we throw out our alignments instead of outputting them?
     bool discard_alignments = false;
     
@@ -1115,7 +1109,6 @@ int main_giraffe(int argc, char** argv) {
         {"track-correctness", no_argument, 0, OPT_TRACK_CORRECTNESS},
         {"track-position", no_argument, 0, OPT_TRACK_POSITION},
         {"show-work", no_argument, 0, OPT_SHOW_WORK},
-        {"slow-read-threshold", required_argument, 0, OPT_SLOW_READ_THRESHOLD},
         {"threads", required_argument, 0, 't'},
     };
     parser->make_long_options(long_options);
@@ -1369,11 +1362,7 @@ int main_giraffe(int argc, char** argv) {
                 // Also turn on saving explanations
                 Explainer::save_explanations = true;
                 break;
-
-            case OPT_SLOW_READ_THRESHOLD:
-                slow_read_threshold = parse<double>(optarg);
-                break;
-
+                
             case 't':
                 set_thread_count(logger, optarg);
                 break;
@@ -1976,13 +1965,6 @@ int main_giraffe(int argc, char** argv) {
         // Work out the number of threads we will have
         size_t thread_count = omp_get_max_threads();
 
-        // Set up per-stage timing instrumentation if requested.
-        unique_ptr<GiraffeStats> giraffe_stats;
-        if (slow_read_threshold > 0.0) {
-            giraffe_stats.reset(new GiraffeStats(thread_count, slow_read_threshold));
-            minimizer_mapper.giraffe_stats = giraffe_stats.get();
-        }
-
         // Set up counters per-thread for total reads mapped
         vector<size_t> reads_mapped_by_thread(thread_count, 0);
         
@@ -2433,11 +2415,8 @@ int main_giraffe(int argc, char** argv) {
 
             logger.info() << "Memory footprint: " << gbwt::inGigabytes(gbwt::memoryUsage()) << " GB" << endl;
         }
-
-        if (giraffe_stats) {
-            giraffe_stats->print_summary(cerr);
-        }
-
+        
+        
         if (report) {
             // Log output filename and mapping speed in reads/second/thread to report TSV
             report << output_filename << "\t" << reads_per_second_per_thread << endl;

From 6cf266c9bf6ccc362489d88595563fd196a47613 Mon Sep 17 00:00:00 2001
From: Zia <194475824+electricEpilith@users.noreply.github.com>
Date: Sat, 4 Apr 2026 00:18:04 -0700
Subject: [PATCH 7/8] second try at instrumentation initial plan by Claude Opus
 4.6

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-Authored-By: GitHub Copilot <noreply@github.com>
---
 src/minimizer_mapper.cpp             | 35 ++++++++++++++++++++--
 src/minimizer_mapper.hpp             | 45 ++++++++++++++++++++++++++--
 src/minimizer_mapper_from_chains.cpp | 45 +++++++++++++++++++++++-----
 src/subcommand/giraffe_main.cpp      | 26 ++++++++++++++++
 4 files changed, 140 insertions(+), 11 deletions(-)

diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp
index c678d3ecdc7..c1e5744cb01 100644
--- a/src/minimizer_mapper.cpp
+++ b/src/minimizer_mapper.cpp
@@ -30,9 +30,12 @@
 
 #include <iostream>
 #include <algorithm>
+#include <chrono>
 #include <cmath>
 #include <cfloat>
 
+#include <omp.h>
+
 // Turn on debugging prints
 //#define debug
 // Turn on printing of minimizer fact tables
@@ -621,17 +624,34 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         return aln.sequence();
     });
 
+    // Set up per-stage timing.
+    stage_timings_t* per_thread_timings = nullptr;
+    if (!stage_timings_by_thread.empty()) {
+        per_thread_timings = &stage_timings_by_thread[omp_get_thread_num()];
+    }
+    auto stage_clock_now = []() { return std::chrono::high_resolution_clock::now(); };
+    auto stage_ns = [](std::chrono::high_resolution_clock::time_point s,
+                       std::chrono::high_resolution_clock::time_point e) {
+        return std::chrono::duration<double, std::nano>(e - s).count();
+    };
+    auto total_start = stage_clock_now();
+
     // Minimizers sorted by position
+    auto minimizer_stage_start = stage_clock_now();
     std::vector<Minimizer> minimizers_in_read = this->find_minimizers(aln.sequence(), funnel);
     // Indexes of minimizers, sorted into score order, best score first
     std::vector<size_t> minimizer_score_order = sort_minimizers_by_score(minimizers_in_read, rng);
     // Minimizers sorted by best score first
     VectorView<Minimizer> minimizers{minimizers_in_read, minimizer_score_order};
+    if (per_thread_timings) { per_thread_timings->minimizer_ns += stage_ns(minimizer_stage_start, stage_clock_now()); }
 
     // Find the seeds and mark the minimizers that were located.
+    auto seed_stage_start = stage_clock_now();
     vector<Seed> seeds = this->find_seeds(minimizers_in_read, minimizers, aln, funnel);
-  
+    if (per_thread_timings) { per_thread_timings->seed_ns += stage_ns(seed_stage_start, stage_clock_now()); }
+
     // Cluster the seeds. Get sets of input seed indexes that go together.
+    auto tree_stage_start = stage_clock_now(); // "tree" slot holds cluster+extend for extensions path
     if (track_provenance) {
         funnel.stage("cluster");
     }
@@ -832,6 +852,8 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         });
         
     std::vector<int> cluster_extension_scores = this->score_extensions(cluster_extensions, aln, funnel);
+    if (per_thread_timings) { per_thread_timings->chain_ns += stage_ns(tree_stage_start, stage_clock_now()); }
+    auto align_stage_start = stage_clock_now();
     if (track_provenance) {
         funnel.stage("align");
     }
@@ -1073,11 +1095,13 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         supplementaries = std::move(identify_supplementary_alignments(alignments, funnel));
     }
     
+    if (per_thread_timings) { per_thread_timings->align_ns += stage_ns(align_stage_start, stage_clock_now()); }
+    auto winner_stage_start = stage_clock_now();
     if (track_provenance) {
         // Now say we are finding the winner(s)
         funnel.stage("winner");
     }
-    
+
     // Fill this in with the alignments we will output as mappings
     vector<Alignment> mappings;
     mappings.reserve(min(alignments.size(), max_multimaps));
@@ -1274,6 +1298,13 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         }
     }
 
+    // Record winner stage and total map_from_extensions time.
+    if (per_thread_timings) {
+        per_thread_timings->winner_ns += stage_ns(winner_stage_start, stage_clock_now());
+        per_thread_timings->total_ns  += stage_ns(total_start,        stage_clock_now());
+        per_thread_timings->read_count++;
+    }
+
     return mappings;
 }
 
diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp
index 41a376069e6..e3dd804ba66 100644
--- a/src/minimizer_mapper.hpp
+++ b/src/minimizer_mapper.hpp
@@ -590,7 +590,45 @@ class MinimizerMapper : public AlignerClient {
             return this->occs.second;
         }
     };
-    
+
+    /// Struct for per-stage timing accumulators
+    struct stage_timings_t {
+        double minimizer_ns = 0; ///< find_minimizers + flag_repetitive + sort
+        double seed_ns = 0;      ///< find_seeds
+        double tree_ns = 0;      ///< zip code forest fill + to_anchors + tree scoring
+        double chain_ns = 0;     ///< do_chaining_on_trees (incl. gapless extension)
+        double align_ns = 0;     ///< do_alignment_on_chains
+        double winner_ns = 0;    ///< pick_mappings + MAPQ + annotation
+        double total_ns = 0;     ///< full map_from_chains/map_from_extensions wall time
+        size_t read_count = 0;
+
+        inline stage_timings_t& operator+=(const stage_timings_t& other) {
+            minimizer_ns += other.minimizer_ns;
+            seed_ns      += other.seed_ns;
+            tree_ns      += other.tree_ns;
+            chain_ns     += other.chain_ns;
+            align_ns     += other.align_ns;
+            winner_ns    += other.winner_ns;
+            total_ns     += other.total_ns;
+            read_count   += other.read_count;
+            return *this;
+        }
+    };
+
+    /// Initialize per-thread stage timing accumulators for the given number of threads
+    void initialize_stage_timings(size_t thread_count) {
+        stage_timings_by_thread.assign(thread_count, stage_timings_t{});
+    }
+
+    /// Aggregate stage timings across all threads.
+    stage_timings_t get_stage_timings() const {
+        stage_timings_t total;
+        for (auto& t : stage_timings_by_thread) {
+            total += t;
+        }
+        return total;
+    }
+
 protected:
     
     /// Types of paired alignments in paired-end mapping
@@ -784,7 +822,10 @@ class MinimizerMapper : public AlignerClient {
      */
     double get_read_coverage(const Alignment& aln, const VectorView<std::vector<size_t>>& seed_sets, const std::vector<Seed>& seeds, const VectorView<Minimizer>& minimizers) const;
     
-    /// Struct to represent per-DP-method stats. 
+    /// Per-thread stage timing accumulators. Must be resized to thread_count before mapping.
+    std::vector<stage_timings_t> stage_timings_by_thread;
+
+    /// Struct to represent per-DP-method stats.
     struct aligner_stats_t {
 
         /// Collection of values you can +=
diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp
index 26c86dce020..e805ee24542 100644
--- a/src/minimizer_mapper_from_chains.cpp
+++ b/src/minimizer_mapper_from_chains.cpp
@@ -29,10 +29,13 @@
 
 #include <iostream>
 #include <algorithm>
+#include <chrono>
 #include <cmath>
 #include <cfloat>
 #include <unordered_set>
 
+#include <omp.h>
+
 // Turn on debugging prints
 //#define debug
 // Turn on recombintion debugging prints
@@ -697,8 +700,20 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
         return aln.sequence();
     });
 
+    // Set up per-stage timing.
+    stage_timings_t* per_thread_timings = nullptr;
+    if (!stage_timings_by_thread.empty()) {
+        per_thread_timings = &stage_timings_by_thread[omp_get_thread_num()];
+    }
+    auto stage_clock_now = []() { return std::chrono::high_resolution_clock::now(); };
+    auto stage_ns = [](std::chrono::high_resolution_clock::time_point s,
+                       std::chrono::high_resolution_clock::time_point e) {
+        return std::chrono::duration<double, std::nano>(e - s).count();
+    };
+    auto total_start = stage_clock_now();
 
     // Minimizers sorted by position
+    auto minimizer_stage_start = stage_clock_now();
     std::vector<Minimizer> minimizers_in_read = this->find_minimizers(aln.sequence(), funnel);
     // Flag minimizers as being in repetitive regions of the read or not
     this->flag_repetitive_minimizers(minimizers_in_read);
@@ -706,10 +721,12 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     std::vector<size_t> minimizer_score_order = sort_minimizers_by_score(minimizers_in_read, rng);
     // Minimizers sorted by best score first
     VectorView<Minimizer> minimizers{minimizers_in_read, minimizer_score_order};
-
+    if (per_thread_timings) { per_thread_timings->minimizer_ns += stage_ns(minimizer_stage_start, stage_clock_now()); }
 
     // Find the seeds and mark the minimizers that were located.
+    auto seed_stage_start = stage_clock_now();
     vector<Seed> seeds = this->find_seeds(minimizers_in_read, minimizers, aln, funnel);
+    if (per_thread_timings) { per_thread_timings->seed_ns += stage_ns(seed_stage_start, stage_clock_now()); }
 
     if (seeds.empty()) {
         #pragma omp critical (cerr)
@@ -721,6 +738,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     }
 
     // Make them into a zip code tree
+    auto tree_stage_start = stage_clock_now();
     ZipCodeForest zip_code_forest;
     crash_unless(distance_index);
     zip_code_forest.fill_in_forest(seeds, *distance_index, aln.sequence().size() * zipcode_tree_scale);
@@ -739,6 +757,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     // use them to make gapless extension anchors over them.
     // TODO: Can we only use the seeds that are in trees we keep?
     vector<algorithms::Anchor> seed_anchors = this->to_anchors(aln, minimizers, seeds);
+    if (per_thread_timings) { per_thread_timings->tree_ns += stage_ns(tree_stage_start, stage_clock_now()); }
     
     // If we do gapless extension, then it is possible to find full-length gapless extensions at this stage
     // If we have at least one good gapless extension, then we will turn them directly into alignments
@@ -765,11 +784,13 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     // The multiplicity for each chain. For now, just the multiplicity of the tree it came from
     std::vector<double> multiplicity_by_chain;
 
+    auto chain_stage_start = stage_clock_now();
     do_chaining_on_trees(aln, zip_code_forest, seeds, minimizers, seed_anchors,
                          chains, chain_rec_flags, chain_source_tree, chain_score_estimates,
                          minimizer_kept_chain_count, multiplicity_by_chain,
                          alignments, minimizer_explored, multiplicity_by_alignment,
                          rng, funnel);
+    if (per_thread_timings) { per_thread_timings->chain_ns += stage_ns(chain_stage_start, stage_clock_now()); }
 
     //Fill in chain stats for annotating the final alignment
     bool best_chain_correct = false;
@@ -821,26 +842,29 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     alignments_to_source.reserve(chain_score_estimates.size());
 
     if (alignments.size() == 0) {
-        do_alignment_on_chains(aln, seeds, minimizers, seed_anchors, chains, chain_source_tree, multiplicity_by_chain, chain_score_estimates, 
-                               minimizer_kept_chain_count, alignments, multiplicity_by_alignment, 
+        auto align_stage_start = stage_clock_now();
+        do_alignment_on_chains(aln, seeds, minimizers, seed_anchors, chains, chain_source_tree, multiplicity_by_chain, chain_score_estimates,
+                               minimizer_kept_chain_count, alignments, multiplicity_by_alignment,
                                alignments_to_source, minimizer_explored, stats, funnel_depleted, rng, funnel);
+        if (per_thread_timings) { per_thread_timings->align_ns += stage_ns(align_stage_start, stage_clock_now()); }
     }
-    
-    
+
+
     if (track_provenance) {
         // Now say we are finding the winner(s)
         funnel.stage("winner");
     }
     
     // Fill this in with the alignments we will output as mappings
+    auto winner_stage_start = stage_clock_now();
     vector<Alignment> mappings;
     mappings.reserve(min(alignments.size(), max_multimaps));
     //The scores of the mappings
     vector<double> scores;
     //The multiplicities of mappings
     vector<double> multiplicity_by_mapping;
-    
-    pick_mappings_from_alignments(aln, alignments, multiplicity_by_alignment, alignments_to_source, chain_score_estimates, 
+
+    pick_mappings_from_alignments(aln, alignments, multiplicity_by_alignment, alignments_to_source, chain_score_estimates,
                                   mappings, scores, multiplicity_by_mapping, funnel_depleted, rng, funnel);
     
     if (track_provenance) {
@@ -1072,6 +1096,13 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
 
     Explainer::clear_context();
 
+    // Record winner stage and total map_from_chains time.
+    if (per_thread_timings) {
+        per_thread_timings->winner_ns += stage_ns(winner_stage_start, stage_clock_now());
+        per_thread_timings->total_ns  += stage_ns(total_start,        stage_clock_now());
+        per_thread_timings->read_count++;
+    }
+
     return mappings;
 }
 
diff --git a/src/subcommand/giraffe_main.cpp b/src/subcommand/giraffe_main.cpp
index f23fead27ff..d233c84fc10 100644
--- a/src/subcommand/giraffe_main.cpp
+++ b/src/subcommand/giraffe_main.cpp
@@ -7,6 +7,7 @@
 #include <getopt.h>
 #include <fstream>
 #include <iostream>
+#include <iomanip>
 #include <cassert>
 #include <cstring>
 #include <ctime>
@@ -1967,6 +1968,9 @@ int main_giraffe(int argc, char** argv) {
 
         // Set up counters per-thread for total reads mapped
         vector<size_t> reads_mapped_by_thread(thread_count, 0);
+
+        // Initialize per-thread stage timing accumulators.
+        minimizer_mapper.initialize_stage_timings(thread_count);
         
         // For timing, we may run one thread first and then switch to all threads. So track both start times.
         std::chrono::time_point<std::chrono::system_clock> first_thread_start;
@@ -2414,6 +2418,28 @@ int main_giraffe(int argc, char** argv) {
             }
 
             logger.info() << "Memory footprint: " << gbwt::inGigabytes(gbwt::memoryUsage()) << " GB" << endl;
+
+            // Report per-stage timing breakdown.
+            auto timings = minimizer_mapper.get_stage_timings();
+            if (timings.read_count > 0) {
+                double rc = (double)timings.read_count;
+                // Convert nanoseconds per read to milliseconds.
+                auto ms = [&](double ns) { return ns / rc / 1e6; };
+                double total_ms = timings.total_ns / rc / 1e6;
+                auto pct = [&](double ns) -> double {
+                    return total_ms > 0 ? (ns / rc / 1e6) / total_ms * 100.0 : 0.0;
+                };
+                auto li = logger.info();
+                li << "Stage timing breakdown (avg per read across " << timings.read_count << " reads):\n";
+                li << std::fixed << std::setprecision(3);
+                li << "  minimizer: " << std::setw(8) << ms(timings.minimizer_ns) << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.minimizer_ns) << "%)\n";
+                li << "  seed:      " << std::setw(8) << std::setprecision(3) << ms(timings.seed_ns)      << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.seed_ns)      << "%)\n";
+                li << "  tree:      " << std::setw(8) << std::setprecision(3) << ms(timings.tree_ns)      << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.tree_ns)      << "%)\n";
+                li << "  chain:     " << std::setw(8) << std::setprecision(3) << ms(timings.chain_ns)     << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.chain_ns)     << "%)\n";
+                li << "  align:     " << std::setw(8) << std::setprecision(3) << ms(timings.align_ns)     << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.align_ns)     << "%)\n";
+                li << "  winner:    " << std::setw(8) << std::setprecision(3) << ms(timings.winner_ns)    << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.winner_ns)    << "%)\n";
+                li << "  total:     " << std::setw(8) << std::setprecision(3) << total_ms                 << " ms (100.0%)" << endl;
+            }
         }
         
         

From e5513aa27d9f71635f9a172bd6cf38d714896b09 Mon Sep 17 00:00:00 2001
From: Zia <194475824+electricEpilith@users.noreply.github.com>
Date: Sun, 5 Apr 2026 19:49:51 -0700
Subject: [PATCH 8/8] Revert "second try at instrumentation"

This reverts commit 6cf266c9bf6ccc362489d88595563fd196a47613.
---
 src/minimizer_mapper.cpp             | 35 ++--------------------
 src/minimizer_mapper.hpp             | 45 ++--------------------------
 src/minimizer_mapper_from_chains.cpp | 45 +++++-----------------------
 src/subcommand/giraffe_main.cpp      | 26 ----------------
 4 files changed, 11 insertions(+), 140 deletions(-)

diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp
index c1e5744cb01..c678d3ecdc7 100644
--- a/src/minimizer_mapper.cpp
+++ b/src/minimizer_mapper.cpp
@@ -30,12 +30,9 @@
 
 #include <iostream>
 #include <algorithm>
-#include <chrono>
 #include <cmath>
 #include <cfloat>
 
-#include <omp.h>
-
 // Turn on debugging prints
 //#define debug
 // Turn on printing of minimizer fact tables
@@ -624,34 +621,17 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         return aln.sequence();
     });
 
-    // Set up per-stage timing.
-    stage_timings_t* per_thread_timings = nullptr;
-    if (!stage_timings_by_thread.empty()) {
-        per_thread_timings = &stage_timings_by_thread[omp_get_thread_num()];
-    }
-    auto stage_clock_now = []() { return std::chrono::high_resolution_clock::now(); };
-    auto stage_ns = [](std::chrono::high_resolution_clock::time_point s,
-                       std::chrono::high_resolution_clock::time_point e) {
-        return std::chrono::duration<double, std::nano>(e - s).count();
-    };
-    auto total_start = stage_clock_now();
-
     // Minimizers sorted by position
-    auto minimizer_stage_start = stage_clock_now();
     std::vector<Minimizer> minimizers_in_read = this->find_minimizers(aln.sequence(), funnel);
     // Indexes of minimizers, sorted into score order, best score first
     std::vector<size_t> minimizer_score_order = sort_minimizers_by_score(minimizers_in_read, rng);
     // Minimizers sorted by best score first
     VectorView<Minimizer> minimizers{minimizers_in_read, minimizer_score_order};
-    if (per_thread_timings) { per_thread_timings->minimizer_ns += stage_ns(minimizer_stage_start, stage_clock_now()); }
 
     // Find the seeds and mark the minimizers that were located.
-    auto seed_stage_start = stage_clock_now();
     vector<Seed> seeds = this->find_seeds(minimizers_in_read, minimizers, aln, funnel);
-    if (per_thread_timings) { per_thread_timings->seed_ns += stage_ns(seed_stage_start, stage_clock_now()); }
-
+  
     // Cluster the seeds. Get sets of input seed indexes that go together.
-    auto tree_stage_start = stage_clock_now(); // "tree" slot holds cluster+extend for extensions path
     if (track_provenance) {
         funnel.stage("cluster");
     }
@@ -852,8 +832,6 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         });
         
     std::vector<int> cluster_extension_scores = this->score_extensions(cluster_extensions, aln, funnel);
-    if (per_thread_timings) { per_thread_timings->chain_ns += stage_ns(tree_stage_start, stage_clock_now()); }
-    auto align_stage_start = stage_clock_now();
     if (track_provenance) {
         funnel.stage("align");
     }
@@ -1095,13 +1073,11 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         supplementaries = std::move(identify_supplementary_alignments(alignments, funnel));
     }
     
-    if (per_thread_timings) { per_thread_timings->align_ns += stage_ns(align_stage_start, stage_clock_now()); }
-    auto winner_stage_start = stage_clock_now();
     if (track_provenance) {
         // Now say we are finding the winner(s)
         funnel.stage("winner");
     }
-
+    
     // Fill this in with the alignments we will output as mappings
     vector<Alignment> mappings;
     mappings.reserve(min(alignments.size(), max_multimaps));
@@ -1298,13 +1274,6 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
         }
     }
 
-    // Record winner stage and total map_from_extensions time.
-    if (per_thread_timings) {
-        per_thread_timings->winner_ns += stage_ns(winner_stage_start, stage_clock_now());
-        per_thread_timings->total_ns  += stage_ns(total_start,        stage_clock_now());
-        per_thread_timings->read_count++;
-    }
-
     return mappings;
 }
 
diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp
index e3dd804ba66..41a376069e6 100644
--- a/src/minimizer_mapper.hpp
+++ b/src/minimizer_mapper.hpp
@@ -590,45 +590,7 @@ class MinimizerMapper : public AlignerClient {
             return this->occs.second;
         }
     };
-
-    /// Struct for per-stage timing accumulators
-    struct stage_timings_t {
-        double minimizer_ns = 0; ///< find_minimizers + flag_repetitive + sort
-        double seed_ns = 0;      ///< find_seeds
-        double tree_ns = 0;      ///< zip code forest fill + to_anchors + tree scoring
-        double chain_ns = 0;     ///< do_chaining_on_trees (incl. gapless extension)
-        double align_ns = 0;     ///< do_alignment_on_chains
-        double winner_ns = 0;    ///< pick_mappings + MAPQ + annotation
-        double total_ns = 0;     ///< full map_from_chains/map_from_extensions wall time
-        size_t read_count = 0;
-
-        inline stage_timings_t& operator+=(const stage_timings_t& other) {
-            minimizer_ns += other.minimizer_ns;
-            seed_ns      += other.seed_ns;
-            tree_ns      += other.tree_ns;
-            chain_ns     += other.chain_ns;
-            align_ns     += other.align_ns;
-            winner_ns    += other.winner_ns;
-            total_ns     += other.total_ns;
-            read_count   += other.read_count;
-            return *this;
-        }
-    };
-
-    /// Initialize per-thread stage timing accumulators for the given number of threads
-    void initialize_stage_timings(size_t thread_count) {
-        stage_timings_by_thread.assign(thread_count, stage_timings_t{});
-    }
-
-    /// Aggregate stage timings across all threads.
-    stage_timings_t get_stage_timings() const {
-        stage_timings_t total;
-        for (auto& t : stage_timings_by_thread) {
-            total += t;
-        }
-        return total;
-    }
-
+    
 protected:
     
     /// Types of paired alignments in paired-end mapping
@@ -822,10 +784,7 @@ class MinimizerMapper : public AlignerClient {
      */
     double get_read_coverage(const Alignment& aln, const VectorView<std::vector<size_t>>& seed_sets, const std::vector<Seed>& seeds, const VectorView<Minimizer>& minimizers) const;
     
-    /// Per-thread stage timing accumulators. Must be resized to thread_count before mapping.
-    std::vector<stage_timings_t> stage_timings_by_thread;
-
-    /// Struct to represent per-DP-method stats.
+    /// Struct to represent per-DP-method stats. 
     struct aligner_stats_t {
 
         /// Collection of values you can +=
diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp
index e805ee24542..26c86dce020 100644
--- a/src/minimizer_mapper_from_chains.cpp
+++ b/src/minimizer_mapper_from_chains.cpp
@@ -29,13 +29,10 @@
 
 #include <iostream>
 #include <algorithm>
-#include <chrono>
 #include <cmath>
 #include <cfloat>
 #include <unordered_set>
 
-#include <omp.h>
-
 // Turn on debugging prints
 //#define debug
 // Turn on recombintion debugging prints
@@ -700,20 +697,8 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
         return aln.sequence();
     });
 
-    // Set up per-stage timing.
-    stage_timings_t* per_thread_timings = nullptr;
-    if (!stage_timings_by_thread.empty()) {
-        per_thread_timings = &stage_timings_by_thread[omp_get_thread_num()];
-    }
-    auto stage_clock_now = []() { return std::chrono::high_resolution_clock::now(); };
-    auto stage_ns = [](std::chrono::high_resolution_clock::time_point s,
-                       std::chrono::high_resolution_clock::time_point e) {
-        return std::chrono::duration<double, std::nano>(e - s).count();
-    };
-    auto total_start = stage_clock_now();
 
     // Minimizers sorted by position
-    auto minimizer_stage_start = stage_clock_now();
     std::vector<Minimizer> minimizers_in_read = this->find_minimizers(aln.sequence(), funnel);
     // Flag minimizers as being in repetitive regions of the read or not
     this->flag_repetitive_minimizers(minimizers_in_read);
@@ -721,12 +706,10 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     std::vector<size_t> minimizer_score_order = sort_minimizers_by_score(minimizers_in_read, rng);
     // Minimizers sorted by best score first
     VectorView<Minimizer> minimizers{minimizers_in_read, minimizer_score_order};
-    if (per_thread_timings) { per_thread_timings->minimizer_ns += stage_ns(minimizer_stage_start, stage_clock_now()); }
+
 
     // Find the seeds and mark the minimizers that were located.
-    auto seed_stage_start = stage_clock_now();
     vector<Seed> seeds = this->find_seeds(minimizers_in_read, minimizers, aln, funnel);
-    if (per_thread_timings) { per_thread_timings->seed_ns += stage_ns(seed_stage_start, stage_clock_now()); }
 
     if (seeds.empty()) {
         #pragma omp critical (cerr)
@@ -738,7 +721,6 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     }
 
     // Make them into a zip code tree
-    auto tree_stage_start = stage_clock_now();
     ZipCodeForest zip_code_forest;
     crash_unless(distance_index);
     zip_code_forest.fill_in_forest(seeds, *distance_index, aln.sequence().size() * zipcode_tree_scale);
@@ -757,7 +739,6 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     // use them to make gapless extension anchors over them.
     // TODO: Can we only use the seeds that are in trees we keep?
     vector<algorithms::Anchor> seed_anchors = this->to_anchors(aln, minimizers, seeds);
-    if (per_thread_timings) { per_thread_timings->tree_ns += stage_ns(tree_stage_start, stage_clock_now()); }
     
     // If we do gapless extension, then it is possible to find full-length gapless extensions at this stage
     // If we have at least one good gapless extension, then we will turn them directly into alignments
@@ -784,13 +765,11 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     // The multiplicity for each chain. For now, just the multiplicity of the tree it came from
     std::vector<double> multiplicity_by_chain;
 
-    auto chain_stage_start = stage_clock_now();
     do_chaining_on_trees(aln, zip_code_forest, seeds, minimizers, seed_anchors,
                          chains, chain_rec_flags, chain_source_tree, chain_score_estimates,
                          minimizer_kept_chain_count, multiplicity_by_chain,
                          alignments, minimizer_explored, multiplicity_by_alignment,
                          rng, funnel);
-    if (per_thread_timings) { per_thread_timings->chain_ns += stage_ns(chain_stage_start, stage_clock_now()); }
 
     //Fill in chain stats for annotating the final alignment
     bool best_chain_correct = false;
@@ -842,29 +821,26 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     alignments_to_source.reserve(chain_score_estimates.size());
 
     if (alignments.size() == 0) {
-        auto align_stage_start = stage_clock_now();
-        do_alignment_on_chains(aln, seeds, minimizers, seed_anchors, chains, chain_source_tree, multiplicity_by_chain, chain_score_estimates,
-                               minimizer_kept_chain_count, alignments, multiplicity_by_alignment,
+        do_alignment_on_chains(aln, seeds, minimizers, seed_anchors, chains, chain_source_tree, multiplicity_by_chain, chain_score_estimates, 
+                               minimizer_kept_chain_count, alignments, multiplicity_by_alignment, 
                                alignments_to_source, minimizer_explored, stats, funnel_depleted, rng, funnel);
-        if (per_thread_timings) { per_thread_timings->align_ns += stage_ns(align_stage_start, stage_clock_now()); }
     }
-
-
+    
+    
     if (track_provenance) {
         // Now say we are finding the winner(s)
         funnel.stage("winner");
     }
     
     // Fill this in with the alignments we will output as mappings
-    auto winner_stage_start = stage_clock_now();
     vector<Alignment> mappings;
     mappings.reserve(min(alignments.size(), max_multimaps));
     //The scores of the mappings
     vector<double> scores;
     //The multiplicities of mappings
     vector<double> multiplicity_by_mapping;
-
-    pick_mappings_from_alignments(aln, alignments, multiplicity_by_alignment, alignments_to_source, chain_score_estimates,
+    
+    pick_mappings_from_alignments(aln, alignments, multiplicity_by_alignment, alignments_to_source, chain_score_estimates, 
                                   mappings, scores, multiplicity_by_mapping, funnel_depleted, rng, funnel);
     
     if (track_provenance) {
@@ -1096,13 +1072,6 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
 
     Explainer::clear_context();
 
-    // Record winner stage and total map_from_chains time.
-    if (per_thread_timings) {
-        per_thread_timings->winner_ns += stage_ns(winner_stage_start, stage_clock_now());
-        per_thread_timings->total_ns  += stage_ns(total_start,        stage_clock_now());
-        per_thread_timings->read_count++;
-    }
-
     return mappings;
 }
 
diff --git a/src/subcommand/giraffe_main.cpp b/src/subcommand/giraffe_main.cpp
index d233c84fc10..f23fead27ff 100644
--- a/src/subcommand/giraffe_main.cpp
+++ b/src/subcommand/giraffe_main.cpp
@@ -7,7 +7,6 @@
 #include <getopt.h>
 #include <fstream>
 #include <iostream>
-#include <iomanip>
 #include <cassert>
 #include <cstring>
 #include <ctime>
@@ -1968,9 +1967,6 @@ int main_giraffe(int argc, char** argv) {
 
         // Set up counters per-thread for total reads mapped
         vector<size_t> reads_mapped_by_thread(thread_count, 0);
-
-        // Initialize per-thread stage timing accumulators.
-        minimizer_mapper.initialize_stage_timings(thread_count);
         
         // For timing, we may run one thread first and then switch to all threads. So track both start times.
         std::chrono::time_point<std::chrono::system_clock> first_thread_start;
@@ -2418,28 +2414,6 @@ int main_giraffe(int argc, char** argv) {
             }
 
             logger.info() << "Memory footprint: " << gbwt::inGigabytes(gbwt::memoryUsage()) << " GB" << endl;
-
-            // Report per-stage timing breakdown.
-            auto timings = minimizer_mapper.get_stage_timings();
-            if (timings.read_count > 0) {
-                double rc = (double)timings.read_count;
-                // Convert nanoseconds per read to milliseconds.
-                auto ms = [&](double ns) { return ns / rc / 1e6; };
-                double total_ms = timings.total_ns / rc / 1e6;
-                auto pct = [&](double ns) -> double {
-                    return total_ms > 0 ? (ns / rc / 1e6) / total_ms * 100.0 : 0.0;
-                };
-                auto li = logger.info();
-                li << "Stage timing breakdown (avg per read across " << timings.read_count << " reads):\n";
-                li << std::fixed << std::setprecision(3);
-                li << "  minimizer: " << std::setw(8) << ms(timings.minimizer_ns) << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.minimizer_ns) << "%)\n";
-                li << "  seed:      " << std::setw(8) << std::setprecision(3) << ms(timings.seed_ns)      << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.seed_ns)      << "%)\n";
-                li << "  tree:      " << std::setw(8) << std::setprecision(3) << ms(timings.tree_ns)      << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.tree_ns)      << "%)\n";
-                li << "  chain:     " << std::setw(8) << std::setprecision(3) << ms(timings.chain_ns)     << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.chain_ns)     << "%)\n";
-                li << "  align:     " << std::setw(8) << std::setprecision(3) << ms(timings.align_ns)     << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.align_ns)     << "%)\n";
-                li << "  winner:    " << std::setw(8) << std::setprecision(3) << ms(timings.winner_ns)    << " ms (" << std::setw(5) << std::setprecision(1) << pct(timings.winner_ns)    << "%)\n";
-                li << "  total:     " << std::setw(8) << std::setprecision(3) << total_ms                 << " ms (100.0%)" << endl;
-            }
         }