From c339f09ee375b9f04e94684d376783c6a8c1b9af Mon Sep 17 00:00:00 2001
From: Giulio Ermanno Pibiri <jeis90@gmail.com>
Date: Sun, 21 Sep 2025 12:19:32 +0200
Subject: [PATCH 1/4] added endpoint sequence

---
 include/endpoints_sequence.hpp   | 296 +++++++++++++++++++++++++++++++
 include/util.hpp                 |   1 +
 test/test_endpoints_sequence.cpp | 224 +++++++++++++++++++++++
 3 files changed, 521 insertions(+)
 create mode 100644 include/endpoints_sequence.hpp
 create mode 100644 test/test_endpoints_sequence.cpp
diff --git a/include/endpoints_sequence.hpp b/include/endpoints_sequence.hpp
new file mode 100644
index 0000000..bd67a44
--- /dev/null
+++ b/include/endpoints_sequence.hpp
@@ -0,0 +1,296 @@
+#pragma once
+
+#include <vector>
+
+#include "bit_vector.hpp"
+#include "darray.hpp"
+#include "compact_vector.hpp"
+
+namespace bits {
+
+/*
+    This class encodes with Elias-Fano a sequence with the
+    following properties:
+    - all elements are distinct;
+    - the first element is 0;
+    - when doing `next_geq(x)`, x is always in [0,U) where
+      U is the last element of the sequence.
+
+    This is the case when the sequence represents a list of
+    bin sizes in a cumulative way, assuming that each bin is
+    non empty and we ask what is the bin that comprises element x.
+    For example: imagine we have a set of strings that we concatenate
+    together and we keep the list of string boundaries.
+    If the have 4 strings of length 3, 10, 7, 5 respectively,
+    then we encode the sequence S = [0, 3, 13, 20, U=25]
+
+    0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
+    x     x                       x                    x              x
+
+    where U = 3+10+7+5, and length of i-th string is S[i+1]-S[i],
+    for i = 0..3.
+    Given the offset x of a character, we ask what is the string that
+    comprises that offset. In this case, we have that 0 <= x < U.
+
+    The goal of this class is to support fast next_geq queries at the price
+    of some space increase compared to an `elias_fano` sequence, so:
+    - The low bits are always 8.
+    - We keep an array of H of size ceil(U/2^8)-1, where H[i] indicates the
+      position of the i-th 0 in the high bit array. Each H[i] is written
+      in 1+log(n) bits.
+*/
+
+template <typename DArray1 = darray1>
+struct endpoints_sequence {
+    endpoints_sequence() : m_back(0) {}
+
+    template <typename Iterator>
+    void encode(Iterator begin, uint64_t n, uint64_t universe) {
+        if (n == 0) return;
+
+        const uint64_t num_high_bits = n + (universe >> 8) + 1;
+        bit_vector::builder bvb_high_bits(num_high_bits);
+        m_low_bits.reserve(n);
+
+        compact_vector::builder cv_builder((universe + 256 - 1) / 256 - 1,  // ceil(U/2^8)-1
+                                           util::ceil_log2_uint64(num_high_bits));
+
+        assert(*begin == 0);
+        uint64_t prev_pos = 0;
+        for (uint64_t i = 0; i != n; ++i, ++begin) {
+            auto v = *begin;
+            m_low_bits.push_back(v & 255);
+            uint64_t high_part = v >> 8;
+            uint64_t pos = high_part + i;
+            // std::cout << "prev_pos = " << prev_pos << "; pos = " << pos << std::endl;
+            bvb_high_bits.set(pos, 1);
+            for (uint64_t j = prev_pos + 1; j < pos; ++j) {
+                // std::cout << "j = " << j << std::endl;
+                cv_builder.push_back(j);
+            }
+            prev_pos = pos;
+            m_back = v;
+        }
+
+        bvb_high_bits.build(m_high_bits);
+        // for (uint64_t i = 0; i != m_high_bits.num_bits(); ++i) {
+        //     std::cout << m_high_bits.get(i) << ' ';
+        // }
+        // std::cout << std::endl;
+        m_high_bits_d1.build(m_high_bits);
+        cv_builder.build(m_high_bits_d0);
+        // auto it = m_high_bits_d0.begin();
+        // for (uint64_t i = 0; i != m_high_bits_d0.size(); ++i) {
+        //     std::cout << i << " : " << *it << '\n';
+        //     ++it;
+        // }
+    }
+
+    struct iterator {
+        iterator() : m_ptr(nullptr), m_pos(0), m_val(0) {}
+
+        iterator(endpoints_sequence const* ptr, uint64_t pos = 0)
+            : m_ptr(ptr)
+            , m_pos(pos)
+            , m_val(0)  //
+        {
+            if (!has_next() or m_ptr->m_high_bits_d1.num_positions() == 0) return;
+            uint64_t begin = m_ptr->m_high_bits_d1.select(m_ptr->m_high_bits, m_pos);
+            m_high_bits_it = m_ptr->m_high_bits.get_iterator_at(begin);
+            m_low_bits_it = m_ptr->m_low_bits.begin() + m_pos;
+            read_next_value();
+        }
+
+        iterator(endpoints_sequence const* ptr, uint64_t pos, uint64_t high_hint)
+            : m_ptr(ptr)
+            , m_pos(pos)
+            , m_val(0)  //
+        {
+            if (!has_next() or m_ptr->m_high_bits_d1.num_positions() == 0) return;
+            assert(high_hint == 0 || m_ptr->m_high_bits.get(high_hint) == 0);
+            m_high_bits_it = m_ptr->m_high_bits.get_iterator_at(high_hint);
+            m_low_bits_it = m_ptr->m_low_bits.begin() + m_pos;
+            read_next_value();
+        }
+
+        bool has_next() const { return m_pos < m_ptr->size(); }
+        bool has_prev() const { return m_pos > 0; }
+        uint64_t value() const { return m_val; }
+        uint64_t position() const { return m_pos; }
+
+        void next() {
+            ++m_pos;
+            if (!has_next()) return;
+            read_next_value();
+        }
+
+        /*
+            Return the value before the current position.
+        */
+        uint64_t prev_value() {
+            assert(m_pos > 0);
+            uint64_t pos = m_pos - 1;
+            /*
+                `read_next_value()` sets the state ahead of 1 position,
+                hence must go back by 2 to get previous value.
+            */
+            assert(m_high_bits_it.position() >= 2);
+            uint64_t high = m_high_bits_it.prev(m_high_bits_it.position() - 2);
+            assert(high == m_ptr->m_high_bits_d1.select(m_ptr->m_high_bits, pos));
+            uint64_t low = *(m_low_bits_it - 2);
+            return (((high - pos) << 8) | low);
+        }
+
+    private:
+        endpoints_sequence const* m_ptr;
+        uint64_t m_pos;
+        uint64_t m_val;
+        bit_vector::iterator m_high_bits_it;
+        std::vector<uint8_t>::const_iterator m_low_bits_it;
+
+        void read_next_value() {
+            assert(m_pos < m_ptr->size());
+            uint64_t high = m_high_bits_it.next();
+            assert(high == m_ptr->m_high_bits_d1.select(m_ptr->m_high_bits, m_pos));
+            uint64_t low = *m_low_bits_it;
+            m_val = (((high - m_pos) << 8) | low);
+            ++m_low_bits_it;
+        }
+    };
+
+    iterator get_iterator_at(uint64_t pos) const { return iterator(this, pos); }
+    iterator begin() const { return get_iterator_at(0); }
+
+    uint64_t access(uint64_t i) const {
+        assert(i < size());
+        return ((m_high_bits_d1.select(m_high_bits, i) - i) << 8) | m_low_bits[i];
+    }
+
+    struct return_value {
+        uint64_t pos;
+        uint64_t val;
+    };
+
+    /*
+        Return [position,value] of the smallest element that is >= x.
+    */
+    return_value next_geq(const uint64_t x) const { return _next_geq(x).first; }
+
+    /*
+        Determine integers lo and hi, with lo < hi, such that lo <= x < hi, where:
+        - lo is the largest value that is <= x,
+        - hi is the smallest value that is > x.
+        Return the tuple [lo_pos, lo, hi_pos, hi].
+    */
+    std::pair<return_value, return_value> locate(const uint64_t x) const {
+        auto [lo, it] = _next_geq(x);
+        return_value hi{lo.pos, lo.val};
+
+        if (lo.val > x) {
+            assert(lo.pos > 0);
+            lo.pos -= 1;
+            lo.val = it.prev_value();
+        } else {
+            hi.pos += 1;
+            hi.val = (it.next(), it.value());
+            assert(it.position() == hi.pos);
+            assert(hi.pos < size());
+        }
+
+        return {lo, hi};
+    }
+
+    uint64_t back() const { return m_back; }
+    uint64_t size() const { return m_low_bits.size(); }
+
+    uint64_t num_bytes() const {
+        // std::cout << "m_high_bits = " << (8.0 * m_high_bits.num_bytes()) / size() << std::endl;
+        // std::cout << "m_high_bits_d1 = " << (8.0 * m_high_bits_d1.num_bytes()) / size()
+        //           << std::endl;
+        // std::cout << "m_high_bits_d0 = " << (8.0 * m_high_bits_d0.num_bytes()) / size()
+        //           << std::endl;
+        // std::cout << "m_low_bits = " << (8.0 * essentials::vec_bytes(m_low_bits)) / size()
+        //           << std::endl;
+
+        return sizeof(m_back) + m_high_bits.num_bytes() + m_high_bits_d1.num_bytes() +
+               m_high_bits_d0.num_bytes() + essentials::vec_bytes(m_low_bits);
+    }
+
+    void swap(endpoints_sequence& other) {
+        std::swap(m_back, other.m_back);
+        m_high_bits.swap(other.m_high_bits);
+        m_high_bits_d1.swap(other.m_high_bits_d1);
+        m_high_bits_d0.swap(other.m_high_bits_d0);
+        m_low_bits.swap(other.m_low_bits);
+    }
+
+    template <typename Visitor>
+    void visit(Visitor& visitor) const {
+        visit_impl(visitor, *this);
+    }
+
+    template <typename Visitor>
+    void visit(Visitor& visitor) {
+        visit_impl(visitor, *this);
+    }
+
+private:
+    uint64_t m_back;
+    bit_vector m_high_bits;
+    DArray1 m_high_bits_d1;
+    compact_vector m_high_bits_d0;
+    std::vector<uint8_t> m_low_bits;
+
+    template <typename Visitor, typename T>
+    static void visit_impl(Visitor& visitor, T&& t) {
+        visitor.visit(t.m_back);
+        visitor.visit(t.m_high_bits);
+        visitor.visit(t.m_high_bits_d1);
+        visitor.visit(t.m_high_bits_d0);
+        visitor.visit(t.m_low_bits);
+    }
+
+    std::pair<return_value, iterator> _next_geq(const uint64_t x) const  //
+    {
+        assert(x >= 0);
+        assert(x < back());
+
+        uint64_t h_x = x >> 8;
+        uint64_t p = 0;
+        uint64_t begin = 0;
+        if (h_x > 0) {
+            p = m_high_bits_d0.access(h_x - 1);
+            begin = p - h_x + 1;
+        }
+        assert(begin < size());
+
+        /*
+            `begin` is the position of the elements that have high part >= h_x
+            and `p` is the position in `m_high_bits` of the (h_x-1)-th 0,
+            so it is passed to the iterator as a hint to recover the high part
+            of the element at position `begin` without doing a select_1.
+        */
+
+        auto it = iterator(this, begin, p /* high hint */);
+        uint64_t pos = begin;
+        uint64_t val = it.value();
+        while (val < x) {
+            ++pos;
+            /*
+                Note: no need for bound checking here
+                because x <= back(), hence pos cannot
+                be equal to size().
+            */
+            it.next();
+            val = it.value();
+        }
+        /* now pos is the position of the element that is >= x */
+        assert(val >= x);
+        assert(pos < size());
+        assert(val == access(pos));
+        assert(it.position() == pos);
+        return {{pos, val}, it};
+    }
+};
+
+}  // namespace bits
\ No newline at end of file
diff --git a/include/util.hpp b/include/util.hpp
index fd16ffd..32ff70a 100644
--- a/include/util.hpp
+++ b/include/util.hpp
@@ -32,6 +32,7 @@ static inline bool msbll(uint64_t x, uint64_t& ret) {
 }
 
 static inline uint64_t ceil_log2_uint32(uint32_t x) { return (x > 1) ? msb(x - 1) + 1 : 0; }
+static inline uint64_t ceil_log2_uint64(uint64_t x) { return (x > 1) ? msbll(x - 1) + 1 : 0; }
 
 /* return the position of the least significant bit (lsb) */
 static inline uint64_t lsb(uint32_t x) {
diff --git a/test/test_endpoints_sequence.cpp b/test/test_endpoints_sequence.cpp
new file mode 100644
index 0000000..f59e1d0
--- /dev/null
+++ b/test/test_endpoints_sequence.cpp
@@ -0,0 +1,224 @@
+#include "common.hpp"
+#include "include/endpoints_sequence.hpp"
+
+constexpr uint64_t sequence_length = 10000;
+
+std::vector<uint64_t> get_sequence(const uint64_t sequence_length) {
+    std::vector<uint64_t> s = test::get_sequence(sequence_length);
+    std::vector<uint64_t> seq;
+    seq.reserve(s.size() + 1);
+    uint64_t sum = 0;
+    for (uint64_t i = 0; i != s.size(); ++i) {
+        seq.push_back(sum);
+        sum += s[i] + (s[i] == 0);  // +1 so that all values are distinct
+    }
+    seq.push_back(sum);
+    assert(std::adjacent_find(seq.begin(), seq.end()) == seq.end());  // must be all distinct
+    return seq;
+}
+
+endpoints_sequence<> encode(std::vector<uint64_t> const& seq) {
+    endpoints_sequence<> es;
+    assert(seq.front() == 0);
+    es.encode(seq.begin(), seq.size(), seq.back());
+    REQUIRE(es.size() == seq.size());
+    std::cout << "es.size() = " << es.size() << '\n';
+    std::cout << "es.back() = " << es.back() << '\n';
+    std::cout << "measured bits/int = " << (8.0 * es.num_bytes()) / es.size() << std::endl;
+    return es;
+}
+
+TEST_CASE("access") {
+    std::vector<uint64_t> seq = get_sequence(sequence_length);
+    auto es = encode(seq);
+    std::cout << "checking correctness of random access..." << std::endl;
+    for (uint64_t i = 0; i != es.size(); ++i) {
+        uint64_t got = es.access(i);  // get the integer at position i
+        uint64_t expected = seq[i];
+        REQUIRE_MESSAGE(got == expected, "got " << got << " at position " << i << "/" << es.size()
+                                                << " but expected " << expected);
+    }
+    std::cout << "EVERYTHING OK!" << std::endl;
+}
+
+TEST_CASE("iterator::next") {
+    std::vector<uint64_t> seq = get_sequence(sequence_length);
+    auto es = encode(seq);
+
+    std::cout << "checking correctness of iterator..." << std::endl;
+
+    auto it = es.begin();
+    for (uint64_t i = 0; i != es.size(); ++i, it.next()) {
+        uint64_t got = it.value();
+        uint64_t expected = seq[i];
+        REQUIRE_MESSAGE(got == expected, "got " << got << " at position " << i << "/" << es.size()
+                                                << " but expected " << expected);
+        REQUIRE(it.position() == i);
+        REQUIRE(it.has_next() == true);
+    }
+    REQUIRE(it.position() == es.size());
+    REQUIRE(it.has_next() == false);
+
+    uint64_t i = 0;
+    it = es.begin();
+    while (it.has_next()) {
+        uint64_t got = it.value();
+        uint64_t expected = seq[i];
+        REQUIRE_MESSAGE(got == expected, "got " << got << " at position " << i << "/" << es.size()
+                                                << " but expected " << expected);
+        REQUIRE(it.position() == i);
+        REQUIRE(it.has_next() == true);
+        it.next();
+        ++i;
+    }
+    REQUIRE(i == es.size());
+    REQUIRE(it.position() == es.size());
+    REQUIRE(it.has_next() == false);
+
+    std::cout << "EVERYTHING OK!" << std::endl;
+}
+
+TEST_CASE("next_geq") {
+    std::vector<uint64_t> seq = get_sequence(sequence_length);
+    auto es = encode(seq);
+
+    std::cout << "checking correctness of next_geq..." << std::endl;
+
+    for (uint64_t i = 0; i != es.size() - 1; ++i) {
+        auto x = seq[i];
+        /* since x is in the sequence, next_geq must return (i,x) */
+
+        auto p = es.next_geq(x);
+        uint64_t pos = p.pos;
+        uint64_t got = p.val;
+        uint64_t expected = seq[i];
+
+        // std::cout << "x=" << x << "; pos=" << pos << "; got=" << got << std::endl;
+
+        bool good = (got == expected) && (pos == i);
+        REQUIRE_MESSAGE(good, "got " << got << " at position " << pos << "/" << seq.size()
+                                     << " but expected " << expected << " at position " << i);
+    }
+    std::cout << "EVERYTHING OK!" << std::endl;
+
+    std::cout << "checking correctness of next_geq..." << std::endl;
+    for (uint64_t i = 0; i != 10000; ++i) {
+        uint64_t x = seq[rand() % seq.size()] + (i % 2 == 0 ? 3 : -3);  // get some value
+        if (x >= seq.back()) x = seq.back() - 1;  // lower bound must be x < back() for assumption
+
+        auto p = es.next_geq(x);
+        uint64_t pos = p.pos;
+        uint64_t got = p.val;
+
+        // std::cout << "x=" << x << "; pos=" << pos << "; got=" << got << std::endl;
+        auto it = std::lower_bound(seq.begin(), seq.end(), x);
+        assert(it != seq.end());
+        uint64_t expected = *it;
+        uint64_t pos_expected = std::distance(seq.begin(), it);
+        bool good = (got == expected) && (pos == pos_expected);
+        REQUIRE_MESSAGE(good, "got " << got << " at position " << pos << "/" << seq.size()
+                                     << " but expected " << expected << " at position "
+                                     << pos_expected);
+    }
+    std::cout << "EVERYTHING OK!" << std::endl;
+}
+
+TEST_CASE("small_next_geq") {
+    std::vector<uint64_t> seq{0, 3, 5, 6, 9, 13, 23, 31};
+    auto es = encode(seq);
+
+    auto p = es.next_geq(0);
+    REQUIRE(p.pos == 0);
+    REQUIRE(p.val == 0);
+
+    p = es.next_geq(2);
+    REQUIRE(p.pos == 1);
+    REQUIRE(p.val == 3);
+
+    p = es.next_geq(4);
+    REQUIRE(p.pos == 2);
+    REQUIRE(p.val == 5);
+
+    p = es.next_geq(6);
+    REQUIRE(p.pos == 3);
+    REQUIRE(p.val == 6);
+
+    p = es.next_geq(23);
+    REQUIRE(p.pos == 6);
+    REQUIRE(p.val == 23);
+
+    p = es.next_geq(28);
+    REQUIRE(p.pos == 7);
+    REQUIRE(p.val == 31);
+
+    p = es.next_geq(30);
+    REQUIRE(p.pos == 7);
+    REQUIRE(p.val == 31);
+}
+
+TEST_CASE("locate") {
+    std::vector<uint64_t> seq = get_sequence(sequence_length);
+    auto es = encode(seq);
+    // test::print(seq);
+
+    std::cout << "checking correctness of locate..." << std::endl;
+
+    for (uint64_t i = 0; i != seq.size() - 1; ++i)  //
+    {
+        auto x = seq[i];
+
+        /* since x is in the sequence, p.first.pos  must be i */
+        auto p = es.locate(x);
+        uint64_t lo_pos = p.first.pos;
+        uint64_t lo_val = p.first.val;
+        uint64_t hi_pos = p.second.pos;
+        uint64_t hi_val = p.second.val;
+
+        uint64_t expected_lo_pos = i;
+        uint64_t expected_lo_val = seq[i];
+        uint64_t expected_hi_pos = i + 1;
+        uint64_t expected_hi_val = seq[i + 1];
+
+        bool good = (lo_val == expected_lo_val) && (lo_pos == expected_lo_pos);
+        REQUIRE_MESSAGE(good, "got " << lo_val << " at position " << lo_pos << "/" << seq.size()
+                                     << " but expected " << expected_lo_val << " at position "
+                                     << expected_lo_pos);
+        good = (hi_val == expected_hi_val) && (hi_pos == expected_hi_pos);
+        REQUIRE_MESSAGE(good, "got " << hi_val << " at position " << hi_pos << "/" << seq.size()
+                                     << " but expected " << expected_hi_val << " at position "
+                                     << expected_hi_pos);
+    }
+    std::cout << "EVERYTHING OK!" << std::endl;
+
+    std::cout << "checking correctness of locate..." << std::endl;
+
+    for (uint64_t i = 0; i != 10000; ++i)  //
+    {
+        uint64_t x = seq[rand() % es.size()] + (i % 2 == 0 ? 3 : -3);  // get some value value
+        if (x >= seq.back()) x = seq.back() - 1;  // lower bound must be x < back() for assumption
+
+        auto p = es.locate(x);
+        uint64_t lo_pos = p.first.pos;
+        uint64_t lo_val = p.first.val;
+        uint64_t hi_pos = p.second.pos;
+        uint64_t hi_val = p.second.val;
+
+        auto it = std::upper_bound(seq.begin(), seq.end(), x) - 1;
+        uint64_t expected_lo_pos = std::distance(seq.begin(), it);
+        assert(expected_lo_pos != es.size() - 1);
+        uint64_t expected_lo_val = *it;
+        uint64_t expected_hi_pos = expected_lo_pos + 1;
+        uint64_t expected_hi_val = *(it + 1);
+
+        bool good = (lo_val == expected_lo_val) && (lo_pos == expected_lo_pos);
+        REQUIRE_MESSAGE(good, "got " << lo_val << " at position " << lo_pos << "/" << seq.size()
+                                     << " but expected " << expected_lo_val << " at position "
+                                     << expected_lo_pos);
+        good = (hi_val == expected_hi_val) && (hi_pos == expected_hi_pos);
+        REQUIRE_MESSAGE(good, "got " << hi_val << " at position " << hi_pos << "/" << seq.size()
+                                     << " but expected " << expected_hi_val << " at position "
+                                     << expected_hi_pos);
+    }
+
+    std::cout << "EVERYTHING OK!" << std::endl;
+}

From abb23fb8001d823b0f838fbc9c1b13659e5edf19 Mon Sep 17 00:00:00 2001
From: Giulio Ermanno Pibiri <jeis90@gmail.com>
Date: Sun, 21 Sep 2025 21:29:49 +0200
Subject: [PATCH 2/4] added alternative iterator to endpoint sequence

---
 include/endpoints_sequence.hpp   | 208 ++++++++++++++++++++++++++-----
 test/common.hpp                  |   1 +
 test/test_endpoints_sequence.cpp |  10 ++
 3 files changed, 191 insertions(+), 28 deletions(-)

diff --git a/include/endpoints_sequence.hpp b/include/endpoints_sequence.hpp
index bd67a44..db5121a 100644
--- a/include/endpoints_sequence.hpp
+++ b/include/endpoints_sequence.hpp
@@ -35,7 +35,7 @@ namespace bits {
     The goal of this class is to support fast next_geq queries at the price
     of some space increase compared to an `elias_fano` sequence, so:
     - The low bits are always 8.
-    - We keep an array of H of size ceil(U/2^8)-1, where H[i] indicates the
+    - We keep an array of H of size ceil(U/2^8), where H[i] indicates the
       position of the i-th 0 in the high bit array. Each H[i] is written
       in 1+log(n) bits.
 */
@@ -52,7 +52,7 @@ struct endpoints_sequence {
         bit_vector::builder bvb_high_bits(num_high_bits);
         m_low_bits.reserve(n);
 
-        compact_vector::builder cv_builder((universe + 256 - 1) / 256 - 1,  // ceil(U/2^8)-1
+        compact_vector::builder cv_builder((universe + 256 - 1) / 256,  // ceil(U/2^8)
                                            util::ceil_log2_uint64(num_high_bits));
 
         assert(*begin == 0);
@@ -62,23 +62,21 @@ struct endpoints_sequence {
             m_low_bits.push_back(v & 255);
             uint64_t high_part = v >> 8;
             uint64_t pos = high_part + i;
-            // std::cout << "prev_pos = " << prev_pos << "; pos = " << pos << std::endl;
             bvb_high_bits.set(pos, 1);
-            for (uint64_t j = prev_pos + 1; j < pos; ++j) {
-                // std::cout << "j = " << j << std::endl;
-                cv_builder.push_back(j);
-            }
+            for (uint64_t j = prev_pos + 1; j < pos; ++j) cv_builder.push_back(j);
             prev_pos = pos;
             m_back = v;
         }
+        cv_builder.push_back(prev_pos + 1);
 
         bvb_high_bits.build(m_high_bits);
+        m_high_bits_d1.build(m_high_bits);
+        cv_builder.build(m_high_bits_d0);
+
         // for (uint64_t i = 0; i != m_high_bits.num_bits(); ++i) {
         //     std::cout << m_high_bits.get(i) << ' ';
         // }
         // std::cout << std::endl;
-        m_high_bits_d1.build(m_high_bits);
-        cv_builder.build(m_high_bits_d0);
         // auto it = m_high_bits_d0.begin();
         // for (uint64_t i = 0; i != m_high_bits_d0.size(); ++i) {
         //     std::cout << i << " : " << *it << '\n';
@@ -153,8 +151,142 @@ struct endpoints_sequence {
             uint64_t high = m_high_bits_it.next();
             assert(high == m_ptr->m_high_bits_d1.select(m_ptr->m_high_bits, m_pos));
             uint64_t low = *m_low_bits_it;
-            m_val = (((high - m_pos) << 8) | low);
+            m_val = ((high - m_pos) << 8) | low;
+            ++m_low_bits_it;
+        }
+    };
+
+    /*
+        This version of iterator uses an iterator over the `low_bits`
+        and an iterator over `high_bits_d0`.
+        This is useful in next_geq and locate because only
+        these two sequences are accessed.
+        The class `iterator` accesses the `high_bits`, so when
+        used in `next_geq` and `locate`, we have accesses to three
+        sequences in total: `low_bits`, `high_bits`, and `high_bits_d0`.
+    */
+    struct _iterator {
+        _iterator() : m_ptr(nullptr), m_pos(0), m_val(0) {}
+
+        _iterator(endpoints_sequence const* ptr)
+            : m_ptr(ptr)
+            , m_pos(0)
+            , m_val(0)  //
+        {
+            m_high_part = 0;
+            m_high_bits_d0_it = m_ptr->m_high_bits_d0.get_iterator_at(0);
+            m_cur_pos = *m_high_bits_d0_it;
+            m_prev_pos = m_cur_pos;
+            m_cluster_size = m_cur_pos;
+            assert(m_cluster_size > 0);
+            m_pos_in_cluster = 0;
+            m_low_bits_it = m_ptr->m_low_bits.begin() + m_pos;
+            uint64_t low = *m_low_bits_it;
+            m_val = (m_high_part << 8) | low;
+        }
+
+        _iterator(endpoints_sequence const* ptr, uint64_t pos, uint64_t high_part)
+            : m_ptr(ptr)
+            , m_pos(pos)
+            , m_val(0)  //
+        {
+            m_high_part = high_part;
+            m_high_bits_d0_it = m_ptr->m_high_bits_d0.get_iterator_at(high_part);
+            m_prev_pos = *m_high_bits_d0_it;
+            m_cluster_size = 0;
+            m_pos_in_cluster = 0;
+
+            while (m_cluster_size == 0) {
+                m_high_part += 1;
+                ++m_high_bits_d0_it;
+                m_cur_pos = *m_high_bits_d0_it;
+                m_cluster_size = m_cur_pos - m_prev_pos - 1;
+                m_prev_pos = m_cur_pos;
+            }
+            m_low_bits_it = m_ptr->m_low_bits.begin() + m_pos;
+
+            uint64_t low = *m_low_bits_it;
+            m_val = (m_high_part << 8) | low;
+        }
+
+        bool has_next() const { return m_pos < m_ptr->size(); }
+        bool has_prev() const { return m_pos > 0; }
+        uint64_t value() const { return m_val; }
+        uint64_t position() const { return m_pos; }
+
+        void next() {
+            ++m_pos;
+            ++m_pos_in_cluster;
+            if (!has_next()) return;
+            read_next_value();
+        }
+
+        /*
+            Return the value before the current position.
+        */
+        uint64_t prev_value() {
+            assert(m_pos > 0);
+
+            uint64_t high_part_prev_val = m_high_part;
+
+            if (m_pos_in_cluster == 0) {
+                uint64_t cur_pos = m_cur_pos;
+                uint64_t prev_pos = m_cur_pos;
+                assert(m_cur_pos == *m_high_bits_d0_it);
+                uint64_t cluster_size = 0;
+
+                assert(m_high_bits_d0_it != m_ptr->m_high_bits_d0.begin());
+                high_part_prev_val -= 1;
+                --m_high_bits_d0_it;
+                cur_pos = *m_high_bits_d0_it;
+
+                while (cluster_size == 0 and high_part_prev_val > 0) {
+                    assert(m_high_bits_d0_it != m_ptr->m_high_bits_d0.begin());
+                    high_part_prev_val -= 1;
+                    --m_high_bits_d0_it;
+                    prev_pos = cur_pos;
+                    cur_pos = *m_high_bits_d0_it;
+                    cluster_size = prev_pos - cur_pos - 1;
+                }
+
+                if (cluster_size > 0 || high_part_prev_val > 0) high_part_prev_val += 1;
+            }
+
+            uint64_t low = *(m_low_bits_it - 1);
+            return (high_part_prev_val << 8) | low;
+        }
+
+    private:
+        endpoints_sequence const* m_ptr;
+        uint64_t m_pos;
+        uint64_t m_val;
+
+        uint64_t m_cur_pos, m_prev_pos;
+        uint64_t m_pos_in_cluster, m_cluster_size;
+        uint64_t m_high_part;
+
+        compact_vector::iterator m_high_bits_d0_it;
+        std::vector<uint8_t>::const_iterator m_low_bits_it;
+
+        void read_next_value() {
+            assert(m_pos < m_ptr->size());
+
+            if (m_pos_in_cluster == m_cluster_size) {
+                m_cluster_size = 0;
+                m_pos_in_cluster = 0;
+                while (m_cluster_size == 0) {
+                    m_high_part += 1;
+                    ++m_high_bits_d0_it;
+                    m_cur_pos = *m_high_bits_d0_it;
+                    m_cluster_size = m_cur_pos - m_prev_pos - 1;
+                    m_prev_pos = m_cur_pos;
+                }
+            }
+            assert(m_high_part + m_pos == m_ptr->m_high_bits_d1.select(m_ptr->m_high_bits, m_pos));
+
             ++m_low_bits_it;
+            uint64_t low = *m_low_bits_it;
+            m_val = (m_high_part << 8) | low;
         }
     };
 
@@ -250,39 +382,59 @@ struct endpoints_sequence {
         visitor.visit(t.m_low_bits);
     }
 
-    std::pair<return_value, iterator> _next_geq(const uint64_t x) const  //
+    std::pair<return_value, _iterator> _next_geq(const uint64_t x) const  //
     {
         assert(x >= 0);
         assert(x < back());
 
+        // uint64_t h_x = x >> 8;
+        // uint64_t p = 0;
+        // uint64_t begin = 0;
+        // if (h_x > 0) {
+        //     p = m_high_bits_d0.access(h_x - 1);
+        //     begin = p - h_x + 1;
+        // }
+        // assert(begin < size());
+
+        // /*
+        //     `begin` is the position of the elements that have high part >= h_x
+        //     and `p` is the position in `m_high_bits` of the (h_x-1)-th 0,
+        //     so it is passed to the iterator as a hint to recover the high part
+        //     of the element at position `begin` without doing a select_1.
+        // */
+
+        // auto it = iterator(this, begin, p /* high hint */);
+        // uint64_t pos = begin;
+        // uint64_t val = it.value();
+        // while (val < x) {
+        //     ++pos;
+        //     it.next();
+        //     val = it.value();
+        // }
+        // /* now pos is the position of the element that is >= x */
+        // assert(val >= x);
+        // assert(pos < size());
+        // assert(val == access(pos));
+        // assert(it.position() == pos);
+        // return {{pos, val}, it};
+
         uint64_t h_x = x >> 8;
-        uint64_t p = 0;
         uint64_t begin = 0;
+        _iterator it;
         if (h_x > 0) {
-            p = m_high_bits_d0.access(h_x - 1);
+            uint64_t p = m_high_bits_d0.access(h_x - 1);
             begin = p - h_x + 1;
+            it = _iterator(this, begin, h_x - 1);
+        } else {
+            it = _iterator(this);
         }
         assert(begin < size());
 
-        /*
-            `begin` is the position of the elements that have high part >= h_x
-            and `p` is the position in `m_high_bits` of the (h_x-1)-th 0,
-            so it is passed to the iterator as a hint to recover the high part
-            of the element at position `begin` without doing a select_1.
-        */
-
-        auto it = iterator(this, begin, p /* high hint */);
         uint64_t pos = begin;
         uint64_t val = it.value();
         while (val < x) {
             ++pos;
-            /*
-                Note: no need for bound checking here
-                because x <= back(), hence pos cannot
-                be equal to size().
-            */
-            it.next();
-            val = it.value();
+            val = (it.next(), it.value());
         }
         /* now pos is the position of the element that is >= x */
         assert(val >= x);
diff --git a/test/common.hpp b/test/common.hpp
index dbbaef7..8e2a632 100644
--- a/test/common.hpp
+++ b/test/common.hpp
@@ -18,6 +18,7 @@ std::vector<uint64_t> get_sequence(const uint64_t sequence_length,
                                    const uint64_t max_int = 1000,                        //
                                    const uint64_t seed = essentials::get_random_seed())  //
 {
+    std::cout << "seed = " << seed << std::endl;
     srand(seed);
     double mean = rand() % (max_int + 1);
     std::mt19937 rng(seed);
diff --git a/test/test_endpoints_sequence.cpp b/test/test_endpoints_sequence.cpp
index f59e1d0..0460ba0 100644
--- a/test/test_endpoints_sequence.cpp
+++ b/test/test_endpoints_sequence.cpp
@@ -81,6 +81,7 @@ TEST_CASE("iterator::next") {
 TEST_CASE("next_geq") {
     std::vector<uint64_t> seq = get_sequence(sequence_length);
     auto es = encode(seq);
+    // test::print(seq);
 
     std::cout << "checking correctness of next_geq..." << std::endl;
 
@@ -111,6 +112,7 @@ TEST_CASE("next_geq") {
         uint64_t got = p.val;
 
         // std::cout << "x=" << x << "; pos=" << pos << "; got=" << got << std::endl;
+
         auto it = std::lower_bound(seq.begin(), seq.end(), x);
         assert(it != seq.end());
         uint64_t expected = *it;
@@ -124,6 +126,8 @@ TEST_CASE("next_geq") {
 }
 
 TEST_CASE("small_next_geq") {
+    std::cout << "checking correctness of next_geq over a small sequence..." << std::endl;
+
     std::vector<uint64_t> seq{0, 3, 5, 6, 9, 13, 23, 31};
     auto es = encode(seq);
 
@@ -179,6 +183,9 @@ TEST_CASE("locate") {
         uint64_t expected_hi_pos = i + 1;
         uint64_t expected_hi_val = seq[i + 1];
 
+        // std::cout << "x = " << x << " lo_pos = " << lo_pos << " lo_val = " << lo_val
+        //           << " hi_pos = " << hi_pos << " hi_val = " << hi_val << std::endl;
+
         bool good = (lo_val == expected_lo_val) && (lo_pos == expected_lo_pos);
         REQUIRE_MESSAGE(good, "got " << lo_val << " at position " << lo_pos << "/" << seq.size()
                                      << " but expected " << expected_lo_val << " at position "
@@ -203,6 +210,9 @@ TEST_CASE("locate") {
         uint64_t hi_pos = p.second.pos;
         uint64_t hi_val = p.second.val;
 
+        // std::cout << "x = " << x << " lo_pos = " << lo_pos << " lo_val = " << lo_val
+        //           << " hi_pos = " << hi_pos << " hi_val = " << hi_val << std::endl;
+
         auto it = std::upper_bound(seq.begin(), seq.end(), x) - 1;
         uint64_t expected_lo_pos = std::distance(seq.begin(), it);
         assert(expected_lo_pos != es.size() - 1);

From 8bd915bc51a7b675a43874bc7e1073069d6bf1df Mon Sep 17 00:00:00 2001
From: Giulio Ermanno Pibiri <jeis90@gmail.com>
Date: Mon, 22 Sep 2025 09:36:27 +0200
Subject: [PATCH 3/4] alt iterator in endpoints_sequence is actually slower
 when used in SSHash

---
 include/endpoints_sequence.hpp | 217 +++++----------------------------
 1 file changed, 28 insertions(+), 189 deletions(-)

diff --git a/include/endpoints_sequence.hpp b/include/endpoints_sequence.hpp
index db5121a..08cc0be 100644
--- a/include/endpoints_sequence.hpp
+++ b/include/endpoints_sequence.hpp
@@ -37,7 +37,7 @@ namespace bits {
     - The low bits are always 8.
     - We keep an array of H of size ceil(U/2^8), where H[i] indicates the
       position of the i-th 0 in the high bit array. Each H[i] is written
-      in 1+log(n) bits.
+      in 1+log(n) bits because the high bit array has length <= 2n.
 */
 
 template <typename DArray1 = darray1>
@@ -99,14 +99,19 @@ struct endpoints_sequence {
             read_next_value();
         }
 
-        iterator(endpoints_sequence const* ptr, uint64_t pos, uint64_t high_hint)
+        iterator(endpoints_sequence const* ptr, uint64_t pos, uint64_t pos_in_high_bits /* hint */)
             : m_ptr(ptr)
             , m_pos(pos)
             , m_val(0)  //
         {
-            if (!has_next() or m_ptr->m_high_bits_d1.num_positions() == 0) return;
-            assert(high_hint == 0 || m_ptr->m_high_bits.get(high_hint) == 0);
-            m_high_bits_it = m_ptr->m_high_bits.get_iterator_at(high_hint);
+            /*
+                These two assertions are valid because this constructor is only used
+                in `next_geq`.
+            */
+            assert(has_next());
+            assert(pos_in_high_bits == 0 || m_ptr->m_high_bits.get(pos_in_high_bits) == 0);
+
+            m_high_bits_it = m_ptr->m_high_bits.get_iterator_at(pos_in_high_bits);
             m_low_bits_it = m_ptr->m_low_bits.begin() + m_pos;
             read_next_value();
         }
@@ -130,13 +135,13 @@ struct endpoints_sequence {
             uint64_t pos = m_pos - 1;
             /*
                 `read_next_value()` sets the state ahead of 1 position,
-                hence must go back by 2 to get previous value.
+                hence must go back by 2 positions to get previous value.
             */
             assert(m_high_bits_it.position() >= 2);
             uint64_t high = m_high_bits_it.prev(m_high_bits_it.position() - 2);
             assert(high == m_ptr->m_high_bits_d1.select(m_ptr->m_high_bits, pos));
             uint64_t low = *(m_low_bits_it - 2);
-            return (((high - pos) << 8) | low);
+            return ((high - pos) << 8) | low;
         }
 
     private:
@@ -156,140 +161,6 @@ struct endpoints_sequence {
         }
     };
 
-    /*
-        This version of iterator uses an iterator over the `low_bits`
-        and an iterator over `high_bits_d0`.
-        This is useful in next_geq and locate because only
-        these two sequences are accessed.
-        The class `iterator` accesses the `high_bits`, so when
-        used in `next_geq` and `locate`, we have accesses to three
-        sequences in total: `low_bits`, `high_bits`, and `high_bits_d0`.
-    */
-    struct _iterator {
-        _iterator() : m_ptr(nullptr), m_pos(0), m_val(0) {}
-
-        _iterator(endpoints_sequence const* ptr)
-            : m_ptr(ptr)
-            , m_pos(0)
-            , m_val(0)  //
-        {
-            m_high_part = 0;
-            m_high_bits_d0_it = m_ptr->m_high_bits_d0.get_iterator_at(0);
-            m_cur_pos = *m_high_bits_d0_it;
-            m_prev_pos = m_cur_pos;
-            m_cluster_size = m_cur_pos;
-            assert(m_cluster_size > 0);
-            m_pos_in_cluster = 0;
-            m_low_bits_it = m_ptr->m_low_bits.begin() + m_pos;
-            uint64_t low = *m_low_bits_it;
-            m_val = (m_high_part << 8) | low;
-        }
-
-        _iterator(endpoints_sequence const* ptr, uint64_t pos, uint64_t high_part)
-            : m_ptr(ptr)
-            , m_pos(pos)
-            , m_val(0)  //
-        {
-            m_high_part = high_part;
-            m_high_bits_d0_it = m_ptr->m_high_bits_d0.get_iterator_at(high_part);
-            m_prev_pos = *m_high_bits_d0_it;
-            m_cluster_size = 0;
-            m_pos_in_cluster = 0;
-
-            while (m_cluster_size == 0) {
-                m_high_part += 1;
-                ++m_high_bits_d0_it;
-                m_cur_pos = *m_high_bits_d0_it;
-                m_cluster_size = m_cur_pos - m_prev_pos - 1;
-                m_prev_pos = m_cur_pos;
-            }
-            m_low_bits_it = m_ptr->m_low_bits.begin() + m_pos;
-
-            uint64_t low = *m_low_bits_it;
-            m_val = (m_high_part << 8) | low;
-        }
-
-        bool has_next() const { return m_pos < m_ptr->size(); }
-        bool has_prev() const { return m_pos > 0; }
-        uint64_t value() const { return m_val; }
-        uint64_t position() const { return m_pos; }
-
-        void next() {
-            ++m_pos;
-            ++m_pos_in_cluster;
-            if (!has_next()) return;
-            read_next_value();
-        }
-
-        /*
-            Return the value before the current position.
-        */
-        uint64_t prev_value() {
-            assert(m_pos > 0);
-
-            uint64_t high_part_prev_val = m_high_part;
-
-            if (m_pos_in_cluster == 0) {
-                uint64_t cur_pos = m_cur_pos;
-                uint64_t prev_pos = m_cur_pos;
-                assert(m_cur_pos == *m_high_bits_d0_it);
-                uint64_t cluster_size = 0;
-
-                assert(m_high_bits_d0_it != m_ptr->m_high_bits_d0.begin());
-                high_part_prev_val -= 1;
-                --m_high_bits_d0_it;
-                cur_pos = *m_high_bits_d0_it;
-
-                while (cluster_size == 0 and high_part_prev_val > 0) {
-                    assert(m_high_bits_d0_it != m_ptr->m_high_bits_d0.begin());
-                    high_part_prev_val -= 1;
-                    --m_high_bits_d0_it;
-                    prev_pos = cur_pos;
-                    cur_pos = *m_high_bits_d0_it;
-                    cluster_size = prev_pos - cur_pos - 1;
-                }
-
-                if (cluster_size > 0 || high_part_prev_val > 0) high_part_prev_val += 1;
-            }
-
-            uint64_t low = *(m_low_bits_it - 1);
-            return (high_part_prev_val << 8) | low;
-        }
-
-    private:
-        endpoints_sequence const* m_ptr;
-        uint64_t m_pos;
-        uint64_t m_val;
-
-        uint64_t m_cur_pos, m_prev_pos;
-        uint64_t m_pos_in_cluster, m_cluster_size;
-        uint64_t m_high_part;
-
-        compact_vector::iterator m_high_bits_d0_it;
-        std::vector<uint8_t>::const_iterator m_low_bits_it;
-
-        void read_next_value() {
-            assert(m_pos < m_ptr->size());
-
-            if (m_pos_in_cluster == m_cluster_size) {
-                m_cluster_size = 0;
-                m_pos_in_cluster = 0;
-                while (m_cluster_size == 0) {
-                    m_high_part += 1;
-                    ++m_high_bits_d0_it;
-                    m_cur_pos = *m_high_bits_d0_it;
-                    m_cluster_size = m_cur_pos - m_prev_pos - 1;
-                    m_prev_pos = m_cur_pos;
-                }
-            }
-            assert(m_high_part + m_pos == m_ptr->m_high_bits_d1.select(m_ptr->m_high_bits, m_pos));
-
-            ++m_low_bits_it;
-            uint64_t low = *m_low_bits_it;
-            m_val = (m_high_part << 8) | low;
-        }
-    };
-
     iterator get_iterator_at(uint64_t pos) const { return iterator(this, pos); }
     iterator begin() const { return get_iterator_at(0); }
 
@@ -306,7 +177,7 @@ struct endpoints_sequence {
     /*
         Return [position,value] of the smallest element that is >= x.
     */
-    return_value next_geq(const uint64_t x) const { return _next_geq(x).first; }
+    return_value next_geq(const uint64_t x) const { return next_geq_helper(x).first; }
 
     /*
         Determine integers lo and hi, with lo < hi, such that lo <= x < hi, where:
@@ -315,7 +186,7 @@ struct endpoints_sequence {
         Return the tuple [lo_pos, lo, hi_pos, hi].
     */
     std::pair<return_value, return_value> locate(const uint64_t x) const {
-        auto [lo, it] = _next_geq(x);
+        auto [lo, it] = next_geq_helper(x);
         return_value hi{lo.pos, lo.val};
 
         if (lo.val > x) {
@@ -336,14 +207,6 @@ struct endpoints_sequence {
     uint64_t size() const { return m_low_bits.size(); }
 
     uint64_t num_bytes() const {
-        // std::cout << "m_high_bits = " << (8.0 * m_high_bits.num_bytes()) / size() << std::endl;
-        // std::cout << "m_high_bits_d1 = " << (8.0 * m_high_bits_d1.num_bytes()) / size()
-        //           << std::endl;
-        // std::cout << "m_high_bits_d0 = " << (8.0 * m_high_bits_d0.num_bytes()) / size()
-        //           << std::endl;
-        // std::cout << "m_low_bits = " << (8.0 * essentials::vec_bytes(m_low_bits)) / size()
-        //           << std::endl;
-
         return sizeof(m_back) + m_high_bits.num_bytes() + m_high_bits_d1.num_bytes() +
                m_high_bits_d0.num_bytes() + essentials::vec_bytes(m_low_bits);
     }
@@ -382,65 +245,41 @@ struct endpoints_sequence {
         visitor.visit(t.m_low_bits);
     }
 
-    std::pair<return_value, _iterator> _next_geq(const uint64_t x) const  //
+    std::pair<return_value, iterator> next_geq_helper(const uint64_t x) const  //
     {
         assert(x >= 0);
         assert(x < back());
 
-        // uint64_t h_x = x >> 8;
-        // uint64_t p = 0;
-        // uint64_t begin = 0;
-        // if (h_x > 0) {
-        //     p = m_high_bits_d0.access(h_x - 1);
-        //     begin = p - h_x + 1;
-        // }
-        // assert(begin < size());
-
-        // /*
-        //     `begin` is the position of the elements that have high part >= h_x
-        //     and `p` is the position in `m_high_bits` of the (h_x-1)-th 0,
-        //     so it is passed to the iterator as a hint to recover the high part
-        //     of the element at position `begin` without doing a select_1.
-        // */
-
-        // auto it = iterator(this, begin, p /* high hint */);
-        // uint64_t pos = begin;
-        // uint64_t val = it.value();
-        // while (val < x) {
-        //     ++pos;
-        //     it.next();
-        //     val = it.value();
-        // }
-        // /* now pos is the position of the element that is >= x */
-        // assert(val >= x);
-        // assert(pos < size());
-        // assert(val == access(pos));
-        // assert(it.position() == pos);
-        // return {{pos, val}, it};
-
         uint64_t h_x = x >> 8;
+        uint64_t p = 0;
         uint64_t begin = 0;
-        _iterator it;
         if (h_x > 0) {
-            uint64_t p = m_high_bits_d0.access(h_x - 1);
+            p = m_high_bits_d0.access(h_x - 1);
             begin = p - h_x + 1;
-            it = _iterator(this, begin, h_x - 1);
-        } else {
-            it = _iterator(this);
         }
         assert(begin < size());
 
+        /*
+            `begin` is the position of the elements that have high part >= h_x
+            and `p` is the position in `m_high_bits` of the (h_x-1)-th 0,
+            so it is passed to the iterator as a hint to recover the high part
+            of the element at position `begin` without doing a select_1.
+        */
+
+        auto it = iterator(this, begin, p /* position in high_bits hint */);
         uint64_t pos = begin;
         uint64_t val = it.value();
         while (val < x) {
             ++pos;
-            val = (it.next(), it.value());
+            it.next();
+            val = it.value();
         }
         /* now pos is the position of the element that is >= x */
         assert(val >= x);
         assert(pos < size());
         assert(val == access(pos));
         assert(it.position() == pos);
+
         return {{pos, val}, it};
     }
 };

From 44f7876bc1e7c0abc007f27fd0ac6548bb484453 Mon Sep 17 00:00:00 2001
From: Giulio Ermanno Pibiri <jeis90@gmail.com>
Date: Mon, 22 Sep 2025 11:08:56 +0200
Subject: [PATCH 4/4] default init compact_vector::enumerator to avoid warning
 on Linux

---
 include/compact_vector.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/compact_vector.hpp b/include/compact_vector.hpp
index 59c1753..2da67a0 100644
--- a/include/compact_vector.hpp
+++ b/include/compact_vector.hpp
@@ -14,7 +14,7 @@ struct compact_vector  //
     struct enumerator {
         using iterator_category = std::random_access_iterator_tag;
 
-        enumerator() {}
+        enumerator() : m_i(0), m_cur_val(0), m_cur_block(0), m_cur_shift(0), m_vec(nullptr) {}
 
         enumerator(Vec const* vec, uint64_t i)
             : m_i(i)