From 9b1c75f6c2e004c929754784cfb434edf3391541 Mon Sep 17 00:00:00 2001
From: "yonghao.fyh" <yonghao.fyh@alibaba-inc.com>
Date: Thu, 29 Jan 2026 14:48:16 +0800
Subject: [PATCH] feat: divide range larger than cache range limit for
 performance

---
 src/paimon/common/utils/byte_range_combiner.cpp | 17 +++++++++++++++++
 .../common/utils/byte_range_combiner_test.cpp   |  6 +++---
 src/paimon/common/utils/read_ahead_cache.cpp    |  2 +-
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/paimon/common/utils/byte_range_combiner.cpp b/src/paimon/common/utils/byte_range_combiner.cpp
index d10eb9db..42877003 100644
--- a/src/paimon/common/utils/byte_range_combiner.cpp
+++ b/src/paimon/common/utils/byte_range_combiner.cpp
@@ -38,6 +38,23 @@ Result<std::vector<ByteRange>> ByteRangeCombiner::CoalesceByteRanges(
     if (ranges.empty()) {
         return ranges;
     }
+
+    std::vector<ByteRange> adjusted_ranges;
+    for (const auto& range : ranges) {
+        uint64_t range_start = range.offset;
+        uint64_t range_end = range.offset + range.length;
+
+        while (range_end - range_start > range_size_limit) {
+            adjusted_ranges.emplace_back(range_start, range_size_limit);
+            range_start += range_size_limit;
+        }
+
+        if (range_end > range_start) {
+            adjusted_ranges.emplace_back(range_start, range_end - range_start);
+        }
+    }
+    ranges = std::move(adjusted_ranges);
+
     // Remove zero-sized ranges
     auto end = std::remove_if(ranges.begin(), ranges.end(),
                               [](const ByteRange& range) { return range.length == 0; });
diff --git a/src/paimon/common/utils/byte_range_combiner_test.cpp b/src/paimon/common/utils/byte_range_combiner_test.cpp
index 2f7d43cc..19d739a4 100644
--- a/src/paimon/common/utils/byte_range_combiner_test.cpp
+++ b/src/paimon/common/utils/byte_range_combiner_test.cpp
@@ -59,17 +59,17 @@ TEST(ByteRangeCombinerTest, TestBasics) {
     check({{110, 11}, {130, 0}, {130, 11}, {145, 0}, {150, 11}, {200, 0}}, {{110, 51}});

     // No holes but large ranges
-    check({{110, 100}, {210, 100}}, {{110, 100}, {210, 100}});
+    check({{110, 100}, {210, 100}}, {{110, 99}, {209, 1}, {210, 99}, {309, 1}});
     // Small holes and large range in the middle (*)
     check({{110, 10}, {120, 11}, {140, 100}, {240, 11}, {260, 11}},
-          {{110, 21}, {140, 100}, {240, 31}});
+          {{110, 21}, {140, 99}, {239, 32}});

     // Mid-size ranges that would turn large after coalescing
     check({{100, 50}, {150, 50}}, {{100, 50}, {150, 50}});
     check({{100, 30}, {130, 30}, {160, 30}, {190, 30}, {220, 30}}, {{100, 90}, {190, 60}});
     // Same as (*) but unsorted
     check({{140, 100}, {120, 11}, {240, 11}, {110, 10}, {260, 11}},
-          {{110, 21}, {140, 100}, {240, 31}});
+          {{110, 21}, {140, 99}, {239, 32}});

     // Completely overlapping ranges should be eliminated
     check({{20, 5}, {20, 5}, {21, 2}}, {{20, 5}});
diff --git a/src/paimon/common/utils/read_ahead_cache.cpp b/src/paimon/common/utils/read_ahead_cache.cpp
index 3edd23d1..029f9ccb 100644
--- a/src/paimon/common/utils/read_ahead_cache.cpp
+++ b/src/paimon/common/utils/read_ahead_cache.cpp
@@ -90,11 +90,11 @@ class ReadAheadCache::Impl {
 };

 void ReadAheadCache::Impl::Cache(std::vector<ByteRange> ranges) {
-    std::unique_lock lock(rw_mutex_);
     std::sort(ranges.begin(), ranges.end(),
               [](const ByteRange& a, const ByteRange& b) { return a.offset < b.offset; });
     std::vector<CacheEntry> new_entries = MakeCacheEntries(ranges);
     // Add new entries, themselves ordered by offset
+    std::unique_lock lock(rw_mutex_);
     if (entries_.size() > 0) {
         size_t new_entries_size = 0;
         for (const auto& e : new_entries) {