From f82f3523dc5c206e9794b59995943eff1d4c1bb6 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 4 Sep 2023 16:56:24 +0800 Subject: [PATCH 1/7] update --- .../olap/rowset/segment_v2/binary_prefix_page.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp index e3e893a0ddd6c4..5fc5904b6ee890 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp @@ -23,6 +23,7 @@ #include #include +#include "common/status.h" #include "gutil/port.h" #include "gutil/strings/substitute.h" #include "util/coding.h" @@ -161,8 +162,8 @@ Status BinaryPrefixPageDecoder::seek_to_position_in_page(size_t pos) { size_t restart_point_index = pos / _restart_point_internal; RETURN_IF_ERROR(_seek_to_restart_point(restart_point_index)); while (_cur_pos < pos) { - _cur_pos++; RETURN_IF_ERROR(_read_next_value()); + _cur_pos++; } return Status::OK(); } @@ -199,8 +200,8 @@ Status BinaryPrefixPageDecoder::seek_at_or_after_value(const void* value, bool* *exact_match = cmp == 0; return Status::OK(); } - _cur_pos++; RETURN_IF_ERROR(_read_next_value()); + _cur_pos++; } } @@ -212,10 +213,16 @@ Status BinaryPrefixPageDecoder::next_batch(size_t* n, vectorized::MutableColumnP } size_t max_fetch = std::min(*n, static_cast(_num_values - _cur_pos)); + dst->insert_data((char*)(_current_value.data()), _current_value.size()); // read and copy values - for (size_t i = 0; i < max_fetch; ++i) { + for (size_t i = 0; i < max_fetch - 1; ++i) { + RETURN_IF_ERROR(_read_next_value()); + _cur_pos++; dst->insert_data((char*)(_current_value.data()), _current_value.size()); - _read_next_value(); + } + + if (_cur_pos < _num_values - 1) { + RETURN_IF_ERROR(_read_next_value()); _cur_pos++; } From 1394f530ad9987f906b4e6325ac300cf3c328b80 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Tue, 5 Sep 2023 14:44:57 +0800 Subject: [PATCH 2/7] update --- be/src/olap/rowset/segment_v2/binary_prefix_page.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp index 5fc5904b6ee890..3cf587cdbd1422 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp @@ -121,6 +121,7 @@ Status BinaryPrefixPageDecoder::_read_next_value() { uint32_t non_shared_len; auto data_ptr = _decode_value_lengths(_next_ptr, &shared_len, &non_shared_len); if (data_ptr == nullptr) { + DCHECK(false) << "[BinaryPrefixPageDecoder::_read_next_value] corruption!"; return Status::Corruption("Failed to decode value at position {}", _cur_pos); } _current_value.resize(shared_len); From 31bfc4c84f50592af1612b0ca422578197176f3f Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Tue, 5 Sep 2023 15:03:40 +0800 Subject: [PATCH 3/7] fix --- be/src/olap/rowset/segment_v2/binary_prefix_page.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp index 3cf587cdbd1422..5626ef6e7e0f99 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp @@ -216,7 +216,7 @@ Status BinaryPrefixPageDecoder::next_batch(size_t* n, vectorized::MutableColumnP dst->insert_data((char*)(_current_value.data()), _current_value.size()); // read and copy values - for (size_t i = 0; i < max_fetch - 1; ++i) { + for (size_t i = 1; i < max_fetch; ++i) { RETURN_IF_ERROR(_read_next_value()); _cur_pos++; dst->insert_data((char*)(_current_value.data()), _current_value.size()); From 5a1281f99c6d23cd60433fcaa6c96f54da47595f Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Wed, 6 Sep 2023 09:30:05 +0800 Subject: [PATCH 4/7] update --- be/src/olap/rowset/segment_v2/binary_prefix_page.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp index 5626ef6e7e0f99..d892e90f9fff1c 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp @@ -163,8 +163,8 @@ Status BinaryPrefixPageDecoder::seek_to_position_in_page(size_t pos) { size_t restart_point_index = pos / _restart_point_internal; RETURN_IF_ERROR(_seek_to_restart_point(restart_point_index)); while (_cur_pos < pos) { - RETURN_IF_ERROR(_read_next_value()); _cur_pos++; + RETURN_IF_ERROR(_read_next_value()); } return Status::OK(); } @@ -201,8 +201,8 @@ Status BinaryPrefixPageDecoder::seek_at_or_after_value(const void* value, bool* *exact_match = cmp == 0; return Status::OK(); } - RETURN_IF_ERROR(_read_next_value()); _cur_pos++; + RETURN_IF_ERROR(_read_next_value()); } } @@ -217,14 +217,14 @@ Status BinaryPrefixPageDecoder::next_batch(size_t* n, vectorized::MutableColumnP dst->insert_data((char*)(_current_value.data()), _current_value.size()); // read and copy values for (size_t i = 1; i < max_fetch; ++i) { - RETURN_IF_ERROR(_read_next_value()); _cur_pos++; + RETURN_IF_ERROR(_read_next_value()); dst->insert_data((char*)(_current_value.data()), _current_value.size()); } if (_cur_pos < _num_values - 1) { - RETURN_IF_ERROR(_read_next_value()); _cur_pos++; + RETURN_IF_ERROR(_read_next_value()); } *n = max_fetch; From d2a0726cd44f6a4a1bb11adaf5bf0788c351fe46 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Wed, 6 Sep 2023 09:40:03 +0800 Subject: [PATCH 5/7] update --- be/src/olap/rowset/segment_v2/binary_prefix_page.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp index d892e90f9fff1c..930cf784a5828f 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp @@ -222,8 +222,8 @@ Status BinaryPrefixPageDecoder::next_batch(size_t* n, vectorized::MutableColumnP dst->insert_data((char*)(_current_value.data()), _current_value.size()); } - if (_cur_pos < _num_values - 1) { - _cur_pos++; + _cur_pos++; + if (_cur_pos < _num_values) { RETURN_IF_ERROR(_read_next_value()); } From 8a3d1d8eb09314bcad51e82b091543168ab07786 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Wed, 6 Sep 2023 11:12:30 +0800 Subject: [PATCH 6/7] update --- .../rowset/segment_v2/binary_prefix_page.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp index 930cf784a5828f..ab9056def1b9b3 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp @@ -115,7 +115,7 @@ const uint8_t* BinaryPrefixPageDecoder::_decode_value_lengths(const uint8_t* ptr Status BinaryPrefixPageDecoder::_read_next_value() { if (_cur_pos >= _num_values) { - return Status::NotFound("no more value to read"); + return Status::EndOfFile("no more value to read"); } uint32_t shared_len; uint32_t non_shared_len; @@ -214,17 +214,14 @@ Status BinaryPrefixPageDecoder::next_batch(size_t* n, vectorized::MutableColumnP } size_t max_fetch = std::min(*n, static_cast(_num_values - _cur_pos)); - dst->insert_data((char*)(_current_value.data()), _current_value.size()); // read and copy values - for (size_t i = 1; i < max_fetch; ++i) { - _cur_pos++; - RETURN_IF_ERROR(_read_next_value()); + for (size_t i = 0; i < max_fetch; ++i) { dst->insert_data((char*)(_current_value.data()), _current_value.size()); - } - - _cur_pos++; - if (_cur_pos < _num_values) { - RETURN_IF_ERROR(_read_next_value()); + _cur_pos++; + // reach the end of the page, should not read the next value + if (_cur_pos < _num_values) { + RETURN_IF_ERROR(_read_next_value()); + } } *n = max_fetch; From a4fc60eda389b9e499bbe6809108c39c7d92f294 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Wed, 6 Sep 2023 11:57:42 +0800 Subject: [PATCH 7/7] fix beut --- be/test/olap/primary_key_index_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp index 837d2587007744..46b87b712da23f 100644 --- a/be/test/olap/primary_key_index_test.cpp +++ b/be/test/olap/primary_key_index_test.cpp @@ -129,7 +129,7 @@ TEST_F(PrimaryKeyIndexTest, builder) { EXPECT_FALSE(exists); auto status = index_iterator->seek_at_or_after(&slice, &exact_match); EXPECT_FALSE(exact_match); - EXPECT_TRUE(status.is()); + EXPECT_TRUE(status.is()); } // read all key