// Reviewer notes for the patch below (placed ahead of the first `diff --git` header).
//
// What the patch changes:
//   * be/src/olap/primary_key_index.h
//       Adds PrimaryKeyIndexBuilder::data_page_num(), a const accessor that forwards to
//       _primary_key_index_builder->data_page_num(). The in-patch comment labels it
//       "used for be ut".
//   * be/src/olap/rowset/segment_v2/indexed_column_writer.h
//       Adds one #include line and IndexedColumnWriter::data_page_num(), which returns
//       _num_data_pages + 1.
//       NOTE(review): the "+ 1" presumably accounts for the data page that is still
//       being filled -- confirm against _finish_current_data_page().
//   * be/test/olap/primary_key_index_test.cpp
//       Adds TEST_F(PrimaryKeyIndexTest, multiple_pages) and
//       TEST_F(PrimaryKeyIndexTest, single_page). Both feed the same ten 5-character
//       keys ("00000", "00002", ..., "00018") to a PrimaryKeyIndexBuilder, check
//       min_key()/max_key(), size() == 2 * 5 * 5, disk_size() against
//       file_writer->bytes_appended(), and num_rows(); then reopen the file with a
//       PrimaryKeyIndexReader and verify:
//         - every written key: check_present() is true and seek_at_or_after() reports
//           exact_match with current ordinal == i;
//         - every odd key "00001".."00017": check_present() is false and
//           seek_at_or_after() reports !exact_match with current ordinal == i + 1;
//         - key "00019": check_present() is false, exact_match is false, and the
//           returned status satisfies a `status.is()` predicate.
//       multiple_pages sets config::primary_key_data_page_size = 5 * 5 and expects
//       data_page_num() > 1; it additionally checks seek_to_ordinal(i) for each i in
//       0..9 and seek_to_ordinal(10), expecting ok() and a matching current ordinal.
//       single_page sets config::primary_key_data_page_size = 32768 and expects
//       data_page_num() == 1.
//
// NOTE(review): this copy of the patch looks damaged -- confirm against the original
// commit before applying:
//   - Line structure is collapsed, so the hunk headers (e.g. "@@ -167,4 +167,157 @@")
//     cannot be matched against physical lines in this form.
//   - Text inside angle brackets appears to be missing: "#include" with no header name,
//     "static_cast(", "std::vector keys", "std::unique_ptr index_iterator", and
//     "status.is()" with no error code.
// NOTE(review): both tests assign config::primary_key_data_page_size and no restore is
// visible in this patch -- TODO confirm whether the fixture resets it; otherwise tests
// that run afterwards see whichever value was assigned last.
diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index 233644b4e07173..59b88c2f72427a 100644 --- a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -67,6 +67,9 @@ class PrimaryKeyIndexBuilder { uint64_t disk_size() const { return _disk_size; } + // used for be ut + uint32_t data_page_num() const { return _primary_key_index_builder->data_page_num(); } + Slice min_key() { return Slice(_min_key.data(), _min_key.size() - _seq_col_length); } Slice max_key() { return Slice(_max_key.data(), _max_key.size() - _seq_col_length); } diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h index ba61708dd90936..ecb26782ad1c7f 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h @@ -22,6 +22,7 @@ #include #include +#include #include #include "common/status.h" @@ -85,6 +86,8 @@ class IndexedColumnWriter { uint64_t disk_size() const { return _disk_size; } + uint32_t data_page_num() const { return _num_data_pages + 1; } + private: Status _finish_current_data_page(size_t& num_val); diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp index 4de6be24feb9e6..fb96e7411e678f 100644 --- a/be/test/olap/primary_key_index_test.cpp +++ b/be/test/olap/primary_key_index_test.cpp @@ -167,4 +167,157 @@ TEST_F(PrimaryKeyIndexTest, builder) { } } +TEST_F(PrimaryKeyIndexTest, multiple_pages) { + std::string filename = kTestDir + "/multiple_pages"; + io::FileWriterPtr file_writer; + auto fs = io::global_local_filesystem(); + EXPECT_TRUE(fs->create_file(filename, &file_writer).ok()); + + config::primary_key_data_page_size = 5 * 5; + PrimaryKeyIndexBuilder builder(file_writer.get(), 0); + static_cast(builder.init()); + size_t num_rows = 0; + std::vector keys {"00000", "00002", "00004", "00006", "00008", + "00010", "00012", "00014", "00016", "00018"}; + for (const 
std::string& key : keys) { + static_cast(builder.add_item(key)); + num_rows++; + } + EXPECT_EQ("00000", builder.min_key().to_string()); + EXPECT_EQ("00018", builder.max_key().to_string()); + EXPECT_EQ(builder.size(), 2 * 5 * 5); + EXPECT_GT(builder.data_page_num(), 1); + segment_v2::PrimaryKeyIndexMetaPB index_meta; + EXPECT_TRUE(builder.finalize(&index_meta)); + EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended()); + EXPECT_TRUE(file_writer->close().ok()); + EXPECT_EQ(num_rows, builder.num_rows()); + + PrimaryKeyIndexReader index_reader; + io::FileReaderSPtr file_reader; + EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); + EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok()); + EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok()); + EXPECT_EQ(num_rows, index_reader.num_rows()); + + std::unique_ptr index_iterator; + EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok()); + bool exact_match = false; + uint32_t row_id; + for (size_t i = 0; i < keys.size(); i++) { + bool exists = index_reader.check_present(keys[i]); + EXPECT_TRUE(exists); + auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_TRUE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i, row_id); + } + for (size_t i = 0; i < keys.size(); i++) { + bool exists = index_reader.check_present(keys[i]); + EXPECT_TRUE(exists); + auto status = index_iterator->seek_to_ordinal(i); + EXPECT_TRUE(status.ok()); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i, row_id); + } + { + auto status = index_iterator->seek_to_ordinal(10); + EXPECT_TRUE(status.ok()); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(10, row_id); + } + + std::vector non_exist_keys {"00001", "00003", "00005", "00007", "00009", + "00011", "00013", "00015", "00017"}; + for (size_t i = 0; i < non_exist_keys.size(); i++) { + Slice slice(non_exist_keys[i]); + bool exists = 
index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_FALSE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i + 1, row_id); + } + { + string key("00019"); + Slice slice(key); + bool exists = index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_FALSE(exact_match); + EXPECT_TRUE(status.is()); + } +} + +TEST_F(PrimaryKeyIndexTest, single_page) { + std::string filename = kTestDir + "/single_page"; + io::FileWriterPtr file_writer; + auto fs = io::global_local_filesystem(); + EXPECT_TRUE(fs->create_file(filename, &file_writer).ok()); + config::primary_key_data_page_size = 32768; + + PrimaryKeyIndexBuilder builder(file_writer.get(), 0); + static_cast(builder.init()); + size_t num_rows = 0; + std::vector keys {"00000", "00002", "00004", "00006", "00008", + "00010", "00012", "00014", "00016", "00018"}; + for (const std::string& key : keys) { + static_cast(builder.add_item(key)); + num_rows++; + } + EXPECT_EQ("00000", builder.min_key().to_string()); + EXPECT_EQ("00018", builder.max_key().to_string()); + EXPECT_EQ(builder.size(), 2 * 5 * 5); + EXPECT_EQ(builder.data_page_num(), 1); + segment_v2::PrimaryKeyIndexMetaPB index_meta; + EXPECT_TRUE(builder.finalize(&index_meta)); + EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended()); + EXPECT_TRUE(file_writer->close().ok()); + EXPECT_EQ(num_rows, builder.num_rows()); + + PrimaryKeyIndexReader index_reader; + io::FileReaderSPtr file_reader; + EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); + EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok()); + EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok()); + EXPECT_EQ(num_rows, index_reader.num_rows()); + + std::unique_ptr index_iterator; + EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok()); + bool exact_match 
= false; + uint32_t row_id; + for (size_t i = 0; i < keys.size(); i++) { + bool exists = index_reader.check_present(keys[i]); + EXPECT_TRUE(exists); + auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_TRUE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i, row_id); + } + + std::vector non_exist_keys {"00001", "00003", "00005", "00007", "00009", + "00011", "00013", "00015", "00017"}; + for (size_t i = 0; i < non_exist_keys.size(); i++) { + Slice slice(non_exist_keys[i]); + bool exists = index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_FALSE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i + 1, row_id); + } + { + string key("00019"); + Slice slice(key); + bool exists = index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_FALSE(exact_match); + EXPECT_TRUE(status.is()); + } +} } // namespace doris