From ac0841b3c0f0545b4b33a4378824c4019b6a7dbd Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Mon, 22 Jul 2024 17:09:32 +0800 Subject: [PATCH 1/4] [fix](compaction) fix mismatch between segment key and value column rows during compaction (#37960) When a block is split into 3 segments, the old code handles only 2 of them and the last one overflows. --- .../rowset/vertical_beta_rowset_writer.cpp | 53 +++++++++---------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 05730ec9f3aef3..58fdb25fd7ad8d 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -95,35 +95,30 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows)); } else { // value columns - uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); - VLOG_NOTICE << "num_rows_written: " << num_rows_written - << ", _cur_writer_idx: " << _cur_writer_idx; - uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - // init if it's first value column write in current segment - if (_cur_writer_idx == 0 && num_rows_written == 0) { - VLOG_NOTICE << "init first value column segment writer"; - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); - } - // when splitting segment, need to make rows align between key columns and value columns - size_t start_offset = 0, limit = num_rows; - if (num_rows_written + num_rows >= num_rows_key_group && - _cur_writer_idx < _segment_writers.size() - 1) { - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block( - block, 0, num_rows_key_group - num_rows_written)); - RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx])); - start_offset = num_rows_key_group - num_rows_written; - limit = num_rows - 
start_offset; - ++_cur_writer_idx; - // switch to next writer - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); - num_rows_written = 0; - num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - } - if (limit > 0) { - RETURN_IF_ERROR( - _segment_writers[_cur_writer_idx]->append_block(block, start_offset, limit)); - DCHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <= - _segment_writers[_cur_writer_idx]->row_count()); + int64_t left = num_rows; + while (left > 0) { + uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); + VLOG_NOTICE << "num_rows_written: " << num_rows_written + << ", _cur_writer_idx: " << _cur_writer_idx; + uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); + CHECK_LE(num_rows_written, num_rows_key_group); + // init if it's first value column write in current segment + if (num_rows_written == 0) { + VLOG_NOTICE << "init first value column segment writer"; + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); + } + + int64_t to_write = num_rows_written + left >= num_rows_key_group + ? 
num_rows_key_group - num_rows_written + : left; + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows - left, + to_write)); + left -= to_write; + CHECK_GE(left, 0); + + if (left > 0) { + ++_cur_writer_idx; + } } } if (is_key) { From 91c30d5e7c9f85a6af84068bb10141d1b02f0c2f Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Wed, 24 Jul 2024 09:29:19 +0800 Subject: [PATCH 2/4] [fix](compaction) fix the issue of writing segment with 0 rows during compaction (#38251) https://github.com/apache/doris/pull/37960 introduced this issue --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 58fdb25fd7ad8d..027ac835bb75b6 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -101,7 +101,7 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, VLOG_NOTICE << "num_rows_written: " << num_rows_written << ", _cur_writer_idx: " << _cur_writer_idx; uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - CHECK_LE(num_rows_written, num_rows_key_group); + CHECK_LT(num_rows_written, num_rows_key_group); // init if it's first value column write in current segment if (num_rows_written == 0) { VLOG_NOTICE << "init first value column segment writer"; @@ -116,7 +116,8 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, left -= to_write; CHECK_GE(left, 0); - if (left > 0) { + if (num_rows_key_group == num_rows_written + to_write && + _cur_writer_idx < _segment_writers.size() - 1) { ++_cur_writer_idx; } } From 8cce17fe0a795adfabf30ac953b1ee208239a31c Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Thu, 25 Jul 2024 22:39:21 +0800 Subject: [PATCH 3/4] [fix](compaction) fix missing flush column during compaction (#38356) 
#37960 introduces this problem --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 027ac835bb75b6..cb13c2199c657e 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -118,6 +118,7 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, if (num_rows_key_group == num_rows_written + to_write && _cur_writer_idx < _segment_writers.size() - 1) { + RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get())); ++_cur_writer_idx; } } From aaa5b10de5a546910512d351e13c9c287f29809b Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Tue, 13 Aug 2024 09:51:28 +0800 Subject: [PATCH 4/4] fix --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index cb13c2199c657e..31b9e94f7329d2 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -118,7 +118,7 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, if (num_rows_key_group == num_rows_written + to_write && _cur_writer_idx < _segment_writers.size() - 1) { - RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get())); + RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx])); ++_cur_writer_idx; } }