From 4028d98b77071f2961f2b2728a0f3f6975258ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Fri, 18 Oct 2024 09:21:05 +0800 Subject: [PATCH 1/3] [Improve](segment iterator) reserve less rows for column when row bitmap is less than block row --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index a921674a1e506d..e717d9cefba9ab 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1989,8 +1989,11 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { if (UNLIKELY(!_lazy_inited)) { RETURN_IF_ERROR(_lazy_init()); _lazy_inited = true; + uint32_t nrows_reserve_limit = _row_bitmap.cardinality() < _opts.block_row_max + ? _row_bitmap.cardinality() + : _opts.block_row_max; if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) { - _block_rowids.resize(_opts.block_row_max); + _block_rowids.resize(nrows_reserve_limit); } _current_return_columns.resize(_schema->columns().size()); _converted_column_ids.resize(_schema->columns().size(), 0); @@ -2013,7 +2016,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { storage_column_type->is_nullable(), _opts.io_ctx.reader_type)); _current_return_columns[cid]->set_rowset_segment_id( {_segment->rowset_id(), _segment->id()}); - _current_return_columns[cid]->reserve(_opts.block_row_max); + _current_return_columns[cid]->reserve(nrows_reserve_limit); } else if (i >= block->columns()) { // if i >= block->columns means the column and not the pred_column means `column i` is // a delete condition column. but the column is not effective in the segment. 
so we just @@ -2024,7 +2027,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { // TODO: skip read the not effective delete column to speed up segment read. _current_return_columns[cid] = Schema::get_data_type_ptr(*column_desc)->create_column(); - _current_return_columns[cid]->reserve(_opts.block_row_max); + _current_return_columns[cid]->reserve(nrows_reserve_limit); } } From ff73ca192b1b8095a8629aac5805387e60fd4199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Mon, 21 Oct 2024 11:22:08 +0800 Subject: [PATCH 2/3] [Improve](segment iterator) reserve less rows for column when row bitmap is less than block row --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index e717d9cefba9ab..b504ddadee576c 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1989,11 +1989,11 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { if (UNLIKELY(!_lazy_inited)) { RETURN_IF_ERROR(_lazy_init()); _lazy_inited = true; - uint32_t nrows_reserve_limit = _row_bitmap.cardinality() < _opts.block_row_max - ? _row_bitmap.cardinality() - : _opts.block_row_max; + // If the row bitmap size is smaller than block_row_max, there's no need to reserve that many column rows. 
+ auto nrows_reserve_limit = + std::min(_row_bitmap.cardinality(), uint64_t(_opts.block_row_max)); if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) { - _block_rowids.resize(nrows_reserve_limit); + _block_rowids.resize(_opts.block_row_max); } _current_return_columns.resize(_schema->columns().size()); _converted_column_ids.resize(_schema->columns().size(), 0); From fb2fc0ee44681e1b5ec219f5009c1c7431dd82c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Tue, 22 Oct 2024 10:54:57 +0800 Subject: [PATCH 3/3] update --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index b504ddadee576c..985cdc16e68f31 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -2052,7 +2052,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { if (_can_opt_topn_reads()) { nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit); } - + // If the row bitmap size is smaller than nrows_read_limit, there's no need to reserve that many column rows. + nrows_read_limit = std::min(_row_bitmap.cardinality(), uint64_t(nrows_read_limit)); DBUG_EXECUTE_IF("segment_iterator.topn_opt_1", { if (nrows_read_limit != 1) { return Status::Error<ErrorCode::INTERNAL_ERROR>("topn opt 1 execute failed: {}",