diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index ac1fb1de8c2768..914dc2f40e3e6f 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -122,6 +122,9 @@ void OlapScanNode::_init_counter(RuntimeState* state) { _bitmap_index_filter_timer = ADD_TIMER(_runtime_profile, "BitmapIndexFilterTimer"); _num_scanners = ADD_COUNTER(_runtime_profile, "NumScanners", TUnit::UNIT); + + _filtered_segment_counter = ADD_COUNTER(_runtime_profile, "NumSegmentFiltered", TUnit::UNIT); + _total_segment_counter = ADD_COUNTER(_runtime_profile, "NumSegmentTotal", TUnit::UNIT); } Status OlapScanNode::prepare(RuntimeState* state) { diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index aa0127d830a156..469c6a1c8d4492 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -291,6 +291,11 @@ class OlapScanNode : public ScanNode { RuntimeProfile::Counter* _bitmap_index_filter_timer = nullptr; // number of created olap scanners RuntimeProfile::Counter* _num_scanners = nullptr; + + // number of segments filtered by column stats when creating the segment iterator + RuntimeProfile::Counter* _filtered_segment_counter = nullptr; + // total number of segments related to this scan node + RuntimeProfile::Counter* _total_segment_counter = nullptr; }; } // namespace doris diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index a9bc32e7c72b89..38bbf7c6b52a44 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -476,6 +476,9 @@ void OlapScanner::update_counter() { COUNTER_UPDATE(_parent->_bitmap_index_filter_timer, _reader->stats().bitmap_index_filter_timer); COUNTER_UPDATE(_parent->_block_seek_counter, _reader->stats().block_seek_num); + COUNTER_UPDATE(_parent->_filtered_segment_counter, _reader->stats().filtered_segment_number); + COUNTER_UPDATE(_parent->_total_segment_counter, _reader->stats().total_segment_number); + 
DorisMetrics::instance()->query_scan_bytes.increment(_compressed_bytes_read); DorisMetrics::instance()->query_scan_rows.increment(_raw_rows_read); diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index f685ca8c03d193..889f0e3eb23a3f 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -268,6 +268,10 @@ struct OlapReaderStatistics { int64_t rows_bitmap_index_filtered = 0; int64_t bitmap_index_filter_timer = 0; + // number of segments filtered out by column statistics when creating the segment iterator + int64_t filtered_segment_number = 0; + // total number of segments for which an iterator was requested + int64_t total_segment_number = 0; }; typedef uint32_t ColumnId; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 002f05666a4b45..2ad6b3a9425e70 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -64,6 +64,7 @@ Status Segment::new_iterator(const Schema& schema, const StorageReadOptions& read_options, std::unique_ptr* iter) { DCHECK_NOTNULL(read_options.stats); + read_options.stats->total_segment_number++; // trying to prune the current segment by segment-level zone map if (read_options.conditions != nullptr) { for (auto& column_condition : read_options.conditions->columns()) { @@ -74,6 +75,7 @@ Status Segment::new_iterator(const Schema& schema, if (!_column_readers[column_id]->match_condition(column_condition.second)) { // any condition not satisfied, return. 
iter->reset(new EmptySegmentIterator(schema)); + read_options.stats->filtered_segment_number++; return Status::OK(); } } diff --git a/docs/en/administrator-guide/running-profile.md b/docs/en/administrator-guide/running-profile.md index 0a5634cd4c8619..60aca26b56b368 100644 --- a/docs/en/administrator-guide/running-profile.md +++ b/docs/en/administrator-guide/running-profile.md @@ -167,6 +167,8 @@ OLAP_SCAN_NODE (id=0): (Active: 4.050ms, non-child: 35.68%) -IndexLoadTime: 1.521ms # In V1 only, it takes time to read Index Stream. -NumDiskAccess: 6 # The number of disks involved in this ScanNode. -NumScanners: 25 # The number of Scanners generated by this ScanNode. + -NumSegmentFiltered: 4 # Number of Segments filtered out by column statistics when creating the Segment Iterator. + -NumSegmentTotal: 20 # Total number of Segments related to this scan. -PeakMemoryUsage: 0 # meaningless -PerReadThreadRawHdfsThroughput: 0.00 /sec # meaningless -RawRowsRead: 141.71K # The number of raw rows read in the storage engine. See below for details. @@ -199,6 +201,11 @@ OLAP_SCAN_NODE (id=0): (Active: 4.050ms, non-child: 35.68%) * Some notes on the number of rows in Profile The metrics related to the number of rows in the Profile are: + + * NumSegmentFiltered + * NumSegmentTotal + + The number of Segments actually read is NumSegmentTotal minus NumSegmentFiltered. 
* RowsKeyRangeFiltered * RowsBitmapIndexFiltered diff --git a/docs/zh-CN/administrator-guide/running-profile.md b/docs/zh-CN/administrator-guide/running-profile.md index baf6fa3293ff52..cc1fb4855b7f57 100644 --- a/docs/zh-CN/administrator-guide/running-profile.md +++ b/docs/zh-CN/administrator-guide/running-profile.md @@ -166,6 +166,8 @@ OLAP_SCAN_NODE (id=0):(Active: 4.050ms, non-child: 35.68%) - IndexLoadTime: 1.521ms # 仅 V1 中,读取 Index Stream 的耗时。 - NumDiskAccess: 6 # 该 ScanNode 节点涉及到的磁盘数量。 - NumScanners: 25 # 该 ScanNode 生成的 Scanner 数量。 + - NumSegmentFiltered: 4 # 在生成 Segment Iterator 时,通过列统计信息和查询条件,完全过滤掉的 Segment 数量。 + - NumSegmentTotal: 20 # 查询涉及的所有 Segment 数量。 - PeakMemoryUsage: 0 # 无意义 - PerReadThreadRawHdfsThroughput: 0.00 /sec # 无意义 - RawRowsRead: 141.71K # 存储引擎中读取的原始行数。详情见下文。 @@ -198,6 +200,11 @@ OLAP_SCAN_NODE (id=0):(Active: 4.050ms, non-child: 35.68%) * Profile 中关于行数的一些说明 在 Profile 中和行数相关的指标有: + + * NumSegmentFiltered + * NumSegmentTotal + + 通过这两个指标可以得到实际读取的 Segment 数量。 * RowsKeyRangeFiltered * RowsBitmapIndexFiltered