From cc394cae3b6881162adeccbb65674206b2aea486 Mon Sep 17 00:00:00 2001 From: Pxl Date: Mon, 4 Nov 2024 14:08:36 +0800 Subject: [PATCH 1/2] [Bug](runtime-filter) avoid wrong partial ignore of minmax filter (#43078) In https://github.com/apache/doris/pull/41667 we added support for partially ignoring filters. However, when sync_filter_size is turned off, the final filter may contain only part of the data, because some instances' filters are judged to be 'in' filters. For example: there are rf001 (2 instances, in_or_bloom) and rf000 (2 instances, min_max). instance_1 has 1e8 rows and rf001 changes to bloom, so rf000 will not be ignored. instance_2 has 1 row and rf001 changes to in, so rf000 will be ignored. Finally, rf000 is applied and produces a wrong result. --- be/src/exprs/runtime_filter_slots.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h index b6ca31c72722ec..2b9773ce89ffdd 100644 --- a/be/src/exprs/runtime_filter_slots.h +++ b/be/src/exprs/runtime_filter_slots.h @@ -77,6 +77,10 @@ class VRuntimeFilterSlots { if (filter->get_real_type() != RuntimeFilterType::IN_FILTER) { continue; } + if (!filter->need_sync_filter_size() && + filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER) { + continue; + } if (has_in_filter.contains(filter->expr_order())) { filter->set_ignored(); continue; @@ -84,8 +88,8 @@ class VRuntimeFilterSlots { has_in_filter.insert(filter->expr_order()); } - // process ignore filter when it has IN_FILTER on same expr, and init bloom filter size - for (auto* filter : _runtime_filters) { + // process ignore filter when it has IN_FILTER on same expr + for (auto filter : _runtime_filters) { if (filter->get_ignored()) { continue; } From 1c4c8336d05c1fbc292a9582f0375ea8ffb7a62c Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Wed, 27 Nov 2024 21:46:13 +0800 Subject: [PATCH 2/2] update --- be/src/pipeline/exec/hashjoin_build_sink.cpp | 28 ++++++++++---------- 1 file changed, 14 insertions(+), 14 
deletions(-) diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index a177c1b59e1435..ccec9f1c44ce9a 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -137,22 +137,22 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status exec_statu return Base::close(state, exec_status); } - if (state->get_task()->wake_up_by_downstream()) { - RETURN_IF_ERROR(_runtime_filter_slots->send_filter_size(state, 0, _finish_dependency)); - RETURN_IF_ERROR(_runtime_filter_slots->ignore_all_filters()); - } else { - auto* block = _shared_state->build_block.get(); - uint64_t hash_table_size = block ? block->rows() : 0; - { - SCOPED_TIMER(_runtime_filter_init_timer); - if (_should_build_hash_table) { + if (_should_build_hash_table) { + if (state->get_task()->wake_up_by_downstream()) { + RETURN_IF_ERROR(_runtime_filter_slots->send_filter_size(state, 0, _finish_dependency)); + RETURN_IF_ERROR(_runtime_filter_slots->ignore_all_filters()); + } else { + auto* block = _shared_state->build_block.get(); + uint64_t hash_table_size = block ? block->rows() : 0; + { + SCOPED_TIMER(_runtime_filter_init_timer); RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); + RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); + } + if (hash_table_size > 1) { + SCOPED_TIMER(_runtime_filter_compute_timer); + _runtime_filter_slots->insert(block); } - RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); - } - if (_should_build_hash_table && hash_table_size > 1) { - SCOPED_TIMER(_runtime_filter_compute_timer); - _runtime_filter_slots->insert(block); } } SCOPED_TIMER(_publish_runtime_filter_timer);