From c053e37ba95da83e722f8ba800bc747e1a085af7 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Thu, 22 Jan 2026 10:23:05 +0800 Subject: [PATCH] [fix](predicates) Fix predicates push-down (#60022) --- be/src/pipeline/exec/olap_scan_operator.cpp | 16 +- .../test_topn_fault_injection.out | 67 -------- .../test_topn_fault_injection.groovy | 161 ------------------ 3 files changed, 1 insertion(+), 243 deletions(-) delete mode 100644 regression-test/data/fault_injection_p0/test_topn_fault_injection.out delete mode 100644 regression-test/suites/fault_injection_p0/test_topn_fault_injection.groovy diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index a499ed8bd3fcfe..dbb2d7733c72de 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -888,21 +888,7 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { _scan_keys.extend_scan_key(temp_range, p._max_scan_key_num, &exact_range, &eos, &should_break)); if (exact_range) { - auto key = iter->first; - _slot_id_to_value_range.erase(key); - - std::vector> new_predicates; - for (const auto& it : _slot_id_to_predicates[key]) { - if (it->type() == PredicateType::NOT_IN_LIST || - it->type() == PredicateType::NE) { - new_predicates.push_back(it); - } - } - if (new_predicates.empty()) { - _slot_id_to_predicates.erase(key); - } else { - _slot_id_to_predicates[key] = new_predicates; - } + _slot_id_to_value_range.erase(iter->first); } } else { // if exceed max_pushdown_conditions_per_column, use whole_value_rang instead diff --git a/regression-test/data/fault_injection_p0/test_topn_fault_injection.out b/regression-test/data/fault_injection_p0/test_topn_fault_injection.out deleted file mode 100644 index 742413ad30d731..00000000000000 --- a/regression-test/data/fault_injection_p0/test_topn_fault_injection.out +++ /dev/null @@ -1,67 +0,0 @@ --- This file is automatically generated. You should know what you did if you want to edit this --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736 - --- !sql -- -893966413 34.0.0.0 GET /english/playing/body.html HTTP/1.0 200 5033 - --- !sql -- -893966413 34.0.0.0 GET /english/playing/body.html HTTP/1.0 200 5033 - diff --git a/regression-test/suites/fault_injection_p0/test_topn_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_topn_fault_injection.groovy deleted file mode 100644 index ebfdbf6e69cbfc..00000000000000 --- a/regression-test/suites/fault_injection_p0/test_topn_fault_injection.groovy +++ /dev/null @@ -1,161 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -suite("test_topn_fault_injection", "nonConcurrent") { - // define a sql table - def indexTbName1 = "test_topn_fault_injection1" - def indexTbName2 = "test_topn_fault_injection2" - def indexTbName3 = "test_topn_fault_injection3" - - sql "DROP TABLE IF EXISTS ${indexTbName1}" - sql """ - CREATE TABLE ${indexTbName1} ( - `@timestamp` int(11) NULL COMMENT "", - `clientip` varchar(20) NULL COMMENT "", - `request` text NULL COMMENT "", - `status` int(11) NULL COMMENT "", - `size` int(11) NULL COMMENT "", - INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' - ) ENGINE=OLAP - DUPLICATE KEY(`@timestamp`) - COMMENT "OLAP" - DISTRIBUTED BY RANDOM BUCKETS 1 - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1", - "disable_auto_compaction" = "true" - ); - """ - - sql "DROP TABLE IF EXISTS ${indexTbName2}" - sql """ - CREATE TABLE ${indexTbName2} ( - `@timestamp` int(11) NULL COMMENT "", - `clientip` varchar(20) NULL COMMENT "", - `request` text NULL COMMENT "", - `status` int(11) NULL COMMENT "", - `size` int(11) NULL COMMENT "", - INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' - ) ENGINE=OLAP - UNIQUE KEY(`@timestamp`) - COMMENT "OLAP" - DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1 - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1", - "enable_unique_key_merge_on_write" = "true", - "disable_auto_compaction" = "true" - ); - """ - - sql "DROP TABLE IF EXISTS ${indexTbName3}" - sql """ - CREATE TABLE ${indexTbName3} ( - `@timestamp` int(11) NULL COMMENT "", - `clientip` varchar(20) NULL COMMENT "", - `request` text NULL COMMENT "", - `status` int(11) NULL COMMENT "", - `size` int(11) NULL COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`@timestamp`, `clientip`) - COMMENT "OLAP" - DISTRIBUTED BY RANDOM BUCKETS 1 - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1", - "disable_auto_compaction" = "true" - ); - """ - - def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, - expected_succ_rows = -1, load_to_single_tablet = 'true' -> - - // load the json data - streamLoad { - table "${table_name}" - - // set http request header params - set 'label', label + "_" + UUID.randomUUID().toString() - set 'read_json_by_line', read_flag - set 'format', format_flag - file file_name // import json file - time 10000 // limit inflight 10s - if (expected_succ_rows >= 0) { - set 'max_filter_ratio', '1' - } - - // if declared a check callback, the default check condition will ignore. - // So you must check all condition - check { result, exception, startTime, endTime -> - if (ignore_failure && expected_succ_rows < 0) { return } - if (exception != null) { - throw exception - } - log.info("Stream load result: ${result}".toString()) - def json = parseJson(result) - } - } - } - - try { - load_httplogs_data.call(indexTbName1, 'test_topn_fault_injection1', 'true', 'json', 'documents-1000.json') - load_httplogs_data.call(indexTbName2, 'test_topn_fault_injection2', 'true', 'json', 'documents-1000.json') - load_httplogs_data.call(indexTbName3, 'test_topn_fault_injection3', 'true', 'json', 'documents-1000.json') - - sql "sync" - sql """ set enable_common_expr_pushdown = true """ - - try { - GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.topn_opt_1") - - qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm' and clientip match_phrase '1') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'bg' and clientip match_phrase '2') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm' and clientip match_phrase '1' or clientip match_phrase '3') or (request match_phrase 'bg' and clientip match_phrase '2' or clientip match_phrase '4') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (request match_phrase 'hm') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (clientip match_phrase '1' or clientip match_phrase '3') order by `@timestamp` limit 1; """ - - qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm' and clientip match_phrase '1') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'bg' and clientip match_phrase '2') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm' and clientip match_phrase '1' or clientip match_phrase '3') or (request match_phrase 'bg' and clientip match_phrase '2' or clientip match_phrase '4') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (request match_phrase 'hm') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (clientip match_phrase '1' or clientip match_phrase '3') order by `@timestamp` limit 1; """ - } finally { - GetDebugPoint().disableDebugPointForAllBEs("segment_iterator.topn_opt_1") - } - - try { - GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.topn_opt_2") - - qt_sql """ select * from ${indexTbName1} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (request match_phrase 'hm' and request like '%ag%') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (request match_phrase 'hm' and clientip like '%1%') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (clientip match_phrase '1' or clientip match_phrase '3' and request like '%ag%') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName1} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (clientip match_phrase '1' or `@timestamp` = 1) order by `@timestamp` limit 1; """ - - qt_sql """ select * from ${indexTbName2} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (request match_phrase 'hm' and request like '%ag%') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (request match_phrase 'hm' and clientip like '%1%') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (clientip match_phrase '1' or clientip match_phrase '3' and request like '%ag%') order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName2} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and (clientip match_phrase '1' or `@timestamp` = 1) order by `@timestamp` limit 1; """ - - qt_sql """ select * from ${indexTbName3} where (`@timestamp` >= 893964617 and `@timestamp` < 893966455) and clientip = '34.0.0.0' order by `@timestamp` limit 1; """ - qt_sql """ select * from ${indexTbName3} where clientip = '34.0.0.0' order by `@timestamp` limit 1; """ - } finally { - GetDebugPoint().disableDebugPointForAllBEs("segment_iterator.topn_opt_2") - } - } finally { - } -} \ No newline at end of file