From 7d2ef422cac65d300fea033b0a6cd9accafc339f Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Wed, 25 Oct 2023 17:20:15 +0800 Subject: [PATCH 1/3] Fix --- be/src/vec/exec/scan/vscan_node.cpp | 27 ++++++++- .../correctness/test_pushdown_common_expr.out | 20 +++++++ .../test_pushdown_common_expr.groovy | 59 ++++++++++++++++++- 3 files changed, 103 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 57dc6b66fd07cd..95ef0d1cc02e0c 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -17,6 +17,7 @@ #include "vec/exec/scan/vscan_node.h" +#include #include #include #include @@ -410,6 +411,23 @@ Status VScanNode::_normalize_conjuncts() { } } + std::function _conjunct_is_acting_on_a_slot = + [&_conjunct_is_acting_on_a_slot](const VExprSPtr& expr) -> bool { + const auto& children = expr->children(); + const size_t children_size = children.size(); + + for (size_t i = 0; i < children_size; ++i) { + // If any child expr does not act on a column slot + // return false immediately. + if (!_conjunct_is_acting_on_a_slot(children[i])) { + return false; + } + } + + // This is a leaf expression. + return expr->node_type() == TExprNodeType::SLOT_REF; + }; + for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { auto& conjunct = *it; if (conjunct->root()) { @@ -417,12 +435,16 @@ Status VScanNode::_normalize_conjuncts() { RETURN_IF_ERROR(_normalize_predicate(conjunct->root(), conjunct.get(), new_root)); if (new_root) { conjunct->set_root(new_root); - if (_should_push_down_common_expr()) { + if (_should_push_down_common_expr() && + _conjunct_is_acting_on_a_slot(conjunct->root())) { + // We need to make sure conjunct is acting on a slot before push it down. + // Or it will not be executed by SegmentIterator::_vec_init_lazy_materialization _common_expr_ctxs_push_down.emplace_back(conjunct); it = _conjuncts.erase(it); continue; } - } else { // All conjuncts are pushed down as predicate column + } else { + // Whole conjunct is pushed down as predicate column _stale_expr_ctxs.emplace_back(conjunct); it = _conjuncts.erase(it); continue; @@ -430,6 +452,7 @@ Status VScanNode::_normalize_conjuncts() { } ++it; } + for (auto& it : _slot_id_to_value_range) { std::visit( [&](auto&& range) { diff --git a/regression-test/data/correctness/test_pushdown_common_expr.out b/regression-test/data/correctness/test_pushdown_common_expr.out index e50a453c5c34b7..3cc0b6d3a54279 100644 --- a/regression-test/data/correctness/test_pushdown_common_expr.out +++ b/regression-test/data/correctness/test_pushdown_common_expr.out @@ -22,6 +22,16 @@ 64 g gg 8 d dd +-- !5 -- +256 i ii + +-- !6 -- + +-- !7 -- +1024 k kk + +-- !8 -- + -- !1 -- 1 a aa 128 h hh @@ -47,3 +57,13 @@ 64 g gg 8 d dd +-- !5 -- +256 i ii + +-- !6 -- + +-- !7 -- +1024 k kk + +-- !8 -- + diff --git a/regression-test/suites/correctness/test_pushdown_common_expr.groovy b/regression-test/suites/correctness/test_pushdown_common_expr.groovy index 8819288e10306c..74d7e15f1aff9f 100644 --- a/regression-test/suites/correctness/test_pushdown_common_expr.groovy +++ b/regression-test/suites/correctness/test_pushdown_common_expr.groovy @@ -65,6 +65,38 @@ suite("test_pushdown_common_expr") { SELECT * FROM t_pushdown_common_expr WHERE c1 < 300 OR UPPER(c2)="F" OR c3 LIKE "%f%"; """ + // One conjunct. No push down. + order_qt_5 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR uuid_numeric() = 'A' OR c1 = 256 + """ + + // Two conjuncts. + // random() > 1 is executed by scan node, c1 > 0 is pushed down and executed by segment iterator + order_qt_6 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 AND c1 > 0 + """ + + // One conjunct. No push down. + order_qt_7 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR c1 > 1020 + """ + // t1: 512 & 1024 + // t2: all + // join gets nothing + order_qt_8 """ + SELECT * + FROM ( + SELECT c1 + FROM t_pushdown_common_expr + WHERE c1 >= 512 or random() > 1 + ) AS t1 + JOIN ( + SELECT c1 + FROM t_pushdown_common_expr + WHERE random() = 0 + ) AS t2 ON t1.c1 = t2.c1 + """ + sql """set enable_common_expr_pushdown=false""" order_qt_1 """ @@ -83,4 +115,29 @@ suite("test_pushdown_common_expr") { SELECT * FROM t_pushdown_common_expr WHERE c1 < 300 OR UPPER(c2)="K" OR c3 LIKE "%k%"; """ -} + order_qt_5 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR uuid_numeric() = 'A' OR c1 = 256 + """ + + order_qt_6 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 + """ + + order_qt_7 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR c1 > 1020 + """ + + order_qt_8 """ + SELECT * + FROM ( + SELECT c1 + FROM t_pushdown_common_expr + WHERE c1 >= 512 or random() > 1 + ) AS t1 + JOIN ( + SELECT c1 + FROM t_pushdown_common_expr + WHERE random() = 0 + ) AS t2 ON t1.c1 = t2.c1 + """ +} \ No newline at end of file From c646b21ecc963989c71df9aae7fe3a1b255444c0 Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Wed, 25 Oct 2023 17:27:37 +0800 Subject: [PATCH 2/3] format --- be/src/vec/exec/scan/vscan_node.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 95ef0d1cc02e0c..1cf640398928a9 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -427,7 +427,7 @@ Status VScanNode::_normalize_conjuncts() { // This is a leaf expression. return expr->node_type() == TExprNodeType::SLOT_REF; }; - + for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { auto& conjunct = *it; if (conjunct->root()) { From e4072fd419027aa6de8dc46a945757c02bf52df8 Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Wed, 25 Oct 2023 19:39:03 +0800 Subject: [PATCH 3/3] CR --- be/src/vec/exec/scan/vscan_node.cpp | 19 +--------------- be/src/vec/exprs/vexpr.h | 16 ++++++++++++++ .../correctness/test_pushdown_common_expr.out | 8 +++++-- .../test_pushdown_common_expr.groovy | 22 ++++++++++++++----- 4 files changed, 39 insertions(+), 26 deletions(-) diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 1cf640398928a9..0791e2bdcd7014 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -411,23 +411,6 @@ Status VScanNode::_normalize_conjuncts() { } } - std::function _conjunct_is_acting_on_a_slot = - [&_conjunct_is_acting_on_a_slot](const VExprSPtr& expr) -> bool { - const auto& children = expr->children(); - const size_t children_size = children.size(); - - for (size_t i = 0; i < children_size; ++i) { - // If any child expr does not act on a column slot - // return false immediately. - if (!_conjunct_is_acting_on_a_slot(children[i])) { - return false; - } - } - - // This is a leaf expression. - return expr->node_type() == TExprNodeType::SLOT_REF; - }; - for (auto it = _conjuncts.begin(); it != _conjuncts.end();) { auto& conjunct = *it; if (conjunct->root()) { @@ -436,7 +419,7 @@ Status VScanNode::_normalize_conjuncts() { if (new_root) { conjunct->set_root(new_root); if (_should_push_down_common_expr() && - _conjunct_is_acting_on_a_slot(conjunct->root())) { + VExpr::is_acting_on_a_slot(conjunct->root())) { // We need to make sure conjunct is acting on a slot before push it down. // Or it will not be executed by SegmentIterator::_vec_init_lazy_materialization _common_expr_ctxs_push_down.emplace_back(conjunct); diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index ad5af0aa5bcd9d..eb915aff7e4040 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -75,6 +75,22 @@ class VExpr { return block->columns() - 1; } + static bool is_acting_on_a_slot(const VExprSPtr& expr) { + const auto& children = expr->children(); + const size_t children_size = children.size(); + + for (size_t i = 0; i < children_size; ++i) { + // If any child expr acts on a column slot + // return true immediately. + if (is_acting_on_a_slot(children[i])) { + return true; + } + } + + // This is a leaf expression. + return expr->node_type() == TExprNodeType::SLOT_REF; + } + VExpr(const TExprNode& node); VExpr(const VExpr& vexpr); VExpr(const TypeDescriptor& type, bool is_slotref, bool is_nullable); diff --git a/regression-test/data/correctness/test_pushdown_common_expr.out b/regression-test/data/correctness/test_pushdown_common_expr.out index 3cc0b6d3a54279..8da380e841b23a 100644 --- a/regression-test/data/correctness/test_pushdown_common_expr.out +++ b/regression-test/data/correctness/test_pushdown_common_expr.out @@ -28,9 +28,11 @@ -- !6 -- -- !7 -- -1024 k kk -- !8 -- +1024 k kk + +-- !9 -- -- !1 -- 1 a aa @@ -63,7 +65,9 @@ -- !6 -- -- !7 -- -1024 k kk -- !8 -- +1024 k kk + +-- !9 -- diff --git a/regression-test/suites/correctness/test_pushdown_common_expr.groovy b/regression-test/suites/correctness/test_pushdown_common_expr.groovy index 74d7e15f1aff9f..2395f057b1df4d 100644 --- a/regression-test/suites/correctness/test_pushdown_common_expr.groovy +++ b/regression-test/suites/correctness/test_pushdown_common_expr.groovy @@ -70,20 +70,26 @@ suite("test_pushdown_common_expr") { SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR uuid_numeric() = 'A' OR c1 = 256 """ + // One conjunct. No push down. + order_qt_6 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 + """ + // Two conjuncts. // random() > 1 is executed by scan node, c1 > 0 is pushed down and executed by segment iterator - order_qt_6 """ + order_qt_7 """ SELECT * FROM t_pushdown_common_expr WHERE random() > 1 AND c1 > 0 """ // One conjunct. No push down. - order_qt_7 """ + order_qt_8 """ SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR c1 > 1020 """ + // t1: 512 & 1024 // t2: all // join gets nothing - order_qt_8 """ + order_qt_9 """ SELECT * FROM ( SELECT c1 @@ -118,16 +124,20 @@ suite("test_pushdown_common_expr") { order_qt_5 """ SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR uuid_numeric() = 'A' OR c1 = 256 """ - + order_qt_6 """ SELECT * FROM t_pushdown_common_expr WHERE random() > 1 """ - + order_qt_7 """ - SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR c1 > 1020 + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 AND c1 > 0 """ order_qt_8 """ + SELECT * FROM t_pushdown_common_expr WHERE random() > 1 OR c1 > 1020 + """ + + order_qt_9 """ SELECT * FROM ( SELECT c1