From 2ec931675b91999cd001e3a564d257ce6e51ac2b Mon Sep 17 00:00:00 2001 From: Socrates Date: Thu, 8 May 2025 00:24:08 +0800 Subject: [PATCH 1/3] fix --- be/src/vec/exec/format/orc/vorc_reader.cpp | 23 ++++-------- .../format/parquet/vparquet_group_reader.cpp | 37 ++++++------------- 2 files changed, 20 insertions(+), 40 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 9298a665f54f03..7a99b2e8327544 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -2360,22 +2360,15 @@ bool OrcReader::_can_filter_by_dict(int slot_id) { return false; } - std::function visit_function_call = [&](const VExpr* expr) { - // TODO: The current implementation of dictionary filtering does not take into account - // the implementation of NULL values because the dictionary itself does not contain - // NULL value encoding. As a result, many NULL-related functions or expressions - // cannot work properly, such as is null, is not null, coalesce, etc. - // Here we first disable dictionary filtering when predicate expr is not slot. - // Implementation of NULL value dictionary filtering will be carried out later. - if (expr->node_type() != TExprNodeType::SLOT_REF) { - return false; - } - return std::ranges::all_of(expr->children(), [&](const auto& child) { - return visit_function_call(child.get()); - }); - }; + // TODO: The current implementation of dictionary filtering does not take into account + // the implementation of NULL values because the dictionary itself does not contain + // NULL value encoding. As a result, many NULL-related functions or expressions + // cannot work properly, such as is null, is not null, coalesce, etc. + // Here we check if the predicate expr is IN or BINARY_PRED. + // Implementation of NULL value dictionary filtering will be carried out later. return std::ranges::all_of(_slot_id_to_filter_conjuncts->at(slot_id), [&](const auto& ctx) { - return visit_function_call(ctx->root().get()); + return ctx->root()->node_type() == TExprNodeType::IN_PRED || + ctx->root()->node_type() == TExprNodeType::BINARY_PRED; }); } diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 0a016afd6cf78a..93e007a63402dc 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -201,37 +201,24 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id, return false; } - if (_slot_id_to_filter_conjuncts->find(slot_id) == _slot_id_to_filter_conjuncts->end()) { + if (!is_dictionary_encoded(column_metadata)) { return false; } - if (!is_dictionary_encoded(column_metadata)) { + if (_slot_id_to_filter_conjuncts->find(slot_id) == _slot_id_to_filter_conjuncts->end()) { return false; } - std::function visit_function_call = [&](const VExpr* expr) { - // TODO: The current implementation of dictionary filtering does not take into account - // the implementation of NULL values because the dictionary itself does not contain - // NULL value encoding. As a result, many NULL-related functions or expressions - // cannot work properly, such as is null, is not null, coalesce, etc. - // Here we first disable dictionary filtering when predicate is not slot. - // Implementation of NULL value dictionary filtering will be carried out later. - if (expr->node_type() != TExprNodeType::SLOT_REF) { - return false; - } - for (auto& child : expr->children()) { - if (!visit_function_call(child.get())) { - return false; - } - } - return true; - }; - for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { - if (!visit_function_call(ctx->root().get())) { - return false; - } - } - return true; + // TODO: The current implementation of dictionary filtering does not take into account + // the implementation of NULL values because the dictionary itself does not contain + // NULL value encoding. As a result, many NULL-related functions or expressions + // cannot work properly, such as is null, is not null, coalesce, etc. + // Here we check if the predicate expr is IN or BINARY_PRED. + // Implementation of NULL value dictionary filtering will be carried out later. + return std::ranges::all_of(_slot_id_to_filter_conjuncts->at(slot_id), [&](const auto& ctx) { + return ctx->root()->node_type() == TExprNodeType::IN_PRED || + ctx->root()->node_type() == TExprNodeType::BINARY_PRED; + }); } // This function is copied from From a046364b577a16bbc502ef4db24832e34e2fd9f1 Mon Sep 17 00:00:00 2001 From: Socrates Date: Thu, 15 May 2025 23:29:05 +0800 Subject: [PATCH 2/3] fix --- be/src/vec/exec/format/orc/vorc_reader.cpp | 5 +++-- be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 7a99b2e8327544..b121ac306361f3 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -2367,8 +2367,9 @@ bool OrcReader::_can_filter_by_dict(int slot_id) { // Here we check if the predicate expr is IN or BINARY_PRED. // Implementation of NULL value dictionary filtering will be carried out later. return std::ranges::all_of(_slot_id_to_filter_conjuncts->at(slot_id), [&](const auto& ctx) { - return ctx->root()->node_type() == TExprNodeType::IN_PRED || - ctx->root()->node_type() == TExprNodeType::BINARY_PRED; + return (ctx->root()->node_type() == TExprNodeType::IN_PRED || + ctx->root()->node_type() == TExprNodeType::BINARY_PRED) && + ctx->root()->children()[0]->node_type() == TExprNodeType::SLOT_REF; }); } diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 93e007a63402dc..020b392eedc401 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -216,8 +216,9 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id, // Here we check if the predicate expr is IN or BINARY_PRED. // Implementation of NULL value dictionary filtering will be carried out later. return std::ranges::all_of(_slot_id_to_filter_conjuncts->at(slot_id), [&](const auto& ctx) { - return ctx->root()->node_type() == TExprNodeType::IN_PRED || - ctx->root()->node_type() == TExprNodeType::BINARY_PRED; + return (ctx->root()->node_type() == TExprNodeType::IN_PRED || + ctx->root()->node_type() == TExprNodeType::BINARY_PRED) && + ctx->root()->children()[0]->node_type() == TExprNodeType::SLOT_REF; }); } From 10aedd458677081d90d865859c7a330c65b511c4 Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 16 May 2025 11:57:12 +0800 Subject: [PATCH 3/3] add cases --- .../hive/test_string_dict_filter.out | 120 ++++++++++++++++++ .../hive/test_string_dict_filter.groovy | 36 ++++++ 2 files changed, 156 insertions(+) diff --git a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out index 2a8cebd872315e..97b1fb2ff4c761 100644 --- a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out +++ b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out @@ -59,6 +59,36 @@ null -- !q15 -- 5 +-- !q16 -- +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q17 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q18 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q19 -- +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q20 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q21 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos 5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly @@ -119,6 +149,36 @@ null -- !q15 -- 5 +-- !q16 -- +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q17 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q18 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q19 -- +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q20 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q21 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos 5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly @@ -179,6 +239,36 @@ null -- !q15 -- 5 +-- !q16 -- +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q17 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q18 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q19 -- +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q20 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q21 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos 5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly @@ -239,3 +329,33 @@ null -- !q15 -- 5 +-- !q16 -- +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q17 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q18 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q19 -- +4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro + +-- !q20 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + +-- !q21 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly + diff --git a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy index 1929c813c554ac..18e62570ad70b0 100644 --- a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy +++ b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy @@ -62,6 +62,24 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock qt_q15 """ select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = '0'; """ + qt_q16 """ + select * from test_string_dict_filter_parquet where cast(o_orderstatus as string) = 'F'; + """ + qt_q17 """ + select * from test_string_dict_filter_parquet where cast(o_orderstatus as string) = 'O'; + """ + qt_q18 """ + select * from test_string_dict_filter_parquet where cast(o_orderstatus as string) in ('O', 'F'); + """ + qt_q19 """ + select * from test_string_dict_filter_parquet where cast(o_orderpriority as string) is null; + """ + qt_q20 """ + select * from test_string_dict_filter_parquet where cast(o_orderpriority as string) is not null; + """ + qt_q21 """ + select * from test_string_dict_filter_parquet where cast(o_orderpriority as string) in ('5-LOW', NULL); + """ } def q_orc = { qt_q01 """ @@ -109,6 +127,24 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock qt_q15 """ select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = '0'; """ + qt_q16 """ + select * from test_string_dict_filter_orc where cast(o_orderstatus as string) = 'F'; + """ + qt_q17 """ + select * from test_string_dict_filter_orc where cast(o_orderstatus as string) = 'O'; + """ + qt_q18 """ + select * from test_string_dict_filter_orc where cast(o_orderstatus as string) in ('O', 'F'); + """ + qt_q19 """ + select * from test_string_dict_filter_orc where cast(o_orderpriority as string) is null; + """ + qt_q20 """ + select * from test_string_dict_filter_orc where cast(o_orderpriority as string) is not null; + """ + qt_q21 """ + select * from test_string_dict_filter_orc where cast(o_orderpriority as string) in ('5-LOW', NULL); + """ } String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled == null || !enabled.equalsIgnoreCase("true")) {