From ccea5badf725c888c4696d645ac575ab3ec6682a Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 18 Oct 2024 17:15:01 +0800 Subject: [PATCH 1/5] fix filter_by_dict --- be/src/vec/exec/format/orc/vorc_reader.cpp | 2 +- be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 7c05f934bd5c0a..a321a7152ac97a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -2020,7 +2020,7 @@ bool OrcReader::_can_filter_by_dict(int slot_id) { // cannot work properly, such as is null, is not null, coalesce, etc. // Here we first disable dictionary filtering when predicate contains functions. // Implementation of NULL value dictionary filtering will be carried out later. - if (expr->node_type() == TExprNodeType::FUNCTION_CALL) { + if (expr->node_type() != TExprNodeType::SLOT_REF) { return false; } for (auto& child : expr->children()) { diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 37e82774c39ee4..21f5f8d02ef9ab 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -214,7 +214,7 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id, // cannot work properly, such as is null, is not null, coalesce, etc. // Here we first disable dictionary filtering when predicate contains functions. // Implementation of NULL value dictionary filtering will be carried out later. 
- if (expr->node_type() == TExprNodeType::FUNCTION_CALL) { + if (expr->node_type() != TExprNodeType::SLOT_REF) { return false; } for (auto& child : expr->children()) { From 550413031830e514ce4930d472a0a54038e4b743 Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 18 Oct 2024 21:23:17 +0800 Subject: [PATCH 2/5] add regression case --- .../hive/test_string_dict_filter.out | 12 ++++++++++++ .../hive/test_string_dict_filter.groovy | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out index a14f225abe5d0b..03f7b385f1366f 100644 --- a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out +++ b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out @@ -56,6 +56,9 @@ null -- !q14 -- null +-- !q15 -- +null + -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos 5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly @@ -113,6 +116,9 @@ null -- !q14 -- null +-- !q15 -- +null + -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos 5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly @@ -170,6 +176,9 @@ null -- !q14 -- null +-- !q15 -- +null + -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos 5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. 
packages use slyly @@ -227,3 +236,6 @@ null -- !q14 -- null +-- !q15 -- +null + diff --git a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy index 82afc63042f249..d7724a03e5f3be 100644 --- a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy +++ b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy @@ -59,6 +59,9 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock qt_q14 """ select * from ( select COALESCE(o_orderpriority, 'null') AS o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = 'null'; """ + qt_q15 """ + select * from ( select (case when o_orderpriority is null then 'null' else o_orderpriority end) case_col from test_string_dict_filter_parquet ) x where case_col = 'null'; + """ } def q_orc = { qt_q01 """ @@ -103,6 +106,9 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock qt_q14 """ select * from ( select COALESCE(o_orderpriority, 'null') AS o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = 'null'; """ + qt_q15 """ + select * from ( select (case when o_orderpriority is null then 'null' else o_orderpriority end) case_col from test_string_dict_filter_orc ) x where case_col = 'null'; + """ } String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled == null || !enabled.equalsIgnoreCase("true")) { From 510a031741644e8c88eaf7a27008a2f6dc451373 Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 18 Oct 2024 21:25:44 +0800 Subject: [PATCH 3/5] fix --- be/src/vec/exec/format/orc/vorc_reader.cpp | 2 +- be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index a321a7152ac97a..6671f79e3cfdd5 100644 --- 
a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -2018,7 +2018,7 @@ bool OrcReader::_can_filter_by_dict(int slot_id) { // the implementation of NULL values because the dictionary itself does not contain // NULL value encoding. As a result, many NULL-related functions or expressions // cannot work properly, such as is null, is not null, coalesce, etc. - // Here we first disable dictionary filtering when predicate contains functions. + // Here we first disable dictionary filtering when the predicate expr is not a slot reference. // Implementation of NULL value dictionary filtering will be carried out later. if (expr->node_type() != TExprNodeType::SLOT_REF) { return false; diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 21f5f8d02ef9ab..b9259be936bb31 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -212,7 +212,7 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id, // the implementation of NULL values because the dictionary itself does not contain // NULL value encoding. As a result, many NULL-related functions or expressions // cannot work properly, such as is null, is not null, coalesce, etc. - // Here we first disable dictionary filtering when predicate contains functions. + // Here we first disable dictionary filtering when the predicate expr is not a slot reference. // Implementation of NULL value dictionary filtering will be carried out later.
if (expr->node_type() != TExprNodeType::SLOT_REF) { return false; From ef65c9f98fbf4da39b7696da54a6501217df5404 Mon Sep 17 00:00:00 2001 From: Socrates Date: Sat, 19 Oct 2024 00:27:52 +0800 Subject: [PATCH 4/5] fix regression --- .../external_table_p0/hive/test_string_dict_filter.out | 8 ++++---- .../external_table_p0/hive/test_string_dict_filter.groovy | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out index 03f7b385f1366f..2a8cebd872315e 100644 --- a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out +++ b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out @@ -57,7 +57,7 @@ null null -- !q15 -- -null +5 -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos @@ -117,7 +117,7 @@ null null -- !q15 -- -null +5 -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos @@ -177,7 +177,7 @@ null null -- !q15 -- -null +5 -- !q01 -- 3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. 
depos @@ -237,5 +237,5 @@ null null -- !q15 -- -null +5 diff --git a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy index d7724a03e5f3be..8cb46ab3025ec7 100644 --- a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy +++ b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy @@ -60,7 +60,7 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock select * from ( select COALESCE(o_orderpriority, 'null') AS o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = 'null'; """ qt_q15 """ - select * from ( select (case when o_orderpriority is null then 'null' else o_orderpriority end) case_col from test_string_dict_filter_parquet ) x where case_col = 'null'; + select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' else '0' end) as o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = '0'; """ } def q_orc = { @@ -107,7 +107,7 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock select * from ( select COALESCE(o_orderpriority, 'null') AS o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = 'null'; """ qt_q15 """ - select * from ( select (case when o_orderpriority is null then 'null' else o_orderpriority end) case_col from test_string_dict_filter_orc ) x where case_col = 'null'; + select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' else '0' end) as o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = '0'; """ } String enabled = context.config.otherConfigs.get("enableHiveTest") From 1a564aa0ad4867b184b9ef477e4cb18a8c4dbf1e Mon Sep 17 00:00:00 2001 From: Socrates Date: Sat, 19 Oct 2024 00:36:17 +0800 Subject: [PATCH 5/5] fix --- .../external_table_p0/hive/test_string_dict_filter.groovy | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy index 8cb46ab3025ec7..1929c813c554ac 100644 --- a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy +++ b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy @@ -60,7 +60,7 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock select * from ( select COALESCE(o_orderpriority, 'null') AS o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = 'null'; """ qt_q15 """ - select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' else '0' end) as o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = '0'; + select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = '0'; """ } def q_orc = { @@ -107,7 +107,7 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock select * from ( select COALESCE(o_orderpriority, 'null') AS o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = 'null'; """ qt_q15 """ - select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' else '0' end) as o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = '0'; + select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = '0'; """ } String enabled = context.config.otherConfigs.get("enableHiveTest")