From 7b549c4346bd79103a782340e27546eee7be4d56 Mon Sep 17 00:00:00 2001 From: minghong Date: Fri, 8 Nov 2024 15:20:19 +0800 Subject: [PATCH 1/3] add regression case: disable join reorder if there are invalid stats --- .../data/nereids_p0/stats/invalid_stats.out | 46 ++++++ .../nereids_p0/stats/invalid_stats.groovy | 132 +++++++----------- 2 files changed, 95 insertions(+), 83 deletions(-) create mode 100644 regression-test/data/nereids_p0/stats/invalid_stats.out diff --git a/regression-test/data/nereids_p0/stats/invalid_stats.out b/regression-test/data/nereids_p0/stats/invalid_stats.out new file mode 100644 index 00000000000000..1c9cd8b2f94592 --- /dev/null +++ b/regression-test/data/nereids_p0/stats/invalid_stats.out @@ -0,0 +1,46 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !reorder_1 -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[nation] +------PhysicalProject +--------PhysicalOlapScan[region] + +-- !ndv_min_max_invalid -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[region] +------PhysicalProject +--------PhysicalOlapScan[nation] + +-- !reorder_2 -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[nation] +------PhysicalProject +--------PhysicalOlapScan[region] + +-- !order_3 -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[nation] +------PhysicalProject +--------PhysicalOlapScan[region] + +-- !ndv_row_invalid -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[region] +------PhysicalProject +--------PhysicalOlapScan[nation] + diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy index 5304cd8c2c1fa9..042b591e0d088d 100644 --- a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy +++ b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy @@ -16,96 +16,62 @@ // under the License. suite("invalid_stats") { - // multi_sql """ - // set global enable_auto_analyze=false; - // SET enable_nereids_planner=true; - // SET enable_fallback_to_original_planner=false; - // set disable_nereids_rules=PRUNE_EMPTY_PARTITION; + multi_sql """ + set global enable_auto_analyze=false; + SET enable_nereids_planner=true; + SET enable_fallback_to_original_planner=false; + set disable_nereids_rules=PRUNE_EMPTY_PARTITION; - // drop table if exists region; - // CREATE TABLE region ( - // r_regionkey int NOT NULL, - // r_name VARCHAR(25) NOT NULL, - // r_comment VARCHAR(152) - // )ENGINE=OLAP - // DUPLICATE KEY(`r_regionkey`) - // COMMENT "OLAP" - // DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 - // PROPERTIES ( - // "replication_num" = "1" - // ); + drop table if exists region; + CREATE TABLE region ( + r_regionkey int NOT NULL, + r_name VARCHAR(25) NOT NULL, + r_comment VARCHAR(152) + )ENGINE=OLAP + DUPLICATE KEY(`r_regionkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); - // drop table if exists nation; - // CREATE TABLE `nation` ( - // `n_nationkey` int(11) NOT NULL, - // `n_name` varchar(25) NOT NULL, - // `n_regionkey` int(11) NOT NULL, - // `n_comment` varchar(152) NULL - // ) ENGINE=OLAP - // DUPLICATE KEY(`N_NATIONKEY`) - // COMMENT "OLAP" - // DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 - // PROPERTIES ( - // "replication_num" = "1" - // ); - // alter table nation modify column n_nationkey set stats ('ndv'='25', 'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25'); + drop table if exists nation; + CREATE TABLE `nation` ( + `n_nationkey` int(11) NOT NULL, + `n_name` varchar(25) NOT NULL, + `n_regionkey` int(11) NOT NULL, + `n_comment` varchar(152) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`N_NATIONKEY`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + alter table nation modify column n_nationkey set stats ('ndv'='25', 'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25'); - // alter table nation modify column n_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='25'); + alter table region modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5'); - // """ + """ - // explain { - // sql "select * from region" - // notContains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select * from region where r_regionkey=1" - // contains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region group by r_regionkey" - // contains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region join nation on r_regionkey=n_regionkey" - // contains("join reorder with unknown column statistics") - // } - - // sql "alter table region modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');" + qt_reorder_1 "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" + + sql "alter table region modify column r_regionkey set stats ('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0');" - // explain { - // sql "select * from region where r_regionkey=1" - // notContains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region group by r_regionkey" - // notContains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region join nation on r_regionkey=n_regionkey" - // notContains("join reorder with unknown column statistics") - // } + // r_regionkey stats invalid: ndv=0, but min or max is not null + qt_ndv_min_max_invalid "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" + + // inject normal stats and check join order is nation-region + sql "alter table region modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');" + + qt_reorder_2 "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" - // explain { - // sql "select r_name from region join nation on r_regionkey=n_regionkey" - // notContains("join reorder with unknown column statistics") - // } + // r_regionkey stats invalid: ndv > 10*row + sql "alter table region modify column r_regionkey set stats ('ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');" + qt_order_3 "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" + + sql "alter table region modify column r_regionkey set stats ('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');" + qt_ndv_row_invalid "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" - // explain { - // sql """ - // select r_name - // from (select r_name, r_regionkey + 1 x from region) T join nation on T.x=n_regionkey - // """ - // notContains("join reorder with unknown column statistics") - // } } -// disable jo: alter table region modify column r_regionkey set stats ('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0'); -// disable jo: alter table region modify column r_regionkey set stats ('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1'); - -// alter table region modify column r_regionkey set stats ('ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1'); From 30862fcf91b9804e982975903185ba57ae60bde9 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 13 Nov 2024 10:26:02 +0800 Subject: [PATCH 2/3] update --- .../data/nereids_p0/stats/invalid_stats.out | 45 +++++++------------ .../nereids_p0/stats/invalid_stats.groovy | 2 +- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/regression-test/data/nereids_p0/stats/invalid_stats.out b/regression-test/data/nereids_p0/stats/invalid_stats.out index 1c9cd8b2f94592..9b1b2e2aa97528 100644 --- a/regression-test/data/nereids_p0/stats/invalid_stats.out +++ b/regression-test/data/nereids_p0/stats/invalid_stats.out @@ -1,46 +1,31 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !reorder_1 -- PhysicalResultSink ---PhysicalProject -----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() -------PhysicalProject ---------PhysicalOlapScan[nation] -------PhysicalProject ---------PhysicalOlapScan[region] +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[nation] +----PhysicalOlapScan[region] -- !ndv_min_max_invalid -- PhysicalResultSink ---PhysicalProject -----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() -------PhysicalProject ---------PhysicalOlapScan[region] -------PhysicalProject ---------PhysicalOlapScan[nation] +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[region] +----PhysicalOlapScan[nation] -- !reorder_2 -- PhysicalResultSink ---PhysicalProject -----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() -------PhysicalProject ---------PhysicalOlapScan[nation] -------PhysicalProject ---------PhysicalOlapScan[region] +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[nation] +----PhysicalOlapScan[region] -- !order_3 -- PhysicalResultSink ---PhysicalProject -----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() -------PhysicalProject ---------PhysicalOlapScan[nation] -------PhysicalProject ---------PhysicalOlapScan[region] +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[nation] +----PhysicalOlapScan[region] -- !ndv_row_invalid -- PhysicalResultSink ---PhysicalProject -----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() -------PhysicalProject ---------PhysicalOlapScan[region] -------PhysicalProject ---------PhysicalOlapScan[nation] +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[region] +----PhysicalOlapScan[nation] diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy index 042b591e0d088d..51f1a37f118a42 100644 --- a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy +++ b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy @@ -21,7 +21,7 @@ suite("invalid_stats") { SET enable_nereids_planner=true; SET enable_fallback_to_original_planner=false; set disable_nereids_rules=PRUNE_EMPTY_PARTITION; - + set ignore_shape_nodes=PhysicalProject; drop table if exists region; CREATE TABLE region ( From a6b0f9388cfbd28e1fe57e34f412b6c3facab579 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 27 Nov 2024 11:09:48 +0800 Subject: [PATCH 3/3] add case --- .../stats/invalid_stats/invalid_stats.out | 31 +++++++++++++++++++ .../stats/{ => col_stats}/column_stats.groovy | 0 .../{ => invalid_stats}/invalid_stats.groovy | 0 3 files changed, 31 insertions(+) create mode 100644 regression-test/data/nereids_p0/stats/invalid_stats/invalid_stats.out rename regression-test/suites/nereids_p0/stats/{ => col_stats}/column_stats.groovy (100%) rename regression-test/suites/nereids_p0/stats/{ => invalid_stats}/invalid_stats.groovy (100%) diff --git a/regression-test/data/nereids_p0/stats/invalid_stats/invalid_stats.out b/regression-test/data/nereids_p0/stats/invalid_stats/invalid_stats.out new file mode 100644 index 00000000000000..9b1b2e2aa97528 --- /dev/null +++ b/regression-test/data/nereids_p0/stats/invalid_stats/invalid_stats.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !reorder_1 -- +PhysicalResultSink +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[nation] +----PhysicalOlapScan[region] + +-- !ndv_min_max_invalid -- +PhysicalResultSink +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[region] +----PhysicalOlapScan[nation] + +-- !reorder_2 -- +PhysicalResultSink +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[nation] +----PhysicalOlapScan[region] + +-- !order_3 -- +PhysicalResultSink +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[nation] +----PhysicalOlapScan[region] + +-- !ndv_row_invalid -- +PhysicalResultSink +--hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +----PhysicalOlapScan[region] +----PhysicalOlapScan[nation] + diff --git a/regression-test/suites/nereids_p0/stats/column_stats.groovy b/regression-test/suites/nereids_p0/stats/col_stats/column_stats.groovy similarity index 100% rename from regression-test/suites/nereids_p0/stats/column_stats.groovy rename to regression-test/suites/nereids_p0/stats/col_stats/column_stats.groovy diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy b/regression-test/suites/nereids_p0/stats/invalid_stats/invalid_stats.groovy similarity index 100% rename from regression-test/suites/nereids_p0/stats/invalid_stats.groovy rename to regression-test/suites/nereids_p0/stats/invalid_stats/invalid_stats.groovy