diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b6dd832ff53..39252c4bffb 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1339,6 +1339,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse, if (hasResultRTEs) remove_useless_result_rtes(root); + parse = remove_distinct_sort_clause(parse); + /* * Do the main planning. */ diff --git a/src/backend/optimizer/plan/transform.c b/src/backend/optimizer/plan/transform.c index 1a76e24123b..1a8776c6ab7 100644 --- a/src/backend/optimizer/plan/transform.c +++ b/src/backend/optimizer/plan/transform.c @@ -20,6 +20,7 @@ #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/optimizer.h" +#include "optimizer/tlist.h" #include "optimizer/transform.h" #include "utils/lsyscache.h" #include "catalog/pg_proc.h" @@ -39,6 +40,7 @@ static SubLink *make_sirvf_subselect(FuncExpr *fe); static Query *make_sirvf_subquery(FuncExpr *fe); static bool safe_to_replace_sirvf_tle(Query *query); static bool safe_to_replace_sirvf_rte(Query *query); +static bool tlist_has_srf(Query *query); /** * Normalize query before planning. @@ -520,3 +522,87 @@ replace_sirvf_rte(Query *query, RangeTblEntry *rte) return rte; } + +/* + * Does target list have SRFs? + */ +static +bool tlist_has_srf(Query *query) +{ + if (query->hasTargetSRFs) + { + return true; + } + + if (expression_returns_set( (Node *) query->targetList)) + { + return true; + } + + return false; +} + +/* + * DISTINCT/DISTINCT ON/ORDER BY optimization. + * Remove DISTINCT clause if possibile, ex: + * select DISTINCT count(a) from t; to + * select count(a) from t; + * There is one row returned at most, DISTINCT and/or ON is pointless then. + * The same with ORDER BY clause; + */ +Query *remove_distinct_sort_clause(Query *parse) +{ + if (parse->hasAggs && + parse->groupClause == NIL && + !contain_mutable_functions((Node *) parse) && + !tlist_has_srf(parse)) + { + List *useless_tlist = NIL; + List *tles; + List *sortops; + List *eqops; + ListCell *lc; + + if (parse->distinctClause != NIL) + { + get_sortgroupclauses_tles(parse->distinctClause, parse->targetList, + &tles, &sortops, &eqops); + foreach(lc, tles) + { + TargetEntry *tle = lfirst(lc); + if (tle->resjunk) + useless_tlist = lappend(useless_tlist, tle); + } + parse->distinctClause = NIL; + if (parse->hasDistinctOn) + parse->hasDistinctOn = false; + } + + if (parse->sortClause != NIL) + { + + get_sortgroupclauses_tles(parse->sortClause, parse->targetList, + &tles, &sortops, &eqops); + foreach(lc, tles) + { + TargetEntry *tle = lfirst(lc); + /* + * For SELECT DISTINCT, ORDER BY expressions must appear in select list, + * Some tles may be already in the list. + */ + if (tle->resjunk) + useless_tlist = list_append_unique(useless_tlist, tle); + } + parse->sortClause = NIL; + } + + /* + * There is no groupClause, sortClause and distinctClause now . + * The junk TargetEntrys with ressortgroupref index are safe to be removed. + */ + if (useless_tlist != NIL) + parse->targetList = list_difference(parse->targetList, useless_tlist); + } + + return parse; +} diff --git a/src/include/optimizer/transform.h b/src/include/optimizer/transform.h index 85d32f35e0f..d53c393769c 100644 --- a/src/include/optimizer/transform.h +++ b/src/include/optimizer/transform.h @@ -21,4 +21,8 @@ extern Query *normalize_query(Query *query); +extern Query *remove_distinct_sort_clause(Query *query); + +extern bool query_has_srf(Query *query); + #endif /* TRANSFORM_H */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 62548a4c69c..b932b05dfb0 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1066,17 +1066,15 @@ explain (costs off) select distinct max(unique2) from tenk1; QUERY PLAN --------------------------------------------------------------------------- - HashAggregate - Group Key: $0 + Result InitPlan 1 (returns $0) (slice1) -> Limit -> Gather Motion 3:1 (slice2; segments: 3) Merge Key: tenk1.unique2 -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(10 rows) +(8 rows) select distinct max(unique2) from tenk1; max @@ -1088,17 +1086,15 @@ explain (costs off) select max(unique2) from tenk1 order by 1; QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: ($0) + Result InitPlan 1 (returns $0) (slice1) -> Limit -> Gather Motion 3:1 (slice2; segments: 3) Merge Key: tenk1.unique2 -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(10 rows) +(8 rows) select max(unique2) from tenk1 order by 1; max @@ -1110,17 +1106,15 @@ explain (costs off) select max(unique2) from tenk1 order by max(unique2); QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: ($0) + Result InitPlan 1 (returns $0) (slice1) -> Limit -> Gather Motion 3:1 (slice2; segments: 3) Merge Key: tenk1.unique2 -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(10 rows) +(8 rows) select max(unique2) from tenk1 order by max(unique2); max @@ -1132,17 +1126,15 @@ explain (costs off) select max(unique2) from tenk1 order by max(unique2)+1; QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: (($0 + 1)) + Result InitPlan 1 (returns $0) (slice1) -> Limit -> Gather Motion 3:1 (slice2; segments: 3) Merge Key: tenk1.unique2 -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(10 rows) +(8 rows) select max(unique2) from tenk1 order by max(unique2)+1; max @@ -1260,20 +1252,16 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN -------------------------------------------------------------- - Unique - Group Key: (min(minmaxtest.f1)), (max(minmaxtest.f1)) - -> Sort - Sort Key: (min(minmaxtest.f1)), (max(minmaxtest.f1)) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Append - -> Seq Scan on minmaxtest minmaxtest_1 - -> Seq Scan on minmaxtest1 minmaxtest_2 - -> Seq Scan on minmaxtest2 minmaxtest_3 - -> Seq Scan on minmaxtest3 minmaxtest_4 + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Append + -> Seq Scan on minmaxtest minmaxtest_1 + -> Seq Scan on minmaxtest1 minmaxtest_2 + -> Seq Scan on minmaxtest2 minmaxtest_3 + -> Seq Scan on minmaxtest3 minmaxtest_4 Optimizer: Postgres query optimizer -(13 rows) +(9 rows) select distinct min(f1), max(f1) from minmaxtest; min | max diff --git a/src/test/regress/expected/aggregates_optimizer.out b/src/test/regress/expected/aggregates_optimizer.out index 54c9fbe2e8f..959aef0c0b2 100644 --- a/src/test/regress/expected/aggregates_optimizer.out +++ b/src/test/regress/expected/aggregates_optimizer.out @@ -1267,22 +1267,18 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; INFO: GPORCA failed to produce a plan, falling back to planner DETAIL: Feature not supported: Inherited tables - QUERY PLAN --------------------------------------------------------------------------- - Unique - Group Key: (min(minmaxtest.f1)), (max(minmaxtest.f1)) - -> Sort - Sort Key: (min(minmaxtest.f1)), (max(minmaxtest.f1)) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Append - -> Seq Scan on minmaxtest minmaxtest_1 - -> Seq Scan on minmaxtest1 minmaxtest_2 - -> Seq Scan on minmaxtest2 minmaxtest_3 - -> Seq Scan on minmaxtest3 minmaxtest_4 + QUERY PLAN +-------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Append + -> Seq Scan on minmaxtest minmaxtest_1 + -> Seq Scan on minmaxtest1 minmaxtest_2 + -> Seq Scan on minmaxtest2 minmaxtest_3 + -> Seq Scan on minmaxtest3 minmaxtest_4 Optimizer: Postgres query optimizer -(13 rows) +(9 rows) select distinct min(f1), max(f1) from minmaxtest; INFO: GPORCA failed to produce a plan, falling back to planner diff --git a/src/test/regress/expected/cbdb_parallel.out b/src/test/regress/expected/cbdb_parallel.out index 8935ce25f62..648259d3798 100644 --- a/src/test/regress/expected/cbdb_parallel.out +++ b/src/test/regress/expected/cbdb_parallel.out @@ -2407,22 +2407,16 @@ create table t1(c1 int) distributed by (c1); insert into t1 values(11), (12); analyze t1; explain(costs off, locus) select distinct min(c1), max(c1) from t1; - QUERY PLAN ------------------------------------------------------------- - Unique + QUERY PLAN +------------------------------------------------ + Aggregate Locus: Entry - Group Key: (min(c1)), (max(c1)) - -> Sort + -> Gather Motion 3:1 (slice1; segments: 3) Locus: Entry - Sort Key: (min(c1)), (max(c1)) - -> Aggregate - Locus: Entry - -> Gather Motion 3:1 (slice1; segments: 3) - Locus: Entry - -> Seq Scan on t1 - Locus: Hashed + -> Seq Scan on t1 + Locus: Hashed Optimizer: Postgres query optimizer -(13 rows) +(7 rows) abort; begin; diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 8780b357aad..9bdd3486d2b 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1705,26 +1705,24 @@ from tenk1 t1 join tenk1 t2 on t1.unique1 = t2.unique2 join tenk1 t3 on t2.unique1 = t3.unique1 order by count(*); - QUERY PLAN -------------------------------------------------------------------------------------------------- - Sort - Sort Key: (count(*)) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Hash Join - Hash Cond: (t2.unique2 = t1.unique1) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: t2.unique2 - -> Hash Join - Hash Cond: (t2.unique1 = t3.unique1) - -> Index Scan using tenk1_unique2 on tenk1 t2 - -> Hash - -> Index Only Scan using tenk1_unique1 on tenk1 t3 - -> Hash - -> Index Only Scan using tenk1_unique1 on tenk1 t1 + QUERY PLAN +------------------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Hash Join + Hash Cond: (t2.unique2 = t1.unique1) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t2.unique2 + -> Hash Join + Hash Cond: (t2.unique1 = t3.unique1) + -> Index Scan using tenk1_unique2 on tenk1 t2 + -> Hash + -> Index Only Scan using tenk1_unique1 on tenk1 t3 + -> Hash + -> Index Only Scan using tenk1_unique1 on tenk1 t1 Optimizer: Postgres query optimizer -(17 rows) +(15 rows) -- Parallel sort but with expression (correlated subquery) that -- is prohibited in parallel plans. diff --git a/src/test/regress/expected/incremental_sort_optimizer.out b/src/test/regress/expected/incremental_sort_optimizer.out index 0c82c1e2482..a2387b3bf67 100644 --- a/src/test/regress/expected/incremental_sort_optimizer.out +++ b/src/test/regress/expected/incremental_sort_optimizer.out @@ -1555,26 +1555,24 @@ from tenk1 t1 join tenk1 t2 on t1.unique1 = t2.unique2 join tenk1 t3 on t2.unique1 = t3.unique1 order by count(*); - QUERY PLAN -------------------------------------------------------------------------------------------------- - Sort - Sort Key: (count(*)) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Hash Join - Hash Cond: (t2.unique2 = t1.unique1) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: t2.unique2 - -> Hash Join - Hash Cond: (t2.unique1 = t3.unique1) - -> Index Scan using tenk1_unique2 on tenk1 t2 - -> Hash - -> Index Only Scan using tenk1_unique1 on tenk1 t3 - -> Hash - -> Index Only Scan using tenk1_unique1 on tenk1 t1 + QUERY PLAN +------------------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Hash Join + Hash Cond: (t2.unique2 = t1.unique1) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t2.unique2 + -> Hash Join + Hash Cond: (t2.unique1 = t3.unique1) + -> Index Scan using tenk1_unique2 on tenk1 t2 + -> Hash + -> Index Only Scan using tenk1_unique1 on tenk1 t3 + -> Hash + -> Index Only Scan using tenk1_unique1 on tenk1 t1 Optimizer: Postgres query optimizer -(18 rows) +(15 rows) -- Parallel sort but with expression (correlated subquery) that -- is prohibited in parallel plans. diff --git a/src/test/regress/expected/offload_entry_to_qe.out b/src/test/regress/expected/offload_entry_to_qe.out index 1c1d0a13a25..4b441e75572 100644 --- a/src/test/regress/expected/offload_entry_to_qe.out +++ b/src/test/regress/expected/offload_entry_to_qe.out @@ -90,26 +90,20 @@ explain (costs off, locus) select count(x) over () from tst group by x limit 1; (14 rows) explain (costs off, locus) select distinct min(x), max(x) from tst; - QUERY PLAN ------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) Locus: Entry - -> Unique + -> Finalize Aggregate Locus: SingleQE - Group Key: (min(x)), (max(x)) - -> Sort + -> Gather Motion 3:1 (slice2; segments: 3) Locus: SingleQE - Sort Key: (min(x)), (max(x)) - -> Finalize Aggregate - Locus: SingleQE - -> Gather Motion 3:1 (slice2; segments: 3) - Locus: SingleQE - -> Partial Aggregate - Locus: Hashed - -> Seq Scan on tst - Locus: Hashed + -> Partial Aggregate + Locus: Hashed + -> Seq Scan on tst + Locus: Hashed Optimizer: Postgres query optimizer -(17 rows) +(11 rows) reset enable_offload_entry_to_qe; -- compare results with GUC set to false select count(x) from tst where x > 1; diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index bcfa5237e29..f463e61ef0d 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -534,3 +534,214 @@ DROP TABLE capitals; DROP TABLE cities; set gp_statistics_pullup_from_child_partition to off; -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on +create table t_distinct_sort(a int, b int, c int); +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +analyze t_distinct_sort; +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort ; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select distinct count(a), sum(b) from t_distinct_sort ; + count | sum +-------+----- + 30 | 195 +(1 row) + +-- should keep distinct clause +explain(verbose, costs off) +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Unique + Output: (count(a)), (sum(b)), (count(random())) + Group Key: (count(random())) + -> Sort + Output: (count(a)), (sum(b)), (count(random())) + Sort Key: (count(random())) + -> Finalize Aggregate + Output: count(a), sum(b), count(random()) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL count(random())) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL count(random()) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(16 rows) + +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(t_distinct_sort.a) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(t_distinct_sort.a)) + -> Partial Aggregate + Output: PARTIAL count(t_distinct_sort.a) + -> Nested Loop + Output: t_distinct_sort.a + -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Subquery Scan on xx + -> Unique + Output: (count(*)), (generate_series(0, 2)) + Group Key: (count(*)), (generate_series(0, 2)) + -> Sort + Output: (count(*)), (generate_series(0, 2)) + Sort Key: (count(*)), (generate_series(0, 2)) + -> ProjectSet + Output: (count(*)), generate_series(0, 2) + -> Finalize Aggregate + Output: count(*) + -> Gather Motion 3:1 (slice3; segments: 3) + Output: (PARTIAL count(*)) + -> Partial Aggregate + Output: PARTIAL count(*) + -> Seq Scan on public.t_distinct_sort t_distinct_sort_1 + Output: t_distinct_sort_1.a, t_distinct_sort_1.b, t_distinct_sort_1.c + -> Materialize + Output: t_distinct_sort.a + -> Seq Scan on public.t_distinct_sort + Output: t_distinct_sort.a + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(32 rows) + +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; + count +------- + 90 +(1 row) + +drop table t_distinct_sort; +explain(verbose, costs off) +select distinct(count(a)) from generate_series(0, 1) as a; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Aggregate + Output: count(a) + -> Function Scan on pg_catalog.generate_series a + Output: a + Function Call: generate_series(0, 1) + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +select distinct(count(a)) from generate_series(0, 1) as a; + count +------- + 2 +(1 row) + +explain(verbose, costs off) +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Aggregate + Output: count(*) + -> Nested Loop + -> Function Scan on pg_catalog.generate_series a + Output: a.a + Function Call: generate_series(0, 1) + -> Function Scan on pg_catalog.generate_series b + Output: b.b + Function Call: generate_series(0, 2) + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(11 rows) + +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; + count +------- + 6 +(1 row) + diff --git a/src/test/regress/expected/select_distinct_optimizer.out b/src/test/regress/expected/select_distinct_optimizer.out index 91b868818c9..702bb6c23fa 100644 --- a/src/test/regress/expected/select_distinct_optimizer.out +++ b/src/test/regress/expected/select_distinct_optimizer.out @@ -537,3 +537,216 @@ DROP TABLE capitals; DROP TABLE cities; set gp_statistics_pullup_from_child_partition to off; -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on +create table t_distinct_sort(a int, b int, c int); +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +analyze t_distinct_sort; +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Sort + Output: (count(a)), (sum(b)) + Sort Key: (sum(t_distinct_sort.b)), (count(t_distinct_sort.a)) + -> Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' + Optimizer: Pivotal Optimizer (GPORCA) +(13 rows) + +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' + Optimizer: Postgres query optimizer +(10 rows) + +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Result + Output: (count(a)), (sum(b)) + -> Sort + Output: (count(a)), (sum(b)), (sum(a)), (count(c)) + Sort Key: (sum(t_distinct_sort.a)), (count(t_distinct_sort.c)) + -> Finalize Aggregate + Output: count(a), sum(b), sum(a), count(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL sum(a)), (PARTIAL count(c)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL sum(a), PARTIAL count(c) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' + Optimizer: Pivotal Optimizer (GPORCA) +(15 rows) + +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort ; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' + Optimizer: Pivotal Optimizer (GPORCA) +(10 rows) + +select distinct count(a), sum(b) from t_distinct_sort ; + count | sum +-------+----- + 30 | 195 +(1 row) + +-- should keep distinct clause +explain(verbose, costs off) +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Unique + Output: (count(a)), (sum(b)), (count(random())) + Group Key: (count(random())) + -> Sort + Output: (count(a)), (sum(b)), (count(random())) + Sort Key: (count(random())) + -> Finalize Aggregate + Output: count(a), sum(b), count(random()) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL count(random())) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL count(random()) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' + Optimizer: Postgres query optimizer +(16 rows) + +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(t_distinct_sort_1.a) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(t_distinct_sort_1.a)) + -> Partial Aggregate + Output: PARTIAL count(t_distinct_sort_1.a) + -> Nested Loop + Output: t_distinct_sort_1.a + Join Filter: true + -> Seq Scan on public.t_distinct_sort t_distinct_sort_1 + Output: t_distinct_sort_1.a + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> Result + -> ProjectSet + Output: generate_series(0, 2) + -> Finalize Aggregate + Output: count() + -> Gather Motion 3:1 (slice3; segments: 3) + Output: (PARTIAL count()) + -> Partial Aggregate + Output: PARTIAL count() + -> Seq Scan on public.t_distinct_sort + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off' + Optimizer: Pivotal Optimizer (GPORCA) +(25 rows) + +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; + count +------- + 90 +(1 row) + +drop table t_distinct_sort; +explain(verbose, costs off) +select distinct(count(a)) from generate_series(0, 1) as a; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Aggregate + Output: count(generate_series) + -> Function Scan on pg_catalog.generate_series + Output: generate_series + Function Call: generate_series(0, 1) + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off' + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +select distinct(count(a)) from generate_series(0, 1) as a; + count +------- + 2 +(1 row) + +explain(verbose, costs off) +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Aggregate + Output: count() + -> Nested Loop + Join Filter: true + -> Function Scan on pg_catalog.generate_series generate_series_1 + Output: generate_series_1.generate_series + Function Call: generate_series(0, 1) + -> Function Scan on pg_catalog.generate_series + Output: generate_series.generate_series + Function Call: generate_series(0, 2) + Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off' + Optimizer: Pivotal Optimizer (GPORCA) +(12 rows) + +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; + count +------- + 6 +(1 row) + diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 27b63e699a0..a9ec619ff7e 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -200,3 +200,41 @@ DROP TABLE capitals; DROP TABLE cities; set gp_statistics_pullup_from_child_partition to off; -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on + +create table t_distinct_sort(a int, b int, c int); +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +analyze t_distinct_sort; + +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); +explain(verbose, costs off) +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); +explain(verbose, costs off) +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort ; +select distinct count(a), sum(b) from t_distinct_sort ; + +-- should keep distinct clause +explain(verbose, costs off) +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; + +explain(verbose, costs off) +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; +drop table t_distinct_sort; + +explain(verbose, costs off) +select distinct(count(a)) from generate_series(0, 1) as a; +select distinct(count(a)) from generate_series(0, 1) as a; +explain(verbose, costs off) +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; diff --git a/src/test/singlenode_regress/expected/aggregates.out b/src/test/singlenode_regress/expected/aggregates.out index 6cde5430002..2b151573b10 100644 --- a/src/test/singlenode_regress/expected/aggregates.out +++ b/src/test/singlenode_regress/expected/aggregates.out @@ -1054,15 +1054,13 @@ explain (costs off) select distinct max(unique2) from tenk1; QUERY PLAN --------------------------------------------------------------------- - HashAggregate - Group Key: $0 + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(8 rows) +(6 rows) select distinct max(unique2) from tenk1; max @@ -1074,15 +1072,13 @@ explain (costs off) select max(unique2) from tenk1 order by 1; QUERY PLAN --------------------------------------------------------------------- - Sort - Sort Key: ($0) + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(8 rows) +(6 rows) select max(unique2) from tenk1 order by 1; max @@ -1094,15 +1090,13 @@ explain (costs off) select max(unique2) from tenk1 order by max(unique2); QUERY PLAN --------------------------------------------------------------------- - Sort - Sort Key: ($0) + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(8 rows) +(6 rows) select max(unique2) from tenk1 order by max(unique2); max @@ -1114,15 +1108,13 @@ explain (costs off) select max(unique2) from tenk1 order by max(unique2)+1; QUERY PLAN --------------------------------------------------------------------- - Sort - Sort Key: (($0 + 1)) + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result Optimizer: Postgres query optimizer -(8 rows) +(6 rows) select max(unique2) from tenk1 order by max(unique2)+1; max @@ -1231,20 +1223,16 @@ reset enable_seqscan; -- DISTINCT doesn't do anything useful here, but it shouldn't fail explain (costs off) select distinct min(f1), max(f1) from minmaxtest; - QUERY PLAN --------------------------------------------------------------- - Unique - Group Key: (min(minmaxtest.f1)), (max(minmaxtest.f1)) - -> Sort - Sort Key: (min(minmaxtest.f1)), (max(minmaxtest.f1)) - -> Aggregate - -> Append - -> Seq Scan on minmaxtest minmaxtest_1 - -> Seq Scan on minmaxtest1 minmaxtest_2 - -> Seq Scan on minmaxtest2 minmaxtest_3 - -> Seq Scan on minmaxtest3 minmaxtest_4 + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Append + -> Seq Scan on minmaxtest minmaxtest_1 + -> Seq Scan on minmaxtest1 minmaxtest_2 + -> Seq Scan on minmaxtest2 minmaxtest_3 + -> Seq Scan on minmaxtest3 minmaxtest_4 Optimizer: Postgres query optimizer -(11 rows) +(7 rows) select distinct min(f1), max(f1) from minmaxtest; min | max diff --git a/src/test/singlenode_regress/expected/incremental_sort.out b/src/test/singlenode_regress/expected/incremental_sort.out index 40c7dd477cf..179aeabaa87 100644 --- a/src/test/singlenode_regress/expected/incremental_sort.out +++ b/src/test/singlenode_regress/expected/incremental_sort.out @@ -1610,20 +1610,18 @@ join tenk1 t3 on t2.unique1 = t3.unique1 order by count(*); QUERY PLAN ------------------------------------------------------------------------------- - Sort - Sort Key: (count(*)) - -> Aggregate + Aggregate + -> Hash Join + Hash Cond: (t2.unique1 = t3.unique1) -> Hash Join - Hash Cond: (t2.unique1 = t3.unique1) - -> Hash Join - Hash Cond: (t2.unique2 = t1.unique1) - -> Index Scan using tenk1_unique2 on tenk1 t2 - -> Hash - -> Index Only Scan using tenk1_unique1 on tenk1 t1 + Hash Cond: (t2.unique2 = t1.unique1) + -> Index Scan using tenk1_unique2 on tenk1 t2 -> Hash - -> Index Only Scan using tenk1_unique1 on tenk1 t3 + -> Index Only Scan using tenk1_unique1 on tenk1 t1 + -> Hash + -> Index Only Scan using tenk1_unique1 on tenk1 t3 Optimizer: Postgres query optimizer -(13 rows) +(10 rows) -- Parallel sort but with expression (correlated subquery) that -- is prohibited in parallel plans.