Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/backend/optimizer/plan/planner.c
Original file line number Diff line number Diff line change
Expand Up @@ -1339,6 +1339,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
if (hasResultRTEs)
remove_useless_result_rtes(root);

parse = remove_distinct_sort_clause(parse);

/*
* Do the main planning.
*/
Expand Down
86 changes: 86 additions & 0 deletions src/backend/optimizer/plan/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
#include "optimizer/tlist.h"
#include "optimizer/transform.h"
#include "utils/lsyscache.h"
#include "catalog/pg_proc.h"
Expand All @@ -39,6 +40,7 @@ static SubLink *make_sirvf_subselect(FuncExpr *fe);
static Query *make_sirvf_subquery(FuncExpr *fe);
static bool safe_to_replace_sirvf_tle(Query *query);
static bool safe_to_replace_sirvf_rte(Query *query);
static bool tlist_has_srf(Query *query);

/**
* Normalize query before planning.
Expand Down Expand Up @@ -520,3 +522,87 @@ replace_sirvf_rte(Query *query, RangeTblEntry *rte)

return rte;
}

/*
 * Does the query's target list contain any set-returning functions?
 */
static bool
tlist_has_srf(Query *query)
{
	/* Trust the parser-set flag first; fall back to a full expression scan. */
	return query->hasTargetSRFs ||
		expression_returns_set((Node *) query->targetList);
}

/*
 * DISTINCT/DISTINCT ON/ORDER BY optimization.
 * Remove the DISTINCT clause if possible, e.g.:
 *   select DISTINCT count(a) from t;   becomes
 *   select count(a) from t;
 * At most one row is returned, so DISTINCT and/or DISTINCT ON is pointless.
 * The same applies to the ORDER BY clause.
 */
Query *remove_distinct_sort_clause(Query *parse)
{
	/*
	 * An ungrouped aggregate query yields at most one row, so DISTINCT,
	 * DISTINCT ON and ORDER BY are no-ops and can be stripped.
	 *
	 * Bail out when grouping sets are present: groupClause may be NIL while
	 * grouping sets (e.g. GROUP BY GROUPING SETS ((),())) still produce
	 * multiple rows, in which case the removal would change results.  Also
	 * bail out when any mutable function is present (conservative: repeated
	 * evaluation could be visible) or when a set-returning function in the
	 * target list can expand the row count beyond one.
	 */
	if (parse->hasAggs &&
		parse->groupClause == NIL &&
		parse->groupingSets == NIL &&
		!contain_mutable_functions((Node *) parse) &&
		!tlist_has_srf(parse))
	{
		List	   *useless_tlist = NIL;	/* junk TLEs that become removable */
		List	   *tles;
		List	   *sortops;
		List	   *eqops;
		ListCell   *lc;

		if (parse->distinctClause != NIL)
		{
			/* Collect junk target entries referenced only by DISTINCT. */
			get_sortgroupclauses_tles(parse->distinctClause, parse->targetList,
									  &tles, &sortops, &eqops);
			foreach(lc, tles)
			{
				TargetEntry *tle = lfirst(lc);

				if (tle->resjunk)
					useless_tlist = lappend(useless_tlist, tle);
			}
			parse->distinctClause = NIL;
			parse->hasDistinctOn = false;
		}

		if (parse->sortClause != NIL)
		{
			get_sortgroupclauses_tles(parse->sortClause, parse->targetList,
									  &tles, &sortops, &eqops);
			foreach(lc, tles)
			{
				TargetEntry *tle = lfirst(lc);

				/*
				 * For SELECT DISTINCT, ORDER BY expressions must appear in
				 * the select list, so some junk TLEs may already have been
				 * collected above; avoid adding duplicates.
				 */
				if (tle->resjunk)
					useless_tlist = list_append_unique(useless_tlist, tle);
			}
			parse->sortClause = NIL;
		}

		/*
		 * With groupClause, sortClause and distinctClause all gone, the junk
		 * TargetEntries that only carried ressortgroupref indexes are safe
		 * to drop from the target list.
		 */
		if (useless_tlist != NIL)
			parse->targetList = list_difference(parse->targetList, useless_tlist);
	}

	return parse;
}
4 changes: 4 additions & 0 deletions src/include/optimizer/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,8 @@

extern Query *normalize_query(Query *query);

extern Query *remove_distinct_sort_clause(Query *query);

extern bool query_has_srf(Query *query);

#endif /* TRANSFORM_H */
46 changes: 17 additions & 29 deletions src/test/regress/expected/aggregates.out
Original file line number Diff line number Diff line change
Expand Up @@ -1066,17 +1066,15 @@ explain (costs off)
select distinct max(unique2) from tenk1;
QUERY PLAN
---------------------------------------------------------------------------
HashAggregate
Group Key: $0
Result
InitPlan 1 (returns $0) (slice1)
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
Merge Key: tenk1.unique2
-> Index Only Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
Optimizer: Postgres query optimizer
(10 rows)
(8 rows)

select distinct max(unique2) from tenk1;
max
Expand All @@ -1088,17 +1086,15 @@ explain (costs off)
select max(unique2) from tenk1 order by 1;
QUERY PLAN
---------------------------------------------------------------------------
Sort
Sort Key: ($0)
Result
InitPlan 1 (returns $0) (slice1)
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
Merge Key: tenk1.unique2
-> Index Only Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
Optimizer: Postgres query optimizer
(10 rows)
(8 rows)

select max(unique2) from tenk1 order by 1;
max
Expand All @@ -1110,17 +1106,15 @@ explain (costs off)
select max(unique2) from tenk1 order by max(unique2);
QUERY PLAN
---------------------------------------------------------------------------
Sort
Sort Key: ($0)
Result
InitPlan 1 (returns $0) (slice1)
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
Merge Key: tenk1.unique2
-> Index Only Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
Optimizer: Postgres query optimizer
(10 rows)
(8 rows)

select max(unique2) from tenk1 order by max(unique2);
max
Expand All @@ -1132,17 +1126,15 @@ explain (costs off)
select max(unique2) from tenk1 order by max(unique2)+1;
QUERY PLAN
---------------------------------------------------------------------------
Sort
Sort Key: (($0 + 1))
Result
InitPlan 1 (returns $0) (slice1)
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
Merge Key: tenk1.unique2
-> Index Only Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
Optimizer: Postgres query optimizer
(10 rows)
(8 rows)

select max(unique2) from tenk1 order by max(unique2)+1;
max
Expand Down Expand Up @@ -1260,20 +1252,16 @@ explain (costs off)
select distinct min(f1), max(f1) from minmaxtest;
QUERY PLAN
--------------------------------------------------------------
Unique
Group Key: (min(minmaxtest.f1)), (max(minmaxtest.f1))
-> Sort
Sort Key: (min(minmaxtest.f1)), (max(minmaxtest.f1))
-> Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Append
-> Seq Scan on minmaxtest minmaxtest_1
-> Seq Scan on minmaxtest1 minmaxtest_2
-> Seq Scan on minmaxtest2 minmaxtest_3
-> Seq Scan on minmaxtest3 minmaxtest_4
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Append
-> Seq Scan on minmaxtest minmaxtest_1
-> Seq Scan on minmaxtest1 minmaxtest_2
-> Seq Scan on minmaxtest2 minmaxtest_3
-> Seq Scan on minmaxtest3 minmaxtest_4
Optimizer: Postgres query optimizer
(13 rows)
(9 rows)

select distinct min(f1), max(f1) from minmaxtest;
min | max
Expand Down
26 changes: 11 additions & 15 deletions src/test/regress/expected/aggregates_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -1267,22 +1267,18 @@ explain (costs off)
select distinct min(f1), max(f1) from minmaxtest;
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: Feature not supported: Inherited tables
QUERY PLAN
--------------------------------------------------------------------------
Unique
Group Key: (min(minmaxtest.f1)), (max(minmaxtest.f1))
-> Sort
Sort Key: (min(minmaxtest.f1)), (max(minmaxtest.f1))
-> Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Append
-> Seq Scan on minmaxtest minmaxtest_1
-> Seq Scan on minmaxtest1 minmaxtest_2
-> Seq Scan on minmaxtest2 minmaxtest_3
-> Seq Scan on minmaxtest3 minmaxtest_4
QUERY PLAN
--------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Append
-> Seq Scan on minmaxtest minmaxtest_1
-> Seq Scan on minmaxtest1 minmaxtest_2
-> Seq Scan on minmaxtest2 minmaxtest_3
-> Seq Scan on minmaxtest3 minmaxtest_4
Optimizer: Postgres query optimizer
(13 rows)
(9 rows)

select distinct min(f1), max(f1) from minmaxtest;
INFO: GPORCA failed to produce a plan, falling back to planner
Expand Down
20 changes: 7 additions & 13 deletions src/test/regress/expected/cbdb_parallel.out
Original file line number Diff line number Diff line change
Expand Up @@ -2407,22 +2407,16 @@ create table t1(c1 int) distributed by (c1);
insert into t1 values(11), (12);
analyze t1;
explain(costs off, locus) select distinct min(c1), max(c1) from t1;
QUERY PLAN
------------------------------------------------------------
Unique
QUERY PLAN
------------------------------------------------
Aggregate
Locus: Entry
Group Key: (min(c1)), (max(c1))
-> Sort
-> Gather Motion 3:1 (slice1; segments: 3)
Locus: Entry
Sort Key: (min(c1)), (max(c1))
-> Aggregate
Locus: Entry
-> Gather Motion 3:1 (slice1; segments: 3)
Locus: Entry
-> Seq Scan on t1
Locus: Hashed
-> Seq Scan on t1
Locus: Hashed
Optimizer: Postgres query optimizer
(13 rows)
(7 rows)

abort;
begin;
Expand Down
36 changes: 17 additions & 19 deletions src/test/regress/expected/incremental_sort.out
Original file line number Diff line number Diff line change
Expand Up @@ -1705,26 +1705,24 @@ from tenk1 t1
join tenk1 t2 on t1.unique1 = t2.unique2
join tenk1 t3 on t2.unique1 = t3.unique1
order by count(*);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort
Sort Key: (count(*))
-> Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (t2.unique2 = t1.unique1)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t2.unique2
-> Hash Join
Hash Cond: (t2.unique1 = t3.unique1)
-> Index Scan using tenk1_unique2 on tenk1 t2
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t3
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t1
QUERY PLAN
-------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (t2.unique2 = t1.unique1)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t2.unique2
-> Hash Join
Hash Cond: (t2.unique1 = t3.unique1)
-> Index Scan using tenk1_unique2 on tenk1 t2
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t3
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t1
Optimizer: Postgres query optimizer
(17 rows)
(15 rows)

-- Parallel sort but with expression (correlated subquery) that
-- is prohibited in parallel plans.
Expand Down
36 changes: 17 additions & 19 deletions src/test/regress/expected/incremental_sort_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -1555,26 +1555,24 @@ from tenk1 t1
join tenk1 t2 on t1.unique1 = t2.unique2
join tenk1 t3 on t2.unique1 = t3.unique1
order by count(*);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort
Sort Key: (count(*))
-> Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (t2.unique2 = t1.unique1)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t2.unique2
-> Hash Join
Hash Cond: (t2.unique1 = t3.unique1)
-> Index Scan using tenk1_unique2 on tenk1 t2
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t3
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t1
QUERY PLAN
-------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: (t2.unique2 = t1.unique1)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t2.unique2
-> Hash Join
Hash Cond: (t2.unique1 = t3.unique1)
-> Index Scan using tenk1_unique2 on tenk1 t2
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t3
-> Hash
-> Index Only Scan using tenk1_unique1 on tenk1 t1
Optimizer: Postgres query optimizer
(18 rows)
(15 rows)

-- Parallel sort but with expression (correlated subquery) that
-- is prohibited in parallel plans.
Expand Down
Loading