apache · yiguolei · Jan 10, 2023 · Jan 9, 2023 · Jan 9, 2023 · starocean999
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -1022,35 +1022,37 @@ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException {
              * TODO: the a.key should be replaced by a.k1 instead of unknown column 'key' in 'a'
              */
 
-            // according to mysql
-            // having clause should use column name inside group by clause, prior to alias.
-            // case1: having clause use column name table.v1, because v1 inside group by clause
-            //     select id, sum(v1) v1 from table group by id,v1 having(v1>1);
-            // case2: having clause use alias name v2, because v2 is not inside group by clause
-            //     select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND v2>1);
-            // case3: having clause use alias name v, because table do not have column name v
-            //     select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1);
+            /* according to mysql (https://dev.mysql.com/doc/refman/8.0/en/select.html)
+             * "For GROUP BY or HAVING clauses, it searches the FROM clause before searching in the
+             * select_expr values. (For GROUP BY and HAVING, this differs from the pre-MySQL 5.0 behavior
+             * that used the same rules as for ORDER BY.)"
+             * case1: the having clause uses the column name table.v1, because the FROM clause is searched first
+             *     select id, sum(v1) v1 from table group by id,v1 having(v1>1);
+             * case2: the having clause may reference an aggregate function directly, such as sum(v2) here
+             *     select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND sum(v2)>1);
+             * case3: the having clause uses the alias name v, because the table has no column named v
+             *     select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND sum(v2)>1);
+             * case4: the having clause uses the alias name vsum, because the table has no column named vsum
+             *     select id, floor(v1) v, sum(v2) vsum from table group by id,v having(v>1 AND vsum>1);
+             */
             if (groupByClause != null) {
-                ExprSubstitutionMap excludeGroupByaliasSMap = aliasSMap.clone();
-                // according to case2, maybe some having slots inside group by clause, some do not
-                List<Expr> groupBySlots = Lists.newArrayList();
-                for (Expr expr : groupByClause.getGroupingExprs()) {
-                    expr.collect(SlotRef.class, groupBySlots);
-                }
-                for (Expr expr : groupBySlots) {
-                    if (excludeGroupByaliasSMap.get(expr) == null) {
+                ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone();
+                List<Expr> havingSlots = Lists.newArrayList();
+                havingClause.collect(SlotRef.class, havingSlots);
+                for (Expr expr : havingSlots) {
+                    if (excludeAliasSMap.get(expr) == null) {
                         continue;
                     }
                     try {
                         // try to use column name firstly
                         expr.clone().analyze(analyzer);
                         // analyze success means column name exist, do not use alias name
-                        excludeGroupByaliasSMap.removeByLhsExpr(expr);
+                        excludeAliasSMap.removeByLhsExpr(expr);
                     } catch (AnalysisException ex) {
                         // according to case3, column name do not exist, keep alias name inside alias map
                     }
                 }
-                havingClauseAfterAnaylzed = havingClause.substitute(excludeGroupByaliasSMap, analyzer, false);
+                havingClauseAfterAnaylzed = havingClause.substitute(excludeAliasSMap, analyzer, false);
             } else {
                 // according to mysql
                 // if there is no group by clause, the having clause should use alias

diff --git a/regression-test/data/correctness_p0/test_group_having_alias.out b/regression-test/data/correctness_p0/test_group_having_alias.out
@@ -20,3 +20,9 @@
 -- !case3 --
 2	1	3
 
+-- !case4 --
+2	1	3
+
+-- !case5 --
+2	3
+
diff --git a/regression-test/suites/correctness_p0/test_group_having_alias.groovy b/regression-test/suites/correctness_p0/test_group_having_alias.groovy
@@ -94,7 +94,9 @@
     """
     sql """ INSERT INTO test_having_alias_tb values(1,1,1),(2,2,2),(2,3,3); """
     qt_case1 """ SELECT id, sum(v1) v1 FROM test_having_alias_tb GROUP BY id,v1 having(v1>1) ORDER BY id,v1; """
-    qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND v2>1) ORDER BY id,v1; """
-    qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND v2>1) ORDER BY id,v; """
+    qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND sum(v2)>1) ORDER BY id,v1; """
+    qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND sum(v2)>1) ORDER BY id,v; """
+    qt_case4 """ SELECT id, v1-2 as v, sum(v2) vsum FROM test_having_alias_tb GROUP BY id,v having(v>0 AND vsum>1) ORDER BY id,v; """
+    qt_case5 """ SELECT id, max(v1) v1 FROM test_having_alias_tb GROUP BY 1 having count(distinct v1)>1 ORDER BY id; """
     sql """ DROP TABLE IF EXISTS `test_having_alias_tb`; """
  }