Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 20 additions & 18 deletions fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
Original file line number Diff line number Diff line change
Expand Up @@ -1022,35 +1022,37 @@ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException {
* TODO: the a.key should be replaced by a.k1 instead of unknown column 'key' in 'a'
*/

// according to mysql
// having clause should use column name inside group by clause, prior to alias.
// case1: having clause use column name table.v1, because v1 inside group by clause
// select id, sum(v1) v1 from table group by id,v1 having(v1>1);
// case2: having clause use alias name v2, because v2 is not inside group by clause
// select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND v2>1);
// case3: having clause use alias name v, because table do not have column name v
// select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1);
/* according to mysql (https://dev.mysql.com/doc/refman/8.0/en/select.html)
* "For GROUP BY or HAVING clauses, it searches the FROM clause before searching in the
* select_expr values. (For GROUP BY and HAVING, this differs from the pre-MySQL 5.0 behavior
* that used the same rules as for ORDER BY.)"
* case1: having clause use column name table.v1, because it searches the FROM clause firstly
* select id, sum(v1) v1 from table group by id,v1 having(v1>1);
* case2: having clause used in aggregate functions, such as sum(v2) here
* select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND sum(v2)>1);
* case3: having clause use alias name v, because table do not have column name v
* select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1);
* case4: having clause use alias name vsum, because table do not have column name vsum
* select id, floor(v1) v, sum(v2) vsum from table group by id,v having(v>1 AND vsum>1);
*/
if (groupByClause != null) {
ExprSubstitutionMap excludeGroupByaliasSMap = aliasSMap.clone();
// according to case2, maybe some having slots inside group by clause, some do not
List<Expr> groupBySlots = Lists.newArrayList();
for (Expr expr : groupByClause.getGroupingExprs()) {
expr.collect(SlotRef.class, groupBySlots);
}
for (Expr expr : groupBySlots) {
if (excludeGroupByaliasSMap.get(expr) == null) {
ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone();
List<Expr> havingSlots = Lists.newArrayList();
havingClause.collect(SlotRef.class, havingSlots);
for (Expr expr : havingSlots) {
if (excludeAliasSMap.get(expr) == null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like sql "SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v2>1);" still report the same error. Please double check if it's expected behavior of this pr

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This behavior is the same as before pr15143, because here we use column name firstly, so it report an error.

  1. Before 15143, we use column name firstly; (So this SQL failed)
  2. After 15143, we use column name firstly for columns inside group by; for other columns, we use alias name firstly;
    (So the SQL success)
  3. Now we revert 15143 to keep the logic simple: always use column name firstly. (So the SQL failed)

If we change having(v2>1) to having(sum(v2)>1), the SQL will success.

Later we will add a config to choose alias name firstly or column name firstly: #15748

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

continue;
}
try {
// try to use column name firstly
expr.clone().analyze(analyzer);
// analyze success means column name exist, do not use alias name
excludeGroupByaliasSMap.removeByLhsExpr(expr);
excludeAliasSMap.removeByLhsExpr(expr);
} catch (AnalysisException ex) {
// according to case3, column name do not exist, keep alias name inside alias map
}
}
havingClauseAfterAnaylzed = havingClause.substitute(excludeGroupByaliasSMap, analyzer, false);
havingClauseAfterAnaylzed = havingClause.substitute(excludeAliasSMap, analyzer, false);
} else {
// according to mysql
// if there is no group by clause, the having clause should use alias
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,9 @@
-- !case3 --
2 1 3

-- !case4 --
2 1 3

-- !case5 --
2 3

Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,9 @@
"""
sql """ INSERT INTO test_having_alias_tb values(1,1,1),(2,2,2),(2,3,3); """
qt_case1 """ SELECT id, sum(v1) v1 FROM test_having_alias_tb GROUP BY id,v1 having(v1>1) ORDER BY id,v1; """
qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND v2>1) ORDER BY id,v1; """
qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND v2>1) ORDER BY id,v; """
qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND sum(v2)>1) ORDER BY id,v1; """
qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND sum(v2)>1) ORDER BY id,v; """
qt_case4 """ SELECT id, v1-2 as v, sum(v2) vsum FROM test_having_alias_tb GROUP BY id,v having(v>0 AND vsum>1) ORDER BY id,v; """
qt_case5 """ SELECT id, max(v1) v1 FROM test_having_alias_tb GROUP BY 1 having count(distinct v1)>1 ORDER BY id; """
sql """ DROP TABLE IF EXISTS `test_having_alias_tb`; """
}