From b85329021461ee65e3c533092ba4703a7475ecba Mon Sep 17 00:00:00 2001 From: cambyzju Date: Mon, 9 Jan 2023 20:52:24 +0800 Subject: [PATCH 1/2] revert pr15143, and support multi conditions for having clause --- .../org/apache/doris/analysis/SelectStmt.java | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index acf57f28b277b2..7e311ffa6f6679 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -1022,35 +1022,37 @@ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException { * TODO: the a.key should be replaced by a.k1 instead of unknown column 'key' in 'a' */ - // according to mysql - // having clause should use column name inside group by clause, prior to alias. - // case1: having clause use column name table.v1, because v1 inside group by clause - // select id, sum(v1) v1 from table group by id,v1 having(v1>1); - // case2: having clause use alias name v2, because v2 is not inside group by clause - // select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND v2>1); - // case3: having clause use alias name v, because table do not have column name v - // select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1); + /* according to mysql (https://dev.mysql.com/doc/refman/8.0/en/select.html) + * "For GROUP BY or HAVING clauses, it searches the FROM clause before searching in the + * select_expr values. 
(For GROUP BY and HAVING, this differs from the pre-MySQL 5.0 behavior + * that used the same rules as for ORDER BY.)" + * case1: having clause uses column name table.v1, because it searches the FROM clause first + * select id, sum(v1) v1 from table group by id,v1 having(v1>1); + * case2: having clause uses aggregate functions directly, such as sum(v2) here + * select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND sum(v2)>1); + * case3: having clause uses alias name v, because table does not have column name v + * select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND sum(v2)>1); + * case4: having clause uses alias name vsum, because table does not have column name vsum + * select id, floor(v1) v, sum(v2) vsum from table group by id,v having(v>1 AND vsum>1); + */ if (groupByClause != null) { - ExprSubstitutionMap excludeGroupByaliasSMap = aliasSMap.clone(); - // according to case2, maybe some having slots inside group by clause, some do not - List groupBySlots = Lists.newArrayList(); - for (Expr expr : groupByClause.getGroupingExprs()) { - expr.collect(SlotRef.class, groupBySlots); - } - for (Expr expr : groupBySlots) { - if (excludeGroupByaliasSMap.get(expr) == null) { + ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone(); + List havingSlots = Lists.newArrayList(); + havingClause.collect(SlotRef.class, havingSlots); + for (Expr expr : havingSlots) { + if (excludeAliasSMap.get(expr) == null) { continue; } try { // try to use column name firstly expr.clone().analyze(analyzer); // analyze success means column name exist, do not use alias name - excludeGroupByaliasSMap.removeByLhsExpr(expr); + excludeAliasSMap.removeByLhsExpr(expr); } catch (AnalysisException ex) { // according to case3, column name do not exist, keep alias name inside alias map } } - havingClauseAfterAnaylzed = havingClause.substitute(excludeGroupByaliasSMap, analyzer, false); + havingClauseAfterAnaylzed = havingClause.substitute(excludeAliasSMap, analyzer, false); }
else { // according to mysql // if there is no group by clause, the having clause should use alias From f31aa93945b4fc25cc0489ef680905fd25af3333 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Mon, 9 Jan 2023 21:14:17 +0800 Subject: [PATCH 2/2] add regression test --- .../data/correctness_p0/test_group_having_alias.out | 6 ++++++ .../suites/correctness_p0/test_group_having_alias.groovy | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/regression-test/data/correctness_p0/test_group_having_alias.out b/regression-test/data/correctness_p0/test_group_having_alias.out index 8d3629b5f5fb1d..7a87076e28b6be 100644 --- a/regression-test/data/correctness_p0/test_group_having_alias.out +++ b/regression-test/data/correctness_p0/test_group_having_alias.out @@ -20,3 +20,9 @@ -- !case3 -- 2 1 3 +-- !case4 -- +2 1 3 + +-- !case5 -- +2 3 + diff --git a/regression-test/suites/correctness_p0/test_group_having_alias.groovy b/regression-test/suites/correctness_p0/test_group_having_alias.groovy index 45350026e2b07f..80187b911367cf 100644 --- a/regression-test/suites/correctness_p0/test_group_having_alias.groovy +++ b/regression-test/suites/correctness_p0/test_group_having_alias.groovy @@ -94,7 +94,9 @@ """ sql """ INSERT INTO test_having_alias_tb values(1,1,1),(2,2,2),(2,3,3); """ qt_case1 """ SELECT id, sum(v1) v1 FROM test_having_alias_tb GROUP BY id,v1 having(v1>1) ORDER BY id,v1; """ - qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND v2>1) ORDER BY id,v1; """ - qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND v2>1) ORDER BY id,v; """ + qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND sum(v2)>1) ORDER BY id,v1; """ + qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND sum(v2)>1) ORDER BY id,v; """ + qt_case4 """ SELECT id, v1-2 as v, sum(v2) vsum FROM 
test_having_alias_tb GROUP BY id,v having(v>0 AND vsum>1) ORDER BY id,v; """ + qt_case5 """ SELECT id, max(v1) v1 FROM test_having_alias_tb GROUP BY 1 having count(distinct v1)>1 ORDER BY id; """ sql """ DROP TABLE IF EXISTS `test_having_alias_tb`; """ }