From 98c16a04e4a529393b8535bfa97f0798c8daff74 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 3 Feb 2023 12:43:57 +0800 Subject: [PATCH 1/2] remove alias in GROUP BY only when the expr is resolved --- .../analysis/ResolveReferencesInAggregate.scala | 8 +++++++- .../src/test/resources/sql-tests/inputs/group-by.sql | 3 +++ .../test/resources/sql-tests/results/group-by.sql.out | 11 +++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index 4af2ecc91ab55..4d4d6eb529a40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -96,7 +96,13 @@ object ResolveReferencesInAggregate extends SQLConfHelper // can't find the grouping expressions via `semanticEquals` and the analysis will fail. // Example rules: ResolveGroupingAnalytics (See SPARK-31670 for more details) and // ResolveLateralColumnAliasReference. - groupingExpressions = resolvedGroupExprs.map(trimAliases), + groupingExpressions = resolvedGroupExprs.map { a => + // Only trim the alias if the expression is resolved, as the alias may be needed to resolve + // the expression, such as `NamePlaceHolder` in `CreateNamedStruct`. + // Note: this rule will be invoked even if the Aggregate is fully resolved. So alias in + // GROUP BY will be removed eventually, by following iterations. 
+ if (a.resolved) trimAliases(a) else a + }, aggregateExpressions = resolvedAggExprsWithOuter) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 1615c43cc7ed7..c812403ba2c3f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -34,6 +34,9 @@ SELECT a + b, COUNT(b) FROM testData GROUP BY a + b; SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1; SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1; +-- struct() in group by +SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa); + -- Aggregate with nulls. SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 0402039fafac5..6e7592d6978af 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -145,6 +145,17 @@ struct<((a + 1) + 1):int,count(b):bigint> NULL 1 +-- !query +SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa) +-- !query schema +struct<count(1):bigint> +-- !query output +2 +2 +2 +3 + + -- !query SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData From f21aa9a78e1bae06111f7e0c603a6b8f254a6d0e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 3 Feb 2023 12:55:55 +0800 Subject: [PATCH 2/2] Update ResolveReferencesInAggregate.scala --- .../sql/catalyst/analysis/ResolveReferencesInAggregate.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index 
4d4d6eb529a40..1a9ed4ce16eb9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -96,12 +96,12 @@ object ResolveReferencesInAggregate extends SQLConfHelper // can't find the grouping expressions via `semanticEquals` and the analysis will fail. // Example rules: ResolveGroupingAnalytics (See SPARK-31670 for more details) and // ResolveLateralColumnAliasReference. - groupingExpressions = resolvedGroupExprs.map { a => + groupingExpressions = resolvedGroupExprs.map { e => // Only trim the alias if the expression is resolved, as the alias may be needed to resolve // the expression, such as `NamePlaceHolder` in `CreateNamedStruct`. // Note: this rule will be invoked even if the Aggregate is fully resolved. So alias in // GROUP BY will be removed eventually, by following iterations. - if (a.resolved) trimAliases(a) else a + if (e.resolved) trimAliases(e) else e }, aggregateExpressions = resolvedAggExprsWithOuter) }