diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 14a2a323c8852..f39ea03500636 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -730,6 +730,11 @@ class Analyzer(
         }
         Sort(newOrders, global, child)
 
+      // When conf.orderByOrdinal is off, drop the ordinal (IntegerIndex) sort orders.
+      case s @ Sort(orders, global, child)
+        if !conf.orderByOrdinal && orders.exists(o => IntegerIndex.unapply(o.child).nonEmpty) =>
+        Sort(orders.filterNot(o => IntegerIndex.unapply(o.child).nonEmpty), global, child)
+
       // Replace the index with the corresponding expression in aggregateExpressions. The index is
       // a 1-base position of aggregateExpressions, which is output columns (select expression)
       case a @ Aggregate(groups, aggs, child)
@@ -1252,7 +1257,9 @@ class Analyzer(
             case ae: AnalysisException => filter
           }
 
-      case sort @ Sort(sortOrder, global, aggregate: Aggregate) if aggregate.resolved =>
+      // Ordinal sort orders are not fully resolved yet, so skip such Sorts. See SPARK-16955.
+      case sort @ Sort(sortOrder, global, aggregate: Aggregate) if aggregate.resolved &&
+        sortOrder.forall(x => IntegerIndex.unapply(x.child).isEmpty) =>
 
         // Try resolving the ordering as though it is in the aggregate clause.
         try {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index eac588fff2fc7..93b3fb1115901 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -495,6 +495,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     )
   }
 
+  test("SPARK-16955: Using ordinals in ORDER BY and GROUP BY causes an analysis error") {
+    withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "true") { // ordinal 1 must act as column a
+      checkAnswer(
+        sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY 1 ORDER BY 1 DESC"),
+        sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY a ORDER BY a DESC"))
+    }
+    withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "false") { // ORDER BY 1 must be ignored
+      checkAnswer(
+        sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY 1 ORDER BY 1 DESC"),
+        sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY a"))
+    }
+  }
+
   test("select *") {
     checkAnswer(
       sql("SELECT * FROM testData"),