From f60c1843ec6a4c0c380825cb316e918e6fd7ceba Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Fri, 23 Jun 2017 19:54:44 -0700 Subject: [PATCH 01/10] save for now --- .../spark/sql/catalyst/analysis/Analyzer.scala | 5 ----- .../org/apache/spark/sql/SQLQuerySuite.scala | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 193082eb77024..98b692739618e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1197,11 +1197,6 @@ class Analyzer( case u @ UnresolvedFunction(funcId, children, isDistinct) => withPosition(u) { catalog.lookupFunction(funcId, children) match { - // DISTINCT is not meaningful for a Max or a Min. - case max: Max if isDistinct => - AggregateExpression(max, Complete, isDistinct = false) - case min: Min if isDistinct => - AggregateExpression(min, Complete, isDistinct = false) // AggregateWindowFunctions are AggregateFunctions that can only be evaluated within // the context of a Window clause. They do not need to be wrapped in an // AggregateExpression. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 68f61cfab6d2f..f8091902779a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -25,6 +25,8 @@ import java.util.concurrent.atomic.AtomicBoolean import org.apache.spark.{AccumulatorSuite, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} +import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, UnresolvedGenerator, withPosition} +import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, CartesianProductExec, SortMergeJoinExec} @@ -49,6 +51,22 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { checkAnswer(queryCoalesce, Row("1") :: Nil) } + test("foo") { + val df = Seq((1, 1)).toDF("key", "value") + df.createOrReplaceTempView("src") + val foo=sql("select max(distinct key) from src").logicalPlan + val catalog = new SessionCatalog(new InMemoryCatalog, + EmptyFunctionRegistry, + new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) + foo match { + case u @ UnresolvedGenerator(name, children) => + withPosition(u) { + catalog.lookupFunction(name, children) match { + case max => + } + } + } + } test("show functions") { def getFunctions(pattern: String): Seq[Row] = { StringUtils.filterPattern( From 7604811863567cc81778b0f0cb39c1385564781c Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Mon, 26 Jun 2017 19:26:03 -0700 Subject: [PATCH 02/10] finish implementation and test cases --- .../spark/sql/catalyst/dsl/package.scala | 2 + .../sql/catalyst/optimizer/Optimizer.scala | 12 ++++ .../optimizer/EliminateDistinceSuite.scala | 56 +++++++++++++++++++ 
.../org/apache/spark/sql/SQLQuerySuite.scala | 18 ------ 4 files changed, 70 insertions(+), 18 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinceSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index beee93d906f0f..8fc26dafe9744 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -160,6 +160,8 @@ package object dsl { def last(e: Expression): Expression = new Last(e).toAggregateExpression() def min(e: Expression): Expression = Min(e).toAggregateExpression() def max(e: Expression): Expression = Max(e).toAggregateExpression() + def maxDistinct(e: Expression): Expression = Max(e).toAggregateExpression(isDistinct = true) + def minDistinct(e: Expression): Expression = Min(e).toAggregateExpression(isDistinct = true) def upper(e: Expression): Expression = Upper(e) def lower(e: Expression): Expression = Lower(e) def sqrt(e: Expression): Expression = Sqrt(e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b410312030c5d..fa34246fbc617 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -40,6 +40,8 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) protected val fixedPoint = FixedPoint(conf.optimizerMaxIterations) def batches: Seq[Batch] = { + // DISTINCT is not meaningful for a Max or a Min. 
+ Batch("Eliminate Distinct", Once, EliminateDistinct) :: // Technically some of the rules in Finish Analysis are not optimizer rules and belong more // in the analyzer, because they are needed for correctness (e.g. ComputeCurrentTime). // However, because we also use the analyzer to canonicalized queries (for view definition), @@ -151,6 +153,16 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = Nil } +/** + * Remove useless DISTINCT for MAX and MIN + */ +object EliminateDistinct extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { + case AggregateExpression(af @ Max(_), _, true, _) => AggregateExpression(af, Complete, false) + case AggregateExpression(af @ Min(_), _, true, _) => AggregateExpression(af, Complete, false) + } +} + /** * An optimizer used in test code. * diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinceSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinceSuite.scala new file mode 100644 index 0000000000000..a60b76dfbf407 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinceSuite.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor + +class EliminateDistinceSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Operator Optimizations", Once, + EliminateDistinct) :: Nil + } + + val testRelation = LocalRelation('a.int) + + test("Eliminate Distinct in Max") { + val query = testRelation + .select(maxDistinct('a) as('result)) + .analyze + val answer = testRelation + .select(max('a) as('result)) + .analyze + assert(query != answer) + comparePlans(Optimize.execute(query), answer) + } + + test("Eliminate Distinct in Min") { + val query = testRelation + .select(minDistinct('a) as('result)) + .analyze + val answer = testRelation + .select(min('a) as('result)) + .analyze + assert(query != answer) + comparePlans(Optimize.execute(query), answer) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f8091902779a6..68f61cfab6d2f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -25,8 +25,6 @@ import java.util.concurrent.atomic.AtomicBoolean import 
org.apache.spark.{AccumulatorSuite, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} -import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, UnresolvedGenerator, withPosition} -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, CartesianProductExec, SortMergeJoinExec} @@ -51,22 +49,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { checkAnswer(queryCoalesce, Row("1") :: Nil) } - test("foo") { - val df = Seq((1, 1)).toDF("key", "value") - df.createOrReplaceTempView("src") - val foo=sql("select max(distinct key) from src").logicalPlan - val catalog = new SessionCatalog(new InMemoryCatalog, - EmptyFunctionRegistry, - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) - foo match { - case u @ UnresolvedGenerator(name, children) => - withPosition(u) { - catalog.lookupFunction(name, children) match { - case max => - } - } - } - } test("show functions") { def getFunctions(pattern: String): Seq[Row] = { StringUtils.filterPattern( From 892f50a3cfda91918961cf81e13a769f2591173c Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Mon, 26 Jun 2017 21:42:10 -0700 Subject: [PATCH 03/10] revise code style and comments --- .../scala/org/apache/spark/sql/catalyst/dsl/package.scala | 2 +- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 8fc26dafe9744..f6792569b704e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -159,9 +159,9 @@ package object dsl { def 
first(e: Expression): Expression = new First(e).toAggregateExpression() def last(e: Expression): Expression = new Last(e).toAggregateExpression() def min(e: Expression): Expression = Min(e).toAggregateExpression() + def minDistinct(e: Expression): Expression = Min(e).toAggregateExpression(isDistinct = true) def max(e: Expression): Expression = Max(e).toAggregateExpression() def maxDistinct(e: Expression): Expression = Max(e).toAggregateExpression(isDistinct = true) - def minDistinct(e: Expression): Expression = Min(e).toAggregateExpression(isDistinct = true) def upper(e: Expression): Expression = Upper(e) def lower(e: Expression): Expression = Lower(e) def sqrt(e: Expression): Expression = Sqrt(e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index fa34246fbc617..09d1152c6701c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -40,7 +40,6 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) protected val fixedPoint = FixedPoint(conf.optimizerMaxIterations) def batches: Seq[Batch] = { - // DISTINCT is not meaningful for a Max or a Min. Batch("Eliminate Distinct", Once, EliminateDistinct) :: // Technically some of the rules in Finish Analysis are not optimizer rules and belong more // in the analyzer, because they are needed for correctness (e.g. ComputeCurrentTime). @@ -154,12 +153,13 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) } /** - * Remove useless DISTINCT for MAX and MIN + * Remove useless DISTINCT for MAX and MIN. + * This rule should be applied before ReplaceDeduplicateWithAggregate. 
*/ object EliminateDistinct extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { - case AggregateExpression(af @ Max(_), _, true, _) => AggregateExpression(af, Complete, false) - case AggregateExpression(af @ Min(_), _, true, _) => AggregateExpression(af, Complete, false) + case AggregateExpression(max: Max, _, true, _) => AggregateExpression(max, Complete, false) + case AggregateExpression(min: Min, _, true, _) => AggregateExpression(min, Complete, false) } } From 2f894997498c472f4da1f44e77157683da17041d Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Mon, 26 Jun 2017 21:56:02 -0700 Subject: [PATCH 04/10] fix typo: distince=>distinct --- ...liminateDistinceSuite.scala => EliminateDistinctSuite.scala} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/{EliminateDistinceSuite.scala => EliminateDistinctSuite.scala} (97%) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinceSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala similarity index 97% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinceSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala index a60b76dfbf407..7bf13f37e5d94 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinceSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -class EliminateDistinceSuite extends PlanTest { +class EliminateDistinctSuite extends PlanTest { object 
Optimize extends RuleExecutor[LogicalPlan] { val batches = From 19163d4a3931d1b6645ad20205546e1aa2b820d8 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 27 Jun 2017 17:59:12 -0700 Subject: [PATCH 05/10] retain AggregateMode --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 09d1152c6701c..b8e7dabf935ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -158,8 +158,10 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) */ object EliminateDistinct extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { - case AggregateExpression(max: Max, _, true, _) => AggregateExpression(max, Complete, false) - case AggregateExpression(min: Min, _, true, _) => AggregateExpression(min, Complete, false) + case AggregateExpression(max: Max, mode: AggregateMode, true, _) => + AggregateExpression(max, mode, false) + case AggregateExpression(min: Min, mode: AggregateMode, true, _) => + AggregateExpression(min, mode, false) } } From fd3c849f0a6bcdd09c99bcde31fed56ccce43e50 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 27 Jun 2017 18:28:21 -0700 Subject: [PATCH 06/10] revise code style --- .../spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala index 7bf13f37e5d94..91d56a5a1034e 100644 ---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala @@ -37,7 +37,7 @@ class EliminateDistinctSuite extends PlanTest { .select(maxDistinct('a) as('result)) .analyze val answer = testRelation - .select(max('a) as('result)) + .select(max('a).as('result)) .analyze assert(query != answer) comparePlans(Optimize.execute(query), answer) @@ -48,7 +48,7 @@ class EliminateDistinctSuite extends PlanTest { .select(minDistinct('a) as('result)) .analyze val answer = testRelation - .select(min('a) as('result)) + .select(min('a).as('result)) .analyze assert(query != answer) comparePlans(Optimize.execute(query), answer) From 9fb977977fa6df707519caa09dd1971ba7b0626b Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 27 Jun 2017 18:39:18 -0700 Subject: [PATCH 07/10] revise comment --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b8e7dabf935ad..97f54a227cbb8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -154,7 +154,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) /** * Remove useless DISTINCT for MAX and MIN. - * This rule should be applied before ReplaceDeduplicateWithAggregate. + * This rule should be applied before RewriteDistinctAggregates. 
*/ object EliminateDistinct extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { From 08f61f40db06174043a58c4dc6dc9e20c9107aef Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 27 Jun 2017 20:40:25 -0700 Subject: [PATCH 08/10] revise code style --- .../spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala index 91d56a5a1034e..f40691bd1a038 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala @@ -34,7 +34,7 @@ class EliminateDistinctSuite extends PlanTest { test("Eliminate Distinct in Max") { val query = testRelation - .select(maxDistinct('a) as('result)) + .select(maxDistinct('a).as('result)) .analyze val answer = testRelation .select(max('a).as('result)) @@ -45,7 +45,7 @@ class EliminateDistinctSuite extends PlanTest { test("Eliminate Distinct in Min") { val query = testRelation - .select(minDistinct('a) as('result)) + .select(minDistinct('a).as('result)) .analyze val answer = testRelation .select(min('a).as('result)) From 536aae27b2e85ecf7f6e3d1e38fd4c93afe8ab4b Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 27 Jun 2017 22:51:35 -0700 Subject: [PATCH 09/10] stop abusing extractors --- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 97f54a227cbb8..7f18e41517d32 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -158,10 +158,10 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) */ object EliminateDistinct extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { - case AggregateExpression(max: Max, mode: AggregateMode, true, _) => - AggregateExpression(max, mode, false) - case AggregateExpression(min: Min, mode: AggregateMode, true, _) => - AggregateExpression(min, mode, false) + case ae: AggregateExpression if ae.isDistinct => + ae.aggregateFunction match { + case _: Max | _: Min => ae.copy(isDistinct = false) + } } } From 5a3df30d08dad4d282f52ba2546464f7cc473cc6 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 27 Jun 2017 23:21:19 -0700 Subject: [PATCH 10/10] handle default case --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 7f18e41517d32..946fa7bae0199 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -161,6 +161,7 @@ object EliminateDistinct extends Rule[LogicalPlan] { case ae: AggregateExpression if ae.isDistinct => ae.aggregateFunction match { case _: Max | _: Min => ae.copy(isDistinct = false) + case _ => ae } } }