[CORE] Refactor columnar noop write rule #8422
File: GlutenWriterColumnarRules.scala
@@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.rules.Rule
```diff
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
 import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, DataWritingCommand, DataWritingCommandExec}
-import org.apache.spark.sql.execution.datasources.v2.{AppendDataExec, OverwriteByExpressionExec}
 import org.apache.spark.sql.hive.execution.{CreateHiveTableAsSelectCommand, InsertIntoHiveDirCommand, InsertIntoHiveTable}
 import org.apache.spark.sql.sources.DataSourceRegister
 import org.apache.spark.sql.vectorized.ColumnarBatch
```
@@ -133,19 +132,33 @@ object GlutenWriterColumnarRules {
```diff
     }
   }
 
-  case class NativeWritePostRule(session: SparkSession) extends Rule[SparkPlan] {
+  private[datasources] def injectFakeRowAdaptor(command: SparkPlan, child: SparkPlan): SparkPlan = {
```
Member: Is this API only called by
Contributor (Author): This API is also needed in
```diff
+    child match {
+      // If the child is columnar, we can just wrap & transfer the columnar data.
+      case c2r: ColumnarToRowExecBase =>
+        command.withNewChildren(Array(FakeRowAdaptor(c2r.child)))
+      // If the child is AQE, we make AQE "support columnar",
+      // then AQE itself will guarantee to generate columnar outputs.
+      // So FakeRowAdaptor will always consume columnar data,
+      // thus avoiding the case of c2r->aqe->r2c->writer.
+      case aqe: AdaptiveSparkPlanExec =>
+        command.withNewChildren(
+          Array(
+            FakeRowAdaptor(
+              AdaptiveSparkPlanExec(
+                aqe.inputPlan,
+                aqe.context,
+                aqe.preprocessingRules,
+                aqe.isSubquery,
+                supportsColumnar = true
+              ))))
+      case other => command.withNewChildren(Array(FakeRowAdaptor(other)))
+    }
+  }
 
-  private val NOOP_WRITE = "org.apache.spark.sql.execution.datasources.noop.NoopWrite$"
+  case class NativeWritePostRule(session: SparkSession) extends Rule[SparkPlan] {
 
     override def apply(p: SparkPlan): SparkPlan = p match {
-      case rc @ AppendDataExec(_, _, write)
-          if write.getClass.getName == NOOP_WRITE &&
-            BackendsApiManager.getSettings.enableNativeWriteFiles() =>
-        injectFakeRowAdaptor(rc, rc.child)
-      case rc @ OverwriteByExpressionExec(_, _, write)
-          if write.getClass.getName == NOOP_WRITE &&
-            BackendsApiManager.getSettings.enableNativeWriteFiles() =>
-        injectFakeRowAdaptor(rc, rc.child)
       case rc @ DataWritingCommandExec(cmd, child) =>
         // The same thread can set these properties in the last query submission.
         val fields = child.output.toStructType.fields
```
@@ -165,30 +178,6 @@ object GlutenWriterColumnarRules {
```diff
       case plan: SparkPlan => plan.withNewChildren(plan.children.map(apply))
     }
 
-    private def injectFakeRowAdaptor(command: SparkPlan, child: SparkPlan): SparkPlan = {
-      child match {
-        // if the child is columnar, we can just wrap&transfer the columnar data
-        case c2r: ColumnarToRowExecBase =>
-          command.withNewChildren(Array(FakeRowAdaptor(c2r.child)))
-        // If the child is aqe, we make aqe "support columnar",
-        // then aqe itself will guarantee to generate columnar outputs.
-        // So FakeRowAdaptor will always consumes columnar data,
-        // thus avoiding the case of c2r->aqe->r2c->writer
-        case aqe: AdaptiveSparkPlanExec =>
-          command.withNewChildren(
-            Array(
-              FakeRowAdaptor(
-                AdaptiveSparkPlanExec(
-                  aqe.inputPlan,
-                  aqe.context,
-                  aqe.preprocessingRules,
-                  aqe.isSubquery,
-                  supportsColumnar = true
-                ))))
-        case other => command.withNewChildren(Array(FakeRowAdaptor(other)))
-      }
-    }
   }
 
   def injectSparkLocalProperty(spark: SparkSession, format: Option[String]): Unit = {
```
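Since the helper is now shared across files, it may help to see its dispatch in isolation. Below is a toy model of the three-way decision `injectFakeRowAdaptor` makes; the `Plan` ADT and the case-class names are hypothetical stand-ins for the real SparkPlan operators, not Gluten source:

```scala
// Toy model of injectFakeRowAdaptor's dispatch (hypothetical types, not Gluten code).
object InjectSketch extends App {
  sealed trait Plan
  case class Writer(child: Plan) extends Plan
  case class ColumnarToRow(child: Plan) extends Plan // stand-in for ColumnarToRowExecBase
  case class Aqe(child: Plan, supportsColumnar: Boolean) extends Plan // stand-in for AdaptiveSparkPlanExec
  case class FakeRow(child: Plan) extends Plan // stand-in for FakeRowAdaptor
  case object Scan extends Plan

  def inject(writer: Writer): Plan = writer.child match {
    // Columnar child: drop the ColumnarToRow and hand batches straight to the writer.
    case ColumnarToRow(inner) => Writer(FakeRow(inner))
    // AQE child: rebuild it with supportsColumnar = true so its output stays columnar,
    // avoiding the c2r -> aqe -> r2c -> writer round trip.
    case Aqe(inner, _) => Writer(FakeRow(Aqe(inner, supportsColumnar = true)))
    // Anything else: wrap as-is; FakeRow forwards whatever format arrives.
    case other => Writer(FakeRow(other))
  }

  println(inject(Writer(Aqe(Scan, supportsColumnar = false))))
  // Writer(FakeRow(Aqe(Scan,true)))
}
```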
File: GlutenNoopWriterRule.scala (new file)

@@ -0,0 +1,42 @@
```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.execution.datasources.noop

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.datasources.GlutenWriterColumnarRules.injectFakeRowAdaptor
import org.apache.spark.sql.execution.datasources.v2.{AppendDataExec, OverwriteByExpressionExec}

/**
 * A rule that injects a FakeRowAdaptor for NoopWrite.
 *
 * The current V2 command does not support columnar execution. Therefore, when its child node is
 * columnar, vanilla Spark inserts a ColumnarToRow conversion between the V2 command and its child.
 * This rule replaces the inserted ColumnarToRow with a FakeRowAdaptor, effectively bypassing the
 * ColumnarToRow operation for NoopWrite. Since NoopWrite does not actually perform any data
 * operations, it can accept input data in either row-based or columnar format.
 */
case class GlutenNoopWriterRule(session: SparkSession) extends Rule[SparkPlan] {
```
Member: Such a rule could be placed in this folder.
Contributor (Author): We cannot move it to that folder, as the
```scala
  override def apply(p: SparkPlan): SparkPlan = p match {
    case rc @ AppendDataExec(_, _, NoopWrite) =>
      injectFakeRowAdaptor(rc, rc.child)
    case rc @ OverwriteByExpressionExec(_, _, NoopWrite) =>
      injectFakeRowAdaptor(rc, rc.child)
    case _ => p
  }
}
```
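For reference, NoopWrite comes from Spark's built-in noop sink, which discards all rows and is commonly used for benchmarking. A write like the sketch below plans an OverwriteByExpressionExec over NoopWrite, which is exactly the shape this rule rewrites; the snippet assumes an active SparkSession named `spark`:

```scala
// Drives a V2 write through Spark's built-in noop sink. With this rule applied,
// the planned OverwriteByExpressionExec(_, _, NoopWrite) gets a FakeRowAdaptor
// in place of the usual ColumnarToRow in front of the (discarding) writer.
spark.range(100)
  .write
  .format("noop") // resolves to NoopDataSource / NoopWrite
  .mode("overwrite")
  .save()
```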
File: ClickHouseAdaptiveQueryExecSuite.scala
@@ -16,6 +16,7 @@ | |
| */ | ||
| package org.apache.spark.sql.execution.adaptive.clickhouse | ||
|
|
||
| import org.apache.gluten.config.GlutenConfig | ||
| import org.apache.gluten.execution.{BroadcastHashJoinExecTransformerBase, ShuffledHashJoinExecTransformerBase, SortExecTransformer, SortMergeJoinExecTransformer} | ||
|
|
||
| import org.apache.spark.SparkConf | ||
|
|
@@ -25,14 +26,20 @@ import org.apache.spark.sql.GlutenTestConstants.GLUTEN_TEST
```diff
 import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.adaptive._
+import org.apache.spark.sql.execution.command.DataWritingCommandExec
+import org.apache.spark.sql.execution.datasources.FakeRowAdaptor
+import org.apache.spark.sql.execution.datasources.noop.NoopDataSource
+import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec
 import org.apache.spark.sql.execution.exchange._
 import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, ShuffledHashJoinExec, SortMergeJoinExec}
 import org.apache.spark.sql.execution.metric.SQLShuffleReadMetricsReporter
 import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate
 import org.apache.spark.sql.functions.when
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode
 import org.apache.spark.sql.test.SQLTestData.TestData
 import org.apache.spark.sql.types.{IntegerType, StructType}
+import org.apache.spark.sql.util.QueryExecutionListener
 
 import org.apache.log4j.Level
```
@@ -42,7 +49,7 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
```diff
 
   override def sparkConf: SparkConf = {
     super.sparkConf
-      .set("spark.gluten.sql.columnar.forceShuffledHashJoin", "false")
+      .set(GlutenConfig.COLUMNAR_FORCE_SHUFFLED_HASH_JOIN_ENABLED.key, "false")
       .set(SQLConf.SHUFFLE_PARTITIONS.key, "5")
   }
 
```
@@ -1196,6 +1203,86 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
```scala
    }
  }

  testGluten("SPARK-32932: Do not use local shuffle read at final stage on write command") {
    withSQLConf(
      SQLConf.PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString,
      SQLConf.SHUFFLE_PARTITIONS.key -> "5",
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true"
    ) {
      val data =
        for (
          i <- 1L to 10L;
          j <- 1L to 3L
        ) yield (i, j)

      val df = data.toDF("i", "j").repartition($"j")
      var noLocalread: Boolean = false
      val listener = new QueryExecutionListener {
        override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {
          qe.executedPlan match {
            case plan @ (_: DataWritingCommandExec | _: V2TableWriteExec) =>
              noLocalread = collect(plan) {
```
Contributor (Author): Remove the child plan check, as we would add
```scala
                case exec: AQEShuffleReadExec if exec.isLocalRead => exec
              }.isEmpty
            case _ => // ignore other events
          }
        }
        override def onFailure(
            funcName: String,
            qe: QueryExecution,
            exception: Exception): Unit = {}
      }
      spark.listenerManager.register(listener)

      withTable("t") {
        df.write.partitionBy("j").saveAsTable("t")
        sparkContext.listenerBus.waitUntilEmpty()
        assert(noLocalread)
        noLocalread = false
      }

      // Test DataSource v2
      val format = classOf[NoopDataSource].getName
      df.write.format(format).mode("overwrite").save()
      sparkContext.listenerBus.waitUntilEmpty()
      assert(noLocalread)
      noLocalread = false

      spark.listenerManager.unregister(listener)
    }
  }

  testGluten(
    "SPARK-30953: InsertAdaptiveSparkPlan should apply AQE on child plan of v2 write commands") {
    withSQLConf(
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
      SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") {
      var plan: SparkPlan = null
      val listener = new QueryExecutionListener {
        override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {
          plan = qe.executedPlan
        }
        override def onFailure(
            funcName: String,
            qe: QueryExecution,
            exception: Exception): Unit = {}
      }
      spark.listenerManager.register(listener)
      withTable("t1") {
        val format = classOf[NoopDataSource].getName
        Seq((0, 1)).toDF("x", "y").write.format(format).mode("overwrite").save()

        sparkContext.listenerBus.waitUntilEmpty()
        assert(plan.isInstanceOf[V2TableWriteExec])
        val childPlan = plan.asInstanceOf[V2TableWriteExec].child
        assert(childPlan.isInstanceOf[FakeRowAdaptor])
        assert(childPlan.asInstanceOf[FakeRowAdaptor].child.isInstanceOf[AdaptiveSparkPlanExec])
```
Contributor (Author): Refine the child plan check.
```scala

        spark.listenerManager.unregister(listener)
      }
    }
  }

  testGluten("SPARK-35650: Coalesce number of partitions by AEQ") {
    withSQLConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM.key -> "1") {
      Seq("REPARTITION", "REBALANCE(key)")
```
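Taken together with GlutenNoopWriterRule above, the refined assertions pin down the executed-plan shape for a noop V2 write under forced AQE. Roughly (a sketch of the expected tree, not verbatim explain output):

```
OverwriteByExpressionExec          // a V2TableWriteExec
+- FakeRowAdaptor                  // injected by GlutenNoopWriterRule
   +- AdaptiveSparkPlanExec        // rebuilt with supportsColumnar = true
      +- ...                       // columnar stages
```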