14 changes: 7 additions & 7 deletions .github/workflows/velox_backend.yml
@@ -642,7 +642,7 @@ jobs:
export SPARK_SCALA_VERSION=2.12
$MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Piceberg \
-Pdelta -Phudi -DargLine="-Dspark.test.home=/opt/shims/spark32/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
@@ -729,7 +729,7 @@ jobs:
java -version
$MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
@@ -823,7 +823,7 @@ jobs:
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l $SPARK_HOME
$MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Pspark-ut \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest \
-DargLine="-Dspark.test.home=$SPARK_HOME ${EXTRA_FLAGS}"
- name: Upload test report
if: always()
@@ -918,7 +918,7 @@ jobs:
java -version
$MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ ${EXTRA_FLAGS}" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
@@ -971,7 +971,7 @@ jobs:
java -version
$MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Piceberg \
-Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark35-scala-2.13/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
@@ -1057,7 +1057,7 @@ jobs:
java -version
$MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
uses: actions/upload-artifact@v4
with:
@@ -1141,7 +1141,7 @@ jobs:
java -version
$MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
uses: actions/upload-artifact@v4
with:
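The workflow edits above have to track the annotation rename exactly, because ScalaTest identifies a tag by the annotation's fully qualified class name. Below is a minimal sketch of a suite carrying the renamed tag; the suite name and base class are hypothetical, only the @SkipTest annotation and the org.apache.gluten.tags.SkipTest FQCN come from this PR.

```scala
package org.apache.gluten.execution

import org.apache.gluten.tags.SkipTest
import org.scalatest.funsuite.AnyFunSuite

// Hypothetical suite for illustration only. ScalaTest resolves tag annotations to
// their fully qualified class name, so the CI option
// -DtagsToExclude=org.apache.gluten.tags.SkipTest now skips this suite, while the
// old value ...SkipTestTags would no longer match anything.
@SkipTest
class ExampleSkippedSuite extends AnyFunSuite {
  test("skipped whenever the SkipTest tag is excluded") {
    assert(1 + 1 == 2)
  }
}
```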
@@ -290,7 +290,7 @@ object VeloxConfig {
buildConf("spark.gluten.sql.columnar.backend.velox.resizeBatches.shuffleInput")
.internal()
.doc(s"If true, combine small columnar batches together before sending to shuffle. " +
s"The default minimum output batch size is equal to 0.8 * ${COLUMNAR_MAX_BATCH_SIZE.key}")
s"The default minimum output batch size is equal to 0.25 * ${COLUMNAR_MAX_BATCH_SIZE.key}")
.booleanConf
.createWithDefault(true)
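For context, a sketch of how a job might set this option together with the columnar batch size it is derived from. The key spark.gluten.sql.columnar.maxBatchSize is assumed here to be the one behind COLUMNAR_MAX_BATCH_SIZE; with 4096 rows, the corrected doc text implies a minimum combined output batch of 0.25 * 4096 = 1024 rows.

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: enable shuffle-input batch resizing and pin the max batch size.
// "spark.gluten.sql.columnar.maxBatchSize" is an assumed key for COLUMNAR_MAX_BATCH_SIZE;
// under the updated doc string the minimum combined batch is 0.25 * 4096 = 1024 rows.
val spark = SparkSession.builder()
  .appName("resize-shuffle-input-batches")
  .config("spark.gluten.sql.columnar.backend.velox.resizeBatches.shuffleInput", "true")
  .config("spark.gluten.sql.columnar.maxBatchSize", "4096")
  .getOrCreate()
```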

@@ -23,4 +23,4 @@
@TagAnnotation
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.METHOD, ElementType.TYPE})
public @interface SkipTestTags {}
public @interface SkipTest {}
@@ -17,11 +17,11 @@
package org.apache.gluten.execution

import org.apache.gluten.benchmarks.RandomParquetDataGenerator
import org.apache.gluten.tags.SkipTestTags
import org.apache.gluten.tags.SkipTest

import org.apache.spark.SparkConf

@SkipTestTags
@SkipTest
class DynamicOffHeapSizingSuite extends VeloxWholeStageTransformerSuite {
override protected val resourcePath: String = "/tpch-data-parquet"
override protected val fileFormat: String = "parquet"
@@ -51,7 +51,7 @@ class DynamicOffHeapSizingSuite extends VeloxWholeStageTransformerSuite {
getRootCause(e.getCause)
}

test("Dynamic Off-Heap Sizing") {
test("Dynamic off-heap sizing") {
System.gc()
dataGenerator.generateRandomData(spark, Some(outputPath))
spark.read.format("parquet").load(outputPath).createOrReplaceTempView("tbl")
@@ -17,7 +17,7 @@
package org.apache.gluten.expression

import org.apache.gluten.execution.ProjectExecTransformer
import org.apache.gluten.tags.{SkipTestTags, UDFTest}
import org.apache.gluten.tags.{SkipTest, UDFTest}

import org.apache.spark.SparkConf
import org.apache.spark.sql.{GlutenQueryTest, Row, SparkSession}
@@ -255,7 +255,7 @@ class VeloxUdfSuiteLocal extends VeloxUdfSuite {
// /path/to/gluten/package/target/gluten-package-${project.version}.jar
// -Dvelox.udf.lib.path=\
// /path/to/gluten/cpp/build/velox/udf/examples/libmyudf.so
@SkipTestTags
@SkipTest
class VeloxUdfSuiteCluster extends VeloxUdfSuite {

override val master: String = "local-cluster[2,2,1024]"
@@ -18,13 +18,13 @@ package org.apache.gluten.fuzzer

import org.apache.gluten.execution.RowToVeloxColumnarExec
import org.apache.gluten.fuzzer.FuzzerResult.Successful
import org.apache.gluten.tags.{FuzzerTest, SkipTestTags}
import org.apache.gluten.tags.{FuzzerTest, SkipTest}

import org.apache.spark.SparkConf
import org.apache.spark.sql.DataFrame

@FuzzerTest
@SkipTestTags
@SkipTest
class RowToColumnarFuzzer extends FuzzerBase {

override protected def sparkConf: SparkConf = {
@@ -17,13 +17,13 @@
package org.apache.gluten.fuzzer

import org.apache.gluten.fuzzer.FuzzerResult.Successful
import org.apache.gluten.tags.{FuzzerTest, SkipTestTags}
import org.apache.gluten.tags.{FuzzerTest, SkipTest}

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.execution.ColumnarShuffleExchangeExec

@FuzzerTest
@SkipTestTags
@SkipTest
class ShuffleWriterFuzzer extends FuzzerBase {
private val REPARTITION_SQL = (numPartitions: Int) =>
s"select /*+ REPARTITION($numPartitions) */ * from tbl"
@@ -43,22 +43,26 @@ public long borrow(long size) {
return 0;
}

long totalMemory = Runtime.getRuntime().totalMemory();
long freeMemory = Runtime.getRuntime().freeMemory();
long usedOnHeapBytes = (totalMemory - freeMemory);
// Only JVM shrinking can reclaim space from the total JVM memory.
// See https://github.com/apache/incubator-gluten/issues/9276.
long totalHeapMemory = Runtime.getRuntime().totalMemory();
long freeHeapMemory = Runtime.getRuntime().freeMemory();

long usedOffHeapBytesNow = USED_OFFHEAP_BYTES.get();

if (size + usedOffHeapBytesNow + usedOnHeapBytes > MAX_MEMORY_IN_BYTES) {
// Adds the total JVM memory which is the actual memory the JVM occupied from the operating
// system into the counter.
if (size + usedOffHeapBytesNow + totalHeapMemory > MAX_MEMORY_IN_BYTES) {
Member: I got the idea and this fix looks correct to me
Cc: @zhli1142015

LOG.warn(
String.format(
"Failing allocation as unified memory is OOM. "
+ "Used Off-heap: %d, Used On-Heap: %d, "
+ "Free On-heap: %d, Total On-heap: %d, "
+ "Max On-heap: %d, Allocation: %d.",
usedOffHeapBytesNow,
usedOnHeapBytes,
freeMemory,
totalMemory,
totalHeapMemory - freeHeapMemory,
freeHeapMemory,
totalHeapMemory,
MAX_MEMORY_IN_BYTES,
size));

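To make the new accounting concrete, here is a small self-contained sketch (not the actual Gluten class, and deliberately single-threaded) of the borrow check after this change: the whole committed JVM heap is charged against the unified limit, since only a JVM heap shrink would hand that memory back to the OS. The 8 GiB cap and the member names are assumptions for the example.

```scala
// Simplified, single-threaded sketch of the updated borrow logic; the real
// implementation tracks off-heap usage atomically and logs a warning on failure.
object UnifiedMemorySketch {
  private val maxMemoryInBytes: Long = 8L * 1024 * 1024 * 1024 // assumed cap
  private var usedOffHeapBytes: Long = 0L

  // Returns `size` if the request fits under the cap, 0 otherwise.
  def borrow(size: Long): Long = {
    // Charge the heap the JVM currently holds from the OS, not just the used part.
    val totalHeapMemory = Runtime.getRuntime.totalMemory()
    if (size + usedOffHeapBytes + totalHeapMemory > maxMemoryInBytes) {
      0L
    } else {
      usedOffHeapBytes += size
      size
    }
  }
}
```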
14 changes: 7 additions & 7 deletions gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala
@@ -32,7 +32,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.network.util.JavaUtils
import org.apache.spark.softaffinity.SoftAffinityListener
import org.apache.spark.sql.execution.ui.{GlutenSQLAppStatusListener, GlutenUIUtils}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.{SparkConfigUtil, SQLConf}
import org.apache.spark.sql.internal.StaticSQLConf.SPARK_SESSION_EXTENSIONS
import org.apache.spark.task.TaskResources
import org.apache.spark.util.SparkResourceUtil
@@ -137,13 +137,13 @@ private[gluten] class GlutenDriverPlugin extends DriverPlugin with Logging {

private def setPredefinedConfigs(conf: SparkConf): Unit = {
// Spark SQL extensions
val extensions = if (conf.contains(SPARK_SESSION_EXTENSIONS.key)) {
s"${conf.get(SPARK_SESSION_EXTENSIONS.key)}," +
s"${GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME}"
} else {
s"${GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME}"
val extensionSeq =
SparkConfigUtil.getEntryValue(conf, SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty)
if (!extensionSeq.toSet.contains(GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME)) {
conf.set(
SPARK_SESSION_EXTENSIONS.key,
(extensionSeq :+ GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME).mkString(","))
}
conf.set(SPARK_SESSION_EXTENSIONS.key, extensions)

// adaptive custom cost evaluator class
val enableGlutenCostEvaluator = conf.getBoolean(
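The effect of the rewritten registration is easiest to see on a standalone SparkConf: the Gluten session extension is appended only when the user's spark.sql.extensions list does not already contain it, so re-running the plugin's setup cannot duplicate the entry. In this sketch the extension's fully qualified name is assumed; the real code takes it from GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME.

```scala
import org.apache.spark.SparkConf

object ExtensionRegistrationSketch {
  // Standalone illustration of the append-if-missing behaviour (not the plugin code).
  // The Gluten extension FQCN below is an assumption made for this example.
  val glutenExtension = "org.apache.gluten.extension.GlutenSessionExtensions"

  def register(conf: SparkConf): Unit = {
    val existing = conf
      .get("spark.sql.extensions", "")
      .split(",")
      .map(_.trim)
      .filter(_.nonEmpty)
      .toSeq
    if (!existing.contains(glutenExtension)) {
      conf.set("spark.sql.extensions", (existing :+ glutenExtension).mkString(","))
    }
  }
}

// With spark.sql.extensions already set to "com.example.MyExtensions", register()
// yields "com.example.MyExtensions,org.apache.gluten.extension.GlutenSessionExtensions";
// calling register() again leaves the value unchanged.
```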
@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.internal

import org.apache.spark.SparkConf
import org.apache.spark.internal.config.ConfigEntry

object SparkConfigUtil {
def getEntryValue[T](conf: SparkConf, entry: ConfigEntry[T]): T = {
conf.get(entry)
}
}
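This shim appears to exist because SparkConf's typed get(ConfigEntry) overload is package-private to Spark, so Gluten code outside the org.apache.spark namespace cannot call it directly; placing the helper under org.apache.spark.sql.internal works around that. Below is a hedged sketch of a caller, mirroring the call made in GlutenPlugin; the package and object names are hypothetical.

```scala
package org.apache.gluten.example // hypothetical package outside org.apache.spark

import org.apache.spark.SparkConf
import org.apache.spark.sql.internal.{SparkConfigUtil, StaticSQLConf}

object SessionExtensionLookup {
  // spark.sql.extensions is an optional Seq[String] entry, so the typed read
  // returns Option[Seq[String]]; default to an empty list when it is unset.
  def registeredExtensions(conf: SparkConf): Seq[String] =
    SparkConfigUtil
      .getEntryValue(conf, StaticSQLConf.SPARK_SESSION_EXTENSIONS)
      .getOrElse(Seq.empty)
}
```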
}