From ae23bee91679cb2754ff852c53015566314ecdd7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 13 Jun 2025 13:50:45 -0600 Subject: [PATCH 01/16] enable Spark SQL tests --- .github/workflows/spark_sql_test_native_auto.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spark_sql_test_native_auto.yml b/.github/workflows/spark_sql_test_native_auto.yml index bc2c278b6a..6adf40a1ae 100644 --- a/.github/workflows/spark_sql_test_native_auto.yml +++ b/.github/workflows/spark_sql_test_native_auto.yml @@ -22,6 +22,16 @@ concurrency: cancel-in-progress: true on: + push: + paths-ignore: + - "doc/**" + - "docs/**" + - "**.md" + pull_request: + paths-ignore: + - "doc/**" + - "docs/**" + - "**.md" # manual trigger # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow workflow_dispatch: @@ -35,7 +45,7 @@ jobs: matrix: os: [ubuntu-24.04] java-version: [11] - spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.5'}] + spark-version: [{short: '3.5', full: '3.5.6'}] module: - {name: "catalyst", args1: "catalyst/test", args2: ""} - {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest} From 38d664310a77415718cf371dad62266e76ca0015 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 17 Jun 2025 13:24:18 -0600 Subject: [PATCH 02/16] fix --- dev/diffs/3.4.3.diff | 4 ++-- dev/diffs/3.5.6.diff | 4 ++-- dev/diffs/4.0.0-preview1.diff | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev/diffs/3.4.3.diff b/dev/diffs/3.4.3.diff index 52aa73e93d..83d82bf40b 100644 --- a/dev/diffs/3.4.3.diff +++ b/dev/diffs/3.4.3.diff @@ -2687,8 +2687,8 @@ index dd55fcfe42c..cdeef29df48 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || cometScanImpl == CometConf.SCAN_AUTO + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) diff --git a/dev/diffs/3.5.6.diff b/dev/diffs/3.5.6.diff index 4897de62a7..8985704d55 100644 --- a/dev/diffs/3.5.6.diff +++ b/dev/diffs/3.5.6.diff @@ -2658,8 +2658,8 @@ index e937173a590..18c0232014b 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || cometScanImpl == CometConf.SCAN_AUTO + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) diff --git a/dev/diffs/4.0.0-preview1.diff b/dev/diffs/4.0.0-preview1.diff index f4cb601e41..cd62afd185 100644 --- a/dev/diffs/4.0.0-preview1.diff +++ b/dev/diffs/4.0.0-preview1.diff @@ -2907,8 +2907,8 @@ index 5fbf379644f..8bdb733fec5 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || cometScanImpl == CometConf.SCAN_AUTO + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) From db10be76b3beafe62c882375c1f5c2d23b667609 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 17 Jun 2025 17:25:45 -0600 Subject: [PATCH 03/16] Revert "fix" This reverts commit 38d664310a77415718cf371dad62266e76ca0015. --- dev/diffs/3.4.3.diff | 4 ++-- dev/diffs/3.5.6.diff | 4 ++-- dev/diffs/4.0.0-preview1.diff | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev/diffs/3.4.3.diff b/dev/diffs/3.4.3.diff index 83d82bf40b..52aa73e93d 100644 --- a/dev/diffs/3.4.3.diff +++ b/dev/diffs/3.4.3.diff @@ -2687,8 +2687,8 @@ index dd55fcfe42c..cdeef29df48 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || cometScanImpl == CometConf.SCAN_AUTO -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) diff --git a/dev/diffs/3.5.6.diff b/dev/diffs/3.5.6.diff index 8985704d55..4897de62a7 100644 --- a/dev/diffs/3.5.6.diff +++ b/dev/diffs/3.5.6.diff @@ -2658,8 +2658,8 @@ index e937173a590..18c0232014b 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || cometScanImpl == CometConf.SCAN_AUTO -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) diff --git a/dev/diffs/4.0.0-preview1.diff b/dev/diffs/4.0.0-preview1.diff index cd62afd185..f4cb601e41 100644 --- a/dev/diffs/4.0.0-preview1.diff +++ b/dev/diffs/4.0.0-preview1.diff @@ -2907,8 +2907,8 @@ index 5fbf379644f..8bdb733fec5 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || cometScanImpl == CometConf.SCAN_AUTO -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) From 207d39b6c48eddbe8bfa008aaca9ea85a54ed69f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 17 Jun 2025 17:30:28 -0600 Subject: [PATCH 04/16] fix 3.5.6 diff --- dev/diffs/3.5.6.diff | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dev/diffs/3.5.6.diff b/dev/diffs/3.5.6.diff index 4897de62a7..ee509df99d 100644 --- a/dev/diffs/3.5.6.diff +++ b/dev/diffs/3.5.6.diff @@ -2630,7 +2630,7 @@ index abe606ad9c1..2d930b64cca 100644 val tblTargetName = "tbl_target" val tblSourceQualified = s"default.$tblSourceName" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala -index e937173a590..18c0232014b 100644 +index e937173a590..ca06132102d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -27,6 +27,7 @@ import scala.concurrent.duration._ @@ -2649,7 +2649,7 @@ index e937173a590..18c0232014b 100644 import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecution import org.apache.spark.sql.execution.datasources.DataSourceUtils -@@ -126,7 +128,26 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with +@@ -126,7 +128,28 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with } } } else { @@ -2658,8 +2658,10 @@ index e937173a590..18c0232014b 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || ++ cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || ++ cometScanImpl == CometConf.SCAN_AUTO + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) @@ -2677,7 +2679,7 @@ index e937173a590..18c0232014b 100644 } } -@@ -242,6 +263,29 @@ private[sql] trait SQLTestUtilsBase +@@ -242,6 +265,29 @@ private[sql] trait SQLTestUtilsBase protected override def _sqlContext: SQLContext = self.spark.sqlContext } @@ -2707,7 +2709,7 @@ index e937173a590..18c0232014b 100644 protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { SparkSession.setActiveSession(spark) super.withSQLConf(pairs: _*)(f) -@@ -435,6 +479,8 @@ private[sql] trait SQLTestUtilsBase +@@ -435,6 +481,8 @@ private[sql] trait SQLTestUtilsBase val schema = df.schema val withoutFilters = df.queryExecution.executedPlan.transform { case FilterExec(_, child) => child From facd4e30a5a12b94278fee7b8c099e3cdd719207 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 17 Jun 2025 17:32:34 -0600 Subject: [PATCH 05/16] fix 3.4.3 diff --- dev/diffs/3.4.3.diff | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dev/diffs/3.4.3.diff b/dev/diffs/3.4.3.diff index 52aa73e93d..96b9874ba0 100644 --- a/dev/diffs/3.4.3.diff +++ b/dev/diffs/3.4.3.diff @@ -2650,7 +2650,7 @@ index abe606ad9c1..2d930b64cca 100644 val tblTargetName = "tbl_target" val tblSourceQualified = s"default.$tblSourceName" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala -index dd55fcfe42c..cdeef29df48 100644 +index dd55fcfe42c..a1d390c93d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -27,6 +27,7 @@ import scala.concurrent.duration._ @@ -2678,7 +2678,7 @@ index dd55fcfe42c..cdeef29df48 100644 if (testTags.exists(_.isInstanceOf[DisableAdaptiveExecution])) { super.test(testName, testTags: _*) { withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { -@@ -126,7 +128,26 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with +@@ -126,7 +128,28 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with } } } else { @@ -2687,8 +2687,10 @@ index dd55fcfe42c..cdeef29df48 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || ++ cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || ++ cometScanImpl == CometConf.SCAN_AUTO + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) @@ -2706,7 +2708,7 @@ index dd55fcfe42c..cdeef29df48 100644 } } -@@ -242,6 +263,29 @@ private[sql] trait SQLTestUtilsBase +@@ -242,6 +265,29 @@ private[sql] trait SQLTestUtilsBase protected override def _sqlContext: SQLContext = self.spark.sqlContext } @@ -2736,7 +2738,7 @@ index dd55fcfe42c..cdeef29df48 100644 protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { SparkSession.setActiveSession(spark) super.withSQLConf(pairs: _*)(f) -@@ -434,6 +478,8 @@ private[sql] trait SQLTestUtilsBase +@@ -434,6 +480,8 @@ private[sql] trait SQLTestUtilsBase val schema = df.schema val withoutFilters = df.queryExecution.executedPlan.transform { case FilterExec(_, child) => child From 5beaad8119baeac851116d2f870cb5c0c0961a87 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 17 Jun 2025 17:34:00 -0600 Subject: [PATCH 06/16] fix 4.0.0-preview1 diff --- dev/diffs/4.0.0-preview1.diff | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dev/diffs/4.0.0-preview1.diff b/dev/diffs/4.0.0-preview1.diff index f4cb601e41..3ac9995769 100644 --- a/dev/diffs/4.0.0-preview1.diff +++ b/dev/diffs/4.0.0-preview1.diff @@ -2870,7 +2870,7 @@ index af07aceaed1..ed0b5e6d9be 100644 val tblTargetName = "tbl_target" val tblSourceQualified = s"default.$tblSourceName" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala -index 5fbf379644f..8bdb733fec5 100644 +index 5fbf379644f..d0575e1df69 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -27,6 +27,7 @@ import scala.jdk.CollectionConverters._ @@ -2898,7 +2898,7 @@ index 5fbf379644f..8bdb733fec5 100644 if (testTags.exists(_.isInstanceOf[DisableAdaptiveExecution])) { super.test(testName, testTags: _*) { withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { -@@ -127,7 +129,26 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with +@@ -127,7 +129,28 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with } } } else { @@ -2907,8 +2907,10 @@ index 5fbf379644f..8bdb733fec5 100644 + ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) + } else { + val cometScanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) -+ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT -+ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION ++ val isNativeIcebergCompat = cometScanImpl == CometConf.SCAN_NATIVE_ICEBERG_COMPAT || ++ cometScanImpl == CometConf.SCAN_AUTO ++ val isNativeDataFusion = cometScanImpl == CometConf.SCAN_NATIVE_DATAFUSION || ++ cometScanImpl == CometConf.SCAN_AUTO + if (isCometEnabled && isNativeIcebergCompat && + testTags.exists(_.isInstanceOf[IgnoreCometNativeIcebergCompat])) { + ignore(testName + " (disabled for NATIVE_ICEBERG_COMPAT)", testTags: _*)(testFun) @@ -2926,7 +2928,7 @@ index 5fbf379644f..8bdb733fec5 100644 } } -@@ -243,6 +264,29 @@ private[sql] trait SQLTestUtilsBase +@@ -243,6 +266,29 @@ private[sql] trait SQLTestUtilsBase protected override def _sqlContext: SQLContext = self.spark.sqlContext } @@ -2956,7 +2958,7 @@ index 5fbf379644f..8bdb733fec5 100644 protected override def withSQLConf[T](pairs: (String, String)*)(f: => T): T = { SparkSession.setActiveSession(spark) super.withSQLConf(pairs: _*)(f) -@@ -434,6 +478,8 @@ private[sql] trait SQLTestUtilsBase +@@ -434,6 +480,8 @@ private[sql] trait SQLTestUtilsBase val schema = df.schema val withoutFilters = df.queryExecution.executedPlan.transform { case FilterExec(_, child) => child From a0bba56f868d9bedfbddb2c8446205550deaec06 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 17 Jun 2025 20:31:50 -0600 Subject: [PATCH 07/16] fix Spark SQL log format --- .github/workflows/spark_sql_test_native_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spark_sql_test_native_auto.yml b/.github/workflows/spark_sql_test_native_auto.yml index 6adf40a1ae..ef64f1f69f 100644 --- a/.github/workflows/spark_sql_test_native_auto.yml +++ b/.github/workflows/spark_sql_test_native_auto.yml @@ -75,7 +75,7 @@ jobs: run: | cd apache-spark rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups - ENABLE_COMET=true ENABLE_COMET_SHUFFLE=true COMET_PARQUET_SCAN_IMPL=auto build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" + ENABLE_COMET=true ENABLE_COMET_SHUFFLE=true COMET_PARQUET_SCAN_IMPL=auto build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" env: LC_ALL: "C.UTF-8" From 79f77d6386b69da87ae883e21987864a4ace7251 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 24 Jun 2025 16:40:17 -0600 Subject: [PATCH 08/16] Check for data file location in auto scan mode --- .../main/scala/org/apache/comet/rules/CometScanRule.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala index 4837918094..6fa93d202a 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala @@ -258,11 +258,14 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] { } private def selectScan(scanExec: FileSourceScanExec, partitionSchema: StructType): String = { - // TODO these checks are not yet exhaustive. For example, native_iceberg_compat does - // not support reading from S3 val fallbackReasons = new ListBuffer[String]() + // native_iceberg_compat only supports local filesystem and S3 + if (!scanExec.relation.inputFiles.forall(path => path.startsWith("file://") || path.startsWith("s3a://"))) { + fallbackReasons += s"$SCAN_NATIVE_ICEBERG_COMPAT only supports local filesystem and S3" + } + val typeChecker = CometScanTypeChecker(SCAN_NATIVE_ICEBERG_COMPAT) val schemaSupported = typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons) From 405067719358766f462ef51a460d15f1ff182b37 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 25 Jun 2025 09:02:14 -0600 Subject: [PATCH 09/16] format --- .../src/main/scala/org/apache/comet/rules/CometScanRule.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala index 6fa93d202a..07d8085baf 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala @@ -262,7 +262,8 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] { val fallbackReasons = new ListBuffer[String]() // native_iceberg_compat only supports local filesystem and S3 - if (!scanExec.relation.inputFiles.forall(path => path.startsWith("file://") || path.startsWith("s3a://"))) { + if (!scanExec.relation.inputFiles + .forall(path => path.startsWith("file://") || path.startsWith("s3a://"))) { fallbackReasons += s"$SCAN_NATIVE_ICEBERG_COMPAT only supports local filesystem and S3" } From 5b23ef07642b50faaee1d20d6ca564cada9fbf50 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 25 Jun 2025 12:32:31 -0600 Subject: [PATCH 10/16] fix --- .../src/main/scala/org/apache/comet/rules/CometScanRule.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala index 07d8085baf..5b2997756c 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala @@ -301,7 +301,8 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] { fallbackReasons += s"$SCAN_NATIVE_ICEBERG_COMPAT requires ${COMET_EXEC_ENABLED.key}=true" } - if (cometExecEnabled && schemaSupported && partitionSchemaSupported && !knownIssues) { + if (cometExecEnabled && schemaSupported && partitionSchemaSupported && !knownIssues && + fallbackReasons.isEmpty) { logInfo(s"Auto scan mode selecting $SCAN_NATIVE_ICEBERG_COMPAT") SCAN_NATIVE_ICEBERG_COMPAT } else { From 483cb8ceac9e9e503ae63b62d2d05c9c237a0e2b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 25 Jun 2025 13:37:00 -0600 Subject: [PATCH 11/16] Enable auto scan mode by default --- .../workflows/spark_sql_test_native_auto.yml | 81 ------------------- .../scala/org/apache/comet/CometConf.scala | 2 +- 2 files changed, 1 insertion(+), 82 deletions(-) delete mode 100644 .github/workflows/spark_sql_test_native_auto.yml diff --git a/.github/workflows/spark_sql_test_native_auto.yml b/.github/workflows/spark_sql_test_native_auto.yml deleted file mode 100644 index 5c5e7118fe..0000000000 --- a/.github/workflows/spark_sql_test_native_auto.yml +++ /dev/null @@ -1,81 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Spark SQL Tests (native_auto) - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -on: - push: - paths-ignore: - - "doc/**" - - "docs/**" - - "**.md" - pull_request: - paths-ignore: - - "doc/**" - - "docs/**" - - "**.md" - # manual trigger - # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow - workflow_dispatch: - -env: - RUST_VERSION: stable - -jobs: - spark-sql-catalyst-native-auto: - strategy: - matrix: - os: [ubuntu-24.04] - java-version: [11] - spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.6'}] - module: - - {name: "catalyst", args1: "catalyst/test", args2: ""} - - {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest} - - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"} - - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"} - - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"} - - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"} - - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"} - fail-fast: false - name: spark-sql-native-auto-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }} - runs-on: ${{ matrix.os }} - container: - image: amd64/rust - steps: - - uses: actions/checkout@v4 - - name: Setup Rust & Java toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: ${{env.RUST_VERSION}} - jdk-version: ${{ matrix.java-version }} - - name: Setup Spark - uses: ./.github/actions/setup-spark-builder - with: - spark-version: ${{ matrix.spark-version.full }} - spark-short-version: ${{ matrix.spark-version.short }} - - name: Run Spark tests - run: | - cd apache-spark - rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups - ENABLE_COMET=true ENABLE_COMET_SHUFFLE=true COMET_PARQUET_SCAN_IMPL=auto build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" - env: - LC_ALL: "C.UTF-8" - diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala index 555beb5cbb..e0e3f88ef0 100644 --- a/common/src/main/scala/org/apache/comet/CometConf.scala +++ b/common/src/main/scala/org/apache/comet/CometConf.scala @@ -103,7 +103,7 @@ object CometConf extends ShimCometConf { .checkValues( Set(SCAN_NATIVE_COMET, SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO)) .createWithDefault(sys.env - .getOrElse("COMET_PARQUET_SCAN_IMPL", SCAN_NATIVE_COMET) + .getOrElse("COMET_PARQUET_SCAN_IMPL", SCAN_AUTO) .toLowerCase(Locale.ROOT)) val COMET_PARQUET_PARALLEL_IO_ENABLED: ConfigEntry[Boolean] = From eedcde80e0547ac61ef42182c279c942182132aa Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 25 Jun 2025 17:32:38 -0600 Subject: [PATCH 12/16] skip test --- dev/diffs/3.4.3.diff | 22 +++++++++++++++++++ dev/diffs/3.5.6.diff | 22 +++++++++++++++++++ .../spark/sql/comet/CometScanExec.scala | 2 ++ 3 files changed, 46 insertions(+) diff --git a/dev/diffs/3.4.3.diff b/dev/diffs/3.4.3.diff index 617d6a4826..c4d318e0bd 100644 --- a/dev/diffs/3.4.3.diff +++ b/dev/diffs/3.4.3.diff @@ -2868,6 +2868,28 @@ index 52abd248f3a..7a199931a08 100644 case h: HiveTableScanExec => h.partitionPruningPred.collect { case d: DynamicPruningExpression => d.child } +diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +index de3b1ffccf0..2a76d127093 100644 +--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala ++++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +@@ -23,14 +23,15 @@ import java.util.concurrent.{Executors, TimeUnit} + import org.scalatest.BeforeAndAfterEach + + import org.apache.spark.metrics.source.HiveCatalogMetrics +-import org.apache.spark.sql.QueryTest ++import org.apache.spark.sql.{IgnoreCometSuite, QueryTest} + import org.apache.spark.sql.execution.datasources.FileStatusCache + import org.apache.spark.sql.hive.test.TestHiveSingleton + import org.apache.spark.sql.internal.SQLConf + import org.apache.spark.sql.test.SQLTestUtils + + class PartitionedTablePerfStatsSuite +- extends QueryTest with TestHiveSingleton with SQLTestUtils with BeforeAndAfterEach { ++ extends QueryTest with TestHiveSingleton with SQLTestUtils with BeforeAndAfterEach ++ with IgnoreCometSuite { + + override def beforeEach(): Unit = { + super.beforeEach() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a902cb3a69e..800a3acbe99 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala diff --git a/dev/diffs/3.5.6.diff b/dev/diffs/3.5.6.diff index 618ea6a13b..98de916d0f 100644 --- a/dev/diffs/3.5.6.diff +++ b/dev/diffs/3.5.6.diff @@ -2883,6 +2883,28 @@ index 549431ef4f4..e48f1730da6 100644 withTempDir { dir => withSQLConf( "parquet.crypto.factory.class" -> +diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +index de3b1ffccf0..2a76d127093 100644 +--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala ++++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +@@ -23,14 +23,15 @@ import java.util.concurrent.{Executors, TimeUnit} + import org.scalatest.BeforeAndAfterEach + + import org.apache.spark.metrics.source.HiveCatalogMetrics +-import org.apache.spark.sql.QueryTest ++import org.apache.spark.sql.{IgnoreCometSuite, QueryTest} + import org.apache.spark.sql.execution.datasources.FileStatusCache + import org.apache.spark.sql.hive.test.TestHiveSingleton + import org.apache.spark.sql.internal.SQLConf + import org.apache.spark.sql.test.SQLTestUtils + + class PartitionedTablePerfStatsSuite +- extends QueryTest with TestHiveSingleton with SQLTestUtils with BeforeAndAfterEach { ++ extends QueryTest with TestHiveSingleton with SQLTestUtils with BeforeAndAfterEach ++ with IgnoreCometSuite { + + override def beforeEach(): Unit = { + super.beforeEach() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 6160c3e5f6c..0956d7d9edc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index 8bba2d8635..5679a87b30 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -74,6 +74,8 @@ case class CometScanExec( with ShimCometScanExec with CometPlan { + assert(scanImpl != CometConf.SCAN_AUTO) + // FIXME: ideally we should reuse wrapped.supportsColumnar, however that fails many tests override lazy val supportsColumnar: Boolean = relation.fileFormat.supportBatch(relation.sparkSession, schema) From 06a80fb4f50a5763ede5c50b3e5c0fb269b6fa00 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 25 Jun 2025 18:23:09 -0600 Subject: [PATCH 13/16] fix Spark 4 issue --- .../main/scala/org/apache/comet/rules/CometScanRule.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala index 5b2997756c..592069fcc6 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.comet.{CometConf, DataTypeSupport} +import org.apache.comet.{CometConf, CometSparkSessionExtensions, DataTypeSupport} import org.apache.comet.CometConf._ import org.apache.comet.CometSparkSessionExtensions.{isCometLoaded, isCometScanEnabled, withInfo, withInfos} import org.apache.comet.parquet.{CometParquetScan, SupportsComet} @@ -261,6 +261,10 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] { val fallbackReasons = new ListBuffer[String]() + if (CometSparkSessionExtensions.isSpark40Plus) { + fallbackReasons += s"$SCAN_NATIVE_ICEBERG_COMPAT is not implemented for Spark 4.0.0" + } + // native_iceberg_compat only supports local filesystem and S3 if (!scanExec.relation.inputFiles .forall(path => path.startsWith("file://") || path.startsWith("s3a://"))) { From 590388eb263f53f3415133885c5191a15ce1db71 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 26 Jun 2025 05:57:05 -0600 Subject: [PATCH 14/16] fix --- .../org/apache/comet/CometExpressionSuite.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 8fe04009c3..c1384bc687 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -2324,6 +2324,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } test("get_struct_field - select primitive fields") { + val scanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get() + assume(!(scanImpl == CometConf.SCAN_AUTO && CometSparkSessionExtensions.isSpark40Plus)) withTempPath { dir => // create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { @@ -2338,7 +2340,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { val df = spark.read.parquet(dir.toString()).select("nested1.id") // Comet's original scan does not support structs. // The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch - if (!CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + if (!scanImpl.equals(CometConf.SCAN_NATIVE_COMET)) { checkSparkAnswerAndOperator(df) } else { checkSparkAnswer(df) @@ -2347,6 +2349,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } test("get_struct_field - select subset of struct") { + val scanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get() + assume(!(scanImpl == CometConf.SCAN_AUTO && CometSparkSessionExtensions.isSpark40Plus)) withTempPath { dir => // create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { @@ -2368,7 +2372,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { val df = spark.read.parquet(dir.toString()) // Comet's original scan does not support structs. // The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch - if (!CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + if (scanImpl != CometConf.SCAN_NATIVE_COMET) { checkSparkAnswerAndOperator(df.select("nested1.id")) checkSparkAnswerAndOperator(df.select("nested1.nested2")) checkSparkAnswerAndOperator(df.select("nested1.nested2.id")) @@ -2383,6 +2387,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } test("get_struct_field - read entire struct") { + val scanImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get() + assume(!(scanImpl == CometConf.SCAN_AUTO && CometSparkSessionExtensions.isSpark40Plus)) withTempPath { dir => // create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { @@ -2404,7 +2410,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { val df = spark.read.parquet(dir.toString()).select("nested1.id") // Comet's original scan does not support structs. // The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch - if (!CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + if (scanImpl != CometConf.SCAN_NATIVE_COMET) { checkSparkAnswerAndOperator(df) } else { checkSparkAnswer(df) From 90c8183f937c9e57c931a60bf3d6b1bbc26dfbbe Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 26 Jun 2025 05:58:53 -0600 Subject: [PATCH 15/16] update 4.0.0 diff --- dev/diffs/4.0.0-preview1.diff | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dev/diffs/4.0.0-preview1.diff b/dev/diffs/4.0.0-preview1.diff index 0e9ee5dc4f..8686b4456e 100644 --- a/dev/diffs/4.0.0-preview1.diff +++ b/dev/diffs/4.0.0-preview1.diff @@ -3079,6 +3079,28 @@ index 52abd248f3a..7a199931a08 100644 case h: HiveTableScanExec => h.partitionPruningPred.collect { case d: DynamicPruningExpression => d.child } +diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +index de3b1ffccf0..2a76d127093 100644 +--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala ++++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +@@ -23,14 +23,15 @@ import java.util.concurrent.{Executors, TimeUnit} + import org.scalatest.BeforeAndAfterEach + + import org.apache.spark.metrics.source.HiveCatalogMetrics +-import org.apache.spark.sql.QueryTest ++import org.apache.spark.sql.{IgnoreCometSuite, QueryTest} + import org.apache.spark.sql.execution.datasources.FileStatusCache + import org.apache.spark.sql.hive.test.TestHiveSingleton + import org.apache.spark.sql.internal.SQLConf + import org.apache.spark.sql.test.SQLTestUtils + + class PartitionedTablePerfStatsSuite +- extends QueryTest with TestHiveSingleton with SQLTestUtils with BeforeAndAfterEach { ++ extends QueryTest with TestHiveSingleton with SQLTestUtils with BeforeAndAfterEach ++ with IgnoreCometSuite { + + override def beforeEach(): Unit = { + super.beforeEach() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 0bcac639443..8957c76886f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala From cca388d49b2612e639ebe1c37df7a19937e4dcc3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Jun 2025 14:09:42 -0600 Subject: [PATCH 16/16] run Spark SQL tests for native_comet --- .github/workflows/spark_sql_test.yml | 43 ++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 0b19d4d2ab..5422012ddf 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -40,7 +40,7 @@ env: RUST_VERSION: stable jobs: - spark-sql-native-comet: + spark-sql-auto-scan: strategy: matrix: os: [ubuntu-24.04] @@ -75,7 +75,46 @@ jobs: run: | cd apache-spark rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups - ENABLE_COMET=true ENABLE_COMET_SHUFFLE=true build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" + ENABLE_COMET=true ENABLE_COMET_SHUFFLE=true build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" + env: + LC_ALL: "C.UTF-8" + + spark-sql-native-native-comet: + strategy: + matrix: + os: [ ubuntu-24.04 ] + java-version: [ 11 ] + spark-version: [ { short: '3.4', full: '3.4.3' }, { short: '3.5', full: '3.5.6' } ] + module: + - { name: "catalyst", args1: "catalyst/test", args2: "" } + - { name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest } + - { name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest" } + - { name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest" } + - { name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest" } + - { name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest" } + - { name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest" } + fail-fast: false + name: spark-sql-native-comet-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }} + runs-on: ${{ matrix.os }} + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + - name: Setup Rust & Java toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: ${{env.RUST_VERSION}} + jdk-version: ${{ matrix.java-version }} + - name: Setup Spark + uses: ./.github/actions/setup-spark-builder + with: + spark-version: ${{ matrix.spark-version.full }} + spark-short-version: ${{ matrix.spark-version.short }} + - name: Run Spark tests + run: | + cd apache-spark + rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups + ENABLE_COMET=true ENABLE_COMET_SHUFFLE=true COMET_PARQUET_SCAN_IMPL=native_comet build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" env: LC_ALL: "C.UTF-8"