Changes from all commits (19 commits)
- d474ba3 test: add ParquetSchemaMismatchSuite skeleton for issue #3720 (andygrove, Apr 25, 2026)
- bee01e8 test: case 1 binary read as timestamp (andygrove, Apr 25, 2026)
- e19a5d2 test: case 2 int32 read as int64 (andygrove, Apr 25, 2026)
- 96390ac test: case 3 timestamp_ltz read as timestamp_ntz (andygrove, Apr 25, 2026)
- 1aa00df test: case 4 incompatible decimal precision/scale (andygrove, Apr 25, 2026)
- 1d139ee test: case 5 int32 as int64 with row group filter (andygrove, Apr 25, 2026)
- 69b1457 test: case 6 string read as int (andygrove, Apr 25, 2026)
- b012e99 test: case 7 timestamp_ntz read as array<timestamp_ntz> (andygrove, Apr 25, 2026)
- 318cac7 test: update matrix row 7 with confirmed throw outcomes (andygrove, Apr 25, 2026)
- 60a0ffa test: control case int8 read as int32 (andygrove, Apr 25, 2026)
- eabfdef test: control case float read as double (andygrove, Apr 25, 2026)
- cd8e68b test: handle Spark 4.0 type widening in iceberg-compat assertions (andygrove, Apr 25, 2026)
- 6bbec69 Merge remote-tracking branch 'apache/main' into tests/issue-3720-sche… (andygrove, Apr 28, 2026)
- dbdcca6 test: align native_datafusion schema-mismatch assertions with #4090, … (andygrove, Apr 28, 2026)
- 577e0b1 Merge branch 'main' into tests/issue-3720-schema-mismatch (andygrove, Apr 29, 2026)
- 6c5cd1b ci: add ParquetSchemaMismatchSuite to Linux and macOS workflows (andygrove, Apr 29, 2026)
- 972158d docs: simplify ParquetSchemaMismatchSuite class-level documentation (andygrove, Apr 29, 2026)
- 751af1e fix: fall back to Spark for TimestampType/TimestampNTZType schema mis… (andygrove, Apr 29, 2026)
- 2c6bd00 fix: add timestampNTZSafetyCheck to fall back to Spark for TimestampN… (andygrove, Apr 29, 2026)
1 change: 1 addition & 0 deletions .github/workflows/pr_build_linux.yml
@@ -323,6 +323,7 @@ jobs:
org.apache.comet.parquet.ParquetReadV1Suite
org.apache.comet.parquet.ParquetReadV2Suite
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
org.apache.comet.parquet.ParquetSchemaMismatchSuite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
org.apache.spark.sql.comet.ParquetEncryptionITCase
1 change: 1 addition & 0 deletions .github/workflows/pr_build_macos.yml
@@ -170,6 +170,7 @@ jobs:
org.apache.comet.parquet.ParquetReadV1Suite
org.apache.comet.parquet.ParquetReadV2Suite
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
org.apache.comet.parquet.ParquetSchemaMismatchSuite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
org.apache.spark.sql.comet.ParquetEncryptionITCase
14 changes: 14 additions & 0 deletions common/src/main/scala/org/apache/comet/CometConf.scala
@@ -785,6 +785,20 @@ object CometConf extends ShimCometConf {
.booleanConf
.createWithDefault(true)

val COMET_PARQUET_TIMESTAMP_NTZ_CHECK: ConfigEntry[Boolean] =
conf("spark.comet.scan.timestampNTZSafetyCheck")
.category(CATEGORY_SCAN)
.doc(
"Parquet files may contain INT96 timestamps (TimestampType/LTZ) which the " +
"native_datafusion scan cannot distinguish from TimestampNTZType after Parquet " +
"schema coercion. When this config is true (default), the native_datafusion scan " +
"falls back to Spark for TimestampNTZ columns to avoid silently returning incorrect " +
"timestamp values. Set to false to allow native execution if you know your Parquet " +
"files do not contain INT96 timestamps being read as TimestampNTZ. See " +
s"https://github.com/apache/datafusion-comet/issues/3720 for details. $COMPAT_GUIDE.")
.booleanConf
.createWithDefault(true)

val COMET_EXEC_STRICT_FLOATING_POINT: ConfigEntry[Boolean] =
conf("spark.comet.exec.strictFloatingPoint")
.category(CATEGORY_EXEC)
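As a usage note for the config above: a user who has verified their Parquet files contain no INT96 timestamps could opt out of the safety check at session level. This is an illustrative configuration sketch, not code from this PR; only the config key comes from the diff.

```scala
// Illustrative only: disable the TimestampNTZ safety check when you are
// certain no INT96 timestamps will be read as TimestampNTZ columns.
// SparkSession setup is a hypothetical example, not part of this PR.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("comet-ntz-opt-out")
  .getOrCreate()

// Allows the native_datafusion scan to read TimestampNTZ columns natively.
spark.conf.set("spark.comet.scan.timestampNTZSafetyCheck", "false")
```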
6 changes: 6 additions & 0 deletions docs/source/user-guide/latest/compatibility/scans.md
@@ -71,6 +71,12 @@ requires `spark.comet.exec.enabled=true` because the scan node must be wrapped b
- Duplicate field names in case-insensitive mode (e.g., a Parquet file with both `B` and `b` columns)
are detected at read time and raise a `SparkRuntimeException` with error class `_LEGACY_ERROR_TEMP_2093`,
matching Spark's behavior.
- `TimestampNTZType` columns, by default. Parquet files may contain INT96 timestamps (`TimestampType`/LTZ)
which the `native_datafusion` scan cannot distinguish from `TimestampNTZType` after Parquet schema coercion,
potentially returning incorrect timestamp values. When `spark.comet.scan.timestampNTZSafetyCheck=true`
(default), the scan falls back to Spark for `TimestampNTZ` columns. Set to `false` if your Parquet files
do not contain INT96 timestamps being read as `TimestampNTZ`. See
[issue #3720](https://github.com/apache/datafusion-comet/issues/3720) for more details.

## `native_iceberg_compat` Limitations

@@ -709,6 +709,15 @@ case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with C
"native execution if your data does not contain unsigned small integers. " +
CometConf.COMPAT_GUIDE
false
case _: TimestampNTZType
if scanImpl == CometConf.SCAN_NATIVE_DATAFUSION &&
CometConf.COMET_PARQUET_TIMESTAMP_NTZ_CHECK.get() =>
fallbackReasons +=
s"$scanImpl scan may read INT96 timestamps as TimestampNTZ incorrectly. " +
s"Set ${CometConf.COMET_PARQUET_TIMESTAMP_NTZ_CHECK.key}=false to allow " +
"native execution if your Parquet files do not contain INT96 timestamps " +
s"being read as TimestampNTZ. ${CometConf.COMPAT_GUIDE}"
false
case dt if isStringCollationType(dt) =>
// we don't need specific support for collation in scans, but this
// is a convenient place to force the whole query to fall back to Spark for now
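The pattern match above can be sketched in isolation. The following is a minimal standalone sketch of the fallback decision, not the real `CometScanTypeChecker`: the `DataType` hierarchy, `ScanNativeDatafusion` constant, and `nativeScanSupports` function are simplified stand-ins for the Spark and Comet types used in the diff.

```scala
// Simplified stand-ins for Spark's DataType and Comet's scan-impl names.
sealed trait DataType
case object TimestampNTZType extends DataType
case object LongType extends DataType

val ScanNativeDatafusion = "native_datafusion"

// Returns true when the type can be scanned natively. TimestampNTZ falls
// back to Spark while the safety check is enabled, mirroring the new case
// in the diff: the check guards against INT96 timestamps being misread
// as TimestampNTZ after Parquet schema coercion.
def nativeScanSupports(
    dt: DataType,
    scanImpl: String,
    ntzSafetyCheck: Boolean): Boolean = dt match {
  case TimestampNTZType
      if scanImpl == ScanNativeDatafusion && ntzSafetyCheck =>
    false // fall back to Spark
  case _ =>
    true
}
```

The decision depends only on the declared type, the scan implementation, and the flag, which is why a plain session-level config toggle is enough to opt out.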