From c7e2606268ff2c00d38cb475d4ad83f27978274d Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 12 Feb 2026 12:31:29 -0800 Subject: [PATCH 01/13] Df52 migration - get failed tests --- spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala index 02d13c841d..58f8bee4b4 100644 --- a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala @@ -31,6 +31,7 @@ import org.apache.comet.DataTypeSupport.isComplexType import org.apache.comet.testing.{DataGenOptions, ParquetGenerator} import org.apache.comet.testing.FuzzDataGenerator.{doubleNaNLiteral, floatNaNLiteral} + class CometFuzzTestSuite extends CometFuzzTestBase { test("select *") { From 9a88360b0a046b48ea4de1e0c93428ac42b8f512 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 12 Feb 2026 12:38:56 -0800 Subject: [PATCH 02/13] [df52] miri --- .github/workflows/miri.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml index ea36e1359a..3f0bcd5e8c 100644 --- a/.github/workflows/miri.yml +++ b/.github/workflows/miri.yml @@ -59,5 +59,6 @@ jobs: cargo miri setup - name: Test with Miri run: | + cargo clean cd native MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test --lib --bins --tests --examples From 5cfdb4f104f9bddd9572da836a0028f3fea264dd Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 12 Feb 2026 12:41:32 -0800 Subject: [PATCH 03/13] [df52] miri --- .github/workflows/miri.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml index 3f0bcd5e8c..308186773f 100644 --- a/.github/workflows/miri.yml +++ b/.github/workflows/miri.yml @@ -59,6 +59,6 @@ jobs: cargo miri setup - name: Test with Miri run: | - cargo clean cd native + cargo clean MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test --lib --bins --tests --examples From 807a0d1c33753520b9847ebbad98f6e1bcb8d762 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 12 Feb 2026 12:57:21 -0800 Subject: [PATCH 04/13] [df52] miri --- .github/workflows/miri.yml | 1 - native/Cargo.lock | 241 +++++++++--------- .../org/apache/comet/CometFuzzTestSuite.scala | 1 - 3 files changed, 121 insertions(+), 122 deletions(-) diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml index 308186773f..ea36e1359a 100644 --- a/.github/workflows/miri.yml +++ b/.github/workflows/miri.yml @@ -60,5 +60,4 @@ jobs: - name: Test with Miri run: | cd native - cargo clean MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test --lib --bins --tests --examples diff --git a/native/Cargo.lock b/native/Cargo.lock index d1c8acf522..4d828a79e0 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -420,9 +420,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.37" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" dependencies = [ "compression-codecs", "compression-core", @@ -528,7 +528,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -554,9 +554,9 @@ checksum = 
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.13" +version = "1.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c456581cb3c77fafcc8c67204a70680d40b61112d6da78c77bd31d945b65f1b5" +checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -584,9 +584,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.11" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" +checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -606,9 +606,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.37.0" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a" +checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" dependencies = [ "cc", "cmake", @@ -618,9 +618,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c635c2dc792cb4a11ce1a4f392a925340d1bdf499289b5ec1ec6810954eb43f5" +checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -631,6 +631,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", + "bytes-utils", "fastrand", "http 1.4.0", "http-body 1.0.1", @@ -642,9 +643,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.93.0" +version = "1.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcb38bb33fc0a11f1ffc3e3e85669e0a11a37690b86f77e75306d8f369146a0" +checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434" dependencies = [ "aws-credential-types", "aws-runtime", @@ -666,9 +667,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.95.0" +version = "1.96.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ada8ffbea7bd1be1f53df1dadb0f8fdb04badb13185b3321b929d1ee3caad09" +checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981" dependencies = [ "aws-credential-types", "aws-runtime", @@ -690,9 +691,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.97.0" +version = "1.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6443ccadc777095d5ed13e21f5c364878c9f5bad4e35187a6cdbd863b0afcad" +checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409" dependencies = [ "aws-credential-types", "aws-runtime", @@ -715,9 +716,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.8" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efa49f3c607b92daae0c078d48a4571f599f966dce3caee5f1ea55c4d9073f99" +checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -737,9 +738,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.11" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52eec3db979d18cb807fc1070961cc51d87d069abe9ab57917769687368a8c6c" +checksum = 
"3cba48474f1d6807384d06fec085b909f5807e16653c5af5c45dfe89539f0b70" dependencies = [ "futures-util", "pin-project-lite", @@ -748,9 +749,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.63.3" +version = "0.63.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630e67f2a31094ffa51b210ae030855cb8f3b7ee1329bdd8d085aaf61e8b97fc" +checksum = "af4a8a5fe3e4ac7ee871237c340bbce13e982d37543b65700f4419e039f5d78e" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", @@ -769,9 +770,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.9" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12fb0abf49ff0cab20fd31ac1215ed7ce0ea92286ba09e2854b42ba5cabe7525" +checksum = "0709f0083aa19b704132684bc26d3c868e06bd428ccc4373b0b55c3e8748a58b" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -793,27 +794,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.62.3" +version = "0.62.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb96aa208d62ee94104645f7b2ecaf77bf27edf161590b6224bfbac2832f979" +checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0a46543fbc94621080b3cf553eb4cbbdc41dd9780a30c4756400f0139440a1d" +checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.13" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cebbddb6f3a5bd81553643e9c7daf3cc3dc5b0b5f398ac668630e8a84e6fff0" +checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0" dependencies = [ "aws-smithy-types", "urlencoding", @@ -821,9 +822,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3df87c14f0127a0d77eb261c3bc45d5b4833e2a1f63583ebfb728e4852134ee" +checksum = "8fd3dfc18c1ce097cf81fced7192731e63809829c6cbf933c1ec47452d08e1aa" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -846,9 +847,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.11.3" +version = "1.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49952c52f7eebb72ce2a754d3866cc0f87b97d2a46146b79f80f3a93fb2b3716" +checksum = "8c55e0837e9b8526f49e0b9bfa9ee18ddee70e853f5bc09c5d11ebceddcb0fec" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -863,9 +864,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.3" +version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3a26048eeab0ddeba4b4f9d51654c79af8c3b32357dc5f336cee85ab331c33" +checksum = "576b0d6991c9c32bc14fc340582ef148311f924d41815f641a308b5d11e8e7cd" dependencies = [ "base64-simd", "bytes", @@ -886,18 +887,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.13" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" +checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa" dependencies = [ "xmlparser", 
] [[package]] name = "aws-types" -version = "1.3.11" +version = "1.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" +checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1008,7 +1009,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1102,7 +1103,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1125,7 +1126,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1334,18 +1335,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.57" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" +checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.57" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" +checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" dependencies = [ "anstyle", "clap_lex", @@ -1353,9 +1354,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -1656,7 +1657,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1670,7 +1671,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1683,7 +1684,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1694,7 +1695,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1705,7 +1706,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1716,7 +1717,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core 0.23.0", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2321,7 +2322,7 @@ checksum = "c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2529,9 +2530,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" dependencies = [ "powerfmt", "serde_core", @@ -2555,7 +2556,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2565,7 +2566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2586,7 +2587,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.114", + "syn 2.0.115", "unicode-xid", ] @@ -2616,7 +2617,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2669,7 +2670,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2898,7 +2899,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -3564,9 +3565,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495" +checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -3579,13 +3580,13 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf" +checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -3739,9 +3740,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.181" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5" [[package]] name = "libloading" @@ -3898,9 +3899,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" @@ -4273,9 +4274,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4314,9 +4315,9 @@ dependencies = [ [[package]] name = "parquet-variant" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c254fac16af78ad96aa442290cb6504951c4d484fdfcfe58f4588033d30e4c8f" +checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184" dependencies = [ "arrow-schema", "chrono", @@ -4328,9 +4329,9 @@ dependencies = [ [[package]] name = "parquet-variant-compute" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2178772f1c5ad7e5da8b569d986d3f5cbb4a4cee915925f28fdc700dbb2e80cf" +checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c" 
dependencies = [ "arrow", "arrow-schema", @@ -4344,9 +4345,9 @@ dependencies = [ [[package]] name = "parquet-variant-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1510daa121c04848368f9c38d0be425b9418c70be610ecc0aa8071738c0ef3" +checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be" dependencies = [ "arrow-schema", "base64", @@ -4421,7 +4422,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -4563,7 +4564,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -4633,7 +4634,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.114", + "syn 2.0.115", "tempfile", ] @@ -4647,7 +4648,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -4912,7 +4913,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5238,9 +5239,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -5378,7 +5379,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5402,7 +5403,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5445,7 +5446,7 @@ dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5572,7 +5573,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5611,7 +5612,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5656,9 +5657,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" dependencies = [ "proc-macro2", "quote", @@ -5682,7 +5683,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5699,12 +5700,12 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 
0.4.1", "once_cell", "rustix 1.1.3", "windows-sys 0.61.2", @@ -5736,7 +5737,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5747,7 +5748,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5902,7 +5903,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5951,9 +5952,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.8+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "0742ff5ff03ea7e67c8ae6c93cac239e0d9784833362da3f9a9c1da8dfefcbdc" dependencies = [ "winnow", ] @@ -6022,7 +6023,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6066,7 +6067,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6086,9 +6087,9 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" [[package]] name = "unicode-segmentation" @@ -6272,7 +6273,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "wasm-bindgen-shared", ] @@ -6425,7 +6426,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6436,7 +6437,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6733,7 +6734,7 @@ dependencies = [ "heck", "indexmap 2.13.0", "prettyplease", - "syn 2.0.114", + "syn 2.0.115", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -6749,7 +6750,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -6831,7 +6832,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "synstructure", ] @@ -6852,7 +6853,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6872,7 +6873,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "synstructure", ] @@ -6912,7 +6913,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6923,9 +6924,9 @@ checksum = "a7948af682ccbc3342b6e9420e8c51c1fe5d7bf7756002b4a3c6cabfe96a7e3c" [[package]] name = "zmij" 
-version = "1.0.19" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zstd" diff --git a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala index 58f8bee4b4..02d13c841d 100644 --- a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala @@ -31,7 +31,6 @@ import org.apache.comet.DataTypeSupport.isComplexType import org.apache.comet.testing.{DataGenOptions, ParquetGenerator} import org.apache.comet.testing.FuzzDataGenerator.{doubleNaNLiteral, floatNaNLiteral} - class CometFuzzTestSuite extends CometFuzzTestBase { test("select *") { From 85c9ccbca38245005b133b9b1bb757ab128fd5dc Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 12 Feb 2026 13:37:59 -0800 Subject: [PATCH 05/13] [df52] miri --- .../main/spark-4.0/org/apache/comet/shims/CometExprShim.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 12ea91d423..9fe53b9a84 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -29,7 +29,7 @@ import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. 
From 6a0ae18ff3961d426b891961fcc114c6c4152708 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 13 Feb 2026 10:58:54 -0800 Subject: [PATCH 06/13] [df52] tests ignore --- dev/diffs/3.5.8.diff | 22 +++++++++++++++++++ native/core/src/parquet/schema_adapter.rs | 9 ++++++-- .../org/apache/comet/CometFuzzTestSuite.scala | 2 ++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff index beef445490..cba479b568 100644 --- a/dev/diffs/3.5.8.diff +++ b/dev/diffs/3.5.8.diff @@ -2833,6 +2833,28 @@ index d675503a8ba..f220892396e 100644 assert(bucketedScan.length == expectedNumBucketedScan) } +diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +index 7b1a5a32037..151ea63d740 100644 +--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala ++++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataOutputStream, Pat + + import org.apache.spark.{SparkArithmeticException, SparkException} + import org.apache.spark.sql._ ++import org.apache.spark.sql.IgnoreCometNativeDataFusion + import org.apache.spark.sql.catalyst.TableIdentifier + import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} + import org.apache.spark.sql.catalyst.parser.ParseException +@@ -1758,7 +1759,8 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { + } + } + +- test("INSERT rows, ALTER TABLE ADD COLUMNS with DEFAULTs, then SELECT them") { ++ test("INSERT rows, ALTER TABLE ADD COLUMNS with DEFAULTs, then SELECT them", ++ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) { + case class Config( + sqlConf: Option[(String, String)], + useDataFrames: Boolean = false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 7f6fa2a123e..c778b4e2c48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 2874b6cbf1..aab50805af 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -209,6 +209,8 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { // In that case, fall back to wrapping everything ourselves. let expr = match self.default_adapter.rewrite(Arc::clone(&expr)) { Ok(rewritten) => { + // Replace references to missing columns with default values + let rewritten = self.replace_missing_with_defaults(rewritten)?; // Replace DataFusion's CastColumnExpr with either: // - CometCastColumnExpr (for Struct/List/Map, uses spark_parquet_convert) // - Spark Cast (for simple scalar types) @@ -216,9 +218,10 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { .transform(|e| self.replace_with_spark_cast(e)) .data()? } - Err(_) => { + Err(e) => { // Default adapter failed (likely complex nested type cast). // Handle all type mismatches ourselves using spark_parquet_convert. + log::info!("Default schema adapter error: {}", e); self.wrap_all_type_mismatches(expr)? 
} }; @@ -249,7 +252,6 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { } } -#[allow(dead_code)] impl SparkPhysicalExprAdapter { /// Wrap ALL Column expressions that have type mismatches with CometCastColumnExpr. /// This is the fallback path when the default adapter fails (e.g., for complex @@ -440,6 +442,8 @@ impl SparkPhysicalExprAdapter { }) .collect(); + dbg!(&name_based, &expr); + if name_based.is_empty() { return Ok(expr); } @@ -448,6 +452,7 @@ impl SparkPhysicalExprAdapter { } } + /// Adapt a batch to match the target schema using expression evaluation. /// /// This function is useful for cases like Iceberg scanning where batches diff --git a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala index 02d13c841d..cd1976066c 100644 --- a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala @@ -67,6 +67,8 @@ class CometFuzzTestSuite extends CometFuzzTestBase { } test("select column with default value") { + // ignoring native datafusion https://github.com/apache/datafusion-comet/issues/3515 + assume(usingLegacyNativeCometScan) // This test fails in Spark's vectorized Parquet reader for DECIMAL(36,18) or BINARY default values. withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { // This test relies on two tables: 1) t1 the Parquet file generated by ParquetGenerator with random values, and From 9899af2773db379436b11158d538e9cc46390ec7 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 13 Feb 2026 11:11:45 -0800 Subject: [PATCH 07/13] [df52] tests ignore --- native/core/src/parquet/schema_adapter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index aab50805af..e2a5ed428f 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -452,7 +452,6 @@ impl SparkPhysicalExprAdapter { } } - /// Adapt a batch to match the target schema using expression evaluation. /// /// This function is useful for cases like Iceberg scanning where batches From e9c09d97133d3eda6a9e4ec52a1ca0d51025654a Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 13 Feb 2026 11:17:35 -0800 Subject: [PATCH 08/13] DataFusion 52 migration --- native/core/src/parquet/schema_adapter.rs | 47 ----------------------- 1 file changed, 47 deletions(-) diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index e2a5ed428f..dde9569ba5 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -371,53 +371,6 @@ impl SparkPhysicalExprAdapter { Ok(Transformed::no(expr)) } - /// Cast Column expressions where the physical and logical datatypes differ. - /// - /// This function traverses the expression tree and for each Column expression, - /// checks if the physical file schema datatype differs from the logical file schema - /// datatype. If they differ, it wraps the Column with a CastColumnExpr to perform - /// the necessary type conversion. 
- fn cast_datafusion_unsupported_expr( - &self, - expr: Arc, - ) -> DataFusionResult> { - expr.transform(|e| { - // Check if this is a Column expression - if let Some(column) = e.as_any().downcast_ref::() { - let col_idx = column.index(); - - // dbg!(&self.logical_file_schema, &self.physical_file_schema); - - // Get the logical datatype (expected by the query) - let logical_field = self.logical_file_schema.fields().get(col_idx); - // Get the physical datatype (actual file schema) - let physical_field = self.physical_file_schema.fields().get(col_idx); - - // dbg!(&logical_field, &physical_field); - - if let (Some(logical_field), Some(physical_field)) = (logical_field, physical_field) - { - let logical_type = logical_field.data_type(); - let physical_type = physical_field.data_type(); - - // If datatypes differ, insert a CastColumnExpr - if logical_type != physical_type { - let cast_expr: Arc = Arc::new(CometCastColumnExpr::new( - Arc::clone(&e), - Arc::clone(physical_field), - Arc::clone(logical_field), - None, - )); - // dbg!(&cast_expr); - return Ok(Transformed::yes(cast_expr)); - } - } - } - Ok(Transformed::no(e)) - }) - .data() - } - /// Replace references to missing columns with default values. fn replace_missing_with_defaults( &self, From d60f33673f2b118011bb088b887108b00e1f6dfa Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 13 Feb 2026 13:27:20 -0800 Subject: [PATCH 09/13] [df52] tests ignore --- dev/diffs/3.5.8.diff | 24 +---------------------- native/core/src/parquet/schema_adapter.rs | 2 -- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff index cba479b568..3c81e01a3c 100644 --- a/dev/diffs/3.5.8.diff +++ b/dev/diffs/3.5.8.diff @@ -2832,29 +2832,7 @@ index d675503a8ba..f220892396e 100644 + } assert(bucketedScan.length == expectedNumBucketedScan) } - -diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala -index 7b1a5a32037..151ea63d740 100644 ---- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala -+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala -@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataOutputStream, Pat - - import org.apache.spark.{SparkArithmeticException, SparkException} - import org.apache.spark.sql._ -+import org.apache.spark.sql.IgnoreCometNativeDataFusion - import org.apache.spark.sql.catalyst.TableIdentifier - import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} - import org.apache.spark.sql.catalyst.parser.ParseException -@@ -1758,7 +1759,8 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { - } - } - -- test("INSERT rows, ALTER TABLE ADD COLUMNS with DEFAULTs, then SELECT them") { -+ test("INSERT rows, ALTER TABLE ADD COLUMNS with DEFAULTs, then SELECT them", -+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) { - case class Config( - sqlConf: Option[(String, String)], - useDataFrames: Boolean = false) + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 7f6fa2a123e..c778b4e2c48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 
dde9569ba5..b25143ac19 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -395,8 +395,6 @@ impl SparkPhysicalExprAdapter { }) .collect(); - dbg!(&name_based, &expr); - if name_based.is_empty() { return Ok(expr); } From 7afea280acbcd97d76899f9c16a39ac058eb187e Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 13 Feb 2026 16:03:49 -0800 Subject: [PATCH 10/13] DataFusion 52 migration --- native/core/src/execution/planner.rs | 7 +++- native/core/src/parquet/parquet_exec.rs | 4 +- native/core/src/parquet/schema_adapter.rs | 37 ++++++++++--------- .../org/apache/comet/CometFuzzTestSuite.scala | 4 +- 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index b4a538aaed..aeb3db716e 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -1030,7 +1030,7 @@ impl PhysicalPlanner { .map(|expr| self.create_expr(expr, Arc::clone(&required_schema))) .collect(); - let default_values: Option> = if !scan + let default_values: Option> = if !scan .default_values .is_empty() { @@ -1060,6 +1060,11 @@ impl PhysicalPlanner { default_values_indexes .into_iter() .zip(default_values) + .map(|(idx, scalar_value)| { + let field = required_schema.field(idx); + let column = Column::new(field.name().as_str(), idx); + (column, scalar_value) + }) .collect(), ) } else { diff --git a/native/core/src/parquet/parquet_exec.rs b/native/core/src/parquet/parquet_exec.rs index f4cc7bf9fe..2d970734bb 100644 --- a/native/core/src/parquet/parquet_exec.rs +++ b/native/core/src/parquet/parquet_exec.rs @@ -28,7 +28,7 @@ use datafusion::datasource::physical_plan::{ use datafusion::datasource::source::DataSourceExec; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::execution::SendableRecordBatchStream; -use datafusion::physical_expr::expressions::BinaryExpr; +use datafusion::physical_expr::expressions::{BinaryExpr, Column}; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; @@ -67,7 +67,7 @@ pub(crate) fn init_datasource_exec( file_groups: Vec>, projection_vector: Option>, data_filters: Option>>, - default_values: Option>, + default_values: Option>, session_timezone: &str, case_sensitive: bool, session_ctx: &Arc, diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index b25143ac19..3e65cd8509 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -54,15 +54,15 @@ pub struct SparkPhysicalExprAdapterFactory { /// Spark-specific parquet options for type conversions parquet_options: SparkParquetOptions, /// Default values for columns that may be missing from the physical schema. - /// The key is the column index in the logical schema. - default_values: Option>, + /// The key is the Column (containing name and index). + default_values: Option>, } impl SparkPhysicalExprAdapterFactory { /// Create a new factory with the given options. 
pub fn new( parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, ) -> Self { Self { parquet_options, @@ -186,8 +186,8 @@ struct SparkPhysicalExprAdapter { physical_file_schema: SchemaRef, /// Spark-specific options for type conversions parquet_options: SparkParquetOptions, - /// Default values for missing columns (keyed by logical schema index) - default_values: Option>, + /// Default values for missing columns (keyed by Column) + default_values: Option>, /// The default DataFusion adapter to delegate standard handling to default_adapter: Arc, /// Mapping from logical column names to original physical column names, @@ -207,10 +207,10 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { // // The default adapter may fail for complex nested type casts (List, Map). // In that case, fall back to wrapping everything ourselves. + let expr = self.replace_missing_with_defaults(expr)?; let expr = match self.default_adapter.rewrite(Arc::clone(&expr)) { Ok(rewritten) => { // Replace references to missing columns with default values - let rewritten = self.replace_missing_with_defaults(rewritten)?; // Replace DataFusion's CastColumnExpr with either: // - CometCastColumnExpr (for Struct/List/Map, uses spark_parquet_convert) // - Spark Cast (for simple scalar types) @@ -384,17 +384,16 @@ impl SparkPhysicalExprAdapter { return Ok(expr); } - // Convert index-based defaults to name-based for replace_columns_with_literals + dbg!(&self.logical_file_schema, &self.physical_file_schema); + + // Convert Column-based defaults to name-based for replace_columns_with_literals let name_based: HashMap<&str, &ScalarValue> = defaults .iter() - .filter_map(|(idx, val)| { - self.logical_file_schema - .fields() - .get(*idx) - .map(|f| (f.name().as_str(), val)) - }) + .map(|(col, val)| (col.name(), val)) .collect(); + dbg!(&expr, &name_based); + if name_based.is_empty() { return Ok(expr); } @@ -465,13 +464,13 @@ pub fn adapt_batch_with_expressions( pub struct SparkSchemaAdapterFactory { /// Spark cast options parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, } impl SparkSchemaAdapterFactory { pub fn new( options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, ) -> Self { Self { parquet_options: options, @@ -509,7 +508,7 @@ pub struct SparkSchemaAdapter { required_schema: SchemaRef, /// Spark cast options parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, } impl SchemaAdapter for SparkSchemaAdapter { @@ -614,7 +613,7 @@ pub struct SchemaMapping { field_mappings: Vec>, /// Spark cast options parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, } impl SchemaMapper for SchemaMapping { @@ -643,7 +642,9 @@ impl SchemaMapper for SchemaMapping { || { if let Some(default_values) = &self.default_values { // We have a map of default values, see if this field is in there. - if let Some(value) = default_values.get(&field_idx) + // Create a Column from the field name and index to look up the default value + let column = Column::new(field.name().as_str(), field_idx); + if let Some(value) = default_values.get(&column) // Default value exists, construct a column from it. 
{ let cv = if field.data_type() == &value.data_type() { diff --git a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala index cd1976066c..faf4dd1eb8 100644 --- a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala @@ -67,8 +67,6 @@ class CometFuzzTestSuite extends CometFuzzTestBase { } test("select column with default value") { - // ignoring native datafusion https://github.com/apache/datafusion-comet/issues/3515 - assume(usingLegacyNativeCometScan) // This test fails in Spark's vectorized Parquet reader for DECIMAL(36,18) or BINARY default values. withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { // This test relies on two tables: 1) t1 the Parquet file generated by ParquetGenerator with random values, and @@ -76,7 +74,7 @@ class CometFuzzTestSuite extends CometFuzzTestBase { // We use the schema and values of t1 to simplify random value generation for the default column value in t2. val df = spark.read.parquet(filename) df.createOrReplaceTempView("t1") - val columns = df.schema.fields.filter(f => !isComplexType(f.dataType)).map(_.name) + val columns = df.schema.fields.filter(f => !isComplexType(f.dataType)).map(_.name).take(1) for (col <- columns) { // Select the first non-null value from our target column type. val defaultValueRow = From df7f3acddfd7e5cc02a3631b91bbe62489cc5893 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 13 Feb 2026 16:04:32 -0800 Subject: [PATCH 11/13] [df52] tests ignore --- spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala index faf4dd1eb8..02d13c841d 100644 --- a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala @@ -74,7 +74,7 @@ class CometFuzzTestSuite extends CometFuzzTestBase { // We use the schema and values of t1 to simplify random value generation for the default column value in t2. val df = spark.read.parquet(filename) df.createOrReplaceTempView("t1") - val columns = df.schema.fields.filter(f => !isComplexType(f.dataType)).map(_.name).take(1) + val columns = df.schema.fields.filter(f => !isComplexType(f.dataType)).map(_.name) for (col <- columns) { // Select the first non-null value from our target column type. val defaultValueRow = From fc3d5cb6fb851edf3b32b3f3cbc612c2836ddfcf Mon Sep 17 00:00:00 2001 From: comphead Date: Sat, 14 Feb 2026 10:26:42 -0800 Subject: [PATCH 12/13] DataFusion 52 migration --- native/core/src/parquet/schema_adapter.rs | 25 +++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 3e65cd8509..3203583bd8 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -41,9 +41,6 @@ use datafusion_physical_expr_adapter::{ }; use std::collections::HashMap; use std::sync::Arc; -// ============================================================================ -// New PhysicalExprAdapter Implementation (Recommended) -// ============================================================================ /// Factory for creating Spark-compatible physical expression adapters. 
/// @@ -384,15 +381,27 @@ impl SparkPhysicalExprAdapter { return Ok(expr); } - dbg!(&self.logical_file_schema, &self.physical_file_schema); + // dbg!(&self.logical_file_schema, &self.physical_file_schema); - // Convert Column-based defaults to name-based for replace_columns_with_literals - let name_based: HashMap<&str, &ScalarValue> = defaults + // Convert Column-based defaults to name-based for replace_columns_with_literals. + // If the default value's type doesn't match the logical schema, cast it. + let owned_values: Vec<(String, ScalarValue)> = defaults .iter() - .map(|(col, val)| (col.name(), val)) + .map(|(col, val)| { + let col_name = col.name(); + let value = self + .logical_file_schema + .field_with_name(col_name) + .ok() + .filter(|field| val.data_type() != *field.data_type()) + .and_then(|field| val.cast_to(field.data_type()).ok()) + .unwrap_or_else(|| val.clone()); + (col_name.to_string(), value) + }) .collect(); - dbg!(&expr, &name_based); + let name_based: HashMap<&str, &ScalarValue> = + owned_values.iter().map(|(k, v)| (k.as_str(), v)).collect(); if name_based.is_empty() { return Ok(expr); From 4e5b95e5d281305d1cdc78a6567385e9ef050eac Mon Sep 17 00:00:00 2001 From: comphead Date: Sat, 14 Feb 2026 12:17:25 -0800 Subject: [PATCH 13/13] DataFusion 52 migration --- native/core/src/parquet/schema_adapter.rs | 31 +++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 3203583bd8..3402377b5d 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -384,9 +384,23 @@ impl SparkPhysicalExprAdapter { // dbg!(&self.logical_file_schema, &self.physical_file_schema); // Convert Column-based defaults to name-based for replace_columns_with_literals. - // If the default value's type doesn't match the logical schema, cast it. + // Only include columns that are MISSING from the physical file schema. + // If the default value's type doesn't match the logical schema, cast it using Spark cast. let owned_values: Vec<(String, ScalarValue)> = defaults .iter() + .filter(|(col, _)| { + // Only include defaults for columns missing from the physical file schema + let col_name = col.name(); + if self.parquet_options.case_sensitive { + self.physical_file_schema.field_with_name(col_name).is_err() + } else { + !self + .physical_file_schema + .fields() + .iter() + .any(|f| f.name().eq_ignore_ascii_case(col_name)) + } + }) .map(|(col, val)| { let col_name = col.name(); let value = self @@ -394,7 +408,18 @@ impl SparkPhysicalExprAdapter { .field_with_name(col_name) .ok() .filter(|field| val.data_type() != *field.data_type()) - .and_then(|field| val.cast_to(field.data_type()).ok()) + .and_then(|field| { + spark_parquet_convert( + ColumnarValue::Scalar(val.clone()), + field.data_type(), + &self.parquet_options, + ) + .ok() + .and_then(|cv| match cv { + ColumnarValue::Scalar(s) => Some(s), + _ => None, + }) + }) .unwrap_or_else(|| val.clone()); (col_name.to_string(), value) }) @@ -403,6 +428,8 @@ impl SparkPhysicalExprAdapter { let name_based: HashMap<&str, &ScalarValue> = owned_values.iter().map(|(k, v)| (k.as_str(), v)).collect(); + dbg!(&name_based, &expr); + if name_based.is_empty() { return Ok(expr); }
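The series ends with substitution narrowed to columns genuinely absent from the physical file schema, and with default values converted through spark_parquet_convert so Spark cast semantics apply. A self-contained sketch of that membership check, with a hypothetical helper name (patch 13 inlines the same logic in its filter closure):

use arrow::datatypes::Schema;

/// Returns true when `col_name` has no counterpart in the physical (file)
/// schema, honoring the session's case sensitivity setting.
fn is_missing_from_file(physical: &Schema, col_name: &str, case_sensitive: bool) -> bool {
    if case_sensitive {
        // Exact-name lookup: Err means the field is not present.
        physical.field_with_name(col_name).is_err()
    } else {
        // Spark's default (spark.sql.caseSensitive=false): match ignoring ASCII case.
        !physical
            .fields()
            .iter()
            .any(|f| f.name().eq_ignore_ascii_case(col_name))
    }
}

For a file schema containing a field C1, is_missing_from_file(&schema, "c1", true) is true and the default value kicks in, while is_missing_from_file(&schema, "c1", false) is false and the file's own column is read instead.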