diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff index beef445490..3c81e01a3c 100644 --- a/dev/diffs/3.5.8.diff +++ b/dev/diffs/3.5.8.diff @@ -2832,7 +2832,7 @@ index d675503a8ba..f220892396e 100644 + } assert(bucketedScan.length == expectedNumBucketedScan) } - + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 7f6fa2a123e..c778b4e2c48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala diff --git a/native/Cargo.lock b/native/Cargo.lock index d1c8acf522..4d828a79e0 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -420,9 +420,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.37" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" dependencies = [ "compression-codecs", "compression-core", @@ -528,7 +528,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -554,9 +554,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.13" +version = "1.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c456581cb3c77fafcc8c67204a70680d40b61112d6da78c77bd31d945b65f1b5" +checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -584,9 +584,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.11" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" +checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -606,9 +606,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.37.0" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a" +checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" dependencies = [ "cc", "cmake", @@ -618,9 +618,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c635c2dc792cb4a11ce1a4f392a925340d1bdf499289b5ec1ec6810954eb43f5" +checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -631,6 +631,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", + "bytes-utils", "fastrand", "http 1.4.0", "http-body 1.0.1", @@ -642,9 +643,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.93.0" +version = "1.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcb38bb33fc0a11f1ffc3e3e85669e0a11a37690b86f77e75306d8f369146a0" +checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434" dependencies = [ "aws-credential-types", "aws-runtime", @@ -666,9 +667,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.95.0" +version = "1.96.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ada8ffbea7bd1be1f53df1dadb0f8fdb04badb13185b3321b929d1ee3caad09" +checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981" dependencies = [ "aws-credential-types", "aws-runtime", @@ -690,9 +691,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.97.0" +version = "1.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6443ccadc777095d5ed13e21f5c364878c9f5bad4e35187a6cdbd863b0afcad" +checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409" dependencies = [ "aws-credential-types", "aws-runtime", @@ -715,9 +716,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.8" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efa49f3c607b92daae0c078d48a4571f599f966dce3caee5f1ea55c4d9073f99" +checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -737,9 +738,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.11" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52eec3db979d18cb807fc1070961cc51d87d069abe9ab57917769687368a8c6c" +checksum = "3cba48474f1d6807384d06fec085b909f5807e16653c5af5c45dfe89539f0b70" dependencies = [ "futures-util", "pin-project-lite", @@ -748,9 +749,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.63.3" +version = "0.63.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630e67f2a31094ffa51b210ae030855cb8f3b7ee1329bdd8d085aaf61e8b97fc" +checksum = "af4a8a5fe3e4ac7ee871237c340bbce13e982d37543b65700f4419e039f5d78e" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", @@ -769,9 +770,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.9" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12fb0abf49ff0cab20fd31ac1215ed7ce0ea92286ba09e2854b42ba5cabe7525" +checksum = "0709f0083aa19b704132684bc26d3c868e06bd428ccc4373b0b55c3e8748a58b" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -793,27 +794,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.62.3" +version = "0.62.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb96aa208d62ee94104645f7b2ecaf77bf27edf161590b6224bfbac2832f979" +checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0a46543fbc94621080b3cf553eb4cbbdc41dd9780a30c4756400f0139440a1d" +checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.13" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cebbddb6f3a5bd81553643e9c7daf3cc3dc5b0b5f398ac668630e8a84e6fff0" +checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0" dependencies = [ "aws-smithy-types", "urlencoding", @@ -821,9 +822,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3df87c14f0127a0d77eb261c3bc45d5b4833e2a1f63583ebfb728e4852134ee" +checksum = "8fd3dfc18c1ce097cf81fced7192731e63809829c6cbf933c1ec47452d08e1aa" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -846,9 +847,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.11.3" +version = "1.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49952c52f7eebb72ce2a754d3866cc0f87b97d2a46146b79f80f3a93fb2b3716" +checksum = "8c55e0837e9b8526f49e0b9bfa9ee18ddee70e853f5bc09c5d11ebceddcb0fec" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -863,9 +864,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.3" +version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3a26048eeab0ddeba4b4f9d51654c79af8c3b32357dc5f336cee85ab331c33" +checksum = "576b0d6991c9c32bc14fc340582ef148311f924d41815f641a308b5d11e8e7cd" dependencies = [ "base64-simd", "bytes", @@ -886,18 +887,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.13" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" +checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.11" +version = "1.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" +checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1008,7 +1009,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1102,7 +1103,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1125,7 +1126,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1334,18 +1335,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.57" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" +checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.57" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" +checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" dependencies = [ "anstyle", "clap_lex", @@ -1353,9 +1354,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -1656,7 +1657,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1670,7 +1671,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1683,7 +1684,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1694,7 +1695,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1705,7 +1706,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -1716,7 +1717,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core 0.23.0", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2321,7 +2322,7 @@ checksum = "c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2529,9 +2530,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" dependencies = [ "powerfmt", "serde_core", @@ -2555,7 +2556,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2565,7 +2566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2586,7 +2587,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.114", + "syn 2.0.115", "unicode-xid", ] @@ -2616,7 +2617,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2669,7 +2670,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -2898,7 +2899,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -3564,9 +3565,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495" +checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -3579,13 +3580,13 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf" +checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -3739,9 +3740,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.181" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5" [[package]] name = "libloading" @@ -3898,9 +3899,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" @@ -4273,9 +4274,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4314,9 +4315,9 @@ dependencies = [ [[package]] name = "parquet-variant" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c254fac16af78ad96aa442290cb6504951c4d484fdfcfe58f4588033d30e4c8f" +checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184" dependencies = [ "arrow-schema", "chrono", @@ -4328,9 +4329,9 @@ dependencies = [ [[package]] name = "parquet-variant-compute" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2178772f1c5ad7e5da8b569d986d3f5cbb4a4cee915925f28fdc700dbb2e80cf" +checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c" dependencies = [ "arrow", "arrow-schema", @@ -4344,9 +4345,9 @@ dependencies = [ [[package]] name = "parquet-variant-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1510daa121c04848368f9c38d0be425b9418c70be610ecc0aa8071738c0ef3" +checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be" dependencies = [ "arrow-schema", "base64", @@ -4421,7 +4422,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -4563,7 +4564,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -4633,7 +4634,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.114", + "syn 2.0.115", "tempfile", ] @@ -4647,7 +4648,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -4912,7 +4913,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5238,9 +5239,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -5378,7 +5379,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5402,7 +5403,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5445,7 +5446,7 @@ dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5572,7 +5573,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5611,7 +5612,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5656,9 +5657,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" dependencies = [ "proc-macro2", "quote", @@ -5682,7 +5683,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5699,12 +5700,12 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.1", "once_cell", "rustix 1.1.3", "windows-sys 0.61.2", @@ -5736,7 +5737,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5747,7 +5748,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5902,7 +5903,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -5951,9 +5952,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.8+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "0742ff5ff03ea7e67c8ae6c93cac239e0d9784833362da3f9a9c1da8dfefcbdc" dependencies = [ "winnow", ] @@ -6022,7 +6023,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6066,7 +6067,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6086,9 +6087,9 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" [[package]] name = "unicode-segmentation" @@ -6272,7 +6273,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "wasm-bindgen-shared", ] @@ -6425,7 +6426,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6436,7 +6437,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6733,7 +6734,7 @@ dependencies = [ "heck", "indexmap 2.13.0", "prettyplease", - "syn 2.0.114", + "syn 2.0.115", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -6749,7 +6750,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -6831,7 +6832,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "synstructure", ] @@ -6852,7 +6853,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6872,7 +6873,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", "synstructure", ] @@ -6912,7 +6913,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.115", ] [[package]] @@ -6923,9 +6924,9 @@ checksum = "a7948af682ccbc3342b6e9420e8c51c1fe5d7bf7756002b4a3c6cabfe96a7e3c" [[package]] name = "zmij" -version = "1.0.19" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zstd" diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index b4a538aaed..aeb3db716e 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -1030,7 +1030,7 @@ impl PhysicalPlanner { .map(|expr| self.create_expr(expr, Arc::clone(&required_schema))) .collect(); - let default_values: Option> = if !scan + let default_values: Option> = if !scan .default_values .is_empty() { @@ -1060,6 +1060,11 @@ impl PhysicalPlanner { default_values_indexes .into_iter() .zip(default_values) + .map(|(idx, scalar_value)| { + let field = required_schema.field(idx); + let column = Column::new(field.name().as_str(), idx); + (column, scalar_value) + }) .collect(), ) } else { diff --git a/native/core/src/parquet/parquet_exec.rs b/native/core/src/parquet/parquet_exec.rs index f4cc7bf9fe..2d970734bb 100644 --- a/native/core/src/parquet/parquet_exec.rs +++ b/native/core/src/parquet/parquet_exec.rs @@ -28,7 +28,7 @@ use datafusion::datasource::physical_plan::{ use datafusion::datasource::source::DataSourceExec; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::execution::SendableRecordBatchStream; -use datafusion::physical_expr::expressions::BinaryExpr; +use datafusion::physical_expr::expressions::{BinaryExpr, Column}; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; @@ -67,7 +67,7 @@ pub(crate) fn init_datasource_exec( file_groups: Vec>, projection_vector: Option>, data_filters: Option>>, - default_values: Option>, + default_values: Option>, session_timezone: &str, case_sensitive: bool, session_ctx: &Arc, diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 2874b6cbf1..3402377b5d 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -41,9 +41,6 @@ use datafusion_physical_expr_adapter::{ }; use std::collections::HashMap; use std::sync::Arc; -// ============================================================================ -// New PhysicalExprAdapter Implementation (Recommended) -// ============================================================================ /// Factory for creating Spark-compatible physical expression adapters. /// @@ -54,15 +51,15 @@ pub struct SparkPhysicalExprAdapterFactory { /// Spark-specific parquet options for type conversions parquet_options: SparkParquetOptions, /// Default values for columns that may be missing from the physical schema. - /// The key is the column index in the logical schema. - default_values: Option>, + /// The key is the Column (containing name and index). + default_values: Option>, } impl SparkPhysicalExprAdapterFactory { /// Create a new factory with the given options. pub fn new( parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, ) -> Self { Self { parquet_options, @@ -186,8 +183,8 @@ struct SparkPhysicalExprAdapter { physical_file_schema: SchemaRef, /// Spark-specific options for type conversions parquet_options: SparkParquetOptions, - /// Default values for missing columns (keyed by logical schema index) - default_values: Option>, + /// Default values for missing columns (keyed by Column) + default_values: Option>, /// The default DataFusion adapter to delegate standard handling to default_adapter: Arc, /// Mapping from logical column names to original physical column names, @@ -207,8 +204,10 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { // // The default adapter may fail for complex nested type casts (List, Map). // In that case, fall back to wrapping everything ourselves. + let expr = self.replace_missing_with_defaults(expr)?; let expr = match self.default_adapter.rewrite(Arc::clone(&expr)) { Ok(rewritten) => { + // Replace references to missing columns with default values // Replace DataFusion's CastColumnExpr with either: // - CometCastColumnExpr (for Struct/List/Map, uses spark_parquet_convert) // - Spark Cast (for simple scalar types) @@ -216,9 +215,10 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { .transform(|e| self.replace_with_spark_cast(e)) .data()? } - Err(_) => { + Err(e) => { // Default adapter failed (likely complex nested type cast). // Handle all type mismatches ourselves using spark_parquet_convert. + log::info!("Default schema adapter error: {}", e); self.wrap_all_type_mismatches(expr)? } }; @@ -249,7 +249,6 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { } } -#[allow(dead_code)] impl SparkPhysicalExprAdapter { /// Wrap ALL Column expressions that have type mismatches with CometCastColumnExpr. /// This is the fallback path when the default adapter fails (e.g., for complex @@ -369,53 +368,6 @@ impl SparkPhysicalExprAdapter { Ok(Transformed::no(expr)) } - /// Cast Column expressions where the physical and logical datatypes differ. - /// - /// This function traverses the expression tree and for each Column expression, - /// checks if the physical file schema datatype differs from the logical file schema - /// datatype. If they differ, it wraps the Column with a CastColumnExpr to perform - /// the necessary type conversion. - fn cast_datafusion_unsupported_expr( - &self, - expr: Arc, - ) -> DataFusionResult> { - expr.transform(|e| { - // Check if this is a Column expression - if let Some(column) = e.as_any().downcast_ref::() { - let col_idx = column.index(); - - // dbg!(&self.logical_file_schema, &self.physical_file_schema); - - // Get the logical datatype (expected by the query) - let logical_field = self.logical_file_schema.fields().get(col_idx); - // Get the physical datatype (actual file schema) - let physical_field = self.physical_file_schema.fields().get(col_idx); - - // dbg!(&logical_field, &physical_field); - - if let (Some(logical_field), Some(physical_field)) = (logical_field, physical_field) - { - let logical_type = logical_field.data_type(); - let physical_type = physical_field.data_type(); - - // If datatypes differ, insert a CastColumnExpr - if logical_type != physical_type { - let cast_expr: Arc = Arc::new(CometCastColumnExpr::new( - Arc::clone(&e), - Arc::clone(physical_field), - Arc::clone(logical_field), - None, - )); - // dbg!(&cast_expr); - return Ok(Transformed::yes(cast_expr)); - } - } - } - Ok(Transformed::no(e)) - }) - .data() - } - /// Replace references to missing columns with default values. fn replace_missing_with_defaults( &self, @@ -429,17 +381,55 @@ impl SparkPhysicalExprAdapter { return Ok(expr); } - // Convert index-based defaults to name-based for replace_columns_with_literals - let name_based: HashMap<&str, &ScalarValue> = defaults + // dbg!(&self.logical_file_schema, &self.physical_file_schema); + + // Convert Column-based defaults to name-based for replace_columns_with_literals. + // Only include columns that are MISSING from the physical file schema. + // If the default value's type doesn't match the logical schema, cast it using Spark cast. + let owned_values: Vec<(String, ScalarValue)> = defaults .iter() - .filter_map(|(idx, val)| { - self.logical_file_schema - .fields() - .get(*idx) - .map(|f| (f.name().as_str(), val)) + .filter(|(col, _)| { + // Only include defaults for columns missing from the physical file schema + let col_name = col.name(); + if self.parquet_options.case_sensitive { + self.physical_file_schema.field_with_name(col_name).is_err() + } else { + !self + .physical_file_schema + .fields() + .iter() + .any(|f| f.name().eq_ignore_ascii_case(col_name)) + } + }) + .map(|(col, val)| { + let col_name = col.name(); + let value = self + .logical_file_schema + .field_with_name(col_name) + .ok() + .filter(|field| val.data_type() != *field.data_type()) + .and_then(|field| { + spark_parquet_convert( + ColumnarValue::Scalar(val.clone()), + field.data_type(), + &self.parquet_options, + ) + .ok() + .and_then(|cv| match cv { + ColumnarValue::Scalar(s) => Some(s), + _ => None, + }) + }) + .unwrap_or_else(|| val.clone()); + (col_name.to_string(), value) }) .collect(); + let name_based: HashMap<&str, &ScalarValue> = + owned_values.iter().map(|(k, v)| (k.as_str(), v)).collect(); + + dbg!(&name_based, &expr); + if name_based.is_empty() { return Ok(expr); } @@ -510,13 +500,13 @@ pub fn adapt_batch_with_expressions( pub struct SparkSchemaAdapterFactory { /// Spark cast options parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, } impl SparkSchemaAdapterFactory { pub fn new( options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, ) -> Self { Self { parquet_options: options, @@ -554,7 +544,7 @@ pub struct SparkSchemaAdapter { required_schema: SchemaRef, /// Spark cast options parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, } impl SchemaAdapter for SparkSchemaAdapter { @@ -659,7 +649,7 @@ pub struct SchemaMapping { field_mappings: Vec>, /// Spark cast options parquet_options: SparkParquetOptions, - default_values: Option>, + default_values: Option>, } impl SchemaMapper for SchemaMapping { @@ -688,7 +678,9 @@ impl SchemaMapper for SchemaMapping { || { if let Some(default_values) = &self.default_values { // We have a map of default values, see if this field is in there. - if let Some(value) = default_values.get(&field_idx) + // Create a Column from the field name and index to look up the default value + let column = Column::new(field.name().as_str(), field_idx); + if let Some(value) = default_values.get(&column) // Default value exists, construct a column from it. { let cv = if field.data_type() == &value.data_type() { diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 12ea91d423..9fe53b9a84 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -29,7 +29,7 @@ import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass, Incompatible} import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} -import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions.