diff --git a/native/Cargo.lock b/native/Cargo.lock index 4f7f5424b7..404656405f 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -17,6 +17,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "aes" version = "0.8.4" @@ -28,6 +38,20 @@ dependencies = [ "cpufeatures 0.2.17", ] +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "ahash" version = "0.8.12" @@ -98,9 +122,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anyhow" @@ -135,9 +159,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.2" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" +checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" dependencies = [ "rustversion", ] @@ -162,9 +186,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -183,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -197,9 +221,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -216,9 +240,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -228,9 +252,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -250,9 +274,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -265,9 +289,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -278,9 +302,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -288,14 +312,14 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex 0.12.1", + "lz4_flex", ] [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -304,7 +328,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.13.1", "itoa", "lexical-core", "memchr", @@ -317,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -330,9 +354,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +367,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags 2.11.0", "serde_core", @@ -354,9 +378,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -368,9 +392,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -600,9 +624,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.1" +version = "1.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" dependencies = [ "aws-lc-sys", "zeroize", @@ -610,9 +634,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.38.0" +version = "0.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" dependencies = [ "cc", "cmake", @@ -647,9 +671,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.96.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f64a6eded248c6b453966e915d32aeddb48ea63ad17932682774eb026fbef5b1" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" dependencies = [ "aws-credential-types", "aws-runtime", @@ -671,9 +695,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db96d720d3c622fcbe08bae1c4b04a72ce6257d8b0584cb5418da00ae20a344f" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -695,9 +719,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.100.0" +version = "1.101.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fafbdda43b93f57f699c5dfe8328db590b967b8a820a13ccdd6687355dfcc7ca" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -868,9 +892,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b1117b3b2bbe166d11199b540ceed0d0f7676e36e7b962b5a437a9971eac75" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" dependencies = [ "base64-simd", "bytes", @@ -1017,7 +1041,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "shlex", "syn 2.0.117", ] @@ -1045,16 +1069,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", - "cpufeatures 0.2.17", + "cpufeatures 0.3.0", ] [[package]] @@ -1100,9 +1124,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1110,9 +1134,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ "darling 0.23.0", "ident_case", @@ -1204,9 +1228,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.58" +version = "1.2.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" +checksum = "b7a4d3ec6524d28a329fc53654bbadc9bdd7b0431f5d65f1a56ffb28a1ee5283" dependencies = [ "find-msvc-tools", "jobserver", @@ -1326,18 +1350,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstyle", "clap_lex", @@ -1345,15 +1369,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] @@ -1583,6 +1607,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] @@ -1608,23 +1633,38 @@ dependencies = [ ] [[package]] -name = "darling" -version = "0.20.11" +name = "ctor" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" dependencies = [ - "darling_core 0.20.11", - "darling_macro 0.20.11", + "ctor-proc-macro", + "dtor", +] + +[[package]] +name = "ctor-proc-macro" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" + +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", ] [[package]] name = "darling" -version = "0.21.3" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", + "darling_core 0.20.11", + "darling_macro 0.20.11", ] [[package]] @@ -1651,20 +1691,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.117", -] - [[package]] name = "darling_core" version = "0.23.0" @@ -1689,17 +1715,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", - "quote", - "syn 2.0.117", -] - [[package]] name = "darling_macro" version = "0.23.0" @@ -1727,9 +1742,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" +checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" dependencies = [ "arrow", "arrow-schema", @@ -1778,9 +1793,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" +checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" dependencies = [ "arrow", "async-trait", @@ -1803,9 +1818,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" +checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" dependencies = [ "arrow", "async-trait", @@ -1862,7 +1877,7 @@ dependencies = [ "object_store", "object_store_opendal", "once_cell", - "opendal", + "opendal 0.55.0 (git+https://github.com/apache/opendal?rev=173feb6)", "parking_lot", "parquet", "paste", @@ -1964,7 +1979,7 @@ dependencies = [ "itertools 0.14.0", "jni 0.21.1", "log", - "lz4_flex 0.13.0", + "lz4_flex", "simd-adler32", "snap", "tempfile", @@ -1995,9 +2010,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" +checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" dependencies = [ "ahash", "arrow", @@ -2006,7 +2021,8 @@ dependencies = [ "half", "hashbrown 0.16.1", "hex", - "indexmap 2.13.0", + "indexmap 2.13.1", + "itertools 0.14.0", "libc", "log", "object_store", @@ -2019,9 +2035,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" +checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" dependencies = [ "futures", "log", @@ -2030,9 +2046,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" +checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" dependencies = [ "arrow", "async-compression", @@ -2065,9 +2081,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" +checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" dependencies = [ "arrow", "arrow-ipc", @@ -2089,9 +2105,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" +checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" dependencies = [ "arrow", "async-trait", @@ -2112,9 +2128,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" +checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" dependencies = [ "arrow", "async-trait", @@ -2129,14 +2145,16 @@ dependencies = [ "datafusion-session", "futures", "object_store", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23798383465e0c569bd442d1453b50691261f8ad6511d840c48457b3bf51ae21" +checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" dependencies = [ "arrow", "async-trait", @@ -2164,22 +2182,24 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" +checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" [[package]] name = "datafusion-execution" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" +checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", "object_store", @@ -2192,9 +2212,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" +checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" dependencies = [ "arrow", "async-trait", @@ -2205,7 +2225,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "paste", "serde_json", @@ -2214,22 +2234,22 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" +checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" +checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" dependencies = [ "arrow", "arrow-buffer", @@ -2248,6 +2268,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -2258,9 +2279,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" +checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" dependencies = [ "ahash", "arrow", @@ -2274,14 +2295,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" +checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" dependencies = [ "ahash", "arrow", @@ -2292,9 +2314,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" +checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" dependencies = [ "arrow", "arrow-ord", @@ -2308,16 +2330,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" +checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" dependencies = [ "arrow", "async-trait", @@ -2331,9 +2355,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" +checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" dependencies = [ "arrow", "datafusion-common", @@ -2349,9 +2373,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" +checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2359,9 +2383,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" +checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" dependencies = [ "datafusion-doc", "quote", @@ -2370,9 +2394,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" +checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" dependencies = [ "arrow", "chrono", @@ -2380,7 +2404,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "log", "regex", @@ -2389,9 +2413,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" +checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" dependencies = [ "ahash", "arrow", @@ -2402,7 +2426,7 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "parking_lot", "paste", @@ -2412,9 +2436,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" +checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" dependencies = [ "arrow", "datafusion-common", @@ -2427,9 +2451,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" +checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" dependencies = [ "ahash", "arrow", @@ -2437,16 +2461,16 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "parking_lot", ] [[package]] name = "datafusion-physical-optimizer" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" +checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" dependencies = [ "arrow", "datafusion-common", @@ -2462,9 +2486,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" +checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" dependencies = [ "ahash", "arrow", @@ -2483,9 +2507,10 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2493,9 +2518,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" +checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" dependencies = [ "arrow", "datafusion-common", @@ -2510,9 +2535,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" +checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" dependencies = [ "async-trait", "datafusion-common", @@ -2524,39 +2549,44 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e53604bca77d4544426a425e2a50d7b911bbe35d3c8193de24093b445f23856" +checksum = "923a8b871962a9d860f036f743a20af50ff04729f1da2468ed220dab4f61c97d" dependencies = [ "arrow", "bigdecimal", "chrono", "crc32fast", + "datafusion", "datafusion-catalog", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-functions", + "datafusion-functions-aggregate", "datafusion-functions-nested", "log", "percent-encoding", "rand 0.9.2", + "serde_json", "sha1", + "sha2", "url", ] [[package]] name = "datafusion-sql" -version = "52.4.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" +checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.13.0", + "datafusion-functions-nested", + "indexmap 2.13.1", "log", "regex", "sqlparser", @@ -2676,9 +2706,9 @@ dependencies = [ [[package]] name = "dissimilar" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e" [[package]] name = "dlv-list" @@ -2689,6 +2719,21 @@ dependencies = [ "const-random", ] +[[package]] +name = "dtor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "dunce" version = "1.0.5" @@ -3053,6 +3098,16 @@ dependencies = [ "wasip3", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "gimli" version = "0.32.3" @@ -3089,7 +3144,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.4.0", - "indexmap 2.13.0", + "indexmap 2.13.1", "slab", "tokio", "tokio-util", @@ -3269,9 +3324,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -3283,7 +3338,6 @@ dependencies = [ "httparse", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -3357,8 +3411,9 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/apache/iceberg-rust?tag=v0.9.0-rc.1#7ef4063926f76f4ab3037227a9fa7a53e21e717f" +source = "git+https://github.com/apache/iceberg-rust?rev=477a1e5#477a1e525b4915895388a4f45557b825ea541ef2" dependencies = [ + "aes-gcm", "anyhow", "apache-avro", "array-init", @@ -3404,20 +3459,22 @@ dependencies = [ "typetag", "url", "uuid", + "zeroize", "zstd", ] [[package]] name = "iceberg-storage-opendal" version = "0.9.0" -source = "git+https://github.com/apache/iceberg-rust?tag=v0.9.0-rc.1#7ef4063926f76f4ab3037227a9fa7a53e21e717f" +source = "git+https://github.com/apache/iceberg-rust?rev=477a1e5#477a1e525b4915895388a4f45557b825ea541ef2" dependencies = [ "anyhow", "async-trait", "bytes", "cfg-if", + "futures", "iceberg", - "opendal", + "opendal 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqsign", "reqwest", "serde", @@ -3427,12 +3484,13 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -3440,9 +3498,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -3453,9 +3511,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -3467,15 +3525,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -3487,15 +3545,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -3552,9 +3610,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -3569,7 +3627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash", - "indexmap 2.13.0", + "indexmap 2.13.1", "is-terminal", "itoa", "log", @@ -3598,9 +3656,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "inventory" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" dependencies = [ "rustversion", ] @@ -3613,9 +3671,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ "memchr", "serde", @@ -3652,9 +3710,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "java-locator" @@ -3673,10 +3731,12 @@ checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", + "js-sys", "log", "portable-atomic", "portable-atomic-util", "serde_core", + "wasm-bindgen", "windows-sys 0.61.2", ] @@ -3794,10 +3854,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -3976,9 +4038,9 @@ checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" @@ -4040,15 +4102,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" -[[package]] -name = "lz4_flex" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" -dependencies = [ - "twox-hash", -] - [[package]] name = "lz4_flex" version = "0.13.0" @@ -4068,6 +4121,15 @@ dependencies = [ "digest", ] +[[package]] +name = "mea" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6747f54621d156e1b47eb6b25f39a941b9fc347f98f67d25d8881ff99e8ed832" +dependencies = [ + "slab", +] + [[package]] name = "memchr" version = "2.8.0" @@ -4127,9 +4189,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.14" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -4230,9 +4292,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-format" @@ -4296,16 +4358,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.5" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" dependencies = [ "async-trait", "base64", "bytes", "chrono", "form_urlencoded", - "futures", + "futures-channel", + "futures-core", + "futures-util", "http 1.4.0", "http-body-util", "httparse", @@ -4315,11 +4379,11 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.2", + "quick-xml 0.39.2", + "rand 0.10.0", "reqwest", "ring", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", @@ -4335,15 +4399,15 @@ dependencies = [ [[package]] name = "object_store_opendal" version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113ab0769e972eee585e57407b98de08bda5354fa28e8ba4d89038d6cb6a8991" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" dependencies = [ "async-trait", "bytes", "chrono", "futures", + "mea", "object_store", - "opendal", + "opendal 0.55.0 (git+https://github.com/apache/opendal?rev=173feb6)", "pin-project", "tokio", ] @@ -4360,6 +4424,12 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "opendal" version = "0.55.0" @@ -4373,7 +4443,6 @@ dependencies = [ "crc32c", "futures", "getrandom 0.2.17", - "hdrs", "http 1.4.0", "http-body 1.0.1", "jiff", @@ -4390,6 +4459,100 @@ dependencies = [ "uuid", ] +[[package]] +name = "opendal" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "ctor", + "opendal-core", + "opendal-layer-concurrent-limit", + "opendal-layer-logging", + "opendal-layer-retry", + "opendal-layer-timeout", + "opendal-service-hdfs", +] + +[[package]] +name = "opendal-core" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "anyhow", + "base64", + "bytes", + "futures", + "http 1.4.0", + "http-body 1.0.1", + "jiff", + "log", + "md-5", + "mea", + "percent-encoding", + "quick-xml 0.38.4", + "reqsign-core", + "reqwest", + "serde", + "serde_json", + "tokio", + "url", + "uuid", + "web-time", +] + +[[package]] +name = "opendal-layer-concurrent-limit" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "futures", + "http 1.4.0", + "mea", + "opendal-core", +] + +[[package]] +name = "opendal-layer-logging" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-retry" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "backon", + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-timeout" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "opendal-core", + "tokio", +] + +[[package]] +name = "opendal-service-hdfs" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "bytes", + "futures", + "hdrs", + "log", + "opendal-core", + "serde", + "tokio", +] + [[package]] name = "openssl-probe" version = "0.2.1" @@ -4461,14 +4624,13 @@ dependencies = [ [[package]] name = "parquet" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -4481,7 +4643,7 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "lz4_flex 0.12.1", + "lz4_flex", "num-bigint", "num-integer", "num-traits", @@ -4502,39 +4664,40 @@ dependencies = [ [[package]] name = "parquet-variant" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184" +checksum = "2bf493f3c9ddd984d0efb019f67343e4aa4bab893931f6a14b82083065dc3d28" dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.13.1", "simdutf8", "uuid", ] [[package]] name = "parquet-variant-compute" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c" +checksum = "6ac038d46a503a7d563b4f5df5802c4315d5343d009feab195d15ac512b4cb27" dependencies = [ "arrow", "arrow-schema", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.13.1", "parquet-variant", "parquet-variant-json", + "serde_json", "uuid", ] [[package]] name = "parquet-variant-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be" +checksum = "015a09c2ffe5108766c7c1235c307b8a3c2ea64eca38455ba1a7f3a7f32f16e2" dependencies = [ "arrow-schema", "base64", @@ -4599,7 +4762,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.13.1", "serde", ] @@ -4750,6 +4913,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -4758,18 +4933,18 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -4940,6 +5115,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958f21e8e7ceb5a1aa7fa87fab28e7c75976e0bfe7e23ff069e0a260f894067d" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.9" @@ -4951,7 +5136,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "rustls", "socket2", "thiserror 2.0.18", @@ -4971,7 +5156,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "rustls", "rustls-pki-types", "slab", @@ -5208,6 +5393,28 @@ dependencies = [ "tokio", ] +[[package]] +name = "reqsign-core" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b10302cf0a7d7e7352ba211fc92c3c5bebf1286153e49cc5aa87348078a8e102" +dependencies = [ + "anyhow", + "base64", + "bytes", + "form_urlencoded", + "futures", + "hex", + "hmac", + "http 1.4.0", + "jiff", + "log", + "percent-encoding", + "sha1", + "sha2", + "windows-sys 0.61.2", +] + [[package]] name = "reqwest" version = "0.12.28" @@ -5329,9 +5536,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -5395,15 +5602,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -5638,15 +5836,15 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.13.0", + "indexmap 2.13.1", "schemars 0.9.0", "schemars 1.2.1", "serde_core", @@ -5657,11 +5855,11 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", "syn 2.0.117", @@ -5673,7 +5871,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.13.1", "itoa", "ryu", "serde", @@ -5823,9 +6021,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -5833,9 +6031,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", @@ -5889,9 +6087,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "symbolic-common" -version = "12.17.2" +version = "12.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "751a2823d606b5d0a7616499e4130a516ebd01a44f39811be2b9600936509c23" +checksum = "52ca086c1eb5c7ee74b151ba83c6487d5d33f8c08ad991b86f3f58f6629e68d5" dependencies = [ "debugid", "memmap2", @@ -5901,9 +6099,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.17.2" +version = "12.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79b237cfbe320601dd24b4ac817a5b68bb28f5508e33f08d42be0682cadc8ac9" +checksum = "baa911a28a62823aaf2cc2e074212492a3ee69d0d926cc8f5b12b4a108ff5c0c" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -6105,9 +6303,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -6125,9 +6323,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -6176,6 +6374,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", + "tokio-util", +] + [[package]] name = "tokio-util" version = "0.7.18" @@ -6353,9 +6563,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -6369,6 +6579,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "unsafe-any-ors" version = "1.0.0" @@ -6489,9 +6709,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0" dependencies = [ "cfg-if", "once_cell", @@ -6502,23 +6722,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.64" +version = "0.4.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +checksum = "03623de6905b7206edd0a75f69f747f134b7f0a2323392d664448bf2d3c5d87e" dependencies = [ - "cfg-if", - "futures-util", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6526,9 +6742,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2" dependencies = [ "bumpalo", "proc-macro2", @@ -6539,9 +6755,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b" dependencies = [ "unicode-ident", ] @@ -6563,7 +6779,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.13.0", + "indexmap 2.13.1", "wasm-encoder", "wasmparser", ] @@ -6589,15 +6805,15 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags 2.11.0", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.13.1", "semver", ] [[package]] name = "web-sys" -version = "0.3.91" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +checksum = "cd70027e39b12f0849461e08ffc50b9cd7688d942c1c8e3c7b22273236b4dd0a" dependencies = [ "js-sys", "wasm-bindgen", @@ -6983,7 +7199,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.13.0", + "indexmap 2.13.1", "prettyplease", "syn 2.0.117", "wasm-metadata", @@ -7014,7 +7230,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags 2.11.0", - "indexmap 2.13.0", + "indexmap 2.13.1", "log", "serde", "serde_derive", @@ -7033,7 +7249,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.13.0", + "indexmap 2.13.1", "log", "semver", "serde", @@ -7045,9 +7261,9 @@ dependencies = [ [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "xmlparser" @@ -7057,9 +7273,9 @@ checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -7068,9 +7284,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -7080,18 +7296,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", @@ -7100,18 +7316,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -7127,9 +7343,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ -7138,9 +7354,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -7149,9 +7365,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", diff --git a/native/Cargo.toml b/native/Cargo.toml index c626743be1..b71bc0c73c 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -34,14 +34,14 @@ edition = "2021" rust-version = "1.88" [workspace.dependencies] -arrow = { version = "57.3.0", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow = { version = "58.1.0", features = ["prettyprint", "ffi", "chrono-tz"] } async-trait = { version = "0.1" } bytes = { version = "1.11.1" } -parquet = { version = "57.3.0", default-features = false, features = ["experimental"] } -datafusion = { version = "52.4.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } -datafusion-datasource = { version = "52.4.0" } -datafusion-physical-expr-adapter = { version = "52.4.0" } -datafusion-spark = { version = "52.4.0" } +parquet = { version = "58.1.0", default-features = false, features = ["experimental"] } +datafusion = { version = "53.0.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } +datafusion-datasource = { version = "53.0.0" } +datafusion-physical-expr-adapter = { version = "53.0.0" } +datafusion-spark = { version = "53.0.0", features = ["core"] } datafusion-comet-spark-expr = { path = "spark-expr" } datafusion-comet-common = { path = "common" } datafusion-comet-jni-bridge = { path = "jni-bridge" } @@ -54,12 +54,12 @@ num = "0.4" rand = "0.10" regex = "1.12.3" thiserror = "2" -object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] } +object_store = { version = "0.13.1", features = ["gcp", "azure", "aws", "http"] } url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/apache/iceberg-rust", tag = "v0.9.0-rc.1" } -iceberg-storage-opendal = { git = "https://github.com/apache/iceberg-rust", tag = "v0.9.0-rc.1", features = ["opendal-all"] } +iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "477a1e5" } +iceberg-storage-opendal = { git = "https://github.com/apache/iceberg-rust", rev = "477a1e5", features = ["opendal-all"] } [profile.release] debug = true diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index b87e389e9e..872260c02f 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -70,9 +70,9 @@ aws-credential-types = { workspace = true } parking_lot = "0.12.5" datafusion-comet-objectstore-hdfs = { path = "../hdfs", optional = true, default-features = false, features = ["hdfs"] } reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "http2"] } -object_store_opendal = {version = "0.55.0", optional = true} +object_store_opendal = { git = "https://github.com/apache/opendal", rev = "173feb6", package = "object_store_opendal", optional = true} hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]} -opendal = { version ="0.55.0", optional = true, features = ["services-hdfs"] } +opendal = { git = "https://github.com/apache/opendal", rev = "173feb6", optional = true, features = ["services-hdfs"] } iceberg = { workspace = true } iceberg-storage-opendal = { workspace = true } serde_json = "1.0" @@ -91,7 +91,7 @@ jni = { version = "0.22.4", features = ["invocation"] } lazy_static = "1.4" assertables = "9" hex = "0.4.3" -datafusion-functions-nested = { version = "52.4.0" } +datafusion-functions-nested = { version = "53.0.0" } [features] backtrace = ["datafusion/backtrace"] diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index b34493ad6c..bf21a97568 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -393,6 +393,11 @@ fn prepare_datafusion_session_context( // register UDFs from datafusion-spark crate fn register_datafusion_spark_function(session_ctx: &SessionContext) { + // Don't register SparkArrayRepeat — it returns NULL when the element is NULL + // (e.g. array_repeat(null, 3) returns NULL instead of [null, null, null]). + // Comet's Scala serde wraps the call in a CaseWhen for null count handling, + // so DataFusion's built-in ArrayRepeat is sufficient. + // TODO: file upstream issue against datafusion-spark session_ctx.register_udf(ScalarUDF::new_from_impl(SparkExpm1::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha2::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(CharFunc::default())); diff --git a/native/core/src/execution/memory_pools/fair_pool.rs b/native/core/src/execution/memory_pools/fair_pool.rs index 3080711810..e4a7ceab54 100644 --- a/native/core/src/execution/memory_pools/fair_pool.rs +++ b/native/core/src/execution/memory_pools/fair_pool.rs @@ -103,16 +103,21 @@ impl MemoryPool for CometFairMemoryPool { .expect("unexpected amount of unregister happened"); } - fn grow(&self, reservation: &MemoryReservation, additional: usize) { - self.try_grow(reservation, additional).unwrap(); + fn grow(&self, _reservation: &MemoryReservation, additional: usize) { + self.try_grow(_reservation, additional).unwrap(); } - fn shrink(&self, reservation: &MemoryReservation, subtractive: usize) { + fn shrink(&self, _reservation: &MemoryReservation, subtractive: usize) { if subtractive > 0 { let mut state = self.state.lock(); - let size = reservation.size(); - if size < subtractive { - panic!("Failed to release {subtractive} bytes where only {size} bytes reserved") + // We don't use reservation.size() here because DataFusion 53+ decrements + // the reservation's atomic size before calling pool.shrink(), so it would + // reflect the post-shrink value rather than the pre-shrink value. + if state.used < subtractive { + panic!( + "Failed to release {subtractive} bytes where only {} bytes tracked by pool", + state.used + ) } self.release(subtractive) .unwrap_or_else(|_| panic!("Failed to release {subtractive} bytes")); @@ -122,7 +127,7 @@ impl MemoryPool for CometFairMemoryPool { fn try_grow( &self, - reservation: &MemoryReservation, + _reservation: &MemoryReservation, additional: usize, ) -> Result<(), DataFusionError> { if additional > 0 { @@ -132,10 +137,13 @@ impl MemoryPool for CometFairMemoryPool { .pool_size .checked_div(num) .expect("overflow in checked_div"); - let size = reservation.size(); - if limit < size + additional { + // We use state.used instead of reservation.size() because DataFusion 53+ + // calls pool.try_grow() before incrementing the reservation's atomic size, + // so reservation.size() would not include prior grows. + let used = state.used; + if limit < used + additional { return resources_err!( - "Failed to acquire {additional} bytes where {size} bytes already reserved and the fair limit is {limit} bytes, {num} registered" + "Failed to acquire {additional} bytes where {used} bytes already reserved and the fair limit is {limit} bytes, {num} registered" ); } diff --git a/native/core/src/execution/operators/expand.rs b/native/core/src/execution/operators/expand.rs index 19ca204592..e06fab23ec 100644 --- a/native/core/src/execution/operators/expand.rs +++ b/native/core/src/execution/operators/expand.rs @@ -42,7 +42,7 @@ pub struct ExpandExec { projections: Vec>>, child: Arc, schema: SchemaRef, - cache: PlanProperties, + cache: Arc, } impl ExpandExec { @@ -52,12 +52,12 @@ impl ExpandExec { child: Arc, schema: SchemaRef, ) -> Self { - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&schema)), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Self { projections, @@ -129,7 +129,7 @@ impl ExecutionPlan for ExpandExec { Ok(Box::pin(expand_stream)) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git a/native/core/src/execution/operators/iceberg_scan.rs b/native/core/src/execution/operators/iceberg_scan.rs index b76e8d587e..d217ebc34b 100644 --- a/native/core/src/execution/operators/iceberg_scan.rs +++ b/native/core/src/execution/operators/iceberg_scan.rs @@ -58,7 +58,7 @@ pub struct IcebergScanExec { /// Output schema after projection output_schema: SchemaRef, /// Cached execution plan properties - plan_properties: PlanProperties, + plan_properties: Arc, /// Catalog-specific configuration for FileIO catalog_properties: HashMap, /// Pre-planned file scan tasks @@ -93,13 +93,13 @@ impl IcebergScanExec { }) } - fn compute_properties(schema: SchemaRef, num_partitions: usize) -> PlanProperties { - PlanProperties::new( + fn compute_properties(schema: SchemaRef, num_partitions: usize) -> Arc { + Arc::new(PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(num_partitions), EmissionType::Incremental, Boundedness::Bounded, - ) + )) } } @@ -116,7 +116,7 @@ impl ExecutionPlan for IcebergScanExec { Arc::clone(&self.output_schema) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.plan_properties } @@ -288,7 +288,7 @@ where _ => { let adapter = self .adapter_factory - .create(Arc::clone(&self.schema), Arc::clone(&file_schema)); + .create(Arc::clone(&self.schema), Arc::clone(&file_schema))?; let exprs = build_projection_expressions(&self.schema, &adapter).map_err(|e| { DataFusionError::Execution(format!( diff --git a/native/core/src/execution/operators/parquet_writer.rs b/native/core/src/execution/operators/parquet_writer.rs index 4a53ff51b8..8ba79098d4 100644 --- a/native/core/src/execution/operators/parquet_writer.rs +++ b/native/core/src/execution/operators/parquet_writer.rs @@ -23,16 +23,18 @@ use std::{ fmt, fmt::{Debug, Formatter}, fs::File, - io::Cursor, sync::Arc, }; +#[cfg(feature = "hdfs-opendal")] use opendal::Operator; +#[cfg(feature = "hdfs-opendal")] +use std::io::Cursor; use crate::execution::shuffle::CompressionCodec; -use crate::parquet::parquet_support::{ - create_hdfs_operator, is_hdfs_scheme, prepare_object_store_with_configs, -}; +use crate::parquet::parquet_support::is_hdfs_scheme; +#[cfg(feature = "hdfs-opendal")] +use crate::parquet::parquet_support::{create_hdfs_operator, prepare_object_store_with_configs}; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use async_trait::async_trait; @@ -45,7 +47,7 @@ use datafusion::{ metrics::{ExecutionPlanMetricsSet, MetricsSet}, stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, - SendableRecordBatchStream, Statistics, + SendableRecordBatchStream, }, }; use futures::TryStreamExt; @@ -64,6 +66,7 @@ enum ParquetWriter { /// Contains the arrow writer, HDFS operator, and destination path /// an Arrow writer writes to in-memory buffer the data converted to Parquet format /// The opendal::Writer is created lazily on first write + #[cfg(feature = "hdfs-opendal")] Remote( ArrowWriter>>, Option, @@ -80,6 +83,7 @@ impl ParquetWriter { ) -> std::result::Result<(), parquet::errors::ParquetError> { match self { ParquetWriter::LocalFile(writer) => writer.write(batch), + #[cfg(feature = "hdfs-opendal")] ParquetWriter::Remote( arrow_parquet_buffer_writer, hdfs_writer_opt, @@ -134,6 +138,7 @@ impl ParquetWriter { writer.close()?; Ok(()) } + #[cfg(feature = "hdfs-opendal")] ParquetWriter::Remote( arrow_parquet_buffer_writer, mut hdfs_writer_opt, @@ -208,7 +213,7 @@ pub struct ParquetWriterExec { /// Metrics metrics: ExecutionPlanMetricsSet, /// Cache for plan properties - cache: PlanProperties, + cache: Arc, } impl ParquetWriterExec { @@ -228,12 +233,12 @@ impl ParquetWriterExec { // Preserve the input's partitioning so each partition writes its own file let input_partitioning = input.output_partitioning().clone(); - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&input.schema())), input_partitioning, EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(ParquetWriterExec { input, @@ -275,7 +280,7 @@ impl ParquetWriterExec { output_file_path: &str, schema: SchemaRef, props: WriterProperties, - runtime_env: Arc, + _runtime_env: Arc, object_store_options: &HashMap, ) -> Result { // Parse URL and match on storage scheme directly @@ -284,11 +289,11 @@ impl ParquetWriterExec { })?; if is_hdfs_scheme(&url, object_store_options) { - // HDFS storage + #[cfg(feature = "hdfs-opendal")] { // Use prepare_object_store_with_configs to create and register the object store let (_object_store_url, object_store_path) = prepare_object_store_with_configs( - runtime_env, + _runtime_env, output_file_path.to_string(), object_store_options, ) @@ -324,6 +329,12 @@ impl ParquetWriterExec { object_store_path.to_string(), )) } + #[cfg(not(feature = "hdfs-opendal"))] + { + Err(DataFusionError::Execution( + "HDFS support is not enabled. Rebuild with the 'hdfs-opendal' feature.".into(), + )) + } } else if output_file_path.starts_with("file://") || output_file_path.starts_with("file:") || !output_file_path.contains("://") @@ -405,11 +416,7 @@ impl ExecutionPlan for ParquetWriterExec { Some(self.metrics.clone_inner()) } - fn statistics(&self) -> Result { - self.input.partition_statistics(None) - } - - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } @@ -576,6 +583,7 @@ mod tests { /// Helper function to create a test RecordBatch with 1000 rows of (int, string) data /// Example batch_id 1 -> 0..1000, 2 -> 1001..2000 + #[allow(dead_code)] fn create_test_record_batch(batch_id: i32) -> Result { assert!(batch_id > 0, "batch_id must be greater than 0"); let num_rows = batch_id * 1000; diff --git a/native/core/src/execution/operators/scan.rs b/native/core/src/execution/operators/scan.rs index dfcb50a68b..90bb741b5e 100644 --- a/native/core/src/execution/operators/scan.rs +++ b/native/core/src/execution/operators/scan.rs @@ -70,7 +70,7 @@ pub struct ScanExec { /// It is also used in unit test to mock the input data from JVM. pub batch: Arc>>, /// Cache of expensive-to-compute plan properties - cache: PlanProperties, + cache: Arc, /// Metrics collector metrics: ExecutionPlanMetricsSet, /// Baseline metrics @@ -93,14 +93,14 @@ impl ScanExec { // Build schema directly from data types since get_next now always unpacks dictionaries let schema = schema_from_data_types(&data_types); - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&schema)), // The partitioning is not important because we are not using DataFusion's // query planner or optimizer Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(Self { exec_context_id, @@ -415,7 +415,7 @@ impl ExecutionPlan for ScanExec { ))) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git a/native/core/src/execution/operators/shuffle_scan.rs b/native/core/src/execution/operators/shuffle_scan.rs index 1f3810ee32..92c4dc8780 100644 --- a/native/core/src/execution/operators/shuffle_scan.rs +++ b/native/core/src/execution/operators/shuffle_scan.rs @@ -61,7 +61,7 @@ pub struct ShuffleScanExec { /// The current input batch, populated by get_next_batch() before poll_next(). pub batch: Arc>>, /// Cache of plan properties. - cache: PlanProperties, + cache: Arc, /// Metrics collector. metrics: ExecutionPlanMetricsSet, /// Baseline metrics. @@ -82,12 +82,12 @@ impl ShuffleScanExec { let schema = schema_from_data_types(&data_types); - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&schema)), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(Self { exec_context_id, @@ -252,7 +252,7 @@ impl ExecutionPlan for ShuffleScanExec { ))) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index e31ee5bbad..d7bb8018fb 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -82,10 +82,11 @@ use datafusion::common::{ JoinType as DFJoinType, NullEquality, ScalarValue, }; use datafusion::datasource::listing::PartitionedFile; +use datafusion::logical_expr::type_coercion::functions::fields_with_udf; use datafusion::logical_expr::type_coercion::other::get_coerce_type_for_case_expression; use datafusion::logical_expr::{ - AggregateUDF, ReturnFieldArgs, ScalarUDF, WindowFrame, WindowFrameBound, WindowFrameUnits, - WindowFunctionDefinition, + AggregateUDF, ReturnFieldArgs, ScalarUDF, TypeSignature, WindowFrame, WindowFrameBound, + WindowFrameUnits, WindowFunctionDefinition, }; use datafusion::physical_expr::expressions::{Literal, StatsType}; use datafusion::physical_expr::window::WindowExpr; @@ -102,7 +103,6 @@ use arrow::buffer::{BooleanBuffer, NullBuffer, OffsetBuffer}; use arrow::row::{OwnedRow, RowConverter, SortField}; use datafusion::common::utils::SingleRowListArrayBuilder; use datafusion::common::UnnestOptions; -use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion::physical_plan::filter::FilterExec; use datafusion::physical_plan::limit::GlobalLimitExec; use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec}; @@ -1637,44 +1637,18 @@ impl PhysicalPlanner { NullEquality::NullEqualsNothing, )?); - if join.filter.is_some() { - // SMJ with join filter produces lots of tiny batches - let coalesce_batches: Arc = - Arc::new(CoalesceBatchesExec::new( - Arc::::clone(&join), - self.session_ctx - .state() - .config_options() - .execution - .batch_size, - )); - Ok(( - scans, - shuffle_scans, - Arc::new(SparkPlan::new_with_additional( - spark_plan.plan_id, - coalesce_batches, - vec![ - Arc::clone(&join_params.left), - Arc::clone(&join_params.right), - ], - vec![join], - )), - )) - } else { - Ok(( - scans, - shuffle_scans, - Arc::new(SparkPlan::new( - spark_plan.plan_id, - join, - vec![ - Arc::clone(&join_params.left), - Arc::clone(&join_params.right), - ], - )), - )) - } + Ok(( + scans, + shuffle_scans, + Arc::new(SparkPlan::new( + spark_plan.plan_id, + join, + vec![ + Arc::clone(&join_params.left), + Arc::clone(&join_params.right), + ], + )), + )) } OpStruct::HashJoin(join) => { let (join_params, scans, shuffle_scans) = self.parse_join_parameters( @@ -1701,6 +1675,12 @@ impl PhysicalPlanner { // null doesn't equal to null in Spark join key. If the join key is // `EqualNullSafe`, Spark will rewrite it during planning. NullEquality::NullEqualsNothing, + // null_aware is for null-aware anti joins (NOT IN subqueries). + // NullEquality controls whether NULL = NULL in join keys generally, + // while null_aware changes anti-join semantics so any NULL changes + // the entire result. Spark doesn't use this path (it rewrites + // EqualNullSafe at plan time), so false is correct. + false, )?); // If the hash join is build right, we need to swap the left and right @@ -2636,15 +2616,44 @@ impl PhysicalPlanner { other => other, }; let func = self.session_ctx.udf(fun_name)?; - let coerced_types = func - .coerce_types(&input_expr_types) - .unwrap_or_else(|_| input_expr_types.clone()); - let arg_fields = coerced_types + // Type coercion strategy: + // + // In DF52, Comet used coerce_types() which returns NotImplemented + // for most UDFs, so input types were kept unchanged. In DF53, + // fields_with_udf() runs full coercion which aggressively promotes + // types (e.g. Utf8 to Utf8View via Variadic signatures, Int32 to Int64 + // via Exact signatures). This breaks Comet's native implementations. + // + // Strategy: + // 1. Try coerce_types() — only UDFs that explicitly implement it + // will return Ok. Same as DF52 behavior. + // 2. For "well-supported" signatures (Coercible, String, Numeric, + // Comparable), use fields_with_udf(). These preserve input types + // (e.g. Utf8 stays Utf8, not promoted to Utf8View). + // 3. For all other signatures (Variadic, Exact, etc.), keep original + // types unchanged. Same as DF52 behavior. + let coerced_types = match func.coerce_types(&input_expr_types) { + Ok(types) => types, + Err(_) if needs_fields_coercion(&func.signature().type_signature) => { + let input_fields: Vec<_> = input_expr_types + .iter() + .enumerate() + .map(|(i, dt)| { + Arc::new(Field::new(format!("arg{i}"), dt.clone(), true)) + }) + .collect(); + let arg_fields = fields_with_udf(&input_fields, func.as_ref())?; + arg_fields.iter().map(|f| f.data_type().clone()).collect() + } + Err(_) => input_expr_types.clone(), + }; + + let arg_fields: Vec<_> = coerced_types .iter() .enumerate() .map(|(i, dt)| Arc::new(Field::new(format!("arg{i}"), dt.clone(), true))) - .collect::>(); + .collect(); // TODO this should try and find scalar let arguments = args @@ -2700,10 +2709,33 @@ impl PhysicalPlanner { fun_name, fun_expr, args.to_vec(), - Arc::new(Field::new(fun_name, data_type, true)), + Arc::new(Field::new(fun_name, data_type.clone(), true)), Arc::new(ConfigOptions::default()), )); + // DF53 changed some UDFs (e.g. md5) to return StringViewArray at execution + // time (apache/datafusion#20045). Comet does not yet support view types, so + // cast the result back to the non-view variant. + let scalar_expr = match data_type { + DataType::Utf8View => Arc::new(CastExpr::new( + scalar_expr, + DataType::Utf8, + Some(CastOptions { + safe: false, + ..Default::default() + }), + )) as Arc, + DataType::BinaryView => Arc::new(CastExpr::new( + scalar_expr, + DataType::Binary, + Some(CastOptions { + safe: false, + ..Default::default() + }), + )) as Arc, + _ => scalar_expr, + }; + Ok(scalar_expr) } @@ -3664,6 +3696,24 @@ fn extract_literal_as_datum(expr: &spark_expression::Expr) -> Option bool { + match sig { + TypeSignature::Coercible(_) + | TypeSignature::String(_) + | TypeSignature::Numeric(_) + | TypeSignature::Comparable(_) => true, + TypeSignature::OneOf(sigs) => sigs.iter().any(needs_fields_coercion), + _ => false, + } +} + #[cfg(test)] mod tests { use futures::{poll, StreamExt}; @@ -4132,6 +4182,7 @@ mod tests { #[test] fn test_array_repeat() { + // Use built-in ArrayRepeat, not SparkArrayRepeat (see jni_api.rs comment) let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); let planner = PhysicalPlanner::new(Arc::from(session_ctx), 0); @@ -4250,7 +4301,7 @@ mod tests { "+--------------+", "| [0] |", "| [3, 3, 3, 3] |", - "| [] |", + "| |", "+--------------+", ]; assert_batches_eq!(expected, &[batch]); diff --git a/native/core/src/parquet/encryption_support.rs b/native/core/src/parquet/encryption_support.rs index afcae086a1..5ad6003fbc 100644 --- a/native/core/src/parquet/encryption_support.rs +++ b/native/core/src/parquet/encryption_support.rs @@ -19,7 +19,7 @@ use crate::execution::operators::ExecutionError; use crate::jvm_bridge::{check_exception, JVMClasses}; use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion::common::extensions_options; +use datafusion::common::{extensions_options, Result as DataFusionResult}; use datafusion::config::EncryptionFactoryOptions; use datafusion::error::DataFusionError; use datafusion::execution::parquet_encryption::EncryptionFactory; @@ -54,7 +54,7 @@ impl EncryptionFactory for CometEncryptionFactory { _options: &EncryptionFactoryOptions, _schema: &SchemaRef, _file_path: &Path, - ) -> Result>, DataFusionError> { + ) -> DataFusionResult>> { Err(DataFusionError::NotImplemented( "Comet does not support Parquet encryption yet." .parse() @@ -69,7 +69,7 @@ impl EncryptionFactory for CometEncryptionFactory { &self, options: &EncryptionFactoryOptions, file_path: &Path, - ) -> Result>, DataFusionError> { + ) -> DataFusionResult>> { let config: CometEncryptionConfig = options.to_extension_options()?; let full_path: String = config.uri_base + file_path.as_ref(); diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 9f51fd42f6..af79d9082d 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -116,7 +116,7 @@ impl PhysicalExprAdapterFactory for SparkPhysicalExprAdapterFactory { &self, logical_file_schema: SchemaRef, physical_file_schema: SchemaRef, - ) -> Arc { + ) -> DataFusionResult> { // When case-insensitive, remap physical schema field names to match logical // field names. The DefaultPhysicalExprAdapter uses exact name matching, so // without this remapping, columns like "a" won't match logical "A" and will @@ -163,9 +163,9 @@ impl PhysicalExprAdapterFactory for SparkPhysicalExprAdapterFactory { let default_adapter = default_factory.create( Arc::clone(&logical_file_schema), Arc::clone(&adapted_physical_schema), - ); + )?; - Arc::new(SparkPhysicalExprAdapter { + Ok(Arc::new(SparkPhysicalExprAdapter { logical_file_schema, physical_file_schema: adapted_physical_schema, parquet_options: self.parquet_options.clone(), @@ -173,7 +173,7 @@ impl PhysicalExprAdapterFactory for SparkPhysicalExprAdapterFactory { default_adapter, logical_to_physical_names, original_physical_schema, - }) + })) } } @@ -296,12 +296,26 @@ impl SparkPhysicalExprAdapter { ) -> DataFusionResult> { expr.transform(|e| { if let Some(column) = e.as_any().downcast_ref::() { - let col_idx = column.index(); let col_name = column.name(); - let logical_field = self.logical_file_schema.fields().get(col_idx); - // Look up physical field by name instead of index for correctness - // when logical and physical schemas have different column orderings + // Resolve fields by name because this is the fallback path + // that runs on the original expression when the default + // adapter fails. The original expression was built against + // the required (pruned) schema, so column indices refer to + // that schema — not the logical or physical file schemas. + // DataFusion's DefaultPhysicalExprAdapter::resolve_physical_column + // also resolves by name for the same reason. + let logical_field = if self.parquet_options.case_sensitive { + self.logical_file_schema + .fields() + .iter() + .find(|f| f.name() == col_name) + } else { + self.logical_file_schema + .fields() + .iter() + .find(|f| f.name().eq_ignore_ascii_case(col_name)) + }; let physical_field = if self.parquet_options.case_sensitive { self.physical_file_schema .fields() @@ -314,12 +328,31 @@ impl SparkPhysicalExprAdapter { .find(|f| f.name().eq_ignore_ascii_case(col_name)) }; - if let (Some(logical_field), Some(physical_field)) = (logical_field, physical_field) + // Remap the column index to the physical file schema so + // downstream evaluation reads the correct column from the + // parquet batch. + let physical_index = if self.parquet_options.case_sensitive { + self.physical_file_schema.index_of(col_name).ok() + } else { + self.physical_file_schema + .fields() + .iter() + .position(|f| f.name().eq_ignore_ascii_case(col_name)) + }; + + if let (Some(logical_field), Some(physical_field), Some(phys_idx)) = + (logical_field, physical_field, physical_index) { + let remapped: Arc = if column.index() != phys_idx { + Arc::new(Column::new(col_name, phys_idx)) + } else { + Arc::clone(&e) + }; + if logical_field.data_type() != physical_field.data_type() { let cast_expr: Arc = Arc::new( CometCastColumnExpr::new( - Arc::clone(&e), + remapped, Arc::clone(physical_field), Arc::clone(logical_field), None, @@ -327,6 +360,8 @@ impl SparkPhysicalExprAdapter { .with_parquet_options(self.parquet_options.clone()), ); return Ok(Transformed::yes(cast_expr)); + } else if column.index() != phys_idx { + return Ok(Transformed::yes(remapped)); } } } diff --git a/native/hdfs/src/object_store/hdfs.rs b/native/hdfs/src/object_store/hdfs.rs index a93774cffe..e368d7cf94 100644 --- a/native/hdfs/src/object_store/hdfs.rs +++ b/native/hdfs/src/object_store/hdfs.rs @@ -31,8 +31,9 @@ use fs_hdfs::walkdir::HdfsWalkDir; use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use object_store::{ path::{self, Path}, - Error, GetOptions, GetRange, GetResult, GetResultPayload, ListResult, MultipartUpload, - ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, + CopyMode, CopyOptions, Error, GetOptions, GetRange, GetResult, GetResultPayload, ListResult, + MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, + PutResult, Result, }; /// scheme for HDFS File System @@ -144,62 +145,6 @@ impl ObjectStore for HadoopFileSystem { unimplemented!() } - async fn get(&self, location: &Path) -> Result { - let hdfs = self.hdfs.clone(); - let hdfs_root = self.hdfs.url().to_owned(); - let location = HadoopFileSystem::path_to_filesystem(location); - - let (blob, object_metadata, range) = maybe_spawn_blocking(move || { - let file = hdfs.open(&location).map_err(to_error)?; - - let file_status = file.get_file_status().map_err(to_error)?; - - let to_read = file_status.len(); - let mut total_read = 0; - let mut buf = vec![0; to_read]; - while total_read < to_read { - let read = file.read(buf.as_mut_slice()).map_err(to_error)?; - if read <= 0 { - break; - } - total_read += read as usize; - } - - if total_read != to_read { - return Err(Error::Generic { - store: "HadoopFileSystem", - source: Box::new(HdfsErr::Generic(format!( - "Error reading path {} with expected size {} and actual size {}", - file.path(), - to_read, - total_read - ))), - }); - } - - file.close().map_err(to_error)?; - - let object_metadata = convert_metadata(file_status.clone(), &hdfs_root); - - let range = Range { - start: 0, - end: file_status.len() as u64, - }; - - Ok((buf.into(), object_metadata, range)) - }) - .await?; - - Ok(GetResult { - payload: GetResultPayload::Stream( - futures::stream::once(async move { Ok(blob) }).boxed(), - ), - meta: object_metadata, - range, - attributes: Default::default(), - }) - } - async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { if options.if_match.is_some() || options.if_none_match.is_some() { return Err(Error::Generic { @@ -249,51 +194,40 @@ impl ObjectStore for HadoopFileSystem { }) } - async fn get_range(&self, location: &Path, range: Range) -> Result { + async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { let hdfs = self.hdfs.clone(); let location = HadoopFileSystem::path_to_filesystem(location); + let ranges = ranges.to_vec(); maybe_spawn_blocking(move || { let file = hdfs.open(&location).map_err(to_error)?; - let buf = Self::read_range(&range, &file)?; + let result = ranges + .iter() + .map(|range| Self::read_range(range, &file)) + .collect::>>()?; file.close().map_err(to_error)?; - - Ok(buf) + Ok(result) }) .await } - async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { - coalesce_ranges( - ranges, - |range| self.get_range(location, range), - HDFS_COALESCE_DEFAULT, - ) - .await - } - - async fn head(&self, location: &Path) -> Result { - let hdfs = self.hdfs.clone(); - let hdfs_root = self.hdfs.url().to_owned(); - let location = HadoopFileSystem::path_to_filesystem(location); - - maybe_spawn_blocking(move || { - let file_status = hdfs.get_file_status(&location).map_err(to_error)?; - Ok(convert_metadata(file_status, &hdfs_root)) - }) - .await - } - - async fn delete(&self, location: &Path) -> Result<()> { + fn delete_stream( + &self, + locations: BoxStream<'static, Result>, + ) -> BoxStream<'static, Result> { let hdfs = self.hdfs.clone(); - let location = HadoopFileSystem::path_to_filesystem(location); - - maybe_spawn_blocking(move || { - hdfs.delete(&location, false).map_err(to_error)?; - - Ok(()) - }) - .await + locations + .map(move |location| { + let hdfs = hdfs.clone(); + maybe_spawn_blocking(move || { + let location = location?; + let fs_path = HadoopFileSystem::path_to_filesystem(&location); + hdfs.delete(&fs_path, false).map_err(to_error)?; + Ok(location) + }) + }) + .buffered(10) + .boxed() } /// List all of the leaf files under the prefix path. @@ -387,7 +321,7 @@ impl ObjectStore for HadoopFileSystem { drop(parts); if is_directory { - common_prefixes.insert(prefix.child(common_prefix)); + common_prefixes.insert(prefix.clone().join(common_prefix)); } else { objects.push(convert_metadata(entry, &hdfs_root)); } @@ -402,61 +336,33 @@ impl ObjectStore for HadoopFileSystem { .await } - /// Copy an object from one path to another. - /// If there exists an object at the destination, it will be overwritten. - async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> { let hdfs = self.hdfs.clone(); let from = HadoopFileSystem::path_to_filesystem(from); let to = HadoopFileSystem::path_to_filesystem(to); maybe_spawn_blocking(move || { - // We need to make sure the source exist if !hdfs.exist(&from) { return Err(Error::NotFound { path: from.clone(), source: Box::new(HdfsErr::FileNotFound(from)), }); } - // Delete destination if exists - if hdfs.exist(&to) { - hdfs.delete(&to, false).map_err(to_error)?; - } - - fs_hdfs::util::HdfsUtil::copy(hdfs.as_ref(), &from, hdfs.as_ref(), &to) - .map_err(to_error)?; - - Ok(()) - }) - .await - } - - /// It's only allowed for the same HDFS - async fn rename(&self, from: &Path, to: &Path) -> Result<()> { - let hdfs = self.hdfs.clone(); - let from = HadoopFileSystem::path_to_filesystem(from); - let to = HadoopFileSystem::path_to_filesystem(to); - - maybe_spawn_blocking(move || { - hdfs.rename(&from, &to, true).map_err(to_error)?; - - Ok(()) - }) - .await - } - - /// Copy an object from one path to another, only if destination is empty. - /// Will return an error if the destination already has an object. - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - let hdfs = self.hdfs.clone(); - let from = HadoopFileSystem::path_to_filesystem(from); - let to = HadoopFileSystem::path_to_filesystem(to); - maybe_spawn_blocking(move || { - if hdfs.exist(&to) { - return Err(Error::AlreadyExists { - path: from, - source: Box::new(HdfsErr::FileAlreadyExists(to)), - }); + match options.mode { + CopyMode::Overwrite => { + if hdfs.exist(&to) { + hdfs.delete(&to, false).map_err(to_error)?; + } + } + CopyMode::Create => { + if hdfs.exist(&to) { + return Err(Error::AlreadyExists { + path: from, + source: Box::new(HdfsErr::FileAlreadyExists(to)), + }); + } + } } fs_hdfs::util::HdfsUtil::copy(hdfs.as_ref(), &from, hdfs.as_ref(), &to) diff --git a/native/shuffle/src/shuffle_writer.rs b/native/shuffle/src/shuffle_writer.rs index e649aaac69..3a297efbd7 100644 --- a/native/shuffle/src/shuffle_writer.rs +++ b/native/shuffle/src/shuffle_writer.rs @@ -35,7 +35,6 @@ use datafusion::{ metrics::{ExecutionPlanMetricsSet, MetricsSet}, stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, - Statistics, }, }; use datafusion_comet_common::tracing::with_trace_async; @@ -62,7 +61,7 @@ pub struct ShuffleWriterExec { /// Metrics metrics: ExecutionPlanMetricsSet, /// Cache for expensive-to-compute plan properties - cache: PlanProperties, + cache: Arc, /// The compression codec to use when compressing shuffle blocks codec: CompressionCodec, tracing_enabled: bool, @@ -82,12 +81,12 @@ impl ShuffleWriterExec { tracing_enabled: bool, write_buffer_size: usize, ) -> Result { - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&input.schema())), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(ShuffleWriterExec { input, @@ -133,11 +132,7 @@ impl ExecutionPlan for ShuffleWriterExec { Some(self.metrics.clone_inner()) } - fn statistics(&self) -> Result { - self.input.partition_statistics(None) - } - - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git a/native/spark-expr/src/array_funcs/array_compact.rs b/native/spark-expr/src/array_funcs/array_compact.rs new file mode 100644 index 0000000000..4653f966a5 --- /dev/null +++ b/native/spark-expr/src/array_funcs/array_compact.rs @@ -0,0 +1,164 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Spark-compatible array_compact: removes null elements from an array. +// +// DataFusion's array_remove_all(arr, null) returns NULL for the entire row +// when the element-to-remove is NULL (DF 53, PR #21013). Spark's array_compact +// needs to actually remove null elements, so we implement it directly. +// +// TODO: upstream this to datafusion-spark crate + +use arrow::array::{ + make_array, Array, ArrayRef, Capacities, GenericListArray, MutableArrayData, NullBufferBuilder, + OffsetSizeTrait, +}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::{DataType, FieldRef}; +use datafusion::common::{exec_err, utils::take_function_args, Result}; +use datafusion::logical_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, +}; +use std::any::Any; +use std::sync::Arc; + +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkArrayCompact { + signature: Signature, +} + +impl Default for SparkArrayCompact { + fn default() -> Self { + Self::new() + } +} + +impl SparkArrayCompact { + pub fn new() -> Self { + Self { + signature: Signature::new(TypeSignature::Any(1), Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for SparkArrayCompact { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "spark_array_compact" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + datafusion::common::internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args( + &self, + args: datafusion::logical_expr::ReturnFieldArgs, + ) -> Result { + Ok(Arc::clone(&args.arg_fields[0])) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let [array] = take_function_args(self.name(), &args.args)?; + match array { + ColumnarValue::Array(array) => match array.data_type() { + DataType::List(_) => Ok(ColumnarValue::Array(compact_list::( + array.as_any().downcast_ref().unwrap(), + )?)), + DataType::LargeList(_) => Ok(ColumnarValue::Array(compact_list::( + array.as_any().downcast_ref().unwrap(), + )?)), + other => exec_err!("spark_array_compact does not support type '{other}'"), + }, + ColumnarValue::Scalar(scalar) => { + let array = scalar.to_array()?; + let result = match array.data_type() { + DataType::List(_) => { + compact_list::(array.as_any().downcast_ref().unwrap())? + } + DataType::LargeList(_) => { + compact_list::(array.as_any().downcast_ref().unwrap())? + } + other => { + return exec_err!("spark_array_compact does not support type '{other}'") + } + }; + Ok(ColumnarValue::Array(result)) + } + } + } +} + +/// Remove null elements from each row of a list array. +fn compact_list( + list_array: &GenericListArray, +) -> Result { + let list_field = match list_array.data_type() { + DataType::List(field) | DataType::LargeList(field) => field, + other => { + return exec_err!("Expected List or LargeList, got {other:?}"); + } + }; + + let values = list_array.values(); + let original_data = values.to_data(); + let mut offsets = Vec::::with_capacity(list_array.len() + 1); + offsets.push(OffsetSize::zero()); + let mut mutable = MutableArrayData::with_capacities( + vec![&original_data], + false, + Capacities::Array(original_data.len()), + ); + let mut valid = NullBufferBuilder::new(list_array.len()); + + for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() { + if list_array.is_null(row_index) { + offsets.push(offsets[row_index]); + valid.append_null(); + continue; + } + + let start = offset_window[0].to_usize().unwrap(); + let end = offset_window[1].to_usize().unwrap(); + let mut copied = 0usize; + + for i in start..end { + if !values.is_null(i) { + mutable.extend(0, i, i + 1); + copied += 1; + } + } + + offsets.push(offsets[row_index] + OffsetSize::usize_as(copied)); + valid.append_non_null(); + } + + let new_values = make_array(mutable.freeze()); + Ok(Arc::new(GenericListArray::::try_new( + Arc::clone(list_field), + OffsetBuffer::new(offsets.into()), + new_values, + valid.finish(), + )?)) +} diff --git a/native/spark-expr/src/array_funcs/mod.rs b/native/spark-expr/src/array_funcs/mod.rs index 3ef50a252f..2bd1b9631b 100644 --- a/native/spark-expr/src/array_funcs/mod.rs +++ b/native/spark-expr/src/array_funcs/mod.rs @@ -15,11 +15,13 @@ // specific language governing permissions and limitations // under the License. +mod array_compact; mod array_insert; mod get_array_struct_fields; mod list_extract; mod size; +pub use array_compact::SparkArrayCompact; pub use array_insert::ArrayInsert; pub use get_array_struct_fields::GetArrayStructFields; pub use list_extract::ListExtract; diff --git a/native/spark-expr/src/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs index 1eaf0b2a97..9c91bb69c9 100644 --- a/native/spark-expr/src/comet_scalar_funcs.rs +++ b/native/spark-expr/src/comet_scalar_funcs.rs @@ -23,8 +23,8 @@ use crate::math_funcs::modulo_expr::spark_modulo; use crate::{ spark_ceil, spark_decimal_div, spark_decimal_integral_div, spark_floor, spark_isnan, spark_lpad, spark_make_decimal, spark_read_side_padding, spark_round, spark_rpad, spark_unhex, - spark_unscaled_value, EvalMode, SparkContains, SparkDateDiff, SparkDateTrunc, SparkMakeDate, - SparkSizeFunc, + spark_unscaled_value, EvalMode, SparkArrayCompact, SparkContains, SparkDateDiff, + SparkDateTrunc, SparkMakeDate, SparkSizeFunc, }; use arrow::datatypes::DataType; use datafusion::common::{DataFusionError, Result as DataFusionResult}; @@ -196,6 +196,7 @@ pub fn create_comet_physical_fun_with_eval_mode( fn all_scalar_functions() -> Vec> { vec![ + Arc::new(ScalarUDF::new_from_impl(SparkArrayCompact::default())), Arc::new(ScalarUDF::new_from_impl(SparkContains::default())), Arc::new(ScalarUDF::new_from_impl(SparkDateDiff::default())), Arc::new(ScalarUDF::new_from_impl(SparkDateTrunc::default())), diff --git a/native/spark-expr/src/math_funcs/round.rs b/native/spark-expr/src/math_funcs/round.rs index d6302d9b7b..9605f93f17 100644 --- a/native/spark-expr/src/math_funcs/round.rs +++ b/native/spark-expr/src/math_funcs/round.rs @@ -110,6 +110,8 @@ pub fn spark_round( let ColumnarValue::Scalar(ScalarValue::Int64(Some(point))) = point else { return internal_err!("Invalid point argument for Round(): {:#?}", point); }; + // DataFusion's RoundFunc expects Int32 for decimal_places + let point_i32 = ColumnarValue::Scalar(ScalarValue::Int32(Some(*point as i32))); match value { ColumnarValue::Array(array) => match array.data_type() { DataType::Int64 if *point < 0 => { @@ -133,7 +135,7 @@ pub fn spark_round( let round_udf = RoundFunc::new(); let return_field = Arc::new(Field::new("round", array.data_type().clone(), true)); let args_for_round = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::clone(array)), args[1].clone()], + args: vec![ColumnarValue::Array(Arc::clone(array)), point_i32.clone()], number_rows: array.len(), return_field, arg_fields: vec![], @@ -166,7 +168,7 @@ pub fn spark_round( let data_type = a.data_type(); let return_field = Arc::new(Field::new("round", data_type, true)); let args_for_round = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(a.clone()), args[1].clone()], + args: vec![ColumnarValue::Scalar(a.clone()), point_i32.clone()], number_rows: 1, return_field, arg_fields: vec![], diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 47a6e91421..1e2eeaaa28 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -323,14 +323,16 @@ object CometArrayCompact extends CometExpressionSerde[Expression] { val elementType = child.dataType.asInstanceOf[ArrayType].elementType val arrayExprProto = exprToProto(child, inputs, binding) - val nullLiteralProto = exprToProto(Literal(null, elementType), Seq.empty) + // Use Comet's SparkArrayCompact UDF instead of DataFusion's array_remove_all. + // DF 53 changed array_remove_all to return NULL when the element arg is NULL, + // which breaks the array_compact use case. + // TODO: upstream to datafusion-spark crate val arrayCompactScalarExpr = scalarFunctionExprToProtoWithReturnType( - "array_remove_all", + "spark_array_compact", ArrayType(elementType = elementType), false, - arrayExprProto, - nullLiteralProto) + arrayExprProto) optExprWithInfo(arrayCompactScalarExpr, expr, expr.children: _*) } } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index 9afffe20bc..652fdfc96d 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -155,16 +155,47 @@ case class CometScanExec( /** * Returns the data filters that are supported for this scan implementation. For - * native_datafusion scans, this excludes dynamic pruning filters (subqueries) + * native_datafusion scans, this excludes dynamic pruning filters (subqueries) and null checks + * on array columns (see [[isNullCheckOnArrayColumn]]). */ lazy val supportedDataFilters: Seq[Expression] = { if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION) { - dataFilters.filterNot(isDynamicPruningFilter) + dataFilters + .filterNot(isDynamicPruningFilter) + .filterNot(isNullCheckOnArrayColumn) } else { dataFilters } } + /** + * Returns true for IsNotNull/IsNull predicates on ArrayType columns. + * + * These must be excluded from native scan data filters because: + * + * 1. Parquet does not support predicate pushdown on repeated columns. The Parquet library's + * SchemaCompatibilityValidator rejects filter predicates on repeated fields entirely + * (SPARK-39393, PARQUET-34). Spark's own ParquetFilters excludes REPEATED columns from + * pushdown for the same reason. + * + * 2. When Comet attaches these filters via ParquetSource.with_predicate(), DataFusion's list + * predicate pushdown (PR #19545) considers IsNotNull on List columns a supported predicate and + * pushes it into the Parquet reader as a RowFilter. This triggers an arrow-rs bug where + * ListArrayReader crashes on bare repeated primitives ("item_reader def levels are None"). + * + * 3. Even without the arrow-rs bug, the filter is redundant: a bare repeated field is never + * null (an empty repeated field means zero elements, not null), and DataFusion's optimizer + * would eliminate the filter if it went through the normal planning path. + * + * Filtering these out is safe -- the predicate is still evaluated after reading, so correctness + * is preserved. + */ + private def isNullCheckOnArrayColumn(expr: Expression): Boolean = expr match { + case IsNotNull(child) => child.dataType.isInstanceOf[ArrayType] + case IsNull(child) => child.dataType.isInstanceOf[ArrayType] + case _ => false + } + @transient private lazy val pushedDownFilters = { getPushedDownFilters(relation, supportedDataFilters) diff --git a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala index cf79f6af0f..35a7313f93 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala @@ -22,7 +22,14 @@ package org.apache.comet.exec import org.scalactic.source.Position import org.scalatest.Tag -import org.apache.spark.sql.CometTestBase +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.parquet.hadoop.ParquetWriter +import org.apache.parquet.hadoop.api.WriteSupport +import org.apache.parquet.hadoop.api.WriteSupport.WriteContext +import org.apache.parquet.io.api.RecordConsumer +import org.apache.parquet.schema.MessageTypeParser +import org.apache.spark.sql.{CometTestBase, Row} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions.{array, col} import org.apache.spark.sql.internal.SQLConf @@ -293,8 +300,8 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper test("native reader - read a STRUCT subfield - field from second") { testSingleLineQuery( """ - |select 1 a, named_struct('a', 1, 'b', 'n') c0 - |""".stripMargin, + |select 1 a, named_struct('a', 1, 'b', 'n') c0 + |""".stripMargin, "select c0.b from tbl") } @@ -598,8 +605,112 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper test("native reader - support ARRAY literal nested ARRAY fields") { testSingleLineQuery( """ - |select 1 a - |""".stripMargin, + |select 1 a + |""".stripMargin, "select array(array(1, 2, null), array(), array(10), null, array(null)) from tbl") } + + // Regression test found during DataFusion 53 upgrade (PR #3629). + // Spark's SchemaPruningSuite tests (e.g. "select a single complex field array + // and in clause", "select explode of nested field of array of struct") were + // failing because wrap_all_type_mismatches in Comet's schema adapter looked up + // the logical field by column index instead of by name. Filter expressions + // built against the pruned required_schema had "friends" at index 0, but the + // full logical_file_schema had "id: Int32" at index 0. + test("native reader - nested schema pruning with array of struct and filter") { + testSingleLineQuery( + """ + |select + | 0 as id, + | named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name, + | '123 Main Street' as address, + | 1 as pets, + | array( + | named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith') + | ) as friends + |union all + |select + | 1 as id, + | named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name, + | '321 Wall Street' as address, + | 3 as pets, + | array( + | named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones') + | ) as friends + |""".stripMargin, + "select friends.middle from tbl where friends.first[0] = 'Susan'") + } + + // SPARK-39393: bare "repeated int32" (protobuf-style, no wrapping list group) + // should be readable without crashing on missing def levels. + // SPARK-39393: Parquet does not support predicate pushdown on repeated columns. + // A bare "repeated int32 f" (protobuf-style, no wrapping LIST group) must not + // have IsNotNull pushed into the Parquet reader. Comet filters these out in + // CometScanExec.supportedDataFilters so the predicate is evaluated after + // reading. Without that, DataFusion's list predicate pushdown would push + // IsNotNull as a RowFilter, triggering an arrow-rs ListArrayReader crash. + test("native reader - read bare repeated primitive field") { + withTempDir { dir => + val path = new Path(dir.toURI.toString, "protobuf-parquet").toString + val schema = + """message protobuf_style { + | repeated int32 f; + |} + """.stripMargin + + writeDirect( + path, + schema, + { rc => + rc.startMessage() + rc.startField("f", 0) + rc.addInteger(1) + rc.addInteger(2) + rc.endField("f", 0) + rc.endMessage() + }) + + // Read without filter + checkAnswer(spark.read.parquet(dir.getCanonicalPath), Seq(Row(Seq(1, 2)))) + + // Read with isnotnull filter — the filter should not be pushed down into + // the Parquet reader for repeated primitive fields (SPARK-39393), but the + // query should still return correct results by evaluating the filter after + // reading. + checkAnswer( + spark.read.parquet(dir.getCanonicalPath).filter("isnotnull(f)"), + Seq(Row(Seq(1, 2)))) + } + } + + /** Write a Parquet file using a raw RecordConsumer for full schema control. */ + private def writeDirect( + path: String, + schema: String, + recordWriters: (RecordConsumer => Unit)*): Unit = { + val messageType = MessageTypeParser.parseMessageType(schema) + val writeSupport = new DirectWriteSupport(messageType) + class Builder extends ParquetWriter.Builder[RecordConsumer => Unit, Builder](new Path(path)) { + override def getWriteSupport(conf: Configuration): WriteSupport[RecordConsumer => Unit] = + writeSupport + override def self(): Builder = this + } + val writer = new Builder().build() + try recordWriters.foreach(writer.write) + finally writer.close() + } +} + +private class DirectWriteSupport(schema: org.apache.parquet.schema.MessageType) + extends WriteSupport[RecordConsumer => Unit] { + private var recordConsumer: RecordConsumer = _ + + override def init(configuration: Configuration): WriteContext = + new WriteContext(schema, java.util.Collections.emptyMap()) + + override def write(recordWriter: RecordConsumer => Unit): Unit = + recordWriter(recordConsumer) + + override def prepareForWrite(rc: RecordConsumer): Unit = + this.recordConsumer = rc }