From 39c3c4ec72f39cf9d1ce477ec19c62fc233f3b04 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 06:38:47 -0500 Subject: [PATCH 01/30] stash --- native/Cargo.lock | 690 +++++++++++------- native/Cargo.toml | 14 +- native/core/Cargo.toml | 2 +- native/core/src/execution/operators/expand.rs | 8 +- .../src/execution/operators/iceberg_scan.rs | 12 +- .../src/execution/operators/parquet_writer.rs | 12 +- native/core/src/execution/operators/scan.rs | 8 +- native/core/src/execution/planner.rs | 1 + .../src/execution/shuffle/shuffle_writer.rs | 12 +- native/core/src/parquet/encryption_support.rs | 6 +- native/core/src/parquet/parquet_support.rs | 2 +- native/core/src/parquet/schema_adapter.rs | 8 +- 12 files changed, 481 insertions(+), 294 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 78fa3fa124..0d40eb7da3 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -151,23 +151,23 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "602268ce9f569f282cedb9a9f6bac569b680af47b9b077d515900c03c5d190da" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", + "arrow-arith 58.0.0", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", "arrow-csv", - "arrow-data", - "arrow-ipc", + "arrow-data 58.0.0", + "arrow-ipc 58.0.0", "arrow-json", - "arrow-ord", + "arrow-ord 58.0.0", "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", + "arrow-string 58.0.0", ] [[package]] @@ -176,10 +176,24 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ - "arrow-array", - 
"arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "num-traits", +] + +[[package]] +name = "arrow-arith" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd53c6bf277dea91f136ae8e3a5d7041b44b5e489e244e637d00ae302051f56f" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "num-traits", ] @@ -191,9 +205,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "half", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-array" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e53796e07a6525edaf7dc28b540d477a934aff14af97967ad1d5550878969b9e" +dependencies = [ + "ahash", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "chrono-tz", "half", @@ -215,18 +247,51 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrow-buffer" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c1a85bb2e94ee10b76531d8bc3ce9b7b4c0d508cabfb17d477f63f2617bd20" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + [[package]] name = "arrow-cast" version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 
57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "atoi", + "base64", + "chrono", + "half", + "lexical-core", + "num-traits", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89fb245db6b0e234ed8e15b644edb8664673fefe630575e94e62cd9d489a8a26" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "atoi", "base64", "chrono", @@ -239,13 +304,13 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "d374882fb465a194462527c0c15a93aa19a554cf690a6b77a26b2a02539937a7" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-cast 58.0.0", + "arrow-schema 58.0.0", "chrono", "csv", "csv-core", @@ -258,8 +323,21 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", + "half", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-data" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189d210bc4244c715fa3ed9e6e22864673cccb73d5da28c2723fb2e527329b33" +dependencies = [ + "arrow-buffer 58.0.0", + "arrow-schema 58.0.0", "half", "num-integer", "num-traits", @@ -271,26 +349,40 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + 
"arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "flatbuffers", +] + +[[package]] +name = "arrow-ipc" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7968c2e5210c41f4909b2ef76f6e05e172b99021c2def5edf3cc48fdd39d1d6c" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "flatbuffers", "lz4_flex", ] [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "92111dba5bf900f443488e01f00d8c4ddc2f47f5c50039d18120287b580baa22" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "half", "indexmap 2.13.0", @@ -310,23 +402,36 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", +] + +[[package]] +name = "arrow-ord" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "211136cb253577ee1a6665f741a13136d4e563f64f5093ffd6fb837af90b9495" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", ] [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "8e0f20145f9f5ea3fe383e2ba7a7487bf19be36aa9dbf5dd6a1f92f657179663" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "half", ] @@ -335,6 +440,12 @@ name = "arrow-schema" version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" + +[[package]] +name = "arrow-schema" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b47e0ca91cc438d2c7879fe95e0bca5329fff28649e30a88c6f760b1faeddcb" dependencies = [ "bitflags 2.11.0", "serde_core", @@ -348,10 +459,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "num-traits", +] + +[[package]] +name = "arrow-select" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "750a7d1dda177735f5e82a314485b6915c7cccdbb278262ac44090f4aba4a325" +dependencies = [ + "ahash", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "num-traits", ] @@ -361,11 +486,28 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "memchr", + "num-traits", + "regex", + "regex-syntax", +] + 
+[[package]] +name = "arrow-string" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1eab1208bc4fe55d768cdc9b9f3d9df5a794cdb3ee2586bf89f9b30dc31ad8c" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "memchr", "num-traits", "regex", @@ -589,9 +731,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.0" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9" +checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" dependencies = [ "aws-lc-sys", "zeroize", @@ -599,9 +741,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.37.1" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" +checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" dependencies = [ "cc", "cmake", @@ -1682,12 +1824,11 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f1f4a9060ae6e650d3dff5dc7a21266fea1302d890768d45b4b28586e830f" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-schema", + "arrow-schema 58.0.0", "async-trait", "bytes", "chrono", @@ -1719,9 +1860,9 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "regex", "sqlparser", @@ -1733,9 +1874,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"14417a3ee4ae3d092b56cd6c1d32e8ff3e2c9ec130ecb2276ec91c89fd599399" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-trait", @@ -1751,16 +1891,15 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0eba824adb45a4b3ac6f0251d40df3f6a9382371cad136f4f14ac9ebc6bc10" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-trait", @@ -1776,7 +1915,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", ] [[package]] @@ -1812,12 +1951,12 @@ dependencies = [ "lz4_flex", "mimalloc", "num", - "object_store", + "object_store 0.13.1", "object_store_opendal", "once_cell", "opendal", "parking_lot", - "parquet", + "parquet 58.0.0", "paste", "pprof", "procfs", @@ -1863,7 +2002,7 @@ dependencies = [ "datafusion-comet-fs-hdfs3", "fs-hdfs3", "futures", - "object_store", + "object_store 0.13.1", "tokio", ] @@ -1898,22 +2037,22 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0039deefbd00c56adf5168b7ca58568fb058e4ba4c5a03b09f8be371b4e434b6" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "ahash", "arrow", - "arrow-ipc", + "arrow-ipc 58.0.0", "chrono", "half", "hashbrown 0.16.1", "hex", "indexmap 2.13.0", + "itertools 0.14.0", "libc", "log", - "object_store", - "parquet", + "object_store 0.13.1", + "parquet 58.0.0", "paste", "sqlparser", "tokio", @@ -1922,9 +2061,8 @@ dependencies = [ [[package]] 
name = "datafusion-common-runtime" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec7e3e60b813048331f8fb9673583173e5d2dd8fef862834ee871fc98b57ca7" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "futures", "log", @@ -1933,9 +2071,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "802068957f620302ecf05f84ff4019601aeafd36f5f3f1334984af2e34265129" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-compression", @@ -1958,7 +2095,7 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store", + "object_store 0.13.1", "rand 0.9.2", "tokio", "tokio-util", @@ -1968,12 +2105,11 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fc387d5067c62d494a6647d29c5ad4fcdd5a6e50ab4ea1d2568caa2d66f2cc" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-ipc", + "arrow-ipc 58.0.0", "async-trait", "bytes", "datafusion-common", @@ -1986,15 +2122,14 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd5e20579bb6c8bd4e6c620253972fb723822030c280dd6aa047f660d09eeba" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-trait", @@ -2008,16 +2143,15 @@ 
dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.1", "regex", "tokio", ] [[package]] name = "datafusion-datasource-json" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0788b0d48fcef31880a02013ea3cc18e5a4e0eacc3b0abdd2cd0597b99dc96e" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-trait", @@ -2031,15 +2165,16 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.1", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66639b70f1f363f5f0950733170100e588f1acfacac90c1894e231194aa35957" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-trait", @@ -2059,35 +2194,35 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", - "parquet", + "parquet 58.0.0", "tokio", ] [[package]] name = "datafusion-doc" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e44b41f3e8267c6cf3eec982d63f34db9f1dd5f30abfd2e1f124f0871708952e" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" [[package]] name = "datafusion-execution" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e456f60e5d38db45335e84617006d90af14a8c8c5b8e959add708b2daaa0e2c" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", + "arrow-buffer 
58.0.0", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", - "object_store", + "object_store 0.13.1", "parking_lot", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "tempfile", "url", @@ -2095,9 +2230,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6507c719804265a58043134580c1c20767e7c23ba450724393f03ec982769ad9" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-trait", @@ -2117,9 +2251,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a413caa9c5885072b539337aed68488f0291653e8edd7d676c92df2480f6cab0" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "datafusion-common", @@ -2130,12 +2263,11 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "189256495dc9cbbb8e20dbcf161f60422e628d201a78df8207e44bd4baefadb6" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-buffer", + "arrow-buffer 58.0.0", "base64", "blake2", "blake3", @@ -2151,6 +2283,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -2161,9 +2294,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e73dfee4cd67c4a507ffff4c5a711d39983adf544adbc09c09bf06f789f413" +version = "53.0.0" +source = 
"git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "ahash", "arrow", @@ -2177,14 +2309,14 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87727bd9e65f4f9ac6d608c9810b7da9eaa3b18b26a4a4b76520592d49020acf" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "ahash", "arrow", @@ -2195,12 +2327,11 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5ef761359224b7c2b5a1bfad6296ac63225f8583d08ad18af9ba1a89ac3887" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-ord", + "arrow-ord 58.0.0", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2211,16 +2342,17 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b17dac25dfda2d2a90ff0ad1c054a11fb1523766226bec6e9bd8c410daee2ae" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "async-trait", @@ -2234,9 +2366,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c594a29ddb22cbdbce500e4d99b5b2392c5cecb4c1086298b41d1ffec14dbb77" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "datafusion-common", @@ -2252,9 +2383,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aa1b15ed81c7543f62264a30dd49dec4b1b0b698053b968f53be32dfba4f729" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2262,9 +2392,8 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00c31c4795597aa25b74cab5174ac07a53051f27ce1e011ecaffa9eaeecef81" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "datafusion-doc", "quote", @@ -2273,9 +2402,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ccf60767c09302b2e0fc3afebb3761a6d508d07316fab8c5e93312728a21bb" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "chrono", @@ -2292,9 +2420,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64b7f277556944e4edd3558da01d9e9ff9f5416f1c0aa7fee088e57bd141a7e" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "ahash", "arrow", @@ -2315,9 +2442,8 @@ dependencies = [ 
[[package]] name = "datafusion-physical-expr-adapter" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7abaee372ea2d19c016ee9ef8629c4415257d291cdd152bc7f0b75f28af1b63" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "datafusion-common", @@ -2330,9 +2456,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42237efe621f92adc22d111b531fdbc2cc38ca9b5e02327535628fb103ae2157" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "ahash", "arrow", @@ -2347,9 +2472,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd093498bd1319c6e5c76e9dfa905e78486f01b34579ce97f2e3a49f84c37fac" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "datafusion-common", @@ -2365,14 +2489,13 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cbe61b12daf81a9f20ba03bd3541165d51f86e004ef37426b11881330eed261" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "ahash", "arrow", - "arrow-ord", - "arrow-schema", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", "async-trait", "datafusion-common", "datafusion-common-runtime", @@ -2389,6 +2512,7 @@ dependencies = [ "indexmap 2.13.0", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2396,9 +2520,8 @@ dependencies = [ 
[[package]] name = "datafusion-pruning" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0124331116db7f79df92ebfd2c3b11a8f90240f253555c9bb084f10b6fecf1dd" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "datafusion-common", @@ -2413,9 +2536,8 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1673e3c58ba618a6ea0568672f00664087b8982c581e9afd5aa6c3c79c9b431f" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "async-trait", "datafusion-common", @@ -2427,38 +2549,41 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d28510abfc85709578fcf9065325d43ee3303012c0ccec2dce351bdc577d00" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "bigdecimal", "chrono", "crc32fast", + "datafusion", "datafusion-catalog", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-functions", + "datafusion-functions-aggregate", "datafusion-functions-nested", "log", "percent-encoding", "rand 0.9.2", + "serde_json", "sha1", + "sha2", "url", ] [[package]] name = "datafusion-sql" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5272d256dab5347bb39d2040589f45d8c6b715b27edcb5fffe88cc8b9c3909cb" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", 
"log", "regex", @@ -2926,20 +3051,20 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "rand_core 0.10.0", "wasip2", "wasip3", @@ -3254,14 +3379,14 @@ dependencies = [ "anyhow", "apache-avro", "array-init", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-ord", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "arrow-string 57.3.0", "as-any", "async-trait", "backon", @@ -3281,7 +3406,7 @@ dependencies = [ "once_cell", "opendal", "ordered-float 4.6.0", - "parquet", + "parquet 57.3.0", "rand 0.8.5", "reqsign", "reqwest", @@ -3473,9 +3598,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" @@ -3533,9 +3658,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819b44bc7c87d9117eb522f14d46e918add69ff12713c475946b0a29363ed1c2" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -3548,9 +3673,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.22" +version = "0.2.23" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "470252db18ecc35fd766c0891b1e3ec6cbbcd62507e85276c01bf75d8e94d4a1" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", @@ -3559,9 +3684,9 @@ dependencies = [ [[package]] name = "jiff-tzdb" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ -4080,6 +4205,30 @@ name = "object_store" version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "http 1.4.0", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "object_store" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" dependencies = [ "async-trait", "base64", @@ -4100,7 +4249,7 @@ dependencies = [ "rand 0.9.2", "reqwest", "ring", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", @@ -4123,7 +4272,7 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store", + "object_store 0.12.5", "opendal", "pin-project", "tokio", @@ -4247,13 +4396,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + 
"arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "base64", "brotli", "bytes", @@ -4266,7 +4415,42 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "parquet" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f491d0ef1b510194426ee67ddc18a9b747ef3c42050c19322a2cd2e1666c29b" +dependencies = [ + "ahash", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-ipc 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.16.1", + "lz4_flex", + "num-bigint", + "num-integer", + "num-traits", + "object_store 0.13.1", "parquet-variant", "parquet-variant-compute", "parquet-variant-json", @@ -4283,11 +4467,11 @@ dependencies = [ [[package]] name = "parquet-variant" -version = "57.3.0" +version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184" +checksum = "00ba4e5dcbc8ad65882b7337a95c12a0f9cbb6add237c53d93b803b7d7f70f02" dependencies = [ - "arrow-schema", + "arrow-schema 58.0.0", "chrono", "half", "indexmap 2.13.0", @@ -4297,27 +4481,28 @@ dependencies = [ [[package]] name = "parquet-variant-compute" -version = "57.3.0" +version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c" +checksum = "9ec4cfb8da15565c8d211b6bc51e8eb481ea65d19132462af3f948b150ac8efe" dependencies = [ "arrow", - "arrow-schema", + "arrow-schema 58.0.0", "chrono", "half", "indexmap 2.13.0", "parquet-variant", "parquet-variant-json", + "serde_json", "uuid", ] [[package]] name = 
"parquet-variant-json" -version = "57.3.0" +version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be" +checksum = "3668ff00a6aeb29d172ba15f9d8fedf1675d79bff7d1916daa333efdeaa13e46" dependencies = [ - "arrow-schema", + "arrow-schema 58.0.0", "base64", "chrono", "parquet-variant", @@ -4711,9 +4896,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -4724,6 +4909,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.8.5" @@ -4752,7 +4943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" dependencies = [ "chacha20", - "getrandom 0.4.1", + "getrandom 0.4.2", "rand_core 0.10.0", ] @@ -5079,15 +5270,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -5439,9 +5621,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -5449,9 +5631,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", @@ -5581,7 +5763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", - "getrandom 0.4.1", + "getrandom 0.4.2", "once_cell", "rustix 1.1.4", "windows-sys 0.61.2", @@ -5756,9 +5938,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -5792,6 +5974,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", + "tokio-util", +] + [[package]] name = "tokio-util" version = "0.7.18" @@ -6036,7 +6230,7 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", diff --git a/native/Cargo.toml 
b/native/Cargo.toml index d5a6aeabc9..4067956722 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -34,14 +34,14 @@ edition = "2021" rust-version = "1.88" [workspace.dependencies] -arrow = { version = "57.3.0", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow = { version = "58.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } async-trait = { version = "0.1" } bytes = { version = "1.11.1" } -parquet = { version = "57.3.0", default-features = false, features = ["experimental"] } -datafusion = { version = "52.2.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } -datafusion-datasource = { version = "52.2.0" } -datafusion-physical-expr-adapter = { version = "52.2.0" } -datafusion-spark = { version = "52.2.0" } +parquet = { version = "58.0.0", default-features = false, features = ["experimental"] } +datafusion = { git = "https://github.com/apache/datafusion", branch = "branch-53", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } +datafusion-datasource = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-physical-expr-adapter = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-spark = { git = "https://github.com/apache/datafusion", branch = "branch-53", features = ["core"] } datafusion-comet-spark-expr = { path = "spark-expr" } datafusion-comet-proto = { path = "proto" } chrono = { version = "0.4", default-features = false, features = ["clock"] } @@ -51,7 +51,7 @@ num = "0.4" rand = "0.10" regex = "1.12.3" thiserror = "2" -object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] } +object_store = { version = "0.13.1", features = ["gcp", "azure", "aws", "http"] } url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index cbe397b12b..23a78aa3ee 100644 --- 
a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -96,7 +96,7 @@ jni = { version = "0.21", features = ["invocation"] } lazy_static = "1.4" assertables = "9" hex = "0.4.3" -datafusion-functions-nested = { version = "52.2.0" } +datafusion-functions-nested = { git = "https://github.com/apache/datafusion", branch = "branch-53" } [features] backtrace = ["datafusion/backtrace"] diff --git a/native/core/src/execution/operators/expand.rs b/native/core/src/execution/operators/expand.rs index 19ca204592..e06fab23ec 100644 --- a/native/core/src/execution/operators/expand.rs +++ b/native/core/src/execution/operators/expand.rs @@ -42,7 +42,7 @@ pub struct ExpandExec { projections: Vec>>, child: Arc, schema: SchemaRef, - cache: PlanProperties, + cache: Arc, } impl ExpandExec { @@ -52,12 +52,12 @@ impl ExpandExec { child: Arc, schema: SchemaRef, ) -> Self { - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&schema)), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Self { projections, @@ -129,7 +129,7 @@ impl ExecutionPlan for ExpandExec { Ok(Box::pin(expand_stream)) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git a/native/core/src/execution/operators/iceberg_scan.rs b/native/core/src/execution/operators/iceberg_scan.rs index 720a4c09a4..8409545763 100644 --- a/native/core/src/execution/operators/iceberg_scan.rs +++ b/native/core/src/execution/operators/iceberg_scan.rs @@ -57,7 +57,7 @@ pub struct IcebergScanExec { /// Output schema after projection output_schema: SchemaRef, /// Cached execution plan properties - plan_properties: PlanProperties, + plan_properties: Arc, /// Catalog-specific configuration for FileIO catalog_properties: HashMap, /// Pre-planned file scan tasks @@ -92,13 +92,13 @@ impl IcebergScanExec { }) } - fn compute_properties(schema: SchemaRef, num_partitions: usize) -> PlanProperties { - 
PlanProperties::new( + fn compute_properties(schema: SchemaRef, num_partitions: usize) -> Arc { + Arc::new(PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(num_partitions), EmissionType::Incremental, Boundedness::Bounded, - ) + )) } } @@ -115,7 +115,7 @@ impl ExecutionPlan for IcebergScanExec { Arc::clone(&self.output_schema) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.plan_properties } @@ -269,7 +269,7 @@ where _ => { let adapter = self .adapter_factory - .create(Arc::clone(&self.schema), Arc::clone(&file_schema)); + .create(Arc::clone(&self.schema), Arc::clone(&file_schema))?; let exprs = build_projection_expressions(&self.schema, &adapter).map_err(|e| { DataFusionError::Execution(format!( diff --git a/native/core/src/execution/operators/parquet_writer.rs b/native/core/src/execution/operators/parquet_writer.rs index 4a53ff51b8..7b53fbc4bc 100644 --- a/native/core/src/execution/operators/parquet_writer.rs +++ b/native/core/src/execution/operators/parquet_writer.rs @@ -208,7 +208,7 @@ pub struct ParquetWriterExec { /// Metrics metrics: ExecutionPlanMetricsSet, /// Cache for plan properties - cache: PlanProperties, + cache: Arc, } impl ParquetWriterExec { @@ -228,12 +228,12 @@ impl ParquetWriterExec { // Preserve the input's partitioning so each partition writes its own file let input_partitioning = input.output_partitioning().clone(); - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&input.schema())), input_partitioning, EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(ParquetWriterExec { input, @@ -405,11 +405,7 @@ impl ExecutionPlan for ParquetWriterExec { Some(self.metrics.clone_inner()) } - fn statistics(&self) -> Result { - self.input.partition_statistics(None) - } - - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git 
a/native/core/src/execution/operators/scan.rs b/native/core/src/execution/operators/scan.rs index 2543705fb0..dbebbe25be 100644 --- a/native/core/src/execution/operators/scan.rs +++ b/native/core/src/execution/operators/scan.rs @@ -72,7 +72,7 @@ pub struct ScanExec { /// It is also used in unit test to mock the input data from JVM. pub batch: Arc>>, /// Cache of expensive-to-compute plan properties - cache: PlanProperties, + cache: Arc, /// Metrics collector metrics: ExecutionPlanMetricsSet, /// Baseline metrics @@ -95,14 +95,14 @@ impl ScanExec { // Build schema directly from data types since get_next now always unpacks dictionaries let schema = schema_from_data_types(&data_types); - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&schema)), // The partitioning is not important because we are not using DataFusion's // query planner or optimizer Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(Self { exec_context_id, @@ -417,7 +417,7 @@ impl ExecutionPlan for ScanExec { ))) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 094777e796..3eba167e41 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -1577,6 +1577,7 @@ impl PhysicalPlanner { // null doesn't equal to null in Spark join key. If the join key is // `EqualNullSafe`, Spark will rewrite it during planning. 
NullEquality::NullEqualsNothing, + false, )?); // If the hash join is build right, we need to swap the left and right diff --git a/native/core/src/execution/shuffle/shuffle_writer.rs b/native/core/src/execution/shuffle/shuffle_writer.rs index fe1bf0fccf..8327f04654 100644 --- a/native/core/src/execution/shuffle/shuffle_writer.rs +++ b/native/core/src/execution/shuffle/shuffle_writer.rs @@ -62,7 +62,7 @@ pub struct ShuffleWriterExec { /// Metrics metrics: ExecutionPlanMetricsSet, /// Cache for expensive-to-compute plan properties - cache: PlanProperties, + cache: Arc, /// The compression codec to use when compressing shuffle blocks codec: CompressionCodec, tracing_enabled: bool, @@ -82,12 +82,12 @@ impl ShuffleWriterExec { tracing_enabled: bool, write_buffer_size: usize, ) -> Result { - let cache = PlanProperties::new( + let cache = Arc::new(PlanProperties::new( EquivalenceProperties::new(Arc::clone(&input.schema())), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(ShuffleWriterExec { input, @@ -133,11 +133,7 @@ impl ExecutionPlan for ShuffleWriterExec { Some(self.metrics.clone_inner()) } - fn statistics(&self) -> Result { - self.input.partition_statistics(None) - } - - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.cache } diff --git a/native/core/src/parquet/encryption_support.rs b/native/core/src/parquet/encryption_support.rs index 4540c217d5..f62c04b854 100644 --- a/native/core/src/parquet/encryption_support.rs +++ b/native/core/src/parquet/encryption_support.rs @@ -19,7 +19,7 @@ use crate::execution::operators::ExecutionError; use crate::jvm_bridge::{check_exception, JVMClasses}; use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion::common::extensions_options; +use datafusion::common::{extensions_options, Result as DataFusionResult}; use datafusion::config::EncryptionFactoryOptions; use datafusion::error::DataFusionError; use 
datafusion::execution::parquet_encryption::EncryptionFactory; @@ -54,7 +54,7 @@ impl EncryptionFactory for CometEncryptionFactory { _options: &EncryptionFactoryOptions, _schema: &SchemaRef, _file_path: &Path, - ) -> Result>, DataFusionError> { + ) -> DataFusionResult>> { Err(DataFusionError::NotImplemented( "Comet does not support Parquet encryption yet." .parse() @@ -69,7 +69,7 @@ impl EncryptionFactory for CometEncryptionFactory { &self, options: &EncryptionFactoryOptions, file_path: &Path, - ) -> Result>, DataFusionError> { + ) -> DataFusionResult>> { let config: CometEncryptionConfig = options.to_extension_options()?; let full_path: String = config.uri_base + file_path.as_ref(); diff --git a/native/core/src/parquet/parquet_support.rs b/native/core/src/parquet/parquet_support.rs index e7ff5630f1..e1c4a1ec7c 100644 --- a/native/core/src/parquet/parquet_support.rs +++ b/native/core/src/parquet/parquet_support.rs @@ -477,7 +477,7 @@ pub(crate) fn prepare_object_store_with_configs( .map_err(|e| ExecutionError::GeneralError(e.to_string()))?; let object_store_url = ObjectStoreUrl::parse(url_key.clone())?; - runtime_env.register_object_store(&url, Arc::from(object_store)); + runtime_env.register_object_store(&url, Arc::from(object_store) as Arc); Ok((object_store_url, object_store_path)) } diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 42f0e7fc61..e8df2c0e37 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -100,7 +100,7 @@ impl PhysicalExprAdapterFactory for SparkPhysicalExprAdapterFactory { &self, logical_file_schema: SchemaRef, physical_file_schema: SchemaRef, - ) -> Arc { + ) -> DataFusionResult> { // When case-insensitive, remap physical schema field names to match logical // field names. 
The DefaultPhysicalExprAdapter uses exact name matching, so // without this remapping, columns like "a" won't match logical "A" and will @@ -145,16 +145,16 @@ impl PhysicalExprAdapterFactory for SparkPhysicalExprAdapterFactory { let default_adapter = default_factory.create( Arc::clone(&logical_file_schema), Arc::clone(&adapted_physical_schema), - ); + )?; - Arc::new(SparkPhysicalExprAdapter { + Ok(Arc::new(SparkPhysicalExprAdapter { logical_file_schema, physical_file_schema: adapted_physical_schema, parquet_options: self.parquet_options.clone(), default_values: self.default_values.clone(), default_adapter, logical_to_physical_names, - }) + })) } } From 0edf710c24e417cc6f5ea2417702917c91408ded Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 06:49:41 -0500 Subject: [PATCH 02/30] add iceberg and opendal as features --- native/core/Cargo.toml | 5 ++-- native/core/src/execution/operators/mod.rs | 2 ++ .../src/execution/operators/parquet_writer.rs | 23 ++++++++++++++----- native/core/src/execution/planner.rs | 19 +++++++++++++++ .../src/execution/shuffle/shuffle_writer.rs | 1 - 5 files changed, 41 insertions(+), 9 deletions(-) diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 23a78aa3ee..c44d0d65c1 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -79,7 +79,7 @@ reqwest = { version = "0.12", default-features = false, features = ["rustls-tls- object_store_opendal = {version = "0.55.0", optional = true} hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]} opendal = { version ="0.55.0", optional = true, features = ["services-hdfs"] } -iceberg = { workspace = true } +iceberg = { workspace = true, optional = true } serde_json = "1.0" uuid = "1.21.0" @@ -100,7 +100,8 @@ datafusion-functions-nested = { git = "https://github.com/apache/datafusion", br [features] backtrace = ["datafusion/backtrace"] -default = ["hdfs-opendal"] +default = [] +iceberg = ["dep:iceberg"] hdfs = 
["datafusion-comet-objectstore-hdfs"] hdfs-opendal = ["opendal", "object_store_opendal", "hdfs-sys"] jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"] diff --git a/native/core/src/execution/operators/mod.rs b/native/core/src/execution/operators/mod.rs index 07ee995367..7d3b4dabc6 100644 --- a/native/core/src/execution/operators/mod.rs +++ b/native/core/src/execution/operators/mod.rs @@ -22,12 +22,14 @@ use std::fmt::Debug; use jni::objects::GlobalRef; pub use copy::*; +#[cfg(feature = "iceberg")] pub use iceberg_scan::*; pub use scan::*; mod copy; mod expand; pub use expand::ExpandExec; +#[cfg(feature = "iceberg")] mod iceberg_scan; mod parquet_writer; pub use parquet_writer::ParquetWriterExec; diff --git a/native/core/src/execution/operators/parquet_writer.rs b/native/core/src/execution/operators/parquet_writer.rs index 7b53fbc4bc..132ebf7be9 100644 --- a/native/core/src/execution/operators/parquet_writer.rs +++ b/native/core/src/execution/operators/parquet_writer.rs @@ -23,16 +23,18 @@ use std::{ fmt, fmt::{Debug, Formatter}, fs::File, - io::Cursor, sync::Arc, }; +#[cfg(feature = "hdfs-opendal")] +use std::io::Cursor; +#[cfg(feature = "hdfs-opendal")] use opendal::Operator; use crate::execution::shuffle::CompressionCodec; -use crate::parquet::parquet_support::{ - create_hdfs_operator, is_hdfs_scheme, prepare_object_store_with_configs, -}; +use crate::parquet::parquet_support::is_hdfs_scheme; +#[cfg(feature = "hdfs-opendal")] +use crate::parquet::parquet_support::{create_hdfs_operator, prepare_object_store_with_configs}; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use async_trait::async_trait; @@ -45,7 +47,7 @@ use datafusion::{ metrics::{ExecutionPlanMetricsSet, MetricsSet}, stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, - SendableRecordBatchStream, Statistics, + SendableRecordBatchStream, }, }; use futures::TryStreamExt; @@ -64,6 +66,7 @@ enum 
ParquetWriter { /// Contains the arrow writer, HDFS operator, and destination path /// an Arrow writer writes to in-memory buffer the data converted to Parquet format /// The opendal::Writer is created lazily on first write + #[cfg(feature = "hdfs-opendal")] Remote( ArrowWriter>>, Option, @@ -80,6 +83,7 @@ impl ParquetWriter { ) -> std::result::Result<(), parquet::errors::ParquetError> { match self { ParquetWriter::LocalFile(writer) => writer.write(batch), + #[cfg(feature = "hdfs-opendal")] ParquetWriter::Remote( arrow_parquet_buffer_writer, hdfs_writer_opt, @@ -134,6 +138,7 @@ impl ParquetWriter { writer.close()?; Ok(()) } + #[cfg(feature = "hdfs-opendal")] ParquetWriter::Remote( arrow_parquet_buffer_writer, mut hdfs_writer_opt, @@ -284,7 +289,7 @@ impl ParquetWriterExec { })?; if is_hdfs_scheme(&url, object_store_options) { - // HDFS storage + #[cfg(feature = "hdfs-opendal")] { // Use prepare_object_store_with_configs to create and register the object store let (_object_store_url, object_store_path) = prepare_object_store_with_configs( @@ -324,6 +329,12 @@ impl ParquetWriterExec { object_store_path.to_string(), )) } + #[cfg(not(feature = "hdfs-opendal"))] + { + Err(DataFusionError::Execution( + "HDFS support is not enabled. 
Rebuild with the 'hdfs-opendal' feature.".into(), + )) + } } else if output_file_path.starts_with("file://") || output_file_path.starts_with("file:") || !output_file_path.contains("://") diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 3eba167e41..311c3b14e8 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -22,6 +22,7 @@ pub mod macros; pub mod operator_registry; use crate::execution::operators::init_csv_datasource_exec; +#[cfg(feature = "iceberg")] use crate::execution::operators::IcebergScanExec; use crate::{ errors::ExpressionError, @@ -73,6 +74,7 @@ use datafusion_comet_spark_expr::{ create_comet_physical_fun, create_comet_physical_fun_with_eval_mode, BinaryOutputStyle, BloomFilterAgg, BloomFilterMightContain, CsvWriteOptions, EvalMode, SumInteger, ToCsv, }; +#[cfg(feature = "iceberg")] use iceberg::expr::Bind; use crate::execution::operators::ExecutionError::GeneralError; @@ -1192,6 +1194,7 @@ impl PhysicalPlanner { Arc::new(SparkPlan::new(spark_plan.plan_id, Arc::new(scan), vec![])), )) } + #[cfg(feature = "iceberg")] OpStruct::IcebergScan(scan) => { // Extract common data and single partition's file tasks // Per-partition injection happens in Scala before sending to native @@ -1228,6 +1231,10 @@ impl PhysicalPlanner { )), )) } + #[cfg(not(feature = "iceberg"))] + OpStruct::IcebergScan(_) => { + Err(GeneralError("Iceberg support is not enabled. Rebuild with the 'iceberg' feature.".into()).into()) + } OpStruct::ShuffleWriter(writer) => { assert_eq!(children.len(), 1); let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; @@ -1577,6 +1584,11 @@ impl PhysicalPlanner { // null doesn't equal to null in Spark join key. If the join key is // `EqualNullSafe`, Spark will rewrite it during planning. NullEquality::NullEqualsNothing, + // null_aware is for null-aware anti joins (NOT IN subqueries). 
+ // NullEquality controls whether NULL = NULL in join keys generally, + // while null_aware changes anti-join semantics so any NULL changes + // the entire result. Spark doesn't use this path (it rewrites + // EqualNullSafe at plan time), so false is correct. false, )?); @@ -2700,6 +2712,7 @@ fn convert_spark_types_to_arrow_schema( arrow_schema } +#[cfg(feature = "iceberg")] /// Converts a protobuf PartitionValue to an iceberg Literal. /// fn partition_value_to_literal( @@ -2785,6 +2798,7 @@ fn partition_value_to_literal( /// Uses the existing Struct::from_iter() API from iceberg-rust to construct the struct /// from the list of partition values. /// This can potentially be upstreamed to iceberg_rust +#[cfg(feature = "iceberg")] fn partition_data_to_struct( proto_partition: &spark_operator::PartitionData, ) -> Result { @@ -2804,6 +2818,7 @@ fn partition_data_to_struct( /// /// This function uses deduplication pools from the IcebergScanCommon to avoid redundant /// parsing of schemas, partition specs, partition types, name mappings, and other repeated data. +#[cfg(feature = "iceberg")] fn parse_file_scan_tasks_from_common( proto_common: &spark_operator::IcebergScanCommon, proto_tasks: &[spark_operator::IcebergFileScanTask], @@ -3252,6 +3267,7 @@ fn literal_to_array_ref( // always returns MIGHT_MATCH (never prunes row groups). These are handled by CometFilter post-scan. /// Converts a protobuf Spark expression to an Iceberg predicate for row-group filtering. 
+#[cfg(feature = "iceberg")] fn convert_spark_expr_to_predicate( expr: &spark_expression::Expr, ) -> Option { @@ -3383,6 +3399,7 @@ fn convert_spark_expr_to_predicate( } } +#[cfg(feature = "iceberg")] fn convert_binary_to_predicate( left: &Option>, right: &Option>, @@ -3431,6 +3448,7 @@ fn convert_binary_to_predicate( None } +#[cfg(feature = "iceberg")] fn extract_column_reference(expr: &spark_expression::Expr) -> Option { use spark_expression::expr::ExprStruct; @@ -3440,6 +3458,7 @@ fn extract_column_reference(expr: &spark_expression::Expr) -> Option { } } +#[cfg(feature = "iceberg")] fn extract_literal_as_datum(expr: &spark_expression::Expr) -> Option { use spark_expression::expr::ExprStruct; diff --git a/native/core/src/execution/shuffle/shuffle_writer.rs b/native/core/src/execution/shuffle/shuffle_writer.rs index 8327f04654..1b9433993d 100644 --- a/native/core/src/execution/shuffle/shuffle_writer.rs +++ b/native/core/src/execution/shuffle/shuffle_writer.rs @@ -36,7 +36,6 @@ use datafusion::{ metrics::{ExecutionPlanMetricsSet, MetricsSet}, stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, - Statistics, }, }; use futures::{StreamExt, TryFutureExt, TryStreamExt}; From c96422ea292a1b40f6ce418351ccffd9467b73b7 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 06:50:09 -0500 Subject: [PATCH 03/30] cargo fmt --- native/core/src/execution/operators/parquet_writer.rs | 4 ++-- native/core/src/execution/planner.rs | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/native/core/src/execution/operators/parquet_writer.rs b/native/core/src/execution/operators/parquet_writer.rs index 132ebf7be9..820d8b0481 100644 --- a/native/core/src/execution/operators/parquet_writer.rs +++ b/native/core/src/execution/operators/parquet_writer.rs @@ -26,10 +26,10 @@ use std::{ sync::Arc, }; -#[cfg(feature = "hdfs-opendal")] -use std::io::Cursor; #[cfg(feature = "hdfs-opendal")] use 
opendal::Operator; +#[cfg(feature = "hdfs-opendal")] +use std::io::Cursor; use crate::execution::shuffle::CompressionCodec; use crate::parquet::parquet_support::is_hdfs_scheme; diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 311c3b14e8..99fee145ba 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -1232,9 +1232,10 @@ impl PhysicalPlanner { )) } #[cfg(not(feature = "iceberg"))] - OpStruct::IcebergScan(_) => { - Err(GeneralError("Iceberg support is not enabled. Rebuild with the 'iceberg' feature.".into()).into()) - } + OpStruct::IcebergScan(_) => Err(GeneralError( + "Iceberg support is not enabled. Rebuild with the 'iceberg' feature.".into(), + ) + .into()), OpStruct::ShuffleWriter(writer) => { assert_eq!(children.len(), 1); let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; From eca9b33417a11c9f93c21a9332ddb8d434ef3504 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 06:51:20 -0500 Subject: [PATCH 04/30] clippy fixes --- native/core/src/execution/planner.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 99fee145ba..b58cde0939 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -1234,8 +1234,7 @@ impl PhysicalPlanner { #[cfg(not(feature = "iceberg"))] OpStruct::IcebergScan(_) => Err(GeneralError( "Iceberg support is not enabled. 
Rebuild with the 'iceberg' feature.".into(), - ) - .into()), + )), OpStruct::ShuffleWriter(writer) => { assert_eq!(children.len(), 1); let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; From b5ff6411cc6683d450b598b6227609afed00417c Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 07:33:18 -0500 Subject: [PATCH 05/30] bump to iceberg-rust df53 branch, clippy fixes --- native/Cargo.lock | 354 +++++------------- native/Cargo.toml | 2 +- native/core/Cargo.toml | 3 +- native/core/src/execution/operators/mod.rs | 2 - .../src/execution/operators/parquet_writer.rs | 4 +- native/core/src/execution/planner.rs | 62 +-- 6 files changed, 103 insertions(+), 324 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 0d40eb7da3..a66099fc88 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -155,33 +155,19 @@ version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "602268ce9f569f282cedb9a9f6bac569b680af47b9b077d515900c03c5d190da" dependencies = [ - "arrow-arith 58.0.0", - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-cast 58.0.0", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", "arrow-csv", - "arrow-data 58.0.0", - "arrow-ipc 58.0.0", + "arrow-data", + "arrow-ipc", "arrow-json", - "arrow-ord 58.0.0", + "arrow-ord", "arrow-row", - "arrow-schema 58.0.0", - "arrow-select 58.0.0", - "arrow-string 58.0.0", -] - -[[package]] -name = "arrow-arith" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" -dependencies = [ - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-data 57.3.0", - "arrow-schema 57.3.0", - "chrono", - "num-traits", + "arrow-schema", + "arrow-select", + "arrow-string", ] [[package]] @@ -190,32 +176,14 @@ version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"cd53c6bf277dea91f136ae8e3a5d7041b44b5e489e244e637d00ae302051f56f" dependencies = [ - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "num-traits", ] -[[package]] -name = "arrow-array" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" -dependencies = [ - "ahash", - "arrow-buffer 57.3.0", - "arrow-data 57.3.0", - "arrow-schema 57.3.0", - "chrono", - "half", - "hashbrown 0.16.1", - "num-complex", - "num-integer", - "num-traits", -] - [[package]] name = "arrow-array" version = "58.0.0" @@ -223,9 +191,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e53796e07a6525edaf7dc28b540d477a934aff14af97967ad1d5550878969b9e" dependencies = [ "ahash", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "chrono-tz", "half", @@ -235,18 +203,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-buffer" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" -dependencies = [ - "bytes", - "half", - "num-bigint", - "num-traits", -] - [[package]] name = "arrow-buffer" version = "58.0.0" @@ -259,39 +215,18 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-cast" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" -dependencies = [ - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-data 57.3.0", - "arrow-ord 57.3.0", - "arrow-schema 57.3.0", - "arrow-select 57.3.0", - "atoi", - "base64", - "chrono", - "half", - "lexical-core", - "num-traits", - "ryu", -] - [[package]] name = "arrow-cast" 
version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89fb245db6b0e234ed8e15b644edb8664673fefe630575e94e62cd9d489a8a26" dependencies = [ - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-ord 58.0.0", - "arrow-schema 58.0.0", - "arrow-select 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-ord", + "arrow-schema", + "arrow-select", "atoi", "base64", "chrono", @@ -308,66 +243,39 @@ version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d374882fb465a194462527c0c15a93aa19a554cf690a6b77a26b2a02539937a7" dependencies = [ - "arrow-array 58.0.0", - "arrow-cast 58.0.0", - "arrow-schema 58.0.0", + "arrow-array", + "arrow-cast", + "arrow-schema", "chrono", "csv", "csv-core", "regex", ] -[[package]] -name = "arrow-data" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" -dependencies = [ - "arrow-buffer 57.3.0", - "arrow-schema 57.3.0", - "half", - "num-integer", - "num-traits", -] - [[package]] name = "arrow-data" version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "189d210bc4244c715fa3ed9e6e22864673cccb73d5da28c2723fb2e527329b33" dependencies = [ - "arrow-buffer 58.0.0", - "arrow-schema 58.0.0", + "arrow-buffer", + "arrow-schema", "half", "num-integer", "num-traits", ] -[[package]] -name = "arrow-ipc" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" -dependencies = [ - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-data 57.3.0", - "arrow-schema 57.3.0", - "arrow-select 57.3.0", - "flatbuffers", -] - [[package]] name = "arrow-ipc" version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7968c2e5210c41f4909b2ef76f6e05e172b99021c2def5edf3cc48fdd39d1d6c" dependencies = [ - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", - "arrow-select 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "flatbuffers", "lz4_flex", ] @@ -378,11 +286,11 @@ version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92111dba5bf900f443488e01f00d8c4ddc2f47f5c50039d18120287b580baa22" dependencies = [ - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-cast 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "half", "indexmap 2.13.0", @@ -396,30 +304,17 @@ dependencies = [ "simdutf8", ] -[[package]] -name = "arrow-ord" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" -dependencies = [ - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-data 57.3.0", - "arrow-schema 57.3.0", - "arrow-select 57.3.0", -] - [[package]] name = "arrow-ord" version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "211136cb253577ee1a6665f741a13136d4e563f64f5093ffd6fb837af90b9495" dependencies = [ - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", - "arrow-select 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", ] [[package]] @@ -428,19 +323,13 @@ version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e0f20145f9f5ea3fe383e2ba7a7487bf19be36aa9dbf5dd6a1f92f657179663" dependencies = [ - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "half", ] -[[package]] -name = 
"arrow-schema" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" - [[package]] name = "arrow-schema" version = "58.0.0" @@ -452,20 +341,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "arrow-select" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" -dependencies = [ - "ahash", - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-data 57.3.0", - "arrow-schema 57.3.0", - "num-traits", -] - [[package]] name = "arrow-select" version = "58.0.0" @@ -473,41 +348,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "750a7d1dda177735f5e82a314485b6915c7cccdbb278262ac44090f4aba4a325" dependencies = [ "ahash", - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "num-traits", ] -[[package]] -name = "arrow-string" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" -dependencies = [ - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-data 57.3.0", - "arrow-schema 57.3.0", - "arrow-select 57.3.0", - "memchr", - "num-traits", - "regex", - "regex-syntax", -] - [[package]] name = "arrow-string" version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1eab1208bc4fe55d768cdc9b9f3d9df5a794cdb3ee2586bf89f9b30dc31ad8c" dependencies = [ - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-schema 58.0.0", - "arrow-select 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "memchr", "num-traits", "regex", @@ -1828,7 +1686,7 @@ version = "53.0.0" source = 
"git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-schema 58.0.0", + "arrow-schema", "async-trait", "bytes", "chrono", @@ -1862,7 +1720,7 @@ dependencies = [ "log", "object_store 0.13.1", "parking_lot", - "parquet 58.0.0", + "parquet", "rand 0.9.2", "regex", "sqlparser", @@ -1956,7 +1814,7 @@ dependencies = [ "once_cell", "opendal", "parking_lot", - "parquet 58.0.0", + "parquet", "paste", "pprof", "procfs", @@ -2042,7 +1900,7 @@ source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e7 dependencies = [ "ahash", "arrow", - "arrow-ipc 58.0.0", + "arrow-ipc", "chrono", "half", "hashbrown 0.16.1", @@ -2052,7 +1910,7 @@ dependencies = [ "libc", "log", "object_store 0.13.1", - "parquet 58.0.0", + "parquet", "paste", "sqlparser", "tokio", @@ -2109,7 +1967,7 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-ipc 58.0.0", + "arrow-ipc", "async-trait", "bytes", "datafusion-common", @@ -2196,7 +2054,7 @@ dependencies = [ "log", "object_store 0.13.1", "parking_lot", - "parquet 58.0.0", + "parquet", "tokio", ] @@ -2211,7 +2069,7 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-buffer 58.0.0", + "arrow-buffer", "async-trait", "chrono", "dashmap", @@ -2222,7 +2080,7 @@ dependencies = [ "log", "object_store 0.13.1", "parking_lot", - "parquet 58.0.0", + "parquet", "rand 0.9.2", "tempfile", "url", @@ -2267,7 +2125,7 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-buffer 58.0.0", + "arrow-buffer", "base64", "blake2", "blake3", @@ -2331,7 +2189,7 @@ version = "53.0.0" source = 
"git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" dependencies = [ "arrow", - "arrow-ord 58.0.0", + "arrow-ord", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2494,8 +2352,8 @@ source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e7 dependencies = [ "ahash", "arrow", - "arrow-ord 58.0.0", - "arrow-schema 58.0.0", + "arrow-ord", + "arrow-schema", "async-trait", "datafusion-common", "datafusion-common-runtime", @@ -3374,19 +3232,19 @@ dependencies = [ [[package]] name = "iceberg" version = "0.8.0" -source = "git+https://github.com/apache/iceberg-rust?rev=b24ab63#b24ab6310235f71907f4b6b6dc14a8e5d9291acc" +source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#71bc1ab8189860d00a0805db405431d806c03a26" dependencies = [ "anyhow", "apache-avro", "array-init", - "arrow-arith 57.3.0", - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-cast 57.3.0", - "arrow-ord 57.3.0", - "arrow-schema 57.3.0", - "arrow-select 57.3.0", - "arrow-string 57.3.0", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ord", + "arrow-schema", + "arrow-select", + "arrow-string", "as-any", "async-trait", "backon", @@ -3406,7 +3264,7 @@ dependencies = [ "once_cell", "opendal", "ordered-float 4.6.0", - "parquet 57.3.0", + "parquet", "rand 0.8.5", "reqsign", "reqwest", @@ -4389,42 +4247,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "parquet" -version = "57.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" -dependencies = [ - "ahash", - "arrow-array 57.3.0", - "arrow-buffer 57.3.0", - "arrow-cast 57.3.0", - "arrow-data 57.3.0", - "arrow-ipc 57.3.0", - "arrow-schema 57.3.0", - "arrow-select 57.3.0", - "base64", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "half", - "hashbrown 0.16.1", - "lz4_flex", - "num-bigint", - 
"num-integer", - "num-traits", - "paste", - "seq-macro", - "simdutf8", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", -] - [[package]] name = "parquet" version = "58.0.0" @@ -4432,12 +4254,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f491d0ef1b510194426ee67ddc18a9b747ef3c42050c19322a2cd2e1666c29b" dependencies = [ "ahash", - "arrow-array 58.0.0", - "arrow-buffer 58.0.0", - "arrow-data 58.0.0", - "arrow-ipc 58.0.0", - "arrow-schema 58.0.0", - "arrow-select 58.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", "base64", "brotli", "bytes", @@ -4471,7 +4293,7 @@ version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00ba4e5dcbc8ad65882b7337a95c12a0f9cbb6add237c53d93b803b7d7f70f02" dependencies = [ - "arrow-schema 58.0.0", + "arrow-schema", "chrono", "half", "indexmap 2.13.0", @@ -4486,7 +4308,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ec4cfb8da15565c8d211b6bc51e8eb481ea65d19132462af3f948b150ac8efe" dependencies = [ "arrow", - "arrow-schema 58.0.0", + "arrow-schema", "chrono", "half", "indexmap 2.13.0", @@ -4502,7 +4324,7 @@ version = "58.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3668ff00a6aeb29d172ba15f9d8fedf1675d79bff7d1916daa333efdeaa13e46" dependencies = [ - "arrow-schema 58.0.0", + "arrow-schema", "base64", "chrono", "parquet-variant", diff --git a/native/Cargo.toml b/native/Cargo.toml index 4067956722..abf8a3bf5e 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -55,7 +55,7 @@ object_store = { version = "0.13.1", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "b24ab63" } +iceberg = { git = "https://github.com/mbutrovich/iceberg-rust", branch = "df53-upgrade" } [profile.release] debug = true diff --git 
a/native/core/Cargo.toml b/native/core/Cargo.toml index c44d0d65c1..2233aa8855 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -79,7 +79,7 @@ reqwest = { version = "0.12", default-features = false, features = ["rustls-tls- object_store_opendal = {version = "0.55.0", optional = true} hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]} opendal = { version ="0.55.0", optional = true, features = ["services-hdfs"] } -iceberg = { workspace = true, optional = true } +iceberg = { workspace = true } serde_json = "1.0" uuid = "1.21.0" @@ -101,7 +101,6 @@ datafusion-functions-nested = { git = "https://github.com/apache/datafusion", br [features] backtrace = ["datafusion/backtrace"] default = [] -iceberg = ["dep:iceberg"] hdfs = ["datafusion-comet-objectstore-hdfs"] hdfs-opendal = ["opendal", "object_store_opendal", "hdfs-sys"] jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"] diff --git a/native/core/src/execution/operators/mod.rs b/native/core/src/execution/operators/mod.rs index 7d3b4dabc6..07ee995367 100644 --- a/native/core/src/execution/operators/mod.rs +++ b/native/core/src/execution/operators/mod.rs @@ -22,14 +22,12 @@ use std::fmt::Debug; use jni::objects::GlobalRef; pub use copy::*; -#[cfg(feature = "iceberg")] pub use iceberg_scan::*; pub use scan::*; mod copy; mod expand; pub use expand::ExpandExec; -#[cfg(feature = "iceberg")] mod iceberg_scan; mod parquet_writer; pub use parquet_writer::ParquetWriterExec; diff --git a/native/core/src/execution/operators/parquet_writer.rs b/native/core/src/execution/operators/parquet_writer.rs index 820d8b0481..bb2f54b2a6 100644 --- a/native/core/src/execution/operators/parquet_writer.rs +++ b/native/core/src/execution/operators/parquet_writer.rs @@ -280,7 +280,7 @@ impl ParquetWriterExec { output_file_path: &str, schema: SchemaRef, props: WriterProperties, - runtime_env: Arc, + _runtime_env: Arc, object_store_options: &HashMap, ) -> Result { // Parse URL and match on storage scheme 
directly @@ -293,7 +293,7 @@ impl ParquetWriterExec { { // Use prepare_object_store_with_configs to create and register the object store let (_object_store_url, object_store_path) = prepare_object_store_with_configs( - runtime_env, + _runtime_env, output_file_path.to_string(), object_store_options, ) diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index b58cde0939..46d00a3e67 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -22,7 +22,6 @@ pub mod macros; pub mod operator_registry; use crate::execution::operators::init_csv_datasource_exec; -#[cfg(feature = "iceberg")] use crate::execution::operators::IcebergScanExec; use crate::{ errors::ExpressionError, @@ -74,7 +73,6 @@ use datafusion_comet_spark_expr::{ create_comet_physical_fun, create_comet_physical_fun_with_eval_mode, BinaryOutputStyle, BloomFilterAgg, BloomFilterMightContain, CsvWriteOptions, EvalMode, SumInteger, ToCsv, }; -#[cfg(feature = "iceberg")] use iceberg::expr::Bind; use crate::execution::operators::ExecutionError::GeneralError; @@ -107,7 +105,6 @@ use arrow::buffer::{BooleanBuffer, NullBuffer, OffsetBuffer}; use arrow::row::{OwnedRow, RowConverter, SortField}; use datafusion::common::utils::SingleRowListArrayBuilder; use datafusion::common::UnnestOptions; -use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion::physical_plan::filter::FilterExec; use datafusion::physical_plan::limit::GlobalLimitExec; use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec}; @@ -1194,7 +1191,6 @@ impl PhysicalPlanner { Arc::new(SparkPlan::new(spark_plan.plan_id, Arc::new(scan), vec![])), )) } - #[cfg(feature = "iceberg")] OpStruct::IcebergScan(scan) => { // Extract common data and single partition's file tasks // Per-partition injection happens in Scala before sending to native @@ -1231,10 +1227,6 @@ impl PhysicalPlanner { )), )) } - #[cfg(not(feature = "iceberg"))] - OpStruct::IcebergScan(_) 
=> Err(GeneralError( - "Iceberg support is not enabled. Rebuild with the 'iceberg' feature.".into(), - )), OpStruct::ShuffleWriter(writer) => { assert_eq!(children.len(), 1); let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; @@ -1522,42 +1514,17 @@ impl PhysicalPlanner { NullEquality::NullEqualsNothing, )?); - if join.filter.is_some() { - // SMJ with join filter produces lots of tiny batches - let coalesce_batches: Arc = - Arc::new(CoalesceBatchesExec::new( - Arc::::clone(&join), - self.session_ctx - .state() - .config_options() - .execution - .batch_size, - )); - Ok(( - scans, - Arc::new(SparkPlan::new_with_additional( - spark_plan.plan_id, - coalesce_batches, - vec![ - Arc::clone(&join_params.left), - Arc::clone(&join_params.right), - ], - vec![join], - )), - )) - } else { - Ok(( - scans, - Arc::new(SparkPlan::new( - spark_plan.plan_id, - join, - vec![ - Arc::clone(&join_params.left), - Arc::clone(&join_params.right), - ], - )), - )) - } + Ok(( + scans, + Arc::new(SparkPlan::new( + spark_plan.plan_id, + join, + vec![ + Arc::clone(&join_params.left), + Arc::clone(&join_params.right), + ], + )), + )) } OpStruct::HashJoin(join) => { let (join_params, scans) = self.parse_join_parameters( @@ -2712,7 +2679,6 @@ fn convert_spark_types_to_arrow_schema( arrow_schema } -#[cfg(feature = "iceberg")] /// Converts a protobuf PartitionValue to an iceberg Literal. /// fn partition_value_to_literal( @@ -2798,7 +2764,6 @@ fn partition_value_to_literal( /// Uses the existing Struct::from_iter() API from iceberg-rust to construct the struct /// from the list of partition values. 
/// This can potentially be upstreamed to iceberg_rust -#[cfg(feature = "iceberg")] fn partition_data_to_struct( proto_partition: &spark_operator::PartitionData, ) -> Result { @@ -2818,7 +2783,6 @@ fn partition_data_to_struct( /// /// This function uses deduplication pools from the IcebergScanCommon to avoid redundant /// parsing of schemas, partition specs, partition types, name mappings, and other repeated data. -#[cfg(feature = "iceberg")] fn parse_file_scan_tasks_from_common( proto_common: &spark_operator::IcebergScanCommon, proto_tasks: &[spark_operator::IcebergFileScanTask], @@ -3267,7 +3231,6 @@ fn literal_to_array_ref( // always returns MIGHT_MATCH (never prunes row groups). These are handled by CometFilter post-scan. /// Converts a protobuf Spark expression to an Iceberg predicate for row-group filtering. -#[cfg(feature = "iceberg")] fn convert_spark_expr_to_predicate( expr: &spark_expression::Expr, ) -> Option { @@ -3399,7 +3362,6 @@ fn convert_spark_expr_to_predicate( } } -#[cfg(feature = "iceberg")] fn convert_binary_to_predicate( left: &Option>, right: &Option>, @@ -3448,7 +3410,6 @@ fn convert_binary_to_predicate( None } -#[cfg(feature = "iceberg")] fn extract_column_reference(expr: &spark_expression::Expr) -> Option { use spark_expression::expr::ExprStruct; @@ -3458,7 +3419,6 @@ fn extract_column_reference(expr: &spark_expression::Expr) -> Option { } } -#[cfg(feature = "iceberg")] fn extract_literal_as_datum(expr: &spark_expression::Expr) -> Option { use spark_expression::expr::ExprStruct; From 42583001b3bdb0ac4b164918e831b685a8a364a9 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 07:47:16 -0500 Subject: [PATCH 06/30] bump to iceberg-rust df53 branch, clippy fixes --- native/Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index a66099fc88..fc08f2b485 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -3232,7 +3232,7 @@ dependencies = [ [[package]] name = 
"iceberg" version = "0.8.0" -source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#71bc1ab8189860d00a0805db405431d806c03a26" +source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#72fbebc4b0b9f5363accfb4707c52635306e5271" dependencies = [ "anyhow", "apache-avro", From efa94372d6bcdcc1209a4c6f112e795c547140ac Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 09:11:42 -0500 Subject: [PATCH 07/30] fix fileIO construction --- .../src/execution/operators/iceberg_scan.rs | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/native/core/src/execution/operators/iceberg_scan.rs b/native/core/src/execution/operators/iceberg_scan.rs index 8409545763..3ecc3b2c5c 100644 --- a/native/core/src/execution/operators/iceberg_scan.rs +++ b/native/core/src/execution/operators/iceberg_scan.rs @@ -38,7 +38,7 @@ use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, }; use futures::{Stream, StreamExt, TryStreamExt}; -use iceberg::io::FileIO; +use iceberg::io::{FileIO, FileIOBuilder, OpenDalStorageFactory, StorageFactory}; use crate::execution::operators::ExecutionError; use crate::parquet::parquet_support::SparkParquetOptions; @@ -191,20 +191,32 @@ impl IcebergScanExec { Ok(Box::pin(wrapped_stream)) } + fn storage_factory_for(path: &str) -> Result, DataFusionError> { + let scheme = path.split("://").next().unwrap_or("file"); + match scheme { + "file" | "" => Ok(Arc::new(OpenDalStorageFactory::Fs)), + "s3" | "s3a" => Ok(Arc::new(OpenDalStorageFactory::S3 { + configured_scheme: scheme.to_string(), + customized_credential_load: None, + })), + _ => Err(DataFusionError::Execution(format!( + "Unsupported storage scheme: {scheme}" + ))), + } + } + fn load_file_io( catalog_properties: &HashMap, metadata_location: &str, ) -> Result { - let mut file_io_builder = FileIO::from_path(metadata_location) - .map_err(|e| 
DataFusionError::Execution(format!("Failed to create FileIO: {}", e)))?; + let factory = Self::storage_factory_for(metadata_location)?; + let mut file_io_builder = FileIOBuilder::new(factory); for (key, value) in catalog_properties { file_io_builder = file_io_builder.with_prop(key, value); } - file_io_builder - .build() - .map_err(|e| DataFusionError::Execution(format!("Failed to build FileIO: {}", e))) + Ok(file_io_builder.build()) } } From a548871c09b7d9e4a03cd4a1807ffd6cfab37d04 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 4 Mar 2026 11:43:52 -0500 Subject: [PATCH 08/30] update deps --- native/Cargo.lock | 66 +++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index fc08f2b485..8e2ed37f69 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -1683,7 +1683,7 @@ dependencies = [ [[package]] name = "datafusion" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "arrow-schema", @@ -1733,7 +1733,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-trait", @@ -1757,7 +1757,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-trait", @@ -1896,7 +1896,7 @@ dependencies = [ [[package]] 
name = "datafusion-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "ahash", "arrow", @@ -1920,7 +1920,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "futures", "log", @@ -1930,7 +1930,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-compression", @@ -1964,7 +1964,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "arrow-ipc", @@ -1987,7 +1987,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-trait", @@ -2009,7 +2009,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = 
"git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-trait", @@ -2032,7 +2032,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-trait", @@ -2061,12 +2061,12 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" [[package]] name = "datafusion-execution" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "arrow-buffer", @@ -2089,7 +2089,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-trait", @@ -2110,7 +2110,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "datafusion-common", @@ -2122,7 +2122,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "53.0.0" -source = 
"git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "arrow-buffer", @@ -2153,7 +2153,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "ahash", "arrow", @@ -2174,7 +2174,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "ahash", "arrow", @@ -2186,7 +2186,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "arrow-ord", @@ -2210,7 +2210,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "async-trait", @@ -2225,7 +2225,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = 
"git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "datafusion-common", @@ -2242,7 +2242,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2251,7 +2251,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "datafusion-doc", "quote", @@ -2261,7 +2261,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "chrono", @@ -2279,7 +2279,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "ahash", "arrow", @@ -2301,7 +2301,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", 
"datafusion-common", @@ -2315,7 +2315,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "ahash", "arrow", @@ -2331,7 +2331,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "datafusion-common", @@ -2348,7 +2348,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "ahash", "arrow", @@ -2379,7 +2379,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "datafusion-common", @@ -2395,7 +2395,7 @@ dependencies = [ [[package]] name = "datafusion-session" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "async-trait", "datafusion-common", @@ -2408,7 +2408,7 @@ dependencies = [ [[package]] name = "datafusion-spark" version = "53.0.0" -source = 
"git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "bigdecimal", @@ -2434,7 +2434,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#23e7a19186e71044a610e5bfcc4e647598bf557a" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", "bigdecimal", @@ -3232,7 +3232,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.8.0" -source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#72fbebc4b0b9f5363accfb4707c52635306e5271" +source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#646ab672a649322f3b140b0cf3dc76e26bb540a1" dependencies = [ "anyhow", "apache-avro", From faf1c56054747ba2cc7f508a199fd5b649824223 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 17 Mar 2026 07:30:54 -0400 Subject: [PATCH 09/30] Bump to 53.0.0-rc2. 
--- native/Cargo.lock | 645 ++++++++++++++++++++++++++++------------------ native/Cargo.toml | 8 +- 2 files changed, 393 insertions(+), 260 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 8e2ed37f69..35de5ed68d 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -1363,12 +1363,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "const-oid" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" - [[package]] name = "const-random" version = "0.1.18" @@ -1438,15 +1432,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crc32c" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" -dependencies = [ - "rustc_version", -] - [[package]] name = "crc32fast" version = "1.5.0" @@ -1683,7 +1668,7 @@ dependencies = [ [[package]] name = "datafusion" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "arrow-schema", @@ -1692,25 +1677,25 @@ dependencies = [ "chrono", "datafusion-catalog", "datafusion-catalog-listing", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-common-runtime", "datafusion-datasource", "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", - "datafusion-physical-expr", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-session", @@ -1733,17 +1718,17 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1757,18 +1742,18 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "53.0.0" -source = 
"git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "futures", "itertools 0.14.0", @@ -1793,7 +1778,7 @@ dependencies = [ "datafusion-comet-proto", "datafusion-comet-spark-expr", "datafusion-datasource", - "datafusion-functions-nested", + "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "datafusion-physical-expr-adapter", "datafusion-spark", "futures", @@ -1896,7 +1881,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "ahash", "arrow", @@ -1918,9 +1903,29 @@ dependencies = [ ] [[package]] -name = "datafusion-common-runtime" +name = "datafusion-common" version = "53.0.0" source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc", + "chrono", + "half", + 
"hashbrown 0.16.1", + "indexmap 2.13.0", + "itertools 0.14.0", + "libc", + "log", + "paste", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "futures", "log", @@ -1930,7 +1935,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-compression", @@ -1938,13 +1943,13 @@ dependencies = [ "bytes", "bzip2", "chrono", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "datafusion-session", "flate2", @@ -1964,18 +1969,18 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "arrow-ipc", "async-trait", "bytes", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", 
"datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1987,17 +1992,17 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "datafusion-session", "futures", @@ -2009,17 +2014,17 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-common-runtime", "datafusion-datasource", - 
"datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "datafusion-session", "futures", @@ -2032,20 +2037,20 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", @@ -2058,6 +2063,11 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-doc" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" + [[package]] name = "datafusion-doc" version = "53.0.0" @@ 
-2066,16 +2076,16 @@ source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c [[package]] name = "datafusion-execution" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "arrow-buffer", "async-trait", "chrono", "dashmap", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "futures", "log", "object_store 0.13.1", @@ -2086,6 +2096,49 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-execution" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +dependencies = [ + "arrow", + "arrow-buffer", + "async-trait", + "chrono", + "dashmap", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "futures", + "log", + "object_store 0.13.1", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "indexmap 2.13.0", + "itertools 0.14.0", + "paste", + "serde_json", + "sqlparser", +] + [[package]] name = "datafusion-expr" version = "53.0.0" @@ -2094,12 +2147,12 @@ dependencies = [ "arrow", "async-trait", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "indexmap 2.13.0", "itertools 0.14.0", "paste", @@ -2107,13 +2160,25 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion-expr-common" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "arrow", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "indexmap 2.13.0", + "itertools 0.14.0", + "paste", +] + [[package]] name = "datafusion-expr-common" version = "53.0.0" source = 
"git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "indexmap 2.13.0", "itertools 0.14.0", "paste", @@ -2122,7 +2187,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "arrow-buffer", @@ -2131,12 +2196,12 @@ dependencies = [ "blake3", "chrono", "chrono-tz", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "hex", "itertools 0.14.0", "log", @@ -2150,6 +2215,54 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-functions" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "chrono", + "chrono-tz", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-execution 53.0.0 
(git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "hex", + "itertools 0.14.0", + "log", + "memchr", + "num-traits", + "rand 0.9.2", + "regex", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "half", + "log", + "num-traits", + "paste", +] + [[package]] name = "datafusion-functions-aggregate" version = "53.0.0" @@ -2157,20 +2270,32 @@ source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 
(git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "half", "log", "num-traits", "paste", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", +] + [[package]] name = "datafusion-functions-aggregate-common" version = "53.0.0" @@ -2178,9 +2303,33 @@ source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", +] + +[[package]] +name = "datafusion-functions-nested" +version = "53.0.0" +source = 
"git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "hashbrown 0.16.1", + "itertools 0.14.0", + "itoa", + "log", + "paste", ] [[package]] @@ -2190,16 +2339,16 @@ source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c dependencies = [ "arrow", "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr-common 53.0.0 
(git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions-aggregate 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "hashbrown 0.16.1", "itertools 0.14.0", "itoa", @@ -2210,13 +2359,13 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common", - "datafusion-expr", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "parking_lot", "paste", @@ -2225,27 +2374,46 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + 
"datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "log", "paste", ] +[[package]] +name = "datafusion-functions-window-common" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", +] + [[package]] name = "datafusion-functions-window-common" version = "53.0.0" source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", +] + +[[package]] +name = "datafusion-macros" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "quote", + "syn 2.0.117", ] [[package]] @@ -2253,7 +2421,7 @@ name = "datafusion-macros" version = "53.0.0" source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" dependencies = [ - "datafusion-doc", + "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "quote", "syn 2.0.117", ] @@ 
-2261,14 +2429,14 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "indexmap 2.13.0", "itertools 0.14.0", "log", @@ -2276,6 +2444,28 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "datafusion-physical-expr" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "half", + "hashbrown 0.16.1", + "indexmap 2.13.0", + "itertools 0.14.0", + "parking_lot", + "paste", + "petgraph", + "tokio", +] + [[package]] name = "datafusion-physical-expr" version = "53.0.0" @@ -2283,11 +2473,11 @@ source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr", 
- "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "half", "hashbrown 0.16.1", "indexmap 2.13.0", @@ -2301,17 +2491,33 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "itertools 0.14.0", ] +[[package]] +name = "datafusion-physical-expr-common" +version = "53.0.0" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +dependencies = [ + "ahash", + "arrow", + "chrono", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "hashbrown 0.16.1", + "indexmap 2.13.0", + "itertools 0.14.0", + "parking_lot", +] + [[package]] name = "datafusion-physical-expr-common" version = "53.0.0" @@ -2320,8 +2526,8 @@ dependencies = [ "ahash", "arrow", "chrono", - "datafusion-common", - "datafusion-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "hashbrown 0.16.1", "indexmap 2.13.0", "itertools 0.14.0", @@ -2331,15 +2537,15 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", @@ -2348,22 +2554,22 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = 
"git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "ahash", "arrow", "arrow-ord", "arrow-schema", "async-trait", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "futures", "half", "hashbrown 0.16.1", @@ -2379,14 +2585,14 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-datasource", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "itertools 0.14.0", "log", @@ -2395,12 +2601,12 @@ dependencies = [ [[package]] name = "datafusion-session" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "async-trait", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "datafusion-physical-plan", "parking_lot", ] @@ -2408,7 +2614,7 @@ dependencies = [ [[package]] name = "datafusion-spark" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "bigdecimal", @@ -2416,12 +2622,12 @@ dependencies = [ "crc32fast", "datafusion", "datafusion-catalog", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-aggregate 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "log", "percent-encoding", "rand 0.9.2", @@ -2434,14 +2640,14 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "bigdecimal", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-functions-nested", + "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", "indexmap 2.13.0", "log", "regex", @@ -2533,7 +2739,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", - "const-oid", "crypto-common", "subtle", ] @@ -2555,15 +2760,6 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" -[[package]] -name = "dlv-list" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" -dependencies = [ - "const-random", -] - [[package]] name = "dunce" version = "1.0.5" @@ -2626,7 +2822,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3231,8 +3427,8 @@ dependencies = [ [[package]] name = "iceberg" -version = "0.8.0" 
-source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#646ab672a649322f3b140b0cf3dc76e26bb540a1" +version = "0.9.0" +source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#934d88a6c21e4389c380397485a35365c77a8a02" dependencies = [ "anyhow", "apache-avro", @@ -3262,11 +3458,9 @@ dependencies = [ "moka", "murmur3", "once_cell", - "opendal", "ordered-float 4.6.0", "parquet", "rand 0.8.5", - "reqsign", "reqwest", "roaring", "serde", @@ -3478,7 +3672,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3526,7 +3720,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4158,7 +4352,6 @@ dependencies = [ "backon", "base64", "bytes", - "crc32c", "futures", "getrandom 0.2.17", "hdrs", @@ -4169,7 +4362,6 @@ dependencies = [ "md-5", "percent-encoding", "quick-xml 0.38.4", - "reqsign", "reqwest", "serde", "serde_json", @@ -4202,16 +4394,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "ordered-multimap" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" -dependencies = [ - "dlv-list", - "hashbrown 0.14.5", -] - [[package]] name = "outref" version = "0.5.2" @@ -4641,16 +4823,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "quick-xml" -version = "0.37.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "quick-xml" version = "0.38.4" @@ -4713,7 +4885,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -4897,35 +5069,6 @@ version = "0.8.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" -[[package]] -name = "reqsign" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" -dependencies = [ - "anyhow", - "async-trait", - "base64", - "chrono", - "form_urlencoded", - "getrandom 0.2.17", - "hex", - "hmac", - "home", - "http 1.4.0", - "log", - "percent-encoding", - "quick-xml 0.37.5", - "rand 0.8.5", - "reqwest", - "rust-ini", - "serde", - "serde_json", - "sha1", - "sha2", - "tokio", -] - [[package]] name = "reqwest" version = "0.12.28" @@ -5002,16 +5145,6 @@ dependencies = [ "byteorder", ] -[[package]] -name = "rust-ini" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" -dependencies = [ - "cfg-if", - "ordered-multimap", -] - [[package]] name = "rustc-demangle" version = "0.1.27" @@ -5062,7 +5195,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5588,7 +5721,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6288,7 +6421,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] diff --git a/native/Cargo.toml b/native/Cargo.toml index abf8a3bf5e..d4118c89a3 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -38,10 +38,10 @@ arrow = { version = "58.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } async-trait = { version = "0.1" } bytes = { version = "1.11.1" } parquet = { version = "58.0.0", default-features = false, features = ["experimental"] 
} -datafusion = { git = "https://github.com/apache/datafusion", branch = "branch-53", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } -datafusion-datasource = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-physical-expr-adapter = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-spark = { git = "https://github.com/apache/datafusion", branch = "branch-53", features = ["core"] } +datafusion = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } +datafusion-datasource = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2" } +datafusion-physical-expr-adapter = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2" } +datafusion-spark = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2", features = ["core"] } datafusion-comet-spark-expr = { path = "spark-expr" } datafusion-comet-proto = { path = "proto" } chrono = { version = "0.4", default-features = false, features = ["clock"] } From 5a70cc9ab958f9dfcaf1f0bf83a05a4c301ff23e Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 17 Mar 2026 07:35:07 -0400 Subject: [PATCH 10/30] Merge in upstream/main. 
--- native/Cargo.lock | 563 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 442 insertions(+), 121 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 35de5ed68d..b7be88a62c 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -17,6 +17,17 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures 0.2.17", +] + [[package]] name = "ahash" version = "0.8.12" @@ -87,9 +98,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anyhow" @@ -547,9 +558,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.14" +version = "1.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2" +checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -567,7 +578,7 @@ dependencies = [ "fastrand", "hex", "http 1.4.0", - "ring", + "sha1", "time", "tokio", "tracing", @@ -577,9 +588,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.13" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" 
+checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -611,9 +622,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" +checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -636,9 +647,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.95.0" +version = "1.96.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" +checksum = "f64a6eded248c6b453966e915d32aeddb48ea63ad17932682774eb026fbef5b1" dependencies = [ "aws-credential-types", "aws-runtime", @@ -660,9 +671,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.97.0" +version = "1.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" +checksum = "db96d720d3c622fcbe08bae1c4b04a72ce6257d8b0584cb5418da00ae20a344f" dependencies = [ "aws-credential-types", "aws-runtime", @@ -684,9 +695,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.99.0" +version = "1.100.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" +checksum = "fafbdda43b93f57f699c5dfe8328db590b967b8a820a13ccdd6687355dfcc7ca" dependencies = [ "aws-credential-types", "aws-runtime", @@ -709,9 +720,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" +checksum = 
"b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -857,9 +868,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b1117b3b2bbe166d11199b540ceed0d0f7676e36e7b962b5a437a9971eac75" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" dependencies = [ "base64-simd", "bytes", @@ -889,9 +900,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.13" +version = "1.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" +checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -943,6 +954,12 @@ dependencies = [ "vsimd", ] +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bigdecimal" version = "0.4.10" @@ -1049,6 +1066,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + [[package]] name = "blocking" version = "1.6.2" @@ -1074,9 +1100,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1084,9 +1110,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = 
"3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ "darling 0.23.0", "ident_case", @@ -1167,11 +1193,20 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" -version = "1.2.56" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -1268,6 +1303,16 @@ dependencies = [ "half", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -1281,18 +1326,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +checksum = 
"714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstyle", "clap_lex", @@ -1300,9 +1345,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cmake" @@ -1363,6 +1408,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const-random" version = "0.1.18" @@ -1432,6 +1483,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -1557,16 +1617,6 @@ dependencies = [ "darling_macro 0.20.11", ] -[[package]] -name = "darling" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" -dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", -] - [[package]] name = "darling" version = "0.23.0" @@ -1591,20 +1641,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.117", -] - [[package]] name = "darling_core" version = "0.23.0" @@ -1629,17 +1665,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = 
"darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", - "quote", - "syn 2.0.117", -] - [[package]] name = "darling_macro" version = "0.23.0" @@ -1786,6 +1811,7 @@ dependencies = [ "hdrs", "hex", "iceberg", + "iceberg-storage-opendal", "itertools 0.14.0", "jni", "lazy_static", @@ -1872,6 +1898,7 @@ dependencies = [ "num", "rand 0.10.0", "regex", + "serde", "serde_json", "thiserror 2.0.18", "tokio", @@ -1905,7 +1932,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "ahash", "arrow", @@ -2071,7 +2098,7 @@ source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ed [[package]] name = "datafusion-doc" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" [[package]] name = "datafusion-execution" @@ -2099,7 +2126,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "arrow-buffer", @@ -2142,7 +2169,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = 
"git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "async-trait", @@ -2175,7 +2202,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", @@ -2218,7 +2245,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "arrow-buffer", @@ -2266,7 +2293,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "ahash", "arrow", @@ -2299,7 +2326,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "ahash", "arrow", @@ -2335,7 +2362,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = 
"git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", "arrow-ord", @@ -2400,7 +2427,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", @@ -2419,7 +2446,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", "quote", @@ -2469,7 +2496,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "ahash", "arrow", @@ -2521,7 +2548,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#c466f820c66c08ccc8c7a7d1eaf39de39b4cbd61" +source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "ahash", "arrow", @@ -2663,6 +2690,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "der" +version = "0.7.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.8" @@ -2739,6 +2777,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -2756,9 +2795,18 @@ dependencies = [ [[package]] name = "dissimilar" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e" + +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] [[package]] name = "dunce" @@ -2822,7 +2870,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3478,6 +3526,25 @@ dependencies = [ "zstd", ] +[[package]] +name = "iceberg-storage-opendal" +version = "0.9.0" +source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#934d88a6c21e4389c380397485a35365c77a8a02" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "cfg-if", + "futures", + "iceberg", + "opendal", + "reqsign", + "reqwest", + "serde", + "typetag", + "url", +] + [[package]] name = "icu_collections" version = "2.1.1" @@ -3633,6 +3700,16 @@ dependencies = [ "str_stack", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -3672,7 +3749,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3720,7 +3797,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3793,6 +3870,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonwebtoken" +version = "9.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +dependencies = [ + "base64", + "js-sys", + "pem", + "ring", + "serde", + "serde_json", + "simple_asn1", +] + [[package]] name = "kv-log-macro" version = "1.0.7" @@ -3807,6 +3899,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin 0.9.8", +] [[package]] name = "lazycell" @@ -3885,9 +3980,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libloading" @@ -4025,9 +4120,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = 
"98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] @@ -4177,6 +4272,22 @@ dependencies = [ "serde", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -4332,9 +4443,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "oorandom" @@ -4352,6 +4463,7 @@ dependencies = [ "backon", "base64", "bytes", + "crc32c", "futures", "getrandom 0.2.17", "hdrs", @@ -4362,6 +4474,7 @@ dependencies = [ "md-5", "percent-encoding", "quick-xml 0.38.4", + "reqsign", "reqwest", "serde", "serde_json", @@ -4394,6 +4507,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-multimap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" +dependencies = [ + "dlv-list", + "hashbrown 0.14.5", +] + [[package]] name = "outref" version = "0.5.2" @@ -4520,12 +4643,41 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + [[package]] name = "peeking_take_while" version = 
"0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64", + "serde_core", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -4605,6 +4757,44 @@ dependencies = [ "futures-io", ] +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2", + "scrypt", + "sha2", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "pkcs5", + "rand_core 0.6.4", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -4661,9 +4851,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" 
dependencies = [ "portable-atomic", ] @@ -4699,7 +4889,7 @@ dependencies = [ "nix", "once_cell", "smallvec", - "spin", + "spin 0.10.0", "symbolic-demangle", "tempfile", "thiserror 2.0.18", @@ -4823,6 +5013,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.38.4" @@ -4855,9 +5055,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", "getrandom 0.3.4", @@ -4885,7 +5085,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5069,6 +5269,38 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "reqsign" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +dependencies = [ + "anyhow", + "async-trait", + "base64", + "chrono", + "form_urlencoded", + "getrandom 0.2.17", + "hex", + "hmac", + "home", + "http 1.4.0", + "jsonwebtoken", + "log", + "once_cell", + "percent-encoding", + "quick-xml 0.37.5", + "rand 0.8.5", + "reqwest", + "rsa", + "rust-ini", + "serde", + "serde_json", + "sha1", + "sha2", + "tokio", +] + [[package]] name = "reqwest" version = "0.12.28" @@ -5145,6 +5377,37 @@ dependencies = [ "byteorder", ] +[[package]] +name = "rsa" +version = "0.9.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "sha2", + "signature", + "spki", + "subtle", + "zeroize", +] + +[[package]] +name = "rust-ini" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" +dependencies = [ + "cfg-if", + "ordered-multimap", +] + [[package]] name = "rustc-demangle" version = "0.1.27" @@ -5195,7 +5458,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5259,6 +5522,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "same-file" version = "1.0.6" @@ -5270,9 +5542,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -5307,6 +5579,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2", + "salsa20", + "sha2", +] + [[package]] name = "security-framework" version = "3.7.0" @@ -5439,9 +5722,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64", "chrono", @@ -5458,11 +5741,11 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", "syn 2.0.117", @@ -5519,6 +5802,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.8" @@ -5531,6 +5824,18 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "simple_asn1" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" +dependencies = [ + "num-bigint", + "num-traits", + "thiserror 2.0.18", + "time", +] + [[package]] name = "siphasher" version = "1.0.2" @@ -5557,14 +5862,20 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" 
-version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "spin" version = "0.10.0" @@ -5574,6 +5885,16 @@ dependencies = [ "lock_api", ] +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "sqlparser" version = "0.61.0" @@ -5713,15 +6034,15 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tempfile" -version = "3.26.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5878,9 +6199,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -6181,9 +6502,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.21.0" +version = "1.22.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -6421,7 +6742,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6845,18 +7166,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.40" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.40" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" dependencies = [ "proc-macro2", "quote", From eb3198fc14753880db51841972e1a5fd82384b7d Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 17 Mar 2026 07:47:06 -0400 Subject: [PATCH 11/30] Fix native/core/Cargo.toml. 
--- native/Cargo.lock | 589 ++++++++++------------------------------- native/core/Cargo.toml | 2 +- 2 files changed, 148 insertions(+), 443 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index b7be88a62c..70e59fe96d 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -1702,25 +1702,25 @@ dependencies = [ "chrono", "datafusion-catalog", "datafusion-catalog-listing", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-session", @@ -1748,12 +1748,12 @@ dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1772,13 +1772,13 @@ dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-datasource", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common", "datafusion-physical-plan", "futures", "itertools 0.14.0", @@ -1803,7 +1803,7 @@ dependencies = [ "datafusion-comet-proto", "datafusion-comet-spark-expr", "datafusion-datasource", - "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-functions-nested", "datafusion-physical-expr-adapter", "datafusion-spark", "futures", @@ -1929,26 +1929,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "datafusion-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "ahash", - "arrow", - "arrow-ipc", - "chrono", - "half", - "hashbrown 0.16.1", 
- "indexmap 2.13.0", - "itertools 0.14.0", - "libc", - "log", - "paste", - "tokio", - "web-time", -] - [[package]] name = "datafusion-common-runtime" version = "53.0.0" @@ -1970,13 +1950,13 @@ dependencies = [ "bytes", "bzip2", "chrono", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-common-runtime", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "flate2", @@ -2002,12 +1982,12 @@ dependencies = [ "arrow-ipc", "async-trait", "bytes", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", @@ -2024,12 +2004,12 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", @@ -2046,12 +2026,12 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", @@ -2069,15 +2049,15 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", @@ -2095,11 +2075,6 @@ name = 
"datafusion-doc" version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -[[package]] -name = "datafusion-doc" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" - [[package]] name = "datafusion-execution" version = "53.0.0" @@ -2110,9 +2085,9 @@ dependencies = [ "async-trait", "chrono", "dashmap", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", "object_store 0.13.1", @@ -2123,28 +2098,6 @@ dependencies = [ "url", ] -[[package]] -name = "datafusion-execution" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "arrow", - "arrow-buffer", - "async-trait", - "chrono", - "dashmap", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "futures", - "log", - "object_store 0.13.1", - "parking_lot", - "rand 0.9.2", - "tempfile", - "url", -] - [[package]] name = "datafusion-expr" version = "53.0.0" @@ -2153,33 +2106,12 @@ dependencies = [ "arrow", "async-trait", "chrono", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate-common 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "indexmap 2.13.0", - "itertools 0.14.0", - "paste", - "serde_json", - "sqlparser", -] - -[[package]] -name = "datafusion-expr" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", "indexmap 2.13.0", "itertools 0.14.0", "paste", @@ -2193,19 +2125,7 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "indexmap 2.13.0", - "itertools 0.14.0", - "paste", -] - -[[package]] -name = "datafusion-expr-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "arrow", - "datafusion-common 53.0.0 
(git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", "indexmap 2.13.0", "itertools 0.14.0", "paste", @@ -2223,12 +2143,12 @@ dependencies = [ "blake3", "chrono", "chrono-tz", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hex", "itertools 0.14.0", "log", @@ -2242,33 +2162,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "datafusion-functions" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "arrow", - "arrow-buffer", - "base64", - "chrono", - "chrono-tz", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "hex", - "itertools 0.14.0", - "log", - "memchr", - "num-traits", - "rand 0.9.2", - "regex", - "unicode-segmentation", - "uuid", -] - [[package]] name = "datafusion-functions-aggregate" version = "53.0.0" @@ -2276,35 +2169,14 @@ 
source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ed dependencies = [ "ahash", "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "half", - "log", - "num-traits", - "paste", -] - -[[package]] -name = "datafusion-functions-aggregate" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + 
"datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "half", "log", "num-traits", @@ -2318,21 +2190,9 @@ source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ed dependencies = [ "ahash", "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", -] - -[[package]] -name = "datafusion-functions-aggregate-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", ] [[package]] @@ -2342,40 +2202,16 @@ source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ed dependencies = [ "arrow", "arrow-ord", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - 
"datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "hashbrown 0.16.1", - "itertools 0.14.0", - "itoa", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-nested" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-functions-aggregate 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr-common", "hashbrown 0.16.1", "itertools 0.14.0", "itoa", @@ -2391,8 +2227,8 @@ dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common 53.0.0 
(git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-expr", "datafusion-physical-plan", "parking_lot", "paste", @@ -2404,13 +2240,13 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-macros 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "log", "paste", ] @@ -2420,17 +2256,8 @@ name = "datafusion-functions-window-common" version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", -] - -[[package]] -name = "datafusion-functions-window-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - 
"datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] @@ -2438,17 +2265,7 @@ name = "datafusion-macros" version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "datafusion-macros" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "datafusion-doc 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-doc", "quote", "syn 2.0.117", ] @@ -2460,10 +2277,10 @@ source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ed dependencies = [ "arrow", "chrono", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", "indexmap 2.13.0", "itertools 0.14.0", "log", @@ -2478,33 +2295,11 @@ source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ed dependencies = [ "ahash", "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - 
"datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "half", - "hashbrown 0.16.1", - "indexmap 2.13.0", - "itertools 0.14.0", - "parking_lot", - "paste", - "petgraph", - "tokio", -] - -[[package]] -name = "datafusion-physical-expr" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "half", "hashbrown 0.16.1", "indexmap 2.13.0", @@ -2521,11 +2316,11 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "itertools 0.14.0", ] @@ -2537,24 +2332,8 @@ dependencies = [ "ahash", "arrow", 
"chrono", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "hashbrown 0.16.1", - "indexmap 2.13.0", - "itertools 0.14.0", - "parking_lot", -] - -[[package]] -name = "datafusion-physical-expr-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion?branch=branch-53#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" -dependencies = [ - "ahash", - "arrow", - "chrono", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?branch=branch-53)", + "datafusion-common", + "datafusion-expr-common", "hashbrown 0.16.1", "indexmap 2.13.0", "itertools 0.14.0", @@ -2567,12 +2346,12 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", @@ -2588,15 +2367,15 @@ dependencies = [ "arrow-ord", "arrow-schema", "async-trait", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", 
"datafusion-common-runtime", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-window-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "futures", "half", "hashbrown 0.16.1", @@ -2615,11 +2394,11 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "arrow", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", "datafusion-datasource", - "datafusion-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-physical-expr-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "itertools 0.14.0", "log", @@ -2631,9 +2410,9 @@ version = "53.0.0" source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" dependencies = [ "async-trait", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - 
"datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", "datafusion-physical-plan", "parking_lot", ] @@ -2649,12 +2428,12 @@ dependencies = [ "crc32fast", "datafusion", "datafusion-catalog", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-execution 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-aggregate 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", "log", "percent-encoding", "rand 0.9.2", @@ -2672,9 +2451,9 @@ dependencies = [ "arrow", "bigdecimal", "chrono", - "datafusion-common 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-expr 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", - "datafusion-functions-nested 53.0.0 (git+https://github.com/apache/datafusion?tag=53.0.0-rc2)", + "datafusion-common", + "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", "log", "regex", @@ -2870,7 +2649,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3749,7 +3528,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - 
"windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3797,7 +3576,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4964,7 +4743,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "petgraph", @@ -4983,7 +4762,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.117", @@ -5085,7 +4864,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -5458,7 +5237,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6042,7 +5821,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6742,7 +6521,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6837,15 +6616,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -6879,30 +6649,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + 
"windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -6915,12 +6668,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -6933,12 +6680,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -6951,24 +6692,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -6981,12 +6710,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -6999,12 +6722,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -7017,12 +6734,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -7035,12 +6746,6 @@ version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 2eb3c182cc..6b2b81a79f 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -97,7 +97,7 @@ jni = { version = "0.21", features = ["invocation"] } lazy_static = "1.4" assertables = "9" hex = "0.4.3" -datafusion-functions-nested = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-functions-nested = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2" } [features] backtrace = ["datafusion/backtrace"] From 0de138170a614e6696d007686d3284c32167eea1 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 17 Mar 2026 08:10:32 -0400 Subject: [PATCH 12/30] Fix native test failures, clippy. 
--- native/core/src/execution/jni_api.rs | 2 ++ native/core/src/execution/planner.rs | 17 ++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index 361deae182..858639b025 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -40,6 +40,7 @@ use datafusion::{ prelude::{SessionConfig, SessionContext}, }; use datafusion_comet_proto::spark_operator::Operator; +use datafusion_spark::function::array::repeat::SparkArrayRepeat; use datafusion_spark::function::bitwise::bit_count::SparkBitCount; use datafusion_spark::function::bitwise::bit_get::SparkBitGet; use datafusion_spark::function::bitwise::bitwise_not::SparkBitwiseNot; @@ -389,6 +390,7 @@ fn prepare_datafusion_session_context( // register UDFs from datafusion-spark crate fn register_datafusion_spark_function(session_ctx: &SessionContext) { + session_ctx.register_udf(ScalarUDF::new_from_impl(SparkArrayRepeat::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkExpm1::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha2::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(CharFunc::default())); diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 3f1245b05e..3b52d0e954 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -85,6 +85,7 @@ use datafusion::common::{ JoinType as DFJoinType, NullEquality, ScalarValue, }; use datafusion::datasource::listing::PartitionedFile; +use datafusion::logical_expr::type_coercion::functions::fields_with_udf; use datafusion::logical_expr::type_coercion::other::get_coerce_type_for_case_expression; use datafusion::logical_expr::{ AggregateUDF, ReturnFieldArgs, ScalarUDF, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -2548,15 +2549,14 @@ impl PhysicalPlanner { other => other, }; let func = 
self.session_ctx.udf(fun_name)?; - let coerced_types = func - .coerce_types(&input_expr_types) - .unwrap_or_else(|_| input_expr_types.clone()); - - let arg_fields = coerced_types + let input_fields: Vec<_> = input_expr_types .iter() .enumerate() .map(|(i, dt)| Arc::new(Field::new(format!("arg{i}"), dt.clone(), true))) - .collect::>(); + .collect(); + let arg_fields = fields_with_udf(&input_fields, func.as_ref())?; + let coerced_types: Vec<_> = + arg_fields.iter().map(|f| f.data_type().clone()).collect(); // TODO this should try and find scalar let arguments = args @@ -4058,6 +4058,9 @@ mod tests { #[test] fn test_array_repeat() { let session_ctx = SessionContext::new(); + session_ctx.register_udf(ScalarUDF::new_from_impl( + datafusion_spark::function::array::repeat::SparkArrayRepeat::default(), + )); let task_ctx = session_ctx.task_ctx(); let planner = PhysicalPlanner::new(Arc::from(session_ctx), 0); @@ -4175,7 +4178,7 @@ mod tests { "+--------------+", "| [0] |", "| [3, 3, 3, 3] |", - "| [] |", + "| |", "+--------------+", ]; assert_batches_eq!(expected, &[batch]); From 7257a24dc25cf81f97fde52ace6349b6967023b8 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 17 Mar 2026 08:38:52 -0400 Subject: [PATCH 13/30] Update to use object_store 0.13 in hdfs.rs. 
--- .../src/execution/operators/parquet_writer.rs | 1 + native/hdfs/src/object_store/hdfs.rs | 176 ++++-------------- 2 files changed, 42 insertions(+), 135 deletions(-) diff --git a/native/core/src/execution/operators/parquet_writer.rs b/native/core/src/execution/operators/parquet_writer.rs index bb2f54b2a6..8ba79098d4 100644 --- a/native/core/src/execution/operators/parquet_writer.rs +++ b/native/core/src/execution/operators/parquet_writer.rs @@ -583,6 +583,7 @@ mod tests { /// Helper function to create a test RecordBatch with 1000 rows of (int, string) data /// Example batch_id 1 -> 0..1000, 2 -> 1001..2000 + #[allow(dead_code)] fn create_test_record_batch(batch_id: i32) -> Result { assert!(batch_id > 0, "batch_id must be greater than 0"); let num_rows = batch_id * 1000; diff --git a/native/hdfs/src/object_store/hdfs.rs b/native/hdfs/src/object_store/hdfs.rs index a93774cffe..cb5a2fa5c2 100644 --- a/native/hdfs/src/object_store/hdfs.rs +++ b/native/hdfs/src/object_store/hdfs.rs @@ -31,8 +31,9 @@ use fs_hdfs::walkdir::HdfsWalkDir; use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use object_store::{ path::{self, Path}, - Error, GetOptions, GetRange, GetResult, GetResultPayload, ListResult, MultipartUpload, - ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, + CopyMode, CopyOptions, Error, GetOptions, GetRange, GetResult, GetResultPayload, ListResult, + MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, + PutResult, Result, }; /// scheme for HDFS File System @@ -144,62 +145,6 @@ impl ObjectStore for HadoopFileSystem { unimplemented!() } - async fn get(&self, location: &Path) -> Result { - let hdfs = self.hdfs.clone(); - let hdfs_root = self.hdfs.url().to_owned(); - let location = HadoopFileSystem::path_to_filesystem(location); - - let (blob, object_metadata, range) = maybe_spawn_blocking(move || { - let file = hdfs.open(&location).map_err(to_error)?; - - let file_status = 
file.get_file_status().map_err(to_error)?; - - let to_read = file_status.len(); - let mut total_read = 0; - let mut buf = vec![0; to_read]; - while total_read < to_read { - let read = file.read(buf.as_mut_slice()).map_err(to_error)?; - if read <= 0 { - break; - } - total_read += read as usize; - } - - if total_read != to_read { - return Err(Error::Generic { - store: "HadoopFileSystem", - source: Box::new(HdfsErr::Generic(format!( - "Error reading path {} with expected size {} and actual size {}", - file.path(), - to_read, - total_read - ))), - }); - } - - file.close().map_err(to_error)?; - - let object_metadata = convert_metadata(file_status.clone(), &hdfs_root); - - let range = Range { - start: 0, - end: file_status.len() as u64, - }; - - Ok((buf.into(), object_metadata, range)) - }) - .await?; - - Ok(GetResult { - payload: GetResultPayload::Stream( - futures::stream::once(async move { Ok(blob) }).boxed(), - ), - meta: object_metadata, - range, - attributes: Default::default(), - }) - } - async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { if options.if_match.is_some() || options.if_none_match.is_some() { return Err(Error::Generic { @@ -249,51 +194,40 @@ impl ObjectStore for HadoopFileSystem { }) } - async fn get_range(&self, location: &Path, range: Range) -> Result { + async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { let hdfs = self.hdfs.clone(); let location = HadoopFileSystem::path_to_filesystem(location); + let ranges = ranges.to_vec(); maybe_spawn_blocking(move || { let file = hdfs.open(&location).map_err(to_error)?; - let buf = Self::read_range(&range, &file)?; + let result = ranges + .iter() + .map(|range| Self::read_range(range, &file)) + .collect::>>()?; file.close().map_err(to_error)?; - - Ok(buf) + Ok(result) }) .await } - async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { - coalesce_ranges( - ranges, - |range| self.get_range(location, range), - HDFS_COALESCE_DEFAULT, - ) - .await 
- } - - async fn head(&self, location: &Path) -> Result { - let hdfs = self.hdfs.clone(); - let hdfs_root = self.hdfs.url().to_owned(); - let location = HadoopFileSystem::path_to_filesystem(location); - - maybe_spawn_blocking(move || { - let file_status = hdfs.get_file_status(&location).map_err(to_error)?; - Ok(convert_metadata(file_status, &hdfs_root)) - }) - .await - } - - async fn delete(&self, location: &Path) -> Result<()> { + fn delete_stream( + &self, + locations: BoxStream<'static, Result>, + ) -> BoxStream<'static, Result> { let hdfs = self.hdfs.clone(); - let location = HadoopFileSystem::path_to_filesystem(location); - - maybe_spawn_blocking(move || { - hdfs.delete(&location, false).map_err(to_error)?; - - Ok(()) - }) - .await + locations + .map(move |location| { + let hdfs = hdfs.clone(); + maybe_spawn_blocking(move || { + let location = location?; + let fs_path = HadoopFileSystem::path_to_filesystem(&location); + hdfs.delete(&fs_path, false).map_err(to_error)?; + Ok(location) + }) + }) + .buffered(10) + .boxed() } /// List all of the leaf files under the prefix path. @@ -402,61 +336,33 @@ impl ObjectStore for HadoopFileSystem { .await } - /// Copy an object from one path to another. - /// If there exists an object at the destination, it will be overwritten. 
- async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> { let hdfs = self.hdfs.clone(); let from = HadoopFileSystem::path_to_filesystem(from); let to = HadoopFileSystem::path_to_filesystem(to); maybe_spawn_blocking(move || { - // We need to make sure the source exist if !hdfs.exist(&from) { return Err(Error::NotFound { path: from.clone(), source: Box::new(HdfsErr::FileNotFound(from)), }); } - // Delete destination if exists - if hdfs.exist(&to) { - hdfs.delete(&to, false).map_err(to_error)?; - } - - fs_hdfs::util::HdfsUtil::copy(hdfs.as_ref(), &from, hdfs.as_ref(), &to) - .map_err(to_error)?; - - Ok(()) - }) - .await - } - - /// It's only allowed for the same HDFS - async fn rename(&self, from: &Path, to: &Path) -> Result<()> { - let hdfs = self.hdfs.clone(); - let from = HadoopFileSystem::path_to_filesystem(from); - let to = HadoopFileSystem::path_to_filesystem(to); - - maybe_spawn_blocking(move || { - hdfs.rename(&from, &to, true).map_err(to_error)?; - - Ok(()) - }) - .await - } - - /// Copy an object from one path to another, only if destination is empty. - /// Will return an error if the destination already has an object. 
- async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - let hdfs = self.hdfs.clone(); - let from = HadoopFileSystem::path_to_filesystem(from); - let to = HadoopFileSystem::path_to_filesystem(to); - maybe_spawn_blocking(move || { - if hdfs.exist(&to) { - return Err(Error::AlreadyExists { - path: from, - source: Box::new(HdfsErr::FileAlreadyExists(to)), - }); + match options.mode { + CopyMode::Overwrite => { + if hdfs.exist(&to) { + hdfs.delete(&to, false).map_err(to_error)?; + } + } + CopyMode::Create => { + if hdfs.exist(&to) { + return Err(Error::AlreadyExists { + path: from, + source: Box::new(HdfsErr::FileAlreadyExists(to)), + }); + } + } } fs_hdfs::util::HdfsUtil::copy(hdfs.as_ref(), &from, hdfs.as_ref(), &to) From 43059fdf7e4b6bca53b31fcd13100f8195e3496d Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 17 Mar 2026 09:52:45 -0400 Subject: [PATCH 14/30] Fix memory pool issues. --- .../src/execution/memory_pools/fair_pool.rs | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/native/core/src/execution/memory_pools/fair_pool.rs b/native/core/src/execution/memory_pools/fair_pool.rs index 1a98f91e49..e1f686fdc8 100644 --- a/native/core/src/execution/memory_pools/fair_pool.rs +++ b/native/core/src/execution/memory_pools/fair_pool.rs @@ -108,16 +108,21 @@ impl MemoryPool for CometFairMemoryPool { .expect("unexpected amount of unregister happened"); } - fn grow(&self, reservation: &MemoryReservation, additional: usize) { - self.try_grow(reservation, additional).unwrap(); + fn grow(&self, _reservation: &MemoryReservation, additional: usize) { + self.try_grow(_reservation, additional).unwrap(); } - fn shrink(&self, reservation: &MemoryReservation, subtractive: usize) { + fn shrink(&self, _reservation: &MemoryReservation, subtractive: usize) { if subtractive > 0 { let mut state = self.state.lock(); - let size = reservation.size(); - if size < subtractive { - panic!("Failed to release {subtractive} bytes 
where only {size} bytes reserved") + // We don't use reservation.size() here because DataFusion 53+ decrements + // the reservation's atomic size before calling pool.shrink(), so it would + // reflect the post-shrink value rather than the pre-shrink value. + if state.used < subtractive { + panic!( + "Failed to release {subtractive} bytes where only {} bytes tracked by pool", + state.used + ) } self.release(subtractive) .unwrap_or_else(|_| panic!("Failed to release {subtractive} bytes")); @@ -127,7 +132,7 @@ impl MemoryPool for CometFairMemoryPool { fn try_grow( &self, - reservation: &MemoryReservation, + _reservation: &MemoryReservation, additional: usize, ) -> Result<(), DataFusionError> { if additional > 0 { @@ -137,10 +142,13 @@ impl MemoryPool for CometFairMemoryPool { .pool_size .checked_div(num) .expect("overflow in checked_div"); - let size = reservation.size(); - if limit < size + additional { + // We use state.used instead of reservation.size() because DataFusion 53+ + // calls pool.try_grow() before incrementing the reservation's atomic size, + // so reservation.size() would not include prior grows. + let used = state.used; + if limit < used + additional { return resources_err!( - "Failed to acquire {additional} bytes where {size} bytes already reserved and the fair limit is {limit} bytes, {num} registered" + "Failed to acquire {additional} bytes where {used} bytes already reserved and the fair limit is {limit} bytes, {num} registered" ); } From 1794cfda25cdd1f0d1060ae2bbede2d65a0ef0dc Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 17 Mar 2026 14:24:37 -0400 Subject: [PATCH 15/30] Fix type coercion for Utf8View stuff. 
--- native/core/src/execution/planner.rs | 85 +++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 7 deletions(-) diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 3b52d0e954..11fd7be3f6 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -88,8 +88,8 @@ use datafusion::datasource::listing::PartitionedFile; use datafusion::logical_expr::type_coercion::functions::fields_with_udf; use datafusion::logical_expr::type_coercion::other::get_coerce_type_for_case_expression; use datafusion::logical_expr::{ - AggregateUDF, ReturnFieldArgs, ScalarUDF, WindowFrame, WindowFrameBound, WindowFrameUnits, - WindowFunctionDefinition, + AggregateUDF, ReturnFieldArgs, ScalarUDF, TypeSignature, WindowFrame, WindowFrameBound, + WindowFrameUnits, WindowFunctionDefinition, }; use datafusion::physical_expr::expressions::{Literal, StatsType}; use datafusion::physical_expr::window::WindowExpr; @@ -2549,14 +2549,44 @@ impl PhysicalPlanner { other => other, }; let func = self.session_ctx.udf(fun_name)?; - let input_fields: Vec<_> = input_expr_types + + // Type coercion strategy: + // + // In DF52, Comet used coerce_types() which returns NotImplemented + // for most UDFs, so input types were kept unchanged. In DF53, + // fields_with_udf() runs full coercion which aggressively promotes + // types (e.g. Utf8 to Utf8View via Variadic signatures, Int32 to Int64 + // via Exact signatures). This breaks Comet's native implementations. + // + // Strategy: + // 1. Try coerce_types() — only UDFs that explicitly implement it + // will return Ok. Same as DF52 behavior. + // 2. For "well-supported" signatures (Coercible, String, Numeric, + // Comparable), use fields_with_udf(). These preserve input types + // (e.g. Utf8 stays Utf8, not promoted to Utf8View). + // 3. For all other signatures (Variadic, Exact, etc.), keep original + // types unchanged. Same as DF52 behavior. 
+ let coerced_types = match func.coerce_types(&input_expr_types) { + Ok(types) => types, + Err(_) if needs_fields_coercion(&func.signature().type_signature) => { + let input_fields: Vec<_> = input_expr_types + .iter() + .enumerate() + .map(|(i, dt)| { + Arc::new(Field::new(format!("arg{i}"), dt.clone(), true)) + }) + .collect(); + let arg_fields = fields_with_udf(&input_fields, func.as_ref())?; + arg_fields.iter().map(|f| f.data_type().clone()).collect() + } + Err(_) => input_expr_types.clone(), + }; + + let arg_fields: Vec<_> = coerced_types .iter() .enumerate() .map(|(i, dt)| Arc::new(Field::new(format!("arg{i}"), dt.clone(), true))) .collect(); - let arg_fields = fields_with_udf(&input_fields, func.as_ref())?; - let coerced_types: Vec<_> = - arg_fields.iter().map(|f| f.data_type().clone()).collect(); // TODO this should try and find scalar let arguments = args @@ -2612,10 +2642,33 @@ impl PhysicalPlanner { fun_name, fun_expr, args.to_vec(), - Arc::new(Field::new(fun_name, data_type, true)), + Arc::new(Field::new(fun_name, data_type.clone(), true)), Arc::new(ConfigOptions::default()), )); + // DF53 changed some UDFs (e.g. md5) to return StringViewArray at execution + // time (apache/datafusion#20045). Comet does not yet support view types, so + // cast the result back to the non-view variant. 
+ let scalar_expr = match data_type { + DataType::Utf8View => Arc::new(CastExpr::new( + scalar_expr, + DataType::Utf8, + Some(CastOptions { + safe: false, + ..Default::default() + }), + )) as Arc, + DataType::BinaryView => Arc::new(CastExpr::new( + scalar_expr, + DataType::Binary, + Some(CastOptions { + safe: false, + ..Default::default() + }), + )) as Arc, + _ => scalar_expr, + }; + Ok(scalar_expr) } @@ -3594,6 +3647,24 @@ fn extract_literal_as_datum(expr: &spark_expression::Expr) -> Option bool { + match sig { + TypeSignature::Coercible(_) + | TypeSignature::String(_) + | TypeSignature::Numeric(_) + | TypeSignature::Comparable(_) => true, + TypeSignature::OneOf(sigs) => sigs.iter().any(needs_fields_coercion), + _ => false, + } +} + #[cfg(test)] mod tests { use futures::{poll, StreamExt}; From 6cfe19041fbf7590d5445df2df5ea670a501cb47 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 14:58:42 -0400 Subject: [PATCH 16/30] Bump to released crates. --- native/Cargo.lock | 496 ++++++++++++++++++++++++++--------------- native/Cargo.toml | 12 +- native/core/Cargo.toml | 2 +- 3 files changed, 328 insertions(+), 182 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 70e59fe96d..3a5d5445f2 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -135,9 +135,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.2" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" +checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" dependencies = [ "rustversion", ] @@ -162,9 +162,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602268ce9f569f282cedb9a9f6bac569b680af47b9b077d515900c03c5d190da" +checksum = 
"d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -183,9 +183,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd53c6bf277dea91f136ae8e3a5d7041b44b5e489e244e637d00ae302051f56f" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -197,9 +197,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53796e07a6525edaf7dc28b540d477a934aff14af97967ad1d5550878969b9e" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c1a85bb2e94ee10b76531d8bc3ce9b7b4c0d508cabfb17d477f63f2617bd20" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -228,9 +228,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89fb245db6b0e234ed8e15b644edb8664673fefe630575e94e62cd9d489a8a26" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -250,9 +250,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d374882fb465a194462527c0c15a93aa19a554cf690a6b77a26b2a02539937a7" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ 
-265,9 +265,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "189d210bc4244c715fa3ed9e6e22864673cccb73d5da28c2723fb2e527329b33" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -278,9 +278,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7968c2e5210c41f4909b2ef76f6e05e172b99021c2def5edf3cc48fdd39d1d6c" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -288,14 +288,14 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex", + "lz4_flex 0.13.0", ] [[package]] name = "arrow-json" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92111dba5bf900f443488e01f00d8c4ddc2f47f5c50039d18120287b580baa22" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -317,9 +317,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "211136cb253577ee1a6665f741a13136d4e563f64f5093ffd6fb837af90b9495" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -330,9 +330,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e0f20145f9f5ea3fe383e2ba7a7487bf19be36aa9dbf5dd6a1f92f657179663" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +343,9 
@@ dependencies = [ [[package]] name = "arrow-schema" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b47e0ca91cc438d2c7879fe95e0bca5329fff28649e30a88c6f760b1faeddcb" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags 2.11.0", "serde_core", @@ -354,9 +354,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "750a7d1dda177735f5e82a314485b6915c7cccdbb278262ac44090f4aba4a325" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -368,9 +368,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1eab1208bc4fe55d768cdc9b9f3d9df5a794cdb3ee2586bf89f9b30dc31ad8c" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -600,9 +600,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.1" +version = "1.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" dependencies = [ "aws-lc-sys", "zeroize", @@ -610,9 +610,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.38.0" +version = "0.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" dependencies = [ "cc", "cmake", @@ -647,9 +647,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.96.0" +version = "1.97.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f64a6eded248c6b453966e915d32aeddb48ea63ad17932682774eb026fbef5b1" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" dependencies = [ "aws-credential-types", "aws-runtime", @@ -671,9 +671,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db96d720d3c622fcbe08bae1c4b04a72ce6257d8b0584cb5418da00ae20a344f" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -695,9 +695,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.100.0" +version = "1.101.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fafbdda43b93f57f699c5dfe8328db590b967b8a820a13ccdd6687355dfcc7ca" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1017,7 +1017,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "shlex", "syn 2.0.117", ] @@ -1045,16 +1045,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", - "cpufeatures 0.2.17", + "cpufeatures 0.3.0", ] [[package]] @@ -1204,9 +1204,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.57" +version = "1.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" 
dependencies = [ "find-msvc-tools", "jobserver", @@ -1351,9 +1351,9 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] @@ -1693,7 +1693,8 @@ dependencies = [ [[package]] name = "datafusion" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" dependencies = [ "arrow", "arrow-schema", @@ -1728,7 +1729,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.1", + "object_store 0.13.2", "parking_lot", "parquet", "rand 0.9.2", @@ -1743,7 +1744,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" dependencies = [ "arrow", "async-trait", @@ -1759,7 +1761,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.1", + "object_store 0.13.2", "parking_lot", "tokio", ] @@ -1767,7 +1769,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" dependencies = [ "arrow", "async-trait", @@ -1783,7 +1786,7 @@ dependencies 
= [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.1", + "object_store 0.13.2", ] [[package]] @@ -1817,10 +1820,10 @@ dependencies = [ "lazy_static", "log", "log4rs", - "lz4_flex", + "lz4_flex 0.12.1", "mimalloc", "num", - "object_store 0.13.1", + "object_store 0.13.2", "object_store_opendal", "once_cell", "opendal", @@ -1871,7 +1874,7 @@ dependencies = [ "datafusion-comet-fs-hdfs3", "fs-hdfs3", "futures", - "object_store 0.13.1", + "object_store 0.13.2", "tokio", ] @@ -1908,7 +1911,8 @@ dependencies = [ [[package]] name = "datafusion-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" dependencies = [ "ahash", "arrow", @@ -1921,7 +1925,7 @@ dependencies = [ "itertools 0.14.0", "libc", "log", - "object_store 0.13.1", + "object_store 0.13.2", "parquet", "paste", "sqlparser", @@ -1932,7 +1936,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" dependencies = [ "futures", "log", @@ -1942,7 +1947,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" dependencies = [ "arrow", "async-compression", @@ -1965,7 +1971,7 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store 0.13.1", + "object_store 0.13.2", "rand 0.9.2", "tokio", "tokio-util", @@ -1976,7 
+1982,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" dependencies = [ "arrow", "arrow-ipc", @@ -1992,14 +1999,15 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store 0.13.1", + "object_store 0.13.2", "tokio", ] [[package]] name = "datafusion-datasource-csv" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" dependencies = [ "arrow", "async-trait", @@ -2013,7 +2021,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.1", + "object_store 0.13.2", "regex", "tokio", ] @@ -2021,7 +2029,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" dependencies = [ "arrow", "async-trait", @@ -2035,7 +2044,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.1", + "object_store 0.13.2", "serde_json", "tokio", "tokio-stream", @@ -2044,7 +2053,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" dependencies = [ "arrow", "async-trait", @@ -2064,7 +2074,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.1", + "object_store 0.13.2", "parking_lot", "parquet", "tokio", @@ -2073,12 +2083,14 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" [[package]] name = "datafusion-execution" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" dependencies = [ "arrow", "arrow-buffer", @@ -2090,7 +2102,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "log", - "object_store 0.13.1", + "object_store 0.13.2", "parking_lot", "parquet", "rand 0.9.2", @@ -2101,7 +2113,8 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" dependencies = [ "arrow", "async-trait", @@ -2122,7 +2135,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" dependencies = [ "arrow", "datafusion-common", @@ -2134,7 +2148,8 @@ dependencies = [ 
[[package]] name = "datafusion-functions" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" dependencies = [ "arrow", "arrow-buffer", @@ -2165,7 +2180,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" dependencies = [ "ahash", "arrow", @@ -2186,7 +2202,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" dependencies = [ "ahash", "arrow", @@ -2198,7 +2215,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" dependencies = [ "arrow", "arrow-ord", @@ -2222,7 +2240,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" dependencies = [ "arrow", "async-trait", @@ -2237,7 +2256,8 @@ dependencies = 
[ [[package]] name = "datafusion-functions-window" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" dependencies = [ "arrow", "datafusion-common", @@ -2254,7 +2274,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2263,7 +2284,8 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" dependencies = [ "datafusion-doc", "quote", @@ -2273,7 +2295,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" dependencies = [ "arrow", "chrono", @@ -2291,7 +2314,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" dependencies = [ "ahash", "arrow", @@ 
-2313,7 +2337,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" dependencies = [ "arrow", "datafusion-common", @@ -2327,7 +2352,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" dependencies = [ "ahash", "arrow", @@ -2343,7 +2369,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" dependencies = [ "arrow", "datafusion-common", @@ -2360,7 +2387,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" dependencies = [ "ahash", "arrow", @@ -2391,7 +2419,8 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" dependencies = [ "arrow", 
"datafusion-common", @@ -2407,7 +2436,8 @@ dependencies = [ [[package]] name = "datafusion-session" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" dependencies = [ "async-trait", "datafusion-common", @@ -2420,7 +2450,8 @@ dependencies = [ [[package]] name = "datafusion-spark" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923a8b871962a9d860f036f743a20af50ff04729f1da2468ed220dab4f61c97d" dependencies = [ "arrow", "bigdecimal", @@ -2446,7 +2477,8 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "53.0.0" -source = "git+https://github.com/apache/datafusion?tag=53.0.0-rc2#28fc91a2a5a8ede5797fde04527f76f351c3cd8b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" dependencies = [ "arrow", "bigdecimal", @@ -2649,7 +2681,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3167,9 +3199,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -3181,7 +3213,6 @@ dependencies = [ "httparse", "itoa", "pin-project-lite", - "pin-utils", "smallvec", 
"tokio", "want", @@ -3497,9 +3528,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "inventory" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" dependencies = [ "rustversion", ] @@ -3512,9 +3543,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ "memchr", "serde", @@ -3528,7 +3559,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3551,9 +3582,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "java-locator" @@ -3576,7 +3607,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3615,7 +3646,7 @@ dependencies = [ "cfg-if", "combine", "java-locator", - "jni-sys", + "jni-sys 0.3.1", "libloading 0.7.4", "log", "thiserror 1.0.69", @@ -3625,9 +3656,31 @@ dependencies = [ [[package]] name = "jni-sys" -version = "0.3.0" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] [[package]] name = "jobserver" @@ -3641,10 +3694,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "797146bb2677299a1eb6b7b50a890f4c361b29ef967addf5b2fa45dae1bb6d7d" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -3906,6 +3961,15 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lz4_flex" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" +dependencies = [ + "twox-hash", +] + [[package]] name = "md-5" version = "0.10.6" @@ -3958,9 +4022,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -3975,9 +4039,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.14" +version = "0.12.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -4078,9 +4142,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-format" @@ -4168,16 +4232,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" dependencies = [ "async-trait", "base64", "bytes", "chrono", "form_urlencoded", - "futures", + "futures-channel", + "futures-core", + "futures-util", "http 1.4.0", "http-body-util", "httparse", @@ -4187,8 +4253,8 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.2", + "quick-xml 0.39.2", + "rand 0.10.0", "reqwest", "ring", "rustls-pki-types", @@ -4333,9 +4399,9 @@ dependencies = [ [[package]] name = "parquet" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f491d0ef1b510194426ee67ddc18a9b747ef3c42050c19322a2cd2e1666c29b" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", @@ -4352,11 +4418,11 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "lz4_flex", + "lz4_flex 0.13.0", "num-bigint", "num-integer", "num-traits", - "object_store 0.13.1", + "object_store 0.13.2", "parquet-variant", "parquet-variant-compute", 
"parquet-variant-json", @@ -4373,9 +4439,9 @@ dependencies = [ [[package]] name = "parquet-variant" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00ba4e5dcbc8ad65882b7337a95c12a0f9cbb6add237c53d93b803b7d7f70f02" +checksum = "2bf493f3c9ddd984d0efb019f67343e4aa4bab893931f6a14b82083065dc3d28" dependencies = [ "arrow-schema", "chrono", @@ -4387,9 +4453,9 @@ dependencies = [ [[package]] name = "parquet-variant-compute" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ec4cfb8da15565c8d211b6bc51e8eb481ea65d19132462af3f948b150ac8efe" +checksum = "6ac038d46a503a7d563b4f5df5802c4315d5343d009feab195d15ac512b4cb27" dependencies = [ "arrow", "arrow-schema", @@ -4404,9 +4470,9 @@ dependencies = [ [[package]] name = "parquet-variant-json" -version = "58.0.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3668ff00a6aeb29d172ba15f9d8fedf1675d79bff7d1916daa333efdeaa13e46" +checksum = "015a09c2ffe5108766c7c1235c307b8a3c2ea64eca38455ba1a7f3a7f32f16e2" dependencies = [ "arrow-schema", "base64", @@ -4743,7 +4809,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -4762,7 +4828,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.117", @@ -4812,6 +4878,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958f21e8e7ceb5a1aa7fa87fab28e7c75976e0bfe7e23ff069e0a260f894067d" +dependencies = [ + 
"memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.9" @@ -4823,7 +4899,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "rustls", "socket2", "thiserror 2.0.18", @@ -4843,7 +4919,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "rustls", "rustls-pki-types", "slab", @@ -4864,7 +4940,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5201,9 +5277,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -5237,7 +5313,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5279,9 +5355,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.9" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "aws-lc-rs", "ring", @@ -5593,9 +5669,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "simdutf8" @@ -5742,9 +5818,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "symbolic-common" -version = 
"12.17.2" +version = "12.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "751a2823d606b5d0a7616499e4130a516ebd01a44f39811be2b9600936509c23" +checksum = "52ca086c1eb5c7ee74b151ba83c6487d5d33f8c08ad991b86f3f58f6629e68d5" dependencies = [ "debugid", "memmap2", @@ -5754,9 +5830,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.17.2" +version = "12.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79b237cfbe320601dd24b4ac817a5b68bb28f5508e33f08d42be0682cadc8ac9" +checksum = "baa911a28a62823aaf2cc2e074212492a3ee69d0d926cc8f5b12b4a108ff5c0c" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -5821,7 +5897,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6218,9 +6294,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -6281,9 +6357,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -6354,9 +6430,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = 
"7dc0882f7b5bb01ae8c5215a1230832694481c1a4be062fd410e12ea3da5b631" dependencies = [ "cfg-if", "once_cell", @@ -6367,23 +6443,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.64" +version = "0.4.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +checksum = "19280959e2844181895ef62f065c63e0ca07ece4771b53d89bfdb967d97cbf05" dependencies = [ - "cfg-if", - "futures-util", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "75973d3066e01d035dbedaad2864c398df42f8dd7b1ea057c35b8407c015b537" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6391,9 +6463,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "91af5e4be765819e0bcfee7322c14374dc821e35e72fa663a830bbc7dc199eac" dependencies = [ "bumpalo", "proc-macro2", @@ -6404,9 +6476,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "c9bf0406a78f02f336bf1e451799cca198e8acde4ffa278f0fb20487b150a633" dependencies = [ "unicode-ident", ] @@ -6460,9 +6532,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.91" +version = "0.3.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +checksum = 
"749466a37ee189057f54748b200186b59a03417a117267baf3fd89cecc9fb837" dependencies = [ "js-sys", "wasm-bindgen", @@ -6521,7 +6593,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6616,6 +6688,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -6649,13 +6730,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -6668,6 +6766,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -6680,6 +6784,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -6692,12 +6802,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -6710,6 +6832,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -6722,6 +6850,12 @@ version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -6734,6 +6868,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -6746,6 +6886,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -6871,18 +7017,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ 
"proc-macro2", "quote", diff --git a/native/Cargo.toml b/native/Cargo.toml index c617a4dfa7..05f43c7ec4 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -34,14 +34,14 @@ edition = "2021" rust-version = "1.88" [workspace.dependencies] -arrow = { version = "58.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow = { version = "58.1.0", features = ["prettyprint", "ffi", "chrono-tz"] } async-trait = { version = "0.1" } bytes = { version = "1.11.1" } -parquet = { version = "58.0.0", default-features = false, features = ["experimental"] } -datafusion = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } -datafusion-datasource = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2" } -datafusion-physical-expr-adapter = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2" } -datafusion-spark = { git = "https://github.com/apache/datafusion", tag = "53.0.0-rc2", features = ["core"] } +parquet = { version = "58.1.0", default-features = false, features = ["experimental"] } +datafusion = { version = "53.0.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } +datafusion-datasource = { version = "53.0.0" } +datafusion-physical-expr-adapter = { version = "53.0.0" } +datafusion-spark = { version = "53.0.0", features = ["core"] } datafusion-comet-spark-expr = { path = "spark-expr" } datafusion-comet-proto = { path = "proto" } chrono = { version = "0.4", default-features = false, features = ["clock"] } diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 6b2b81a79f..f4786ba8ed 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -97,7 +97,7 @@ jni = { version = "0.21", features = ["invocation"] } lazy_static = "1.4" assertables = "9" hex = "0.4.3" -datafusion-functions-nested = { git = 
"https://github.com/apache/datafusion", tag = "53.0.0-rc2" } +datafusion-functions-nested = { version = "53.0.0" } [features] backtrace = ["datafusion/backtrace"] From 3b40b81938bc18e66e80e7bddc465f929b4d1150 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 15:04:15 -0400 Subject: [PATCH 17/30] bump to iceberg-rust main commit with df53 --- native/Cargo.lock | 78 +++++++++++++++++++++++++++++++++++++++++++++-- native/Cargo.toml | 4 +-- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 3a5d5445f2..ac391369c4 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -17,6 +17,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "aes" version = "0.8.4" @@ -28,6 +38,20 @@ dependencies = [ "cpufeatures 0.2.17", ] +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "ahash" version = "0.8.12" @@ -1583,6 +1607,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] @@ -1607,6 +1632,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies 
= [ + "cipher", +] + [[package]] name = "darling" version = "0.20.11" @@ -2983,6 +3017,16 @@ dependencies = [ "wasip3", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "gimli" version = "0.32.3" @@ -3286,8 +3330,9 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#934d88a6c21e4389c380397485a35365c77a8a02" +source = "git+https://github.com/apache/iceberg-rust?rev=477a1e5#477a1e525b4915895388a4f45557b825ea541ef2" dependencies = [ + "aes-gcm", "anyhow", "apache-avro", "array-init", @@ -3333,13 +3378,14 @@ dependencies = [ "typetag", "url", "uuid", + "zeroize", "zstd", ] [[package]] name = "iceberg-storage-opendal" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust?branch=df53-upgrade#934d88a6c21e4389c380397485a35365c77a8a02" +source = "git+https://github.com/apache/iceberg-rust?rev=477a1e5#477a1e525b4915895388a4f45557b825ea541ef2" dependencies = [ "anyhow", "async-trait", @@ -4298,6 +4344,12 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "opendal" version = "0.55.0" @@ -4688,6 +4740,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "opaque-debug", + 
"universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -6310,6 +6374,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "unsafe-any-ors" version = "1.0.0" diff --git a/native/Cargo.toml b/native/Cargo.toml index 05f43c7ec4..996abd6d61 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -55,8 +55,8 @@ object_store = { version = "0.13.1", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/mbutrovich/iceberg-rust", branch = "df53-upgrade" } -iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust", branch = "df53-upgrade", features = ["opendal-all"] } +iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "477a1e5" } +iceberg-storage-opendal = { git = "https://github.com/apache/iceberg-rust", rev = "477a1e5", features = ["opendal-all"] } [profile.release] debug = true From a359213c2b2cf585eb0d1ef362e906cb2ade8dbe Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 15:33:27 -0400 Subject: [PATCH 18/30] putting missing file back --- .claude/skills/review-comet-pr/SKILL.md | 297 ++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 .claude/skills/review-comet-pr/SKILL.md diff --git a/.claude/skills/review-comet-pr/SKILL.md b/.claude/skills/review-comet-pr/SKILL.md new file mode 100644 index 0000000000..cf87bcfc87 --- /dev/null +++ b/.claude/skills/review-comet-pr/SKILL.md @@ -0,0 +1,297 @@ +--- +name: review-comet-pr +description: Review a DataFusion Comet pull request for Spark 
compatibility and implementation correctness. Provides guidance to a reviewer rather than posting comments directly. +argument-hint: <pr-number> +--- + +Review Comet PR #$ARGUMENTS + +## Before You Start + +### Gather PR Metadata + +Fetch the PR details to understand the scope: + +```bash +gh pr view $ARGUMENTS --repo apache/datafusion-comet --json title,body,author,isDraft,state,files +``` + +### Review Existing Comments First + +Before forming your review: + +1. **Read all existing review comments** on the PR +2. **Check the conversation tab** for any discussion +3. **Avoid duplicating feedback** that others have already provided +4. **Build on existing discussions** rather than starting new threads on the same topic +5. **If you have no additional concerns beyond what's already discussed, say so** +6. **Ignore Copilot reviews** - do not reference or build upon comments from GitHub Copilot + +```bash +# View existing comments on a PR +gh pr view $ARGUMENTS --repo apache/datafusion-comet --comments +``` + +--- + +## Review Workflow + +### 1. Gather Context + +Read the changed files and understand the area of the codebase being modified: + +```bash +# View the diff +gh pr diff $ARGUMENTS --repo apache/datafusion-comet +``` + +For expression PRs, check how similar expressions are implemented in the codebase. Look at the serde files in `spark/src/main/scala/org/apache/comet/serde/` and Rust implementations in `native/spark-expr/src/`. + +### 2. Read Spark Source (Expression PRs) + +**For any PR that adds or modifies an expression, you must read the Spark source code to understand the canonical behavior.** This is the authoritative reference for what Comet must match. + +1. **Clone or update the Spark repo:** + + ```bash + # Clone if not already present (use /tmp to avoid polluting the workspace) + if [ ! -d /tmp/spark ]; then + git clone --depth 1 https://github.com/apache/spark.git /tmp/spark + fi + ``` + +2. 
**Find the expression implementation in Spark:** + + ```bash + # Search for the expression class (e.g., for "Conv", "Hex", "Substring") + find /tmp/spark/sql/catalyst/src/main/scala -name "*.scala" | xargs grep -l "case class <ExpressionName>" + ``` + +3. **Read the Spark implementation carefully.** Pay attention to: + - The `eval` and `doGenCode`/`nullSafeEval` methods. These define the exact behavior. + - The `inputTypes` and `dataType` fields. These define which types Spark accepts and what it returns. + - Null handling. Does it use `nullable = true`? Does `nullSafeEval` handle nulls implicitly? + - Special cases, guards, and `require` assertions. + - ANSI mode branches (look for `SQLConf.get.ansiEnabled` or `failOnError`). + +4. **Read the Spark tests for the expression:** + + ```bash + # Find test files + find /tmp/spark/sql -name "*.scala" -path "*/test/*" | xargs grep -l "<ExpressionName>" + ``` + +5. **Compare the Spark behavior against the Comet implementation in the PR.** Identify: + - Edge cases tested in Spark but not in the PR + - Data types supported in Spark but not handled in the PR + - Behavioral differences that should be marked `Incompatible` + +6. **Suggest additional tests** for any edge cases or type combinations covered in Spark's tests that are missing from the PR's tests. + +### 3. Spark Compatibility Check + +**This is the most critical aspect of Comet reviews.** Comet must produce identical results to Spark. + +For expression PRs, verify against the Spark source you read in step 2: + +1. **Check edge cases** + - Null handling + - Overflow behavior + - Empty input behavior + - Type-specific behavior + +2. **Verify all data types are handled** + - Does Spark support this type? (Check `inputTypes` in Spark source) + - Does the PR handle all Spark-supported types? + +3. **Check for ANSI mode differences** + - Spark behavior may differ between legacy and ANSI modes + - PR should handle both or mark as `Incompatible` + +### 4. 
Check Against Implementation Guidelines + +**Always verify PRs follow the implementation guidelines.** + +#### Scala Serde (`spark/src/main/scala/org/apache/comet/serde/`) + +- [ ] Expression class correctly identified +- [ ] All child expressions converted via `exprToProtoInternal` +- [ ] Return type correctly serialized +- [ ] `getSupportLevel` reflects true compatibility: + - `Compatible()` - matches Spark exactly + - `Incompatible(Some("reason"))` - differs in documented ways + - `Unsupported(Some("reason"))` - cannot be implemented +- [ ] Serde in appropriate file (`datetime.scala`, `strings.scala`, `arithmetic.scala`, etc.) + +#### Registration (`QueryPlanSerde.scala`) + +- [ ] Added to correct map (temporal, string, arithmetic, etc.) +- [ ] No duplicate registrations +- [ ] Import statement added + +#### Rust Implementation (if applicable) + +Location: `native/spark-expr/src/` + +- [ ] Matches DataFusion and Arrow conventions +- [ ] Null handling is correct +- [ ] No panics. Use `Result` types. +- [ ] Efficient array operations (avoid row-by-row) + +#### Tests - Prefer SQL File-Based Framework + +**Expression tests should use the SQL file-based framework (`CometSqlFileTestSuite`) where possible.** This framework automatically runs each query through both Spark and Comet and compares results. No Scala code is needed. Only fall back to Scala tests in `CometExpressionSuite` when the SQL framework cannot express the test. Examples include complex `DataFrame` setup, programmatic data generation, or non-expression tests. + +**SQL file test location:** `spark/src/test/resources/sql-tests/expressions/<category>/` + +Categories include: `aggregate/`, `array/`, `string/`, `math/`, `struct/`, `map/`, `datetime/`, `hash/`, etc. 
+ +**SQL file structure:** + +```sql +-- ConfigMatrix: parquet.enable.dictionary=false,true + +-- Create test data +statement +CREATE TABLE test_crc32(col string, a int, b float) USING parquet + +statement +INSERT INTO test_crc32 VALUES ('Spark', 10, 1.5), (NULL, NULL, NULL), ('', 0, 0.0) + +-- Default mode: verifies native Comet execution + result matches Spark +query +SELECT crc32(col) FROM test_crc32 + +-- spark_answer_only: compares results without requiring native execution +query spark_answer_only +SELECT crc32(cast(a as string)) FROM test_crc32 + +-- tolerance: allows numeric variance for floating-point results +query tolerance=0.0001 +SELECT cos(v) FROM test_trig + +-- expect_fallback: asserts fallback to Spark occurs +query expect_fallback(unsupported expression) +SELECT unsupported_func(v) FROM test_table + +-- expect_error: verifies both engines throw matching exceptions +query expect_error(ARITHMETIC_OVERFLOW) +SELECT 2147483647 + 1 + +-- ignore: skip queries with known bugs (include GitHub issue link) +query ignore(https://github.com/apache/datafusion-comet/issues/NNNN) +SELECT known_buggy_expr(v) FROM test_table +``` + +**Running SQL file tests:** + +```bash +# All SQL file tests +./mvnw test -Dsuites="org.apache.comet.CometSqlFileTestSuite" -Dtest=none + +# Specific test file (substring match) +./mvnw test -Dsuites="org.apache.comet.CometSqlFileTestSuite crc32" -Dtest=none +``` + +**CRITICAL: Verify all test requirements (regardless of framework):** + +- [ ] Basic functionality tested (column data, not just literals) +- [ ] Null handling tested (`SELECT expression(NULL)`) +- [ ] Edge cases tested (empty input, overflow, boundary values) +- [ ] Both literal values and column references tested (they use different code paths) +- [ ] For timestamp/datetime expressions, timezone handling is tested (e.g., UTC, non-UTC session timezone, timestamps with and without timezone) +- [ ] One expression per SQL file for easier debugging +- [ ] If using Scala tests 
instead, literal tests MUST disable constant folding: + ```scala + withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> + "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") { + checkSparkAnswerAndOperator("SELECT func(literal)") + } + ``` + +### 5. Performance Review (Expression PRs) + +**For PRs that add new expressions, performance is not optional.** The whole point of Comet is to be faster than Spark. If a new expression is not faster, it may not be worth adding. + +1. **Check that the PR includes microbenchmark results.** The PR description should contain benchmark numbers comparing Comet vs Spark for the new expression. If benchmark results are missing, flag this as a required addition. + +2. **Look for a microbenchmark implementation.** Expression benchmarks live in `spark/src/test/scala/org/apache/spark/sql/benchmark/`. Check whether the PR adds a benchmark for the new expression. + +3. **Review the benchmark results if provided:** + - Is Comet actually faster than Spark for this expression? + - Are the benchmarks representative? They should test with realistic data sizes, not just trivial inputs. + - Are different data types benchmarked if the expression supports multiple types? + +4. **Review the Rust implementation for performance concerns:** + - Unnecessary allocations or copies + - Row-by-row processing where batch/array operations are possible + - Redundant type conversions + - Inefficient string handling (e.g., repeated UTF-8 validation) + - Missing use of Arrow compute kernels where they exist + +5. **If benchmark results show Comet is slower than Spark**, flag this clearly. The PR should explain why the regression is acceptable or include a plan to optimize. + +### 6. 
Check CI Test Failures + +**Always check the CI status and summarize any test failures in your review.** + +```bash +# View CI check status +gh pr checks $ARGUMENTS --repo apache/datafusion-comet + +# View failed check details +gh pr checks $ARGUMENTS --repo apache/datafusion-comet --failed +``` + +### 7. Documentation Check + +Check whether the PR requires updates to user-facing documentation in `docs/`: + +- **Compatibility guide** (`docs/source/user-guide/compatibility.md`): New expressions or operators should be listed. Incompatible behaviors should be documented. +- **Configuration guide** (`docs/source/user-guide/configs.md`): New config options should be documented. +- **Expressions list** (`docs/source/user-guide/expressions.md`): New expressions should be added. + +If the PR adds a new expression or operator but does not update the relevant docs, flag this as something that needs to be addressed. + +### 8. Common Comet Review Issues + +1. **Incomplete type support**: Spark expression supports types not handled in PR +2. **Missing edge cases**: Null, overflow, empty string, negative values +3. **Wrong return type**: Return type must match Spark exactly +4. **Tests in wrong framework**: Expression tests should use the SQL file-based framework (`CometSqlFileTestSuite`) rather than adding to Scala test suites like `CometExpressionSuite`. Suggest migration if the PR adds Scala tests for expressions that could use SQL files instead. +5. **Stale native code**: PR might need `./mvnw install -pl common -DskipTests` +6. **Missing `getSupportLevel`**: Edge cases should be marked as `Incompatible` + +--- + +## Output Format + +Present your review as guidance for the reviewer. Structure your output as: + +1. **PR Summary** - Brief description of what the PR does +2. **CI Status** - Summary of CI check results +3. **Findings** - Your analysis organized by area (Spark compatibility, implementation, tests, etc.) +4. 
**Suggested Review Comments** - Specific comments the reviewer could leave on the PR, with file and line references where applicable + +## Review Tone and Style + +Write reviews that sound human and conversational. Avoid: + +- Robotic or formulaic language +- Em dashes. Use separate sentences instead. +- Semicolons. Use separate sentences instead. + +Instead: + +- Write in flowing paragraphs using simple grammar +- Keep sentences short and separate rather than joining them with punctuation +- Be kind and constructive, even when raising concerns +- Use backticks around any code references (function names, file paths, class names, types, config keys, etc.) +- **Suggest** adding tests rather than stating tests are missing (e.g., "It might be worth adding a test for X" not "Tests are missing for X") +- **Ask questions** about edge cases rather than asserting they aren't handled (e.g., "Does this handle the case where X is null?" not "This doesn't handle null") +- Frame concerns as questions or suggestions when possible +- Acknowledge what the PR does well before raising concerns + +## Do Not Post Comments + +**IMPORTANT: Never post comments or reviews on the PR directly.** This skill is for providing guidance to a human reviewer. Present all findings and suggested comments to the user. The user will decide what to post. 
From 88e4bf4a8e016b5f6296b5e7b55e9cb965087b1d Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 15:39:14 -0400 Subject: [PATCH 19/30] fix --- native/hdfs/src/object_store/hdfs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native/hdfs/src/object_store/hdfs.rs b/native/hdfs/src/object_store/hdfs.rs index cb5a2fa5c2..e368d7cf94 100644 --- a/native/hdfs/src/object_store/hdfs.rs +++ b/native/hdfs/src/object_store/hdfs.rs @@ -321,7 +321,7 @@ impl ObjectStore for HadoopFileSystem { drop(parts); if is_directory { - common_prefixes.insert(prefix.child(common_prefix)); + common_prefixes.insert(prefix.clone().join(common_prefix)); } else { objects.push(convert_metadata(entry, &hdfs_root)); } From de3faa03003d88d67b257551017bcffb6f162b8f Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 16:26:41 -0400 Subject: [PATCH 20/30] fix native test --- native/spark-expr/src/math_funcs/round.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/native/spark-expr/src/math_funcs/round.rs b/native/spark-expr/src/math_funcs/round.rs index d6302d9b7b..069eb670c9 100644 --- a/native/spark-expr/src/math_funcs/round.rs +++ b/native/spark-expr/src/math_funcs/round.rs @@ -110,6 +110,8 @@ pub fn spark_round( let ColumnarValue::Scalar(ScalarValue::Int64(Some(point))) = point else { return internal_err!("Invalid point argument for Round(): {:#?}", point); }; + // DataFusion's RoundFunc expects Int32 for decimal_places + let point_as_i32 = ColumnarValue::Scalar(ScalarValue::Int32(Some(*point as i32))); match value { ColumnarValue::Array(array) => match array.data_type() { DataType::Int64 if *point < 0 => { @@ -133,7 +135,10 @@ pub fn spark_round( let round_udf = RoundFunc::new(); let return_field = Arc::new(Field::new("round", array.data_type().clone(), true)); let args_for_round = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::clone(array)), args[1].clone()], + args: vec![ + 
ColumnarValue::Array(Arc::clone(array)), + point_as_i32.clone(), + ], number_rows: array.len(), return_field, arg_fields: vec![], @@ -166,7 +171,7 @@ pub fn spark_round( let data_type = a.data_type(); let return_field = Arc::new(Field::new("round", data_type, true)); let args_for_round = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(a.clone()), args[1].clone()], + args: vec![ColumnarValue::Scalar(a.clone()), point_as_i32.clone()], number_rows: 1, return_field, arg_fields: vec![], From 1105bb22f99f4dd6a366a2c3a40a1922b2d99c50 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 1 Apr 2026 15:57:22 -0400 Subject: [PATCH 21/30] workaround array_compact and array_repeat failures --- native/core/src/execution/jni_api.rs | 7 +++++-- native/core/src/execution/planner.rs | 4 +--- native/spark-expr/src/array_funcs/mod.rs | 2 ++ native/spark-expr/src/comet_scalar_funcs.rs | 5 +++-- .../src/main/scala/org/apache/comet/serde/arrays.scala | 10 ++++++---- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index 9ebaa3d72f..27933d0ed6 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -40,7 +40,6 @@ use datafusion::{ prelude::{SessionConfig, SessionContext}, }; use datafusion_comet_proto::spark_operator::Operator; -use datafusion_spark::function::array::repeat::SparkArrayRepeat; use datafusion_spark::function::bitwise::bit_count::SparkBitCount; use datafusion_spark::function::bitwise::bit_get::SparkBitGet; use datafusion_spark::function::bitwise::bitwise_not::SparkBitwiseNot; @@ -393,7 +392,11 @@ fn prepare_datafusion_session_context( // register UDFs from datafusion-spark crate fn register_datafusion_spark_function(session_ctx: &SessionContext) { - session_ctx.register_udf(ScalarUDF::new_from_impl(SparkArrayRepeat::default())); + // Don't register SparkArrayRepeat — it returns NULL when the element is NULL + // (e.g. 
array_repeat(null, 3) returns NULL instead of [null, null, null]). + // Comet's Scala serde wraps the call in a CaseWhen for null count handling, + // so DataFusion's built-in ArrayRepeat is sufficient. + // TODO: file upstream issue against datafusion-spark session_ctx.register_udf(ScalarUDF::new_from_impl(SparkExpm1::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha2::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(CharFunc::default())); diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 4487feae85..e886640620 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -4176,10 +4176,8 @@ mod tests { #[test] fn test_array_repeat() { + // Use built-in ArrayRepeat, not SparkArrayRepeat (see jni_api.rs comment) let session_ctx = SessionContext::new(); - session_ctx.register_udf(ScalarUDF::new_from_impl( - datafusion_spark::function::array::repeat::SparkArrayRepeat::default(), - )); let task_ctx = session_ctx.task_ctx(); let planner = PhysicalPlanner::new(Arc::from(session_ctx), 0); diff --git a/native/spark-expr/src/array_funcs/mod.rs b/native/spark-expr/src/array_funcs/mod.rs index 3ef50a252f..2bd1b9631b 100644 --- a/native/spark-expr/src/array_funcs/mod.rs +++ b/native/spark-expr/src/array_funcs/mod.rs @@ -15,11 +15,13 @@ // specific language governing permissions and limitations // under the License. 
+mod array_compact; mod array_insert; mod get_array_struct_fields; mod list_extract; mod size; +pub use array_compact::SparkArrayCompact; pub use array_insert::ArrayInsert; pub use get_array_struct_fields::GetArrayStructFields; pub use list_extract::ListExtract; diff --git a/native/spark-expr/src/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs index 1eaf0b2a97..9c91bb69c9 100644 --- a/native/spark-expr/src/comet_scalar_funcs.rs +++ b/native/spark-expr/src/comet_scalar_funcs.rs @@ -23,8 +23,8 @@ use crate::math_funcs::modulo_expr::spark_modulo; use crate::{ spark_ceil, spark_decimal_div, spark_decimal_integral_div, spark_floor, spark_isnan, spark_lpad, spark_make_decimal, spark_read_side_padding, spark_round, spark_rpad, spark_unhex, - spark_unscaled_value, EvalMode, SparkContains, SparkDateDiff, SparkDateTrunc, SparkMakeDate, - SparkSizeFunc, + spark_unscaled_value, EvalMode, SparkArrayCompact, SparkContains, SparkDateDiff, + SparkDateTrunc, SparkMakeDate, SparkSizeFunc, }; use arrow::datatypes::DataType; use datafusion::common::{DataFusionError, Result as DataFusionResult}; @@ -196,6 +196,7 @@ pub fn create_comet_physical_fun_with_eval_mode( fn all_scalar_functions() -> Vec<Arc<ScalarUDF>> { vec![ + Arc::new(ScalarUDF::new_from_impl(SparkArrayCompact::default())), Arc::new(ScalarUDF::new_from_impl(SparkContains::default())), Arc::new(ScalarUDF::new_from_impl(SparkDateDiff::default())), Arc::new(ScalarUDF::new_from_impl(SparkDateTrunc::default())), diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 47a6e91421..1e2eeaaa28 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -323,14 +323,16 @@ object CometArrayCompact extends CometExpressionSerde[Expression] { val elementType = child.dataType.asInstanceOf[ArrayType].elementType val arrayExprProto = exprToProto(child, inputs, binding) - val 
nullLiteralProto = exprToProto(Literal(null, elementType), Seq.empty) + // Use Comet's SparkArrayCompact UDF instead of DataFusion's array_remove_all. + // DF 53 changed array_remove_all to return NULL when the element arg is NULL, + // which breaks the array_compact use case. + // TODO: upstream to datafusion-spark crate val arrayCompactScalarExpr = scalarFunctionExprToProtoWithReturnType( - "array_remove_all", + "spark_array_compact", ArrayType(elementType = elementType), false, - arrayExprProto, - nullLiteralProto) + arrayExprProto) optExprWithInfo(arrayCompactScalarExpr, expr, expr.children: _*) } } From a2f52577434e3a26f7ad8e357a385f97b992c747 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 1 Apr 2026 16:03:17 -0400 Subject: [PATCH 22/30] workaround array_compact and array_repeat failures --- .../src/array_funcs/array_compact.rs | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 native/spark-expr/src/array_funcs/array_compact.rs diff --git a/native/spark-expr/src/array_funcs/array_compact.rs b/native/spark-expr/src/array_funcs/array_compact.rs new file mode 100644 index 0000000000..4653f966a5 --- /dev/null +++ b/native/spark-expr/src/array_funcs/array_compact.rs @@ -0,0 +1,164 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +// Spark-compatible array_compact: removes null elements from an array. +// +// DataFusion's array_remove_all(arr, null) returns NULL for the entire row +// when the element-to-remove is NULL (DF 53, PR #21013). Spark's array_compact +// needs to actually remove null elements, so we implement it directly. +// +// TODO: upstream this to datafusion-spark crate + +use arrow::array::{ + make_array, Array, ArrayRef, Capacities, GenericListArray, MutableArrayData, NullBufferBuilder, + OffsetSizeTrait, +}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::{DataType, FieldRef}; +use datafusion::common::{exec_err, utils::take_function_args, Result}; +use datafusion::logical_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, +}; +use std::any::Any; +use std::sync::Arc; + +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkArrayCompact { + signature: Signature, +} + +impl Default for SparkArrayCompact { + fn default() -> Self { + Self::new() + } +} + +impl SparkArrayCompact { + pub fn new() -> Self { + Self { + signature: Signature::new(TypeSignature::Any(1), Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for SparkArrayCompact { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "spark_array_compact" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { + datafusion::common::internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args( + &self, + args: datafusion::logical_expr::ReturnFieldArgs, + ) -> Result<FieldRef> { + Ok(Arc::clone(&args.arg_fields[0])) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { + let [array] = take_function_args(self.name(), &args.args)?; + match array { + ColumnarValue::Array(array) => match array.data_type() { + 
DataType::List(_) => Ok(ColumnarValue::Array(compact_list::<i32>( + array.as_any().downcast_ref().unwrap(), + )?)), + DataType::LargeList(_) => Ok(ColumnarValue::Array(compact_list::<i64>( + array.as_any().downcast_ref().unwrap(), + )?)), + other => exec_err!("spark_array_compact does not support type '{other}'"), + }, + ColumnarValue::Scalar(scalar) => { + let array = scalar.to_array()?; + let result = match array.data_type() { + DataType::List(_) => { + compact_list::<i32>(array.as_any().downcast_ref().unwrap())? + } + DataType::LargeList(_) => { + compact_list::<i64>(array.as_any().downcast_ref().unwrap())? + } + other => { + return exec_err!("spark_array_compact does not support type '{other}'") + } + }; + Ok(ColumnarValue::Array(result)) + } + } + } +} + +/// Remove null elements from each row of a list array. +fn compact_list<OffsetSize: OffsetSizeTrait>( + list_array: &GenericListArray<OffsetSize>, +) -> Result<ArrayRef> { + let list_field = match list_array.data_type() { + DataType::List(field) | DataType::LargeList(field) => field, + other => { + return exec_err!("Expected List or LargeList, got {other:?}"); + } + }; + + let values = list_array.values(); + let original_data = values.to_data(); + let mut offsets = Vec::<OffsetSize>::with_capacity(list_array.len() + 1); + offsets.push(OffsetSize::zero()); + let mut mutable = MutableArrayData::with_capacities( + vec![&original_data], + false, + Capacities::Array(original_data.len()), + ); + let mut valid = NullBufferBuilder::new(list_array.len()); + + for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() { + if list_array.is_null(row_index) { + offsets.push(offsets[row_index]); + valid.append_null(); + continue; + } + + let start = offset_window[0].to_usize().unwrap(); + let end = offset_window[1].to_usize().unwrap(); + let mut copied = 0usize; + + for i in start..end { + if !values.is_null(i) { + mutable.extend(0, i, i + 1); + copied += 1; + } + } + + offsets.push(offsets[row_index] + OffsetSize::usize_as(copied)); + valid.append_non_null(); + } + + let new_values = 
make_array(mutable.freeze()); + Ok(Arc::new(GenericListArray::::try_new( + Arc::clone(list_field), + OffsetBuffer::new(offsets.into()), + new_values, + valid.finish(), + )?)) +} From 7684997b6116be8547695bf6a8bee391c8f7eee3 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 1 Apr 2026 16:15:42 -0400 Subject: [PATCH 23/30] bump opendal to commit on main with upgraded object_store. --- native/Cargo.lock | 322 +++++++++++++++++++++++++++++------------ native/core/Cargo.toml | 6 +- 2 files changed, 231 insertions(+), 97 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index c1acb9bf42..489dfe7444 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -1632,6 +1632,22 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" +dependencies = [ + "ctor-proc-macro", + "dtor", +] + +[[package]] +name = "ctor-proc-macro" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" + [[package]] name = "ctr" version = "0.9.2" @@ -1763,7 +1779,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "parquet", "rand 0.9.2", @@ -1795,7 +1811,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "tokio", ] @@ -1820,7 +1836,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", ] [[package]] @@ -1858,10 +1874,10 @@ dependencies = [ "log4rs", "mimalloc", "num", - "object_store 0.13.2", + "object_store", "object_store_opendal", "once_cell", - "opendal", + "opendal 0.55.0 (git+https://github.com/apache/opendal?rev=173feb6)", "parking_lot", "parquet", "paste", @@ -1933,7 +1949,7 @@ dependencies = [ "datafusion-comet-fs-hdfs3", 
"fs-hdfs3", "futures", - "object_store 0.13.2", + "object_store", "tokio", ] @@ -2009,7 +2025,7 @@ dependencies = [ "itertools 0.14.0", "libc", "log", - "object_store 0.13.2", + "object_store", "parquet", "paste", "sqlparser", @@ -2055,7 +2071,7 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store 0.13.2", + "object_store", "rand 0.9.2", "tokio", "tokio-util", @@ -2083,7 +2099,7 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store 0.13.2", + "object_store", "tokio", ] @@ -2105,7 +2121,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.2", + "object_store", "regex", "tokio", ] @@ -2128,7 +2144,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.2", + "object_store", "serde_json", "tokio", "tokio-stream", @@ -2158,7 +2174,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "parquet", "tokio", @@ -2186,7 +2202,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "parquet", "rand 0.9.2", @@ -2703,6 +2719,21 @@ dependencies = [ "const-random", ] +[[package]] +name = "dtor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "dunce" version = "1.0.5" @@ -3443,7 +3474,7 @@ dependencies = [ "cfg-if", "futures", "iceberg", - "opendal", + "opendal 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqsign", "reqwest", "serde", @@ -3453,12 +3484,13 @@ dependencies = [ [[package]] name = 
"icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -3466,9 +3498,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -3479,9 +3511,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -3493,15 +3525,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -3513,15 +3545,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -3699,10 +3731,12 @@ checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", + "js-sys", "log", "portable-atomic", "portable-atomic-util", "serde_core", + "wasm-bindgen", "windows-sys 0.61.2", ] @@ -3790,9 +3824,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.93" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "797146bb2677299a1eb6b7b50a890f4c361b29ef967addf5b2fa45dae1bb6d7d" +checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9" dependencies = [ "cfg-if", "futures-util", @@ -3910,9 +3944,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.183" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libloading" @@ -3984,9 +4018,9 @@ checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = 
"92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" @@ -4067,6 +4101,15 @@ dependencies = [ "digest", ] +[[package]] +name = "mea" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6747f54621d156e1b47eb6b25f39a941b9fc347f98f67d25d8881ff99e8ed832" +dependencies = [ + "slab", +] + [[package]] name = "memchr" version = "2.8.0" @@ -4293,30 +4336,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "http 1.4.0", - "humantime", - "itertools 0.14.0", - "parking_lot", - "percent-encoding", - "thiserror 2.0.18", - "tokio", - "tracing", - "url", - "walkdir", - "wasm-bindgen-futures", - "web-time", -] - [[package]] name = "object_store" version = "0.13.2" @@ -4360,15 +4379,15 @@ dependencies = [ [[package]] name = "object_store_opendal" version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113ab0769e972eee585e57407b98de08bda5354fa28e8ba4d89038d6cb6a8991" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" dependencies = [ "async-trait", "bytes", "chrono", "futures", - "object_store 0.12.5", - "opendal", + "mea", + "object_store", + "opendal 0.55.0 (git+https://github.com/apache/opendal?rev=173feb6)", "pin-project", "tokio", ] @@ -4404,7 +4423,6 @@ dependencies = [ "crc32c", "futures", "getrandom 0.2.17", - "hdrs", "http 1.4.0", "http-body 1.0.1", "jiff", @@ -4421,6 +4439,100 @@ dependencies = [ "uuid", ] +[[package]] +name = "opendal" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "ctor", + "opendal-core", + "opendal-layer-concurrent-limit", + 
"opendal-layer-logging", + "opendal-layer-retry", + "opendal-layer-timeout", + "opendal-service-hdfs", +] + +[[package]] +name = "opendal-core" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "anyhow", + "base64", + "bytes", + "futures", + "http 1.4.0", + "http-body 1.0.1", + "jiff", + "log", + "md-5", + "mea", + "percent-encoding", + "quick-xml 0.38.4", + "reqsign-core", + "reqwest", + "serde", + "serde_json", + "tokio", + "url", + "uuid", + "web-time", +] + +[[package]] +name = "opendal-layer-concurrent-limit" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "futures", + "http 1.4.0", + "mea", + "opendal-core", +] + +[[package]] +name = "opendal-layer-logging" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-retry" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "backon", + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-timeout" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "opendal-core", + "tokio", +] + +[[package]] +name = "opendal-service-hdfs" +version = "0.55.0" +source = "git+https://github.com/apache/opendal?rev=173feb6#173feb6d22a35d176e354bd5e20e2802dc28b93f" +dependencies = [ + "bytes", + "futures", + "hdrs", + "log", + "opendal-core", + "serde", + "tokio", +] + [[package]] name = "openssl-probe" version = "0.2.1" @@ -4515,7 +4627,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "object_store 0.13.2", + "object_store", "parquet-variant", "parquet-variant-compute", 
"parquet-variant-json", @@ -4810,9 +4922,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -5261,6 +5373,28 @@ dependencies = [ "tokio", ] +[[package]] +name = "reqsign-core" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b10302cf0a7d7e7352ba211fc92c3c5bebf1286153e49cc5aa87348078a8e102" +dependencies = [ + "anyhow", + "base64", + "bytes", + "form_urlencoded", + "futures", + "hex", + "hmac", + "http 1.4.0", + "jiff", + "log", + "percent-encoding", + "sha1", + "sha2", + "windows-sys 0.61.2", +] + [[package]] name = "reqwest" version = "0.12.28" @@ -6139,9 +6273,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -6545,9 +6679,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dc0882f7b5bb01ae8c5215a1230832694481c1a4be062fd410e12ea3da5b631" +checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0" dependencies = [ "cfg-if", "once_cell", @@ -6558,9 +6692,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.66" +version = "0.4.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19280959e2844181895ef62f065c63e0ca07ece4771b53d89bfdb967d97cbf05" +checksum = "03623de6905b7206edd0a75f69f747f134b7f0a2323392d664448bf2d3c5d87e" 
dependencies = [ "js-sys", "wasm-bindgen", @@ -6568,9 +6702,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75973d3066e01d035dbedaad2864c398df42f8dd7b1ea057c35b8407c015b537" +checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6578,9 +6712,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91af5e4be765819e0bcfee7322c14374dc821e35e72fa663a830bbc7dc199eac" +checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2" dependencies = [ "bumpalo", "proc-macro2", @@ -6591,9 +6725,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9bf0406a78f02f336bf1e451799cca198e8acde4ffa278f0fb20487b150a633" +checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b" dependencies = [ "unicode-ident", ] @@ -6647,9 +6781,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.93" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "749466a37ee189057f54748b200186b59a03417a117267baf3fd89cecc9fb837" +checksum = "cd70027e39b12f0849461e08ffc50b9cd7688d942c1c8e3c7b22273236b4dd0a" dependencies = [ "js-sys", "wasm-bindgen", @@ -7109,9 +7243,9 @@ checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ 
"stable_deref_trait", "yoke-derive", @@ -7120,9 +7254,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -7152,18 +7286,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -7179,9 +7313,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ -7190,9 +7324,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -7201,9 +7335,9 @@ dependencies = [ [[package]] name = "zerovec-derive" 
-version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index f91b0cfa11..1f21da6b3a 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -70,9 +70,9 @@ aws-credential-types = { workspace = true } parking_lot = "0.12.5" datafusion-comet-objectstore-hdfs = { path = "../hdfs", optional = true, default-features = false, features = ["hdfs"] } reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "http2"] } -object_store_opendal = {version = "0.55.0", optional = true} +object_store_opendal = { git = "https://github.com/apache/opendal", rev = "173feb6", package = "object_store_opendal", optional = true} hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]} -opendal = { version ="0.55.0", optional = true, features = ["services-hdfs"] } +opendal = { git = "https://github.com/apache/opendal", rev = "173feb6", optional = true, features = ["services-hdfs"] } iceberg = { workspace = true } iceberg-storage-opendal = { workspace = true } serde_json = "1.0" @@ -95,7 +95,7 @@ datafusion-functions-nested = { version = "53.0.0" } [features] backtrace = ["datafusion/backtrace"] -default = [] +default = ["hdfs-opendal"] hdfs = ["datafusion-comet-objectstore-hdfs"] hdfs-opendal = ["opendal", "object_store_opendal", "hdfs-sys"] jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"] From 1413355a6f8209fe09a86d1e7f07c84440c815e1 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 3 Apr 2026 11:39:33 -0400 Subject: [PATCH 24/30] cargo update --- native/Cargo.lock | 124 ++++++++++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 47 deletions(-) diff --git a/native/Cargo.lock 
b/native/Cargo.lock index 489dfe7444..404656405f 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -328,7 +328,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.13.1", "itoa", "lexical-core", "memchr", @@ -1228,9 +1228,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.58" +version = "1.2.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" +checksum = "b7a4d3ec6524d28a329fc53654bbadc9bdd7b0431f5d65f1a56ffb28a1ee5283" dependencies = [ "find-msvc-tools", "jobserver", @@ -1345,7 +1345,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", - "libloading 0.8.9", + "libloading", ] [[package]] @@ -1868,7 +1868,7 @@ dependencies = [ "iceberg", "iceberg-storage-opendal", "itertools 0.14.0", - "jni", + "jni 0.22.4", "lazy_static", "log", "log4rs", @@ -1929,7 +1929,7 @@ dependencies = [ "assertables", "datafusion", "datafusion-comet-common", - "jni", + "jni 0.22.4", "lazy_static", "once_cell", "parquet", @@ -1977,7 +1977,7 @@ dependencies = [ "datafusion-comet-spark-expr", "futures", "itertools 0.14.0", - "jni", + "jni 0.21.1", "log", "lz4_flex", "simd-adler32", @@ -2021,7 +2021,7 @@ dependencies = [ "half", "hashbrown 0.16.1", "hex", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "libc", "log", @@ -2225,7 +2225,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "paste", "serde_json", @@ -2240,7 +2240,7 @@ checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "paste", ] @@ -2404,7 +2404,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", 
"datafusion-physical-expr", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "log", "regex", @@ -2426,7 +2426,7 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "parking_lot", "paste", @@ -2461,7 +2461,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "parking_lot", ] @@ -2507,7 +2507,7 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.13.1", "itertools 0.14.0", "log", "num-traits", @@ -2586,7 +2586,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-functions-nested", - "indexmap 2.13.0", + "indexmap 2.13.1", "log", "regex", "sqlparser", @@ -3144,7 +3144,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.4.0", - "indexmap 2.13.0", + "indexmap 2.13.1", "slab", "tokio", "tokio-util", @@ -3610,9 +3610,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -3627,7 +3627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash", - "indexmap 2.13.0", + "indexmap 2.13.1", "is-terminal", "itoa", "log", @@ -3775,15 +3775,45 @@ dependencies = [ "cesu8", "cfg-if", "combine", - "java-locator", "jni-sys 0.3.1", - "libloading 0.7.4", "log", "thiserror 1.0.69", "walkdir", "windows-sys 0.45.0", ] +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if", + "combine", + "java-locator", + "jni-macros", + "jni-sys 0.4.1", + "libloading", + "log", + "simd_cesu8", + "thiserror 2.0.18", + "walkdir", + "windows-link", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.117", +] + [[package]] name = "jni-sys" version = "0.3.1" @@ -3948,16 +3978,6 @@ version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] - [[package]] name = "libloading" version = "0.8.9" @@ -4651,7 +4671,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.13.1", "simdutf8", "uuid", ] @@ -4666,7 +4686,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.13.1", "parquet-variant", "parquet-variant-json", "serde_json", @@ -4742,7 +4762,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.13.1", "serde", ] @@ -5824,7 +5844,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.13.0", + "indexmap 2.13.1", "schemars 0.9.0", "schemars 1.2.1", "serde_core", @@ -5851,7 +5871,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 
2.13.0", + "indexmap 2.13.1", "itoa", "ryu", "serde", @@ -5912,6 +5932,16 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" +[[package]] +name = "simd_cesu8" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] + [[package]] name = "simdutf8" version = "0.1.5" @@ -6308,9 +6338,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.50.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +checksum = "2bd1c4c0fc4a7ab90fc15ef6daaa3ec3b893f004f915f2392557ed23237820cd" dependencies = [ "bytes", "libc", @@ -6325,9 +6355,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.1" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", @@ -6749,7 +6779,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.13.0", + "indexmap 2.13.1", "wasm-encoder", "wasmparser", ] @@ -6775,7 +6805,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags 2.11.0", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.13.1", "semver", ] @@ -7169,7 +7199,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.13.0", + "indexmap 2.13.1", 
"prettyplease", "syn 2.0.117", "wasm-metadata", @@ -7200,7 +7230,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags 2.11.0", - "indexmap 2.13.0", + "indexmap 2.13.1", "log", "serde", "serde_derive", @@ -7219,7 +7249,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.13.0", + "indexmap 2.13.1", "log", "semver", "serde", @@ -7231,9 +7261,9 @@ dependencies = [ [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "xmlparser" From 2be791c8de7a826552cc4aabd6daf7e00e0272b4 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 3 Apr 2026 12:39:08 -0400 Subject: [PATCH 25/30] fix mapping issue with native_datafusion --- native/core/src/parquet/schema_adapter.rs | 47 +++++++++++++--- .../comet/exec/CometNativeReaderSuite.scala | 54 +++++++++++++++++++ 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 53107ac51b..af79d9082d 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -296,12 +296,26 @@ impl SparkPhysicalExprAdapter { ) -> DataFusionResult> { expr.transform(|e| { if let Some(column) = e.as_any().downcast_ref::() { - let col_idx = column.index(); let col_name = column.name(); - let logical_field = self.logical_file_schema.fields().get(col_idx); - // Look up physical field by name instead of index for correctness - // when logical and physical schemas have different column orderings + // Resolve fields by name because this is the fallback path + // that runs on the original expression when the default + // 
adapter fails. The original expression was built against + // the required (pruned) schema, so column indices refer to + // that schema — not the logical or physical file schemas. + // DataFusion's DefaultPhysicalExprAdapter::resolve_physical_column + // also resolves by name for the same reason. + let logical_field = if self.parquet_options.case_sensitive { + self.logical_file_schema + .fields() + .iter() + .find(|f| f.name() == col_name) + } else { + self.logical_file_schema + .fields() + .iter() + .find(|f| f.name().eq_ignore_ascii_case(col_name)) + }; let physical_field = if self.parquet_options.case_sensitive { self.physical_file_schema .fields() @@ -314,12 +328,31 @@ impl SparkPhysicalExprAdapter { .find(|f| f.name().eq_ignore_ascii_case(col_name)) }; - if let (Some(logical_field), Some(physical_field)) = (logical_field, physical_field) + // Remap the column index to the physical file schema so + // downstream evaluation reads the correct column from the + // parquet batch. + let physical_index = if self.parquet_options.case_sensitive { + self.physical_file_schema.index_of(col_name).ok() + } else { + self.physical_file_schema + .fields() + .iter() + .position(|f| f.name().eq_ignore_ascii_case(col_name)) + }; + + if let (Some(logical_field), Some(physical_field), Some(phys_idx)) = + (logical_field, physical_field, physical_index) { + let remapped: Arc = if column.index() != phys_idx { + Arc::new(Column::new(col_name, phys_idx)) + } else { + Arc::clone(&e) + }; + if logical_field.data_type() != physical_field.data_type() { let cast_expr: Arc = Arc::new( CometCastColumnExpr::new( - Arc::clone(&e), + remapped, Arc::clone(physical_field), Arc::clone(logical_field), None, @@ -327,6 +360,8 @@ impl SparkPhysicalExprAdapter { .with_parquet_options(self.parquet_options.clone()), ); return Ok(Transformed::yes(cast_expr)); + } else if column.index() != phys_idx { + return Ok(Transformed::yes(remapped)); } } } diff --git 
a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala index cf79f6af0f..0d6b3c9809 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala @@ -19,6 +19,7 @@ package org.apache.comet.exec +import org.apache.hadoop.fs.Path import org.scalactic.source.Position import org.scalatest.Tag @@ -602,4 +603,57 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper |""".stripMargin, "select array(array(1, 2, null), array(), array(10), null, array(null)) from tbl") } + + test("native reader - nested schema pruning with array of struct and filter") { + // Regression test found during DataFusion 53 upgrade (PR #3629). + // Spark's SchemaPruningSuite tests (e.g. "select a single complex field array + // and in clause", "select explode of nested field of array of struct", + // "SPARK-34638: nested column prune on generator output") were failing with: + // native panic: called `Result::unwrap()` on an `Err` value: + // Internal("Unexpected data type in GetArrayStructFields: Int32") + // The root cause was wrap_all_type_mismatches in Comet's schema adapter + // looking up the logical field by column index instead of by name. When + // filter expressions are created against the pruned required_schema (where + // "friends" is at index 0), the fallback would index into the full + // logical_file_schema and get "id: Int32" instead of "friends: List<...>". + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test").toUri.toString + + // Create a table with multiple columns so that nested schema pruning + // can prune away unneeded columns. The friends column is an array of + // structs with first/middle/last, but the query only needs first and middle. 
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + spark.sql( + """ + |select + | 0 as id, + | named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name, + | '123 Main Street' as address, + | 1 as pets, + | array( + | named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith') + | ) as friends + |union all + |select + | 1 as id, + | named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name, + | '321 Wall Street' as address, + | 3 as pets, + | array( + | named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones') + | ) as friends + |""".stripMargin).repartition(1).write.parquet(path) + } + + val schema = spark.read.parquet(path).schema + + readParquetFile(path, Some(schema)) { df => + df.createOrReplaceTempView("tbl") + } + + val query = "select friends.middle from tbl where friends.first[0] = 'Susan'" + val df = sql(query) + checkSparkAnswer(df) + } + } } From 51ebbc3e42afe525318c24489baafa578953ffe3 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 3 Apr 2026 12:45:01 -0400 Subject: [PATCH 26/30] make test consistent with others in the file --- .../comet/exec/CometNativeReaderSuite.scala | 87 +++++++------------ 1 file changed, 32 insertions(+), 55 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala index 0d6b3c9809..30001927b4 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala @@ -19,7 +19,6 @@ package org.apache.comet.exec -import org.apache.hadoop.fs.Path import org.scalactic.source.Position import org.scalatest.Tag @@ -294,8 +293,8 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper test("native reader - read a STRUCT subfield - field from second") { testSingleLineQuery( """ - |select 1 a, named_struct('a', 1, 'b', 'n') c0 - |""".stripMargin, + |select 1 a, 
named_struct('a', 1, 'b', 'n') c0 + |""".stripMargin, "select c0.b from tbl") } @@ -599,61 +598,39 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper test("native reader - support ARRAY literal nested ARRAY fields") { testSingleLineQuery( """ - |select 1 a - |""".stripMargin, + |select 1 a + |""".stripMargin, "select array(array(1, 2, null), array(), array(10), null, array(null)) from tbl") } + // Regression test found during DataFusion 53 upgrade (PR #3629). + // Spark's SchemaPruningSuite tests (e.g. "select a single complex field array + // and in clause", "select explode of nested field of array of struct") were + // failing because wrap_all_type_mismatches in Comet's schema adapter looked up + // the logical field by column index instead of by name. Filter expressions + // built against the pruned required_schema had "friends" at index 0, but the + // full logical_file_schema had "id: Int32" at index 0. test("native reader - nested schema pruning with array of struct and filter") { - // Regression test found during DataFusion 53 upgrade (PR #3629). - // Spark's SchemaPruningSuite tests (e.g. "select a single complex field array - // and in clause", "select explode of nested field of array of struct", - // "SPARK-34638: nested column prune on generator output") were failing with: - // native panic: called `Result::unwrap()` on an `Err` value: - // Internal("Unexpected data type in GetArrayStructFields: Int32") - // The root cause was wrap_all_type_mismatches in Comet's schema adapter - // looking up the logical field by column index instead of by name. When - // filter expressions are created against the pruned required_schema (where - // "friends" is at index 0), the fallback would index into the full - // logical_file_schema and get "id: Int32" instead of "friends: List<...>". 
- withTempDir { dir => - val path = new Path(dir.toURI.toString, "test").toUri.toString - - // Create a table with multiple columns so that nested schema pruning - // can prune away unneeded columns. The friends column is an array of - // structs with first/middle/last, but the query only needs first and middle. - withSQLConf(CometConf.COMET_ENABLED.key -> "false") { - spark.sql( - """ - |select - | 0 as id, - | named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name, - | '123 Main Street' as address, - | 1 as pets, - | array( - | named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith') - | ) as friends - |union all - |select - | 1 as id, - | named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name, - | '321 Wall Street' as address, - | 3 as pets, - | array( - | named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones') - | ) as friends - |""".stripMargin).repartition(1).write.parquet(path) - } - - val schema = spark.read.parquet(path).schema - - readParquetFile(path, Some(schema)) { df => - df.createOrReplaceTempView("tbl") - } - - val query = "select friends.middle from tbl where friends.first[0] = 'Susan'" - val df = sql(query) - checkSparkAnswer(df) - } + testSingleLineQuery( + """ + |select + | 0 as id, + | named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name, + | '123 Main Street' as address, + | 1 as pets, + | array( + | named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith') + | ) as friends + |union all + |select + | 1 as id, + | named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name, + | '321 Wall Street' as address, + | 3 as pets, + | array( + | named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones') + | ) as friends + |""".stripMargin, + "select friends.middle from tbl where friends.first[0] = 'Susan'") } } From 18efeb43233943aa4153390598a91fd2addf9817 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 3 Apr 2026 15:42:21 -0400 Subject: [PATCH 27/30] add fallback 
for SPARK-39393 test. --- .../spark/sql/comet/CometScanExec.scala | 15 +++- .../comet/exec/CometNativeReaderSuite.scala | 78 ++++++++++++++++++- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index 9afffe20bc..ad057e6ab8 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -159,12 +159,25 @@ case class CometScanExec( */ lazy val supportedDataFilters: Seq[Expression] = { if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION) { - dataFilters.filterNot(isDynamicPruningFilter) + dataFilters + .filterNot(isDynamicPruningFilter) + .filterNot(isNullCheckOnArrayColumn) } else { dataFilters } } + /** + * DataFusion's list predicate pushdown pushes IsNotNull/IsNull on List columns into the Parquet + * reader as a RowFilter. This crashes on protobuf-style bare repeated primitive fields + * (SPARK-39393). Filter these out so the predicate is evaluated after reading instead. 
+ */ + private def isNullCheckOnArrayColumn(expr: Expression): Boolean = expr match { + case IsNotNull(child) => child.dataType.isInstanceOf[ArrayType] + case IsNull(child) => child.dataType.isInstanceOf[ArrayType] + case _ => false + } + @transient private lazy val pushedDownFilters = { getPushedDownFilters(relation, supportedDataFilters) diff --git a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala index 30001927b4..7bec1ea86f 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala @@ -19,10 +19,19 @@ package org.apache.comet.exec +import scala.jdk.CollectionConverters._ + import org.scalactic.source.Position import org.scalatest.Tag -import org.apache.spark.sql.CometTestBase +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.parquet.hadoop.ParquetWriter +import org.apache.parquet.hadoop.api.WriteSupport +import org.apache.parquet.hadoop.api.WriteSupport.WriteContext +import org.apache.parquet.io.api.RecordConsumer +import org.apache.parquet.schema.MessageTypeParser +import org.apache.spark.sql.{CometTestBase, Row} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions.{array, col} import org.apache.spark.sql.internal.SQLConf @@ -633,4 +642,71 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper |""".stripMargin, "select friends.middle from tbl where friends.first[0] = 'Susan'") } + + // SPARK-39393: bare "repeated int32" (protobuf-style, no wrapping list group) + // should be readable without crashing on missing def levels. 
+ test("native reader - read bare repeated primitive field") { + withTempDir { dir => + val path = new Path(dir.toURI.toString, "protobuf-parquet").toString + val schema = + """message protobuf_style { + | repeated int32 f; + |} + """.stripMargin + + writeDirect( + path, + schema, + { rc => + rc.startMessage() + rc.startField("f", 0) + rc.addInteger(1) + rc.addInteger(2) + rc.endField("f", 0) + rc.endMessage() + }) + + // Read without filter + checkAnswer(spark.read.parquet(dir.getCanonicalPath), Seq(Row(Seq(1, 2)))) + + // Read with isnotnull filter — the filter should not be pushed down into + // the Parquet reader for repeated primitive fields (SPARK-39393), but the + // query should still return correct results by evaluating the filter after + // reading. + checkAnswer( + spark.read.parquet(dir.getCanonicalPath).filter("isnotnull(f)"), + Seq(Row(Seq(1, 2)))) + } + } + + /** Write a Parquet file using a raw RecordConsumer for full schema control. */ + private def writeDirect( + path: String, + schema: String, + recordWriters: (RecordConsumer => Unit)*): Unit = { + val messageType = MessageTypeParser.parseMessageType(schema) + val writeSupport = new DirectWriteSupport(messageType) + class Builder extends ParquetWriter.Builder[RecordConsumer => Unit, Builder](new Path(path)) { + override def getWriteSupport(conf: Configuration): WriteSupport[RecordConsumer => Unit] = + writeSupport + override def self(): Builder = this + } + val writer = new Builder().build() + try recordWriters.foreach(writer.write) + finally writer.close() + } +} + +private class DirectWriteSupport(schema: org.apache.parquet.schema.MessageType) + extends WriteSupport[RecordConsumer => Unit] { + private var recordConsumer: RecordConsumer = _ + + override def init(configuration: Configuration): WriteContext = + new WriteContext(schema, java.util.Collections.emptyMap()) + + override def write(recordWriter: RecordConsumer => Unit): Unit = + recordWriter(recordConsumer) + + override def 
prepareForWrite(rc: RecordConsumer): Unit = + this.recordConsumer = rc } From 38dc46a00c092d6a3048b21841ae881cba4fd9fd Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 3 Apr 2026 15:52:28 -0400 Subject: [PATCH 28/30] update docs --- .../spark/sql/comet/CometScanExec.scala | 26 ++++++++++++++++--- .../comet/exec/CometNativeReaderSuite.scala | 8 ++++-- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index ad057e6ab8..2a7d059fca 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -155,7 +155,8 @@ case class CometScanExec( /** * Returns the data filters that are supported for this scan implementation. For - * native_datafusion scans, this excludes dynamic pruning filters (subqueries) + * native_datafusion scans, this excludes dynamic pruning filters (subqueries) and null checks + * on array columns (see [[isNullCheckOnArrayColumn]]). */ lazy val supportedDataFilters: Seq[Expression] = { if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION) { @@ -168,9 +169,26 @@ case class CometScanExec( } /** - * DataFusion's list predicate pushdown pushes IsNotNull/IsNull on List columns into the Parquet - * reader as a RowFilter. This crashes on protobuf-style bare repeated primitive fields - * (SPARK-39393). Filter these out so the predicate is evaluated after reading instead. + * Returns true for IsNotNull/IsNull predicates on ArrayType columns. + * + * These must be excluded from native scan data filters because: + * + * 1. Parquet does not support predicate pushdown on repeated columns. The Parquet library's + * SchemaCompatibilityValidator rejects filter predicates on repeated fields entirely + * (SPARK-39393, PARQUET-34). Spark's own ParquetFilters excludes REPEATED columns from + * pushdown for the same reason. 
+ * + * 2. When Comet attaches these filters via ParquetSource.with_predicate(), DataFusion's list + * predicate pushdown (PR #19545) considers IsNotNull on List columns a supported predicate and + * pushes it into the Parquet reader as a RowFilter. This triggers an arrow-rs bug where + * ListArrayReader crashes on bare repeated primitives ("item_reader def levels are None"). + * + * 3. Even without the arrow-rs bug, the filter is redundant: a bare repeated field is never + * null (an empty repeated field means zero elements, not null), and DataFusion's optimizer + * would eliminate the filter if it went through the normal planning path. + * + * Filtering these out is safe — the predicate is still evaluated after reading, so correctness + * is preserved. */ private def isNullCheckOnArrayColumn(expr: Expression): Boolean = expr match { case IsNotNull(child) => child.dataType.isInstanceOf[ArrayType] diff --git a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala index 7bec1ea86f..35a7313f93 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala @@ -19,8 +19,6 @@ package org.apache.comet.exec -import scala.jdk.CollectionConverters._ - import org.scalactic.source.Position import org.scalatest.Tag @@ -645,6 +643,12 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper // SPARK-39393: bare "repeated int32" (protobuf-style, no wrapping list group) // should be readable without crashing on missing def levels. + // SPARK-39393: Parquet does not support predicate pushdown on repeated columns. + // A bare "repeated int32 f" (protobuf-style, no wrapping LIST group) must not + // have IsNotNull pushed into the Parquet reader. Comet filters these out in + // CometScanExec.supportedDataFilters so the predicate is evaluated after + // reading. 
Without that, DataFusion's list predicate pushdown would push + // IsNotNull as a RowFilter, triggering an arrow-rs ListArrayReader crash. test("native reader - read bare repeated primitive field") { withTempDir { dir => val path = new Path(dir.toURI.toString, "protobuf-parquet").toString From c73db1f3bde91b43f73aff983330fedda50d74c2 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 3 Apr 2026 17:01:39 -0400 Subject: [PATCH 29/30] fix spotless. --- .../main/scala/org/apache/spark/sql/comet/CometScanExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index 2a7d059fca..652fdfc96d 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -187,7 +187,7 @@ case class CometScanExec( * null (an empty repeated field means zero elements, not null), and DataFusion's optimizer * would eliminate the filter if it went through the normal planning path. * - * Filtering these out is safe — the predicate is still evaluated after reading, so correctness + * Filtering these out is safe -- the predicate is still evaluated after reading, so correctness * is preserved. 
*/ private def isNullCheckOnArrayColumn(expr: Expression): Boolean = expr match { From e518d166ce6070573d348990e7e690eda692aba7 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 7 Apr 2026 15:45:26 -0400 Subject: [PATCH 30/30] rename from PR feedback --- native/spark-expr/src/math_funcs/round.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/native/spark-expr/src/math_funcs/round.rs b/native/spark-expr/src/math_funcs/round.rs index 069eb670c9..9605f93f17 100644 --- a/native/spark-expr/src/math_funcs/round.rs +++ b/native/spark-expr/src/math_funcs/round.rs @@ -111,7 +111,7 @@ pub fn spark_round( return internal_err!("Invalid point argument for Round(): {:#?}", point); }; // DataFusion's RoundFunc expects Int32 for decimal_places - let point_as_i32 = ColumnarValue::Scalar(ScalarValue::Int32(Some(*point as i32))); + let point_i32 = ColumnarValue::Scalar(ScalarValue::Int32(Some(*point as i32))); match value { ColumnarValue::Array(array) => match array.data_type() { DataType::Int64 if *point < 0 => { @@ -135,10 +135,7 @@ pub fn spark_round( let round_udf = RoundFunc::new(); let return_field = Arc::new(Field::new("round", array.data_type().clone(), true)); let args_for_round = ScalarFunctionArgs { - args: vec![ - ColumnarValue::Array(Arc::clone(array)), - point_as_i32.clone(), - ], + args: vec![ColumnarValue::Array(Arc::clone(array)), point_i32.clone()], number_rows: array.len(), return_field, arg_fields: vec![], @@ -171,7 +168,7 @@ pub fn spark_round( let data_type = a.data_type(); let return_field = Arc::new(Field::new("round", data_type, true)); let args_for_round = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(a.clone()), point_as_i32.clone()], + args: vec![ColumnarValue::Scalar(a.clone()), point_i32.clone()], number_rows: 1, return_field, arg_fields: vec![],