From 21468b8862b854a7c0e0214eb9d2e7e6ed1720c7 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 8 May 2024 10:51:11 -0700 Subject: [PATCH 01/11] build: Switch back to released version of DataFusion and arrow-rs --- core/Cargo.lock | 128 ++++++++++++++--------- core/Cargo.toml | 22 ++-- dev/ensure-jars-have-correct-contents.sh | 13 +-- pom.xml | 2 +- 4 files changed, 96 insertions(+), 69 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 52f105591d..ff863f234c 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -115,7 +115,8 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" dependencies = [ "arrow-arith", "arrow-array", @@ -135,21 +136,23 @@ dependencies = [ [[package]] name = "arrow-arith" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-array" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" dependencies = [ "ahash", "arrow-buffer", @@ -157,7 +160,7 @@ dependencies = [ "arrow-schema", "chrono", "chrono-tz", - "half 2.1.0", + "half 2.4.1", "hashbrown", "num", ] @@ -165,17 +168,19 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" dependencies = [ "bytes", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-cast" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" dependencies = [ "arrow-array", "arrow-buffer", @@ -186,7 +191,7 @@ dependencies = [ "base64", "chrono", "comfy-table", - "half 2.1.0", + "half 2.4.1", "lexical-core", "num", "ryu", @@ -195,7 +200,8 @@ dependencies = [ [[package]] name = "arrow-csv" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" dependencies = [ "arrow-array", "arrow-buffer", @@ -213,18 +219,20 @@ dependencies = [ [[package]] name = "arrow-data" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" dependencies = [ "arrow-buffer", "arrow-schema", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-ipc" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" dependencies = [ "arrow-array", "arrow-buffer", @@ -238,7 +246,8 @@ dependencies = [ [[package]] name = "arrow-json" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" dependencies = [ "arrow-array", "arrow-buffer", @@ -246,7 +255,7 @@ dependencies = [ "arrow-data", "arrow-schema", "chrono", - "half 2.1.0", + "half 2.4.1", "indexmap", "lexical-core", "num", @@ -257,35 +266,38 @@ dependencies = [ [[package]] name = "arrow-ord" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-row" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "half 2.1.0", + "half 2.4.1", "hashbrown", ] [[package]] name = "arrow-schema" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" dependencies = [ "bitflags 2.5.0", ] @@ -293,7 +305,8 @@ dependencies = [ [[package]] name = "arrow-select" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" dependencies = [ "ahash", "arrow-array", @@ -306,7 +319,8 @@ dependencies = [ [[package]] name = "arrow-string" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" dependencies = [ "arrow-array", "arrow-buffer", @@ -609,7 +623,7 @@ dependencies = [ "datafusion-physical-expr", "flate2", "futures", - "half 2.1.0", + "half 2.4.1", "hashbrown", "itertools 0.11.0", "jni", @@ -824,8 +838,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85069782056753459dc47e386219aa1fdac5b731f26c28abb8c0ffd4b7c5ab11" dependencies = [ "ahash", "arrow", @@ -847,7 +862,7 @@ dependencies = [ "datafusion-sql", "futures", "glob", - "half 2.1.0", + "half 2.4.1", "hashbrown", "indexmap", "itertools 0.12.1", @@ -866,8 +881,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "309d9040751f6dc9e33c85dce6abb55a46ef7ea3644577dd014611c379447ef3" dependencies = [ "ahash", "arrow", @@ -875,7 +891,7 @@ dependencies = [ "arrow-buffer", "arrow-schema", "chrono", - "half 2.1.0", + "half 2.4.1", "instant", "libc", "num_cpus", @@ -885,16 +901,18 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e4a44d8ef1b1e85d32234e6012364c411c3787859bb3bba893b0332cb03dfd" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06a3a29ae36bcde07d179cc33b45656a8e7e4d023623e320e48dcf1200eeee95" dependencies = [ "arrow", "chrono", @@ -913,8 +931,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3542aa322029c2121a671ce08000d4b274171070df13f697b14169ccf4f628" dependencies = [ "ahash", "arrow", @@ -929,8 +948,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd221792c666eac174ecc09e606312844772acc12cbec61a420c2fca1ee70959" dependencies = [ "arrow", "base64", @@ -953,8 +973,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76bd7f5087817deb961764e8c973d243b54f8572db414a8f0a8f33a48f991e0a" dependencies = [ "arrow", "async-trait", @@ -970,8 +991,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cabc0d9aaa0f5eb1b472112f16223c9ffd2fb04e58cbf65c0a331ee6e993f96" dependencies = [ "ahash", "arrow", @@ -987,7 +1009,7 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", - "half 2.1.0", + "half 2.4.1", "hashbrown", "hex", "indexmap", @@ -1004,8 +1026,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c0523e9c8880f2492a88bbd857dde02bed1ed23f3e9211a89d3d7ec3b44af9" dependencies = [ "ahash", "arrow", @@ -1020,7 +1043,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "futures", - "half 2.1.0", + "half 2.4.1", "hashbrown", "indexmap", "itertools 0.12.1", @@ -1034,8 +1057,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49eb54b42227136f6287573f2434b1de249fe1b8e6cd6cc73a634e4a3ec29356" dependencies = [ "arrow", "arrow-array", @@ -1301,10 +1325,11 @@ checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "half" -version = "2.1.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ + "cfg-if", "crunchy", "num-traits", ] @@ -1974,12 +1999,13 @@ dependencies = [ [[package]] name = "parquet" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" dependencies = [ "ahash", "bytes", "chrono", - "half 2.1.0", + "half 2.4.1", "hashbrown", "num", "num-bigint", diff --git a/core/Cargo.toml b/core/Cargo.toml index 5e3e0ee740..39729a9632 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,13 +29,13 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", default-features = false, features = ["experimental"] } -half = { version = "~2.1", default-features = false } +arrow = { version = "51.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { version = "51.0.0" } +arrow-data = { version = "51.0.0" } +arrow-schema = { version = "51.0.0" } +arrow-string = { version = "51.0.0" } +parquet = { version = "51.0.0", default-features = false, features = ["experimental"] } +half = { version = "2.4.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } tokio = { version = "1", features = ["rt-multi-thread"] } @@ -66,10 +66,10 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4" } -datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", features = ["crypto_expressions"]} -datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", default-features = false, features = ["unicode_expressions"] } +datafusion-common = { version = "37.1.0" } +datafusion = { default-features = false, version = "37.1.0", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { version = "37.1.0", features = ["crypto_expressions"] } +datafusion-physical-expr = { version = "37.1.0", default-features = false, features = ["unicode_expressions"] } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" diff --git a/dev/ensure-jars-have-correct-contents.sh b/dev/ensure-jars-have-correct-contents.sh index 1f97d2d4a7..6f34ed6504 100755 --- a/dev/ensure-jars-have-correct-contents.sh +++ b/dev/ensure-jars-have-correct-contents.sh @@ -52,12 +52,13 @@ allowed_expr+="|^conf/" # * whatever under the "lib" directory allowed_expr+="|^lib/" # Native dynamic library from Arrow -allowed_expr+="|^x86_64/" -allowed_expr+="|^aarch_64/" -allowed_expr+="|^x86_64/libarrow_cdata_jni.so$" -allowed_expr+="|^x86_64/libarrow_cdata_jni.dylib$" -allowed_expr+="|^x86_64/arrow_cdata_jni.dll$" -allowed_expr+="|^aarch_64/libarrow_cdata_jni.dylib$" +allowed_expr+="|^arrow_cdata_jni/" +allowed_expr+="|^arrow_cdata_jni/x86_64/" +allowed_expr+="|^arrow_cdata_jni/aarch_64/" +allowed_expr+="|^arrow_cdata_jni/x86_64/libarrow_cdata_jni.so$" +allowed_expr+="|^arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib$" +allowed_expr+="|^arrow_cdata_jni/x86_64/arrow_cdata_jni.dll$" +allowed_expr+="|^arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib$" # Two classes in Arrow C module: StructVectorLoader and StructVectorUnloader, are not # under org/apache/arrow/c, so we'll need to treat them specially. allowed_expr+="|^org/apache/arrow/$" diff --git a/pom.xml b/pom.xml index 59e0569ff2..3b9615b439 100644 --- a/pom.xml +++ b/pom.xml @@ -52,7 +52,7 @@ under the License. 3.19.6 1.13.1 provided - 14.0.2 + 16.0.0 1.9.13 2.43.0 0.8.11 From c76a5ec3d0c3179f2467536c377b8eaa3c06d25d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 9 May 2024 10:51:49 -0700 Subject: [PATCH 02/11] Exclude all arrow dependencies from Spark --- pom.xml | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 3b9615b439..974f7a8b3c 100644 --- a/pom.xml +++ b/pom.xml @@ -119,6 +119,11 @@ under the License. commons-logging commons-logging + + + org.apache.arrow + * + @@ -137,7 +142,7 @@ under the License. org.apache.arrow - arrow-memory-netty + * @@ -257,7 +262,7 @@ under the License. org.apache.arrow - arrow-memory-netty + * @@ -283,7 +288,7 @@ under the License. org.apache.arrow - arrow-memory-netty + * @@ -323,6 +328,11 @@ under the License. commons-logging commons-logging + + + org.apache.arrow + * + @@ -339,6 +349,10 @@ under the License. commons-logging commons-logging + + org.apache.arrow + * + @@ -399,6 +413,11 @@ under the License. commons-logging commons-logging + + + org.apache.arrow + * + From d993fafb29bc357dbcdc717dec92d25c3ae72510 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 9 May 2024 12:52:23 -0700 Subject: [PATCH 03/11] Revert "build: Switch back to released version of DataFusion and arrow-rs" This reverts commit 29c89bfb25ddf4757ab17f951d3ccf17e55422da. --- core/Cargo.lock | 128 +++++++++-------------- core/Cargo.toml | 22 ++-- dev/ensure-jars-have-correct-contents.sh | 13 ++- pom.xml | 2 +- 4 files changed, 69 insertions(+), 96 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index ff863f234c..52f105591d 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -115,8 +115,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-arith", "arrow-array", @@ -136,23 +135,21 @@ dependencies = [ [[package]] name = "arrow-arith" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half 2.4.1", + "half 2.1.0", "num", ] [[package]] name = "arrow-array" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "arrow-buffer", @@ -160,7 +157,7 @@ dependencies = [ "arrow-schema", "chrono", "chrono-tz", - "half 2.4.1", + "half 2.1.0", "hashbrown", "num", ] @@ -168,19 +165,17 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "bytes", - "half 2.4.1", + "half 2.1.0", "num", ] [[package]] name = "arrow-cast" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -191,7 +186,7 @@ dependencies = [ "base64", "chrono", "comfy-table", - "half 2.4.1", + "half 2.1.0", "lexical-core", "num", "ryu", @@ -200,8 +195,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -219,20 +213,18 @@ dependencies = [ [[package]] name = "arrow-data" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-buffer", "arrow-schema", - "half 2.4.1", + "half 2.1.0", "num", ] [[package]] name = "arrow-ipc" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -246,8 +238,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -255,7 +246,7 @@ dependencies = [ "arrow-data", "arrow-schema", "chrono", - "half 2.4.1", + "half 2.1.0", "indexmap", "lexical-core", "num", @@ -266,38 +257,35 @@ dependencies = [ [[package]] name = "arrow-ord" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half 2.4.1", + "half 2.1.0", "num", ] [[package]] name = "arrow-row" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "half 2.4.1", + "half 2.1.0", "hashbrown", ] [[package]] name = "arrow-schema" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "bitflags 2.5.0", ] @@ -305,8 +293,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "arrow-array", @@ -319,8 +306,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -623,7 +609,7 @@ dependencies = [ "datafusion-physical-expr", "flate2", "futures", - "half 2.4.1", + "half 2.1.0", "hashbrown", "itertools 0.11.0", "jni", @@ -838,9 +824,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85069782056753459dc47e386219aa1fdac5b731f26c28abb8c0ffd4b7c5ab11" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "ahash", "arrow", @@ -862,7 +847,7 @@ dependencies = [ "datafusion-sql", "futures", "glob", - "half 2.4.1", + "half 2.1.0", "hashbrown", "indexmap", "itertools 0.12.1", @@ -881,9 +866,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "309d9040751f6dc9e33c85dce6abb55a46ef7ea3644577dd014611c379447ef3" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "ahash", "arrow", @@ -891,7 +875,7 @@ dependencies = [ "arrow-buffer", "arrow-schema", "chrono", - "half 2.4.1", + "half 2.1.0", "instant", "libc", "num_cpus", @@ -901,18 +885,16 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e4a44d8ef1b1e85d32234e6012364c411c3787859bb3bba893b0332cb03dfd" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06a3a29ae36bcde07d179cc33b45656a8e7e4d023623e320e48dcf1200eeee95" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "arrow", "chrono", @@ -931,9 +913,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a3542aa322029c2121a671ce08000d4b274171070df13f697b14169ccf4f628" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "ahash", "arrow", @@ -948,9 +929,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd221792c666eac174ecc09e606312844772acc12cbec61a420c2fca1ee70959" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "arrow", "base64", @@ -973,9 +953,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bd7f5087817deb961764e8c973d243b54f8572db414a8f0a8f33a48f991e0a" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "arrow", "async-trait", @@ -991,9 +970,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cabc0d9aaa0f5eb1b472112f16223c9ffd2fb04e58cbf65c0a331ee6e993f96" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "ahash", "arrow", @@ -1009,7 +987,7 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", - "half 2.4.1", + "half 2.1.0", "hashbrown", "hex", "indexmap", @@ -1026,9 +1004,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c0523e9c8880f2492a88bbd857dde02bed1ed23f3e9211a89d3d7ec3b44af9" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "ahash", "arrow", @@ -1043,7 +1020,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "futures", - "half 2.4.1", + "half 2.1.0", "hashbrown", "indexmap", "itertools 0.12.1", @@ -1057,9 +1034,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "37.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49eb54b42227136f6287573f2434b1de249fe1b8e6cd6cc73a634e4a3ec29356" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" dependencies = [ "arrow", "arrow-array", @@ -1325,11 +1301,10 @@ checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "half" -version = "2.4.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554" dependencies = [ - "cfg-if", "crunchy", "num-traits", ] @@ -1999,13 +1974,12 @@ dependencies = [ [[package]] name = "parquet" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "bytes", "chrono", - "half 2.4.1", + "half 2.1.0", "hashbrown", "num", "num-bigint", diff --git a/core/Cargo.toml b/core/Cargo.toml index 39729a9632..5e3e0ee740 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,13 +29,13 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { version = "51.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { version = "51.0.0" } -arrow-data = { version = "51.0.0" } -arrow-schema = { version = "51.0.0" } -arrow-string = { version = "51.0.0" } -parquet = { version = "51.0.0", default-features = false, features = ["experimental"] } -half = { version = "2.4.1", default-features = false } +arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", default-features = false, features = ["experimental"] } +half = { version = "~2.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } tokio = { version = "1", features = ["rt-multi-thread"] } @@ -66,10 +66,10 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { version = "37.1.0" } -datafusion = { default-features = false, version = "37.1.0", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { version = "37.1.0", features = ["crypto_expressions"] } -datafusion-physical-expr = { version = "37.1.0", default-features = false, features = ["unicode_expressions"] } +datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4" } +datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", features = ["crypto_expressions"]} +datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", default-features = false, features = ["unicode_expressions"] } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" diff --git a/dev/ensure-jars-have-correct-contents.sh b/dev/ensure-jars-have-correct-contents.sh index 6f34ed6504..1f97d2d4a7 100755 --- a/dev/ensure-jars-have-correct-contents.sh +++ b/dev/ensure-jars-have-correct-contents.sh @@ -52,13 +52,12 @@ allowed_expr+="|^conf/" # * whatever under the "lib" directory allowed_expr+="|^lib/" # Native dynamic library from Arrow -allowed_expr+="|^arrow_cdata_jni/" -allowed_expr+="|^arrow_cdata_jni/x86_64/" -allowed_expr+="|^arrow_cdata_jni/aarch_64/" -allowed_expr+="|^arrow_cdata_jni/x86_64/libarrow_cdata_jni.so$" -allowed_expr+="|^arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib$" -allowed_expr+="|^arrow_cdata_jni/x86_64/arrow_cdata_jni.dll$" -allowed_expr+="|^arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib$" +allowed_expr+="|^x86_64/" +allowed_expr+="|^aarch_64/" +allowed_expr+="|^x86_64/libarrow_cdata_jni.so$" +allowed_expr+="|^x86_64/libarrow_cdata_jni.dylib$" +allowed_expr+="|^x86_64/arrow_cdata_jni.dll$" +allowed_expr+="|^aarch_64/libarrow_cdata_jni.dylib$" # Two classes in Arrow C module: StructVectorLoader and StructVectorUnloader, are not # under org/apache/arrow/c, so we'll need to treat them specially. allowed_expr+="|^org/apache/arrow/$" diff --git a/pom.xml b/pom.xml index 974f7a8b3c..4934416ebc 100644 --- a/pom.xml +++ b/pom.xml @@ -52,7 +52,7 @@ under the License. 3.19.6 1.13.1 provided - 16.0.0 + 14.0.2 1.9.13 2.43.0 0.8.11 From e0e9bcf4a1d169ec1e12da00c718aced5be25eb7 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 9 May 2024 13:02:25 -0700 Subject: [PATCH 04/11] Test --- core/Cargo.toml | 20 ++++++++++---------- pom.xml | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 5e3e0ee740..9874f09818 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,12 +29,12 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } -parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", default-features = false, features = ["experimental"] } +arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } +arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } +arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } +arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } +parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4", default-features = false, features = ["experimental"] } half = { version = "~2.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } @@ -66,10 +66,10 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4" } -datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", features = ["crypto_expressions"]} -datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "57b3be4", default-features = false, features = ["unicode_expressions"] } +datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e" } +datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e", features = ["crypto_expressions"] } +datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e", default-features = false, features = ["unicode_expressions"] } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" diff --git a/pom.xml b/pom.xml index 4934416ebc..974f7a8b3c 100644 --- a/pom.xml +++ b/pom.xml @@ -52,7 +52,7 @@ under the License. 3.19.6 1.13.1 provided - 14.0.2 + 16.0.0 1.9.13 2.43.0 0.8.11 From b9d932966baf749031760072d63f6974289c631c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 9 May 2024 13:47:16 -0700 Subject: [PATCH 05/11] Test --- core/Cargo.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 9874f09818..66daa56cd9 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,12 +29,12 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } -arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } -arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } -arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4" } -parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "95e7af4", default-features = false, features = ["experimental"] } +arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } +arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } +arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } +arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } +parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4", default-features = false, features = ["experimental"] } half = { version = "~2.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } @@ -66,10 +66,10 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e" } -datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e", features = ["crypto_expressions"] } -datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "e30e00e", default-features = false, features = ["unicode_expressions"] } +datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2" } +datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2", features = ["crypto_expressions"] } +datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2", default-features = false, features = ["unicode_expressions"] } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" From 87d884379502e42ffceae15642cf112fae0422ab Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 13 May 2024 11:10:34 -0700 Subject: [PATCH 06/11] Test arrow-rs fix --- core/Cargo.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 66daa56cd9..ced062a9ab 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,12 +29,12 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } -arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } -arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } -arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4" } -parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "595abe4", default-features = false, features = ["experimental"] } +arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } +arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } +arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } +arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } +parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb", default-features = false, features = ["experimental"] } half = { version = "~2.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } @@ -66,10 +66,10 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2" } -datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2", features = ["crypto_expressions"] } -datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "7ec8bc2", default-features = false, features = ["unicode_expressions"] } +datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81" } +datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", features = ["crypto_expressions"] } +datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", default-features = false, features = ["unicode_expressions"] } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" From 7c96fa46940b93fa543d149354b953ccd9492455 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 13 May 2024 11:30:41 -0700 Subject: [PATCH 07/11] Fix --- dev/ensure-jars-have-correct-contents.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dev/ensure-jars-have-correct-contents.sh b/dev/ensure-jars-have-correct-contents.sh index 1f97d2d4a7..3a32aef5db 100755 --- a/dev/ensure-jars-have-correct-contents.sh +++ b/dev/ensure-jars-have-correct-contents.sh @@ -58,6 +58,14 @@ allowed_expr+="|^x86_64/libarrow_cdata_jni.so$" allowed_expr+="|^x86_64/libarrow_cdata_jni.dylib$" allowed_expr+="|^x86_64/arrow_cdata_jni.dll$" allowed_expr+="|^aarch_64/libarrow_cdata_jni.dylib$" + +allowed_expr+="|^arrow_cdata_jni/" +allowed_expr+="|^arrow_cdata_jni/x86_64/" +allowed_expr+="|^arrow_cdata_jni/aarch_64/" +allowed_expr+="|^arrow_cdata_jni/x86_64/libarrow_cdata_jni.so$" +allowed_expr+="|^arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib$" +allowed_expr+="|^arrow_cdata_jni/x86_64/arrow_cdata_jni.dll$" +allowed_expr+="|^arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib$" # Two classes in Arrow C module: StructVectorLoader and StructVectorUnloader, are not # under org/apache/arrow/c, so we'll need to treat them specially. allowed_expr+="|^org/apache/arrow/$" From 4b320e357c11fc9aea36132bddc2268c0ff4bde1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 30 May 2024 14:18:15 -0700 Subject: [PATCH 08/11] Use DataFusion repo --- core/Cargo.lock | 171 ++++++++++++------ core/Cargo.toml | 24 +-- .../datafusion/expressions/bitwise_not.rs | 4 +- .../expressions/bloom_filter_might_contain.rs | 4 +- .../execution/datafusion/expressions/cast.rs | 4 +- .../datafusion/expressions/checkoverflow.rs | 4 +- .../datafusion/expressions/if_expr.rs | 12 +- .../datafusion/expressions/normalize_nan.rs | 2 +- .../datafusion/expressions/scalar_funcs.rs | 37 ++-- .../datafusion/expressions/strings.rs | 12 +- .../datafusion/expressions/subquery.rs | 2 +- .../datafusion/expressions/temporal.rs | 20 +- .../execution/datafusion/operators/expand.rs | 4 +- core/src/execution/datafusion/planner.rs | 66 ++----- .../execution/datafusion/shuffle_writer.rs | 4 +- core/src/execution/operators/copy.rs | 4 +- core/src/execution/operators/scan.rs | 2 +- 17 files changed, 195 insertions(+), 181 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 52f105591d..18291e796b 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -115,7 +115,8 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" dependencies = [ "arrow-arith", "arrow-array", @@ -135,21 +136,23 @@ dependencies = [ [[package]] name = "arrow-arith" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-array" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" dependencies = [ "ahash", "arrow-buffer", @@ -157,7 +160,7 @@ dependencies = [ "arrow-schema", "chrono", "chrono-tz", - "half 2.1.0", + "half 2.4.1", "hashbrown", "num", ] @@ -165,17 +168,19 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" dependencies = [ "bytes", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-cast" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" dependencies = [ "arrow-array", "arrow-buffer", @@ -186,7 +191,7 @@ dependencies = [ "base64", "chrono", "comfy-table", - "half 2.1.0", + "half 2.4.1", "lexical-core", "num", "ryu", @@ -195,7 +200,8 @@ dependencies = [ [[package]] name = "arrow-csv" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" dependencies = [ "arrow-array", "arrow-buffer", @@ -213,18 +219,20 @@ dependencies = [ [[package]] name = "arrow-data" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" dependencies = [ "arrow-buffer", "arrow-schema", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-ipc" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" dependencies = [ "arrow-array", "arrow-buffer", @@ -238,7 +246,8 @@ dependencies = [ [[package]] name = "arrow-json" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" dependencies = [ "arrow-array", "arrow-buffer", @@ -246,7 +255,7 @@ dependencies = [ "arrow-data", "arrow-schema", "chrono", - "half 2.1.0", + "half 2.4.1", "indexmap", "lexical-core", "num", @@ -257,35 +266,38 @@ dependencies = [ [[package]] name = "arrow-ord" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half 2.1.0", + "half 2.4.1", "num", ] [[package]] name = "arrow-row" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "half 2.1.0", + "half 2.4.1", "hashbrown", ] [[package]] name = "arrow-schema" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" dependencies = [ "bitflags 2.5.0", ] @@ -293,7 +305,8 @@ dependencies = [ [[package]] name = "arrow-select" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" dependencies = [ "ahash", "arrow-array", @@ -306,7 +319,8 @@ dependencies = [ [[package]] name = "arrow-string" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" dependencies = [ "arrow-array", "arrow-buffer", @@ -605,11 +619,13 @@ dependencies = [ "criterion", "datafusion", "datafusion-common", + "datafusion-expr", "datafusion-functions", "datafusion-physical-expr", + "datafusion-physical-expr-common", "flate2", "futures", - "half 2.1.0", + "half 2.4.1", "hashbrown", "itertools 0.11.0", "jni", @@ -824,8 +840,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "ahash", "arrow", @@ -841,13 +857,15 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", + "datafusion-functions-aggregate", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-sql", "futures", "glob", - "half 2.1.0", + "half 2.4.1", "hashbrown", "indexmap", "itertools 0.12.1", @@ -866,8 +884,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "ahash", "arrow", @@ -875,7 +893,8 @@ dependencies = [ "arrow-buffer", "arrow-schema", "chrono", - "half 2.1.0", + "half 2.4.1", + "hashbrown", "instant", "libc", "num_cpus", @@ -885,16 +904,16 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "arrow", "chrono", @@ -913,8 +932,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "ahash", "arrow", @@ -922,6 +941,7 @@ dependencies = [ "chrono", "datafusion-common", "paste", + "serde_json", "sqlparser", "strum", "strum_macros", @@ -929,8 +949,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "arrow", "base64", @@ -941,28 +961,48 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "hashbrown", "hex", "itertools 0.12.1", "log", "md-5", + "rand", "regex", "sha2", "unicode-segmentation", "uuid", ] +[[package]] +name = "datafusion-functions-aggregate" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", + "paste", + "sqlparser", +] + [[package]] name = "datafusion-optimizer" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "arrow", "async-trait", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-aggregate", "datafusion-physical-expr", "hashbrown", + "indexmap", "itertools 0.12.1", "log", "regex-syntax", @@ -970,8 +1010,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "ahash", "arrow", @@ -981,36 +1021,44 @@ dependencies = [ "arrow-schema", "arrow-string", "base64", - "blake2", - "blake3", "chrono", "datafusion-common", "datafusion-execution", "datafusion-expr", - "half 2.1.0", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "half 2.4.1", "hashbrown", "hex", "indexmap", "itertools 0.12.1", "log", - "md-5", "paste", "petgraph", - "rand", "regex", - "sha2", - "unicode-segmentation", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-expr", + "rand", ] [[package]] name = "datafusion-physical-plan" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", + "arrow-ord", "arrow-schema", "async-trait", "chrono", @@ -1018,9 +1066,11 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate", "datafusion-physical-expr", + "datafusion-physical-expr-common", "futures", - "half 2.1.0", + "half 2.4.1", "hashbrown", "indexmap", "itertools 0.12.1", @@ -1034,8 +1084,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "36.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=57b3be4#57b3be4297a47aa45094c16e37ddf0141d723bf0" +version = "38.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" dependencies = [ "arrow", "arrow-array", @@ -1043,6 +1093,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "log", + "regex", "sqlparser", "strum", ] @@ -1301,10 +1352,11 @@ checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "half" -version = "2.1.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ + "cfg-if", "crunchy", "num-traits", ] @@ -1974,12 +2026,13 @@ dependencies = [ [[package]] name = "parquet" version = "51.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" dependencies = [ "ahash", "bytes", "chrono", - "half 2.1.0", + "half 2.4.1", "hashbrown", "num", "num-bigint", @@ -2529,9 +2582,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.44.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaf9c7ff146298ffda83a200f8d5084f08dcee1edfc135fcc1d646a45d50ffd6" +checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0" dependencies = [ "log", "sqlparser_derive", diff --git a/core/Cargo.toml b/core/Cargo.toml index ced062a9ab..d074864e1e 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,13 +29,13 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } -arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } -arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } -arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" } -parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb", default-features = false, features = ["experimental"] } -half = { version = "~2.1", default-features = false } +arrow = { version = "51.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { version = "51.0.0" } +arrow-data = { version = "51.0.0" } +arrow-schema = { version = "51.0.0" } +arrow-string = { version = "51.0.0" } +parquet = { version = "51.0.0", default-features = false, features = ["experimental"] } +half = { version = "2.4.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } tokio = { version = "1", features = ["rt-multi-thread"] } @@ -66,10 +66,12 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81" } -datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", features = ["crypto_expressions"] } -datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", default-features = false, features = ["unicode_expressions"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc" } +datafusion = { default-features = false, git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", features = ["crypto_expressions"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false } +datafusion-physical-expr-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" diff --git a/core/src/execution/datafusion/expressions/bitwise_not.rs b/core/src/execution/datafusion/expressions/bitwise_not.rs index f9f8ee392c..06ead26708 100644 --- a/core/src/execution/datafusion/expressions/bitwise_not.rs +++ b/core/src/execution/datafusion/expressions/bitwise_not.rs @@ -105,8 +105,8 @@ impl PhysicalExpr for BitwiseNotExpr { } } - fn children(&self) -> Vec> { - vec![self.arg.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.arg] } fn with_new_children( diff --git a/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs b/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs index 6a4d07b892..b922119f82 100644 --- a/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs +++ b/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs @@ -129,8 +129,8 @@ impl PhysicalExpr for BloomFilterMightContain { }) } - fn children(&self) -> Vec> { - vec![self.bloom_filter_expr.clone(), self.value_expr.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.bloom_filter_expr, &self.value_expr] } fn with_new_children( diff --git a/core/src/execution/datafusion/expressions/cast.rs b/core/src/execution/datafusion/expressions/cast.rs index 7e8a96f28d..eabd58e828 100644 --- a/core/src/execution/datafusion/expressions/cast.rs +++ b/core/src/execution/datafusion/expressions/cast.rs @@ -1291,8 +1291,8 @@ impl PhysicalExpr for Cast { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( diff --git a/core/src/execution/datafusion/expressions/checkoverflow.rs b/core/src/execution/datafusion/expressions/checkoverflow.rs index 1e4b5f3339..044b366e3d 100644 --- a/core/src/execution/datafusion/expressions/checkoverflow.rs +++ b/core/src/execution/datafusion/expressions/checkoverflow.rs @@ -165,8 +165,8 @@ impl PhysicalExpr for CheckOverflow { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( diff --git a/core/src/execution/datafusion/expressions/if_expr.rs b/core/src/execution/datafusion/expressions/if_expr.rs index 6f2ed6a543..fa235cc667 100644 --- a/core/src/execution/datafusion/expressions/if_expr.rs +++ b/core/src/execution/datafusion/expressions/if_expr.rs @@ -110,12 +110,8 @@ impl PhysicalExpr for IfExpr { Ok(ColumnarValue::Array(current_value)) } - fn children(&self) -> Vec> { - vec![ - self.if_expr.clone(), - self.true_expr.clone(), - self.false_expr.clone(), - ] + fn children(&self) -> Vec<&Arc> { + vec![&self.if_expr, &self.true_expr, &self.false_expr] } fn with_new_children( @@ -225,8 +221,8 @@ mod tests { let true_expr = lit(123i32); let false_expr = lit(999i32); - let expr = if_fn(if_expr, true_expr, false_expr); - let children = expr.unwrap().children(); + let expr = if_fn(if_expr, true_expr, false_expr).unwrap(); + let children = expr.children(); assert_eq!(children.len(), 3); assert_eq!(children[0].to_string(), "true"); assert_eq!(children[1].to_string(), "123"); diff --git a/core/src/execution/datafusion/expressions/normalize_nan.rs b/core/src/execution/datafusion/expressions/normalize_nan.rs index 111a34d5d6..3bd5feea5f 100644 --- a/core/src/execution/datafusion/expressions/normalize_nan.rs +++ b/core/src/execution/datafusion/expressions/normalize_nan.rs @@ -77,7 +77,7 @@ impl PhysicalExpr for NormalizeNaNAndZero { } } - fn children(&self) -> Vec> { + fn children(&self) -> Vec<&Arc> { self.child.children() } diff --git a/core/src/execution/datafusion/expressions/scalar_funcs.rs b/core/src/execution/datafusion/expressions/scalar_funcs.rs index 8c5e1f3916..4ac9473674 100644 --- a/core/src/execution/datafusion/expressions/scalar_funcs.rs +++ b/core/src/execution/datafusion/expressions/scalar_funcs.rs @@ -19,7 +19,6 @@ use std::{ any::Any, cmp::min, fmt::{Debug, Write}, - str::FromStr, sync::Arc, }; @@ -35,17 +34,15 @@ use arrow_array::{Array, ArrowNativeTypeOp, Decimal128Array, StringArray}; use arrow_schema::DataType; use datafusion::{ execution::FunctionRegistry, - logical_expr::{ - BuiltinScalarFunction, ScalarFunctionDefinition, ScalarFunctionImplementation, - ScalarUDFImpl, Signature, Volatility, - }, + functions::math::round::round, + logical_expr::{ScalarFunctionImplementation, ScalarUDFImpl, Signature, Volatility}, physical_plan::ColumnarValue, }; use datafusion_common::{ cast::{as_binary_array, as_generic_string_array}, exec_err, internal_err, DataFusionError, Result as DataFusionResult, ScalarValue, }; -use datafusion_physical_expr::{math_expressions, udf::ScalarUDF}; +use datafusion_expr::ScalarUDF; use num::{ integer::{div_ceil, div_floor}, BigInt, Signed, ToPrimitive, @@ -63,9 +60,7 @@ macro_rules! make_comet_scalar_udf { $data_type.clone(), Arc::new(move |args| $func(args, &$data_type)), ); - Ok(ScalarFunctionDefinition::UDF(Arc::new( - ScalarUDF::new_from_impl(scalar_func), - ))) + Ok(Arc::new(ScalarUDF::new_from_impl(scalar_func))) }}; ($name:expr, $func:expr, without $data_type:ident) => {{ let scalar_func = CometScalarFunction::new( @@ -74,9 +69,7 @@ macro_rules! make_comet_scalar_udf { $data_type, $func, ); - Ok(ScalarFunctionDefinition::UDF(Arc::new( - ScalarUDF::new_from_impl(scalar_func), - ))) + Ok(Arc::new(ScalarUDF::new_from_impl(scalar_func))) }}; } @@ -85,7 +78,7 @@ pub fn create_comet_physical_fun( fun_name: &str, data_type: DataType, registry: &dyn FunctionRegistry, -) -> Result { +) -> Result, DataFusionError> { let sha2_functions = ["sha224", "sha256", "sha384", "sha512"]; match fun_name { "ceil" => { @@ -129,13 +122,11 @@ pub fn create_comet_physical_fun( let spark_func_name = "spark".to_owned() + sha; make_comet_scalar_udf!(spark_func_name, wrapped_func, without data_type) } - _ => { - if let Ok(fun) = BuiltinScalarFunction::from_str(fun_name) { - Ok(ScalarFunctionDefinition::BuiltIn(fun)) - } else { - Ok(ScalarFunctionDefinition::UDF(registry.udf(fun_name)?)) - } - } + _ => registry.udf(fun_name).map_err(|e| { + DataFusionError::Execution(format!( + "Function {fun_name} not found in the registry: {e}", + )) + }), } } @@ -498,9 +489,7 @@ fn spark_round( make_decimal_array(array, precision, scale, &f) } DataType::Float32 | DataType::Float64 => { - Ok(ColumnarValue::Array(math_expressions::round(&[ - array.clone() - ])?)) + Ok(ColumnarValue::Array(round(&[array.clone()])?)) } dt => exec_err!("Not supported datatype for ROUND: {dt}"), }, @@ -523,7 +512,7 @@ fn spark_round( make_decimal_scalar(a, precision, scale, &f) } ScalarValue::Float32(_) | ScalarValue::Float64(_) => Ok(ColumnarValue::Scalar( - ScalarValue::try_from_array(&math_expressions::round(&[a.to_array()?])?, 0)?, + ScalarValue::try_from_array(&round(&[a.to_array()?])?, 0)?, )), dt => exec_err!("Not supported datatype for ROUND: {dt}"), }, diff --git a/core/src/execution/datafusion/expressions/strings.rs b/core/src/execution/datafusion/expressions/strings.rs index ee9a22212b..cbbd4cfa4d 100644 --- a/core/src/execution/datafusion/expressions/strings.rs +++ b/core/src/execution/datafusion/expressions/strings.rs @@ -111,8 +111,8 @@ macro_rules! make_predicate_function { Ok(ColumnarValue::Array(Arc::new(array))) } - fn children(&self) -> Vec> { - vec![self.left.clone(), self.right.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.left, &self.right] } fn with_new_children( @@ -221,8 +221,8 @@ impl PhysicalExpr for SubstringExec { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( @@ -286,8 +286,8 @@ impl PhysicalExpr for StringSpaceExec { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( diff --git a/core/src/execution/datafusion/expressions/subquery.rs b/core/src/execution/datafusion/expressions/subquery.rs index bf37cb8951..9b1be2df67 100644 --- a/core/src/execution/datafusion/expressions/subquery.rs +++ b/core/src/execution/datafusion/expressions/subquery.rs @@ -199,7 +199,7 @@ impl PhysicalExpr for Subquery { } } - fn children(&self) -> Vec> { + fn children(&self) -> Vec<&Arc> { vec![] } diff --git a/core/src/execution/datafusion/expressions/temporal.rs b/core/src/execution/datafusion/expressions/temporal.rs index 4ae3c2605c..22b4aee8a3 100644 --- a/core/src/execution/datafusion/expressions/temporal.rs +++ b/core/src/execution/datafusion/expressions/temporal.rs @@ -111,8 +111,8 @@ impl PhysicalExpr for HourExec { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( @@ -205,8 +205,8 @@ impl PhysicalExpr for MinuteExec { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( @@ -299,8 +299,8 @@ impl PhysicalExpr for SecondExec { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( @@ -386,8 +386,8 @@ impl PhysicalExpr for DateTruncExec { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( @@ -511,8 +511,8 @@ impl PhysicalExpr for TimestampTruncExec { } } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( diff --git a/core/src/execution/datafusion/operators/expand.rs b/core/src/execution/datafusion/operators/expand.rs index ca3fdc1aa4..5285dfb468 100644 --- a/core/src/execution/datafusion/operators/expand.rs +++ b/core/src/execution/datafusion/operators/expand.rs @@ -96,8 +96,8 @@ impl ExecutionPlan for CometExpandExec { self.schema.clone() } - fn children(&self) -> Vec> { - vec![self.child.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.child] } fn with_new_children( diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index 7a37e3aae4..5108a3f535 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -17,7 +17,7 @@ //! Converts Spark physical plan to DataFusion physical plan -use std::{collections::HashMap, str::FromStr, sync::Arc}; +use std::{collections::HashMap, sync::Arc}; use arrow_schema::{DataType, Field, Schema, TimeUnit}; use datafusion::{ @@ -25,9 +25,7 @@ use datafusion::{ common::DataFusionError, execution::FunctionRegistry, functions::math, - logical_expr::{ - BuiltinScalarFunction, Operator as DataFusionOperator, ScalarFunctionDefinition, - }, + logical_expr::Operator as DataFusionOperator, physical_expr::{ execution_props::ExecutionProps, expressions::{ @@ -52,6 +50,7 @@ use datafusion_common::{ tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter}, JoinType as DFJoinType, ScalarValue, }; +use datafusion_physical_expr_common::aggregate::create_aggregate_expr; use itertools::Itertools; use jni::objects::GlobalRef; use num::{BigInt, ToPrimitive}; @@ -495,10 +494,7 @@ impl PhysicalPlanner { let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema.clone())?; let return_type = child.data_type(&input_schema)?; let args = vec![child]; - let scalar_def = ScalarFunctionDefinition::UDF(math::abs()); - - let expr = - ScalarFunctionExpr::new("abs", scalar_def, args, return_type, None, false); + let expr = ScalarFunctionExpr::new("abs", math::abs(), args, return_type); Ok(Arc::new(expr)) } ExprStruct::CaseWhen(case_when) => { @@ -684,8 +680,6 @@ impl PhysicalPlanner { fun_expr, vec![left, right], data_type, - None, - false, ))) } _ => Ok(Arc::new(BinaryExpr::new(left, op, right))), @@ -1204,26 +1198,18 @@ impl PhysicalPlanner { } } AggExprStruct::First(expr) => { - let child = self.create_expr(expr.child.as_ref().unwrap(), schema)?; - let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap()); - Ok(Arc::new(FirstValue::new( - child, - "first", - datatype, - vec![], - vec![], - ))) + let child = self.create_expr(expr.child.as_ref().unwrap(), schema.clone())?; + let func = datafusion_expr::AggregateUDF::new_from_impl(FirstValue::new()); + + create_aggregate_expr(&func, &[child], &[], &[], &schema, "first", false, false) + .map_err(|e| e.into()) } AggExprStruct::Last(expr) => { - let child = self.create_expr(expr.child.as_ref().unwrap(), schema)?; - let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap()); - Ok(Arc::new(LastValue::new( - child, - "last", - datatype, - vec![], - vec![], - ))) + let child = self.create_expr(expr.child.as_ref().unwrap(), schema.clone())?; + let func = datafusion_expr::AggregateUDF::new_from_impl(LastValue::new()); + + create_aggregate_expr(&func, &[child], &[], &[], &schema, "last", false, false) + .map_err(|e| e.into()) } AggExprStruct::BitAndAgg(expr) => { let child = self.create_expr(expr.child.as_ref().unwrap(), schema)?; @@ -1368,21 +1354,11 @@ impl PhysicalPlanner { let data_type = match expr.return_type.as_ref().map(to_arrow_datatype) { Some(t) => t, - None => { - // If no data type is provided from Spark, we'll use DF's return type from the - // scalar function - // Note this assumes the `fun_name` is a defined function in DF. Otherwise, it'll - // throw error. - - if let Ok(fun) = BuiltinScalarFunction::from_str(fun_name) { - fun.return_type(&input_expr_types)? - } else { - self.session_ctx - .udf(fun_name)? - .inner() - .return_type(&input_expr_types)? - } - } + None => self + .session_ctx + .udf(fun_name)? + .inner() + .return_type(&input_expr_types)?, }; let fun_expr = @@ -1393,8 +1369,6 @@ impl PhysicalPlanner { fun_expr, args.to_vec(), data_type, - None, - args.is_empty(), )); Ok(scalar_expr) @@ -1439,7 +1413,7 @@ fn expr_to_columns( let mut left_field_indices: Vec = vec![]; let mut right_field_indices: Vec = vec![]; - expr.apply(&mut |expr| { + expr.apply(&mut |expr: &Arc| { Ok({ if let Some(column) = expr.as_any().downcast_ref::() { if column.index() > left_field_len + right_field_len { diff --git a/core/src/execution/datafusion/shuffle_writer.rs b/core/src/execution/datafusion/shuffle_writer.rs index 3b92abbde4..914c741cc5 100644 --- a/core/src/execution/datafusion/shuffle_writer.rs +++ b/core/src/execution/datafusion/shuffle_writer.rs @@ -104,8 +104,8 @@ impl ExecutionPlan for ShuffleWriterExec { self.input.schema() } - fn children(&self) -> Vec> { - vec![self.input.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.input] } fn with_new_children( diff --git a/core/src/execution/operators/copy.rs b/core/src/execution/operators/copy.rs index 96c244935e..d011b3cb22 100644 --- a/core/src/execution/operators/copy.rs +++ b/core/src/execution/operators/copy.rs @@ -93,8 +93,8 @@ impl ExecutionPlan for CopyExec { self.schema.clone() } - fn children(&self) -> Vec> { - vec![self.input.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.input] } fn with_new_children( diff --git a/core/src/execution/operators/scan.rs b/core/src/execution/operators/scan.rs index 99c7c8391e..bd518eda10 100644 --- a/core/src/execution/operators/scan.rs +++ b/core/src/execution/operators/scan.rs @@ -248,7 +248,7 @@ impl ExecutionPlan for ScanExec { scan_schema(input_batch, &self.data_types) } - fn children(&self) -> Vec> { + fn children(&self) -> Vec<&Arc> { vec![] } From 3c7452f80267c59ed5021efe4851157b6aa67b15 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 1 Jun 2024 12:07:33 -0700 Subject: [PATCH 09/11] Fix --- core/src/execution/datafusion/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index 5108a3f535..606a8236d9 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -773,7 +773,7 @@ impl PhysicalPlanner { .iter() .enumerate() .map(|(idx, expr)| { - self.create_expr(expr, child.schema()) + self.create_expr(expr, aggregate.schema()) .map(|r| (r, format!("col_{}", idx))) }) .collect(); From f51326baa73ede9818a48476e6a805894ee53448 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 5 Jun 2024 23:42:09 -0700 Subject: [PATCH 10/11] Fix --- .../execution/datafusion/expressions/negative.rs | 14 +++++++------- .../execution/datafusion/expressions/unbound.rs | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/execution/datafusion/expressions/negative.rs b/core/src/execution/datafusion/expressions/negative.rs index e7aa2ac646..b369eb7172 100644 --- a/core/src/execution/datafusion/expressions/negative.rs +++ b/core/src/execution/datafusion/expressions/negative.rs @@ -24,9 +24,8 @@ use datafusion::{ physical_expr::PhysicalExpr, }; use datafusion_common::{Result, ScalarValue}; -use datafusion_physical_expr::{ - aggregate::utils::down_cast_any_ref, sort_properties::SortProperties, -}; +use datafusion_expr::sort_properties::ExprProperties; +use datafusion_physical_expr::aggregate::utils::down_cast_any_ref; use std::{ any::Any, hash::{Hash, Hasher}, @@ -195,8 +194,8 @@ impl PhysicalExpr for NegativeExpr { } } - fn children(&self) -> Vec> { - vec![self.arg.clone()] + fn children(&self) -> Vec<&Arc> { + vec![&self.arg] } fn with_new_children( @@ -255,8 +254,9 @@ impl PhysicalExpr for NegativeExpr { } /// The ordering of a [`NegativeExpr`] is simply the reverse of its child. - fn get_ordering(&self, children: &[SortProperties]) -> SortProperties { - -children[0] + fn get_properties(&self, children: &[ExprProperties]) -> Result { + let properties = children[0].clone().with_order(children[0].sort_properties); + Ok(properties) } } diff --git a/core/src/execution/datafusion/expressions/unbound.rs b/core/src/execution/datafusion/expressions/unbound.rs index 5387b10125..95f9912c98 100644 --- a/core/src/execution/datafusion/expressions/unbound.rs +++ b/core/src/execution/datafusion/expressions/unbound.rs @@ -83,7 +83,7 @@ impl PhysicalExpr for UnboundColumn { internal_err!("UnboundColumn::evaluate() should not be called") } - fn children(&self) -> Vec> { + fn children(&self) -> Vec<&Arc> { vec![] } From 8de0149f1b9516aa8732560ea9a41c6326cccdea Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 7 Jun 2024 10:08:11 -0700 Subject: [PATCH 11/11] Use 39.0.0-rc1 --- core/Cargo.lock | 153 ++++++++++-------- core/Cargo.toml | 25 +-- .../datafusion/expressions/negative.rs | 5 +- core/src/execution/kernels/hash.rs | 16 ++ 4 files changed, 121 insertions(+), 78 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 0adac56e8a..59c46c92be 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -114,9 +114,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +checksum = "7ae9728f104939be6d8d9b368a354b4929b0569160ea1641f0721b55a861ce38" dependencies = [ "arrow-arith", "arrow-array", @@ -135,9 +135,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +checksum = "a7029a5b3efbeafbf4a12d12dc16b8f9e9bff20a410b8c25c5d28acc089e1043" dependencies = [ "arrow-array", "arrow-buffer", @@ -150,16 +150,16 @@ dependencies = [ [[package]] name = "arrow-array" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +checksum = "d33238427c60271710695f17742f45b1a5dc5bcfc5c15331c25ddfe7abf70d97" dependencies = [ "ahash", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "chrono-tz", + "chrono-tz 0.9.0", "half 2.4.1", "hashbrown", "num", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +checksum = "fe9b95e825ae838efaf77e366c00d3fc8cca78134c9db497d6bda425f2e7b7c1" dependencies = [ "bytes", "half 2.4.1", @@ -178,9 +178,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +checksum = "87cf8385a9d5b5fcde771661dd07652b79b9139fea66193eda6a88664400ccab" dependencies = [ "arrow-array", "arrow-buffer", @@ -199,9 +199,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" +checksum = "cea5068bef430a86690059665e40034625ec323ffa4dd21972048eebb0127adc" dependencies = [ "arrow-array", "arrow-buffer", @@ -218,9 +218,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +checksum = "cb29be98f987bcf217b070512bb7afba2f65180858bca462edf4a39d84a23e10" dependencies = [ "arrow-buffer", "arrow-schema", @@ -230,9 +230,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" +checksum = "ffc68f6523970aa6f7ce1dc9a33a7d9284cfb9af77d4ad3e617dbe5d79cc6ec8" dependencies = [ "arrow-array", "arrow-buffer", @@ -245,9 +245,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" +checksum = "2041380f94bd6437ab648e6c2085a045e45a0c44f91a1b9a4fe3fed3d379bfb1" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,9 +265,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +checksum = "fcb56ed1547004e12203652f12fe12e824161ff9d1e5cf2a7dc4ff02ba94f413" dependencies = [ "arrow-array", "arrow-buffer", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +checksum = "575b42f1fc588f2da6977b94a5ca565459f5ab07b60545e17243fb9a7ed6d43e" dependencies = [ "ahash", "arrow-array", @@ -295,18 +295,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +checksum = "32aae6a60458a2389c0da89c9de0b7932427776127da1a738e2efc21d32f3393" dependencies = [ "bitflags 2.5.0", ] [[package]] name = "arrow-select" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +checksum = "de36abaef8767b4220d7b4a8c2fe5ffc78b47db81b03d77e2136091c3ba39102" dependencies = [ "ahash", "arrow-array", @@ -318,9 +318,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +checksum = "e435ada8409bcafc910bc3e0077f532a4daa20e99060a496685c0e3e53cc2597" dependencies = [ "arrow-array", "arrow-buffer", @@ -522,7 +522,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" dependencies = [ "chrono", - "chrono-tz-build", + "chrono-tz-build 0.2.1", + "phf", +] + +[[package]] +name = "chrono-tz" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +dependencies = [ + "chrono", + "chrono-tz-build 0.3.0", "phf", ] @@ -537,6 +548,17 @@ dependencies = [ "phf_codegen", ] +[[package]] +name = "chrono-tz-build" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "ciborium" version = "0.2.1" @@ -606,6 +628,7 @@ dependencies = [ "ahash", "arrow", "arrow-array", + "arrow-buffer", "arrow-data", "arrow-schema", "arrow-string", @@ -615,7 +638,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "chrono-tz", + "chrono-tz 0.8.6", "crc32fast", "criterion", "datafusion", @@ -842,8 +865,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "ahash", "arrow", @@ -875,6 +898,7 @@ dependencies = [ "num_cpus", "object_store", "parking_lot", + "paste", "pin-project-lite", "rand", "sqlparser", @@ -886,8 +910,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "ahash", "arrow", @@ -906,16 +930,16 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "arrow", "chrono", @@ -934,12 +958,13 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "ahash", "arrow", "arrow-array", + "arrow-buffer", "chrono", "datafusion-common", "paste", @@ -951,8 +976,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "arrow", "base64", @@ -977,9 +1002,10 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ + "ahash", "arrow", "arrow-schema", "datafusion-common", @@ -993,15 +1019,14 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "arrow", "async-trait", "chrono", "datafusion-common", "datafusion-expr", - "datafusion-functions-aggregate", "datafusion-physical-expr", "hashbrown", "indexmap", @@ -1012,8 +1037,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "ahash", "arrow", @@ -1042,8 +1067,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "arrow", "datafusion-common", @@ -1053,8 +1078,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "ahash", "arrow", @@ -1086,8 +1111,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "38.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ad2b1dc#ad2b1dcac8168906e4444527320d3139a1a2ea5b" +version = "39.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?tag=39.0.0-rc1#6a4a280e3cf70fe5f1a1cfe7c2de13e4c39f89bb" dependencies = [ "arrow", "arrow-array", @@ -1191,9 +1216,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "23.5.26" +version = "24.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1957,9 +1982,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" +checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" dependencies = [ "async-trait", "bytes", @@ -2031,9 +2056,9 @@ dependencies = [ [[package]] name = "parquet" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" +checksum = "29c3b5322cc1bbf67f11c079c42be41a55949099b78732f7dba9e15edde40eab" dependencies = [ "ahash", "bytes", @@ -2588,9 +2613,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.45.0" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" dependencies = [ "log", "sqlparser_derive", diff --git a/core/Cargo.toml b/core/Cargo.toml index 42e24dab25..564c450cb2 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,12 +29,13 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { version = "51.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { version = "51.0.0" } -arrow-data = { version = "51.0.0" } -arrow-schema = { version = "51.0.0" } -arrow-string = { version = "51.0.0" } -parquet = { version = "51.0.0", default-features = false, features = ["experimental"] } +arrow = { version = "52.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { version = "52.0.0" } +arrow-buffer = { version = "52.0.0" } +arrow-data = { version = "52.0.0" } +arrow-schema = { version = "52.0.0" } +arrow-string = { version = "52.0.0" } +parquet = { version = "52.0.0", default-features = false, features = ["experimental"] } half = { version = "2.4.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } @@ -66,12 +67,12 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc" } -datafusion = { default-features = false, git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", features = ["crypto_expressions"] } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false } -datafusion-physical-expr-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false } -datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", tag = "39.0.0-rc1" } +datafusion = { default-features = false, git = "https://github.com/apache/arrow-datafusion.git", tag = "39.0.0-rc1", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", tag = "39.0.0-rc1", features = ["crypto_expressions"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", tag = "39.0.0-rc1", default-features = false } +datafusion-physical-expr-common = { git = "https://github.com/apache/arrow-datafusion.git", tag = "39.0.0-rc1", default-features = false } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", tag = "39.0.0-rc1", default-features = false } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" diff --git a/core/src/execution/datafusion/expressions/negative.rs b/core/src/execution/datafusion/expressions/negative.rs index b369eb7172..a85cde89eb 100644 --- a/core/src/execution/datafusion/expressions/negative.rs +++ b/core/src/execution/datafusion/expressions/negative.rs @@ -18,6 +18,7 @@ use crate::errors::CometError; use arrow::{compute::kernels::numeric::neg_wrapping, datatypes::IntervalDayTimeType}; use arrow_array::RecordBatch; +use arrow_buffer::IntervalDayTime; use arrow_schema::{DataType, Schema}; use datafusion::{ logical_expr::{interval_arithmetic::Interval, ColumnarValue}, @@ -62,7 +63,7 @@ macro_rules! check_overflow { for i in 0..typed_array.len() { if typed_array.value(i) == $min_val { if $type_name == "byte" || $type_name == "short" { - let value = typed_array.value(i).to_string() + " caused"; + let value = format!("{:?} caused", typed_array.value(i)); return Err(arithmetic_overflow_error(value.as_str()).into()); } return Err(arithmetic_overflow_error($type_name).into()); @@ -134,7 +135,7 @@ impl PhysicalExpr for NegativeExpr { arrow::datatypes::IntervalUnit::DayTime => check_overflow!( array, arrow::array::IntervalDayTimeArray, - i64::MIN, + IntervalDayTime::MIN, "interval" ), arrow::datatypes::IntervalUnit::MonthDayNano => { diff --git a/core/src/execution/kernels/hash.rs b/core/src/execution/kernels/hash.rs index de30f74cdf..b39fd62243 100644 --- a/core/src/execution/kernels/hash.rs +++ b/core/src/execution/kernels/hash.rs @@ -22,6 +22,7 @@ use arrow_array::{ downcast_dictionary_array, downcast_primitive_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, PrimitiveArray, }; +use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use std::fmt::Debug; pub fn hash(src: &[ArrayRef], dst: &mut [u64]) { @@ -169,3 +170,18 @@ impl Hashable for f64 { state.hash_one(u64::from_ne_bytes(self.to_ne_bytes())) } } + +impl Hashable for IntervalDayTime { + fn create_hash(&self, state: &RandomState) -> u64 { + state.hash_one(self.days); + state.hash_one(self.milliseconds) + } +} + +impl Hashable for IntervalMonthDayNano { + fn create_hash(&self, state: &RandomState) -> u64 { + state.hash_one(self.months); + state.hash_one(self.days); + state.hash_one(self.nanoseconds) + } +}