diff --git a/Cargo.lock b/Cargo.lock index 016e40cabcc..66c3ef72259 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -187,9 +187,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -208,9 +208,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -222,9 +222,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -241,9 +241,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -253,9 +253,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -275,9 +275,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -290,9 +290,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -303,9 +303,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -319,9 +319,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +343,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -356,9 +356,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -369,9 +369,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags 2.11.1", "serde_core", @@ -380,9 +380,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -394,9 +394,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -2001,9 +2001,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7541353e77dc7262b71ca27be07d8393661737e3a73b5d1b1c6f7d814c64fa2a" +checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" dependencies = [ "arrow", "arrow-schema", @@ -2038,7 +2038,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "parquet", "rand 0.9.4", @@ -2052,9 +2052,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9997731f90fa5398ef831ad0e69600f92c861b79c0d38bd1a29b6f0e3a0ce4c8" +checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" dependencies = [ "arrow", "async-trait", @@ -2070,16 +2070,16 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b30a3dd50dec860c9559275c8d97d9de602e611237a6ecfbda0b3b63b872352" +checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" dependencies = [ "arrow", "async-trait", @@ -2095,14 +2095,14 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", ] [[package]] name = "datafusion-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d551054acec0398ca604512310b77ce05c46f66e54b54d48200a686e385cca4e" +checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" dependencies = [ "ahash", "arrow", @@ -2111,9 +2111,10 @@ dependencies = [ "half", "hashbrown 0.16.1", "indexmap", + "itertools 0.14.0", "libc", "log", - "object_store", + "object_store 0.13.2", "parquet", "paste", "sqlparser", @@ -2123,9 +2124,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567d40e285f5b79f8737b576605721cd6c1133b5d2b00bdbd5d9838d90d0812f" +checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" dependencies = [ "futures", "log", @@ -2134,9 +2135,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d2668f51b3b30befae2207472569e37807fdedd1d14da58acc6f8ca6257eae" +checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" dependencies = [ "arrow", "async-trait", @@ -2155,7 +2156,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "rand 0.9.4", "tokio", "url", @@ -2163,9 +2164,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02e1b3e3a8ec55f1f62de4252b0407c8567363d056078769a197e24fc834a0f" +checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" dependencies = [ "arrow", "arrow-ipc", @@ -2181,15 +2182,15 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b559d7bf87d4f900f847baba8509634f838d9718695389e903604cdcccdb01f3" +checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" dependencies = [ "arrow", "async-trait", @@ -2203,16 +2204,16 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", "regex", "tokio", ] [[package]] name = "datafusion-datasource-json" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "250e2d7591ba8b638f063854650faa40bca4e8bd4059b2ece8836f6388d02db4" +checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" dependencies = [ "arrow", "async-trait", @@ -2226,15 +2227,17 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b043149f2c3557ca94abc58de40f68a8d412ff53365c06126ed234f8596399d" +checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" dependencies = [ "arrow", "async-trait", @@ -2254,7 +2257,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "parquet", "tokio", @@ -2262,25 +2265,27 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9496cb0db222dbb9a3735760ceca7fc56f35e1d5502c38d0caa77a81e9c1f6a" +checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" [[package]] name = "datafusion-execution" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc45d23c516ed8d3637751e44e09e21b45b3f58b473c802dddd1f1ad4fe435ff" +checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.4", "tempfile", @@ -2289,9 +2294,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63dd30526d2db4fda6440806a41e4676334a94bc0596cc9cc2a0efed20ef2c44" +checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" dependencies = [ "arrow", "async-trait", @@ -2311,9 +2316,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b486b5f6255d40976b88bb83813b0d035a8333e0ec39864824e78068cf42fa6" +checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" dependencies = [ "arrow", "datafusion-common", @@ -2324,9 +2329,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07356c94118d881130dd0ffbff127540407d969c8978736e324edcd6c41cd48f" +checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" dependencies = [ "arrow", "arrow-buffer", @@ -2345,6 +2350,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.4", "regex", @@ -2355,9 +2361,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b644f9cf696df9233ce6958b9807666d78563b56f923267474dd6c07795f1f8f" +checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" dependencies = [ "ahash", "arrow", @@ -2371,14 +2377,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1de2deaaabe8923ce9ea9f29c47bbb4ee14f67ea2fe1ab5398d9bbebcf86e56" +checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" dependencies = [ "ahash", "arrow", @@ -2389,9 +2396,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552f8d92e4331ee91d23c02d12bb6acf32cbfd5215117e01c0fb63cd4b15af1a" +checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" dependencies = [ "arrow", "arrow-ord", @@ -2405,16 +2412,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "970fd0cdd3df8802b9a9975ff600998289ba9d46682a4f7285cba4820c9ada78" +checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" dependencies = [ "arrow", "async-trait", @@ -2428,9 +2437,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b4c21a7c8a986a1866c0a87ab756d0bbf7b5f41f306009fa2d9af79c52ed31" +checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" dependencies = [ "arrow", "datafusion-common", @@ -2446,9 +2455,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1210ad73b8b3211aeaf4a42bef9bd7a2b7fce3ec119a478831f18c6ff7f7b93" +checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2456,9 +2465,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaa566a963013a38681ad82a727a654bc7feb19632426aea8c3412d415d200c5" +checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" dependencies = [ "datafusion-doc", "quote", @@ -2467,9 +2476,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff9aa82b240252a88dee118372f9b9757c545ab9e53c0736bebab2e7da0ef1f2" +checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" dependencies = [ "arrow", "chrono", @@ -2486,9 +2495,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d48022b8af9988c1d852644f9e8b5584c490659769a550c5e8d39457a1da0a5" +checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" dependencies = [ "ahash", "arrow", @@ -2509,9 +2518,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae7a8abc0b4fe624000972a9b145b30b7f1b680bffaa950ea53f78d9b21c27c3" +checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" dependencies = [ "arrow", "datafusion-common", @@ -2524,9 +2533,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147253ca3e6b9d59c162de64c02800973018660e13340dd1886dd038d17ac429" +checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" dependencies = [ "ahash", "arrow", @@ -2541,9 +2550,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "689156bb2282107b6239db8d7ef44b4dab10a9b33d3491a0c74acac5e4fedd72" +checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" dependencies = [ "arrow", "datafusion-common", @@ -2559,9 +2568,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68253dc0ee5330aa558b2549c9b0da5af9fc17d753ae73022939014ad616fc28" +checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" dependencies = [ "ahash", "arrow", @@ -2583,6 +2592,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2590,9 +2600,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fcad240a54d0b1d3e8f668398900260a53122d522b2102ab57218590decacd6" +checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" dependencies = [ "arrow", "datafusion-common", @@ -2607,9 +2617,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58e83a68bb67007a8fcbf005c44cefe441270c7ee7f6dee10c0e0109b556f6d" +checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" dependencies = [ "async-trait", "datafusion-common", @@ -2621,15 +2631,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be53e9eb55db0fbb8980bb6d87f2435b0524acf4c718ed54a57cabbb299b2ab3" +checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap", "log", "regex", @@ -2638,9 +2649,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dadf18107366b3470e47bdde8eeb9294b36444e451830d8e5fc8dc7b4ad9c58" +checksum = "d5e5656a7e63d51dd3e5af3dbd347ea83bbe993a77c66b854b74961570d16490" dependencies = [ "async-recursion", "async-trait", @@ -2648,7 +2659,7 @@ dependencies = [ "datafusion", "half", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "pbjson-types", "prost", "substrait", @@ -3445,9 +3456,9 @@ dependencies = [ [[package]] name = "geoarrow-array" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1cc4106ac0a0a512c398961ce95d8150475c84a84e17c4511c3643fa120a17" +checksum = "dafe7b7de3fab1a8b7099fd6a6434ca955fa65065f9c19f0f8a133693f3c2b0e" dependencies = [ "arrow-array", "arrow-buffer", @@ -3461,9 +3472,9 @@ dependencies = [ [[package]] name = "geoarrow-expr-geo" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa84300361ce57fb875bcaa6e32b95b0aff5c6b1af692b936bdd58ff343f4394" +checksum = "8e4a62ac19c86827c6ec81ea584594b3ee96db5a8119b9774d3466c6b373c434" dependencies = [ "arrow-array", "arrow-buffer", @@ -3475,9 +3486,9 @@ dependencies = [ [[package]] name = "geoarrow-schema" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97be4e9f523f92bd6a0e0458323f4b783d073d011664decd8dbf05651704f34" +checksum = "4d4a7edb2a1d87024a93805332a9c8184a0354836271d42c0d18cf628a5e3cd0" dependencies = [ "arrow-schema", "geo-traits", @@ -3488,9 +3499,9 @@ dependencies = [ [[package]] name = "geodatafusion" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cb8faa9b3bf4ae9f49b1f023b82d20626826f6448a7055498376146c10c4ead" +checksum = "af7cd430f1a1f59bc97053d824ad410ea6fd123c8977b3c1a75335e289233b8b" dependencies = [ "arrow-arith", "arrow-array", @@ -4492,7 +4503,7 @@ dependencies = [ "lzma-sys", "mock_instant", "moka", - "object_store", + "object_store 0.12.5", "paste", "permutation", "pin-project", @@ -4544,7 +4555,7 @@ dependencies = [ [[package]] name = "lance-arrow-scalar" -version = "57.0.0" +version = "58.0.0" dependencies = [ "arrow-array", "arrow-buffer", @@ -4560,7 +4571,7 @@ dependencies = [ [[package]] name = "lance-arrow-stats" -version = "57.0.0" +version = "58.0.0" dependencies = [ "arrow-array", "arrow-schema", @@ -4603,7 +4614,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.12.5", "pin-project", "proptest", "prost", @@ -4741,7 +4752,7 @@ dependencies = [ "lance-datagen", "lance-index", "lance-linalg", - "object_store", + "object_store 0.12.5", "parquet", "rand 0.9.4", "tempfile", @@ -4776,7 +4787,7 @@ dependencies = [ "libc", "log", "num-traits", - "object_store", + "object_store 0.12.5", "pprof", "pretty_assertions", "proptest", @@ -4861,7 +4872,7 @@ dependencies = [ "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "pprof", "prost", "prost-build", @@ -4917,7 +4928,7 @@ dependencies = [ "mock_instant", "mockall", "moka", - "object_store", + "object_store 0.12.5", "object_store_opendal", "opendal", "path_abs", @@ -5019,7 +5030,7 @@ dependencies = [ "lance-namespace", "lance-table", "log", - "object_store", + "object_store 0.12.5", "rand 0.9.4", "reqwest", "rstest", @@ -5073,7 +5084,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "pprof", "pretty_assertions", "proptest", @@ -5134,7 +5145,7 @@ dependencies = [ "lance-core", "lance-file", "lance-io", - "object_store", + "object_store 0.12.5", "snafu", "tokio", "url", @@ -5456,9 +5467,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -5937,6 +5948,32 @@ dependencies = [ "web-time", ] +[[package]] +name = "object_store" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "object_store_opendal" version = "0.55.0" @@ -5947,7 +5984,7 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store", + "object_store 0.12.5", "opendal", "pin-project", "tokio", @@ -6138,14 +6175,13 @@ dependencies = [ [[package]] name = "parquet" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -6162,7 +6198,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "object_store", + "object_store 0.13.2", "paste", "seq-macro", "simdutf8", @@ -7875,9 +7911,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -7885,9 +7921,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", @@ -8391,6 +8427,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 6b5d2842a37..ac660892ff1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,19 +75,19 @@ lance-test-macros = { version = "=5.1.0-beta.3", path = "./rust/lance-test-macro lance-testing = { version = "=5.1.0-beta.3", path = "./rust/lance-testing" } approx = "0.5.1" # Note that this one does not include pyarrow -arrow = { version = "57.0.0", optional = false, features = ["prettyprint"] } -lance-arrow-scalar = { version = "=57.0.0", path = "./rust/arrow-scalar" } -lance-arrow-stats = { version = "=57.0.0", path = "./rust/arrow-stats" } -arrow-arith = "57.0.0" -arrow-array = "57.0.0" -arrow-buffer = "57.0.0" -arrow-cast = "57.0.0" -arrow-data = "57.0.0" -arrow-ipc = { version = "57.0.0", features = ["zstd"] } -arrow-ord = "57.0.0" -arrow-row = "57.0.0" -arrow-schema = "57.0.0" -arrow-select = "57.0.0" +arrow = { version = "58.0.0", optional = false, features = ["prettyprint"] } +lance-arrow-scalar = { version = "=58.0.0", path = "./rust/arrow-scalar" } +lance-arrow-stats = { version = "=58.0.0", path = "./rust/arrow-stats" } +arrow-arith = "58.0.0" +arrow-array = "58.0.0" +arrow-buffer = "58.0.0" +arrow-cast = "58.0.0" +arrow-data = "58.0.0" +arrow-ipc = { version = "58.0.0", features = ["zstd"] } +arrow-ord = "58.0.0" +arrow-row = "58.0.0" +arrow-schema = "58.0.0" +arrow-select = "58.0.0" async-recursion = "1.0" async-trait = "0.1" axum = "0.7" @@ -116,7 +116,7 @@ criterion = { version = "0.5", features = [ ] } crossbeam-queue = "0.3" crossbeam-skiplist = "0.1" -datafusion = { version = "52.1.0", default-features = false, features = [ +datafusion = { version = "53.0.0", default-features = false, features = [ "crypto_expressions", "datetime_expressions", "encoding_expressions", @@ -126,25 +126,25 @@ datafusion = { version = "52.1.0", default-features = false, features = [ "string_expressions", "unicode_expressions", ] } -datafusion-common = "52.1.0" -datafusion-functions = { version = "52.1.0", features = ["regex_expressions"] } -datafusion-sql = "52.1.0" -datafusion-expr = "52.1.0" -datafusion-ffi = "52.1.0" -datafusion-execution = "52.1.0" -datafusion-optimizer = "52.1.0" -datafusion-physical-expr = "52.1.0" -datafusion-physical-plan = "52.1.0" -datafusion-substrait = "52.1.0" +datafusion-common = "53.0.0" +datafusion-functions = { version = "53.0.0", features = ["regex_expressions"] } +datafusion-sql = "53.0.0" +datafusion-expr = "53.0.0" +datafusion-ffi = "53.0.0" +datafusion-execution = "53.0.0" +datafusion-optimizer = "53.0.0" +datafusion-physical-expr = "53.0.0" +datafusion-physical-plan = "53.0.0" +datafusion-substrait = "53.0.0" deepsize = "0.2.0" dirs = "6.0.0" either = "1.0" fst = { version = "0.4.7", features = ["levenshtein"] } fsst = { version = "=5.1.0-beta.3", path = "./rust/compression/fsst" } futures = "0.3" -geoarrow-array = "0.7" -geoarrow-schema = "0.7" -geodatafusion = "0.3.0" +geoarrow-array = "0.8" +geoarrow-schema = "0.8" +geodatafusion = "0.4.0" geo-traits = "0.3.0" geo-types = "0.7.16" http = "1.1.0" diff --git a/java/lance-jni/Cargo.toml b/java/lance-jni/Cargo.toml index 54213a2ca8c..82b74b43fa9 100644 --- a/java/lance-jni/Cargo.toml +++ b/java/lance-jni/Cargo.toml @@ -27,10 +27,10 @@ lance-namespace-impls = { path = "../../rust/lance-namespace-impls", features = lance-core = { path = "../../rust/lance-core" } lance-file = { path = "../../rust/lance-file" } lance-table = { path = "../../rust/lance-table" } -arrow = { version = "57.1", features = ["ffi"] } -arrow-array = "57.1" -arrow-schema = "57.1" -datafusion-common = "52.1.0" +arrow = { version = "58.0.0", features = ["ffi"] } +arrow-array = "58.0.0" +arrow-schema = "58.0.0" +datafusion-common = "53.0.0" object_store = { version = "0.12.2" } tokio = { version = "1.23", features = [ "rt-multi-thread", diff --git a/python/Cargo.lock b/python/Cargo.lock index f539539bfd2..ee88f82e4ad 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -208,9 +208,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -230,9 +230,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -263,9 +263,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -275,9 +275,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -312,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -325,9 +325,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -341,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -365,9 +365,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -378,9 +378,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" +checksum = "e63351dc11981a316c828a6032a5021345bba882f68bc4a36c36825a50725089" dependencies = [ "arrow-array", "arrow-data", @@ -390,9 +390,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -403,9 +403,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags 2.11.1", "serde_core", @@ -414,9 +414,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -428,9 +428,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -1573,9 +1573,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7541353e77dc7262b71ca27be07d8393661737e3a73b5d1b1c6f7d814c64fa2a" +checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" dependencies = [ "arrow", "arrow-schema", @@ -1613,7 +1613,7 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "parquet", "rand 0.9.4", @@ -1628,9 +1628,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9997731f90fa5398ef831ad0e69600f92c861b79c0d38bd1a29b6f0e3a0ce4c8" +checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" dependencies = [ "arrow", "async-trait", @@ -1646,16 +1646,16 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b30a3dd50dec860c9559275c8d97d9de602e611237a6ecfbda0b3b63b872352" +checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" dependencies = [ "arrow", "async-trait", @@ -1671,14 +1671,14 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", ] [[package]] name = "datafusion-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d551054acec0398ca604512310b77ce05c46f66e54b54d48200a686e385cca4e" +checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" dependencies = [ "ahash", "arrow", @@ -1687,9 +1687,10 @@ dependencies = [ "half", "hashbrown 0.16.1", "indexmap", + "itertools 0.14.0", "libc", "log", - "object_store", + "object_store 0.13.2", "parquet", "paste", "recursive", @@ -1700,9 +1701,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567d40e285f5b79f8737b576605721cd6c1133b5d2b00bdbd5d9838d90d0812f" +checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" dependencies = [ "futures", "log", @@ -1711,9 +1712,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d2668f51b3b30befae2207472569e37807fdedd1d14da58acc6f8ca6257eae" +checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" dependencies = [ "arrow", "async-compression", @@ -1736,7 +1737,7 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store", + "object_store 0.13.2", "rand 0.9.4", "tokio", "tokio-util", @@ -1746,9 +1747,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02e1b3e3a8ec55f1f62de4252b0407c8567363d056078769a197e24fc834a0f" +checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" dependencies = [ "arrow", "arrow-ipc", @@ -1764,15 +1765,15 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b559d7bf87d4f900f847baba8509634f838d9718695389e903604cdcccdb01f3" +checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" dependencies = [ "arrow", "async-trait", @@ -1786,16 +1787,16 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", "regex", "tokio", ] [[package]] name = "datafusion-datasource-json" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "250e2d7591ba8b638f063854650faa40bca4e8bd4059b2ece8836f6388d02db4" +checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" dependencies = [ "arrow", "async-trait", @@ -1809,15 +1810,17 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b043149f2c3557ca94abc58de40f68a8d412ff53365c06126ed234f8596399d" +checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" dependencies = [ "arrow", "async-trait", @@ -1837,7 +1840,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "parquet", "tokio", @@ -1845,25 +1848,27 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9496cb0db222dbb9a3735760ceca7fc56f35e1d5502c38d0caa77a81e9c1f6a" +checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" [[package]] name = "datafusion-execution" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc45d23c516ed8d3637751e44e09e21b45b3f58b473c802dddd1f1ad4fe435ff" +checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.4", "tempfile", @@ -1872,9 +1877,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63dd30526d2db4fda6440806a41e4676334a94bc0596cc9cc2a0efed20ef2c44" +checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" dependencies = [ "arrow", "async-trait", @@ -1895,9 +1900,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b486b5f6255d40976b88bb83813b0d035a8333e0ec39864824e78068cf42fa6" +checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" dependencies = [ "arrow", "datafusion-common", @@ -1908,9 +1913,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b04181cffefd632e57acfc233ed239626863682dd8bb30ab366293f441bba8" +checksum = "4b8250f7cdf463a0ad145f41d7508bcfa54c9b9f027317e599f0331097e3cc38" dependencies = [ "abi_stable", "arrow", @@ -1938,9 +1943,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07356c94118d881130dd0ffbff127540407d969c8978736e324edcd6c41cd48f" +checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" dependencies = [ "arrow", "arrow-buffer", @@ -1959,6 +1964,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.4", "regex", @@ -1969,9 +1975,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b644f9cf696df9233ce6958b9807666d78563b56f923267474dd6c07795f1f8f" +checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" dependencies = [ "ahash", "arrow", @@ -1985,14 +1991,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1de2deaaabe8923ce9ea9f29c47bbb4ee14f67ea2fe1ab5398d9bbebcf86e56" +checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" dependencies = [ "ahash", "arrow", @@ -2003,9 +2010,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552f8d92e4331ee91d23c02d12bb6acf32cbfd5215117e01c0fb63cd4b15af1a" +checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" dependencies = [ "arrow", "arrow-ord", @@ -2019,16 +2026,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "970fd0cdd3df8802b9a9975ff600998289ba9d46682a4f7285cba4820c9ada78" +checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" dependencies = [ "arrow", "async-trait", @@ -2042,9 +2051,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b4c21a7c8a986a1866c0a87ab756d0bbf7b5f41f306009fa2d9af79c52ed31" +checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" dependencies = [ "arrow", "datafusion-common", @@ -2060,9 +2069,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1210ad73b8b3211aeaf4a42bef9bd7a2b7fce3ec119a478831f18c6ff7f7b93" +checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2070,9 +2079,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaa566a963013a38681ad82a727a654bc7feb19632426aea8c3412d415d200c5" +checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" dependencies = [ "datafusion-doc", "quote", @@ -2081,9 +2090,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff9aa82b240252a88dee118372f9b9757c545ab9e53c0736bebab2e7da0ef1f2" +checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" dependencies = [ "arrow", "chrono", @@ -2101,9 +2110,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d48022b8af9988c1d852644f9e8b5584c490659769a550c5e8d39457a1da0a5" +checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" dependencies = [ "ahash", "arrow", @@ -2125,9 +2134,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae7a8abc0b4fe624000972a9b145b30b7f1b680bffaa950ea53f78d9b21c27c3" +checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" dependencies = [ "arrow", "datafusion-common", @@ -2140,9 +2149,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147253ca3e6b9d59c162de64c02800973018660e13340dd1886dd038d17ac429" +checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" dependencies = [ "ahash", "arrow", @@ -2157,9 +2166,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "689156bb2282107b6239db8d7ef44b4dab10a9b33d3491a0c74acac5e4fedd72" +checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" dependencies = [ "arrow", "datafusion-common", @@ -2176,9 +2185,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68253dc0ee5330aa558b2549c9b0da5af9fc17d753ae73022939014ad616fc28" +checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" dependencies = [ "ahash", "arrow", @@ -2200,6 +2209,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2207,9 +2217,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f5ab57d0b5a368258fff1d828f1619a10541fa5c4ec4930a383deb3a23204c8" +checksum = "677ee4448a010ed5faeff8d73ff78972c2ace59eff3cd7bd15833a1dafa00492" dependencies = [ "arrow", "chrono", @@ -2228,15 +2238,16 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-proto-common", - "object_store", + "object_store 0.13.2", "prost", + "rand 0.9.4", ] [[package]] name = "datafusion-proto-common" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd21d2c804802ca4b1719191dfe8e3d0860686649de6375ddc9237f85beb82b3" +checksum = "965eca01edc8259edbbd95883a00b6d81e329fd44a019cfac3a03b026a83eade" dependencies = [ "arrow", "datafusion-common", @@ -2245,9 +2256,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fcad240a54d0b1d3e8f668398900260a53122d522b2102ab57218590decacd6" +checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" dependencies = [ "arrow", "datafusion-common", @@ -2262,9 +2273,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58e83a68bb67007a8fcbf005c44cefe441270c7ee7f6dee10c0e0109b556f6d" +checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" dependencies = [ "async-trait", "datafusion-common", @@ -2276,15 +2287,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be53e9eb55db0fbb8980bb6d87f2435b0524acf4c718ed54a57cabbb299b2ab3" +checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap", "log", "recursive", @@ -2294,9 +2306,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.5.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dadf18107366b3470e47bdde8eeb9294b36444e451830d8e5fc8dc7b4ad9c58" +checksum = "d5e5656a7e63d51dd3e5af3dbd347ea83bbe993a77c66b854b74961570d16490" dependencies = [ "async-recursion", "async-trait", @@ -2304,7 +2316,7 @@ dependencies = [ "datafusion", "half", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "pbjson-types", "prost", "substrait", @@ -2894,9 +2906,9 @@ dependencies = [ [[package]] name = "geoarrow-array" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1cc4106ac0a0a512c398961ce95d8150475c84a84e17c4511c3643fa120a17" +checksum = "dafe7b7de3fab1a8b7099fd6a6434ca955fa65065f9c19f0f8a133693f3c2b0e" dependencies = [ "arrow-array", "arrow-buffer", @@ -2910,9 +2922,9 @@ dependencies = [ [[package]] name = "geoarrow-expr-geo" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa84300361ce57fb875bcaa6e32b95b0aff5c6b1af692b936bdd58ff343f4394" +checksum = "8e4a62ac19c86827c6ec81ea584594b3ee96db5a8119b9774d3466c6b373c434" dependencies = [ "arrow-array", "arrow-buffer", @@ -2924,9 +2936,9 @@ dependencies = [ [[package]] name = "geoarrow-schema" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97be4e9f523f92bd6a0e0458323f4b783d073d011664decd8dbf05651704f34" +checksum = "4d4a7edb2a1d87024a93805332a9c8184a0354836271d42c0d18cf628a5e3cd0" dependencies = [ "arrow-schema", "geo-traits", @@ -2937,9 +2949,9 @@ dependencies = [ [[package]] name = "geodatafusion" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cb8faa9b3bf4ae9f49b1f023b82d20626826f6448a7055498376146c10c4ead" +checksum = "af7cd430f1a1f59bc97053d824ad410ea6fd123c8977b3c1a75335e289233b8b" dependencies = [ "arrow-arith", "arrow-array", @@ -3488,15 +3500,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "inout" version = "0.1.4" @@ -3757,7 +3760,7 @@ dependencies = [ "lance-tokenizer", "log", "moka", - "object_store", + "object_store 0.12.5", "permutation", "pin-project", "prost", @@ -3828,7 +3831,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.12.5", "pin-project", "prost", "rand 0.9.4", @@ -3955,7 +3958,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -4027,7 +4030,7 @@ dependencies = [ "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -4076,7 +4079,7 @@ dependencies = [ "libc", "log", "moka", - "object_store", + "object_store 0.12.5", "object_store_opendal", "opendal", "path_abs", @@ -4140,7 +4143,7 @@ dependencies = [ "lance-namespace", "lance-table", "log", - "object_store", + "object_store 0.12.5", "rand 0.9.4", "reqwest", "serde", @@ -4187,7 +4190,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -4538,9 +4541,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -4604,15 +4607,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -4865,6 +4859,32 @@ dependencies = [ "web-time", ] +[[package]] +name = "object_store" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "object_store_opendal" version = "0.55.0" @@ -4875,7 +4895,7 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store", + "object_store 0.12.5", "opendal", "pin-project", "tokio", @@ -5000,14 +5020,13 @@ dependencies = [ [[package]] name = "parquet" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -5024,7 +5043,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "object_store", + "object_store 0.13.2", "paste", "seq-macro", "simdutf8", @@ -5443,7 +5462,7 @@ dependencies = [ "lance-table", "libc", "log", - "object_store", + "object_store 0.12.5", "prost", "prost-types", "pyo3", @@ -5464,36 +5483,33 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ "chrono", - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -5501,9 +5517,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -5513,9 +5529,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck", "proc-macro2", @@ -5526,9 +5542,9 @@ dependencies = [ [[package]] name = "pythonize" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11e06e4cff9be2bbf2bddf28a486ae619172ea57e79787f856572878c62dcfe2" +checksum = "0b79f670c9626c8b651c0581011b57b6ba6970bb69faf01a7c4c0cfc81c43f95" dependencies = [ "pyo3", "serde", @@ -6512,9 +6528,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "recursive", @@ -6523,9 +6539,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", @@ -6908,6 +6924,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -7234,12 +7251,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" -[[package]] -name = "unindent" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" - [[package]] name = "unsafe-libyaml" version = "0.2.11" diff --git a/python/Cargo.toml b/python/Cargo.toml index 95a8474d9b2..5adae10ebe5 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -13,15 +13,15 @@ name = "lance" crate-type = ["cdylib"] [dependencies] -arrow = { version = "57.0.0", features = ["pyarrow"] } -arrow-array = "57.0.0" -arrow-cast = "57.0.0" -arrow-data = "57.0.0" -arrow-schema = "57.0.0" +arrow = { version = "58.0.0", features = ["pyarrow"] } +arrow-array = "58.0.0" +arrow-cast = "58.0.0" +arrow-data = "58.0.0" +arrow-schema = "58.0.0" object_store = "0.12.4" -datafusion = "52.1.0" -datafusion-ffi = "52.1.0" -datafusion-common = "52.1.0" +datafusion = "53.0.0" +datafusion-ffi = "53.0.0" +datafusion-common = "53.0.0" async-trait = "0.1" chrono = "0.4.42" env_logger = "0.11.7" @@ -53,13 +53,13 @@ libc = "0.2.176" log = "0.4" prost = "0.14.1" prost-types = "0.14.1" -pyo3 = { version = "0.26", features = [ +pyo3 = { version = "0.28", features = [ "extension-module", "abi3-py39", "py-clone", "chrono", ] } -pythonize = "0.26" +pythonize = "0.28" tokio = { version = "1.48", features = ["rt-multi-thread"] } uuid = "1.3.0" regex = "1" diff --git a/python/pyproject.toml b/python/pyproject.toml index 613ff5b8d5b..fb34ff10d05 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -61,7 +61,7 @@ tests = [ # Only test tensorflow on linux for now. We will deprecate tensorflow soon. "tensorflow; sys_platform == 'linux'", "tqdm", - "datafusion>=52,<53; python_version >= '3.10'", + "datafusion>=53,<54; python_version >= '3.10'", ] dev = ["ruff==0.11.2", "pyright"] benchmarks = ["pytest-benchmark"] diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 3c8668abb84..118b8c7e092 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -435,8 +435,7 @@ def explain_plan( CoalescePartitionsExec ProjectionExec: expr=[...] HashJoinExec: mode=CollectLeft, join_type=Right, ... - CooperativeExec - LanceRead: uri=test_dataset/data, projection=[id], ... + LanceRead: uri=test_dataset/data, projection=[id], ... RepartitionExec: ... ProjectionExec: expr=[..., true as __merge_source_sentinel] StreamingTableExec: partition_sizes=1, ... @@ -517,8 +516,7 @@ def analyze_plan( CoalescePartitionsExec, elapsed=..., metrics=[output_rows=..., elapsed_compute=...] ProjectionExec: elapsed=..., expr=[...], metrics=[...] HashJoinExec: elapsed=..., mode=CollectLeft, join_type=Right, ... - CooperativeExec, elapsed=..., metrics=[] - LanceRead: elapsed=..., ..., metrics=[..., bytes_read=..., ...] + LanceRead: elapsed=..., ..., metrics=[..., bytes_read=..., ...] RepartitionExec: ... ProjectionExec: elapsed=..., expr=[..., true as __merge_source_sentinel], metrics=[...] StreamingTableExec: ..., metrics=[] diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 7e139446819..fc20cc02cea 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -155,7 +155,7 @@ impl MergeInsertBuilder { // schema's unenforced primary key (if configured). let on = if let Some(on_any) = on { on_any - .downcast::() + .cast::() .map(|val| vec![val.to_string()]) .or_else(|_| { let iterator = on_any.try_iter().map_err(|_| { @@ -165,7 +165,7 @@ impl MergeInsertBuilder { })?; let mut keys = Vec::new(); for key in iterator { - keys.push(key?.downcast::()?.to_string()); + keys.push(key?.cast::()?.to_string()); } PyResult::Ok(keys) })? @@ -344,7 +344,7 @@ pub fn transforms_from_python( py: Python<'_>, transforms: &Bound<'_, PyAny>, ) -> PyResult { - if let Ok(transforms) = transforms.downcast::() { + if let Ok(transforms) = transforms.cast::() { let expressions = transforms .iter() .map(|(k, v)| { @@ -373,7 +373,7 @@ pub fn transforms_from_python( })?; let result_batch: PyArrowType = result .extract(py) - .map_err(|err| lance::Error::invalid_input(err.to_string()))?; + .map_err(|err: PyErr| lance::Error::invalid_input(err.to_string()))?; Ok(result_batch.0) }) }; @@ -386,8 +386,9 @@ pub fn transforms_from_python( })) } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let ascending: bool = ob.getattr("ascending")?.extract()?; let nulls_first: bool = ob.getattr("nulls_first")?.extract()?; let column_name: String = ob.getattr("column_name")?.extract()?; @@ -419,7 +420,7 @@ impl<'py> IntoPyObject<'py> for PyLance<&ColumnOrdering> { } /// Python binding for BasePath -#[pyclass(name = "DatasetBasePath", module = "_lib")] +#[pyclass(name = "DatasetBasePath", module = "_lib", from_py_object)] #[derive(Clone)] pub struct DatasetBasePath { #[pyo3(get)] @@ -476,7 +477,7 @@ impl From for BasePath { } /// Lance Dataset that will be wrapped by another class in Python -#[pyclass(name = "_Dataset", module = "_lib")] +#[pyclass(name = "_Dataset", module = "_lib", from_py_object)] #[derive(Clone)] pub struct Dataset { #[pyo3(get)] @@ -564,10 +565,10 @@ impl Dataset { let mut builder = DatasetBuilder::from_uri(&uri).with_read_params(params); if let Some(ver) = version { - if let Ok(i) = ver.downcast::() { + if let Ok(i) = ver.cast::() { let v: u64 = i.extract()?; builder = builder.with_version(v); - } else if let Ok(v) = ver.downcast::() { + } else if let Ok(v) = ver.cast::() { let t: &str = &v.to_string_lossy(); builder = builder.with_tag(t); } else { @@ -883,7 +884,7 @@ impl Dataset { .map_err(|err| PyValueError::new_err(err.to_string()))?; } if let Some(full_text_query) = full_text_query { - let fts_query = if let Ok(full_text_query) = full_text_query.downcast::() { + let fts_query = if let Ok(full_text_query) = full_text_query.cast::() { let mut query = full_text_query .get_item("query")? .ok_or_else(|| PyKeyError::new_err("query must be specified"))? @@ -894,7 +895,7 @@ impl Dataset { } else { Some( columns - .downcast::()? + .cast::()? .iter() .map(|c| c.extract::()) .collect::>>()?, @@ -931,7 +932,7 @@ impl Dataset { })?; } query - } else if let Ok(query) = full_text_query.downcast::() { + } else if let Ok(query) = full_text_query.cast::() { let query = query.borrow(); FullTextSearchQuery::new_query(query.inner.clone()) } else { @@ -1080,7 +1081,7 @@ impl Dataset { None } else { let tuple = dr - .downcast::() + .cast::() .map_err(|err| PyValueError::new_err(err.to_string()))?; if tuple.len() != 2 { return Err(PyValueError::new_err( @@ -1292,7 +1293,7 @@ impl Dataset { let alterations = alterations .iter() .map(|obj| { - let obj = obj.downcast::()?; + let obj = obj.cast::()?; let path: String = obj .get_item("path")? .ok_or_else(|| PyValueError::new_err("path is required"))? @@ -1416,8 +1417,8 @@ impl Dataset { } for (key, value) in updates { - let column: PyBackedStr = key.downcast::()?.clone().try_into()?; - let expr: PyBackedStr = value.downcast::()?.clone().try_into()?; + let column: PyBackedStr = key.cast::()?.clone().try_into()?; + let expr: PyBackedStr = value.cast::()?.clone().try_into()?; builder = builder .set(column, &expr) @@ -1935,7 +1936,7 @@ impl Dataset { } if let Some(language) = kwargs.get_item("language")? { let language: PyBackedStr = - language.downcast::()?.clone().try_into()?; + language.cast::()?.clone().try_into()?; params = params.language(&language).map_err(|e| { PyValueError::new_err(format!( "can't set tokenizer language to {}: {:?}", @@ -2790,7 +2791,7 @@ impl Dataset { } } -#[pyclass(name = "SqlQuery", module = "_lib", subclass)] +#[pyclass(name = "SqlQuery", module = "_lib", subclass, skip_from_py_object)] #[derive(Clone)] pub struct SqlQuery { builder: lance::dataset::sql::SqlQueryBuilder, @@ -2850,7 +2851,12 @@ impl SqlQuery { } } -#[pyclass(name = "SqlQueryBuilder", module = "_lib", subclass)] +#[pyclass( + name = "SqlQueryBuilder", + module = "_lib", + subclass, + skip_from_py_object +)] #[derive(Clone)] pub struct SqlQueryBuilder { builder: lance::dataset::sql::SqlQueryBuilder, @@ -2929,7 +2935,12 @@ impl DatasetDelta { } } -#[pyclass(name = "DatasetDeltaBuilder", module = "_lib", subclass)] +#[pyclass( + name = "DatasetDeltaBuilder", + module = "_lib", + subclass, + skip_from_py_object +)] #[derive(Clone)] pub struct DatasetDeltaBuilder { builder: lance::dataset::delta::DatasetDeltaBuilder, @@ -3166,13 +3177,13 @@ struct IndexProgressHandler { impl Dataset { fn transform_ref(&self, reference: Option>) -> PyResult { if let Some(reference) = reference { - if let Ok(i) = reference.downcast::() { + if let Ok(i) = reference.cast::() { let version_number: u64 = i.extract()?; Ok(version_number.into()) - } else if let Ok(tag_name) = reference.downcast::() { + } else if let Ok(tag_name) = reference.cast::() { let tag: &str = &tag_name.to_string_lossy(); Ok(tag.into()) - } else if let Ok(tuple) = reference.downcast::() { + } else if let Ok(tuple) = reference.cast::() { if tuple.len() == 2 { let (branch_name, version_number) = tuple.extract::<(Option, Option)>()?; @@ -3372,8 +3383,8 @@ pub fn get_commit_handler(options: &Bound<'_, PyDict>) -> PyResult>( - dict: &'a Bound<'py, PyDict>, +fn get_dict_opt<'py, D: FromPyObjectOwned<'py>>( + dict: &Bound<'py, PyDict>, key: &str, ) -> PyResult> { let value = dict.get_item(key)?; @@ -3382,7 +3393,7 @@ fn get_dict_opt<'a, 'py, D: FromPyObject<'a>>( if v.is_none() { None } else { - Some(v.extract::()) + Some(v.extract::().map_err(Into::into)) } }) .transpose() @@ -3646,11 +3657,7 @@ fn prepare_vector_index_params( e )) })?; - let list = l - .downcast::()? - .iter() - .map(|f| f.to_string()) - .collect(); + let list = l.cast::()?.iter().map(|f| f.to_string()).collect(); ivf_params.precomputed_shuffle_buffers = Some((path, list)); } (None, None) => {} @@ -3904,7 +3911,7 @@ impl UDFCheckpointStore for PyBatchUDFCheckpointWrapper { } } -#[pyclass(name = "PyFullTextQuery")] +#[pyclass(name = "PyFullTextQuery", from_py_object)] #[derive(Debug, Clone)] pub struct PyFullTextQuery { pub(crate) inner: FtsQuery, @@ -4128,7 +4135,7 @@ fn vector_query_params_from_dict( )) } -#[pyclass(name = "PySearchFilter")] +#[pyclass(name = "PySearchFilter", from_py_object)] #[derive(Debug, Clone)] pub struct PySearchFilter { pub(crate) inner: QueryFilter, diff --git a/python/src/dataset/io_stats.rs b/python/src/dataset/io_stats.rs index fd6f10513c3..c153dc20fed 100644 --- a/python/src/dataset/io_stats.rs +++ b/python/src/dataset/io_stats.rs @@ -12,7 +12,7 @@ use pyo3::{pyclass, pymethods}; /// /// Note: Calling `io_stats()` returns the statistics accumulated since the last call /// and resets the internal counters (incremental stats pattern). -#[pyclass(name = "IOStats", module = "_lib", get_all)] +#[pyclass(name = "IOStats", module = "_lib", get_all, skip_from_py_object)] #[derive(Clone, Debug)] pub struct IoStats { /// Number of read IO operations performed diff --git a/python/src/dataset/optimize.rs b/python/src/dataset/optimize.rs index 4af47cbf0df..321d7157b86 100644 --- a/python/src/dataset/optimize.rs +++ b/python/src/dataset/optimize.rs @@ -86,7 +86,8 @@ fn parse_compaction_options( } fn unwrap_dataset(dataset: Bound) -> PyResult> { - dataset.getattr("_ds")?.extract() + let ds = dataset.getattr("_ds")?; + Ok(ds.cast::()?.clone()) } fn wrap_fragment<'py>(py: Python<'py>, fragment: &Fragment) -> PyResult> { @@ -229,7 +230,7 @@ impl PyCompactionPlan { } } -#[pyclass(name = "CompactionTask", module = "lance.optimize")] +#[pyclass(name = "CompactionTask", module = "lance.optimize", from_py_object)] #[derive(Clone)] pub struct PyCompactionTask(CompactionTask); @@ -347,7 +348,7 @@ impl PyCompactionTask { /// /// This result is pickle-able, so it can be serialized and sent back to the /// main process to be passed to :py:meth:`lance.optimize.Compaction.commit`. -#[pyclass(name = "RewriteResult", module = "lance.optimize")] +#[pyclass(name = "RewriteResult", module = "lance.optimize", from_py_object)] #[derive(Clone)] pub struct PyRewriteResult(RewriteResult); @@ -491,7 +492,7 @@ impl PyCompaction { let dataset = dataset_ref.borrow().clone(); // Make sure we parse the options within a scoped GIL context, so we // aren't holding the GIL while blocking the thread on the operation. - let options = options.downcast::()?; + let options = options.cast::()?; let config = dataset.ds.manifest.config.clone(); let opts = parse_compaction_options(options, &config)?; let mut new_ds = dataset.ds.as_ref().clone(); @@ -525,7 +526,7 @@ impl PyCompaction { let dataset = dataset.borrow().clone(); // Make sure we parse the options within a scoped GIL context, so we // aren't holding the GIL while blocking the thread on the operation. - let options = options.downcast::()?; + let options = options.cast::()?; let config = dataset.ds.manifest.config.clone(); let opts = parse_compaction_options(options, &config)?; let plan = rt() diff --git a/python/src/file.rs b/python/src/file.rs index a25383ccea7..c2be593ee27 100644 --- a/python/src/file.rs +++ b/python/src/file.rs @@ -48,7 +48,7 @@ use std::collections::HashMap; use std::{pin::Pin, sync::Arc}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; -#[pyclass(get_all)] +#[pyclass(get_all, skip_from_py_object)] #[derive(Clone, Debug, Serialize)] pub struct LanceBufferDescriptor { /// The byte offset of the buffer in the file @@ -70,7 +70,7 @@ impl LanceBufferDescriptor { } } -#[pyclass(get_all)] +#[pyclass(get_all, skip_from_py_object)] #[derive(Clone, Debug, Serialize)] pub struct LancePageMetadata { /// The buffers in the page @@ -94,7 +94,7 @@ impl LancePageMetadata { } } -#[pyclass(get_all)] +#[pyclass(get_all, skip_from_py_object)] #[derive(Clone, Debug, Serialize)] pub struct LanceColumnMetadata { /// The column-wide buffers @@ -119,7 +119,7 @@ impl LanceColumnMetadata { } /// Statistics summarize some of the file metadata for quick summary info -#[pyclass(get_all)] +#[pyclass(get_all, skip_from_py_object)] #[derive(Clone, Debug, Serialize)] pub struct LanceFileStatistics { /// Statistics about each of the columns in the file @@ -135,7 +135,7 @@ impl LanceFileStatistics { } /// Summary information describing a column -#[pyclass(get_all)] +#[pyclass(get_all, skip_from_py_object)] #[derive(Clone, Debug, Serialize)] pub struct LanceColumnStatistics { /// The number of pages in the column @@ -170,7 +170,7 @@ impl LanceFileStatistics { } } -#[pyclass(get_all)] +#[pyclass(get_all, skip_from_py_object)] #[derive(Clone, Debug, Serialize)] pub struct LanceFileMetadata { /// The schema of the file diff --git a/python/src/fragment.rs b/python/src/fragment.rs index 3dd512eb546..1da99492fac 100644 --- a/python/src/fragment.rs +++ b/python/src/fragment.rs @@ -42,7 +42,7 @@ use crate::schema::{LanceSchema, logical_schema_from_lance}; use crate::utils::{PyLance, export_vec, extract_vec}; use crate::{Dataset, Scanner, rt}; -#[pyclass(name = "_Fragment", module = "_lib")] +#[pyclass(name = "_Fragment", module = "_lib", from_py_object)] #[derive(Clone)] pub struct FileFragment { fragment: LanceFragment, @@ -741,7 +741,12 @@ impl PyRowDatasetVersionMeta { } } -#[pyclass(name = "FragmentSession", module = "_lib", subclass)] +#[pyclass( + name = "FragmentSession", + module = "_lib", + subclass, + skip_from_py_object +)] #[derive(Clone)] pub struct FragmentSession { session: Arc, @@ -762,8 +767,9 @@ impl FragmentSession { } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &pyo3::Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let files = extract_vec(&ob.getattr("files")?)?; let deletion_file: Option> = @@ -842,8 +848,9 @@ impl<'py> IntoPyObject<'py> for PyLance { } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &pyo3::Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let file_size_bytes: Option = ob.getattr("file_size_bytes")?.extract()?; let file_size_bytes = CachedFileSize::new(file_size_bytes.unwrap_or(0)); let fields: Vec = ob.getattr("fields")?.extract()?; diff --git a/python/src/indices.rs b/python/src/indices.rs index 62f3c0c64ec..35f1dd904cb 100644 --- a/python/src/indices.rs +++ b/python/src/indices.rs @@ -43,7 +43,12 @@ use lance::index::vector::ivf::write_ivf_pq_file_from_existing_index; use lance_index::{IndexDescription, IndexType}; use uuid::Uuid; -#[pyclass(name = "IndexConfig", module = "lance.indices", get_all)] +#[pyclass( + name = "IndexConfig", + module = "lance.indices", + get_all, + from_py_object +)] #[derive(Debug, Clone)] pub struct PyIndexConfig { pub index_type: String, @@ -61,7 +66,7 @@ impl PyIndexConfig { } } -#[pyclass(name = "IvfModel", module = "lance.indices")] +#[pyclass(name = "IvfModel", module = "lance.indices", skip_from_py_object)] #[derive(Debug, Clone)] pub struct PyIvfModel { pub(crate) inner: IvfModel, @@ -505,7 +510,12 @@ pub fn load_shuffled_vectors( )? } -#[pyclass(name = "IndexSegmentDescription", module = "lance.indices", get_all)] +#[pyclass( + name = "IndexSegmentDescription", + module = "lance.indices", + get_all, + skip_from_py_object +)] #[derive(Clone)] pub struct PyIndexSegmentDescription { /// The UUID of the index segment diff --git a/python/src/lib.rs b/python/src/lib.rs index ee5eaf96a16..744845dfd2d 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -28,8 +28,6 @@ use std::path::Path; use std::sync::Arc; use std::sync::atomic::{self, Ordering}; -use std::ffi::CString; - use ::arrow::pyarrow::PyArrowType; use ::arrow_schema::Schema as ArrowSchema; use ::lance::arrow::json::ArrowJsonExt; @@ -53,10 +51,13 @@ use file::{ }; use log::Level; use pyo3::exceptions::PyIOError; +use pyo3::ffi::c_str; use pyo3::prelude::*; use pyo3::types::{PyAny, PyAnyMethods, PyCapsule}; use scanner::ScanStatistics; use session::Session; +use std::ffi::CString; +use std::ptr::NonNull; pub(crate) mod arrow; #[cfg(feature = "datagen")] @@ -368,7 +369,12 @@ fn manifest_needs_migration(dataset: &Bound<'_, PyAny>) -> PyResult { )) } -#[pyclass(name = "FFILanceTableProvider", module = "lance", subclass)] +#[pyclass( + name = "FFILanceTableProvider", + module = "lance", + subclass, + skip_from_py_object +)] #[derive(Clone)] struct FFILanceTableProvider { dataset: Arc<::lance::Dataset>, @@ -424,8 +430,11 @@ fn ffi_logical_codec_from_pycapsule(obj: Bound) -> PyResult()?; - let codec = unsafe { capsule.reference::() }; + let capsule = capsule.cast::()?; + let data: NonNull = capsule + .pointer_checked(Some(c_str!("datafusion_logical_extension_codec")))? + .cast(); + let codec = unsafe { data.as_ref() }; Ok(codec.clone()) } diff --git a/python/src/namespace.rs b/python/src/namespace.rs index ee302d12303..547669e42b6 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -86,7 +86,7 @@ impl DynamicContextProvider for PyDynamicContextProvider { Ok(headers_py) => { // Convert Python dict to Rust HashMap let bound_headers = headers_py.bind(py); - if let Ok(dict) = bound_headers.downcast::() { + if let Ok(dict) = bound_headers.cast::() { dict_to_hashmap(dict).unwrap_or_default() } else { log::warn!("Context provider did not return a dict"); @@ -1549,10 +1549,10 @@ pub fn extract_namespace_arc( namespace_client: &Bound<'_, PyAny>, ) -> PyResult> { // Direct PyO3 class - if let Ok(dir_namespace_client) = namespace_client.downcast::() { + if let Ok(dir_namespace_client) = namespace_client.cast::() { return Ok(dir_namespace_client.borrow().inner.clone() as Arc); } - if let Ok(rest_namespace_client) = namespace_client.downcast::() { + if let Ok(rest_namespace_client) = namespace_client.cast::() { return Ok(rest_namespace_client.borrow().inner.clone() as Arc); } @@ -1565,13 +1565,13 @@ pub fn extract_namespace_arc( .unwrap_or_default(); if type_name == "DirectoryNamespace" { - if let Ok(dir_namespace_client) = inner.downcast::() { + if let Ok(dir_namespace_client) = inner.cast::() { return Ok( dir_namespace_client.borrow().inner.clone() as Arc ); } } else if type_name == "RestNamespace" - && let Ok(rest_namespace_client) = inner.downcast::() + && let Ok(rest_namespace_client) = inner.cast::() { return Ok(rest_namespace_client.borrow().inner.clone() as Arc); } diff --git a/python/src/scanner.rs b/python/src/scanner.rs index e150f10e92d..691f7f53294 100644 --- a/python/src/scanner.rs +++ b/python/src/scanner.rs @@ -33,7 +33,7 @@ use crate::schema::logical_arrow_schema; /// This will be wrapped by a python class to provide /// additional functionality -#[pyclass(name = "_Scanner", module = "_lib")] +#[pyclass(name = "_Scanner", module = "_lib", from_py_object)] #[derive(Clone)] pub struct Scanner { scanner: Arc, @@ -49,7 +49,7 @@ impl Scanner { } } -#[pyclass(name = "ScanStatistics", module = "_lib", get_all)] +#[pyclass(name = "ScanStatistics", module = "_lib", get_all, skip_from_py_object)] #[derive(Clone)] /// Statistics about the scan. pub struct ScanStatistics { diff --git a/python/src/schema.rs b/python/src/schema.rs index 13a123ec45c..3954716b416 100644 --- a/python/src/schema.rs +++ b/python/src/schema.rs @@ -17,7 +17,7 @@ use pyo3::{ types::PyTuple, }; -#[pyclass(name = "LanceField", module = "lance.schema")] +#[pyclass(name = "LanceField", module = "lance.schema", from_py_object)] #[derive(Clone)] pub struct LanceField(pub Field); @@ -86,7 +86,7 @@ impl LanceField { /// /// The assignment of field ids is particular to each dataset, so these schemas /// cannot be used interchangeably between datasets. -#[pyclass(name = "LanceSchema", module = "lance.schema")] +#[pyclass(name = "LanceSchema", module = "lance.schema", from_py_object)] #[derive(Clone)] pub struct LanceSchema(pub Schema); diff --git a/python/src/session.rs b/python/src/session.rs index ceaa00b3ae9..c91329ec1ee 100644 --- a/python/src/session.rs +++ b/python/src/session.rs @@ -13,7 +13,7 @@ use crate::rt; /// The Session holds stateful information for a dataset. /// /// The session contains caches for opened indices and file metadata. -#[pyclass(name = "_Session", module = "_lib")] +#[pyclass(name = "_Session", module = "_lib", from_py_object)] #[derive(Clone)] pub struct Session { pub inner: Arc, diff --git a/python/src/storage_options.rs b/python/src/storage_options.rs index 15f3aa2ce17..30fe5e166a9 100644 --- a/python/src/storage_options.rs +++ b/python/src/storage_options.rs @@ -12,7 +12,7 @@ use crate::rt; /// Python wrapper for StorageOptionsAccessor /// /// This wraps a Rust StorageOptionsAccessor and exposes it to Python. -#[pyclass(name = "StorageOptionsAccessor")] +#[pyclass(name = "StorageOptionsAccessor", skip_from_py_object)] #[derive(Clone)] pub struct PyStorageOptionsAccessor { inner: Arc, diff --git a/python/src/transaction.rs b/python/src/transaction.rs index eae5b49a15d..2c3d34655b3 100644 --- a/python/src/transaction.rs +++ b/python/src/transaction.rs @@ -22,8 +22,9 @@ use std::sync::Arc; use uuid::Uuid; // IndexFile bindings -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let path = ob.getattr("path")?.extract()?; let size_bytes = ob.getattr("size_bytes")?.extract()?; Ok(Self(IndexFile { path, size_bytes })) @@ -58,8 +59,9 @@ impl<'py> IntoPyObject<'py> for PyLance { } // IndexMetadata bindings -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let uuid = ob.getattr("uuid")?.to_string(); let name = ob.getattr("name")?.extract()?; let fields = ob.getattr("fields")?.extract()?; @@ -68,7 +70,7 @@ impl FromPyObject<'_> for PyLance { let fragment_ids = ob.getattr("fragment_ids")?; let created_at = ob.getattr("created_at")?.extract()?; - let fragment_ids_ref: &Bound<'_, PySet> = fragment_ids.downcast()?; + let fragment_ids_ref: &Bound<'_, PySet> = fragment_ids.cast()?; let fragment_bitmap = Some( fragment_ids_ref .into_iter() @@ -173,8 +175,9 @@ impl<'py> IntoPyObject<'py> for PyLance { } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let fragment_id = ob.getattr("fragment_id")?.extract::()?; let new_file = &ob.getattr("new_file")?.extract::>()?; @@ -206,8 +209,9 @@ impl<'py> IntoPyObject<'py> for PyLance<&DataReplacementGroup> { #[derive(Debug, Clone)] pub struct PyUpdateMode(pub UpdateMode); -impl FromPyObject<'_> for PyUpdateMode { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyUpdateMode { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let mode_str: String = ob.extract()?; match mode_str.as_str() { "rewrite_rows" => Ok(Self(UpdateMode::RewriteRows)), @@ -220,9 +224,10 @@ impl FromPyObject<'_> for PyUpdateMode { } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { - match class_name(ob)?.as_str() { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { + match class_name(&ob)?.as_str() { "Overwrite" => { let schema = extract_schema(&ob.getattr("new_schema")?)?; @@ -367,7 +372,7 @@ impl FromPyObject<'_> for PyLance { for item in items.try_iter()? { let item = item?; // Extract as a tuple and then get individual elements - let tuple = item.downcast::()?; + let tuple = item.cast::()?; let field_id = tuple.get_item(0)?.extract::()?; let update_map = tuple.get_item(1)?; if let Some(map) = extract_update_map(&update_map)? { @@ -594,8 +599,9 @@ impl<'py> IntoPyObject<'py> for PyLance<&Operation> { } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &pyo3::Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let read_version = ob.getattr("read_version")?.extract()?; let uuid = ob.getattr("uuid")?.extract()?; let operation = ob.getattr("operation")?.extract::>()?.0; @@ -653,8 +659,9 @@ impl<'py> IntoPyObject<'py> for PyLance { } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { Ok(Self(RewriteGroup { old_fragments: extract_vec(&ob.getattr("old_fragments")?)?, new_fragments: extract_vec(&ob.getattr("new_fragments")?)?, @@ -681,8 +688,9 @@ impl<'py> IntoPyObject<'py> for PyLance<&RewriteGroup> { } } -impl FromPyObject<'_> for PyLance { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { +impl FromPyObject<'_, '_> for PyLance { + type Error = PyErr; + fn extract(ob: Borrowed<'_, '_, PyAny>) -> PyResult { let old_id: String = ob.getattr("old_id")?.extract()?; let new_id: String = ob.getattr("new_id")?.extract()?; let old_id = Uuid::parse_str(&old_id) @@ -770,7 +778,7 @@ fn export_update_map(py: Python<'_>, update_map: &Option) -> PyResult } fn extract_schema(schema: &Bound<'_, PyAny>) -> PyResult { - match schema.downcast::() { + match schema.cast::() { Ok(schema) => Ok(schema.borrow().0.clone()), Err(_) => { let arrow_schema = schema.extract::>()?.0; diff --git a/python/src/utils.rs b/python/src/utils.rs index 1fad883978e..4f7d6d7dde2 100644 --- a/python/src/utils.rs +++ b/python/src/utils.rs @@ -274,9 +274,9 @@ impl Hnsw { pub struct PyLance(pub T); /// Extract a Vec of PyLance types from a Python object. -pub fn extract_vec<'a, T>(ob: &Bound<'a, PyAny>) -> PyResult> +pub fn extract_vec<'py, T>(ob: &Bound<'py, PyAny>) -> PyResult> where - PyLance: FromPyObject<'a>, + PyLance: FromPyObjectOwned<'py>, { ob.extract::>>() .map(|v| v.into_iter().map(|t| t.0).collect()) diff --git a/rust/arrow-scalar/Cargo.toml b/rust/arrow-scalar/Cargo.toml index f127e6c0bfa..b5e968faf8a 100644 --- a/rust/arrow-scalar/Cargo.toml +++ b/rust/arrow-scalar/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lance-arrow-scalar" -version = "57.0.0" +version = "58.0.0" edition.workspace = true authors.workspace = true license.workspace = true diff --git a/rust/arrow-stats/Cargo.toml b/rust/arrow-stats/Cargo.toml index e57ea681034..bbf1f16d0ee 100644 --- a/rust/arrow-stats/Cargo.toml +++ b/rust/arrow-stats/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lance-arrow-stats" -version = "57.0.0" +version = "58.0.0" edition.workspace = true authors.workspace = true license.workspace = true diff --git a/rust/examples/Cargo.toml b/rust/examples/Cargo.toml index b77a0a7d062..3a1ce0ea03c 100644 --- a/rust/examples/Cargo.toml +++ b/rust/examples/Cargo.toml @@ -49,6 +49,6 @@ tokio = { workspace = true } all_asserts = "2.3.1" env_logger = "0.11.7" hf-hub = "0.4.2" -parquet = "57.1" +parquet = "58.0.0" tokenizers = "0.15.2" rand.workspace = true diff --git a/rust/lance-datafusion/src/exec.rs b/rust/lance-datafusion/src/exec.rs index b3a98275853..a77e8931c0f 100644 --- a/rust/lance-datafusion/src/exec.rs +++ b/rust/lance-datafusion/src/exec.rs @@ -71,7 +71,7 @@ pub struct OneShotExec { // We save off a copy of the schema to speed up formatting and so ExecutionPlan::schema & display_as // can still function after exhausted schema: Arc, - properties: PlanProperties, + properties: Arc, } impl OneShotExec { @@ -81,12 +81,12 @@ impl OneShotExec { Self { stream: Mutex::new(Some(stream)), schema: schema.clone(), - properties: PlanProperties::new( + properties: Arc::new(PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ), + )), } } @@ -195,18 +195,14 @@ impl ExecutionPlan for OneShotExec { } } - fn statistics(&self) -> datafusion_common::Result { - Ok(Statistics::new_unknown(&self.schema)) - } - - fn properties(&self) -> &datafusion::physical_plan::PlanProperties { + fn properties(&self) -> &Arc { &self.properties } } struct TracedExec { input: Arc, - properties: PlanProperties, + properties: Arc, span: Span, } @@ -250,7 +246,7 @@ impl ExecutionPlan for TracedExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -892,7 +888,7 @@ impl ExecutionPlan for StrictBatchSizeExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { self.input.properties() } diff --git a/rust/lance-datafusion/src/planner.rs b/rust/lance-datafusion/src/planner.rs index 1b163963e61..ca64aacdd80 100644 --- a/rust/lance-datafusion/src/planner.rs +++ b/rust/lance-datafusion/src/planner.rs @@ -40,7 +40,6 @@ use datafusion::sql::sqlparser::ast::{ use datafusion::{ common::Column, logical_expr::{Between, BinaryExpr, Like, Operator}, - physical_expr::execution_props::ExecutionProps, physical_plan::PhysicalExpr, prelude::Expr, scalar::ScalarValue, @@ -340,7 +339,7 @@ impl Planner { fn unary_expr(&self, op: &UnaryOperator, expr: &SQLExpr) -> Result { Ok(match op { - UnaryOperator::Not | UnaryOperator::PGBitwiseNot => { + UnaryOperator::Not | UnaryOperator::BitwiseNot => { Expr::Not(Box::new(self.parse_sql_expr(expr)?)) } @@ -920,8 +919,9 @@ impl Planner { // DataFusion needs the simplify and coerce passes to be applied before // expressions can be handled by the physical planner. - let props = ExecutionProps::new().with_query_execution_start_time(Utc::now()); - let simplify_context = SimplifyContext::new(&props).with_schema(df_schema.clone()); + let simplify_context = SimplifyContext::default() + .with_schema(df_schema.clone()) + .with_query_execution_start_time(Some(Utc::now())); let simplifier = datafusion::optimizer::simplify_expressions::ExprSimplifier::new(simplify_context); diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs index 1df740e815f..344c9758bde 100644 --- a/rust/lance-index/src/scalar/btree.rs +++ b/rust/lance-index/src/scalar/btree.rs @@ -585,6 +585,9 @@ impl Ord for OrderableScalarValue { (Dictionary(_, _), _) => panic!("Attempt to compare Dictionary with non-Dictionary"), // What would a btree of unions even look like? May not be possible. (Union(_, _, _), _) => todo!("Support for union scalars"), + (RunEndEncoded(_, _, _), _) => { + todo!("Support for run-end encoded scalars") + } (Null, Null) => Ordering::Equal, (Null, _) => todo!(), } diff --git a/rust/lance-index/src/scalar/expression.rs b/rust/lance-index/src/scalar/expression.rs index e6d04f031f0..abcbef075d0 100644 --- a/rust/lance-index/src/scalar/expression.rs +++ b/rust/lance-index/src/scalar/expression.rs @@ -2071,7 +2071,6 @@ mod tests { use arrow_schema::{Field, Schema}; use chrono::Utc; use datafusion_common::{Column, DFSchema}; - use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; use lance_datafusion::exec::{LanceExecutionOptions, get_session_context}; @@ -2134,8 +2133,9 @@ mod tests { let state = ctx.state(); let mut expr = state.create_logical_expr(expr, &df_schema).unwrap(); if optimize { - let props = ExecutionProps::new().with_query_execution_start_time(Utc::now()); - let simplify_context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema)); + let simplify_context = SimplifyContext::default() + .with_schema(Arc::new(df_schema)) + .with_query_execution_start_time(Some(Utc::now())); let simplifier = datafusion::optimizer::simplify_expressions::ExprSimplifier::new(simplify_context); expr = simplifier.simplify(expr).unwrap(); @@ -2888,8 +2888,9 @@ mod tests { .unwrap(); // Apply DataFusion simplification (this may convert starts_with to LIKE) - let props = ExecutionProps::new().with_query_execution_start_time(Utc::now()); - let simplify_context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema)); + let simplify_context = SimplifyContext::default() + .with_schema(Arc::new(df_schema)) + .with_query_execution_start_time(Some(Utc::now())); let simplifier = datafusion::optimizer::simplify_expressions::ExprSimplifier::new(simplify_context); let simplified_expr = simplifier.simplify(expr).unwrap(); diff --git a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/btree.rs b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/btree.rs index 8f709170c76..fed61698fab 100644 --- a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/btree.rs +++ b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/btree.rs @@ -34,7 +34,7 @@ pub struct BTreeIndexExec { max_visible_batch_position: usize, projection: Option>, output_schema: SchemaRef, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, /// Column name of the indexed field. column: String, @@ -92,12 +92,12 @@ impl BTreeIndexExec { ))); } - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Ok(Self { batch_store, @@ -371,7 +371,7 @@ impl ExecutionPlan for BTreeIndexExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/fts.rs b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/fts.rs index 4451b1a1bc9..595572919f8 100644 --- a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/fts.rs +++ b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/fts.rs @@ -46,7 +46,7 @@ pub struct FtsIndexExec { max_visible_batch_position: usize, projection: Option>, output_schema: SchemaRef, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, /// Pre-computed batch ranges for O(log n) lookup. batch_ranges: Vec, @@ -112,12 +112,12 @@ impl FtsIndexExec { } let output_schema = Arc::new(Schema::new(fields)); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); // Pre-compute batch ranges for O(log n) lookup and max visible row let mut batch_ranges = Vec::new(); @@ -408,7 +408,7 @@ impl ExecutionPlan for FtsIndexExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/scan.rs b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/scan.rs index 8f4018fc92f..c56e960048d 100644 --- a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/scan.rs +++ b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/scan.rs @@ -41,7 +41,7 @@ pub struct MemTableScanExec { output_schema: SchemaRef, /// Schema of the source data (before projection), used for filter evaluation. source_schema: SchemaRef, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, /// Whether to include _rowid column (row position) in output. with_row_id: bool, @@ -123,12 +123,12 @@ impl MemTableScanExec { filter_predicate: Option, filter_expr: Option, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { batch_store, @@ -353,7 +353,7 @@ impl ExecutionPlan for MemTableScanExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/vector.rs b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/vector.rs index 52c3eed584d..6713a663640 100644 --- a/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/vector.rs +++ b/rust/lance/src/dataset/mem_wal/memtable/scanner/exec/vector.rs @@ -38,7 +38,7 @@ pub struct VectorIndexExec { max_visible_batch_position: usize, projection: Option>, output_schema: SchemaRef, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, /// Whether to include _rowid column (row position) in output. with_row_id: bool, @@ -114,12 +114,12 @@ impl VectorIndexExec { } let output_schema = Arc::new(Schema::new(fields)); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Ok(Self { batch_store, @@ -504,7 +504,7 @@ impl ExecutionPlan for VectorIndexExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/scanner/exec/bloom_guard.rs b/rust/lance/src/dataset/mem_wal/scanner/exec/bloom_guard.rs index 5d0edd24896..ab8be9f8b75 100644 --- a/rust/lance/src/dataset/mem_wal/scanner/exec/bloom_guard.rs +++ b/rust/lance/src/dataset/mem_wal/scanner/exec/bloom_guard.rs @@ -60,7 +60,7 @@ pub struct BloomFilterGuardExec { /// Output schema. schema: SchemaRef, /// Plan properties. - properties: PlanProperties, + properties: Arc, } impl BloomFilterGuardExec { @@ -80,12 +80,12 @@ impl BloomFilterGuardExec { ) -> Self { let schema = input.schema(); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(1), input.pipeline_behavior(), input.boundedness(), - ); + )); Self { input, @@ -142,7 +142,7 @@ impl ExecutionPlan for BloomFilterGuardExec { self.schema.clone() } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/scanner/exec/coalesce_first.rs b/rust/lance/src/dataset/mem_wal/scanner/exec/coalesce_first.rs index dfef9a21143..9e158c86b4a 100644 --- a/rust/lance/src/dataset/mem_wal/scanner/exec/coalesce_first.rs +++ b/rust/lance/src/dataset/mem_wal/scanner/exec/coalesce_first.rs @@ -48,7 +48,7 @@ pub struct CoalesceFirstExec { /// Output schema (must be same for all inputs). schema: SchemaRef, /// Plan properties. - properties: PlanProperties, + properties: Arc, } impl CoalesceFirstExec { @@ -79,12 +79,12 @@ impl CoalesceFirstExec { ); } - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(1), inputs[0].pipeline_behavior(), inputs[0].boundedness(), - ); + )); Self { inputs, @@ -119,7 +119,7 @@ impl ExecutionPlan for CoalesceFirstExec { self.schema.clone() } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/scanner/exec/deduplicate.rs b/rust/lance/src/dataset/mem_wal/scanner/exec/deduplicate.rs index 868a098ecb9..4a3492f2ad7 100644 --- a/rust/lance/src/dataset/mem_wal/scanner/exec/deduplicate.rs +++ b/rust/lance/src/dataset/mem_wal/scanner/exec/deduplicate.rs @@ -67,7 +67,7 @@ pub struct DeduplicateExec { /// Whether the input is already sorted by (pk, _memtable_gen DESC, _rowaddr DESC). input_sorted: bool, /// Plan properties. - properties: PlanProperties, + properties: Arc, } impl DeduplicateExec { @@ -155,12 +155,12 @@ impl DeduplicateExec { let schema = Arc::new(Schema::new(output_fields)); // Output is single partition after sort + dedup - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(1), input.pipeline_behavior(), input.boundedness(), - ); + )); Ok(Self { input, @@ -237,12 +237,12 @@ impl DeduplicateExec { let schema = Arc::new(Schema::new(output_fields)); // Output is single partition after dedup - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(1), input.pipeline_behavior(), input.boundedness(), - ); + )); Ok(Self { input, @@ -381,7 +381,7 @@ impl ExecutionPlan for DeduplicateExec { self.schema.clone() } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/scanner/exec/filter_stale.rs b/rust/lance/src/dataset/mem_wal/scanner/exec/filter_stale.rs index de5621cd35d..434599cb4e2 100644 --- a/rust/lance/src/dataset/mem_wal/scanner/exec/filter_stale.rs +++ b/rust/lance/src/dataset/mem_wal/scanner/exec/filter_stale.rs @@ -82,7 +82,7 @@ pub struct FilterStaleExec { /// Output schema. schema: SchemaRef, /// Plan properties. - properties: PlanProperties, + properties: Arc, } impl FilterStaleExec { @@ -104,12 +104,12 @@ impl FilterStaleExec { let mut bloom_filters = bloom_filters; bloom_filters.sort_by(|a, b| b.generation.cmp(&a.generation)); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(1), input.pipeline_behavior(), input.boundedness(), - ); + )); Self { input, @@ -166,7 +166,7 @@ impl ExecutionPlan for FilterStaleExec { self.schema.clone() } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/mem_wal/scanner/exec/generation_tag.rs b/rust/lance/src/dataset/mem_wal/scanner/exec/generation_tag.rs index c750afc7f35..ba9d565316f 100644 --- a/rust/lance/src/dataset/mem_wal/scanner/exec/generation_tag.rs +++ b/rust/lance/src/dataset/mem_wal/scanner/exec/generation_tag.rs @@ -44,7 +44,7 @@ pub struct MemtableGenTagExec { /// Output schema (input schema + _gen column). schema: SchemaRef, /// Plan properties. - properties: PlanProperties, + properties: Arc, } impl MemtableGenTagExec { @@ -62,12 +62,12 @@ impl MemtableGenTagExec { let schema = Arc::new(Schema::new(fields)); // Preserve input properties - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema.clone()), input.output_partitioning().clone(), input.pipeline_behavior(), input.boundedness(), - ); + )); Self { input, @@ -108,7 +108,7 @@ impl ExecutionPlan for MemtableGenTagExec { self.schema.clone() } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index 5f40c79b4f9..850f9b11192 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -2,6 +2,8 @@ // SPDX-FileCopyrightText: Copyright The Lance Authors use std::collections::HashSet; + +use datafusion::config::ConfigOptions; use std::ops::Range; use std::pin::Pin; use std::sync::{Arc, LazyLock}; @@ -18,6 +20,7 @@ use datafusion::common::{DFSchema, JoinType, NullEquality, SchemaExt, exec_dataf use datafusion::functions_aggregate; use datafusion::logical_expr::{Expr, ScalarUDF, col, lit}; use datafusion::physical_expr::PhysicalSortExpr; +#[allow(deprecated)] use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion::physical_plan::expressions; use datafusion::physical_plan::projection::ProjectionExec as DFProjectionExec; @@ -2557,7 +2560,7 @@ impl Scanner { } let optimizer = get_physical_optimizer(); - let options = Default::default(); + let options: ConfigOptions = Default::default(); for rule in optimizer.rules { plan = rule.optimize(plan, &options)?; } @@ -3251,6 +3254,7 @@ impl Scanner { None, datafusion_physical_plan::joins::PartitionMode::CollectLeft, NullEquality::NullEqualsNothing, + false, )?) as _); } else { must = Some(plan); @@ -4207,6 +4211,7 @@ impl Scanner { None, PartitionMode::CollectLeft, NullEquality::NullEqualsNull, + false, )?; let schema = join.schema(); @@ -4559,6 +4564,7 @@ impl Scanner { } /// Take row indices produced by input plan from the dataset (with projection) + #[allow(deprecated)] fn take( &self, input: Arc, diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 8ea25c7285f..232a1e38b43 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -754,6 +754,7 @@ impl MergeInsertJob { None, PartitionMode::CollectLeft, NullEquality::NullEqualsNull, + false, ) .unwrap(), ); @@ -912,7 +913,7 @@ impl MergeInsertJob { let new_fragments = Arc::new(Mutex::new(Vec::new())); let mut tasks = JoinSet::new(); let task_limit = dataset.object_store().io_parallelism(); - let mut reservation = + let reservation = MemoryConsumer::new("MergeInsert").register(session_ctx.task_ctx().memory_pool()); while let Some((frag_id, batches)) = group_stream.next().await.transpose()? { @@ -4262,9 +4263,8 @@ mod tests { CoalescePartitionsExec ProjectionExec: expr=[_rowid@0 as _rowid, _rowaddr@1 as _rowaddr, value@2 as value, key@3 as key, __merge_source_sentinel@4 as __merge_source_sentinel, CASE WHEN _rowaddr@1 IS NULL THEN 2 WHEN _rowaddr@1 IS NOT NULL THEN 1 ELSE 0 END as __action] HashJoinExec: mode=CollectLeft, join_type=Right, on=[(key@0, key@1)], projection=[_rowid@1, _rowaddr@2, value@3, key@4, __merge_source_sentinel@5] - CooperativeExec - LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, \ - row_id=true, row_addr=true, full_filter=--, refine_filter=-- + LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, \ + row_id=true, row_addr=true, full_filter=--, refine_filter=-- RepartitionExec: partitioning=RoundRobinBatch(...), input_partitions=1 ProjectionExec: expr=[value@0 as value, key@1 as key, true as __merge_source_sentinel] StreamingTableExec: partition_sizes=1, projection=[value, key]" @@ -4311,8 +4311,7 @@ mod tests { CoalescePartitionsExec ProjectionExec: expr=[_rowid@0 as _rowid, _rowaddr@1 as _rowaddr, value@2 as value, key@3 as key, __merge_source_sentinel@4 as __merge_source_sentinel, CASE WHEN _rowaddr@1 IS NOT NULL THEN 1 ELSE 0 END as __action] HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(key@0, key@1)], projection=[_rowid@1, _rowaddr@2, value@3, key@4, __merge_source_sentinel@5] - CooperativeExec - LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- + LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- RepartitionExec... ProjectionExec: expr=[value@0 as value, key@1 as key, true as __merge_source_sentinel] StreamingTableExec: partition_sizes=1, projection=[value, key]" @@ -4359,8 +4358,7 @@ mod tests { CoalescePartitionsExec ProjectionExec: expr=[_rowid@0 as _rowid, _rowaddr@1 as _rowaddr, value@2 as value, key@3 as key, __merge_source_sentinel@4 as __merge_source_sentinel, CASE WHEN _rowaddr@1 IS NOT NULL AND value@2 > 20 THEN 1 ELSE 0 END as __action] HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(key@0, key@1)], projection=[_rowid@1, _rowaddr@2, value@3, key@4, __merge_source_sentinel@5] - CooperativeExec - LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- + LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- RepartitionExec... ProjectionExec: expr=[value@0 as value, key@1 as key, true as __merge_source_sentinel] StreamingTableExec: partition_sizes=1, projection=[value, key]" diff --git a/rust/lance/src/dataset/write/merge_insert/exec/delete.rs b/rust/lance/src/dataset/write/merge_insert/exec/delete.rs index 1302aeb69d9..34b704d9c7c 100644 --- a/rust/lance/src/dataset/write/merge_insert/exec/delete.rs +++ b/rust/lance/src/dataset/write/merge_insert/exec/delete.rs @@ -41,7 +41,7 @@ pub struct DeleteOnlyMergeInsertExec { input: Arc, dataset: Arc, params: MergeInsertParams, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, merge_stats: Arc>>, transaction: Arc>>, @@ -55,12 +55,12 @@ impl DeleteOnlyMergeInsertExec { params: MergeInsertParams, ) -> DFResult { let empty_schema = Arc::new(arrow_schema::Schema::empty()); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(empty_schema), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(Self { input, @@ -231,7 +231,7 @@ impl ExecutionPlan for DeleteOnlyMergeInsertExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/dataset/write/merge_insert/exec/write.rs b/rust/lance/src/dataset/write/merge_insert/exec/write.rs index 703d4e1f6c8..62c7bdf8058 100644 --- a/rust/lance/src/dataset/write/merge_insert/exec/write.rs +++ b/rust/lance/src/dataset/write/merge_insert/exec/write.rs @@ -182,7 +182,7 @@ pub struct FullSchemaMergeInsertExec { input: Arc, dataset: Arc, params: MergeInsertParams, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, merge_stats: Arc>>, transaction: Arc>>, @@ -200,12 +200,12 @@ impl FullSchemaMergeInsertExec { params: MergeInsertParams, ) -> DFResult { let empty_schema = Arc::new(arrow_schema::Schema::empty()); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(empty_schema), Partitioning::UnknownPartitioning(1), EmissionType::Final, Boundedness::Bounded, - ); + )); // Check if ON columns match the schema's unenforced primary key let field_ids: Vec = params @@ -792,7 +792,7 @@ impl ExecutionPlan for FullSchemaMergeInsertExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/io/exec/filter.rs b/rust/lance/src/io/exec/filter.rs index d3a2a1e6d82..3a36f8d6712 100644 --- a/rust/lance/src/io/exec/filter.rs +++ b/rust/lance/src/io/exec/filter.rs @@ -52,7 +52,7 @@ impl ExecutionPlan for LanceFilterExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { self.filter.properties() } diff --git a/rust/lance/src/io/exec/filtered_read.rs b/rust/lance/src/io/exec/filtered_read.rs index 2360c1a2212..8404e723050 100644 --- a/rust/lance/src/io/exec/filtered_read.rs +++ b/rust/lance/src/io/exec/filtered_read.rs @@ -1469,7 +1469,7 @@ impl FilteredReadOptions { pub struct FilteredReadExec { dataset: Arc, options: FilteredReadOptions, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, index_input: Option>, // Precomputed internal plan @@ -1568,12 +1568,12 @@ impl FilteredReadExec { FilteredReadThreadingMode::MultiplePartitions(n) => n, }; - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema), Partitioning::RoundRobinBatch(num_partitions), EmissionType::Incremental, Boundedness::Bounded, - ); + )); let metrics = ExecutionPlanMetricsSet::new(); @@ -1836,7 +1836,7 @@ impl ExecutionPlan for FilteredReadExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/io/exec/fts.rs b/rust/lance/src/io/exec/fts.rs index 1cf8eddc3ec..c80b28e86ab 100644 --- a/rust/lance/src/io/exec/fts.rs +++ b/rust/lance/src/io/exec/fts.rs @@ -84,7 +84,7 @@ pub struct MatchQueryExec { params: FtsSearchParams, prefilter_source: PreFilterSource, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -118,12 +118,12 @@ impl MatchQueryExec { params: FtsSearchParams, prefilter_source: PreFilterSource, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(FTS_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Self { dataset, query, @@ -321,15 +321,11 @@ impl ExecutionPlan for MatchQueryExec { ))) } - fn statistics(&self) -> DataFusionResult { - Ok(Statistics::new_unknown(&FTS_SCHEMA)) - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -537,11 +533,6 @@ impl ExecutionPlan for FlatMatchFilterExec { ))) } - fn statistics(&self) -> DataFusionResult { - #[allow(deprecated)] - self.input.statistics() - } - fn partition_statistics(&self, partition: Option) -> DataFusionResult { self.input.partition_statistics(partition) } @@ -550,7 +541,7 @@ impl ExecutionPlan for FlatMatchFilterExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { self.input.properties() } @@ -567,7 +558,7 @@ pub struct FlatMatchQueryExec { params: FtsSearchParams, unindexed_input: Arc, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -601,12 +592,12 @@ impl FlatMatchQueryExec { params: FtsSearchParams, unindexed_input: Arc, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(FTS_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { dataset, query, @@ -714,15 +705,11 @@ impl ExecutionPlan for FlatMatchQueryExec { ))) } - fn statistics(&self) -> DataFusionResult { - Ok(Statistics::new_unknown(&FTS_SCHEMA)) - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -737,7 +724,7 @@ pub struct PhraseQueryExec { query: PhraseQuery, params: FtsSearchParams, prefilter_source: PreFilterSource, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -771,12 +758,12 @@ impl PhraseQueryExec { mut params: FtsSearchParams, prefilter_source: PreFilterSource, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(FTS_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Final, Boundedness::Bounded, - ); + )); params = params.with_phrase_slop(Some(query.slop)); Self { @@ -947,15 +934,11 @@ impl ExecutionPlan for PhraseQueryExec { ))) } - fn statistics(&self) -> DataFusionResult { - Ok(Statistics::new_unknown(&FTS_SCHEMA)) - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -971,7 +954,7 @@ pub struct BoostQueryExec { positive: Arc, negative: Arc, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -1003,12 +986,12 @@ impl BoostQueryExec { positive: Arc, negative: Arc, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(FTS_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Self { query, params, @@ -1122,15 +1105,11 @@ impl ExecutionPlan for BoostQueryExec { ))) } - fn statistics(&self) -> DataFusionResult { - Ok(Statistics::new_unknown(&FTS_SCHEMA)) - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -1147,7 +1126,7 @@ pub struct BooleanQueryExec { must: Option>, must_not: Arc, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -1186,12 +1165,12 @@ impl BooleanQueryExec { must: Option>, must_not: Arc, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(FTS_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Self { query, params, @@ -1377,15 +1356,11 @@ impl ExecutionPlan for BooleanQueryExec { ))) } - fn statistics(&self) -> DataFusionResult { - Ok(Statistics::new_unknown(&FTS_SCHEMA)) - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } } diff --git a/rust/lance/src/io/exec/knn.rs b/rust/lance/src/io/exec/knn.rs index 1e527a0c4f6..669c3aabcc9 100644 --- a/rust/lance/src/io/exec/knn.rs +++ b/rust/lance/src/io/exec/knn.rs @@ -118,7 +118,7 @@ pub struct KNNVectorDistanceExec { pub distance_type: DistanceType, output_schema: SchemaRef, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -164,10 +164,13 @@ impl KNNVectorDistanceExec { // This node has the same partitioning & boundedness as the input node // but it destroys any ordering. - let properties = input - .properties() - .clone() - .with_eq_properties(EquivalenceProperties::new(output_schema.clone())); + let properties = Arc::new( + input + .properties() + .as_ref() + .clone() + .with_eq_properties(EquivalenceProperties::new(output_schema.clone())), + ); Ok(Self { input, @@ -288,7 +291,7 @@ impl ExecutionPlan for KNNVectorDistanceExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -376,7 +379,7 @@ pub struct ANNIvfPartitionExec { /// The UUIDs of the indices to search. pub index_uuids: Vec, - pub properties: PlanProperties, + pub properties: Arc, pub metrics: ExecutionPlanMetricsSet, } @@ -392,12 +395,12 @@ impl ANNIvfPartitionExec { } let schema = KNN_PARTITION_SCHEMA.clone(); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Ok(Self { dataset, @@ -449,17 +452,17 @@ impl ExecutionPlan for ANNIvfPartitionExec { KNN_PARTITION_SCHEMA.clone() } - fn statistics(&self) -> DataFusionResult { + fn properties(&self) -> &Arc { + &self.properties + } + + fn partition_statistics(&self, _partition: Option) -> DataFusionResult { Ok(Statistics { num_rows: Precision::Exact(self.query.minimum_nprobes), ..Statistics::new_unknown(self.schema().as_ref()) }) } - fn properties(&self) -> &PlanProperties { - &self.properties - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } @@ -604,7 +607,7 @@ pub struct ANNIvfSubIndexExec { prefilter_source: PreFilterSource, /// Datafusion Plan Properties - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -623,12 +626,12 @@ impl ANNIvfSubIndexExec { PART_ID_COLUMN ))); } - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(KNN_INDEX_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(Self { input, dataset, @@ -1113,7 +1116,7 @@ impl ExecutionPlan for ANNIvfSubIndexExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -1151,17 +1154,17 @@ pub struct MultivectorScoringExec { // the inputs are sorted ANN search results inputs: Vec>, query: Query, - properties: PlanProperties, + properties: Arc, } impl MultivectorScoringExec { pub fn try_new(inputs: Vec>, query: Query) -> Result { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(KNN_INDEX_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Final, Boundedness::Bounded, - ); + )); Ok(Self { inputs, @@ -1327,16 +1330,7 @@ impl ExecutionPlan for MultivectorScoringExec { ))) } - fn statistics(&self) -> DataFusionResult { - Ok(Statistics { - num_rows: Precision::Inexact( - self.query.k * self.query.refine_factor.unwrap_or(1) as usize, - ), - ..Statistics::new_unknown(self.schema().as_ref()) - }) - } - - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/io/exec/optimizer.rs b/rust/lance/src/io/exec/optimizer.rs index fa2b189c136..f031e10ce19 100644 --- a/rust/lance/src/io/exec/optimizer.rs +++ b/rust/lance/src/io/exec/optimizer.rs @@ -7,14 +7,14 @@ use std::sync::Arc; use super::TakeExec; use arrow_schema::Schema as ArrowSchema; +#[allow(deprecated)] +use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion::{ common::tree_node::{Transformed, TreeNode}, config::ConfigOptions, error::Result as DFResult, physical_optimizer::{PhysicalOptimizerRule, optimizer::PhysicalOptimizer}, - physical_plan::{ - ExecutionPlan, coalesce_batches::CoalesceBatchesExec, projection::ProjectionExec, - }, + physical_plan::{ExecutionPlan, projection::ProjectionExec}, }; use datafusion_physical_expr::{PhysicalExpr, expressions::Column}; @@ -70,6 +70,7 @@ impl CoalesceTake { } impl PhysicalOptimizerRule for CoalesceTake { + #[allow(deprecated)] fn optimize( &self, plan: Arc, diff --git a/rust/lance/src/io/exec/pushdown_scan.rs b/rust/lance/src/io/exec/pushdown_scan.rs index c83a2218762..0d49232c094 100644 --- a/rust/lance/src/io/exec/pushdown_scan.rs +++ b/rust/lance/src/io/exec/pushdown_scan.rs @@ -9,12 +9,10 @@ use arrow_array::types::{Int64Type, UInt64Type}; use arrow_array::{Array, BooleanArray, Int64Array, PrimitiveArray, RecordBatch, UInt32Array}; use arrow_schema::{DataType, Schema as ArrowSchema, SchemaRef}; use arrow_select::filter::filter_record_batch; -use datafusion::common::Statistics; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::col; use datafusion::logical_expr::interval_arithmetic::{Interval, NullableInterval}; use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}; -use datafusion::physical_expr::execution_props::ExecutionProps; use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ColumnarValue, PlanProperties}; @@ -97,7 +95,7 @@ pub struct LancePushdownScanExec { predicate: Expr, config: ScanConfig, output_schema: Arc, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -134,12 +132,12 @@ impl LancePushdownScanExec { } let output_schema = Arc::new(output_schema); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Ok(Self { dataset, @@ -185,10 +183,6 @@ impl ExecutionPlan for LancePushdownScanExec { } } - fn statistics(&self) -> datafusion::error::Result { - Ok(Statistics::new_unknown(self.output_schema.as_ref())) - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } @@ -239,7 +233,7 @@ impl ExecutionPlan for LancePushdownScanExec { ))) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } } @@ -677,8 +671,7 @@ impl FragmentScanner { .collect(); let schema = Arc::new(ArrowSchema::from(self.predicate_projection.as_ref()).try_into()?); - let props = ExecutionProps::new(); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::default().with_schema(schema); let mut simplifier = ExprSimplifier::new(context); let mut predicates = Vec::with_capacity(num_batches); diff --git a/rust/lance/src/io/exec/rowids.rs b/rust/lance/src/io/exec/rowids.rs index 7bdab60eae0..2589086dd6a 100644 --- a/rust/lance/src/io/exec/rowids.rs +++ b/rust/lance/src/io/exec/rowids.rs @@ -47,7 +47,7 @@ pub struct AddRowAddrExec { /// Position in the output schema where to insert the row address rowaddr_pos: usize, output_schema: SchemaRef, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -106,10 +106,13 @@ impl AddRowAddrExec { // Is just a simple projections, so it inherits the partitioning and // execution mode from parent. - let properties = input - .properties() - .clone() - .with_eq_properties(EquivalenceProperties::new(output_schema.clone())); + let properties = Arc::new( + input + .properties() + .as_ref() + .clone() + .with_eq_properties(EquivalenceProperties::new(output_schema.clone())), + ); Ok(Self { input, @@ -326,7 +329,7 @@ impl ExecutionPlan for AddRowAddrExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } } @@ -347,7 +350,7 @@ pub struct AddRowOffsetExec { input: Arc, row_addr_pos: usize, frag_id_to_offset: Arc>, - properties: PlanProperties, + properties: Arc, } impl AddRowOffsetExec { @@ -376,7 +379,13 @@ impl AddRowOffsetExec { let new_eq_props = EquivalenceProperties::new(schema).extend(input.properties().eq_properties.clone())?; - let properties = input.properties().clone().with_eq_properties(new_eq_props); + let properties = Arc::new( + input + .properties() + .as_ref() + .clone() + .with_eq_properties(new_eq_props), + ); Ok(Self { input, @@ -496,7 +505,7 @@ impl ExecutionPlan for AddRowOffsetExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/io/exec/scalar_index.rs b/rust/lance/src/io/exec/scalar_index.rs index f587ec22a91..004efafd4d1 100644 --- a/rust/lance/src/io/exec/scalar_index.rs +++ b/rust/lance/src/io/exec/scalar_index.rs @@ -14,7 +14,6 @@ use arrow_schema::{Schema, SchemaRef}; use async_recursion::async_recursion; use async_trait::async_trait; use datafusion::{ - common::{Statistics, stats::Precision}, physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, execution_plan::{Boundedness, EmissionType}, @@ -82,7 +81,7 @@ impl ScalarIndexLoader for Dataset { pub struct ScalarIndexExec { dataset: Arc, expr: ScalarIndexExpr, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -101,12 +100,12 @@ impl DisplayAs for ScalarIndexExec { impl ScalarIndexExec { pub fn new(dataset: Arc, expr: ScalarIndexExpr) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(INDEX_EXPR_RESULT_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { dataset, expr, @@ -217,10 +216,13 @@ impl ExecutionPlan for ScalarIndexExec { ))) } - fn statistics(&self) -> datafusion::error::Result { - Ok(Statistics { - num_rows: Precision::Exact(2), - ..Statistics::new_unknown(&INDEX_EXPR_RESULT_SCHEMA) + fn partition_statistics( + &self, + _partition: Option, + ) -> datafusion::error::Result { + Ok(datafusion::physical_plan::Statistics { + num_rows: datafusion::common::stats::Precision::Exact(2), + ..datafusion::physical_plan::Statistics::new_unknown(&INDEX_EXPR_RESULT_SCHEMA) }) } @@ -228,7 +230,7 @@ impl ExecutionPlan for ScalarIndexExec { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -249,7 +251,7 @@ pub struct MapIndexExec { column_name: String, index_name: String, input: Arc, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -272,12 +274,12 @@ impl MapIndexExec { index_name: String, input: Arc, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(INDEX_LOOKUP_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { dataset, column_name, @@ -426,7 +428,7 @@ impl ExecutionPlan for MapIndexExec { ))) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } @@ -448,7 +450,7 @@ pub struct MaterializeIndexExec { dataset: Arc, expr: ScalarIndexExpr, fragments: Arc>, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -510,12 +512,12 @@ impl MaterializeIndexExec { expr: ScalarIndexExpr, fragments: Arc>, ) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(MATERIALIZE_INDEX_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { dataset, expr, @@ -712,15 +714,11 @@ impl ExecutionPlan for MaterializeIndexExec { ))) } - fn statistics(&self) -> datafusion::error::Result { - Ok(Statistics::new_unknown(&MATERIALIZE_INDEX_SCHEMA)) - } - fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/io/exec/scan.rs b/rust/lance/src/io/exec/scan.rs index c7736eccfbc..00182f98b95 100644 --- a/rust/lance/src/io/exec/scan.rs +++ b/rust/lance/src/io/exec/scan.rs @@ -537,7 +537,7 @@ pub struct LanceScanExec { range: Option>, projection: Arc, output_schema: Arc, - properties: PlanProperties, + properties: Arc, config: LanceScanConfig, metrics: ExecutionPlanMetricsSet, } @@ -611,12 +611,12 @@ impl LanceScanExec { } let output_schema = Arc::new(output_schema); - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { dataset, fragments, @@ -710,11 +710,7 @@ impl ExecutionPlan for LanceScanExec { ))) } - fn metrics(&self) -> Option { - Some(self.metrics.clone_inner()) - } - - fn statistics(&self) -> datafusion::error::Result { + fn partition_statistics(&self, _partition: Option) -> Result { // Some fragments from older datasets might have the row count stats missing. let (row_count, is_exact) = self.fragments @@ -733,11 +729,15 @@ impl ExecutionPlan for LanceScanExec { Ok(Statistics { num_rows, - ..datafusion::physical_plan::Statistics::new_unknown(self.schema().as_ref()) + ..Statistics::new_unknown(self.schema().as_ref()) }) } - fn properties(&self) -> &PlanProperties { + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } + + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/io/exec/take.rs b/rust/lance/src/io/exec/take.rs index 0dd33d1440f..977a9c88dce 100644 --- a/rust/lance/src/io/exec/take.rs +++ b/rust/lance/src/io/exec/take.rs @@ -397,7 +397,7 @@ pub struct TakeExec { // The schema of the output output_schema: SchemaRef, input: Arc, - properties: PlanProperties, + properties: Arc, metrics: ExecutionPlanMetricsSet, } @@ -477,10 +477,13 @@ impl TakeExec { &projection, )); let output_arrow = Arc::new(ArrowSchema::from(output_schema.as_ref())); - let properties = input - .properties() - .clone() - .with_eq_properties(EquivalenceProperties::new(output_arrow.clone())); + let properties = Arc::new( + input + .properties() + .as_ref() + .clone() + .with_eq_properties(EquivalenceProperties::new(output_arrow.clone())), + ); Ok(Some(Self { dataset, @@ -651,7 +654,7 @@ impl ExecutionPlan for TakeExec { }) } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lance/src/io/exec/testing.rs b/rust/lance/src/io/exec/testing.rs index 44945f96ce4..2d5911a4e46 100644 --- a/rust/lance/src/io/exec/testing.rs +++ b/rust/lance/src/io/exec/testing.rs @@ -10,7 +10,6 @@ use std::sync::Arc; use arrow_array::RecordBatch; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::{ - common::Statistics, execution::context::TaskContext, physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, @@ -23,17 +22,17 @@ use futures::StreamExt; #[derive(Debug)] pub struct TestingExec { pub(crate) batches: Vec, - properties: PlanProperties, + properties: Arc, } impl TestingExec { pub(crate) fn new(batches: Vec) -> Self { - let properties = PlanProperties::new( + let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(batches[0].schema()), Partitioning::RoundRobinBatch(1), EmissionType::Incremental, Boundedness::Bounded, - ); + )); Self { batches, properties, @@ -81,11 +80,7 @@ impl ExecutionPlan for TestingExec { Ok(Box::pin(stream)) } - fn statistics(&self) -> datafusion::error::Result { - Ok(Statistics::new_unknown(self.schema().as_ref())) - } - - fn properties(&self) -> &datafusion::physical_plan::PlanProperties { + fn properties(&self) -> &Arc { &self.properties } } diff --git a/rust/lance/src/io/exec/utils.rs b/rust/lance/src/io/exec/utils.rs index 2038dbe2c25..976bc177e88 100644 --- a/rust/lance/src/io/exec/utils.rs +++ b/rust/lance/src/io/exec/utils.rs @@ -353,7 +353,7 @@ impl ExecutionPlan for ReplayExec { } } - fn properties(&self) -> &datafusion::physical_plan::PlanProperties { + fn properties(&self) -> &Arc { self.input.properties() } }