From 5a1442ee0b783299aca2d4e564d8ad99f70b269f Mon Sep 17 00:00:00 2001 From: Rahil Chertara Date: Fri, 7 Nov 2025 06:36:29 -0800 Subject: [PATCH 1/4] fix: ensure blob encoding work when using file reader directly --- Cargo.lock | 657 +++++++++++++++++++------------ rust/lance-file/src/v2/reader.rs | 0 2 files changed, 398 insertions(+), 259 deletions(-) create mode 100644 rust/lance-file/src/v2/reader.rs diff --git a/Cargo.lock b/Cargo.lock index 1ed7c5c6889..bf52d73df47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,9 +181,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ded5f9a03ac8f24d1b8a25101ee812cd32cdc8c50a4c50237de2c4915850e73" +checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" dependencies = [ "rustversion", ] @@ -202,9 +202,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -223,9 +223,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", @@ -237,9 +237,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c" +checksum = 
"4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", "arrow-buffer", @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", @@ -268,9 +268,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", @@ -290,9 +290,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -305,9 +305,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", @@ -318,9 +318,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ 
-334,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -358,9 +358,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -371,9 +371,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -384,20 +384,20 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", "arrow-array", @@ -409,9 +409,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.2.0" +version = 
"57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -459,9 +459,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.37" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" dependencies = [ "compression-codecs", "compression-core", @@ -524,7 +524,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -559,7 +559,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -594,9 +594,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.13" +version = "1.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c456581cb3c77fafcc8c67204a70680d40b61112d6da78c77bd31d945b65f1b5" +checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -624,9 +624,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.11" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" +checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -646,9 +646,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.37.0" 
+version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a" +checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" dependencies = [ "cc", "cmake", @@ -658,9 +658,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c635c2dc792cb4a11ce1a4f392a925340d1bdf499289b5ec1ec6810954eb43f5" +checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -672,6 +672,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", + "bytes-utils", "fastrand 2.3.0", "http 0.2.12", "http 1.4.0", @@ -685,9 +686,9 @@ dependencies = [ [[package]] name = "aws-sdk-dynamodb" -version = "1.104.0" +version = "1.105.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04c47115cc8d46dcc94a9a81e7a3384cea859283c1a737729691d4221f11584" +checksum = "82d2214c2ad3a175d3ece5a5af26916c29caa3e12e9e05b3cb8ed5e837b54b67" dependencies = [ "aws-credential-types", "aws-runtime", @@ -709,9 +710,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.122.0" +version = "1.123.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94c2ca0cba97e8e279eb6c0b2d0aa10db5959000e602ab2b7c02de6b85d4c19b" +checksum = "c018f22146966fdd493a664f62ee2483dff256b42a08c125ab6a084bde7b77fe" dependencies = [ "aws-credential-types", "aws-runtime", @@ -744,9 +745,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.93.0" +version = "1.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcb38bb33fc0a11f1ffc3e3e85669e0a11a37690b86f77e75306d8f369146a0" +checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434" dependencies = [ "aws-credential-types", "aws-runtime", @@ -768,9 +769,9 @@ 
dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.95.0" +version = "1.96.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ada8ffbea7bd1be1f53df1dadb0f8fdb04badb13185b3321b929d1ee3caad09" +checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981" dependencies = [ "aws-credential-types", "aws-runtime", @@ -792,9 +793,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.97.0" +version = "1.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6443ccadc777095d5ed13e21f5c364878c9f5bad4e35187a6cdbd863b0afcad" +checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409" dependencies = [ "aws-credential-types", "aws-runtime", @@ -817,9 +818,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.8" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efa49f3c607b92daae0c078d48a4571f599f966dce3caee5f1ea55c4d9073f99" +checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -845,9 +846,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.11" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52eec3db979d18cb807fc1070961cc51d87d069abe9ab57917769687368a8c6c" +checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13" dependencies = [ "futures-util", "pin-project-lite", @@ -856,9 +857,9 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.64.3" +version = "0.64.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddcf418858f9f3edd228acb8759d77394fed7531cce78d02bdda499025368439" +checksum = "180dddf5ef0f52a2f99e2fada10e16ea610e507ef6148a42bdc4d5867596aa00" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -877,9 +878,9 @@ dependencies = [ [[package]] 
name = "aws-smithy-eventstream" -version = "0.60.18" +version = "0.60.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b9c7354a3b13c66f60fe4616d6d1969c9fd36b1b5333a5dfb3ee716b33c588" +checksum = "1c0b3e587fbaa5d7f7e870544508af8ce82ea47cd30376e69e1e37c4ac746f79" dependencies = [ "aws-smithy-types", "bytes", @@ -888,9 +889,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.63.3" +version = "0.63.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630e67f2a31094ffa51b210ae030855cb8f3b7ee1329bdd8d085aaf61e8b97fc" +checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", @@ -910,9 +911,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.9" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12fb0abf49ff0cab20fd31ac1215ed7ce0ea92286ba09e2854b42ba5cabe7525" +checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -940,27 +941,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.62.3" +version = "0.62.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb96aa208d62ee94104645f7b2ecaf77bf27edf161590b6224bfbac2832f979" +checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0a46543fbc94621080b3cf553eb4cbbdc41dd9780a30c4756400f0139440a1d" +checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.13" +version = "0.60.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cebbddb6f3a5bd81553643e9c7daf3cc3dc5b0b5f398ac668630e8a84e6fff0" +checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0" dependencies = [ "aws-smithy-types", "urlencoding", @@ -968,9 +969,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.10.0" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3df87c14f0127a0d77eb261c3bc45d5b4833e2a1f63583ebfb728e4852134ee" +checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -993,9 +994,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.11.3" +version = "1.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49952c52f7eebb72ce2a754d3866cc0f87b97d2a46146b79f80f3a93fb2b3716" +checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -1010,9 +1011,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.3" +version = "1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3a26048eeab0ddeba4b4f9d51654c79af8c3b32357dc5f336cee85ab331c33" +checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3" dependencies = [ "base64-simd", "bytes", @@ -1036,18 +1037,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.13" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" +checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.11" +version = "1.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" +checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1335,9 +1336,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bitpacking" @@ -1416,9 +1417,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.8.2" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c" +checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" dependencies = [ "bon-macros", "rustversion", @@ -1426,9 +1427,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.2" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" +checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" dependencies = [ "darling 0.23.0", "ident_case", @@ -1436,7 +1437,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1511,9 +1512,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.55" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ "find-msvc-tools", "jobserver", @@ -1611,9 +1612,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.57" +version = "4.5.59" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" +checksum = "c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" dependencies = [ "clap_builder", "clap_derive", @@ -1621,9 +1622,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.57" +version = "4.5.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" +checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" dependencies = [ "anstream", "anstyle", @@ -1640,14 +1641,14 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -2054,7 +2055,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2067,7 +2068,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2089,7 +2090,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2100,7 +2101,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core 0.23.0", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2586,7 +2587,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2847,9 +2848,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" 
+version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" dependencies = [ "powerfmt", "serde_core", @@ -2894,7 +2895,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2914,7 +2915,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core 0.20.2", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2985,7 +2986,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -3159,9 +3160,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "0.1.4" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" dependencies = [ "log", "regex", @@ -3169,9 +3170,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ "anstream", "anstyle", @@ -3197,7 +3198,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -3336,7 +3337,7 @@ version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ - 
"bitflags 2.10.0", + "bitflags 2.11.0", "rustc_version", ] @@ -3450,9 +3451,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -3465,9 +3466,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -3475,15 +3476,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -3492,9 +3493,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -3526,26 +3527,26 @@ dependencies = [ [[package]] 
name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-timer" @@ -3555,9 +3556,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -3567,7 +3568,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -3701,9 +3701,9 @@ dependencies = [ [[package]] name = "geographiclib-rs" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +checksum = "bc8f647bd562db28a15e0dce4a77d89e3a78f6f85943e782418ebdbb420ea3c4" dependencies = [ "libm", ] @@ -3756,6 +3756,19 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = 
"getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + [[package]] name = "gimli" version = "0.32.3" @@ -4349,6 +4362,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -4399,7 +4418,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "zstd", ] @@ -4421,6 +4440,8 @@ checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] @@ -4594,9 +4615,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495" +checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -4609,13 +4630,13 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf" +checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5332,7 +5353,7 @@ version = "3.0.0-beta.2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5389,6 +5410,12 @@ dependencies = [ "spin 0.9.8", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -5454,9 +5481,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libflate" @@ -5494,9 +5521,9 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "libc", - "redox_syscall 0.7.0", + "redox_syscall 0.7.1", ] [[package]] @@ -5820,15 +5847,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] @@ -5905,7 +5932,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5947,7 +5974,7 @@ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5964,17 +5991,17 @@ checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" [[package]] name = "native-tls" -version = "0.2.14" +version = 
"0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +checksum = "9d5d26952a508f321b4d3d2e80e78fc2603eaefcdf0c30783867f19586518bdc" dependencies = [ "libc", "log", "openssl", - "openssl-probe 0.1.6", + "openssl-probe", "openssl-sys", "schannel", - "security-framework 2.11.1", + "security-framework", "security-framework-sys", "tempfile", ] @@ -6143,7 +6170,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -6267,7 +6294,7 @@ version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "libc", "once_cell", "onig_sys", @@ -6325,7 +6352,7 @@ version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "foreign-types", "libc", @@ -6342,15 +6369,9 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - [[package]] name = "openssl-probe" version = "0.2.1" @@ -6460,9 +6481,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash", "arrow-array", @@ -6677,7 +6698,7 @@ checksum = 
"6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -6863,9 +6884,9 @@ dependencies = [ [[package]] name = "predicates" -version = "3.1.3" +version = "3.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" +checksum = "ada8f2932f28a27ee7b70dd6c1c39ea0675c55a36879ab92f3a715eaa1e63cfe" dependencies = [ "anstyle", "predicates-core", @@ -6873,15 +6894,15 @@ dependencies = [ [[package]] name = "predicates-core" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" +checksum = "cad38746f3166b4031b1a0d39ad9f954dd291e7854fcc0eed52ee41a0b50d144" [[package]] name = "predicates-tree" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" +checksum = "d0de1b847b39c8131db0467e9df1ff60e6d0562ab8e9a16e568ad0fdb372e2f2" dependencies = [ "predicates-core", "termtree", @@ -6904,7 +6925,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -6957,7 +6978,7 @@ checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.10.0", + "bitflags 2.11.0", "num-traits", "rand 0.9.2", "rand_chacha 0.9.0", @@ -6993,7 +7014,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.114", + "syn 2.0.116", "tempfile", ] @@ -7007,7 +7028,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -7349,16 +7370,16 @@ version = "0.5.18" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] name = "redox_syscall" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" +checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -7640,7 +7661,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.114", + "syn 2.0.116", "unicode-ident", ] @@ -7658,7 +7679,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.114", + "syn 2.0.116", "unicode-ident", ] @@ -7709,7 +7730,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys 0.4.15", @@ -7722,7 +7743,7 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys 0.11.0", @@ -7763,10 +7784,10 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe 0.2.1", + "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.5.1", + "security-framework", ] [[package]] @@ -7830,9 +7851,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" 
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "salsa20" @@ -7882,7 +7903,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -7934,24 +7955,11 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.11.1" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework" -version = "3.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" -dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -7960,9 +7968,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" dependencies = [ "core-foundation-sys", "libc", @@ -8011,7 +8019,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8022,7 +8030,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8068,7 +8076,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] 
[[package]] @@ -8080,7 +8088,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8198,9 +8206,9 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" +checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" dependencies = [ "num-bigint", "num-traits", @@ -8253,7 +8261,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8370,7 +8378,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8437,7 +8445,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8449,7 +8457,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8472,7 +8480,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.114", + "syn 2.0.116", "typify", "walkdir", ] @@ -8519,9 +8527,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" dependencies = [ "proc-macro2", "quote", @@ -8545,7 +8553,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8554,7 +8562,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" 
dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -8740,12 +8748,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.24.0" +version = "3.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand 2.3.0", - "getrandom 0.3.4", + "getrandom 0.4.1", "once_cell", "rustix 1.1.3", "windows-sys 0.61.2", @@ -8776,7 +8784,7 @@ checksum = "be35209fd0781c5401458ab66e4f98accf63553e8fae7425503e92fdd319783b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8805,7 +8813,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8816,7 +8824,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -8984,7 +8992,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -9064,9 +9072,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.9+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" dependencies = [ "winnow", ] @@ -9093,7 +9101,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "bytes", "http 1.4.0", "http-body 1.0.1", 
@@ -9111,7 +9119,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "async-compression", - "bitflags 2.10.0", + "bitflags 2.11.0", "bytes", "futures-core", "futures-util", @@ -9159,7 +9167,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -9268,7 +9276,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.114", + "syn 2.0.116", "thiserror 2.0.18", "unicode-ident", ] @@ -9286,7 +9294,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.114", + "syn 2.0.116", "typify-impl", ] @@ -9319,9 +9327,9 @@ checksum = "6b12e05d9e06373163a9bb6bb8c263c261b396643a99445fe6b9811fd376581b" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization" @@ -9353,6 +9361,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unicode_categories" version = "0.1.1" @@ -9436,11 +9450,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.20.0" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" +checksum = 
"b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.1", "js-sys", "serde_core", "wasm-bindgen", @@ -9531,6 +9545,15 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasm-bindgen" version = "0.2.108" @@ -9577,7 +9600,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "wasm-bindgen-shared", ] @@ -9590,6 +9613,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -9603,6 +9648,18 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" version = "0.3.85" @@ -9693,7 +9750,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -9704,7 +9761,7 @@ checksum = 
"3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -10010,6 +10067,88 @@ name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.116", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.116", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" 
+dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "wkb" @@ -10104,28 +10243,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.38" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57cf3aa6855b23711ee9852dfc97dfaa51c45feaba5b645d0c777414d494a961" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.38" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a616990af1a287837c4fe6596ad77ef57948f787e46ce28e166facc0cc1cb75" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -10145,7 +10284,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "synstructure", ] @@ -10185,20 +10324,20 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] name = "zlib-rs" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7948af682ccbc3342b6e9420e8c51c1fe5d7bf7756002b4a3c6cabfe96a7e3c" +checksum = "3a33bbf307b25a1774cee0687694ec72fa7814b3ab5c1c12a9d2fc6a36fc439c" [[package]] name = "zmij" -version = "1.0.19" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" +checksum = 
"b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zstd" diff --git a/rust/lance-file/src/v2/reader.rs b/rust/lance-file/src/v2/reader.rs new file mode 100644 index 00000000000..e69de29bb2d From 74137f81204dbfb01570dac3462bf06185a2308b Mon Sep 17 00:00:00 2001 From: Rahil Chertara Date: Mon, 10 Nov 2025 09:05:17 -0800 Subject: [PATCH 2/4] trying out alternative idea --- java/lance-jni/src/file_reader.rs | 32 +++++-- .../java/org/lance/FileReaderWriterTest.java | 85 +++++++++++++++++++ 2 files changed, 111 insertions(+), 6 deletions(-) diff --git a/java/lance-jni/src/file_reader.rs b/java/lance-jni/src/file_reader.rs index ccaac121579..a3641853c89 100644 --- a/java/lance-jni/src/file_reader.rs +++ b/java/lance-jni/src/file_reader.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors +use std::collections::BTreeMap; use std::ops::Range; use std::sync::{Arc, Mutex}; @@ -20,7 +21,7 @@ use jni::{ }; use lance::io::ObjectStore; use lance_core::cache::LanceCache; -use lance_core::datatypes::Schema; +use lance_core::datatypes::{OnMissing, Projection, Schema}; use lance_encoding::decoder::{DecoderPlugins, FilterExpression}; use lance_file::reader::{FileReader, FileReaderOptions, ReaderProjection}; use lance_io::object_store::{ObjectStoreParams, ObjectStoreRegistry}; @@ -239,15 +240,34 @@ pub extern "system" fn Java_org_lance_file_LanceFileReader_readAllNative( }; let file_version = reader.inner.metadata().version(); + let base_schema = Schema::try_from(reader.schema()?.as_ref())?; if !projected_names.is_null() { - let schema = Schema::try_from(reader.schema()?.as_ref())?; let column_names: Vec = env.get_strings(&projected_names)?; - let names: Vec<&str> = column_names.iter().map(|s| s.as_str()).collect(); - reader_projection = Some(ReaderProjection::from_column_names( + + // Build field_id_to_column_index mapping from base schema (file layout) + let field_id_to_column_index = 
base_schema + .fields_pre_order() + .filter(|field| { + file_version < LanceFileVersion::V2_1 + || field.is_leaf() + || field.is_packed_struct() + }) + .enumerate() + .map(|(idx, field)| (field.id as u32, idx as u32)) + .collect::>(); + + // Use Projection to get transformed schema (with blob fields as descriptors) + let projection = Projection::empty(Arc::new(base_schema.clone())) + .union_columns(&column_names, OnMissing::Error)?; + let transformed_schema = projection.to_bare_schema(); + + // Use from_field_ids with transformed schema + // This tells the decoder to expect Struct types for blob fields + reader_projection = Some(ReaderProjection::from_field_ids( file_version, - &schema, - names.as_slice(), + &transformed_schema, + &field_id_to_column_index, )?); } diff --git a/java/src/test/java/org/lance/FileReaderWriterTest.java b/java/src/test/java/org/lance/FileReaderWriterTest.java index c645acdcaa2..690f6a4988c 100644 --- a/java/src/test/java/org/lance/FileReaderWriterTest.java +++ b/java/src/test/java/org/lance/FileReaderWriterTest.java @@ -20,11 +20,13 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.ipc.ArrowReader; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.Assertions; @@ -304,4 +306,87 @@ void testWriteNullSchemaMetadata(@TempDir Path tempDir) throws Exception { } } } + + @Test + void testBlobEncodingReturnsDescriptors(@TempDir Path tempDir) throws Exception { + String filePath = tempDir.resolve("test_blob.lance").toString(); + BufferAllocator allocator = 
new RootAllocator(); + + // Step 1: Write blob-encoded data + Map blobMetadata = new HashMap<>(); + blobMetadata.put("lance-encoding:blob", "true"); + + Field blobField = + new Field( + "blob_data", + new FieldType(true, ArrowType.LargeBinary.INSTANCE, null, blobMetadata), + Collections.emptyList()); + + Schema schema = new Schema(Collections.singletonList(blobField), null); + + try (LanceFileWriter writer = + LanceFileWriter.open(filePath, allocator, null, Collections.emptyMap())) { + try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + root.allocateNew(); + + LargeVarBinaryVector blobVector = (LargeVarBinaryVector) root.getVector("blob_data"); + + // Write 5 blobs + for (int i = 0; i < 5; i++) { + byte[] data = new byte[100 * (i + 1)]; // Different sizes + Arrays.fill(data, (byte) i); + blobVector.setSafe(i, data); + } + + root.setRowCount(5); + writer.write(root); + } + } + + // Step 2: Read back and verify + try (LanceFileReader reader = LanceFileReader.open(filePath, allocator)) { + // Check schema + Schema readSchema = reader.schema(); + Field readField = readSchema.getFields().get(0); + + // Check if blob metadata is preserved + assertTrue( + readField.getMetadata().containsKey("lance-encoding:blob"), + "Blob metadata should be preserved in schema"); + + // Read batch - must pass column names to trigger schema usage for blob encoding + try (ArrowReader batch = reader.readAll(Collections.singletonList("blob_data"), null, 10)) { + batch.loadNextBatch(); // Actually load the data + VectorSchemaRoot root = batch.getVectorSchemaRoot(); + + // Get the blob column + org.apache.arrow.vector.FieldVector column = root.getVector("blob_data"); + // Check if it's a struct with position and size (means the blob encoding happened) + if (column.getField().getType() instanceof ArrowType.Struct) { + // The struct should have 'position' and 'size' fields + assertEquals( + 2, + column.getField().getChildren().size(), + "Struct should have 2 fields 
(position and size)"); + + } else if (column.getField().getType() instanceof ArrowType.LargeBinary) { + // This is what currently happens - Java materializes + LargeVarBinaryVector binaryVector = (LargeVarBinaryVector) column; + + for (int i = 0; i < Math.min(5, root.getRowCount()); i++) { + byte[] data = binaryVector.get(i); + System.out.println("Row " + i + ": " + data.length + " bytes"); + } + // Fail the test to demonstrate the issue + fail( + "Java LanceFileReader materializes blobs instead of returning descriptors. " + + "Expected struct but got " + + column.getField().getType()); + } else { + fail("Unexpected type: " + column.getField().getType()); + } + } + } + allocator.close(); + } } From 58a4303eb09ecd94810f0c4d98ec8f6666e35e24 Mon Sep 17 00:00:00 2001 From: Rahil Chertara Date: Tue, 17 Feb 2026 09:52:29 -0500 Subject: [PATCH 3/4] address weston and tim feedback --- java/lance-jni/Cargo.lock | 2 + java/lance-jni/src/file_reader.rs | 32 ++++--- .../java/org/lance/file/BlobReadMode.java | 41 +++++++++ .../java/org/lance/file/LanceFileReader.java | 34 ++++++- .../java/org/lance/FileReaderWriterTest.java | 89 ++++++++++--------- rust/lance-file/src/v2/reader.rs | 0 6 files changed, 143 insertions(+), 55 deletions(-) create mode 100644 java/src/main/java/org/lance/file/BlobReadMode.java delete mode 100644 rust/lance-file/src/v2/reader.rs diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index 3193de8daa4..feaf19e96a6 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -3494,6 +3494,7 @@ dependencies = [ "log", "pin-project", "prost", + "prost-build", "snafu", "tokio", "tracing", @@ -3702,6 +3703,7 @@ dependencies = [ "serde", "shellexpand", "snafu", + "tempfile", "tokio", "tracing", "url", diff --git a/java/lance-jni/src/file_reader.rs b/java/lance-jni/src/file_reader.rs index a3641853c89..a79b933e2ed 100644 --- a/java/lance-jni/src/file_reader.rs +++ b/java/lance-jni/src/file_reader.rs @@ -21,8 +21,9 @@ use jni::{ }; 
use lance::io::ObjectStore; use lance_core::cache::LanceCache; -use lance_core::datatypes::{OnMissing, Projection, Schema}; +use lance_core::datatypes::{BlobHandling, OnMissing, Projection, Schema}; use lance_encoding::decoder::{DecoderPlugins, FilterExpression}; +use lance_encoding::version::LanceFileVersion; use lance_file::reader::{FileReader, FileReaderOptions, ReaderProjection}; use lance_io::object_store::{ObjectStoreParams, ObjectStoreRegistry}; use lance_io::{ @@ -219,6 +220,7 @@ pub extern "system" fn Java_org_lance_file_LanceFileReader_readAllNative( projected_names: JObject, selection_ranges: JObject, stream_addr: jlong, + blob_read_mode: jint, ) { let result = (|| -> Result<()> { let mut read_parameter = ReadBatchParams::default(); @@ -242,10 +244,25 @@ pub extern "system" fn Java_org_lance_file_LanceFileReader_readAllNative( let file_version = reader.inner.metadata().version(); let base_schema = Schema::try_from(reader.schema()?.as_ref())?; - if !projected_names.is_null() { - let column_names: Vec = env.get_strings(&projected_names)?; + let blob_handling = if blob_read_mode == 1 { + BlobHandling::BlobsDescriptions + } else { + BlobHandling::AllBinary + }; + + { + let mut projection = + Projection::empty(Arc::new(base_schema.clone())).with_blob_handling(blob_handling); + + if !projected_names.is_null() { + let column_names: Vec = env.get_strings(&projected_names)?; + projection = projection.union_columns(&column_names, OnMissing::Error)?; + } else { + projection = projection.union_predicate(|_| true); + } + + let transformed_schema = projection.to_bare_schema(); - // Build field_id_to_column_index mapping from base schema (file layout) let field_id_to_column_index = base_schema .fields_pre_order() .filter(|field| { @@ -257,13 +274,6 @@ pub extern "system" fn Java_org_lance_file_LanceFileReader_readAllNative( .map(|(idx, field)| (field.id as u32, idx as u32)) .collect::>(); - // Use Projection to get transformed schema (with blob fields as descriptors) - 
let projection = Projection::empty(Arc::new(base_schema.clone())) - .union_columns(&column_names, OnMissing::Error)?; - let transformed_schema = projection.to_bare_schema(); - - // Use from_field_ids with transformed schema - // This tells the decoder to expect Struct types for blob fields reader_projection = Some(ReaderProjection::from_field_ids( file_version, &transformed_schema, diff --git a/java/src/main/java/org/lance/file/BlobReadMode.java b/java/src/main/java/org/lance/file/BlobReadMode.java new file mode 100644 index 00000000000..d7be0381fbf --- /dev/null +++ b/java/src/main/java/org/lance/file/BlobReadMode.java @@ -0,0 +1,41 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.file; + +/** + * Controls how blob-encoded columns are returned when reading a Lance file. + * + *

<p>Blob columns can be read in two modes: + * + * <ul> + *
  <li>{@link #CONTENT} — materializes the full binary content (default) + *
  <li>{@link #DESCRIPTOR} — returns a struct with {@code position} and {@code size} fields + * </ul>
+ */ +public enum BlobReadMode { + /** Return blob columns as materialized binary content (default). */ + CONTENT(0), + /** Return blob columns as descriptors (struct with position and size). */ + DESCRIPTOR(1); + + private final int value; + + BlobReadMode(int value) { + this.value = value; + } + + public int getValue() { + return value; + } +} diff --git a/java/src/main/java/org/lance/file/LanceFileReader.java b/java/src/main/java/org/lance/file/LanceFileReader.java index 9777e529f48..2551d4db4cc 100644 --- a/java/src/main/java/org/lance/file/LanceFileReader.java +++ b/java/src/main/java/org/lance/file/LanceFileReader.java @@ -54,7 +54,8 @@ private native void readAllNative( int batchSize, @Nullable List projectedNames, @Nullable List ranges, - long streamMemoryAddress) + long streamMemoryAddress, + int blobReadMode) throws IOException; private LanceFileReader() {} @@ -126,16 +127,43 @@ private Schema load_schema() throws IOException { /** * Read all rows from the Lance file * - * @param batchSize the maximum number of rows to read in a single batch + *

Blob-encoded columns are returned as materialized binary content. + * * @param projectedNames optional list of column names to project; if null, all columns are read * @param ranges optional array of ranges to read; if null, all rows are read. + * @param batchSize the maximum number of rows to read in a single batch * @return an ArrowReader for the Lance file */ public ArrowReader readAll( @Nullable List projectedNames, @Nullable List ranges, int batchSize) throws IOException { + return readAll(projectedNames, ranges, batchSize, BlobReadMode.CONTENT); + } + + /** + * Read all rows from the Lance file with control over blob column output format. + * + * @param projectedNames optional list of column names to project; if null, all columns are read + * @param ranges optional array of ranges to read; if null, all rows are read. + * @param batchSize the maximum number of rows to read in a single batch + * @param blobReadMode how to return blob-encoded columns: as materialized binary content ({@link + * BlobReadMode#CONTENT}) or as descriptors with position and size ({@link + * BlobReadMode#DESCRIPTOR}) + * @return an ArrowReader for the Lance file + */ + public ArrowReader readAll( + @Nullable List projectedNames, + @Nullable List ranges, + int batchSize, + BlobReadMode blobReadMode) + throws IOException { try (ArrowArrayStream ffiArrowArrayStream = ArrowArrayStream.allocateNew(allocator)) { - readAllNative(batchSize, projectedNames, ranges, ffiArrowArrayStream.memoryAddress()); + readAllNative( + batchSize, + projectedNames, + ranges, + ffiArrowArrayStream.memoryAddress(), + blobReadMode.getValue()); return Data.importArrayStream(allocator, ffiArrowArrayStream); } } diff --git a/java/src/test/java/org/lance/FileReaderWriterTest.java b/java/src/test/java/org/lance/FileReaderWriterTest.java index 690f6a4988c..a5196d4be5c 100644 --- a/java/src/test/java/org/lance/FileReaderWriterTest.java +++ b/java/src/test/java/org/lance/FileReaderWriterTest.java @@ -13,6 +13,7 @@ */ 
package org.lance; +import org.lance.file.BlobReadMode; import org.lance.file.LanceFileReader; import org.lance.file.LanceFileWriter; import org.lance.util.Range; @@ -307,12 +308,7 @@ void testWriteNullSchemaMetadata(@TempDir Path tempDir) throws Exception { } } - @Test - void testBlobEncodingReturnsDescriptors(@TempDir Path tempDir) throws Exception { - String filePath = tempDir.resolve("test_blob.lance").toString(); - BufferAllocator allocator = new RootAllocator(); - - // Step 1: Write blob-encoded data + private void writeBlobFile(String filePath, BufferAllocator allocator) throws Exception { Map blobMetadata = new HashMap<>(); blobMetadata.put("lance-encoding:blob", "true"); @@ -331,9 +327,8 @@ void testBlobEncodingReturnsDescriptors(@TempDir Path tempDir) throws Exception LargeVarBinaryVector blobVector = (LargeVarBinaryVector) root.getVector("blob_data"); - // Write 5 blobs for (int i = 0; i < 5; i++) { - byte[] data = new byte[100 * (i + 1)]; // Different sizes + byte[] data = new byte[100 * (i + 1)]; Arrays.fill(data, (byte) i); blobVector.setSafe(i, data); } @@ -342,48 +337,60 @@ void testBlobEncodingReturnsDescriptors(@TempDir Path tempDir) throws Exception writer.write(root); } } + } - // Step 2: Read back and verify - try (LanceFileReader reader = LanceFileReader.open(filePath, allocator)) { - // Check schema - Schema readSchema = reader.schema(); - Field readField = readSchema.getFields().get(0); + @Test + void testBlobDescriptorMode(@TempDir Path tempDir) throws Exception { + String filePath = tempDir.resolve("test_blob.lance").toString(); + BufferAllocator allocator = new RootAllocator(); + writeBlobFile(filePath, allocator); - // Check if blob metadata is preserved + try (LanceFileReader reader = LanceFileReader.open(filePath, allocator)) { assertTrue( - readField.getMetadata().containsKey("lance-encoding:blob"), + reader.schema().getFields().get(0).getMetadata().containsKey("lance-encoding:blob"), "Blob metadata should be preserved in schema"); - 
// Read batch - must pass column names to trigger schema usage for blob encoding + try (ArrowReader batch = + reader.readAll( + Collections.singletonList("blob_data"), null, 10, BlobReadMode.DESCRIPTOR)) { + assertTrue(batch.loadNextBatch()); + VectorSchemaRoot root = batch.getVectorSchemaRoot(); + assertEquals(5, root.getRowCount()); + + org.apache.arrow.vector.FieldVector column = root.getVector("blob_data"); + assertTrue( + column.getField().getType() instanceof ArrowType.Struct, + "DESCRIPTOR mode should return Struct but got " + column.getField().getType()); + assertEquals( + 2, + column.getField().getChildren().size(), + "Struct should have 2 fields (position and size)"); + } + } + allocator.close(); + } + + @Test + void testBlobContentMode(@TempDir Path tempDir) throws Exception { + String filePath = tempDir.resolve("test_blob.lance").toString(); + BufferAllocator allocator = new RootAllocator(); + writeBlobFile(filePath, allocator); + + try (LanceFileReader reader = LanceFileReader.open(filePath, allocator)) { + // Default readAll (no BlobReadMode) should return materialized binary try (ArrowReader batch = reader.readAll(Collections.singletonList("blob_data"), null, 10)) { - batch.loadNextBatch(); // Actually load the data + assertTrue(batch.loadNextBatch()); VectorSchemaRoot root = batch.getVectorSchemaRoot(); + assertEquals(5, root.getRowCount()); - // Get the blob column org.apache.arrow.vector.FieldVector column = root.getVector("blob_data"); - // Check if it's a struct with position and size (means the blob encoding happened) - if (column.getField().getType() instanceof ArrowType.Struct) { - // The struct should have 'position' and 'size' fields - assertEquals( - 2, - column.getField().getChildren().size(), - "Struct should have 2 fields (position and size)"); - - } else if (column.getField().getType() instanceof ArrowType.LargeBinary) { - // This is what currently happens - Java materializes - LargeVarBinaryVector binaryVector = (LargeVarBinaryVector) 
column; - - for (int i = 0; i < Math.min(5, root.getRowCount()); i++) { - byte[] data = binaryVector.get(i); - System.out.println("Row " + i + ": " + data.length + " bytes"); - } - // Fail the test to demonstrate the issue - fail( - "Java LanceFileReader materializes blobs instead of returning descriptors. " - + "Expected struct but got " - + column.getField().getType()); - } else { - fail("Unexpected type: " + column.getField().getType()); + assertTrue( + column.getField().getType() instanceof ArrowType.LargeBinary, + "CONTENT mode should return LargeBinary but got " + column.getField().getType()); + + LargeVarBinaryVector binaryVector = (LargeVarBinaryVector) column; + for (int i = 0; i < 5; i++) { + assertEquals(100 * (i + 1), binaryVector.get(i).length); } } } diff --git a/rust/lance-file/src/v2/reader.rs b/rust/lance-file/src/v2/reader.rs deleted file mode 100644 index e69de29bb2d..00000000000 From 899b5f18ee1920872699d507d7bfaef94bcf3f4f Mon Sep 17 00:00:00 2001 From: Rahil Chertara Date: Thu, 19 Feb 2026 15:54:50 -0500 Subject: [PATCH 4/4] fix ci check error, introduce a fileReadOptions for extensibility, minor test fixes --- java/lance-jni/src/file_reader.rs | 9 ++- .../java/org/lance/file/FileReadOptions.java | 59 +++++++++++++++++++ .../java/org/lance/file/LanceFileReader.java | 18 +++--- .../java/org/lance/FileReaderWriterTest.java | 11 ++-- 4 files changed, 79 insertions(+), 18 deletions(-) create mode 100644 java/src/main/java/org/lance/file/FileReadOptions.java diff --git a/java/lance-jni/src/file_reader.rs b/java/lance-jni/src/file_reader.rs index a79b933e2ed..85da803295f 100644 --- a/java/lance-jni/src/file_reader.rs +++ b/java/lance-jni/src/file_reader.rs @@ -224,7 +224,6 @@ pub extern "system" fn Java_org_lance_file_LanceFileReader_readAllNative( ) { let result = (|| -> Result<()> { let mut read_parameter = ReadBatchParams::default(); - let mut reader_projection: Option = None; // We get reader here not from env.get_rust_field, because we need 
reader: MutexGuard has no relationship with the env lifecycle. // If we get reader from env.get_rust_field, we can't use env (can't borrow again) until we drop the reader. #[allow(unused_variables)] @@ -250,7 +249,7 @@ pub extern "system" fn Java_org_lance_file_LanceFileReader_readAllNative( BlobHandling::AllBinary }; - { + let reader_projection = { let mut projection = Projection::empty(Arc::new(base_schema.clone())).with_blob_handling(blob_handling); @@ -274,12 +273,12 @@ pub extern "system" fn Java_org_lance_file_LanceFileReader_readAllNative( .map(|(idx, field)| (field.id as u32, idx as u32)) .collect::>(); - reader_projection = Some(ReaderProjection::from_field_ids( + Some(ReaderProjection::from_field_ids( file_version, &transformed_schema, &field_id_to_column_index, - )?); - } + )?) + }; if !selection_ranges.is_null() { let mut ranges: Vec> = Vec::new(); diff --git a/java/src/main/java/org/lance/file/FileReadOptions.java b/java/src/main/java/org/lance/file/FileReadOptions.java new file mode 100644 index 00000000000..3d813c78eec --- /dev/null +++ b/java/src/main/java/org/lance/file/FileReadOptions.java @@ -0,0 +1,59 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.file; + +/** + * Options for reading a Lance file. + * + *

Use {@link #builder()} to create an instance. New options can be added here in the future + * without breaking existing callers. + */ +public class FileReadOptions { + private final BlobReadMode blobReadMode; + + private FileReadOptions(Builder builder) { + this.blobReadMode = builder.blobReadMode; + } + + /** Returns the blob read mode. Defaults to {@link BlobReadMode#CONTENT}. */ + public BlobReadMode getBlobReadMode() { + return blobReadMode; + } + + /** Creates a new builder with default options. */ + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private BlobReadMode blobReadMode = BlobReadMode.CONTENT; + + private Builder() {} + + /** + * Sets how blob-encoded columns are returned. + * + * @param blobReadMode {@link BlobReadMode#CONTENT} to materialize binary content, or {@link + * BlobReadMode#DESCRIPTOR} to return position/size descriptors + */ + public Builder blobReadMode(BlobReadMode blobReadMode) { + this.blobReadMode = blobReadMode; + return this; + } + + public FileReadOptions build() { + return new FileReadOptions(this); + } + } +} diff --git a/java/src/main/java/org/lance/file/LanceFileReader.java b/java/src/main/java/org/lance/file/LanceFileReader.java index 2551d4db4cc..e3962eb539a 100644 --- a/java/src/main/java/org/lance/file/LanceFileReader.java +++ b/java/src/main/java/org/lance/file/LanceFileReader.java @@ -125,9 +125,10 @@ private Schema load_schema() throws IOException { } /** - * Read all rows from the Lance file + * Read all rows from the Lance file. * - *

Blob-encoded columns are returned as materialized binary content. + *

Blob-encoded columns are returned as materialized binary content. Use {@link #readAll(List, + * List, int, FileReadOptions)} to control blob output format. * * @param projectedNames optional list of column names to project; if null, all columns are read * @param ranges optional array of ranges to read; if null, all rows are read. @@ -137,25 +138,24 @@ private Schema load_schema() throws IOException { public ArrowReader readAll( @Nullable List projectedNames, @Nullable List ranges, int batchSize) throws IOException { - return readAll(projectedNames, ranges, batchSize, BlobReadMode.CONTENT); + return readAll(projectedNames, ranges, batchSize, FileReadOptions.builder().build()); } /** - * Read all rows from the Lance file with control over blob column output format. + * Read all rows from the Lance file with additional read options. * * @param projectedNames optional list of column names to project; if null, all columns are read * @param ranges optional array of ranges to read; if null, all rows are read. * @param batchSize the maximum number of rows to read in a single batch - * @param blobReadMode how to return blob-encoded columns: as materialized binary content ({@link - * BlobReadMode#CONTENT}) or as descriptors with position and size ({@link - * BlobReadMode#DESCRIPTOR}) + * @param options file read options controlling output format (e.g. 
blob handling) * @return an ArrowReader for the Lance file + * @see FileReadOptions */ public ArrowReader readAll( @Nullable List projectedNames, @Nullable List ranges, int batchSize, - BlobReadMode blobReadMode) + FileReadOptions options) throws IOException { try (ArrowArrayStream ffiArrowArrayStream = ArrowArrayStream.allocateNew(allocator)) { readAllNative( @@ -163,7 +163,7 @@ public ArrowReader readAll( projectedNames, ranges, ffiArrowArrayStream.memoryAddress(), - blobReadMode.getValue()); + options.getBlobReadMode().getValue()); return Data.importArrayStream(allocator, ffiArrowArrayStream); } } diff --git a/java/src/test/java/org/lance/FileReaderWriterTest.java b/java/src/test/java/org/lance/FileReaderWriterTest.java index a5196d4be5c..a849a87c576 100644 --- a/java/src/test/java/org/lance/FileReaderWriterTest.java +++ b/java/src/test/java/org/lance/FileReaderWriterTest.java @@ -14,6 +14,7 @@ package org.lance; import org.lance.file.BlobReadMode; +import org.lance.file.FileReadOptions; import org.lance.file.LanceFileReader; import org.lance.file.LanceFileWriter; import org.lance.util.Range; @@ -21,6 +22,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; @@ -350,14 +352,15 @@ void testBlobDescriptorMode(@TempDir Path tempDir) throws Exception { reader.schema().getFields().get(0).getMetadata().containsKey("lance-encoding:blob"), "Blob metadata should be preserved in schema"); + FileReadOptions options = + FileReadOptions.builder().blobReadMode(BlobReadMode.DESCRIPTOR).build(); try (ArrowReader batch = - reader.readAll( - Collections.singletonList("blob_data"), null, 10, BlobReadMode.DESCRIPTOR)) { + reader.readAll(Collections.singletonList("blob_data"), null, 10, options)) { 
assertTrue(batch.loadNextBatch()); VectorSchemaRoot root = batch.getVectorSchemaRoot(); assertEquals(5, root.getRowCount()); - org.apache.arrow.vector.FieldVector column = root.getVector("blob_data"); + FieldVector column = root.getVector("blob_data"); assertTrue( column.getField().getType() instanceof ArrowType.Struct, "DESCRIPTOR mode should return Struct but got " + column.getField().getType()); @@ -383,7 +386,7 @@ void testBlobContentMode(@TempDir Path tempDir) throws Exception { VectorSchemaRoot root = batch.getVectorSchemaRoot(); assertEquals(5, root.getRowCount()); - org.apache.arrow.vector.FieldVector column = root.getVector("blob_data"); + FieldVector column = root.getVector("blob_data"); assertTrue( column.getField().getType() instanceof ArrowType.LargeBinary, "CONTENT mode should return LargeBinary but got " + column.getField().getType());