From 7205392cdf1651cb2d10f63ccbe4e545173e7f35 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Wed, 24 Dec 2025 08:34:17 -0800 Subject: [PATCH] feat: support credential vending in dir namespace --- Cargo.lock | 495 ++++++++- Cargo.toml | 1 + java/lance-jni/Cargo.lock | 946 ++++++++++++++++-- java/lance-jni/Cargo.toml | 9 +- .../lance/namespace/DirectoryNamespace.java | 52 + python/Cargo.lock | 693 ++++++++++++- python/Cargo.toml | 6 +- python/python/lance/namespace.py | 43 + rust/lance-namespace-impls/Cargo.toml | 20 + rust/lance-namespace-impls/src/credentials.rs | 717 +++++++++++++ .../src/credentials/aws.rs | 881 ++++++++++++++++ .../src/credentials/azure.rs | 335 +++++++ .../src/credentials/gcp.rs | 637 ++++++++++++ rust/lance-namespace-impls/src/dir.rs | 148 ++- rust/lance-namespace-impls/src/lib.rs | 59 ++ 15 files changed, 4915 insertions(+), 127 deletions(-) create mode 100644 rust/lance-namespace-impls/src/credentials.rs create mode 100644 rust/lance-namespace-impls/src/credentials/aws.rs create mode 100644 rust/lance-namespace-impls/src/credentials/azure.rs create mode 100644 rust/lance-namespace-impls/src/credentials/gcp.rs diff --git a/Cargo.lock b/Cargo.lock index 4c21a96967d..c55895ff204 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "RustyXML" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" + [[package]] name = "addr2line" version = "0.25.1" @@ -430,6 +436,17 @@ dependencies = [ "serde_json", ] +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -459,17 +476,53 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" +dependencies = [ + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite 2.6.1", + "parking", + "polling", + "rustix 1.1.3", + "slab", + "windows-sys 0.61.2", +] + [[package]] name = "async-lock" version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ - "event-listener", + "event-listener 5.4.1", "event-listener-strategy", "pin-project-lite", ] +[[package]] +name = "async-process" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" +dependencies = [ + "async-channel 2.5.0", + "async-io", + "async-lock", + "async-signal", + "async-task", + "blocking", + "cfg-if", + "event-listener 5.4.1", + "futures-lite 2.6.1", + "rustix 1.1.3", +] + [[package]] name = "async-recursion" version = "1.1.1" @@ -481,6 +534,30 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "async-signal" +version = "0.2.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" +dependencies = [ + "async-io", + "async-lock", + "atomic-waker", + "cfg-if", + "futures-core", + "futures-io", + "rustix 1.1.3", + "signal-hook-registry", + "slab", + "windows-sys 0.61.2", +] + +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.89" @@ -541,7 +618,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "hex", "http 1.4.0", "ring", @@ -602,7 +679,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http-body 0.4.6", "percent-encoding", @@ -627,7 +704,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -653,7 +730,7 @@ dependencies = [ "aws-smithy-xml", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "hex", "hmac", "http 0.2.12", @@ -683,7 +760,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -705,7 +782,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -728,7 +805,7 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -897,7 +974,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -1029,13 +1106,120 @@ dependencies = [ "tracing", ] +[[package]] +name = "azure_core" +version = "0.21.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b552ad43a45a746461ec3d3a51dfb6466b4759209414b439c165eb6a6b7729e" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "dyn-clone", + "futures", + "getrandom 0.2.16", + "hmac", + "http-types", + "once_cell", + "paste", + "pin-project", + "quick-xml 0.31.0", + "rand 0.8.5", + "reqwest", + "rustc_version", + "serde", + "serde_json", + "sha2", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_identity" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ddd80344317c40c04b603807b63a5cefa532f1b43522e72f480a988141f744" +dependencies = [ + "async-lock", + "async-process", + "async-trait", + "azure_core", + "futures", + "oauth2", + "pin-project", + "serde", + "time", + "tracing", + "tz-rs", + "url", + "uuid", +] + +[[package]] +name = "azure_storage" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f838159f4d29cb400a14d9d757578ba495ae64feb07a7516bf9e4415127126" +dependencies = [ + "RustyXML", + "async-lock", + "async-trait", + "azure_core", + "bytes", + "serde", + "serde_derive", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_storage_blobs" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97e83c3636ae86d9a6a7962b2112e3b19eb3903915c50ce06ff54ff0a2e6a7e4" +dependencies = [ + "RustyXML", + "azure_core", + "azure_storage", + "azure_svc_blobstorage", + "bytes", + "futures", + "serde", + "serde_derive", + "serde_json", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_svc_blobstorage" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e6c6f20c5611b885ba94c7bae5e02849a267381aecb8aee577e8c35ff4064c6" +dependencies = [ + "azure_core", + "bytes", + "futures", + "log", + "once_cell", + "serde", + "serde_json", + "time", +] + 
[[package]] name = "backon" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" dependencies = [ - "fastrand", + "fastrand 2.3.0", "gloo-timers", "tokio", ] @@ -1216,6 +1400,19 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" +dependencies = [ + "async-channel 2.5.0", + "async-task", + "futures-io", + "futures-lite 2.6.1", + "piper", +] + [[package]] name = "bon" version = "3.8.1" @@ -1553,6 +1750,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_fn" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8a2ca5ac02d09563609681103aada9e1777d54fc57a5acd7a41404f9c93b6e" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -3040,6 +3243,12 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "event-listener" version = "5.4.1" @@ -3057,7 +3266,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ - "event-listener", + "event-listener 5.4.1", "pin-project-lite", ] @@ -3073,6 +3282,15 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" +[[package]] +name = "fastrand" +version = "1.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -3291,6 +3509,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + +[[package]] +name = "futures-lite" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" +dependencies = [ + "fastrand 2.3.0", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.31" @@ -3485,6 +3731,17 @@ dependencies = [ "libm", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -3494,7 +3751,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -3536,6 +3793,26 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "google-cloud-auth" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5572275b7f06b6fde8eec61a23d87c83aae362bee586bbeb8773b3f98658ae81" +dependencies = [ + "async-trait", + "base64 0.22.1", + "derive_builder 0.20.2", + "http 1.4.0", + "reqwest", 
+ "rustls 0.23.35", + "rustls-pemfile", + "serde", + "serde_json", + "thiserror 2.0.17", + "time", + "tokio", +] + [[package]] name = "group" version = "0.12.1" @@ -3769,6 +4046,26 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel 1.9.0", + "base64 0.13.1", + "futures-lite 1.13.0", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.10.1" @@ -4153,6 +4450,12 @@ dependencies = [ "web-time", ] +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "inferno" version = "0.11.21" @@ -4181,6 +4484,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -4746,7 +5058,7 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-select", - "async-channel", + "async-channel 2.5.0", "async-recursion", "async-trait", "bitpacking", @@ -4898,9 +5210,18 @@ dependencies = [ "arrow-ipc", "arrow-schema", "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-sts", "axum", + "azure_core", + "azure_identity", + "azure_storage", + "azure_storage_blobs", "bytes", + "chrono", "futures", + "google-cloud-auth", "lance", "lance-core", "lance-index", @@ -4915,6 +5236,7 @@ dependencies = [ "serde_json", "snafu", "tempfile", + "time", "tokio", "tower", "tower-http 0.5.2", @@ -5527,7 +5849,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -5574,7 +5896,7 @@ dependencies = [ "crossbeam-epoch", "crossbeam-utils", "equivalent", - "event-listener", + "event-listener 5.4.1", "futures-util", "parking_lot", "portable-atomic", @@ -5826,12 +6148,40 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "number_prefix" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "oauth2" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c38841cdd844847e3e7c8d29cef9dcfed8877f8f56f9071f77843ecf3baf937f" +dependencies = [ + "base64 0.13.1", + "chrono", + "getrandom 0.2.16", + "http 0.2.12", + "rand 0.8.5", + "serde", + "serde_json", + "serde_path_to_error", + "sha2", + "thiserror 1.0.69", + "url", +] + [[package]] name = "object" version = "0.32.2" @@ -6303,7 +6653,7 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" dependencies = [ - "fastrand", + "fastrand 2.3.0", "phf_shared 0.13.1", ] @@ -6357,6 +6707,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" +dependencies = 
[ + "atomic-waker", + "fastrand 2.3.0", + "futures-io", +] + [[package]] name = "pkcs1" version = "0.7.5" @@ -6439,6 +6800,20 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "polling" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi", + "pin-project-lite", + "rustix 1.1.3", + "windows-sys 0.61.2", +] + [[package]] name = "portable-atomic" version = "1.12.0" @@ -6694,6 +7069,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -6790,6 +7175,19 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + [[package]] name = "rand" version = "0.8.5" @@ -6811,6 +7209,16 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -6831,6 +7239,15 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + [[package]] name = "rand_core" version = "0.6.4" @@ -6869,6 +7286,15 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rand_xorshift" version = "0.4.0" @@ -7640,6 +8066,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror 1.0.69", +] + [[package]] name = "serde_repr" version = "0.1.20" @@ -8337,7 +8774,7 @@ version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ - "fastrand", + "fastrand 2.3.0", "getrandom 0.3.4", "once_cell", "rustix 1.1.3", @@ -8449,7 +8886,10 @@ checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", "itoa", + "js-sys", + "libc", "num-conv", + "num_threads", "powerfmt", "serde", "time-core", @@ -8880,6 +9320,15 @@ dependencies = [ "typify-impl", ] +[[package]] +name = "tz-rs" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33851b15c848fad2cf4b105c6bb66eb9512b6f6c44a4b13f57c53c73c707e2b4" +dependencies = [ + "const_fn", +] + [[package]] name = "unarray" version = "0.1.4" @@ -9065,6 +9514,12 @@ dependencies = [ "libc", ] +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = 
"walkdir" version = "2.5.0" @@ -9084,6 +9539,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 207969b36d4..66e5c0a99f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,6 +87,7 @@ aws-config = "1.2.0" aws-credential-types = "1.2.0" aws-sdk-dynamodb = "1.38.0" aws-sdk-s3 = "1.38.0" +aws-sdk-sts = "1.38.0" half = { "version" = "2.1", default-features = false, features = [ "num-traits", "std", diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index e3dd57d50f7..1ee870160f5 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "RustyXML" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" + [[package]] name = "adler2" version = "2.0.1" @@ -148,12 +154,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" -dependencies = [ - "rustversion", -] +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "arrayref" @@ -384,6 +387,17 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -413,17 
+427,53 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" +dependencies = [ + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite 2.6.1", + "parking", + "polling", + "rustix 1.1.3", + "slab", + "windows-sys 0.61.2", +] + [[package]] name = "async-lock" version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ - "event-listener", + "event-listener 5.4.1", "event-listener-strategy", "pin-project-lite", ] +[[package]] +name = "async-process" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" +dependencies = [ + "async-channel 2.5.0", + "async-io", + "async-lock", + "async-signal", + "async-task", + "blocking", + "cfg-if", + "event-listener 5.4.1", + "futures-lite 2.6.1", + "rustix 1.1.3", +] + [[package]] name = "async-recursion" version = "1.1.1" @@ -435,6 +485,30 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "async-signal" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" +dependencies = [ + "async-io", + "async-lock", + "atomic-waker", + "cfg-if", + "futures-core", + "futures-io", + "rustix 1.1.3", + "signal-hook-registry", + "slab", + "windows-sys 0.61.2", +] + +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.89" @@ -495,7 +569,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + 
"fastrand 2.3.0", "hex", "http 1.4.0", "ring", @@ -555,7 +629,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http-body 0.4.6", "percent-encoding", @@ -580,7 +654,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -602,7 +676,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -625,7 +699,7 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -694,17 +768,23 @@ dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "h2", + "h2 0.3.27", + "h2 0.4.12", + "http 0.2.12", "http 1.4.0", - "hyper", - "hyper-rustls", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.8.1", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", - "rustls", + "rustls 0.21.12", + "rustls 0.23.35", "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", "tracing", ] @@ -739,9 +819,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.5" +version = "1.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" +checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -750,7 +830,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -787,6 +867,7 @@ dependencies = [ "base64-simd", "bytes", "bytes-utils", + "futures-core", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -799,6 +880,8 @@ dependencies = [ "ryu", "serde", "time", + "tokio", + "tokio-util", ] 
[[package]] @@ -837,7 +920,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "itoa", "matchit", @@ -879,17 +962,130 @@ dependencies = [ "tracing", ] +[[package]] +name = "azure_core" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b552ad43a45a746461ec3d3a51dfb6466b4759209414b439c165eb6a6b7729e" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "dyn-clone", + "futures", + "getrandom 0.2.16", + "hmac", + "http-types", + "once_cell", + "paste", + "pin-project", + "quick-xml 0.31.0", + "rand 0.8.5", + "reqwest", + "rustc_version", + "serde", + "serde_json", + "sha2", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_identity" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ddd80344317c40c04b603807b63a5cefa532f1b43522e72f480a988141f744" +dependencies = [ + "async-lock", + "async-process", + "async-trait", + "azure_core", + "futures", + "oauth2", + "pin-project", + "serde", + "time", + "tracing", + "tz-rs", + "url", + "uuid", +] + +[[package]] +name = "azure_storage" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f838159f4d29cb400a14d9d757578ba495ae64feb07a7516bf9e4415127126" +dependencies = [ + "RustyXML", + "async-lock", + "async-trait", + "azure_core", + "bytes", + "serde", + "serde_derive", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_storage_blobs" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97e83c3636ae86d9a6a7962b2112e3b19eb3903915c50ce06ff54ff0a2e6a7e4" +dependencies = [ + "RustyXML", + "azure_core", + "azure_storage", + "azure_svc_blobstorage", + "bytes", + "futures", + "serde", + "serde_derive", + "serde_json", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_svc_blobstorage" +version = 
"0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e6c6f20c5611b885ba94c7bae5e02849a267381aecb8aee577e8c35ff4064c6" +dependencies = [ + "azure_core", + "bytes", + "futures", + "log", + "once_cell", + "serde", + "serde_json", + "time", +] + [[package]] name = "backon" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" dependencies = [ - "fastrand", + "fastrand 2.3.0", "gloo-timers", "tokio", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.21.7" @@ -998,6 +1194,19 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" +dependencies = [ + "async-channel 2.5.0", + "async-task", + "futures-io", + "futures-lite 2.6.1", + "piper", +] + [[package]] name = "bon" version = "3.8.1" @@ -1014,7 +1223,7 @@ version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" dependencies = [ - "darling", + "darling 0.21.3", "ident_case", "prettyplease", "proc-macro2", @@ -1256,12 +1465,28 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_fn" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8a2ca5ac02d09563609681103aada9e1777d54fc57a5acd7a41404f9c93b6e" + [[package]] name = "constant_time_eq" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "core-foundation" +version = "0.9.4" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -1385,14 +1610,38 @@ dependencies = [ "memchr", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + [[package]] name = "darling" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.21.3", + "darling_macro 0.21.3", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.111", ] [[package]] @@ -1409,13 +1658,24 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.111", +] + [[package]] name = "darling_macro" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ - "darling_core", + "darling_core 0.21.3", "quote", "syn 2.0.111", ] @@ -2138,6 +2398,37 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.111", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.111", +] + [[package]] name = "digest" version = "0.10.7" @@ -2279,6 +2570,12 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "event-listener" version = "5.4.1" @@ -2296,7 +2593,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ - "event-listener", + "event-listener 5.4.1", "pin-project-lite", ] @@ -2312,6 +2609,15 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2375,6 +2681,21 @@ version = "0.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2471,6 +2792,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + +[[package]] +name = "futures-lite" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" +dependencies = [ + "fastrand 2.3.0", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.31" @@ -2659,6 +3008,17 @@ dependencies = [ "libm", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -2668,7 +3028,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + 
"wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2704,6 +3064,45 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "google-cloud-auth" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5572275b7f06b6fde8eec61a23d87c83aae362bee586bbeb8773b3f98658ae81" +dependencies = [ + "async-trait", + "base64 0.22.1", + "derive_builder", + "http 1.4.0", + "reqwest", + "rustls 0.23.35", + "rustls-pemfile", + "serde", + "serde_json", + "thiserror 2.0.17", + "time", + "tokio", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.12" @@ -2883,6 +3282,26 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel 1.9.0", + "base64 0.13.1", + "futures-lite 1.13.0", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.10.1" @@ -2901,6 +3320,30 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 
0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.8.1" @@ -2911,7 +3354,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", + "h2 0.4.12", "http 1.4.0", "http-body 1.0.1", "httparse", @@ -2925,21 +3368,52 @@ dependencies = [ ] [[package]] -name = "hyper-rustls" -version = "0.27.7" +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.4.0", + "hyper 1.8.1", + "hyper-util", + "rustls 0.23.35", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-tls" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ - "http 1.4.0", - "hyper", + "bytes", + "http-body-util", + "hyper 1.8.1", "hyper-util", - "rustls", - "rustls-native-certs", - "rustls-pki-types", + "native-tls", "tokio", - "tokio-rustls", + "tokio-native-tls", "tower-service", - "webpki-roots", ] [[package]] @@ -2955,15 +3429,17 @@ dependencies = [ "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.1", + "system-configuration", "tokio", "tower-service", 
"tracing", + "windows-registry", ] [[package]] @@ -3160,6 +3636,12 @@ dependencies = [ "hashbrown 0.16.1", ] +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "inout" version = "0.1.4" @@ -3170,6 +3652,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -3617,7 +4108,7 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-select", - "async-channel", + "async-channel 2.5.0", "async-recursion", "async-trait", "bitpacking", @@ -3778,9 +4269,18 @@ dependencies = [ "arrow-ipc", "arrow-schema", "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-sts", "axum", + "azure_core", + "azure_identity", + "azure_storage", + "azure_storage_blobs", "bytes", + "chrono", "futures", + "google-cloud-auth", "lance", "lance-core", "lance-index", @@ -3793,6 +4293,7 @@ dependencies = [ "serde", "serde_json", "snafu", + "time", "tokio", "tower", "tower-http 0.5.2", @@ -4159,7 +4660,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -4180,7 +4681,7 @@ dependencies = [ "crossbeam-epoch", "crossbeam-utils", "equivalent", - "event-listener", + "event-listener 5.4.1", "futures-util", "parking_lot", "portable-atomic", @@ -4201,6 +4702,23 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" +[[package]] +name = "native-tls" +version = 
"0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework 2.11.1", + "security-framework-sys", + "tempfile", +] + [[package]] name = "ndarray" version = "0.16.1" @@ -4372,6 +4890,34 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "oauth2" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c38841cdd844847e3e7c8d29cef9dcfed8877f8f56f9071f77843ecf3baf937f" +dependencies = [ + "base64 0.13.1", + "chrono", + "getrandom 0.2.16", + "http 0.2.12", + "rand 0.8.5", + "serde", + "serde_json", + "serde_path_to_error", + "sha2", + "thiserror 1.0.69", + "url", +] + [[package]] name = "object" version = "0.32.2" @@ -4397,7 +4943,7 @@ dependencies = [ "http-body-util", "httparse", "humantime", - "hyper", + "hyper 1.8.1", "itertools 0.14.0", "md-5", "parking_lot", @@ -4483,12 +5029,50 @@ dependencies = [ "uuid", ] +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "openssl-probe" version = "0.1.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -4772,6 +5356,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" +dependencies = [ + "atomic-waker", + "fastrand 2.3.0", + "futures-io", +] + [[package]] name = "pkcs1" version = "0.7.5" @@ -4816,6 +5411,20 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "polling" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi", + "pin-project-lite", + "rustix 1.1.3", + "windows-sys 0.61.2", +] + [[package]] name = "portable-atomic" version = "1.12.0" @@ -4945,6 +5554,16 @@ dependencies = [ "cc", ] +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -4977,8 +5596,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", - "socket2", + "rustls 0.23.35", + 
"socket2 0.6.1", "thiserror 2.0.17", "tokio", "tracing", @@ -4997,7 +5616,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.35", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -5015,7 +5634,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.6.1", "tracing", "windows-sys 0.60.2", ] @@ -5041,6 +5660,19 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + [[package]] name = "rand" version = "0.8.5" @@ -5062,6 +5694,16 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -5082,6 +5724,15 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + [[package]] name = "rand_core" version = "0.6.4" @@ -5120,6 +5771,15 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rand_xoshiro" version = "0.7.0" @@ -5302,21 +5962,23 @@ dependencies = [ 
"encoding_rs", "futures-core", "futures-util", - "h2", + "h2 0.4.12", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", + "native-tls", "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.35", "rustls-native-certs", "rustls-pki-types", "serde", @@ -5324,7 +5986,8 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-native-tls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http 0.6.8", @@ -5460,6 +6123,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.35" @@ -5467,10 +6142,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", + "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.8", "subtle", "zeroize", ] @@ -5484,7 +6160,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.5.1", ] [[package]] @@ -5506,6 +6182,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.8" @@ -5604,6 +6290,29 @@ dependencies = [ "sha2", ] +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + [[package]] name = "security-framework" version = "3.5.1" @@ -5611,7 +6320,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ "bitflags", - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -5686,15 +6395,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.147" +version = "1.0.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4" +checksum = "217ca874ae0207aac254aa02c957ded05585a90892cc8d87f9e5fa49669dadd8" dependencies = [ "itoa", "memchr", + "ryu", "serde", "serde_core", - "zmij", ] [[package]] @@ -5708,6 +6417,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror 1.0.69", +] + [[package]] name = "serde_repr" version = "0.1.20" @@ -5899,6 +6619,16 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" 
+dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.1" @@ -6091,6 +6821,27 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -6251,11 +7002,11 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ - "fastrand", + "fastrand 2.3.0", "getrandom 0.3.4", "once_cell", "rustix 1.1.3", @@ -6339,7 +7090,10 @@ checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", "itoa", + "js-sys", + "libc", "num-conv", + "num_threads", "powerfmt", "serde", "time-core", @@ -6408,7 +7162,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.1", "tokio-macros", "windows-sys 0.61.2", ] @@ -6424,13 +7178,33 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = 
"tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.35", "tokio", ] @@ -6686,6 +7460,15 @@ dependencies = [ "typify-impl", ] +[[package]] +name = "tz-rs" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33851b15c848fad2cf4b105c6bb66eb9512b6f6c44a4b13f57c53c73c707e2b4" +dependencies = [ + "const_fn", +] + [[package]] name = "unicase" version = "2.8.1" @@ -6776,6 +7559,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -6788,6 +7577,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "walkdir" version = "2.5.0" @@ -6807,6 +7602,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = 
"0.11.1+wasi-snapshot-preview1" @@ -6994,6 +7795,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -7428,12 +8240,6 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" -[[package]] -name = "zmij" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e404bcd8afdaf006e529269d3e85a743f9480c3cef60034d77860d02964f3ba" - [[package]] name = "zstd" version = "0.13.3" diff --git a/java/lance-jni/Cargo.toml b/java/lance-jni/Cargo.toml index 04042265542..e0815845f0d 100644 --- a/java/lance-jni/Cargo.toml +++ b/java/lance-jni/Cargo.toml @@ -12,6 +12,13 @@ description = "JNI bindings for Lance Columnar format" [lib] crate-type = ["cdylib"] +[features] +default = [] +# Credential vending features for DirectoryNamespace +credential-vendor-aws = ["lance-namespace-impls/credential-vendor-aws"] +credential-vendor-gcp = ["lance-namespace-impls/credential-vendor-gcp"] +credential-vendor-azure = ["lance-namespace-impls/credential-vendor-azure"] + [dependencies] lance = { path = "../../rust/lance", features = ["substrait"] } lance-datafusion = { path = "../../rust/lance-datafusion" } @@ -20,7 +27,7 @@ lance-linalg = { path = "../../rust/lance-linalg" } lance-index = { path = "../../rust/lance-index" } lance-io = { path = "../../rust/lance-io" } lance-namespace = { path = "../../rust/lance-namespace" } -lance-namespace-impls = { path = "../../rust/lance-namespace-impls", features = 
["rest", "rest-adapter"] } +lance-namespace-impls = { path = "../../rust/lance-namespace-impls", features = ["rest", "rest-adapter", "credential-vendor-aws", "credential-vendor-gcp", "credential-vendor-azure"] } lance-core = { path = "../../rust/lance-core" } lance-file = { path = "../../rust/lance-file" } arrow = { version = "56.1", features = ["ffi"] } diff --git a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java index 19de6d0a4bf..2d13db69694 100644 --- a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java +++ b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java @@ -51,6 +51,43 @@ * for S3, storage.account_name=myaccount for Azure) * * + *

Credential vending properties (requires credential-vendor-* features to be enabled): + * + *

When credential vendor properties are configured, describeTable() will return vended temporary + * credentials. The vendor type is auto-selected based on the table location URI: s3:// for AWS, + * gs:// for GCP, az:// for Azure. + * + *

+ * *

Example usage (local filesystem): * *

{@code
@@ -81,6 +118,21 @@
  * // Use namespace...
  * namespace.close();
  * }
+ * + *

Example usage (AWS S3 with credential vending): + * + *

{@code
+ * Map<String, String> properties = new HashMap<>();
+ * properties.put("root", "s3://my-bucket/lance-data");
+ * properties.put("credential_vendor.enabled", "true");
+ * properties.put("credential_vendor.aws_role_arn", "arn:aws:iam::123456789012:role/MyRole");
+ * properties.put("credential_vendor.aws_duration_millis", "3600000");  // 1 hour
+ *
+ * DirectoryNamespace namespace = new DirectoryNamespace();
+ * namespace.initialize(properties, allocator);
+ * // describeTable() will now return vended credentials (AWS vendor auto-selected from s3:// URI)
+ * namespace.close();
+ * }
*/ public class DirectoryNamespace implements LanceNamespace, Closeable { static { diff --git a/python/Cargo.lock b/python/Cargo.lock index 3e9a305ae86..d4e19a95a6d 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "RustyXML" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" + [[package]] name = "abi_stable" version = "0.11.3" @@ -202,12 +208,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" -dependencies = [ - "rustversion", -] +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "arrayref" @@ -463,6 +466,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -501,17 +515,53 @@ dependencies = [ "abi_stable", ] +[[package]] +name = "async-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" +dependencies = [ + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite 2.6.1", + "parking", + "polling", + "rustix 1.1.3", + "slab", + "windows-sys 0.61.2", +] + [[package]] name = "async-lock" version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ - 
"event-listener", + "event-listener 5.4.1", "event-listener-strategy", "pin-project-lite", ] +[[package]] +name = "async-process" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" +dependencies = [ + "async-channel 2.5.0", + "async-io", + "async-lock", + "async-signal", + "async-task", + "blocking", + "cfg-if", + "event-listener 5.4.1", + "futures-lite 2.6.1", + "rustix 1.1.3", +] + [[package]] name = "async-recursion" version = "1.1.1" @@ -523,6 +573,30 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "async-signal" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" +dependencies = [ + "async-io", + "async-lock", + "atomic-waker", + "cfg-if", + "futures-core", + "futures-io", + "rustix 1.1.3", + "signal-hook-registry", + "slab", + "windows-sys 0.61.2", +] + +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.89" @@ -583,7 +657,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "hex", "http 1.4.0", "ring", @@ -643,7 +717,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http-body 0.4.6", "percent-encoding", @@ -668,7 +742,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -690,7 +764,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -712,7 +786,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 
0.2.12", "regex-lite", "tracing", @@ -735,7 +809,7 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -855,9 +929,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.5" +version = "1.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" +checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -866,7 +940,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -998,17 +1072,130 @@ dependencies = [ "tracing", ] +[[package]] +name = "azure_core" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b552ad43a45a746461ec3d3a51dfb6466b4759209414b439c165eb6a6b7729e" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "dyn-clone", + "futures", + "getrandom 0.2.16", + "hmac", + "http-types", + "once_cell", + "paste", + "pin-project", + "quick-xml 0.31.0", + "rand 0.8.5", + "reqwest", + "rustc_version", + "serde", + "serde_json", + "sha2", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_identity" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ddd80344317c40c04b603807b63a5cefa532f1b43522e72f480a988141f744" +dependencies = [ + "async-lock", + "async-process", + "async-trait", + "azure_core", + "futures", + "oauth2", + "pin-project", + "serde", + "time", + "tracing", + "tz-rs", + "url", + "uuid", +] + +[[package]] +name = "azure_storage" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f838159f4d29cb400a14d9d757578ba495ae64feb07a7516bf9e4415127126" +dependencies = [ + "RustyXML", + 
"async-lock", + "async-trait", + "azure_core", + "bytes", + "serde", + "serde_derive", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_storage_blobs" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97e83c3636ae86d9a6a7962b2112e3b19eb3903915c50ce06ff54ff0a2e6a7e4" +dependencies = [ + "RustyXML", + "azure_core", + "azure_storage", + "azure_svc_blobstorage", + "bytes", + "futures", + "serde", + "serde_derive", + "serde_json", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_svc_blobstorage" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e6c6f20c5611b885ba94c7bae5e02849a267381aecb8aee577e8c35ff4064c6" +dependencies = [ + "azure_core", + "bytes", + "futures", + "log", + "once_cell", + "serde", + "serde_json", + "time", +] + [[package]] name = "backon" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" dependencies = [ - "fastrand", + "fastrand 2.3.0", "gloo-timers", "tokio", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.21.7" @@ -1143,6 +1330,19 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" +dependencies = [ + "async-channel 2.5.0", + "async-task", + "futures-io", + "futures-lite 2.6.1", + "piper", +] + [[package]] name = "bon" version = "3.8.1" @@ -1394,6 +1594,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_fn" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2f8a2ca5ac02d09563609681103aada9e1777d54fc57a5acd7a41404f9c93b6e" + [[package]] name = "const_panic" version = "0.2.15" @@ -1409,6 +1615,16 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -2644,6 +2860,12 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "event-listener" version = "5.4.1" @@ -2661,7 +2883,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ - "event-listener", + "event-listener 5.4.1", "pin-project-lite", ] @@ -2677,6 +2899,15 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2752,6 +2983,21 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name 
= "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2848,6 +3094,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + +[[package]] +name = "futures-lite" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" +dependencies = [ + "fastrand 2.3.0", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.31" @@ -3045,6 +3319,17 @@ dependencies = [ "libm", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -3054,7 +3339,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -3090,6 +3375,26 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = 
"google-cloud-auth" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5572275b7f06b6fde8eec61a23d87c83aae362bee586bbeb8773b3f98658ae81" +dependencies = [ + "async-trait", + "base64 0.22.1", + "derive_builder", + "http 1.4.0", + "reqwest", + "rustls 0.23.35", + "rustls-pemfile", + "serde", + "serde_json", + "thiserror 2.0.17", + "time", + "tokio", +] + [[package]] name = "h2" version = "0.3.27" @@ -3288,6 +3593,26 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel 1.9.0", + "base64 0.13.1", + "futures-lite 1.13.0", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.10.1" @@ -3386,6 +3711,22 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.19" @@ -3405,9 +3746,11 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2 0.6.1", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -3650,6 +3993,12 @@ dependencies = [ "rustversion", ] +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "inout" version = "0.1.4" @@ -3660,6 +4009,15 @@ dependencies = [ "generic-array", ] 
+[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -4118,7 +4476,7 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-select", - "async-channel", + "async-channel 2.5.0", "async-recursion", "async-trait", "bitpacking", @@ -4249,9 +4607,18 @@ dependencies = [ "arrow-ipc", "arrow-schema", "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-sts", "axum", + "azure_core", + "azure_identity", + "azure_storage", + "azure_storage_blobs", "bytes", + "chrono", "futures", + "google-cloud-auth", "lance", "lance-core", "lance-index", @@ -4264,6 +4631,7 @@ dependencies = [ "serde", "serde_json", "snafu", + "time", "tokio", "tower", "tower-http 0.5.2", @@ -4817,7 +5185,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -4838,7 +5206,7 @@ dependencies = [ "crossbeam-epoch", "crossbeam-utils", "equivalent", - "event-listener", + "event-listener 5.4.1", "futures-util", "parking_lot", "portable-atomic", @@ -4859,6 +5227,23 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" +[[package]] +name = "native-tls" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework 2.11.1", + "security-framework-sys", + "tempfile", +] + [[package]] name = "ndarray" version = "0.16.1" @@ -5030,6 +5415,34 @@ 
dependencies = [ "syn 2.0.111", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "oauth2" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c38841cdd844847e3e7c8d29cef9dcfed8877f8f56f9071f77843ecf3baf937f" +dependencies = [ + "base64 0.13.1", + "chrono", + "getrandom 0.2.16", + "http 0.2.12", + "rand 0.8.5", + "serde", + "serde_json", + "serde_path_to_error", + "sha2", + "thiserror 1.0.69", + "url", +] + [[package]] name = "object" version = "0.32.2" @@ -5141,12 +5554,50 @@ dependencies = [ "uuid", ] +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.10.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "openssl-probe" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -5415,7 +5866,7 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" dependencies = [ - "fastrand", + "fastrand 2.3.0", "phf_shared 0.13.1", ] @@ -5469,6 +5920,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" +dependencies = [ + "atomic-waker", + "fastrand 2.3.0", + "futures-io", +] + [[package]] name = "pkcs1" version = "0.7.5" @@ -5513,6 +5975,20 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "polling" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi", + "pin-project-lite", + "rustix 1.1.3", + "windows-sys 0.61.2", +] + [[package]] name = "portable-atomic" version = "1.12.0" @@ -5790,6 +6266,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -5886,6 +6372,19 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 
0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + [[package]] name = "rand" version = "0.8.5" @@ -5907,6 +6406,16 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -5927,6 +6436,15 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + [[package]] name = "rand_core" version = "0.6.4" @@ -5965,6 +6483,15 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rand_xoshiro" version = "0.7.0" @@ -6171,11 +6698,13 @@ dependencies = [ "http-body-util", "hyper 1.8.1", "hyper-rustls 0.27.7", + "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", + "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -6187,6 +6716,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", + "tokio-native-tls", "tokio-rustls 0.26.4", "tokio-util", "tower", @@ -6348,6 +6878,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", + "log", "once_cell", "ring", "rustls-pki-types", @@ -6365,7 +6896,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.5.1", ] [[package]] @@ -6505,6 +7036,19 @@ dependencies = [ "untrusted", ] 
+[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + [[package]] name = "security-framework" version = "3.5.1" @@ -6512,7 +7056,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ "bitflags 2.10.0", - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -6587,15 +7131,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.147" +version = "1.0.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4" +checksum = "217ca874ae0207aac254aa02c957ded05585a90892cc8d87f9e5fa49669dadd8" dependencies = [ "itoa", "memchr", + "ryu", "serde", "serde_core", - "zmij", ] [[package]] @@ -6609,6 +7153,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror 1.0.69", +] + [[package]] name = "serde_repr" version = "0.1.20" @@ -7023,6 +7578,27 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -7200,11 +7776,11 @@ checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ - "fastrand", + "fastrand 2.3.0", "getrandom 0.3.4", "once_cell", "rustix 1.1.3", @@ -7288,7 +7864,10 @@ checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", "itoa", + "js-sys", + "libc", "num-conv", + "num_threads", "powerfmt", "serde", "time-core", @@ -7373,6 +7952,16 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -7683,6 +8272,15 @@ dependencies = [ "typify-impl", ] +[[package]] +name = "tz-rs" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33851b15c848fad2cf4b105c6bb66eb9512b6f6c44a4b13f57c53c73c707e2b4" +dependencies = [ + "const_fn", +] + [[package]] name = "unicase" version = "2.8.1" @@ -7800,6 +8398,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -7818,6 +8422,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "walkdir" version = "2.5.0" @@ -7837,6 +8447,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -8024,6 +8640,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -8408,12 +9035,6 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" -[[package]] -name = "zmij" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e404bcd8afdaf006e529269d3e85a743f9480c3cef60034d77860d02964f3ba" - [[package]] name = "zstd" version = "0.13.3" diff --git a/python/Cargo.toml b/python/Cargo.toml index ab5ae2e73de..e6f166f275f 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -73,11 +73,15 @@ url = 
"2.5.0" bytes = "1.4" [features] -default = ["rest", "rest-adapter"] +default = ["rest", "rest-adapter", "credential-vendor-aws", "credential-vendor-gcp", "credential-vendor-azure"] datagen = ["lance-datagen"] fp16kernels = ["lance/fp16kernels"] rest = ["lance-namespace-impls/rest"] rest-adapter = ["lance-namespace-impls/rest-adapter"] +# Credential vending features for DirectoryNamespace +credential-vendor-aws = ["lance-namespace-impls/credential-vendor-aws"] +credential-vendor-gcp = ["lance-namespace-impls/credential-vendor-gcp"] +credential-vendor-azure = ["lance-namespace-impls/credential-vendor-azure"] [profile.ci] debug = "line-tables-only" diff --git a/python/python/lance/namespace.py b/python/python/lance/namespace.py index 3e89ab0cfe9..59db935f8ce 100644 --- a/python/python/lance/namespace.py +++ b/python/python/lance/namespace.py @@ -86,6 +86,40 @@ class DirectoryNamespace(LanceNamespace): (e.g., storage.region="us-west-2" becomes region="us-west-2" in storage options) + Credential vendor properties (vendor is auto-selected based on table location): + When credential vendor properties are configured, describe_table() will + return vended temporary credentials. The vendor type is auto-selected + based on table location URI: s3:// for AWS, gs:// for GCP, az:// for + Azure. Requires the corresponding credential-vendor-* feature. 
+ + Common properties: + - credential_vendor.enabled (required): Set to "true" to enable + - credential_vendor.permission (optional): read, write, or admin + + AWS-specific properties (for s3:// locations): + - credential_vendor.aws_role_arn (required): IAM role ARN to assume + - credential_vendor.aws_external_id (optional): External ID + - credential_vendor.aws_region (optional): AWS region + - credential_vendor.aws_role_session_name (optional): Session name + - credential_vendor.aws_duration_millis (optional): Duration in ms + (default: 3600000, range: 15min-12hrs) + + GCP-specific properties (for gs:// locations): + - credential_vendor.gcp_service_account (optional): Service account + to impersonate using IAM Credentials API + + Note: GCP uses Application Default Credentials (ADC). To use a service + account key file, set the GOOGLE_APPLICATION_CREDENTIALS environment + variable before starting. GCP token duration cannot be configured; + it's determined by the STS endpoint (typically 1 hour). + + Azure-specific properties (for az:// locations): + - credential_vendor.azure_account_name (required): Azure storage + account name + - credential_vendor.azure_tenant_id (optional): Azure tenant ID + - credential_vendor.azure_duration_millis (optional): Duration in ms + (default: 3600000, up to 7 days) + Examples -------- >>> import lance.namespace @@ -95,6 +129,15 @@ class DirectoryNamespace(LanceNamespace): >>> # Using the connect() factory function from lance_namespace >>> import lance_namespace >>> ns = lance_namespace.connect("dir", {"root": "memory://test"}) + >>> + >>> # With AWS credential vending (requires credential-vendor-aws feature) + >>> # Use **dict to pass property names with dots + >>> ns = lance.namespace.DirectoryNamespace(**{ + ... "root": "s3://my-bucket/data", + ... "credential_vendor.enabled": "true", + ... "credential_vendor.aws_role_arn": "arn:aws:iam::123456789012:role/MyRole", + ... "credential_vendor.aws_duration_millis": "3600000", + ... 
}) """ def __init__(self, session=None, **properties): diff --git a/rust/lance-namespace-impls/Cargo.toml b/rust/lance-namespace-impls/Cargo.toml index 9ce32692ffc..cb0ff52d1e0 100644 --- a/rust/lance-namespace-impls/Cargo.toml +++ b/rust/lance-namespace-impls/Cargo.toml @@ -21,6 +21,10 @@ dir-aws = ["lance-io/aws", "lance/aws"] dir-azure = ["lance-io/azure", "lance/azure"] dir-oss = ["lance-io/oss", "lance/oss"] dir-huggingface = ["lance-io/huggingface", "lance/huggingface"] +# Credential vending features +credential-vendor-aws = ["dep:aws-sdk-sts", "dep:aws-config", "dep:aws-credential-types"] +credential-vendor-gcp = ["dep:google-cloud-auth", "dep:reqwest", "dep:serde"] +credential-vendor-azure = ["dep:azure_core", "dep:azure_identity", "dep:azure_storage", "dep:azure_storage_blobs", "dep:time"] [dependencies] lance-namespace.workspace = true @@ -60,6 +64,22 @@ serde_json = { workspace = true } futures.workspace = true log.workspace = true rand.workspace = true +chrono.workspace = true + +# AWS credential vending dependencies (optional, enabled by "credential-vendor-aws" feature) +aws-sdk-sts = { version = "1.38.0", optional = true } +aws-config = { workspace = true, optional = true } +aws-credential-types = { workspace = true, optional = true } + +# GCP credential vending dependencies (optional, enabled by "credential-vendor-gcp" feature) +google-cloud-auth = { version = "0.18", optional = true } + +# Azure credential vending dependencies (optional, enabled by "credential-vendor-azure" feature) +azure_core = { version = "0.21", optional = true } +azure_identity = { version = "0.21", optional = true } +azure_storage = { version = "0.21", optional = true } +azure_storage_blobs = { version = "0.21", optional = true } +time = { version = "0.3", optional = true } [dev-dependencies] tokio = { workspace = true, features = ["full"] } diff --git a/rust/lance-namespace-impls/src/credentials.rs b/rust/lance-namespace-impls/src/credentials.rs new file mode 100644 index 00000000000..6be4f1e38a4 --- /dev/null +++
b/rust/lance-namespace-impls/src/credentials.rs @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Credential vending for cloud storage access. +//! +//! This module provides credential vending functionality that generates +//! temporary, scoped credentials for accessing cloud storage. Similar to +//! Apache Polaris's credential vending, it supports: +//! +//! - **AWS**: STS AssumeRole with scoped IAM policies (requires `credential-vendor-aws` feature) +//! - **GCP**: OAuth2 tokens with access boundaries (requires `credential-vendor-gcp` feature) +//! - **Azure**: SAS tokens with user delegation keys (requires `credential-vendor-azure` feature) +//! +//! The appropriate vendor is automatically selected based on the table location URI scheme: +//! - `s3://` for AWS +//! - `gs://` for GCP +//! - `az://` for Azure +//! +//! ## Configuration via Properties +//! +//! Credential vendors are configured via properties with the `credential_vendor.` prefix. +//! +//! ### Properties format: +//! +//! ```text +//! # Required to enable credential vending +//! credential_vendor.enabled = "true" +//! +//! # Common properties (apply to all providers) +//! credential_vendor.permission = "read" # read, write, or admin (default: read) +//! +//! # AWS-specific properties (for s3:// locations) +//! credential_vendor.aws_role_arn = "arn:aws:iam::123456789012:role/MyRole" # required for AWS +//! credential_vendor.aws_external_id = "my-external-id" +//! credential_vendor.aws_region = "us-west-2" +//! credential_vendor.aws_role_session_name = "my-session" +//! credential_vendor.aws_duration_millis = "3600000" # 1 hour (default, range: 15min-12hrs) +//! +//! # GCP-specific properties (for gs:// locations) +//! # Note: GCP token duration cannot be configured; it's determined by the STS endpoint +//! # To use a service account key file, set GOOGLE_APPLICATION_CREDENTIALS env var before starting +//! 
credential_vendor.gcp_service_account = "my-sa@project.iam.gserviceaccount.com" +//! +//! # Azure-specific properties (for az:// locations) +//! credential_vendor.azure_account_name = "mystorageaccount" # required for Azure +//! credential_vendor.azure_tenant_id = "my-tenant-id" +//! credential_vendor.azure_duration_millis = "3600000" # 1 hour (default, up to 7 days) +//! ``` +//! +//! ### Example using ConnectBuilder: +//! +//! ```ignore +//! ConnectBuilder::new("dir") +//! .property("root", "s3://bucket/path") +//! .property("credential_vendor.enabled", "true") +//! .property("credential_vendor.aws_role_arn", "arn:aws:iam::123456789012:role/MyRole") +//! .property("credential_vendor.permission", "read") +//! .connect() +//! .await?; +//! ``` + +#[cfg(feature = "credential-vendor-aws")] +pub mod aws; + +#[cfg(feature = "credential-vendor-azure")] +pub mod azure; + +#[cfg(feature = "credential-vendor-gcp")] +pub mod gcp; + +use std::collections::HashMap; +use std::str::FromStr; + +use async_trait::async_trait; +use lance_core::Result; +use lance_io::object_store::uri_to_url; + +/// Default credential duration: 1 hour (3600000 milliseconds) +pub const DEFAULT_CREDENTIAL_DURATION_MILLIS: u64 = 3600 * 1000; + +/// Redact a credential string for logging, showing first and last few characters. +/// +/// This is useful for debugging while avoiding exposure of sensitive data. +/// Format: `AKIAIOSF***MPLE` (first 8 + "***" + last 4) +/// +/// Shows 8 characters at the start (useful since AWS keys always start with AKIA/ASIA) +/// and 4 characters at the end. For short strings, shows only the first few with "***". +/// +/// # Security Note +/// +/// This function should only be used for identifiers and tokens, never for secrets +/// like `aws_secret_access_key` which should never be logged even in redacted form. 
+pub fn redact_credential(credential: &str) -> String { + const SHOW_START: usize = 8; + const SHOW_END: usize = 4; + const MIN_LENGTH_FOR_BOTH_ENDS: usize = SHOW_START + SHOW_END + 4; // Need at least 16 chars + + if credential.is_empty() { + return "[empty]".to_string(); + } + + if credential.len() < MIN_LENGTH_FOR_BOTH_ENDS { + // For short credentials, just show beginning + let show = credential.len().min(SHOW_START); + format!("{}***", &credential[..show]) + } else { + // Show first 8 and last 4 characters + format!( + "{}***{}", + &credential[..SHOW_START], + &credential[credential.len() - SHOW_END..] + ) + } +} + +/// Permission level for vended credentials. +/// +/// This determines what access the vended credentials will have: +/// - `Read`: Read-only access to all table content +/// - `Write`: Full read and write access (no delete) +/// - `Admin`: Full read, write, and delete access +/// +/// Permission enforcement by cloud provider: +/// - **AWS**: Permissions are enforced via scoped IAM policies attached to the AssumeRole request +/// - **Azure**: Permissions are enforced via SAS token permissions +/// - **GCP**: Permissions are enforced via Credential Access Boundaries (CAB) that downscope +/// the OAuth2 token to specific GCS IAM roles +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum VendedPermission { + /// Read-only access to all table content (metadata, indices, data files) + #[default] + Read, + /// Full read and write access (no delete) + /// This is intended ONLY for testing purposes to generate a write-only permission set. + /// Technically, any user with write permission could "delete" the file by + /// overwriting the file with empty content. + /// So this cannot really prevent malicious use cases. 
+ Write, + /// Full read, write, and delete access + Admin, +} + +impl VendedPermission { + /// Returns true if this permission allows writing + pub fn can_write(&self) -> bool { + matches!(self, Self::Write | Self::Admin) + } + + /// Returns true if this permission allows deleting + pub fn can_delete(&self) -> bool { + matches!(self, Self::Admin) + } +} + +impl FromStr for VendedPermission { + type Err = String; + + fn from_str(s: &str) -> std::result::Result { + match s.to_lowercase().as_str() { + "read" => Ok(Self::Read), + "write" => Ok(Self::Write), + "admin" => Ok(Self::Admin), + _ => Err(format!( + "Invalid permission '{}'. Must be one of: read, write, admin", + s + )), + } + } +} + +impl std::fmt::Display for VendedPermission { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Read => write!(f, "read"), + Self::Write => write!(f, "write"), + Self::Admin => write!(f, "admin"), + } + } +} + +/// Property key prefix for credential vendor properties. +/// Properties with this prefix are stripped when using `from_properties`. +pub const PROPERTY_PREFIX: &str = "credential_vendor."; + +/// Common property key to explicitly enable credential vending (short form). +pub const ENABLED: &str = "enabled"; + +/// Common property key for permission level (short form). +pub const PERMISSION: &str = "permission"; + +/// AWS-specific property keys (short form, without prefix) +#[cfg(feature = "credential-vendor-aws")] +pub mod aws_props { + pub const ROLE_ARN: &str = "aws_role_arn"; + pub const EXTERNAL_ID: &str = "aws_external_id"; + pub const REGION: &str = "aws_region"; + pub const ROLE_SESSION_NAME: &str = "aws_role_session_name"; + /// AWS credential duration in milliseconds. + /// Default: 3600000 (1 hour). Range: 900000 (15 min) to 43200000 (12 hours). 
/// GCP-specific property keys (short form, without prefix)
#[cfg(feature = "credential-vendor-gcp")]
pub mod gcp_props {
    pub const SERVICE_ACCOUNT: &str = "gcp_service_account";
}

/// Azure-specific property keys (short form, without prefix)
#[cfg(feature = "credential-vendor-azure")]
pub mod azure_props {
    pub const TENANT_ID: &str = "azure_tenant_id";
    /// Azure storage account name. Required for credential vending.
    pub const ACCOUNT_NAME: &str = "azure_account_name";
    /// Azure credential duration in milliseconds.
    /// Default: 3600000 (1 hour). Azure SAS tokens can be valid up to 7 days.
    pub const DURATION_MILLIS: &str = "azure_duration_millis";
}

/// Vended credentials with expiration information.
#[derive(Clone)]
pub struct VendedCredentials {
    /// Storage options map containing credential keys.
    /// - For AWS: `aws_access_key_id`, `aws_secret_access_key`, `aws_session_token`
    /// - For GCP: `google_storage_token`
    /// - For Azure: `azure_storage_sas_token`, `azure_storage_account_name`
    pub storage_options: HashMap<String, String>,

    /// Expiration time in milliseconds since Unix epoch.
    pub expires_at_millis: u64,
}

// `Debug` is written by hand (not derived) so that secret values held in
// `storage_options` can never reach logs or panic messages; only the number of
// entries is reported.
impl std::fmt::Debug for VendedCredentials {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("VendedCredentials")
            .field(
                "storage_options",
                &format!("[{} keys redacted]", self.storage_options.len()),
            )
            .field("expires_at_millis", &self.expires_at_millis)
            .finish()
    }
}

impl VendedCredentials {
    /// Create new vended credentials.
    pub fn new(storage_options: HashMap<String, String>, expires_at_millis: u64) -> Self {
        Self {
            storage_options,
            expires_at_millis,
        }
    }

    /// Check if the credentials have expired.
    ///
    /// Returns `true` once the system clock has reached `expires_at_millis`.
    /// This never panics: a system clock set before the Unix epoch is treated
    /// as time zero, and a clock too far in the future to fit in `u64`
    /// milliseconds saturates at `u64::MAX`.
    pub fn is_expired(&self) -> bool {
        let now_millis = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |elapsed| {
                // Saturate instead of silently truncating the u128 millisecond count.
                elapsed.as_millis().min(u128::from(u64::MAX)) as u64
            });
        now_millis >= self.expires_at_millis
    }
}
+ pub fn is_expired(&self) -> bool { + let now_millis = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("time went backwards") + .as_millis() as u64; + now_millis >= self.expires_at_millis + } +} + +/// Trait for credential vendors that generate temporary credentials. +/// +/// Each cloud provider has its own configuration passed via the vendor +/// implementation. The permission level is configured at vendor creation time +/// via [`VendedPermission`]. +#[async_trait] +pub trait CredentialVendor: Send + Sync + std::fmt::Debug { + /// Vend credentials for accessing the specified table location. + /// + /// The permission level (read/write/admin) is determined by the vendor's + /// configuration, not per-request. + /// + /// # Arguments + /// + /// * `table_location` - The table URI to vend credentials for + /// + /// # Returns + /// + /// Returns vended credentials with expiration information. + async fn vend_credentials(&self, table_location: &str) -> Result; + + /// Returns the cloud provider name (e.g., "aws", "gcp", "azure"). + fn provider_name(&self) -> &'static str; + + /// Returns the permission level configured for this vendor. + fn permission(&self) -> VendedPermission; +} + +/// Detect the cloud provider from a URI scheme. +/// +/// Supported schemes for credential vending: +/// - AWS S3: `s3://` +/// - GCP GCS: `gs://` +/// - Azure Blob: `az://` +/// +/// Returns "aws", "gcp", "azure", or "unknown". +pub fn detect_provider_from_uri(uri: &str) -> &'static str { + let Ok(url) = uri_to_url(uri) else { + return "unknown"; + }; + + match url.scheme() { + "s3" => "aws", + "gs" => "gcp", + "az" => "azure", + _ => "unknown", + } +} + +/// Check if credential vending is enabled. +/// +/// Returns true only if the `enabled` property is set to "true". +/// This expects properties with short names (prefix already stripped). 
+pub fn has_credential_vendor_config(properties: &HashMap) -> bool { + properties + .get(ENABLED) + .map(|v| v.eq_ignore_ascii_case("true")) + .unwrap_or(false) +} + +/// Create a credential vendor for the specified table location based on its URI scheme. +/// +/// This function automatically detects the cloud provider from the table location +/// and creates the appropriate credential vendor using the provided properties. +/// +/// # Arguments +/// +/// * `table_location` - The table URI to create a vendor for (e.g., "s3://bucket/path") +/// * `properties` - Configuration properties for credential vendors +/// +/// # Returns +/// +/// Returns `Some(vendor)` if the provider is detected and configured, `None` if: +/// - The provider cannot be detected from the URI (e.g., local file path) +/// - The required feature is not enabled for the detected provider +/// +/// # Errors +/// +/// Returns an error if the provider is detected but required configuration is missing: +/// - AWS: `credential_vendor.aws_role_arn` is required +/// - Azure: `credential_vendor.azure_account_name` is required +#[allow(unused_variables)] +pub async fn create_credential_vendor_for_location( + table_location: &str, + properties: &HashMap, +) -> Result>> { + let provider = detect_provider_from_uri(table_location); + + match provider { + #[cfg(feature = "credential-vendor-aws")] + "aws" => create_aws_vendor(properties).await, + + #[cfg(feature = "credential-vendor-gcp")] + "gcp" => create_gcp_vendor(properties).await, + + #[cfg(feature = "credential-vendor-azure")] + "azure" => create_azure_vendor(properties), + + _ => Ok(None), + } +} + +/// Parse permission from properties, defaulting to Read +fn parse_permission(properties: &HashMap) -> VendedPermission { + properties + .get(PERMISSION) + .and_then(|s| s.parse().ok()) + .unwrap_or_default() +} + +/// Parse duration from properties using a vendor-specific key, defaulting to DEFAULT_CREDENTIAL_DURATION_MILLIS +fn 
/// Build the AWS (STS AssumeRole) credential vendor from short-form properties.
///
/// `aws_role_arn` is mandatory; external id, region and session name are
/// applied only when present.
#[cfg(feature = "credential-vendor-aws")]
async fn create_aws_vendor(
    properties: &HashMap<String, String>,
) -> Result<Option<Box<dyn CredentialVendor>>> {
    use aws::{AwsCredentialVendor, AwsCredentialVendorConfig};
    use lance_core::Error;

    // Without a role to assume there is nothing to vend.
    let Some(role_arn) = properties.get(aws_props::ROLE_ARN) else {
        return Err(Error::InvalidInput {
            source: "AWS credential vending requires 'credential_vendor.aws_role_arn' to be set"
                .into(),
            location: snafu::location!(),
        });
    };

    let config = AwsCredentialVendorConfig::new(role_arn)
        .with_duration_millis(parse_duration_millis(
            properties,
            aws_props::DURATION_MILLIS,
        ))
        .with_permission(parse_permission(properties));

    // Optional settings are layered on one at a time.
    let config = match properties.get(aws_props::EXTERNAL_ID) {
        Some(external_id) => config.with_external_id(external_id),
        None => config,
    };
    let config = match properties.get(aws_props::REGION) {
        Some(region) => config.with_region(region),
        None => config,
    };
    let config = match properties.get(aws_props::ROLE_SESSION_NAME) {
        Some(session_name) => config.with_role_session_name(session_name),
        None => config,
    };

    let vendor: Box<dyn CredentialVendor> = Box::new(AwsCredentialVendor::new(config).await?);
    Ok(Some(vendor))
}

/// Build the GCP credential vendor from short-form properties.
///
/// No property is mandatory; `gcp_service_account` is applied when present.
#[cfg(feature = "credential-vendor-gcp")]
async fn create_gcp_vendor(
    properties: &HashMap<String, String>,
) -> Result<Option<Box<dyn CredentialVendor>>> {
    use gcp::{GcpCredentialVendor, GcpCredentialVendorConfig};

    let config = GcpCredentialVendorConfig::new().with_permission(parse_permission(properties));
    let config = match properties.get(gcp_props::SERVICE_ACCOUNT) {
        Some(service_account) => config.with_service_account(service_account),
        None => config,
    };

    let vendor: Box<dyn CredentialVendor> = Box::new(GcpCredentialVendor::new(config).await?);
    Ok(Some(vendor))
}

/// Build the Azure credential vendor from short-form properties.
///
/// `azure_account_name` is mandatory; `azure_tenant_id` is applied when present.
#[cfg(feature = "credential-vendor-azure")]
fn create_azure_vendor(
    properties: &HashMap<String, String>,
) -> Result<Option<Box<dyn CredentialVendor>>> {
    use azure::{AzureCredentialVendor, AzureCredentialVendorConfig};
    use lance_core::Error;

    // The storage account name must be configured explicitly.
    let Some(account_name) = properties.get(azure_props::ACCOUNT_NAME) else {
        return Err(Error::InvalidInput {
            source:
                "Azure credential vending requires 'credential_vendor.azure_account_name' to be set"
                    .into(),
            location: snafu::location!(),
        });
    };

    let config = AzureCredentialVendorConfig::new()
        .with_account_name(account_name)
        .with_duration_millis(parse_duration_millis(
            properties,
            azure_props::DURATION_MILLIS,
        ))
        .with_permission(parse_permission(properties));
    let config = match properties.get(azure_props::TENANT_ID) {
        Some(tenant_id) => config.with_tenant_id(tenant_id),
        None => config,
    };

    let vendor: Box<dyn CredentialVendor> = Box::new(AzureCredentialVendor::new(config));
    Ok(Some(vendor))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Current wall-clock time in milliseconds since the Unix epoch.
    fn now_millis() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_millis() as u64
    }

    /// Property map holding exactly one entry.
    fn single_prop(key: &str, value: &str) -> HashMap<String, String> {
        let mut props = HashMap::new();
        props.insert(key.to_string(), value.to_string());
        props
    }

    #[test]
    fn test_detect_provider_from_uri() {
        let cases = [
            // AWS (supported scheme: s3://)
            ("s3://bucket/path", "aws"),
            ("S3://bucket/path", "aws"),
            // GCP (supported scheme: gs://)
            ("gs://bucket/path", "gcp"),
            ("GS://bucket/path", "gcp"),
            // Azure (supported scheme: az://)
            ("az://container/path", "azure"),
            // Unknown (unsupported schemes)
            ("/local/path", "unknown"),
            ("file:///local/path", "unknown"),
            ("memory://test", "unknown"),
            // Hadoop-style schemes not supported by lance-io
            ("s3a://bucket/path", "unknown"),
            (
                "abfss://container@account.dfs.core.windows.net/path",
                "unknown",
            ),
            (
                "wasbs://container@account.blob.core.windows.net/path",
                "unknown",
            ),
        ];

        for (uri, expected) in cases {
            assert_eq!(detect_provider_from_uri(uri), expected);
        }
    }

    #[test]
    fn test_vended_permission_from_str() {
        // Valid values (case-insensitive)
        let valid = [
            ("read", VendedPermission::Read),
            ("READ", VendedPermission::Read),
            ("write", VendedPermission::Write),
            ("WRITE", VendedPermission::Write),
            ("admin", VendedPermission::Admin),
            ("Admin", VendedPermission::Admin),
        ];
        for (text, expected) in valid {
            assert_eq!(text.parse::<VendedPermission>().unwrap(), expected);
        }

        // Invalid values should return an error that echoes the input
        let err = "invalid".parse::<VendedPermission>().unwrap_err();
        assert!(err.contains("Invalid permission"));
        assert!(err.contains("invalid"));

        for rejected in ["", "readwrite"] {
            let err = rejected.parse::<VendedPermission>().unwrap_err();
            assert!(err.contains("Invalid permission"));
        }
    }

    #[test]
    fn test_vended_permission_display() {
        let rendered = [
            (VendedPermission::Read, "read"),
            (VendedPermission::Write, "write"),
            (VendedPermission::Admin, "admin"),
        ];
        for (permission, expected) in rendered {
            assert_eq!(permission.to_string(), expected);
        }
    }

    #[test]
    fn test_parse_permission_with_invalid_values() {
        // Invalid and empty permissions should default to Read
        for raw in ["invalid", ""] {
            let props = single_prop(PERMISSION, raw);
            assert_eq!(parse_permission(&props), VendedPermission::Read);
        }

        // Missing permission should default to Read
        let empty_props: HashMap<String, String> = HashMap::new();
        assert_eq!(parse_permission(&empty_props), VendedPermission::Read);
    }

    #[test]
    fn test_parse_duration_millis_with_invalid_values() {
        const TEST_KEY: &str = "test_duration_millis";

        // Non-numeric, negative (not a u64) and empty values all fall back to the default
        for raw in ["not_a_number", "-1000", ""] {
            let props = single_prop(TEST_KEY, raw);
            assert_eq!(
                parse_duration_millis(&props, TEST_KEY),
                DEFAULT_CREDENTIAL_DURATION_MILLIS
            );
        }

        // Missing duration should default
        let empty_props: HashMap<String, String> = HashMap::new();
        assert_eq!(
            parse_duration_millis(&empty_props, TEST_KEY),
            DEFAULT_CREDENTIAL_DURATION_MILLIS
        );

        // Valid duration should work
        let props = single_prop(TEST_KEY, "7200000");
        assert_eq!(parse_duration_millis(&props, TEST_KEY), 7200000);
    }

    #[test]
    fn test_has_credential_vendor_config() {
        // "true" in any ASCII case enables vending; every other value does not
        let cases = [("true", true), ("TRUE", true), ("false", false), ("yes", false)];
        for (raw, expected) in cases {
            let props = single_prop(ENABLED, raw);
            assert_eq!(has_credential_vendor_config(&props), expected);
        }

        // enabled missing
        let empty_props: HashMap<String, String> = HashMap::new();
        assert!(!has_credential_vendor_config(&empty_props));
    }

    #[test]
    fn test_vended_credentials_debug_redacts_secrets() {
        let secrets = [
            ("aws_access_key_id", "AKIAIOSFODNN7EXAMPLE"),
            (
                "aws_secret_access_key",
                "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
            ),
            ("aws_session_token", "FwoGZXIvYXdzE..."),
        ];
        let mut storage_options = HashMap::new();
        for (key, value) in secrets {
            storage_options.insert(key.to_string(), value.to_string());
        }

        let creds = VendedCredentials::new(storage_options, 1234567890);
        let debug_output = format!("{:?}", creds);

        // Should NOT contain actual secrets
        for leaked in ["AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI", "FwoGZXIvYXdzE"] {
            assert!(!debug_output.contains(leaked));
        }

        // Should contain the redaction marker, the key count and the expiration time
        for expected in ["redacted", "3 keys", "1234567890"] {
            assert!(debug_output.contains(expected));
        }
    }

    #[test]
    fn test_vended_credentials_is_expired() {
        // Credentials that expired 1 second ago
        let expired_creds = VendedCredentials::new(HashMap::new(), now_millis() - 1000);
        assert!(expired_creds.is_expired());

        // Credentials that expire 1 hour from now
        let valid_creds = VendedCredentials::new(HashMap::new(), now_millis() + 3600000);
        assert!(!valid_creds.is_expired());
    }

    #[test]
    fn test_redact_credential() {
        let cases = [
            // Long credential: shows first 8 and last 4
            ("AKIAIOSFODNN7EXAMPLE", "AKIAIOSF***MPLE"),
            // Exactly 16 chars: shows first 8 and last 4
            ("1234567890123456", "12345678***3456"),
            // Short credential (< 16 chars): shows only first few
            ("short1234567", "short123***"),
            ("short123", "short123***"),
            ("tiny", "tiny***"),
            ("ab", "ab***"),
            ("a", "a***"),
            // Empty string
            ("", "[empty]"),
            // GCP token (typically very long)
            (
                "ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz",
                "ya29.a0A***wxyz",
            ),
            // Azure SAS token
            (
                "sv=2021-06-08&ss=b&srt=sco&sp=rwdlacuiytfx&se=2024-12-31",
                "sv=2021-***2-31",
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(redact_credential(input), expected);
        }
    }
}
"ab***"); + assert_eq!(redact_credential("a"), "a***"); + + // Empty string + assert_eq!(redact_credential(""), "[empty]"); + + // Real-world examples + // AWS access key ID (20 chars) - shows AKIA + 4 more chars which helps identify the key + assert_eq!(redact_credential("AKIAIOSFODNN7EXAMPLE"), "AKIAIOSF***MPLE"); + + // GCP token (typically very long) + let long_token = "ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz"; + assert_eq!(redact_credential(long_token), "ya29.a0A***wxyz"); + + // Azure SAS token + let sas_token = "sv=2021-06-08&ss=b&srt=sco&sp=rwdlacuiytfx&se=2024-12-31"; + assert_eq!(redact_credential(sas_token), "sv=2021-***2-31"); + } +} diff --git a/rust/lance-namespace-impls/src/credentials/aws.rs b/rust/lance-namespace-impls/src/credentials/aws.rs new file mode 100644 index 00000000000..96e0e8a2a80 --- /dev/null +++ b/rust/lance-namespace-impls/src/credentials/aws.rs @@ -0,0 +1,881 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! AWS credential vending using STS AssumeRole. +//! +//! This module provides credential vending for AWS S3 storage by assuming +//! an IAM role using AWS STS (Security Token Service). + +use std::collections::HashMap; + +use async_trait::async_trait; +use aws_config::BehaviorVersion; +use aws_sdk_sts::Client as StsClient; +use lance_core::{Error, Result}; +use lance_io::object_store::uri_to_url; +use log::{debug, info}; + +use super::{ + redact_credential, CredentialVendor, VendedCredentials, VendedPermission, + DEFAULT_CREDENTIAL_DURATION_MILLIS, +}; + +/// Configuration for AWS credential vending. +#[derive(Debug, Clone)] +pub struct AwsCredentialVendorConfig { + /// The IAM role ARN to assume. + pub role_arn: String, + + /// Optional external ID for the assume role request. + pub external_id: Option, + + /// Duration for vended credentials in milliseconds. + /// Default: 3600000 (1 hour). + /// AWS STS allows 900-43200 seconds (15 min - 12 hours). 
+ /// Values outside this range will be clamped. + pub duration_millis: u64, + + /// Optional role session name. Defaults to "lance-credential-vending". + pub role_session_name: Option, + + /// Optional AWS region for the STS client. + pub region: Option, + + /// Permission level for vended credentials. + /// Default: Read (full read access) + pub permission: VendedPermission, +} + +impl AwsCredentialVendorConfig { + /// Create a new config with the specified role ARN. + pub fn new(role_arn: impl Into) -> Self { + Self { + role_arn: role_arn.into(), + external_id: None, + duration_millis: DEFAULT_CREDENTIAL_DURATION_MILLIS, + role_session_name: None, + region: None, + permission: VendedPermission::default(), + } + } + + /// Set the external ID for the assume role request. + pub fn with_external_id(mut self, external_id: impl Into) -> Self { + self.external_id = Some(external_id.into()); + self + } + + /// Set the credential duration in milliseconds. + pub fn with_duration_millis(mut self, millis: u64) -> Self { + self.duration_millis = millis; + self + } + + /// Set the role session name. + pub fn with_role_session_name(mut self, name: impl Into) -> Self { + self.role_session_name = Some(name.into()); + self + } + + /// Set the AWS region for the STS client. + pub fn with_region(mut self, region: impl Into) -> Self { + self.region = Some(region.into()); + self + } + + /// Set the permission level for vended credentials. + pub fn with_permission(mut self, permission: VendedPermission) -> Self { + self.permission = permission; + self + } +} + +/// AWS credential vendor that uses STS AssumeRole. +#[derive(Debug)] +pub struct AwsCredentialVendor { + config: AwsCredentialVendorConfig, + sts_client: StsClient, +} + +impl AwsCredentialVendor { + /// Create a new AWS credential vendor with the specified configuration. 
+ pub async fn new(config: AwsCredentialVendorConfig) -> Result { + let mut aws_config_loader = aws_config::defaults(BehaviorVersion::latest()); + + if let Some(ref region) = config.region { + aws_config_loader = aws_config_loader.region(aws_config::Region::new(region.clone())); + } + + let aws_config = aws_config_loader.load().await; + let sts_client = StsClient::new(&aws_config); + + Ok(Self { config, sts_client }) + } + + /// Create a new AWS credential vendor with an existing STS client. + pub fn with_sts_client(config: AwsCredentialVendorConfig, sts_client: StsClient) -> Self { + Self { config, sts_client } + } + + /// Parse an S3 URI to extract bucket and prefix. + fn parse_s3_uri(uri: &str) -> Result<(String, String)> { + let url = uri_to_url(uri)?; + + let bucket = url + .host_str() + .ok_or_else(|| Error::InvalidInput { + source: format!("S3 URI '{}' missing bucket", uri).into(), + location: snafu::location!(), + })? + .to_string(); + + let prefix = url.path().trim_start_matches('/').to_string(); + + Ok((bucket, prefix)) + } + + /// Build a scoped IAM policy for the specified location and permission level. 
+ /// + /// Permission levels: + /// - `Read`: Full read access to all content (metadata, indices, data files) + /// - `Write`: Full read and write access (no delete) + /// - `Admin`: Full read, write, and delete access + fn build_policy(bucket: &str, prefix: &str, permission: VendedPermission) -> String { + let prefix_trimmed = prefix.trim_end_matches('/'); + let base_path = if prefix.is_empty() { + format!("arn:aws:s3:::{}/*", bucket) + } else { + format!("arn:aws:s3:::{}/{}/*", bucket, prefix_trimmed) + }; + let bucket_arn = format!("arn:aws:s3:::{}", bucket); + + let mut statements = vec![]; + + // List bucket permission (always needed) + statements.push(serde_json::json!({ + "Effect": "Allow", + "Action": "s3:ListBucket", + "Resource": bucket_arn, + "Condition": { + "StringLike": { + "s3:prefix": if prefix.is_empty() { + "*".to_string() + } else { + format!("{}/*", prefix_trimmed) + } + } + } + })); + + // Get bucket location (always needed) + statements.push(serde_json::json!({ + "Effect": "Allow", + "Action": "s3:GetBucketLocation", + "Resource": bucket_arn + })); + + // Read access (all permission levels have full read) + statements.push(serde_json::json!({ + "Effect": "Allow", + "Action": ["s3:GetObject", "s3:GetObjectVersion"], + "Resource": base_path + })); + + // Write access (Write and Admin) + if permission.can_write() { + statements.push(serde_json::json!({ + "Effect": "Allow", + "Action": "s3:PutObject", + "Resource": base_path + })); + } + + // Delete access (Admin only) + if permission.can_delete() { + statements.push(serde_json::json!({ + "Effect": "Allow", + "Action": "s3:DeleteObject", + "Resource": base_path + })); + } + + let policy = serde_json::json!({ + "Version": "2012-10-17", + "Statement": statements + }); + + policy.to_string() + } +} + +#[async_trait] +impl CredentialVendor for AwsCredentialVendor { + async fn vend_credentials(&self, table_location: &str) -> Result { + debug!( + "AWS credential vending: location={}, permission={}", + 
table_location, self.config.permission + ); + + let (bucket, prefix) = Self::parse_s3_uri(table_location)?; + let policy = Self::build_policy(&bucket, &prefix, self.config.permission); + + let role_session_name = self + .config + .role_session_name + .clone() + .unwrap_or_else(|| "lance-credential-vending".to_string()); + + // Cap session name to 64 chars (AWS limit) + let role_session_name = if role_session_name.len() > 64 { + role_session_name[..64].to_string() + } else { + role_session_name + }; + + // Convert millis to seconds for AWS API (rounding up to ensure at least the requested duration) + // AWS STS allows 900-43200 seconds (15 min - 12 hours), clamp to valid range + let duration_secs = self.config.duration_millis.div_ceil(1000).clamp(900, 43200) as i32; + + let mut request = self + .sts_client + .assume_role() + .role_arn(&self.config.role_arn) + .role_session_name(&role_session_name) + .policy(&policy) + .duration_seconds(duration_secs); + + if let Some(ref external_id) = self.config.external_id { + request = request.external_id(external_id); + } + + let response = request.send().await.map_err(|e| Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to assume role '{}': {}", + self.config.role_arn, e + ))), + location: snafu::location!(), + })?; + + let credentials = response.credentials().ok_or_else(|| Error::IO { + source: Box::new(std::io::Error::other( + "AssumeRole response missing credentials", + )), + location: snafu::location!(), + })?; + + let access_key_id = credentials.access_key_id().to_string(); + let secret_access_key = credentials.secret_access_key().to_string(); + let session_token = credentials.session_token().to_string(); + + let expiration = credentials.expiration(); + let expires_at_millis = + (expiration.secs() as u64) * 1000 + (expiration.subsec_nanos() / 1_000_000) as u64; + + info!( + "AWS credentials vended: bucket={}, prefix={}, permission={}, expires_at={}, access_key_id={}", + bucket, prefix, 
self.config.permission, expires_at_millis, redact_credential(&access_key_id) + ); + + let mut storage_options = HashMap::new(); + storage_options.insert("aws_access_key_id".to_string(), access_key_id); + storage_options.insert("aws_secret_access_key".to_string(), secret_access_key); + storage_options.insert("aws_session_token".to_string(), session_token); + storage_options.insert( + "expires_at_millis".to_string(), + expires_at_millis.to_string(), + ); + + // Include region if configured + if let Some(ref region) = self.config.region { + storage_options.insert("aws_region".to_string(), region.clone()); + } + + Ok(VendedCredentials::new(storage_options, expires_at_millis)) + } + + fn provider_name(&self) -> &'static str { + "aws" + } + + fn permission(&self) -> VendedPermission { + self.config.permission + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_s3_uri() { + let (bucket, prefix) = AwsCredentialVendor::parse_s3_uri("s3://my-bucket/path/to/table") + .expect("should parse"); + assert_eq!(bucket, "my-bucket"); + assert_eq!(prefix, "path/to/table"); + + let (bucket, prefix) = + AwsCredentialVendor::parse_s3_uri("s3://my-bucket/").expect("should parse"); + assert_eq!(bucket, "my-bucket"); + assert_eq!(prefix, ""); + + let (bucket, prefix) = + AwsCredentialVendor::parse_s3_uri("s3://my-bucket").expect("should parse"); + assert_eq!(bucket, "my-bucket"); + assert_eq!(prefix, ""); + } + + #[test] + fn test_build_policy_read() { + let policy = + AwsCredentialVendor::build_policy("my-bucket", "path/to/table", VendedPermission::Read); + let parsed: serde_json::Value = serde_json::from_str(&policy).expect("valid json"); + + let statements = parsed["Statement"].as_array().expect("statements array"); + assert_eq!(statements.len(), 3); // ListBucket, GetBucketLocation, GetObject + + // Verify no write actions + for stmt in statements { + let actions = stmt["Action"].clone(); + let action_list: Vec = if actions.is_array() { + actions + 
.as_array() + .unwrap() + .iter() + .map(|a| a.as_str().unwrap().to_string()) + .collect() + } else { + vec![actions.as_str().unwrap().to_string()] + }; + assert!(!action_list.contains(&"s3:PutObject".to_string())); + assert!(!action_list.contains(&"s3:DeleteObject".to_string())); + } + } + + #[test] + fn test_build_policy_write() { + let policy = AwsCredentialVendor::build_policy( + "my-bucket", + "path/to/table", + VendedPermission::Write, + ); + let parsed: serde_json::Value = serde_json::from_str(&policy).expect("valid json"); + + let statements = parsed["Statement"].as_array().expect("statements array"); + // ListBucket, GetBucketLocation, GetObject, PutObject + assert_eq!(statements.len(), 4); + + // Verify PutObject is present + let write_stmt = statements + .iter() + .find(|s| { + let action = &s["Action"]; + action.as_str() == Some("s3:PutObject") + }) + .expect("should have PutObject statement"); + assert!(write_stmt["Effect"].as_str() == Some("Allow")); + + // Verify DeleteObject is NOT present (Write doesn't have delete) + let delete_stmt = statements.iter().find(|s| { + let action = &s["Action"]; + action.as_str() == Some("s3:DeleteObject") + }); + assert!(delete_stmt.is_none(), "Write should not have DeleteObject"); + + // Verify no Deny statements + let deny_stmt = statements + .iter() + .find(|s| s["Effect"].as_str() == Some("Deny")); + assert!(deny_stmt.is_none(), "Write should not have Deny statements"); + } + + #[test] + fn test_build_policy_admin() { + let policy = AwsCredentialVendor::build_policy( + "my-bucket", + "path/to/table", + VendedPermission::Admin, + ); + let parsed: serde_json::Value = serde_json::from_str(&policy).expect("valid json"); + + let statements = parsed["Statement"].as_array().expect("statements array"); + // ListBucket, GetBucketLocation, GetObject, PutObject, DeleteObject + assert_eq!(statements.len(), 5); + + // Verify read actions + let read_stmt = statements + .iter() + .find(|s| { + let actions = s["Action"].clone(); 
+ if actions.is_array() { + actions + .as_array() + .unwrap() + .iter() + .any(|a| a.as_str().unwrap() == "s3:GetObject") + } else { + false + } + }) + .expect("should have read statement"); + assert!(read_stmt["Effect"].as_str() == Some("Allow")); + + // Verify PutObject + let write_stmt = statements + .iter() + .find(|s| s["Action"].as_str() == Some("s3:PutObject")) + .expect("should have PutObject statement"); + assert!(write_stmt["Effect"].as_str() == Some("Allow")); + + // Verify DeleteObject (Admin only) + let delete_stmt = statements + .iter() + .find(|s| s["Action"].as_str() == Some("s3:DeleteObject")) + .expect("should have DeleteObject statement"); + assert!(delete_stmt["Effect"].as_str() == Some("Allow")); + + // Verify no Deny statements + let deny_stmt = statements + .iter() + .find(|s| s["Effect"].as_str() == Some("Deny")); + assert!(deny_stmt.is_none(), "Admin should not have Deny statements"); + } + + #[test] + fn test_config_builder() { + let config = AwsCredentialVendorConfig::new("arn:aws:iam::123456789012:role/MyRole") + .with_external_id("my-external-id") + .with_duration_millis(7200000) + .with_role_session_name("my-session") + .with_region("us-west-2"); + + assert_eq!(config.role_arn, "arn:aws:iam::123456789012:role/MyRole"); + assert_eq!(config.external_id, Some("my-external-id".to_string())); + assert_eq!(config.duration_millis, 7200000); + assert_eq!(config.role_session_name, Some("my-session".to_string())); + assert_eq!(config.region, Some("us-west-2".to_string())); + } + + // ============================================================================ + // Integration Tests + // ============================================================================ + + /// Integration tests for AWS credential vending. 
+ /// + /// These tests require: + /// - Valid AWS credentials (via environment, IAM role, or credential file) + /// - The `LANCE_TEST_AWS_ROLE_ARN` environment variable set to a role ARN that + /// can be assumed by the current credentials + /// - Access to the S3 bucket `jack-lancedb-devland-us-east-1` + /// + /// Run with: `cargo test --features credential-vendor-aws -- --ignored` + #[cfg(test)] + mod integration { + use super::*; + use crate::DirectoryNamespaceBuilder; + use arrow::array::{Int32Array, StringArray}; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow::ipc::writer::StreamWriter; + use arrow::record_batch::RecordBatch; + use bytes::Bytes; + use lance_namespace::models::*; + use lance_namespace::LanceNamespace; + use std::sync::Arc; + + const TEST_BUCKET: &str = "jack-lancedb-devland-us-east-1"; + + /// Helper to create Arrow IPC data for testing + fn create_test_arrow_data() -> Bytes { + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ]); + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["alice", "bob", "charlie"])), + ], + ) + .unwrap(); + + let mut buffer = Vec::new(); + { + let mut writer = StreamWriter::try_new(&mut buffer, &batch.schema()).unwrap(); + writer.write(&batch).unwrap(); + writer.finish().unwrap(); + } + + Bytes::from(buffer) + } + + /// Generate a unique test path for each test run to avoid conflicts + fn unique_test_path() -> String { + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis(); + format!("lance-test/credential-vending-{}", timestamp) + } + + /// Get the role ARN from environment variable + fn get_test_role_arn() -> Option { + std::env::var("LANCE_TEST_AWS_ROLE_ARN").ok() + } + + #[tokio::test] + #[ignore = "requires AWS credentials and LANCE_TEST_AWS_ROLE_ARN env var"] + async fn 
test_aws_credential_vending_basic() { + let role_arn = get_test_role_arn() + .expect("LANCE_TEST_AWS_ROLE_ARN must be set for integration tests"); + + let test_path = unique_test_path(); + let table_location = format!("s3://{}/{}/test_table", TEST_BUCKET, test_path); + + // Test Read permission + let read_config = AwsCredentialVendorConfig::new(&role_arn) + .with_duration_millis(900_000) // 15 minutes (minimum) + .with_region("us-east-1") + .with_permission(VendedPermission::Read); + + let read_vendor = AwsCredentialVendor::new(read_config) + .await + .expect("should create read vendor"); + + let read_creds = read_vendor + .vend_credentials(&table_location) + .await + .expect("should vend read credentials"); + + assert!( + read_creds.storage_options.contains_key("aws_access_key_id"), + "should have access key id" + ); + assert!( + read_creds + .storage_options + .contains_key("aws_secret_access_key"), + "should have secret access key" + ); + assert!( + read_creds.storage_options.contains_key("aws_session_token"), + "should have session token" + ); + assert!( + !read_creds.is_expired(), + "credentials should not be expired" + ); + assert_eq!( + read_vendor.permission(), + VendedPermission::Read, + "permission should be Read" + ); + + // Test Admin permission + let admin_config = AwsCredentialVendorConfig::new(&role_arn) + .with_duration_millis(900_000) + .with_region("us-east-1") + .with_permission(VendedPermission::Admin); + + let admin_vendor = AwsCredentialVendor::new(admin_config) + .await + .expect("should create admin vendor"); + + let admin_creds = admin_vendor + .vend_credentials(&table_location) + .await + .expect("should vend admin credentials"); + + assert!( + admin_creds + .storage_options + .contains_key("aws_access_key_id"), + "should have access key id" + ); + assert!( + !admin_creds.is_expired(), + "credentials should not be expired" + ); + assert_eq!( + admin_vendor.permission(), + VendedPermission::Admin, + "permission should be Admin" + ); + } + + 
#[tokio::test] + #[ignore = "requires AWS credentials and LANCE_TEST_AWS_ROLE_ARN env var"] + async fn test_directory_namespace_with_aws_credential_vending() { + let role_arn = get_test_role_arn() + .expect("LANCE_TEST_AWS_ROLE_ARN must be set for integration tests"); + + let test_path = unique_test_path(); + let root = format!("s3://{}/{}", TEST_BUCKET, test_path); + + // Build DirectoryNamespace with credential vending using short property names + let namespace = DirectoryNamespaceBuilder::new(&root) + .manifest_enabled(true) + .credential_vendor_property("enabled", "true") + .credential_vendor_property("aws_role_arn", &role_arn) + .credential_vendor_property("aws_duration_millis", "900000") // 15 minutes + .credential_vendor_property("aws_region", "us-east-1") + .credential_vendor_property("permission", "admin") + .build() + .await + .expect("should build namespace"); + + // Create a child namespace + let create_ns_req = CreateNamespaceRequest { + id: Some(vec!["test_ns".to_string()]), + properties: None, + mode: None, + }; + namespace + .create_namespace(create_ns_req) + .await + .expect("should create namespace"); + + // Create a table with data + let table_data = create_test_arrow_data(); + let create_table_req = CreateTableRequest { + id: Some(vec!["test_ns".to_string(), "test_table".to_string()]), + mode: Some("Create".to_string()), + }; + let create_response = namespace + .create_table(create_table_req, table_data) + .await + .expect("should create table"); + + assert!( + create_response.location.is_some(), + "should have location in response" + ); + assert_eq!(create_response.version, Some(1), "should be version 1"); + + // Describe the table (this should use vended credentials) + let describe_req = DescribeTableRequest { + id: Some(vec!["test_ns".to_string(), "test_table".to_string()]), + ..Default::default() + }; + let describe_response = namespace + .describe_table(describe_req) + .await + .expect("should describe table"); + + 
assert!(describe_response.location.is_some(), "should have location"); + assert!( + describe_response.storage_options.is_some(), + "should have storage_options with vended credentials" + ); + + let storage_options = describe_response.storage_options.unwrap(); + assert!( + storage_options.contains_key("aws_access_key_id"), + "should have vended aws_access_key_id" + ); + assert!( + storage_options.contains_key("aws_secret_access_key"), + "should have vended aws_secret_access_key" + ); + assert!( + storage_options.contains_key("aws_session_token"), + "should have vended aws_session_token" + ); + assert!( + storage_options.contains_key("expires_at_millis"), + "should have expires_at_millis" + ); + + // Verify expiration is in the future + let expires_at: u64 = storage_options + .get("expires_at_millis") + .unwrap() + .parse() + .expect("should parse expires_at_millis"); + let now_millis = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis() as u64; + assert!( + expires_at > now_millis, + "expiration should be in the future" + ); + + // List tables to verify the table was created + let list_req = ListTablesRequest { + id: Some(vec!["test_ns".to_string()]), + page_token: None, + limit: None, + }; + let list_response = namespace + .list_tables(list_req) + .await + .expect("should list tables"); + assert!( + list_response.tables.contains(&"test_table".to_string()), + "should contain test_table" + ); + + // Clean up: drop the table + let drop_req = DropTableRequest { + id: Some(vec!["test_ns".to_string(), "test_table".to_string()]), + }; + namespace + .drop_table(drop_req) + .await + .expect("should drop table"); + + // Clean up: drop the namespace + let mut drop_ns_req = DropNamespaceRequest::new(); + drop_ns_req.id = Some(vec!["test_ns".to_string()]); + namespace + .drop_namespace(drop_ns_req) + .await + .expect("should drop namespace"); + } + + #[tokio::test] + #[ignore = "requires AWS credentials and LANCE_TEST_AWS_ROLE_ARN 
env var"] + async fn test_credential_refresh_on_expiration() { + let role_arn = get_test_role_arn() + .expect("LANCE_TEST_AWS_ROLE_ARN must be set for integration tests"); + + let test_path = unique_test_path(); + let table_location = format!("s3://{}/{}/refresh_test", TEST_BUCKET, test_path); + + // Create vendor with minimum duration and Admin permission + let config = AwsCredentialVendorConfig::new(&role_arn) + .with_duration_millis(900_000) // 15 minutes + .with_region("us-east-1") + .with_permission(VendedPermission::Admin); + + let vendor = AwsCredentialVendor::new(config) + .await + .expect("should create vendor"); + + // Vend credentials multiple times to verify consistent behavior + let creds1 = vendor + .vend_credentials(&table_location) + .await + .expect("should vend credentials first time"); + + let creds2 = vendor + .vend_credentials(&table_location) + .await + .expect("should vend credentials second time"); + + // Both should be valid (not expired) + assert!(!creds1.is_expired(), "first credentials should be valid"); + assert!(!creds2.is_expired(), "second credentials should be valid"); + + // Both should have access keys (they may be different due to new STS calls) + assert!( + creds1.storage_options.contains_key("aws_access_key_id"), + "first creds should have access key" + ); + assert!( + creds2.storage_options.contains_key("aws_access_key_id"), + "second creds should have access key" + ); + } + + #[tokio::test] + #[ignore = "requires AWS credentials and LANCE_TEST_AWS_ROLE_ARN env var"] + async fn test_scoped_policy_permissions() { + let role_arn = get_test_role_arn() + .expect("LANCE_TEST_AWS_ROLE_ARN must be set for integration tests"); + + let test_path = unique_test_path(); + + // Create two different table locations + let table1_location = format!("s3://{}/{}/table1", TEST_BUCKET, test_path); + let table2_location = format!("s3://{}/{}/table2", TEST_BUCKET, test_path); + + let config = AwsCredentialVendorConfig::new(&role_arn) + 
.with_duration_millis(900_000) + .with_region("us-east-1") + .with_permission(VendedPermission::Admin); + + let vendor = AwsCredentialVendor::new(config) + .await + .expect("should create vendor"); + + // Vend credentials for table1 + let creds1 = vendor + .vend_credentials(&table1_location) + .await + .expect("should vend credentials for table1"); + + // Vend credentials for table2 + let creds2 = vendor + .vend_credentials(&table2_location) + .await + .expect("should vend credentials for table2"); + + // Both should be valid + assert!(!creds1.is_expired(), "table1 credentials should be valid"); + assert!(!creds2.is_expired(), "table2 credentials should be valid"); + + // The credentials are scoped to their respective paths via IAM policy + // (the policy restricts access to specific S3 paths) + } + + #[tokio::test] + #[ignore = "requires AWS credentials and LANCE_TEST_AWS_ROLE_ARN env var"] + async fn test_from_properties_builder() { + let role_arn = get_test_role_arn() + .expect("LANCE_TEST_AWS_ROLE_ARN must be set for integration tests"); + + let test_path = unique_test_path(); + let root = format!("s3://{}/{}", TEST_BUCKET, test_path); + + // Build namespace using from_properties (simulating config from external source) + // Properties use the "credential_vendor." 
prefix which gets stripped + let mut properties = HashMap::new(); + properties.insert("root".to_string(), root.clone()); + properties.insert("manifest_enabled".to_string(), "true".to_string()); + properties.insert("credential_vendor.enabled".to_string(), "true".to_string()); + properties.insert( + "credential_vendor.aws_role_arn".to_string(), + role_arn.clone(), + ); + properties.insert( + "credential_vendor.aws_duration_millis".to_string(), + "900000".to_string(), + ); + properties.insert( + "credential_vendor.aws_region".to_string(), + "us-east-1".to_string(), + ); + properties.insert( + "credential_vendor.permission".to_string(), + "admin".to_string(), + ); + + let namespace = DirectoryNamespaceBuilder::from_properties(properties, None) + .expect("should parse properties") + .build() + .await + .expect("should build namespace"); + + // Verify namespace works + let create_ns_req = CreateNamespaceRequest { + id: Some(vec!["props_test".to_string()]), + properties: None, + mode: None, + }; + namespace + .create_namespace(create_ns_req) + .await + .expect("should create namespace"); + + // Clean up + let mut drop_ns_req = DropNamespaceRequest::new(); + drop_ns_req.id = Some(vec!["props_test".to_string()]); + namespace + .drop_namespace(drop_ns_req) + .await + .expect("should drop namespace"); + } + } +} diff --git a/rust/lance-namespace-impls/src/credentials/azure.rs b/rust/lance-namespace-impls/src/credentials/azure.rs new file mode 100644 index 00000000000..1d4e4ded081 --- /dev/null +++ b/rust/lance-namespace-impls/src/credentials/azure.rs @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Azure credential vending using SAS tokens. +//! +//! This module provides credential vending for Azure Blob Storage by generating +//! SAS (Shared Access Signature) tokens with user delegation keys. 
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use azure_core::auth::TokenCredential;
+use azure_identity::DefaultAzureCredential;
+use azure_storage::prelude::*;
+use azure_storage_blobs::prelude::*;
+use lance_core::{Error, Result};
+use lance_io::object_store::uri_to_url;
+use log::{debug, info, warn};
+
+use super::{
+    redact_credential, CredentialVendor, VendedCredentials, VendedPermission,
+    DEFAULT_CREDENTIAL_DURATION_MILLIS,
+};
+
+/// Configuration for Azure credential vending.
+#[derive(Debug, Clone)]
+pub struct AzureCredentialVendorConfig {
+    /// Optional tenant ID for authentication.
+    ///
+    /// NOTE(review): this value is stored by the builder but is not read when
+    /// the vendor in this module creates its Azure credential - confirm
+    /// whether it should be wired into the credential options.
+    pub tenant_id: Option<String>,
+
+    /// Storage account name. Required for credential vending.
+    pub account_name: Option<String>,
+
+    /// Duration for vended credentials in milliseconds.
+    /// Default: 3600000 (1 hour). Azure allows up to 7 days for SAS tokens.
+    pub duration_millis: u64,
+
+    /// Permission level for vended credentials.
+    /// Default: Read (full read access)
+    pub permission: VendedPermission,
+}
+
+impl Default for AzureCredentialVendorConfig {
+    fn default() -> Self {
+        Self {
+            tenant_id: None,
+            account_name: None,
+            duration_millis: DEFAULT_CREDENTIAL_DURATION_MILLIS,
+            permission: VendedPermission::default(),
+        }
+    }
+}
+
+impl AzureCredentialVendorConfig {
+    /// Create a new default config.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the tenant ID.
+    pub fn with_tenant_id(mut self, tenant_id: impl Into<String>) -> Self {
+        self.tenant_id = Some(tenant_id.into());
+        self
+    }
+
+    /// Set the storage account name.
+    pub fn with_account_name(mut self, account_name: impl Into<String>) -> Self {
+        self.account_name = Some(account_name.into());
+        self
+    }
+
+    /// Set the credential duration in milliseconds.
+    pub fn with_duration_millis(mut self, millis: u64) -> Self {
+        self.duration_millis = millis;
+        self
+    }
+
+    /// Set the permission level for vended credentials.
+    pub fn with_permission(mut self, permission: VendedPermission) -> Self {
+        self.permission = permission;
+        self
+    }
+}
+
+/// Azure credential vendor that generates SAS tokens.
+#[derive(Debug)]
+pub struct AzureCredentialVendor {
+    config: AzureCredentialVendorConfig,
+}
+
+impl AzureCredentialVendor {
+    /// Create a new Azure credential vendor with the specified configuration.
+    pub fn new(config: AzureCredentialVendorConfig) -> Self {
+        Self { config }
+    }
+
+    /// Build SAS permissions based on the VendedPermission level.
+    ///
+    /// - Read: read + list
+    /// - Write: read + list + write + add + create
+    /// - Admin: read + list + write + add + create + delete
+    // Fields are assigned one by one after `default()` so that each
+    // permission tier reads as its own step; the lint is silenced on purpose.
+    #[allow(clippy::field_reassign_with_default)]
+    fn build_sas_permissions(permission: VendedPermission) -> BlobSasPermissions {
+        let mut p = BlobSasPermissions::default();
+
+        // All permission levels have read access
+        p.read = true;
+        p.list = true;
+
+        // Write and Admin have write access
+        if permission.can_write() {
+            p.write = true;
+            p.add = true;
+            p.create = true;
+        }
+
+        // Admin has delete access
+        if permission.can_delete() {
+            p.delete = true;
+        }
+
+        p
+    }
+
+    /// Generate a SAS token for the specified container.
+    ///
+    /// Returns the SAS token together with its expiry in milliseconds since
+    /// the UNIX epoch. The requested lifetime is capped just below Azure's
+    /// 7-day limit for user delegation keys, and the reported expiry is the
+    /// key's end time: a SAS signed with a user delegation key cannot outlive
+    /// that key, so reporting the uncapped time would overstate validity.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::IO` if the Azure credential cannot be created, the user
+    /// delegation key cannot be obtained, or the SAS token cannot be produced.
+    async fn generate_sas_token(&self, account: &str, container: &str) -> Result<(String, u64)> {
+        let credential =
+            DefaultAzureCredential::create(azure_identity::TokenCredentialOptions::default())
+                .map_err(|e| Error::IO {
+                    source: Box::new(std::io::Error::other(format!(
+                        "Failed to create Azure credentials: {}",
+                        e
+                    ))),
+                    location: snafu::location!(),
+                })?;
+
+        let credential: Arc<dyn TokenCredential> = Arc::new(credential);
+
+        let blob_service_client = BlobServiceClient::new(account, credential);
+
+        // Calculate times using time crate (which Azure SDK uses)
+        let now = time::OffsetDateTime::now_utc();
+
+        // Azure limits user delegation key to 7 days; stay one minute under it.
+        let max_lifetime = time::Duration::days(7) - time::Duration::seconds(60);
+
+        // A configured duration that does not fit in i64 is far beyond the cap,
+        // so treat it as the cap instead of letting an `as` cast wrap negative.
+        let lifetime = i64::try_from(self.config.duration_millis)
+            .map(time::Duration::milliseconds)
+            .unwrap_or(max_lifetime)
+            .min(max_lifetime);
+        let key_end_time = now + lifetime;
+
+        // Get user delegation key (note: typo in the library method name)
+        let user_delegation_key = blob_service_client
+            .get_user_deligation_key(now, key_end_time)
+            .await
+            .map_err(|e| Error::IO {
+                source: Box::new(std::io::Error::other(format!(
+                    "Failed to get user delegation key for account '{}': {}",
+                    account, e
+                ))),
+                location: snafu::location!(),
+            })?;
+
+        let permissions = Self::build_sas_permissions(self.config.permission);
+
+        // Generate SAS token for the container
+        let container_client = blob_service_client.container_client(container);
+
+        let sas_token = container_client
+            .user_delegation_shared_access_signature(
+                permissions,
+                &user_delegation_key.user_deligation_key,
+            )
+            .await
+            .map_err(|e| Error::IO {
+                source: Box::new(std::io::Error::other(format!(
+                    "Failed to generate SAS token for container '{}': {}",
+                    container, e
+                ))),
+                location: snafu::location!(),
+            })?;
+
+        // Report the capped key end time, not the requested one.
+        let expires_at_millis =
+            (key_end_time.unix_timestamp() * 1000 + key_end_time.millisecond() as i64) as u64;
+
+        let token = sas_token.token().map_err(|e| Error::IO {
+            source: Box::new(std::io::Error::other(format!(
+                "Failed to get SAS token: {}",
+                e
+            ))),
+            location: snafu::location!(),
+        })?;
+
+        Ok((token, expires_at_millis))
+    }
+}
+
+#[async_trait]
+impl CredentialVendor for AzureCredentialVendor {
+    async fn vend_credentials(&self, table_location: &str) -> Result<VendedCredentials> {
+        debug!(
+            "Azure credential vending: location={}, permission={}",
+            table_location, self.config.permission
+        );
+
+        let url = uri_to_url(table_location)?;
+
+        // The URI host is taken as the container name.
+        let container = url.host_str().ok_or_else(|| Error::InvalidInput {
+            source: format!("Azure URI '{}' missing container", table_location).into(),
+            location: snafu::location!(),
+        })?;
+
+        // Check if path extends beyond container level
+        let path = url.path().trim_start_matches('/');
+        if !path.is_empty() {
+            warn!(
+                "Azure SAS tokens are scoped to container level only. \
+                 Credentials for '{}' will have access to entire container '{}', not just path '{}'",
+                table_location, container, path
+            );
+        }
+
+        let account =
+            self.config
+                .account_name
+                .as_ref()
+                .ok_or_else(|| Error::InvalidInput {
+                    source: "Azure credential vending requires 'credential_vendor.azure_account_name' to be set in configuration".into(),
+                    location: snafu::location!(),
+                })?;
+
+        let (sas_token, expires_at_millis) = self.generate_sas_token(account, container).await?;
+
+        let mut storage_options = HashMap::new();
+        // Use the standard key that object_store/lance-io expects
+        storage_options.insert("azure_storage_sas_token".to_string(), sas_token.clone());
+        storage_options.insert("azure_storage_account_name".to_string(), account.clone());
+        storage_options.insert(
+            "expires_at_millis".to_string(),
+            expires_at_millis.to_string(),
+        );
+
+        // Only a redacted form of the token is logged.
+        info!(
+            "Azure credentials vended: account={}, container={}, permission={}, expires_at={}, sas_token={}",
+            account, container, self.config.permission, expires_at_millis, redact_credential(&sas_token)
+        );
+
+
Ok(VendedCredentials::new(storage_options, expires_at_millis)) + } + + fn provider_name(&self) -> &'static str { + "azure" + } + + fn permission(&self) -> VendedPermission { + self.config.permission + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_config_builder() { + let config = AzureCredentialVendorConfig::new() + .with_tenant_id("my-tenant-id") + .with_account_name("myaccount") + .with_duration_millis(7200000); + + assert_eq!(config.tenant_id, Some("my-tenant-id".to_string())); + assert_eq!(config.account_name, Some("myaccount".to_string())); + assert_eq!(config.duration_millis, 7200000); + } + + #[test] + fn test_build_sas_permissions_read() { + let permissions = AzureCredentialVendor::build_sas_permissions(VendedPermission::Read); + + assert!(permissions.read, "Read permission should have read=true"); + assert!(permissions.list, "Read permission should have list=true"); + assert!( + !permissions.write, + "Read permission should have write=false" + ); + assert!(!permissions.add, "Read permission should have add=false"); + assert!( + !permissions.create, + "Read permission should have create=false" + ); + assert!( + !permissions.delete, + "Read permission should have delete=false" + ); + } + + #[test] + fn test_build_sas_permissions_write() { + let permissions = AzureCredentialVendor::build_sas_permissions(VendedPermission::Write); + + assert!(permissions.read, "Write permission should have read=true"); + assert!(permissions.list, "Write permission should have list=true"); + assert!(permissions.write, "Write permission should have write=true"); + assert!(permissions.add, "Write permission should have add=true"); + assert!( + permissions.create, + "Write permission should have create=true" + ); + assert!( + !permissions.delete, + "Write permission should have delete=false" + ); + } + + #[test] + fn test_build_sas_permissions_admin() { + let permissions = AzureCredentialVendor::build_sas_permissions(VendedPermission::Admin); + + 
assert!(permissions.read, "Admin permission should have read=true"); + assert!(permissions.list, "Admin permission should have list=true"); + assert!(permissions.write, "Admin permission should have write=true"); + assert!(permissions.add, "Admin permission should have add=true"); + assert!( + permissions.create, + "Admin permission should have create=true" + ); + assert!( + permissions.delete, + "Admin permission should have delete=true" + ); + } +} diff --git a/rust/lance-namespace-impls/src/credentials/gcp.rs b/rust/lance-namespace-impls/src/credentials/gcp.rs new file mode 100644 index 00000000000..ce4bac40fa1 --- /dev/null +++ b/rust/lance-namespace-impls/src/credentials/gcp.rs @@ -0,0 +1,637 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! GCP credential vending using downscoped OAuth2 tokens. +//! +//! This module provides credential vending for GCP Cloud Storage by obtaining +//! OAuth2 access tokens and downscoping them using Credential Access Boundaries (CAB). +//! +//! ## Authentication +//! +//! This module uses [Application Default Credentials (ADC)][adc] for authentication. +//! ADC automatically finds credentials based on the environment: +//! +//! 1. **`GOOGLE_APPLICATION_CREDENTIALS` environment variable**: Set this to the path +//! of a service account key file (JSON format) before starting the application. +//! 2. **Well-known file locations**: `~/.config/gcloud/application_default_credentials.json` +//! on Linux/macOS, or the equivalent on Windows. +//! 3. **Metadata server**: When running on GCP (Compute Engine, Cloud Run, GKE, etc.), +//! credentials are automatically obtained from the metadata server. +//! +//! For production deployments on GCP, using the metadata server (option 3) is recommended +//! as it doesn't require managing key files. +//! +//! [adc]: https://cloud.google.com/docs/authentication/application-default-credentials +//! +//! ## Service Account Impersonation +//! +//! 
For multi-tenant scenarios, you can configure `service_account` to impersonate a +//! different service account. The base credentials (from ADC) must have the +//! `roles/iam.serviceAccountTokenCreator` role on the target service account. +//! +//! ## Permission Scoping +//! +//! Permissions are enforced using GCP's Credential Access Boundaries: +//! - **Read**: `roles/storage.legacyObjectReader` + `roles/storage.objectViewer` (read and list) +//! - **Write**: Read permissions + `roles/storage.legacyBucketWriter` + `roles/storage.objectCreator` +//! - **Admin**: Write permissions + `roles/storage.objectAdmin` (includes delete) +//! +//! The downscoped token is restricted to the specific bucket and path prefix. +//! +//! Note: Legacy roles are used because modern roles like `storage.objectCreator` lack +//! `storage.buckets.get` which many client libraries require. + +use std::collections::HashMap; + +use async_trait::async_trait; +use google_cloud_auth::credentials; +use lance_core::{Error, Result}; +use lance_io::object_store::uri_to_url; +use log::{debug, info}; +use reqwest::Client; +use serde::{Deserialize, Serialize}; + +use super::{redact_credential, CredentialVendor, VendedCredentials, VendedPermission}; + +/// GCP STS token exchange endpoint for downscoping credentials. +const STS_TOKEN_EXCHANGE_URL: &str = "https://sts.googleapis.com/v1/token"; + +/// Configuration for GCP credential vending. +#[derive(Debug, Clone, Default)] +pub struct GcpCredentialVendorConfig { + /// Optional service account to impersonate. + /// + /// When set, the vendor will impersonate this service account using the + /// IAM Credentials API's generateAccessToken endpoint before downscoping. + /// This is useful for multi-tenant scenarios where you want to issue tokens + /// on behalf of different service accounts. + /// + /// The base credentials (from ADC) must have the `roles/iam.serviceAccountTokenCreator` + /// role on this service account. 
+    ///
+    /// Format: `my-sa@project.iam.gserviceaccount.com`
+    pub service_account: Option<String>,
+
+    /// Permission level for vended credentials.
+    /// Default: Read
+    /// Permissions are enforced via Credential Access Boundaries (CAB).
+    ///
+    /// Note: GCP token duration cannot be configured; the token lifetime
+    /// is determined by the STS endpoint (typically 1 hour).
+    pub permission: VendedPermission,
+}
+
+impl GcpCredentialVendorConfig {
+    /// Create a new default config.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the service account to impersonate.
+    ///
+    /// When set, the vendor uses the IAM Credentials API to generate an access
+    /// token for this service account, then downscopes it with CAB.
+    ///
+    /// The base credentials (from ADC) must have the `roles/iam.serviceAccountTokenCreator`
+    /// role on this service account.
+    pub fn with_service_account(mut self, service_account: impl Into<String>) -> Self {
+        self.service_account = Some(service_account.into());
+        self
+    }
+
+    /// Set the permission level for vended credentials.
+    pub fn with_permission(mut self, permission: VendedPermission) -> Self {
+        self.permission = permission;
+        self
+    }
+}
+
+/// Access boundary rule for a single resource.
+///
+/// Serialized with camelCase field names; the condition is omitted from the
+/// JSON when it is `None`.
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+struct AccessBoundaryRule {
+    available_resource: String,
+    available_permissions: Vec<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    availability_condition: Option<AvailabilityCondition>,
+}
+
+/// Condition for access boundary rule.
+#[derive(Debug, Clone, Serialize)]
+struct AvailabilityCondition {
+    expression: String,
+}
+
+/// Credential Access Boundary structure.
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+struct CredentialAccessBoundary {
+    access_boundary: AccessBoundaryInner,
+}
+
+/// Inner wrapper holding the list of access boundary rules.
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+struct AccessBoundaryInner {
+    access_boundary_rules: Vec<AccessBoundaryRule>,
+}
+
+/// Response from STS token exchange.
+#[derive(Debug, Deserialize)]
+struct TokenExchangeResponse {
+    access_token: String,
+    /// Token lifetime in seconds; defaults to `None` when the field is absent.
+    #[serde(default)]
+    expires_in: Option<u64>,
+}
+
+/// Response from IAM generateAccessToken API.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct GenerateAccessTokenResponse {
+    access_token: String,
+    // Deserialized from the response but never read by this module.
+    #[allow(dead_code)]
+    expire_time: String,
+}
+
+/// GCP credential vendor that provides downscoped OAuth2 tokens.
+pub struct GcpCredentialVendor {
+    config: GcpCredentialVendorConfig,
+    http_client: Client,
+    credential: credentials::Credential,
+}
+
+// Hand-written so the credential is never printed; `http_client` is omitted.
+impl std::fmt::Debug for GcpCredentialVendor {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("GcpCredentialVendor")
+            .field("config", &self.config)
+            .field("credential", &"[credential]")
+            .finish()
+    }
+}
+
+impl GcpCredentialVendor {
+    /// Create a new GCP credential vendor with the specified configuration.
+    ///
+    /// Uses [Application Default Credentials (ADC)][adc] for authentication.
+    /// To use a service account key file, set the `GOOGLE_APPLICATION_CREDENTIALS`
+    /// environment variable to the file path before starting the application.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::IO` when the access token credential cannot be created.
+    ///
+    /// [adc]: https://cloud.google.com/docs/authentication/application-default-credentials
+    pub async fn new(config: GcpCredentialVendorConfig) -> Result<Self> {
+        let credential = credentials::create_access_token_credential()
+            .await
+            .map_err(|e| Error::IO {
+                source: Box::new(std::io::Error::other(format!(
+                    "Failed to create GCP credentials: {}",
+                    e
+                ))),
+                location: snafu::location!(),
+            })?;
+
+        Ok(Self {
+            config,
+            http_client: Client::new(),
+            credential,
+        })
+    }
+
+    /// Parse a GCS URI to extract bucket and prefix.
+    ///
+    /// Returns `(bucket, prefix)`. The bucket is the URI host; the prefix is
+    /// the URI path with leading slashes removed and may be empty.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::InvalidInput` when the scheme is not `gs` or the URI
+    /// has no host component.
+    fn parse_gcs_uri(uri: &str) -> Result<(String, String)> {
+        let url = uri_to_url(uri)?;
+
+        if url.scheme() != "gs" {
+            return Err(Error::InvalidInput {
+                source: format!(
+                    "Unsupported GCS URI scheme '{}', expected 'gs'",
+                    url.scheme()
+                )
+                .into(),
+                location: snafu::location!(),
+            });
+        }
+
+        let bucket = url
+            .host_str()
+            .ok_or_else(|| Error::InvalidInput {
+                source: format!("GCS URI '{}' missing bucket", uri).into(),
+                location: snafu::location!(),
+            })?
+            .to_string();
+
+        let prefix = url.path().trim_start_matches('/').to_string();
+
+        Ok((bucket, prefix))
+    }
+
+    /// Get a source token for downscoping.
+    ///
+    /// If service_account is configured, impersonates that service account
+    /// using the IAM Credentials API. Otherwise, uses the configured credential
+    /// directly.
+    async fn get_source_token(&self) -> Result<String> {
+        // The base token is needed in both cases: either returned as-is or
+        // used as the bearer token for the impersonation call.
+        let base_token = self.credential.get_token().await.map_err(|e| Error::IO {
+            source: Box::new(std::io::Error::other(format!(
+                "Failed to get GCP token: {}",
+                e
+            ))),
+            location: snafu::location!(),
+        })?;
+
+        // If service account impersonation is configured, use generateAccessToken API
+        match self.config.service_account.as_deref() {
+            Some(service_account) => {
+                self.impersonate_service_account(&base_token.token, service_account)
+                    .await
+            }
+            None => Ok(base_token.token),
+        }
+    }
+
+    /// Impersonate a service account using the IAM Credentials API.
+    ///
+    /// Uses the base token to call generateAccessToken for the target service account.
+    ///
+    /// Returns the access token issued for `service_account`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::IO` when the request fails, the API answers with a
+    /// non-success status, or the response body cannot be parsed.
+    async fn impersonate_service_account(
+        &self,
+        base_token: &str,
+        service_account: &str,
+    ) -> Result<String> {
+        let url = format!(
+            "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/{}:generateAccessToken",
+            service_account
+        );
+
+        // Request body with cloud-platform scope (required for GCS access)
+        let body = serde_json::json!({
+            "scope": ["https://www.googleapis.com/auth/cloud-platform"]
+        });
+
+        let response = self
+            .http_client
+            .post(&url)
+            .bearer_auth(base_token)
+            .json(&body)
+            .send()
+            .await
+            .map_err(|e| Error::IO {
+                source: Box::new(std::io::Error::other(format!(
+                    "Failed to call IAM generateAccessToken: {}",
+                    e
+                ))),
+                location: snafu::location!(),
+            })?;
+
+        if !response.status().is_success() {
+            // Capture the status first: reading the body consumes the response.
+            let status = response.status();
+            let body = response
+                .text()
+                .await
+                .unwrap_or_else(|_| "unknown error".to_string());
+            return Err(Error::IO {
+                source: Box::new(std::io::Error::other(format!(
+                    "IAM generateAccessToken failed for '{}' with status {}: {}",
+                    service_account, status, body
+                ))),
+                location: snafu::location!(),
+            });
+        }
+
+        let token_response: GenerateAccessTokenResponse =
+            response.json().await.map_err(|e| Error::IO {
+                source: Box::new(std::io::Error::other(format!(
+                    "Failed to parse generateAccessToken response: {}",
+                    e
+                ))),
+                location: snafu::location!(),
+            })?;
+
+        Ok(token_response.access_token)
+    }
+
+    /// Build Credential Access Boundary for the specified bucket/prefix and permission.
+    ///
+    /// One rule is always emitted for read access; a second is added when the
+    /// permission can write and a third when it can delete. When `prefix` is
+    /// non-empty, every rule carries the same CEL condition restricting access
+    /// to that prefix; with an empty prefix the rules are unconditional.
+    fn build_access_boundary(
+        bucket: &str,
+        prefix: &str,
+        permission: VendedPermission,
+    ) -> CredentialAccessBoundary {
+        let bucket_resource = format!("//storage.googleapis.com/projects/_/buckets/{}", bucket);
+
+        let mut rules = vec![];
+
+        // Build condition expression for path restriction
+        let condition = if prefix.is_empty() {
+            None
+        } else {
+            // The bucket and prefix are embedded in single-quoted CEL string
+            // literals. Escape backslashes and single quotes so a location
+            // containing either cannot terminate the literal early and change
+            // the meaning of the condition.
+            let escape_cel = |raw: &str| raw.replace('\\', "\\\\").replace('\'', "\\'");
+            let bucket_literal = escape_cel(bucket);
+            let prefix_literal = escape_cel(prefix.trim_end_matches('/'));
+
+            // CEL expression to restrict access to the specific path prefix.
+            // We append '/' to ensure exact prefix matching - without it, prefix "data"
+            // would incorrectly match "data-other/file.txt".
+            //
+            // For object access: resource.name must start with "prefix/"
+            // For list operations: listPrefix must equal "prefix" OR start with "prefix/"
+            let list_prefix_attr =
+                "api.getAttribute('storage.googleapis.com/objectListPrefix', '')";
+            let expr = format!(
+                "resource.name.startsWith('projects/_/buckets/{}/objects/{}/') || \
+                 {list_attr} == '{prefix}' || {list_attr}.startsWith('{prefix}/')",
+                bucket_literal,
+                prefix_literal,
+                list_attr = list_prefix_attr,
+                prefix = prefix_literal
+            );
+            Some(AvailabilityCondition { expression: expr })
+        };
+
+        // Read permissions: legacyObjectReader for read + objectViewer for list
+        // Using legacy roles because modern roles lack storage.buckets.get
+        rules.push(AccessBoundaryRule {
+            available_resource: bucket_resource.clone(),
+            available_permissions: vec![
+                "inRole:roles/storage.legacyObjectReader".to_string(),
+                "inRole:roles/storage.objectViewer".to_string(),
+            ],
+            availability_condition: condition.clone(),
+        });
+
+        // Write permission: legacyBucketWriter + objectCreator for create/update
+        if permission.can_write() {
+            rules.push(AccessBoundaryRule {
+                available_resource: bucket_resource.clone(),
+                available_permissions: vec![
+                    "inRole:roles/storage.legacyBucketWriter".to_string(),
+                    "inRole:roles/storage.objectCreator".to_string(),
+                ],
+                availability_condition: condition.clone(),
+            });
+        }
+
+        // Admin permission: objectAdmin for delete
+        if permission.can_delete() {
+            rules.push(AccessBoundaryRule {
+                available_resource: bucket_resource,
+                available_permissions: vec!["inRole:roles/storage.objectAdmin".to_string()],
+                availability_condition: condition,
+            });
+        }
+
+        CredentialAccessBoundary {
+            access_boundary: AccessBoundaryInner {
+                access_boundary_rules: rules,
+            },
+        }
+    }
+
+    /// Exchange source token for a downscoped token using STS.
+ async fn downscope_token( + &self, + source_token: &str, + access_boundary: &CredentialAccessBoundary, + ) -> Result<(String, u64)> { + let options_json = serde_json::to_string(access_boundary).map_err(|e| Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize access boundary: {}", + e + ))), + location: snafu::location!(), + })?; + + let params = [ + ( + "grant_type", + "urn:ietf:params:oauth:grant-type:token-exchange", + ), + ( + "subject_token_type", + "urn:ietf:params:oauth:token-type:access_token", + ), + ( + "requested_token_type", + "urn:ietf:params:oauth:token-type:access_token", + ), + ("subject_token", source_token), + ("options", &options_json), + ]; + + let response = self + .http_client + .post(STS_TOKEN_EXCHANGE_URL) + .form(&params) + .send() + .await + .map_err(|e| Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call STS token exchange: {}", + e + ))), + location: snafu::location!(), + })?; + + if !response.status().is_success() { + let status = response.status(); + let body = response + .text() + .await + .unwrap_or_else(|_| "unknown error".to_string()); + return Err(Error::IO { + source: Box::new(std::io::Error::other(format!( + "STS token exchange failed with status {}: {}", + status, body + ))), + location: snafu::location!(), + }); + } + + let token_response: TokenExchangeResponse = + response.json().await.map_err(|e| Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to parse STS response: {}", + e + ))), + location: snafu::location!(), + })?; + + // Calculate expiration time + // Use expires_in from response if available, otherwise default to 1 hour + let expires_in_secs = token_response.expires_in.unwrap_or(3600); + let expires_at_millis = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("time went backwards") + .as_millis() as u64 + + expires_in_secs * 1000; + + Ok((token_response.access_token, expires_at_millis)) + } +} +
+#[async_trait] +impl CredentialVendor for GcpCredentialVendor { + async fn vend_credentials(&self, table_location: &str) -> Result { + debug!( + "GCP credential vending: location={}, permission={}", + table_location, self.config.permission + ); + + let (bucket, prefix) = Self::parse_gcs_uri(table_location)?; + + // Get source token from default credentials + let source_token = self.get_source_token().await?; + + // Build access boundary for this location and permission + let access_boundary = Self::build_access_boundary(&bucket, &prefix, self.config.permission); + + // Exchange for downscoped token + let (downscoped_token, expires_at_millis) = self + .downscope_token(&source_token, &access_boundary) + .await?; + + let mut storage_options = HashMap::new(); + storage_options.insert("google_storage_token".to_string(), downscoped_token.clone()); + storage_options.insert( + "expires_at_millis".to_string(), + expires_at_millis.to_string(), + ); + + info!( + "GCP credentials vended: bucket={}, prefix={}, permission={}, expires_at={}, token={}", + bucket, + prefix, + self.config.permission, + expires_at_millis, + redact_credential(&downscoped_token) + ); + + Ok(VendedCredentials::new(storage_options, expires_at_millis)) + } + + fn provider_name(&self) -> &'static str { + "gcp" + } + + fn permission(&self) -> VendedPermission { + self.config.permission + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_gcs_uri() { + let (bucket, prefix) = GcpCredentialVendor::parse_gcs_uri("gs://my-bucket/path/to/table") + .expect("should parse"); + assert_eq!(bucket, "my-bucket"); + assert_eq!(prefix, "path/to/table"); + + let (bucket, prefix) = + GcpCredentialVendor::parse_gcs_uri("gs://my-bucket/").expect("should parse"); + assert_eq!(bucket, "my-bucket"); + assert_eq!(prefix, ""); + + let (bucket, prefix) = + GcpCredentialVendor::parse_gcs_uri("gs://my-bucket").expect("should parse"); + assert_eq!(bucket, "my-bucket"); + assert_eq!(prefix, ""); + } + + 
#[test] + fn test_parse_gcs_uri_invalid() { + // Wrong scheme - should fail + let result = GcpCredentialVendor::parse_gcs_uri("s3://bucket/path"); + assert!(result.is_err()); + + // Missing bucket + let result = GcpCredentialVendor::parse_gcs_uri("gs:///path"); + assert!(result.is_err()); + + // Invalid URI format + let result = GcpCredentialVendor::parse_gcs_uri("not-a-uri"); + assert!(result.is_err()); + + // Empty string + let result = GcpCredentialVendor::parse_gcs_uri(""); + assert!(result.is_err()); + } + + #[test] + fn test_config_builder() { + let config = GcpCredentialVendorConfig::new() + .with_service_account("my-sa@project.iam.gserviceaccount.com") + .with_permission(VendedPermission::Write); + + assert_eq!( + config.service_account, + Some("my-sa@project.iam.gserviceaccount.com".to_string()) + ); + assert_eq!(config.permission, VendedPermission::Write); + } + + #[test] + fn test_build_access_boundary_read() { + let boundary = GcpCredentialVendor::build_access_boundary( + "my-bucket", + "path/to/data", + VendedPermission::Read, + ); + + let rules = &boundary.access_boundary.access_boundary_rules; + assert_eq!(rules.len(), 1, "Read should have 1 rule"); + + let permissions = &rules[0].available_permissions; + assert!(permissions.contains(&"inRole:roles/storage.legacyObjectReader".to_string())); + assert!(permissions.contains(&"inRole:roles/storage.objectViewer".to_string())); + assert!(rules[0].availability_condition.is_some()); + } + + #[test] + fn test_build_access_boundary_write() { + let boundary = GcpCredentialVendor::build_access_boundary( + "my-bucket", + "path/to/data", + VendedPermission::Write, + ); + + let rules = &boundary.access_boundary.access_boundary_rules; + assert_eq!(rules.len(), 2, "Write should have 2 rules"); + + let permissions: Vec<_> = rules + .iter() + .flat_map(|r| r.available_permissions.iter()) + .collect(); + assert!(permissions.contains(&&"inRole:roles/storage.legacyObjectReader".to_string())); + 
assert!(permissions.contains(&&"inRole:roles/storage.objectViewer".to_string())); + assert!(permissions.contains(&&"inRole:roles/storage.legacyBucketWriter".to_string())); + assert!(permissions.contains(&&"inRole:roles/storage.objectCreator".to_string())); + } + + #[test] + fn test_build_access_boundary_admin() { + let boundary = GcpCredentialVendor::build_access_boundary( + "my-bucket", + "path/to/data", + VendedPermission::Admin, + ); + + let rules = &boundary.access_boundary.access_boundary_rules; + assert_eq!(rules.len(), 3, "Admin should have 3 rules"); + + let permissions: Vec<_> = rules + .iter() + .flat_map(|r| r.available_permissions.iter()) + .collect(); + assert!(permissions.contains(&&"inRole:roles/storage.legacyObjectReader".to_string())); + assert!(permissions.contains(&&"inRole:roles/storage.objectViewer".to_string())); + assert!(permissions.contains(&&"inRole:roles/storage.legacyBucketWriter".to_string())); + assert!(permissions.contains(&&"inRole:roles/storage.objectCreator".to_string())); + assert!(permissions.contains(&&"inRole:roles/storage.objectAdmin".to_string())); + } + + #[test] + fn test_build_access_boundary_no_prefix() { + let boundary = + GcpCredentialVendor::build_access_boundary("my-bucket", "", VendedPermission::Read); + + let rules = &boundary.access_boundary.access_boundary_rules; + assert_eq!(rules.len(), 1); + // No condition when prefix is empty (full bucket access) + assert!(rules[0].availability_condition.is_none()); + } +} diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index fdb4370f6ab..91714d73d90 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -33,6 +33,10 @@ use lance_core::{box_error, Error, Result}; use lance_namespace::schema::arrow_schema_to_json; use lance_namespace::LanceNamespace; +use crate::credentials::{ + create_credential_vendor_for_location, has_credential_vendor_config, CredentialVendor, +}; + /// Builder for 
creating a DirectoryNamespace. /// /// This builder provides a fluent API for configuring and establishing @@ -75,6 +79,7 @@ pub struct DirectoryNamespaceBuilder { manifest_enabled: bool, dir_listing_enabled: bool, inline_optimization_enabled: bool, + credential_vendor_properties: HashMap, } impl DirectoryNamespaceBuilder { @@ -91,6 +96,7 @@ impl DirectoryNamespaceBuilder { manifest_enabled: true, dir_listing_enabled: true, // Default to enabled for backwards compatibility inline_optimization_enabled: true, + credential_vendor_properties: HashMap::new(), } } @@ -132,6 +138,29 @@ impl DirectoryNamespaceBuilder { /// - `inline_optimization_enabled`: Enable inline optimization of __manifest table (optional, default: true) /// - `storage.*`: Storage options (optional, prefix will be stripped) /// + /// Credential vendor properties (prefixed with `credential_vendor.`, prefix is stripped): + /// - `credential_vendor.enabled`: Set to "true" to enable credential vending (required) + /// - `credential_vendor.permission`: Permission level: read, write, or admin (default: read) + /// + /// AWS-specific properties (for s3:// locations): + /// - `credential_vendor.aws_role_arn`: AWS IAM role ARN (required for AWS) + /// - `credential_vendor.aws_external_id`: AWS external ID (optional) + /// - `credential_vendor.aws_region`: AWS region (optional) + /// - `credential_vendor.aws_role_session_name`: AWS role session name (optional) + /// - `credential_vendor.aws_duration_millis`: Credential duration in ms (default: 3600000, range: 15min-12hrs) + /// + /// GCP-specific properties (for gs:// locations): + /// - `credential_vendor.gcp_service_account`: Service account to impersonate (optional) + /// + /// Note: GCP uses Application Default Credentials (ADC). To use a service account key file, + /// set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable before starting. + /// GCP token duration cannot be configured; it's determined by the STS endpoint (typically 1 hour). 
+ /// + /// Azure-specific properties (for az:// locations): + /// - `credential_vendor.azure_account_name`: Azure storage account name (required for Azure) + /// - `credential_vendor.azure_tenant_id`: Azure tenant ID (optional) + /// - `credential_vendor.azure_duration_millis`: Credential duration in ms (default: 3600000, up to 7 days) + /// /// # Arguments /// /// * `properties` - Configuration properties @@ -209,6 +238,17 @@ impl DirectoryNamespaceBuilder { .and_then(|v| v.parse::().ok()) .unwrap_or(true); + // Extract credential vendor properties (properties prefixed with "credential_vendor.") + // The prefix is stripped to get short property names + // The build() method will check if enabled=true before creating the vendor + let credential_vendor_properties: HashMap = properties + .iter() + .filter_map(|(k, v)| { + k.strip_prefix("credential_vendor.") + .map(|key| (key.to_string(), v.clone())) + }) + .collect(); + Ok(Self { root: root.trim_end_matches('/').to_string(), storage_options, @@ -216,6 +256,7 @@ impl DirectoryNamespaceBuilder { manifest_enabled, dir_listing_enabled, inline_optimization_enabled, + credential_vendor_properties, }) } @@ -258,6 +299,55 @@ impl DirectoryNamespaceBuilder { self } + /// Add a credential vendor property. + /// + /// Use short property names without the `credential_vendor.` prefix. + /// Common properties: `enabled`, `permission`. + /// AWS properties: `aws_role_arn`, `aws_external_id`, `aws_region`, `aws_role_session_name`, `aws_duration_millis`. + /// GCP properties: `gcp_service_account`. + /// Azure properties: `azure_account_name`, `azure_tenant_id`, `azure_duration_millis`. 
+ /// + /// # Arguments + /// + /// * `key` - Property key (e.g., "enabled", "aws_role_arn") + /// * `value` - Property value + /// + /// # Example + /// + /// ```no_run + /// # use lance_namespace_impls::DirectoryNamespaceBuilder; + /// # async fn example() -> Result<(), Box> { + /// let namespace = DirectoryNamespaceBuilder::new("s3://my-bucket/data") + /// .credential_vendor_property("enabled", "true") + /// .credential_vendor_property("aws_role_arn", "arn:aws:iam::123456789012:role/MyRole") + /// .credential_vendor_property("permission", "read") + /// .build() + /// .await?; + /// # Ok(()) + /// # } + /// ``` + pub fn credential_vendor_property( + mut self, + key: impl Into, + value: impl Into, + ) -> Self { + self.credential_vendor_properties + .insert(key.into(), value.into()); + self + } + + /// Add multiple credential vendor properties. + /// + /// Use short property names without the `credential_vendor.` prefix. + /// + /// # Arguments + /// + /// * `properties` - HashMap of credential vendor properties to add + pub fn credential_vendor_properties(mut self, properties: HashMap) -> Self { + self.credential_vendor_properties.extend(properties); + self + } + /// Build the DirectoryNamespace. /// /// # Returns @@ -300,6 +390,16 @@ impl DirectoryNamespaceBuilder { None }; + // Create credential vendor once during initialization if enabled + let credential_vendor = if has_credential_vendor_config(&self.credential_vendor_properties) + { + create_credential_vendor_for_location(&self.root, &self.credential_vendor_properties) + .await? 
+ .map(Arc::from) + } else { + None + }; + Ok(DirectoryNamespace { root: self.root, storage_options: self.storage_options, @@ -308,6 +408,7 @@ impl DirectoryNamespaceBuilder { base_path, manifest_ns, dir_listing_enabled: self.dir_listing_enabled, + credential_vendor, }) } @@ -357,6 +458,14 @@ impl DirectoryNamespaceBuilder { /// /// When `dir_listing_enabled=true`, the namespace falls back to directory scanning for tables not /// found in the manifest, enabling gradual migration. +/// +/// ## Credential Vending +/// +/// When credential vendor properties are configured, `describe_table` will vend temporary +/// credentials based on the table location URI. The vendor type is auto-selected: +/// - `s3://` locations use AWS STS AssumeRole +/// - `gs://` locations use GCP OAuth2 tokens +/// - `az://` locations use Azure SAS tokens pub struct DirectoryNamespace { root: String, storage_options: Option>, @@ -366,6 +475,9 @@ pub struct DirectoryNamespace { base_path: Path, manifest_ns: Option>, dir_listing_enabled: bool, + /// Credential vendor created once during initialization. + /// Used to vend temporary credentials for table access. + credential_vendor: Option>, } impl std::fmt::Debug for DirectoryNamespace { @@ -496,6 +608,35 @@ impl DirectoryNamespace { .child(".lance-reserved") } + /// Get storage options for a table, using credential vending if configured. + /// + /// If credential vendor properties are configured and the table location matches + /// a supported cloud provider, this will create an appropriate vendor and vend + /// temporary credentials scoped to the table location. Otherwise, returns the + /// static storage options. 
+ /// + /// The vendor type is auto-selected based on the table URI: + /// - `s3://` locations use AWS STS AssumeRole + /// - `gs://` locations use GCP OAuth2 tokens + /// - `az://` locations use Azure SAS tokens + /// + /// The permission level (Read, Write, Admin) is configured at namespace + /// initialization time via the `credential_vendor.permission` property. + /// + /// # Arguments + /// + /// * `table_uri` - The full URI of the table + async fn get_storage_options_for_table( + &self, + table_uri: &str, + ) -> Result<Option<HashMap<String, String>>> { + if let Some(ref vendor) = self.credential_vendor { + let vended = vendor.vend_credentials(table_uri).await?; + return Ok(Some(vended.storage_options)); + } + Ok(self.storage_options.clone()) + } + /// Migrate directory-based tables to the manifest. /// /// This is a one-time migration operation that: @@ -776,6 +917,8 @@ impl LanceNamespace for DirectoryNamespace { let lance_schema = dataset.schema(); let arrow_schema: arrow_schema::Schema = lance_schema.into(); let json_schema = arrow_schema_to_json(&arrow_schema)?; + let storage_options = self.get_storage_options_for_table(&table_uri).await?; + Ok(DescribeTableResponse { table: Some(table_name), namespace: request.id.as_ref().map(|id| { @@ -789,7 +932,7 @@ impl LanceNamespace for DirectoryNamespace { location: Some(table_uri.clone()), table_uri: Some(table_uri), schema: Some(Box::new(json_schema)), - storage_options: self.storage_options.clone(), + storage_options, stats: None, }) } @@ -801,6 +944,7 @@ impl LanceNamespace for DirectoryNamespace { .await .unwrap_or(false) { + let storage_options = self.get_storage_options_for_table(&table_uri).await?; Ok(DescribeTableResponse { table: Some(table_name), namespace: request.id.as_ref().map(|id| { @@ -814,7 +958,7 @@ impl LanceNamespace for DirectoryNamespace { location: Some(table_uri.clone()), table_uri: Some(table_uri), schema: None, - storage_options: self.storage_options.clone(), + storage_options, stats: None, }) } else { diff --git
a/rust/lance-namespace-impls/src/lib.rs b/rust/lance-namespace-impls/src/lib.rs index 634199ce98a..88248841bcb 100644 --- a/rust/lance-namespace-impls/src/lib.rs +++ b/rust/lance-namespace-impls/src/lib.rs @@ -10,12 +10,49 @@ //! - `rest`: REST API-based namespace implementation //! - `rest-adapter`: REST server adapter that exposes any namespace via HTTP //! - `dir-aws`, `dir-azure`, `dir-gcp`, `dir-oss`: Cloud storage backend support for directory namespace (via lance-io) +//! - `credential-vendor-aws`, `credential-vendor-gcp`, `credential-vendor-azure`: Credential vending for cloud storage //! //! ## Implementations //! //! - `DirectoryNamespace`: Directory-based implementation (always available) //! - `RestNamespace`: REST API-based implementation (requires `rest` feature) //! +//! ## Credential Vending +//! +//! The `credentials` module provides temporary credential vending for cloud storage: +//! - AWS: STS AssumeRole with scoped IAM policies (requires `credential-vendor-aws` feature) +//! - GCP: OAuth2 tokens with access boundaries (requires `credential-vendor-gcp` feature) +//! - Azure: SAS tokens with user delegation keys (requires `credential-vendor-azure` feature) +//! +//! The credential vendor is automatically selected based on the table location URI scheme: +//! - `s3://` for AWS +//! - `gs://` for GCP +//! - `az://` for Azure +//! +//! Configuration properties (prefixed with `credential_vendor.`, prefix is stripped): +//! +//! ```text +//! # Required to enable credential vending +//! credential_vendor.enabled = "true" +//! +//! # Common properties (apply to all providers) +//! credential_vendor.permission = "read" # read, write, or admin (default: read) +//! +//! # AWS-specific properties (for s3:// locations) +//! credential_vendor.aws_role_arn = "arn:aws:iam::123456789012:role/MyRole" # required for AWS +//! credential_vendor.aws_duration_millis = "3600000" # 1 hour (default, range: 15min-12hrs) +//! +//! 
# GCP-specific properties (for gs:// locations) +//! # Note: GCP uses ADC; set GOOGLE_APPLICATION_CREDENTIALS env var for service account key +//! # Note: GCP token duration cannot be configured; it's determined by the STS endpoint +//! credential_vendor.gcp_service_account = "my-sa@project.iam.gserviceaccount.com" +//! +//! # Azure-specific properties (for az:// locations) +//! credential_vendor.azure_account_name = "mystorageaccount" # required for Azure +//! credential_vendor.azure_tenant_id = "my-tenant-id" +//! credential_vendor.azure_duration_millis = "3600000" # 1 hour (default, up to 7 days) +//! ``` +//! //! ## Usage //! //! The recommended way to connect to a namespace is using [`ConnectBuilder`]: @@ -32,6 +69,7 @@ //! ``` pub mod connect; +pub mod credentials; pub mod dir; #[cfg(feature = "rest")] @@ -44,6 +82,27 @@ pub mod rest_adapter; pub use connect::ConnectBuilder; pub use dir::{manifest::ManifestNamespace, DirectoryNamespace, DirectoryNamespaceBuilder}; +// Re-export credential vending +pub use credentials::{ + create_credential_vendor_for_location, detect_provider_from_uri, has_credential_vendor_config, + redact_credential, CredentialVendor, VendedCredentials, DEFAULT_CREDENTIAL_DURATION_MILLIS, +}; + +#[cfg(feature = "credential-vendor-aws")] +pub use credentials::aws::{AwsCredentialVendor, AwsCredentialVendorConfig}; +#[cfg(feature = "credential-vendor-aws")] +pub use credentials::aws_props; + +#[cfg(feature = "credential-vendor-gcp")] +pub use credentials::gcp::{GcpCredentialVendor, GcpCredentialVendorConfig}; +#[cfg(feature = "credential-vendor-gcp")] +pub use credentials::gcp_props; + +#[cfg(feature = "credential-vendor-azure")] +pub use credentials::azure::{AzureCredentialVendor, AzureCredentialVendorConfig}; +#[cfg(feature = "credential-vendor-azure")] +pub use credentials::azure_props; + #[cfg(feature = "rest")] pub use rest::{RestNamespace, RestNamespaceBuilder};