From c72e402207f4049cb4c6c69541484e6ba93535d7 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Sun, 22 Feb 2026 08:42:14 -0600 Subject: [PATCH 1/6] added ability to pass custom headers to objectstore requests Signed-off-by: Daniel Rammer --- Cargo.lock | 1 + rust/lance-io/Cargo.toml | 1 + rust/lance-io/src/object_store.rs | 80 +++++++++++++++++++ .../src/object_store/providers/aws.rs | 3 +- .../src/object_store/providers/azure.rs | 3 +- .../src/object_store/providers/gcp.rs | 3 +- 6 files changed, 88 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fae7d804f97..7c0ad7d18df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5182,6 +5182,7 @@ dependencies = [ "criterion", "deepsize", "futures", + "http 1.4.0", "lance-arrow", "lance-core", "lance-namespace", diff --git a/rust/lance-io/Cargo.toml b/rust/lance-io/Cargo.toml index 71e9dab1e9f..20eaf6bac15 100644 --- a/rust/lance-io/Cargo.toml +++ b/rust/lance-io/Cargo.toml @@ -36,6 +36,7 @@ bytes.workspace = true chrono.workspace = true deepsize.workspace = true futures.workspace = true +http.workspace = true log.workspace = true pin-project.workspace = true prost.workspace = true diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index e908599c500..402cd02bd8a 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -21,6 +21,7 @@ use lance_core::utils::parse::str_is_truthy; use list_retry::ListRetryStream; #[cfg(feature = "aws")] use object_store::aws::AwsCredentialProvider; +use object_store::{ClientOptions, HeaderMap, HeaderValue}; use object_store::DynObjectStore; use object_store::Error as ObjectStoreError; use object_store::{path::Path, ObjectMeta, ObjectStore as OSObjectStore}; @@ -878,6 +879,31 @@ impl StorageOptions { self.0.get(key) } + /// Build [`ClientOptions`] with default headers extracted from `header.*` keys. + /// + /// Keys prefixed with `header.` are parsed into HTTP headers. For example, + /// `header.x-ms-version = 2023-11-03` results in a default header + /// `x-ms-version: 2023-11-03`. Invalid header names or values are silently + /// skipped. + pub fn client_options(&self) -> ClientOptions { + let mut headers = HeaderMap::new(); + for (key, value) in &self.0 { + if let Some(header_name) = key.strip_prefix("header.") { + if let (Ok(name), Ok(val)) = ( + header_name.parse::(), + HeaderValue::from_str(value), + ) { + headers.insert(name, val); + } + } + } + let mut client_options = ClientOptions::default(); + if !headers.is_empty() { + client_options = client_options.with_default_headers(headers); + } + client_options + } + /// Get the expiration time in milliseconds since epoch, if present pub fn expires_at_millis(&self) -> Option { self.0 @@ -1365,4 +1391,58 @@ mod tests { let copied_content = std::fs::read(&dest_file).unwrap(); assert_eq!(copied_content, b"test content"); } + + #[test] + fn test_client_options_extracts_headers() { + let opts = StorageOptions(HashMap::from([ + ("header.x-custom-foo".to_string(), "bar".to_string()), + ("header.x-ms-version".to_string(), "2023-11-03".to_string()), + ("region".to_string(), "us-west-2".to_string()), + ])); + // Should succeed without panic; the returned ClientOptions is opaque, + // but we can verify it round-trips through a builder. + let client_options = opts.client_options(); + + // Verify non-header keys are not consumed as headers by creating + // another StorageOptions with no header.* keys. + let opts_no_headers = StorageOptions(HashMap::from([( + "region".to_string(), + "us-west-2".to_string(), + )])); + let _ = opts_no_headers.client_options(); + + // Smoke test: the client_options with headers should be usable + // in a builder (we can't inspect the headers directly, but building + // should not fail). + #[cfg(feature = "gcp")] + { + use object_store::gcp::GoogleCloudStorageBuilder; + let _builder = GoogleCloudStorageBuilder::new() + .with_client_options(client_options) + .with_url("gs://test-bucket"); + } + } + + #[test] + fn test_client_options_skips_invalid_headers() { + let opts = StorageOptions(HashMap::from([ + // Invalid header name (spaces not allowed) + ("header.bad header".to_string(), "value".to_string()), + // Invalid header value (non-visible ASCII) + ("header.x-good-name".to_string(), "bad\x01value".to_string()), + // Valid header + ("header.x-valid".to_string(), "good".to_string()), + ])); + // Should not panic even with invalid entries + let _ = opts.client_options(); + } + + #[test] + fn test_client_options_empty_when_no_header_keys() { + let opts = StorageOptions(HashMap::from([ + ("region".to_string(), "us-east-1".to_string()), + ("access_key_id".to_string(), "AKID".to_string()), + ])); + let _ = opts.client_options(); + } } diff --git a/rust/lance-io/src/object_store/providers/aws.rs b/rust/lance-io/src/object_store/providers/aws.rs index 982470581f2..1bdddd03fe0 100644 --- a/rust/lance-io/src/object_store/providers/aws.rs +++ b/rust/lance-io/src/object_store/providers/aws.rs @@ -78,7 +78,8 @@ impl AwsStoreProvider { base_path.set_query(None); // we can't use parse_url_opts here because we need to manually set the credentials provider - let mut builder = AmazonS3Builder::new(); + let mut builder = AmazonS3Builder::new() + .with_client_options(storage_options.client_options()); for (key, value) in s3_storage_options { builder = builder.with_config(key, value); } diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index 7bf566c8972..bcd6c2fbfb9 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -81,7 +81,8 @@ impl AzureBlobStoreProvider { let mut builder = MicrosoftAzureBuilder::new() .with_url(base_path.as_ref()) - .with_retry(retry_config); + .with_retry(retry_config) + .with_client_options(storage_options.client_options()); for (key, value) in storage_options.as_azure_options() { builder = builder.with_config(key, value); } diff --git a/rust/lance-io/src/object_store/providers/gcp.rs b/rust/lance-io/src/object_store/providers/gcp.rs index dba5cd8dd40..85fdc6623cf 100644 --- a/rust/lance-io/src/object_store/providers/gcp.rs +++ b/rust/lance-io/src/object_store/providers/gcp.rs @@ -74,7 +74,8 @@ impl GcsStoreProvider { let mut builder = GoogleCloudStorageBuilder::new() .with_url(base_path.as_ref()) - .with_retry(retry_config); + .with_retry(retry_config) + .with_client_options(storage_options.client_options()); for (key, value) in storage_options.as_gcs_options() { builder = builder.with_config(key, value); } From 5d770cdf2a431d5ce3bfca2708cfde123c8206ec Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Mon, 23 Feb 2026 09:42:52 -0600 Subject: [PATCH 2/6] lint Signed-off-by: Daniel Rammer --- python/Cargo.lock | 1 + rust/lance-io/src/object_store.rs | 2 +- rust/lance-io/src/object_store/providers/aws.rs | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/Cargo.lock b/python/Cargo.lock index a7efcab31ec..f6807486674 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -4267,6 +4267,7 @@ dependencies = [ "chrono", "deepsize", "futures", + "http 1.4.0", "lance-arrow", "lance-core", "lance-namespace", diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index 402cd02bd8a..d2e79f574e7 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -21,10 +21,10 @@ use lance_core::utils::parse::str_is_truthy; use list_retry::ListRetryStream; #[cfg(feature = "aws")] use object_store::aws::AwsCredentialProvider; -use object_store::{ClientOptions, HeaderMap, HeaderValue}; use object_store::DynObjectStore; use object_store::Error as ObjectStoreError; use object_store::{path::Path, ObjectMeta, ObjectStore as OSObjectStore}; +use object_store::{ClientOptions, HeaderMap, HeaderValue}; use providers::local::FileStoreProvider; use providers::memory::MemoryStoreProvider; use shellexpand::tilde; diff --git a/rust/lance-io/src/object_store/providers/aws.rs b/rust/lance-io/src/object_store/providers/aws.rs index 1bdddd03fe0..7a2f062372d 100644 --- a/rust/lance-io/src/object_store/providers/aws.rs +++ b/rust/lance-io/src/object_store/providers/aws.rs @@ -78,8 +78,8 @@ impl AwsStoreProvider { base_path.set_query(None); // we can't use parse_url_opts here because we need to manually set the credentials provider - let mut builder = AmazonS3Builder::new() - .with_client_options(storage_options.client_options()); + let mut builder = + AmazonS3Builder::new().with_client_options(storage_options.client_options()); for (key, value) in s3_storage_options { builder = builder.with_config(key, value); } From b804cf9cb99fc25f6645567ffcf256ee7b0d732d Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Mon, 23 Feb 2026 09:54:10 -0600 Subject: [PATCH 3/6] returning error on misformatted header Signed-off-by: Daniel Rammer --- rust/lance-io/src/object_store.rs | 59 +++++++++++-------- .../src/object_store/providers/aws.rs | 2 +- .../src/object_store/providers/azure.rs | 2 +- .../src/object_store/providers/gcp.rs | 2 +- 4 files changed, 39 insertions(+), 26 deletions(-) diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index d2e79f574e7..e8a0a797b71 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -883,25 +883,35 @@ impl StorageOptions { /// /// Keys prefixed with `header.` are parsed into HTTP headers. For example, /// `header.x-ms-version = 2023-11-03` results in a default header - /// `x-ms-version: 2023-11-03`. Invalid header names or values are silently - /// skipped. - pub fn client_options(&self) -> ClientOptions { + /// `x-ms-version: 2023-11-03`. + /// + /// Returns an error if any `header.*` key has an invalid header name or value. + pub fn client_options(&self) -> Result { let mut headers = HeaderMap::new(); for (key, value) in &self.0 { if let Some(header_name) = key.strip_prefix("header.") { - if let (Ok(name), Ok(val)) = ( - header_name.parse::(), - HeaderValue::from_str(value), - ) { - headers.insert(name, val); - } + let name = header_name + .parse::() + .map_err(|e| { + Error::invalid_input( + format!("invalid header name '{header_name}': {e}"), + location!(), + ) + })?; + let val = HeaderValue::from_str(value).map_err(|e| { + Error::invalid_input( + format!("invalid header value for '{header_name}': {e}"), + location!(), + ) + })?; + headers.insert(name, val); } } let mut client_options = ClientOptions::default(); if !headers.is_empty() { client_options = client_options.with_default_headers(headers); } - client_options + Ok(client_options) } /// Get the expiration time in milliseconds since epoch, if present @@ -1399,9 +1409,7 @@ mod tests { ("header.x-ms-version".to_string(), "2023-11-03".to_string()), ("region".to_string(), "us-west-2".to_string()), ])); - // Should succeed without panic; the returned ClientOptions is opaque, - // but we can verify it round-trips through a builder. - let client_options = opts.client_options(); + let client_options = opts.client_options().unwrap(); // Verify non-header keys are not consumed as headers by creating // another StorageOptions with no header.* keys. @@ -1409,7 +1417,7 @@ mod tests { "region".to_string(), "us-west-2".to_string(), )])); - let _ = opts_no_headers.client_options(); + opts_no_headers.client_options().unwrap(); // Smoke test: the client_options with headers should be usable // in a builder (we can't inspect the headers directly, but building @@ -1424,17 +1432,22 @@ mod tests { } #[test] - fn test_client_options_skips_invalid_headers() { + fn test_client_options_rejects_invalid_header_name() { let opts = StorageOptions(HashMap::from([ - // Invalid header name (spaces not allowed) ("header.bad header".to_string(), "value".to_string()), - // Invalid header value (non-visible ASCII) - ("header.x-good-name".to_string(), "bad\x01value".to_string()), - // Valid header - ("header.x-valid".to_string(), "good".to_string()), ])); - // Should not panic even with invalid entries - let _ = opts.client_options(); + let err = opts.client_options().unwrap_err(); + assert!(err.to_string().contains("invalid header name")); + } + + #[test] + fn test_client_options_rejects_invalid_header_value() { + let opts = StorageOptions(HashMap::from([( + "header.x-good-name".to_string(), + "bad\x01value".to_string(), + )])); + let err = opts.client_options().unwrap_err(); + assert!(err.to_string().contains("invalid header value")); } #[test] @@ -1443,6 +1456,6 @@ mod tests { ("region".to_string(), "us-east-1".to_string()), ("access_key_id".to_string(), "AKID".to_string()), ])); - let _ = opts.client_options(); + opts.client_options().unwrap(); } } diff --git a/rust/lance-io/src/object_store/providers/aws.rs b/rust/lance-io/src/object_store/providers/aws.rs index 7a2f062372d..a69eb1c8ec3 100644 --- a/rust/lance-io/src/object_store/providers/aws.rs +++ b/rust/lance-io/src/object_store/providers/aws.rs @@ -79,7 +79,7 @@ impl AwsStoreProvider { // we can't use parse_url_opts here because we need to manually set the credentials provider let mut builder = - AmazonS3Builder::new().with_client_options(storage_options.client_options()); + AmazonS3Builder::new().with_client_options(storage_options.client_options()?); for (key, value) in s3_storage_options { builder = builder.with_config(key, value); } diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index bcd6c2fbfb9..6b5e70227ec 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -82,7 +82,7 @@ impl AzureBlobStoreProvider { let mut builder = MicrosoftAzureBuilder::new() .with_url(base_path.as_ref()) .with_retry(retry_config) - .with_client_options(storage_options.client_options()); + .with_client_options(storage_options.client_options()?); for (key, value) in storage_options.as_azure_options() { builder = builder.with_config(key, value); } diff --git a/rust/lance-io/src/object_store/providers/gcp.rs b/rust/lance-io/src/object_store/providers/gcp.rs index 85fdc6623cf..52c6cbbbdc9 100644 --- a/rust/lance-io/src/object_store/providers/gcp.rs +++ b/rust/lance-io/src/object_store/providers/gcp.rs @@ -75,7 +75,7 @@ impl GcsStoreProvider { let mut builder = GoogleCloudStorageBuilder::new() .with_url(base_path.as_ref()) .with_retry(retry_config) - .with_client_options(storage_options.client_options()); + .with_client_options(storage_options.client_options()?); for (key, value) in storage_options.as_gcs_options() { builder = builder.with_config(key, value); } From 6df210c4bb13ac247031bc14dd769e85691fab46 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Mon, 23 Feb 2026 10:02:51 -0600 Subject: [PATCH 4/6] cargo fmt Signed-off-by: Daniel Rammer --- rust/lance-io/src/object_store.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index e8a0a797b71..0fa52b731fb 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -1433,9 +1433,10 @@ mod tests { #[test] fn test_client_options_rejects_invalid_header_name() { - let opts = StorageOptions(HashMap::from([ - ("header.bad header".to_string(), "value".to_string()), - ])); + let opts = StorageOptions(HashMap::from([( + "header.bad header".to_string(), + "value".to_string(), + )])); let err = opts.client_options().unwrap_err(); assert!(err.to_string().contains("invalid header name")); } From 8e812d56d5da6026dbb6cb707ae9263c380314f2 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Mon, 23 Feb 2026 10:31:37 -0600 Subject: [PATCH 5/6] putting header parsing behind feature gate Signed-off-by: Daniel Rammer --- rust/lance-io/src/object_store.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index 0fa52b731fb..0f1f0ab91da 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -24,6 +24,7 @@ use object_store::aws::AwsCredentialProvider; use object_store::DynObjectStore; use object_store::Error as ObjectStoreError; use object_store::{path::Path, ObjectMeta, ObjectStore as OSObjectStore}; +#[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] use object_store::{ClientOptions, HeaderMap, HeaderValue}; use providers::local::FileStoreProvider; use providers::memory::MemoryStoreProvider; @@ -886,6 +887,7 @@ impl StorageOptions { /// `x-ms-version: 2023-11-03`. /// /// Returns an error if any `header.*` key has an invalid header name or value. + #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] pub fn client_options(&self) -> Result { let mut headers = HeaderMap::new(); for (key, value) in &self.0 { @@ -1403,6 +1405,7 @@ mod tests { } #[test] + #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] fn test_client_options_extracts_headers() { let opts = StorageOptions(HashMap::from([ ("header.x-custom-foo".to_string(), "bar".to_string()), @@ -1432,6 +1435,7 @@ mod tests { } #[test] + #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] fn test_client_options_rejects_invalid_header_name() { let opts = StorageOptions(HashMap::from([( "header.bad header".to_string(), @@ -1442,6 +1446,7 @@ mod tests { } #[test] + #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] fn test_client_options_rejects_invalid_header_value() { let opts = StorageOptions(HashMap::from([( "header.x-good-name".to_string(), @@ -1452,6 +1457,7 @@ mod tests { } #[test] + #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] fn test_client_options_empty_when_no_header_keys() { let opts = StorageOptions(HashMap::from([ ("region".to_string(), "us-east-1".to_string()), From 8fe8d4e7e8ac6d21790b3bee899e9a7e5a3845c5 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 24 Feb 2026 15:26:28 -0600 Subject: [PATCH 6/6] updated naming from header to headers to adhere to REST namespace precedent Signed-off-by: Daniel Rammer --- rust/lance-io/src/object_store.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index fad27fe4acf..8644351cf97 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -896,18 +896,18 @@ impl StorageOptions { self.0.get(key) } - /// Build [`ClientOptions`] with default headers extracted from `header.*` keys. + /// Build [`ClientOptions`] with default headers extracted from `headers.*` keys. /// - /// Keys prefixed with `header.` are parsed into HTTP headers. For example, - /// `header.x-ms-version = 2023-11-03` results in a default header + /// Keys prefixed with `headers.` are parsed into HTTP headers. For example, + /// `headers.x-ms-version = 2023-11-03` results in a default header /// `x-ms-version: 2023-11-03`. /// - /// Returns an error if any `header.*` key has an invalid header name or value. + /// Returns an error if any `headers.*` key has an invalid header name or value. #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] pub fn client_options(&self) -> Result { let mut headers = HeaderMap::new(); for (key, value) in &self.0 { - if let Some(header_name) = key.strip_prefix("header.") { + if let Some(header_name) = key.strip_prefix("headers.") { let name = header_name .parse::() .map_err(|e| { @@ -1423,14 +1423,14 @@ mod tests { #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] fn test_client_options_extracts_headers() { let opts = StorageOptions(HashMap::from([ - ("header.x-custom-foo".to_string(), "bar".to_string()), - ("header.x-ms-version".to_string(), "2023-11-03".to_string()), + ("headers.x-custom-foo".to_string(), "bar".to_string()), + ("headers.x-ms-version".to_string(), "2023-11-03".to_string()), ("region".to_string(), "us-west-2".to_string()), ])); let client_options = opts.client_options().unwrap(); // Verify non-header keys are not consumed as headers by creating - // another StorageOptions with no header.* keys. + // another StorageOptions with no headers.* keys. let opts_no_headers = StorageOptions(HashMap::from([( "region".to_string(), "us-west-2".to_string(), @@ -1453,7 +1453,7 @@ mod tests { #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] fn test_client_options_rejects_invalid_header_name() { let opts = StorageOptions(HashMap::from([( - "header.bad header".to_string(), + "headers.bad header".to_string(), "value".to_string(), )])); let err = opts.client_options().unwrap_err(); @@ -1464,7 +1464,7 @@ mod tests { #[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] fn test_client_options_rejects_invalid_header_value() { let opts = StorageOptions(HashMap::from([( - "header.x-good-name".to_string(), + "headers.x-good-name".to_string(), "bad\x01value".to_string(), )])); let err = opts.client_options().unwrap_err();