From 0e81cfc7d34b6a269a782ebf578059ba1dcc5dfe Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Wed, 5 Nov 2025 15:25:49 -0800 Subject: [PATCH 01/12] fix: prevent cache collisions in ObjectStoreRegistry ObjectStoreRegistry maintains a cache of ObjectStore objects. Previously, when caching an Azure blob store, the account name was not part of the cache key. This could result in collisions in the case where we tried to cache two Azure blob stores using the same container name, but different account names. Container names are not unique in Azure; only the combination of account name, container name is. The flow here is roughly that we determine what type of object store we're creating by looking at the first part of the URL (the scheme), and then use the associated object store Provider object to generate the appropriate store_prefix from the second part of the URL (the authority). We want to be able to do all this relatively quickly, without instantiating heavyweight objects, to support the cache key use-case. In the specific case of Azure, the account name may not even be in the URL at all. It can be passed as part of the storage options. Therefore, the function generating the object store prefix must also take the storage_options as a parameter. The final object store prefix we generate for Azure must include this account information, as well as the container information. This PR modifies WrappingObjectStore.wrap() to take the store prefix as an argument, rather than the storage options. In general, the storage options are highly specific to the type of the object store, which the wrapper generally doesn't know. It would not make sense to look for Azure-specific storage options when wrapping an s3 store, for example. --- rust/lance-io/src/object_store.rs | 34 +++--- rust/lance-io/src/object_store/providers.rs | 101 +++++++++++++----- .../src/object_store/providers/azure.rs | 39 +++++++ rust/lance-io/src/utils/tracking_store.rs | 6 +- 4 files changed, 135 insertions(+), 45 deletions(-) diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index d9fcbbc9290..be7dd494e7e 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -145,13 +145,9 @@ impl std::fmt::Display for ObjectStore { pub trait WrappingObjectStore: std::fmt::Debug + Send + Sync { /// Wrap an object store with additional functionality /// - /// The storage_options contain namespace information (e.g., azure_storage_account_name) - /// that wrappers may need for proper isolation - fn wrap( - &self, - original: Arc, - storage_options: Option<&HashMap>, - ) -> Arc; + /// The store_prefix is a string which uniquely identifies the object + /// store being wrapped. + fn wrap(&self, store_prefix: &str, original: Arc) -> Arc; } #[derive(Debug, Clone)] @@ -170,14 +166,10 @@ impl ChainedWrappingObjectStore { } impl WrappingObjectStore for ChainedWrappingObjectStore { - fn wrap( - &self, - original: Arc, - storage_options: Option<&HashMap>, - ) -> Arc { + fn wrap(&self, store_prefix: &str, original: Arc) -> Arc { self.wrappers .iter() - .fold(original, |acc, wrapper| wrapper.wrap(acc, storage_options)) + .fold(original, |acc, wrapper| wrapper.wrap(store_prefix, acc)) } } @@ -353,8 +345,10 @@ impl ObjectStore { #[allow(deprecated)] if let Some((store, path)) = params.object_store.as_ref() { let mut inner = store.clone(); + let store_prefix = + registry.calculate_object_store_prefix(uri, params.storage_options.as_ref())?; if let Some(wrapper) = params.object_store_wrapper.as_ref() { - inner = wrapper.wrap(inner, params.storage_options.as_ref()); + inner = wrapper.wrap(&store_prefix, inner); } let store = Self { inner, @@ -734,6 +728,9 @@ impl From> for StorageOptions { } } +static DEFAULT_OBJECT_STORE_REGISTRY: std::sync::LazyLock = + std::sync::LazyLock::new(ObjectStoreRegistry::default); + impl ObjectStore { #[allow(clippy::too_many_arguments)] pub fn new( @@ -751,7 +748,12 @@ impl ObjectStore { let block_size = block_size.unwrap_or_else(|| infer_block_size(scheme)); let store = match wrapper { - Some(wrapper) => wrapper.wrap(store, storage_options), + Some(wrapper) => { + let store_prefix = DEFAULT_OBJECT_STORE_REGISTRY + .calculate_object_store_prefix(&location.to_string(), storage_options) + .unwrap(); + wrapper.wrap(&store_prefix, store) + } None => store, }; @@ -984,8 +986,8 @@ mod tests { impl WrappingObjectStore for TestWrapper { fn wrap( &self, + _store_prefix: &str, _original: Arc, - _storage_options: Option<&HashMap>, ) -> Arc { self.called.store(true, Ordering::Relaxed); diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index 3ac6be93d6f..41d303bcc40 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -41,22 +41,29 @@ pub trait ObjectStoreProvider: std::fmt::Debug + Sync + Send { }) } - /// Generate a cache URL for this provider. + /// Calculcate the unique prefix that should be used for this object store. /// - /// Providers can override this to implement custom cache key generation - /// that takes into account provider-specific requirements like namespace - /// isolation. - fn cache_url(&self, url: &Url) -> String { - if ["file", "file-object-store", "memory"].contains(&url.scheme()) { - // For file URLs, cache the URL without the path. - // The path can be different for different object stores, - // but we want to cache the object store itself. - format!("{}://", url.scheme()) - } else { - // Bucket is parsed as domain, so drop the path. - let mut url = url.clone(); - url.set_path(""); - url.to_string() + /// For object stores that don't have the concept of buckets, this will just be something like + /// 'file' or 'memory'. + /// + /// In object stores where all bucket names are unique, like s3, this will be + /// simply 's3$my_bucket_name' or similar. + /// + /// In Azure, only the combination of (account name, container name) is unique, so + /// this will be something like 'az$account_name@container' + /// + /// Providers should override this if they have special requirements like Azure's. + fn calculcate_object_store_prefix( + &self, + scheme: &str, + authority: &str, + _storage_options: Option<&HashMap>, + ) -> Result { + match scheme { + "file" => Ok("file".to_string()), + "file-object-store" => Ok("file-object-store".to_string()), + "memory" => Ok("memory".to_string()), + _ => Ok(format!("{}${}", scheme, authority)), } } } @@ -141,6 +148,15 @@ impl ObjectStoreRegistry { output } + fn scheme_not_found_error(&self, scheme: &str) -> Error { + let mut message = format!("No object store provider found for scheme: '{}'", scheme); + if let Ok(providers) = self.providers.read() { + let valid_schemes = providers.keys().cloned().collect::>().join(", "); + message.push_str(&format!("\nValid schemes: {}", valid_schemes)); + } + return Error::invalid_input(message, location!()); + } + /// Get an object store for a given base path and parameters. /// /// If the object store is already in use, it will return a strong reference @@ -153,16 +169,15 @@ impl ObjectStoreRegistry { ) -> Result> { let scheme = base_path.scheme(); let Some(provider) = self.get_provider(scheme) else { - let mut message = format!("No object store provider found for scheme: '{}'", scheme); - if let Ok(providers) = self.providers.read() { - let valid_schemes = providers.keys().cloned().collect::>().join(", "); - message.push_str(&format!("\nValid schemes: {}", valid_schemes)); - } - return Err(Error::invalid_input(message, location!())); + return Err(self.scheme_not_found_error(scheme)); }; - let cache_path = provider.cache_url(&base_path); - let cache_key = (cache_path, params.clone()); + let cache_path = provider.calculcate_object_store_prefix( + base_path.scheme(), + base_path.authority(), + params.storage_options.as_ref(), + )?; + let cache_key = (cache_path.clone(), params.clone()); // Check if we have a cached store for this base path and params { @@ -197,7 +212,7 @@ impl ObjectStoreRegistry { store.inner = store.inner.traced(); if let Some(wrapper) = ¶ms.object_store_wrapper { - store.inner = wrapper.wrap(store.inner, params.storage_options.as_ref()); + store.inner = wrapper.wrap(&cache_path, store.inner); } let store = Arc::new(store); @@ -210,6 +225,44 @@ impl ObjectStoreRegistry { Ok(store) } + + /// Calculate the datastore prefix based on the URI and the storage options. + /// The data store prefix should uniquely identify the datastore. + pub fn calculate_object_store_prefix( + &self, + uri: &str, + storage_options: Option<&HashMap>, + ) -> Result { + let (scheme, authority) = match uri.find("://") { + None => { + // If there is no scheme, this is a file:// URI. + return Ok("file/".to_string()); + } + Some(index) => { + let scheme = &uri[..index]; + let authority = match uri[index + 1..].find("/") { + None => &uri[index + 1..], + Some(sindex) => &uri[index + 1..sindex], + }; + (scheme, authority) + } + }; + match self.get_provider(scheme) { + None => { + if scheme.len() == 1 { + // On Windows, drive letters such as C:/ can sometimes be confused for schemes. + // So if there is no known object store for this single-letter scheme, treat it + // as the local store. + return Ok("file/".to_string()); + } else { + return Err(self.scheme_not_found_error(scheme)); + } + } + Some(provider) => { + provider.calculcate_object_store_prefix(scheme, authority, storage_options) + } + } + } } impl Default for ObjectStoreRegistry { diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index b79ca8498d0..9fa15099e3e 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -119,6 +119,45 @@ impl ObjectStoreProvider for AzureBlobStoreProvider { download_retry_count, }) } + + //.parse_url("az://container@account.dfs.core.windows.net/path-part/file") + fn calculcate_object_store_prefix( + &self, + scheme: &str, + authority: &str, + storage_options: Option<&HashMap>, + ) -> Result { + let (container, account) = match authority.find("@") { + Some(at_index) => { + // The URI looks like 'az://container@account.dfs.core.windows.net/path-part/file', + // or possibly 'az://container@account/path-part/file'. + let container = &authority[..at_index]; + let account = &authority[at_index + 1..]; + ( + container, + account.split(".").next().unwrap_or_default().to_string(), + ) + } + None => { + // The URI looks like 'az://container/path-part/file'. + // We must look at the storage options to find the account. + let opts = storage_options.ok_or(Error::invalid_input( + "Unable to find object store prefix: no Azure " + + "account name in URI, and no storage options configured.", + location!(), + ))?; + let account = opts + .get("azure_storage_account_name") + .ok_or(Error::invalid_input( + "Unable to find object store prefix: no Azure " + + "account name in URI, and no azure_storage_account_name storage option.", + location!(), + ))?; + (authority, account.to_string()) + } + }; + Ok(format!("{}${}@{}", scheme, container, account)) + } } impl StorageOptions { diff --git a/rust/lance-io/src/utils/tracking_store.rs b/rust/lance-io/src/utils/tracking_store.rs index a1b4f3b0a77..eebce400dcc 100644 --- a/rust/lance-io/src/utils/tracking_store.rs +++ b/rust/lance-io/src/utils/tracking_store.rs @@ -32,11 +32,7 @@ impl IOTracker { } impl WrappingObjectStore for IOTracker { - fn wrap( - &self, - target: Arc, - _storage_options: Option<&std::collections::HashMap>, - ) -> Arc { + fn wrap(&self, _store_prefix: &str, target: Arc) -> Arc { Arc::new(IoTrackingStore::new(target, self.0.clone())) } } From 6bb00bb46f729cb539e31be3c5df2743c9065c61 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Wed, 5 Nov 2025 21:54:48 -0800 Subject: [PATCH 02/12] fixes etc --- rust/lance-io/src/object_store/providers.rs | 28 ++------- .../src/object_store/providers/azure.rs | 60 ++++++++++++------- .../src/object_store/providers/local.rs | 23 ++++++- .../src/object_store/providers/memory.rs | 17 +++++- 4 files changed, 82 insertions(+), 46 deletions(-) diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index 41d303bcc40..5c799cc1003 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -59,12 +59,7 @@ pub trait ObjectStoreProvider: std::fmt::Debug + Sync + Send { authority: &str, _storage_options: Option<&HashMap>, ) -> Result { - match scheme { - "file" => Ok("file".to_string()), - "file-object-store" => Ok("file-object-store".to_string()), - "memory" => Ok("memory".to_string()), - _ => Ok(format!("{}${}", scheme, authority)), - } + Ok(format!("{}${}", scheme, authority)) } } @@ -316,8 +311,8 @@ mod tests { use super::*; #[test] - fn test_cache_url() { - // Test the default cache_url implementation using a dummy provider + fn test_calculcate_object_store_prefix() { + // Test the default calculcate_object_store_prefix implementation using a dummy provider #[derive(Debug)] struct DummyProvider; @@ -333,21 +328,6 @@ mod tests { } let provider = DummyProvider; - let cases = [ - ("s3://bucket/path?param=value", "s3://bucket?param=value"), - ("file:///path/to/file", "file://"), - ("file-object-store:///path/to/file", "file-object-store://"), - ("memory:///", "memory://"), - ( - "http://example.com/path?param=value", - "http://example.com/?param=value", - ), - ]; - - for (url, expected_cache_url) in cases { - let url = Url::parse(url).unwrap(); - let cache_url = provider.cache_url(&url); - assert_eq!(cache_url, expected_cache_url); - } + assert_eq!("dummy$blah", provider.calculcate_object_store_prefix("dummy", "blah", None).unwrap()); } } diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index 9fa15099e3e..3bb8aa9cfeb 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::{collections::HashMap, str::FromStr, sync::Arc, time::Duration}; +use std::{collections::HashMap, str::FromStr, sync::{Arc, LazyLock}, time::Duration}; use object_store::ObjectStore as OSObjectStore; use object_store_opendal::OpendalStore; @@ -15,8 +15,7 @@ use object_store::{ use url::Url; use crate::object_store::{ - ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, DEFAULT_CLOUD_BLOCK_SIZE, - DEFAULT_CLOUD_IO_PARALLELISM, DEFAULT_MAX_IOP_SIZE, + storage_options, ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, DEFAULT_CLOUD_BLOCK_SIZE, DEFAULT_CLOUD_IO_PARALLELISM, DEFAULT_MAX_IOP_SIZE }; use lance_core::error::{Error, Result}; @@ -141,38 +140,49 @@ impl ObjectStoreProvider for AzureBlobStoreProvider { None => { // The URI looks like 'az://container/path-part/file'. // We must look at the storage options to find the account. - let opts = storage_options.ok_or(Error::invalid_input( - "Unable to find object store prefix: no Azure " - + "account name in URI, and no storage options configured.", - location!(), - ))?; - let account = opts - .get("azure_storage_account_name") - .ok_or(Error::invalid_input( - "Unable to find object store prefix: no Azure " - + "account name in URI, and no azure_storage_account_name storage option.", + let mut account = match storage_options { + Some(opts) => StorageOptions::find_configured_storage_account(&opts), + None => None, + }; + if let None = account { + account = StorageOptions::find_configured_storage_account(&ENV_OPTIONS.0); + } + let account = account.ok_or(Error::invalid_input( + "Unable to find object store prefix: no Azure ".to_owned() + + "account name in URI, and no storage account configured.", location!(), ))?; - (authority, account.to_string()) + (authority, account) } }; Ok(format!("{}${}@{}", scheme, container, account)) } } +static ENV_OPTIONS: LazyLock = LazyLock::new(StorageOptions::from_env); + impl StorageOptions { - /// Add values from the environment to storage options - pub fn with_env_azure(&mut self) { + /// Iterate over all environment variables, looking for anything related to Azure. + fn from_env() -> Self { + let mut opts = HashMap::::new(); for (os_key, os_value) in std::env::vars_os() { if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) { if let Ok(config_key) = AzureConfigKey::from_str(&key.to_ascii_lowercase()) { - if !self.0.contains_key(config_key.as_ref()) { - self.0 - .insert(config_key.as_ref().to_string(), value.to_string()); - } + opts.insert(config_key.as_ref().to_string(), value.to_string()); } } } + StorageOptions(opts) + } + + /// Add values from the environment to storage options + pub fn with_env_azure(&mut self) { + for (os_key, os_value) in &ENV_OPTIONS.0 { + if !self.0.contains_key(os_key) { + self.0 + .insert(os_key.clone(), os_value.clone()); + } + } } /// Subset of options relevant for azure storage @@ -185,6 +195,16 @@ impl StorageOptions { }) .collect() } + + fn find_configured_storage_account(map: &HashMap) -> Option { + if let Some(account) = map.get("azure_storage_account_name") { + Some(account.to_string()) + } else if let Some(account) = map.get("account_name") { + Some(account.to_string()) + } else { + None + } + } } #[cfg(test)] diff --git a/rust/lance-io/src/object_store/providers/local.rs b/rust/lance-io/src/object_store/providers/local.rs index 9915ae45679..1b784075821 100644 --- a/rust/lance-io/src/object_store/providers/local.rs +++ b/rust/lance-io/src/object_store/providers/local.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; use crate::object_store::{ ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, DEFAULT_LOCAL_BLOCK_SIZE, @@ -48,6 +48,15 @@ impl ObjectStoreProvider for FileStoreProvider { ) }) } + + fn calculcate_object_store_prefix( + &self, + scheme: &str, + _authority: &str, + _storage_options: Option<&HashMap>, + ) -> Result { + Ok(scheme.to_string()) + } } #[cfg(test)] @@ -74,6 +83,18 @@ mod tests { } } + #[test] + fn test_calculcate_object_store_prefix() { + let provider = FileStoreProvider; + assert_eq!("file", provider.calculcate_object_store_prefix("file", "etc", None).unwrap()); + } + + #[test] + fn test_calculcate_object_store_prefix_for_file_object_store() { + let provider = FileStoreProvider; + assert_eq!("file-object-store", provider.calculcate_object_store_prefix("file-object-store", "etc", None).unwrap()); + } + #[test] #[cfg(windows)] fn test_file_store_path_windows() { diff --git a/rust/lance-io/src/object_store/providers/memory.rs b/rust/lance-io/src/object_store/providers/memory.rs index f80ce410a43..c9f05347d31 100644 --- a/rust/lance-io/src/object_store/providers/memory.rs +++ b/rust/lance-io/src/object_store/providers/memory.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; use crate::object_store::{ ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, @@ -41,6 +41,15 @@ impl ObjectStoreProvider for MemoryStoreProvider { output.push_str(url.path()); Ok(Path::from(output)) } + + fn calculcate_object_store_prefix( + &self, + _scheme: &str, + _authority: &str, + _storage_options: Option<&HashMap>, + ) -> Result { + Ok("memory".to_string()) + } } #[cfg(test)] @@ -56,4 +65,10 @@ mod tests { let expected_path = Path::from("path/to/file"); assert_eq!(path, expected_path); } + + #[test] + fn test_calculcate_object_store_prefix() { + let provider = MemoryStoreProvider; + assert_eq!("memory", provider.calculcate_object_store_prefix("memory", "etc", None).unwrap()); + } } From 09e142930404b2fabfbacd5e9652a12087ef519e Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 10:06:26 -0800 Subject: [PATCH 03/12] more fixes --- rust/lance-io/src/object_store/providers/azure.rs | 2 +- rust/lance/src/dataset/cleanup.rs | 2 +- rust/lance/src/utils/test/throttle_store.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index 3bb8aa9cfeb..befb5618003 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -15,7 +15,7 @@ use object_store::{ use url::Url; use crate::object_store::{ - storage_options, ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, DEFAULT_CLOUD_BLOCK_SIZE, DEFAULT_CLOUD_IO_PARALLELISM, DEFAULT_MAX_IOP_SIZE + ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, DEFAULT_CLOUD_BLOCK_SIZE, DEFAULT_CLOUD_IO_PARALLELISM, DEFAULT_MAX_IOP_SIZE }; use lance_core::error::{Error, Result}; diff --git a/rust/lance/src/dataset/cleanup.rs b/rust/lance/src/dataset/cleanup.rs index c5ec80ffd69..75cf4a60996 100644 --- a/rust/lance/src/dataset/cleanup.rs +++ b/rust/lance/src/dataset/cleanup.rs @@ -667,8 +667,8 @@ mod tests { impl WrappingObjectStore for MockObjectStore { fn wrap( &self, + _storage_prefix: &str, original: Arc, - _storage_options: Option<&std::collections::HashMap>, ) -> Arc { Arc::new(ProxyObjectStore::new(original, self.policy.clone())) } diff --git a/rust/lance/src/utils/test/throttle_store.rs b/rust/lance/src/utils/test/throttle_store.rs index c78cd66583c..7fd42ea3c8a 100644 --- a/rust/lance/src/utils/test/throttle_store.rs +++ b/rust/lance/src/utils/test/throttle_store.rs @@ -17,8 +17,8 @@ pub struct ThrottledStoreWrapper { impl WrappingObjectStore for ThrottledStoreWrapper { fn wrap( &self, + _prefix: &str, original: Arc, - _storage_options: Option<&std::collections::HashMap>, ) -> Arc { let throttle_store = ThrottledStore::new(original, self.config); Arc::new(throttle_store) From eea1c17ca6f86a4559b839388ac263e3169def35 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 10:31:18 -0800 Subject: [PATCH 04/12] providers.rs: some tests and fixes --- rust/lance-io/src/object_store/providers.rs | 71 +++++++++++++++------ 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index 5c799cc1003..6adac549df3 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -231,13 +231,14 @@ impl ObjectStoreRegistry { let (scheme, authority) = match uri.find("://") { None => { // If there is no scheme, this is a file:// URI. - return Ok("file/".to_string()); + return Ok("file".to_string()); } Some(index) => { let scheme = &uri[..index]; - let authority = match uri[index + 1..].find("/") { - None => &uri[index + 1..], - Some(sindex) => &uri[index + 1..sindex], + let remainder = &uri[index + 3..]; + let authority = match remainder.find("/") { + None => remainder, + Some(sindex) => &remainder[..sindex], }; (scheme, authority) } @@ -248,7 +249,7 @@ impl ObjectStoreRegistry { // On Windows, drive letters such as C:/ can sometimes be confused for schemes. // So if there is no known object store for this single-letter scheme, treat it // as the local store. - return Ok("file/".to_string()); + return Ok("file".to_string()); } else { return Err(self.scheme_not_found_error(scheme)); } @@ -310,24 +311,54 @@ impl ObjectStoreRegistry { mod tests { use super::*; - #[test] - fn test_calculcate_object_store_prefix() { - // Test the default calculcate_object_store_prefix implementation using a dummy provider - #[derive(Debug)] - struct DummyProvider; - - #[async_trait::async_trait] - impl ObjectStoreProvider for DummyProvider { - async fn new_store( - &self, - _base_path: Url, - _params: &ObjectStoreParams, - ) -> Result { - unreachable!("This test doesn't create stores") - } + #[derive(Debug)] + struct DummyProvider; + + #[async_trait::async_trait] + impl ObjectStoreProvider for DummyProvider { + async fn new_store( + &self, + _base_path: Url, + _params: &ObjectStoreParams, + ) -> Result { + unreachable!("This test doesn't create stores") } + } + #[test] + fn test_calculcate_object_store_prefix() { let provider = DummyProvider; assert_eq!("dummy$blah", provider.calculcate_object_store_prefix("dummy", "blah", None).unwrap()); } + + #[test] + fn test_calculcate_object_store_scheme_not_found() { + let registry = ObjectStoreRegistry::empty(); + registry.insert("dummy", Arc::new(DummyProvider)); + let s = "Invalid user input: No object store provider found for scheme: 'dummy2'\nValid schemes: dummy"; + let result = registry.calculate_object_store_prefix("dummy2://mybucket/my/long/path", None).expect_err("expected error").to_string(); + assert_eq!(s, &result[..s.len()]); + } + + // Test that paths without a scheme get treated as local paths. + #[test] + fn test_calculcate_object_store_prefix_for_local() { + let registry = ObjectStoreRegistry::empty(); + assert_eq!("file", registry.calculate_object_store_prefix("/tmp/foobar", None).unwrap()); + } + + // Test that paths with a single-letter scheme that is not registered for anything get treated as local paths. + #[test] + fn test_calculcate_object_store_prefix_for_local_windows_path() { + let registry = ObjectStoreRegistry::empty(); + assert_eq!("file", registry.calculate_object_store_prefix("c://dos/path", None).unwrap()); + } + + // Test that paths with a given scheme get mapped to that storage provider. + #[test] + fn test_calculcate_object_store_prefix_for_dummy_path() { + let registry = ObjectStoreRegistry::empty(); + registry.insert("dummy", Arc::new(DummyProvider)); + assert_eq!("dummy$mybucket", registry.calculate_object_store_prefix("dummy://mybucket/my/long/path", None).unwrap()); + } } From 2572bf0be974302cf2f4f271cdaa14d876ec5df5 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 11:16:02 -0800 Subject: [PATCH 05/12] fmt etc --- rust/lance-io/src/object_store/providers.rs | 33 ++++++++++++++++--- .../src/object_store/providers/azure.rs | 30 ++++++++++++++--- .../src/object_store/providers/local.rs | 14 ++++++-- .../src/object_store/providers/memory.rs | 7 +++- 4 files changed, 72 insertions(+), 12 deletions(-) diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index 6adac549df3..313b0a557e5 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -328,7 +328,12 @@ mod tests { #[test] fn test_calculcate_object_store_prefix() { let provider = DummyProvider; - assert_eq!("dummy$blah", provider.calculcate_object_store_prefix("dummy", "blah", None).unwrap()); + assert_eq!( + "dummy$blah", + provider + .calculcate_object_store_prefix("dummy", "blah", None) + .unwrap() + ); } #[test] @@ -336,7 +341,10 @@ mod tests { let registry = ObjectStoreRegistry::empty(); registry.insert("dummy", Arc::new(DummyProvider)); let s = "Invalid user input: No object store provider found for scheme: 'dummy2'\nValid schemes: dummy"; - let result = registry.calculate_object_store_prefix("dummy2://mybucket/my/long/path", None).expect_err("expected error").to_string(); + let result = registry + .calculate_object_store_prefix("dummy2://mybucket/my/long/path", None) + .expect_err("expected error") + .to_string(); assert_eq!(s, &result[..s.len()]); } @@ -344,14 +352,24 @@ mod tests { #[test] fn test_calculcate_object_store_prefix_for_local() { let registry = ObjectStoreRegistry::empty(); - assert_eq!("file", registry.calculate_object_store_prefix("/tmp/foobar", None).unwrap()); + assert_eq!( + "file", + registry + .calculate_object_store_prefix("/tmp/foobar", None) + .unwrap() + ); } // Test that paths with a single-letter scheme that is not registered for anything get treated as local paths. #[test] fn test_calculcate_object_store_prefix_for_local_windows_path() { let registry = ObjectStoreRegistry::empty(); - assert_eq!("file", registry.calculate_object_store_prefix("c://dos/path", None).unwrap()); + assert_eq!( + "file", + registry + .calculate_object_store_prefix("c://dos/path", None) + .unwrap() + ); } // Test that paths with a given scheme get mapped to that storage provider. @@ -359,6 +377,11 @@ mod tests { fn test_calculcate_object_store_prefix_for_dummy_path() { let registry = ObjectStoreRegistry::empty(); registry.insert("dummy", Arc::new(DummyProvider)); - assert_eq!("dummy$mybucket", registry.calculate_object_store_prefix("dummy://mybucket/my/long/path", None).unwrap()); + assert_eq!( + "dummy$mybucket", + registry + .calculate_object_store_prefix("dummy://mybucket/my/long/path", None) + .unwrap() + ); } } diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index befb5618003..d59e02bba23 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -1,7 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::{collections::HashMap, str::FromStr, sync::{Arc, LazyLock}, time::Duration}; +use std::{ + collections::HashMap, + str::FromStr, + sync::{Arc, LazyLock}, + time::Duration, +}; use object_store::ObjectStore as OSObjectStore; use object_store_opendal::OpendalStore; @@ -15,7 +20,8 @@ use object_store::{ use url::Url; use crate::object_store::{ - ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, DEFAULT_CLOUD_BLOCK_SIZE, DEFAULT_CLOUD_IO_PARALLELISM, DEFAULT_MAX_IOP_SIZE + ObjectStore, ObjectStoreParams, ObjectStoreProvider, StorageOptions, DEFAULT_CLOUD_BLOCK_SIZE, + DEFAULT_CLOUD_IO_PARALLELISM, DEFAULT_MAX_IOP_SIZE, }; use lance_core::error::{Error, Result}; @@ -179,8 +185,7 @@ impl StorageOptions { pub fn with_env_azure(&mut self) { for (os_key, os_value) in &ENV_OPTIONS.0 { if !self.0.contains_key(os_key) { - self.0 - .insert(os_key.clone(), os_value.clone()); + self.0.insert(os_key.clone(), os_value.clone()); } } } @@ -246,4 +251,21 @@ mod tests { .unwrap(); assert_eq!(store.scheme, "az"); } + + #[test] + fn test_find_configured_storage_account() { + assert_eq!( + Some("myaccount".to_string()), + StorageOptions::find_configured_storage_account(&HashMap::from_iter( + [ + ("access_key".to_string(), "myaccesskey".to_string()), + ( + "azure_storage_account_name".to_string(), + "myaccount".to_string() + ) + ] + .into_iter() + )) + ); + } } diff --git a/rust/lance-io/src/object_store/providers/local.rs b/rust/lance-io/src/object_store/providers/local.rs index 1b784075821..49a08f21565 100644 --- a/rust/lance-io/src/object_store/providers/local.rs +++ b/rust/lance-io/src/object_store/providers/local.rs @@ -86,13 +86,23 @@ mod tests { #[test] fn test_calculcate_object_store_prefix() { let provider = FileStoreProvider; - assert_eq!("file", provider.calculcate_object_store_prefix("file", "etc", None).unwrap()); + assert_eq!( + "file", + provider + .calculcate_object_store_prefix("file", "etc", None) + .unwrap() + ); } #[test] fn test_calculcate_object_store_prefix_for_file_object_store() { let provider = FileStoreProvider; - assert_eq!("file-object-store", provider.calculcate_object_store_prefix("file-object-store", "etc", None).unwrap()); + assert_eq!( + "file-object-store", + provider + .calculcate_object_store_prefix("file-object-store", "etc", None) + .unwrap() + ); } #[test] diff --git a/rust/lance-io/src/object_store/providers/memory.rs b/rust/lance-io/src/object_store/providers/memory.rs index c9f05347d31..8b5804f255d 100644 --- a/rust/lance-io/src/object_store/providers/memory.rs +++ b/rust/lance-io/src/object_store/providers/memory.rs @@ -69,6 +69,11 @@ mod tests { #[test] fn test_calculcate_object_store_prefix() { let provider = MemoryStoreProvider; - assert_eq!("memory", provider.calculcate_object_store_prefix("memory", "etc", None).unwrap()); + assert_eq!( + "memory", + provider + .calculcate_object_store_prefix("memory", "etc", None) + .unwrap() + ); } } From 27ddccbad47f89304802c0093e7bac8285f48999 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 11:25:08 -0800 Subject: [PATCH 06/12] azure.rs: add some tests --- .../src/object_store/providers/azure.rs | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index d59e02bba23..7e0625e8718 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -125,7 +125,6 @@ impl ObjectStoreProvider for AzureBlobStoreProvider { }) } - //.parse_url("az://container@account.dfs.core.windows.net/path-part/file") fn calculcate_object_store_prefix( &self, scheme: &str, @@ -268,4 +267,47 @@ mod tests { )) ); } + + #[test] + fn test_calculcate_object_store_prefix_from_url_and_options() { + let provider = AzureBlobStoreProvider; + let options = + HashMap::from_iter([("account_name".to_string(), "bob".to_string())].into_iter()); + assert_eq!( + "az$container@bob", + provider + .calculcate_object_store_prefix("az", "container", Some(&options)) + .unwrap() + ); + } + + #[test] + fn test_calculcate_object_store_prefix_from_url_and_ignored_options() { + let provider = AzureBlobStoreProvider; + let options = + HashMap::from_iter([("account_name".to_string(), "bob".to_string())].into_iter()); + assert_eq!( + "az$container@account", + provider + .calculcate_object_store_prefix( + "az", + "container@account.dfs.core.windows.net", + Some(&options) + ) + .unwrap() + ); + } + + #[test] + fn test_fail_to_calculcate_object_store_prefix_from_url() { + let provider = AzureBlobStoreProvider; + let options = + HashMap::from_iter([("access_key".to_string(), "myaccesskey".to_string())].into_iter()); + let expected = "Invalid user input: Unable to find object store prefix: no Azure account name in URI, and no storage account configured."; + let result = provider + .calculcate_object_store_prefix("az", "container", Some(&options)) + .expect_err("expected error") + .to_string(); + assert_eq!(expected, &result[..expected.len()]); + } } From 51bcefbfd410ab1522c46e1d32d895638b6e9ef8 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 12:39:14 -0800 Subject: [PATCH 07/12] format --- rust/lance/src/utils/test/throttle_store.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/rust/lance/src/utils/test/throttle_store.rs b/rust/lance/src/utils/test/throttle_store.rs index 7fd42ea3c8a..c146841e365 100644 --- a/rust/lance/src/utils/test/throttle_store.rs +++ b/rust/lance/src/utils/test/throttle_store.rs @@ -15,11 +15,7 @@ pub struct ThrottledStoreWrapper { } impl WrappingObjectStore for ThrottledStoreWrapper { - fn wrap( - &self, - _prefix: &str, - original: Arc, - ) -> Arc { + fn wrap(&self, _prefix: &str, original: Arc) -> Arc { let throttle_store = ThrottledStore::new(original, self.config); Arc::new(throttle_store) } From fadea9f1926a3105cdce472094eb3ead7132b13c Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 12:41:17 -0800 Subject: [PATCH 08/12] fix typo --- rust/lance-io/src/object_store/providers.rs | 20 +++++++++---------- .../src/object_store/providers/azure.rs | 14 ++++++------- .../src/object_store/providers/local.rs | 10 +++++----- .../src/object_store/providers/memory.rs | 6 +++--- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index 313b0a557e5..2c8117c7d0e 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -41,7 +41,7 @@ pub trait ObjectStoreProvider: std::fmt::Debug + Sync + Send { }) } - /// Calculcate the unique prefix that should be used for this object store. + /// Calculate the unique prefix that should be used for this object store. /// /// For object stores that don't have the concept of buckets, this will just be something like /// 'file' or 'memory'. @@ -53,7 +53,7 @@ pub trait ObjectStoreProvider: std::fmt::Debug + Sync + Send { /// this will be something like 'az$account_name@container' /// /// Providers should override this if they have special requirements like Azure's. - fn calculcate_object_store_prefix( + fn calculate_object_store_prefix( &self, scheme: &str, authority: &str, @@ -167,7 +167,7 @@ impl ObjectStoreRegistry { return Err(self.scheme_not_found_error(scheme)); }; - let cache_path = provider.calculcate_object_store_prefix( + let cache_path = provider.calculate_object_store_prefix( base_path.scheme(), base_path.authority(), params.storage_options.as_ref(), @@ -255,7 +255,7 @@ impl ObjectStoreRegistry { } } Some(provider) => { - provider.calculcate_object_store_prefix(scheme, authority, storage_options) + provider.calculate_object_store_prefix(scheme, authority, storage_options) } } } @@ -326,18 +326,18 @@ mod tests { } #[test] - fn test_calculcate_object_store_prefix() { + fn test_calculate_object_store_prefix() { let provider = DummyProvider; assert_eq!( "dummy$blah", provider - .calculcate_object_store_prefix("dummy", "blah", None) + .calculate_object_store_prefix("dummy", "blah", None) .unwrap() ); } #[test] - fn test_calculcate_object_store_scheme_not_found() { + fn test_calculate_object_store_scheme_not_found() { let registry = ObjectStoreRegistry::empty(); registry.insert("dummy", Arc::new(DummyProvider)); let s = "Invalid user input: No object store provider found for scheme: 'dummy2'\nValid schemes: dummy"; @@ -350,7 +350,7 @@ mod tests { // Test that paths without a scheme get treated as local paths. #[test] - fn test_calculcate_object_store_prefix_for_local() { + fn test_calculate_object_store_prefix_for_local() { let registry = ObjectStoreRegistry::empty(); assert_eq!( "file", @@ -362,7 +362,7 @@ mod tests { // Test that paths with a single-letter scheme that is not registered for anything get treated as local paths. #[test] - fn test_calculcate_object_store_prefix_for_local_windows_path() { + fn test_calculate_object_store_prefix_for_local_windows_path() { let registry = ObjectStoreRegistry::empty(); assert_eq!( "file", @@ -374,7 +374,7 @@ mod tests { // Test that paths with a given scheme get mapped to that storage provider. #[test] - fn test_calculcate_object_store_prefix_for_dummy_path() { + fn test_calculate_object_store_prefix_for_dummy_path() { let registry = ObjectStoreRegistry::empty(); registry.insert("dummy", Arc::new(DummyProvider)); assert_eq!( diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index 7e0625e8718..d63f277e7eb 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -125,7 +125,7 @@ impl ObjectStoreProvider for AzureBlobStoreProvider { }) } - fn calculcate_object_store_prefix( + fn calculate_object_store_prefix( &self, scheme: &str, authority: &str, @@ -269,27 +269,27 @@ mod tests { } #[test] - fn test_calculcate_object_store_prefix_from_url_and_options() { + fn test_calculate_object_store_prefix_from_url_and_options() { let provider = AzureBlobStoreProvider; let options = HashMap::from_iter([("account_name".to_string(), "bob".to_string())].into_iter()); assert_eq!( "az$container@bob", provider - .calculcate_object_store_prefix("az", "container", Some(&options)) + .calculate_object_store_prefix("az", "container", Some(&options)) .unwrap() ); } #[test] - fn test_calculcate_object_store_prefix_from_url_and_ignored_options() { + fn test_calculate_object_store_prefix_from_url_and_ignored_options() { let provider = AzureBlobStoreProvider; let options = HashMap::from_iter([("account_name".to_string(), "bob".to_string())].into_iter()); assert_eq!( "az$container@account", provider - .calculcate_object_store_prefix( + .calculate_object_store_prefix( "az", "container@account.dfs.core.windows.net", Some(&options) @@ -299,13 +299,13 @@ mod tests { } #[test] - fn test_fail_to_calculcate_object_store_prefix_from_url() { + fn test_fail_to_calculate_object_store_prefix_from_url() { let provider = AzureBlobStoreProvider; let options = HashMap::from_iter([("access_key".to_string(), "myaccesskey".to_string())].into_iter()); let expected = "Invalid user input: Unable to find object store prefix: no Azure account name in URI, and no storage account configured."; let result = provider - .calculcate_object_store_prefix("az", "container", Some(&options)) + .calculate_object_store_prefix("az", "container", Some(&options)) .expect_err("expected error") .to_string(); assert_eq!(expected, &result[..expected.len()]); diff --git a/rust/lance-io/src/object_store/providers/local.rs b/rust/lance-io/src/object_store/providers/local.rs index 49a08f21565..89bdfa5ab62 100644 --- a/rust/lance-io/src/object_store/providers/local.rs +++ b/rust/lance-io/src/object_store/providers/local.rs @@ -49,7 +49,7 @@ impl ObjectStoreProvider for FileStoreProvider { }) } - fn calculcate_object_store_prefix( + fn calculate_object_store_prefix( &self, scheme: &str, _authority: &str, @@ -84,23 +84,23 @@ mod tests { } #[test] - fn test_calculcate_object_store_prefix() { + fn test_calculate_object_store_prefix() { let provider = FileStoreProvider; assert_eq!( "file", provider - .calculcate_object_store_prefix("file", "etc", None) + .calculate_object_store_prefix("file", "etc", None) .unwrap() ); } #[test] - fn test_calculcate_object_store_prefix_for_file_object_store() { + fn test_calculate_object_store_prefix_for_file_object_store() { let provider = FileStoreProvider; assert_eq!( "file-object-store", provider - .calculcate_object_store_prefix("file-object-store", "etc", None) + .calculate_object_store_prefix("file-object-store", "etc", None) .unwrap() ); } diff --git a/rust/lance-io/src/object_store/providers/memory.rs b/rust/lance-io/src/object_store/providers/memory.rs index 8b5804f255d..b03baea104f 100644 --- a/rust/lance-io/src/object_store/providers/memory.rs +++ b/rust/lance-io/src/object_store/providers/memory.rs @@ -42,7 +42,7 @@ impl ObjectStoreProvider for MemoryStoreProvider { Ok(Path::from(output)) } - fn calculcate_object_store_prefix( + fn calculate_object_store_prefix( &self, _scheme: &str, _authority: &str, @@ -67,12 +67,12 @@ mod tests { } #[test] - fn test_calculcate_object_store_prefix() { + fn test_calculate_object_store_prefix() { let provider = MemoryStoreProvider; assert_eq!( "memory", provider - .calculcate_object_store_prefix("memory", "etc", None) + .calculate_object_store_prefix("memory", "etc", None) .unwrap() ); } From ba5bc87d3f3b726bae4cbe44c288f7fc6084cd84 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 14:49:08 -0800 Subject: [PATCH 09/12] fix clippy --- rust/lance-io/src/object_store.rs | 2 +- rust/lance-io/src/object_store/providers.rs | 10 +++++----- .../src/object_store/providers/azure.rs | 20 +++++++++---------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index be7dd494e7e..94967eb93b6 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -750,7 +750,7 @@ impl ObjectStore { let store = match wrapper { Some(wrapper) => { let store_prefix = DEFAULT_OBJECT_STORE_REGISTRY - .calculate_object_store_prefix(&location.to_string(), storage_options) + .calculate_object_store_prefix(location.as_ref(), storage_options) .unwrap(); wrapper.wrap(&store_prefix, store) } diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index 2c8117c7d0e..b752b42b48d 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -149,7 +149,7 @@ impl ObjectStoreRegistry { let valid_schemes = providers.keys().cloned().collect::>().join(", "); message.push_str(&format!("\nValid schemes: {}", valid_schemes)); } - return Error::invalid_input(message, location!()); + Error::invalid_input(message, location!()) } /// Get an object store for a given base path and parameters. @@ -249,14 +249,14 @@ impl ObjectStoreRegistry { // On Windows, drive letters such as C:/ can sometimes be confused for schemes. // So if there is no known object store for this single-letter scheme, treat it // as the local store. - return Ok("file".to_string()); + Ok("file".to_string()) } else { - return Err(self.scheme_not_found_error(scheme)); + Err(self.scheme_not_found_error(scheme)) } - } + }, Some(provider) => { provider.calculate_object_store_prefix(scheme, authority, storage_options) - } + }, } } } diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index d63f277e7eb..235b805b564 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -146,15 +146,14 @@ impl ObjectStoreProvider for AzureBlobStoreProvider { // The URI looks like 'az://container/path-part/file'. // We must look at the storage options to find the account. let mut account = match storage_options { - Some(opts) => StorageOptions::find_configured_storage_account(&opts), + Some(opts) => StorageOptions::find_configured_storage_account(opts), None => None, }; - if let None = account { + if account.is_none() { account = StorageOptions::find_configured_storage_account(&ENV_OPTIONS.0); } let account = account.ok_or(Error::invalid_input( - "Unable to find object store prefix: no Azure ".to_owned() - + "account name in URI, and no storage account configured.", + "Unable to find object store prefix: no Azure account name in URI, and no storage account configured.", location!(), ))?; (authority, account) @@ -177,7 +176,7 @@ impl StorageOptions { } } } - StorageOptions(opts) + Self(opts) } /// Add values from the environment to storage options @@ -200,11 +199,12 @@ impl StorageOptions { .collect() } + #[allow(clippy::manual_map)] fn find_configured_storage_account(map: &HashMap) -> Option { if let Some(account) = map.get("azure_storage_account_name") { - Some(account.to_string()) + Some(account.clone()) } else if let Some(account) = map.get("account_name") { - Some(account.to_string()) + Some(account.clone()) } else { None } @@ -272,7 +272,7 @@ mod tests { fn test_calculate_object_store_prefix_from_url_and_options() { let provider = AzureBlobStoreProvider; let options = - HashMap::from_iter([("account_name".to_string(), "bob".to_string())].into_iter()); + HashMap::from_iter([("account_name".to_string(), "bob".to_string())]); assert_eq!( "az$container@bob", provider @@ -285,7 +285,7 @@ mod tests { fn test_calculate_object_store_prefix_from_url_and_ignored_options() { let provider = AzureBlobStoreProvider; let options = - HashMap::from_iter([("account_name".to_string(), "bob".to_string())].into_iter()); + HashMap::from_iter([("account_name".to_string(), "bob".to_string())]); assert_eq!( "az$container@account", provider @@ -302,7 +302,7 @@ mod tests { fn test_fail_to_calculate_object_store_prefix_from_url() { let provider = AzureBlobStoreProvider; let options = - HashMap::from_iter([("access_key".to_string(), "myaccesskey".to_string())].into_iter()); + HashMap::from_iter([("access_key".to_string(), "myaccesskey".to_string())]); let expected = "Invalid user input: Unable to find object store prefix: no Azure account name in URI, and no storage account configured."; let result = provider .calculate_object_store_prefix("az", "container", Some(&options)) From 1de9fc9f8a3bd3279a0690e0189c71c3958b1a5b Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 14:49:17 -0800 Subject: [PATCH 10/12] fix fmt --- rust/lance-io/src/object_store/providers.rs | 4 ++-- rust/lance-io/src/object_store/providers/azure.rs | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index b752b42b48d..633017d4aeb 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -253,10 +253,10 @@ impl ObjectStoreRegistry { } else { Err(self.scheme_not_found_error(scheme)) } - }, + } Some(provider) => { provider.calculate_object_store_prefix(scheme, authority, storage_options) - }, + } } } } diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index 235b805b564..b3461591e07 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -271,8 +271,7 @@ mod tests { #[test] fn test_calculate_object_store_prefix_from_url_and_options() { let provider = AzureBlobStoreProvider; - let options = - HashMap::from_iter([("account_name".to_string(), "bob".to_string())]); + let options = HashMap::from_iter([("account_name".to_string(), "bob".to_string())]); assert_eq!( "az$container@bob", provider @@ -284,8 +283,7 @@ mod tests { #[test] fn test_calculate_object_store_prefix_from_url_and_ignored_options() { let provider = AzureBlobStoreProvider; - let options = - HashMap::from_iter([("account_name".to_string(), "bob".to_string())]); + let options = HashMap::from_iter([("account_name".to_string(), "bob".to_string())]); assert_eq!( "az$container@account", provider @@ -301,8 +299,7 @@ mod tests { #[test] fn test_fail_to_calculate_object_store_prefix_from_url() { let provider = AzureBlobStoreProvider; - let options = - HashMap::from_iter([("access_key".to_string(), "myaccesskey".to_string())]); + let options = HashMap::from_iter([("access_key".to_string(), "myaccesskey".to_string())]); let expected = "Invalid user input: Unable to find object store prefix: no Azure account name in URI, and no storage account configured."; let result = provider .calculate_object_store_prefix("az", "container", Some(&options)) From 488e50fe8a95316936eac6ba2a8413b65fdf0fb3 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Thu, 6 Nov 2025 16:13:04 -0800 Subject: [PATCH 11/12] change --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 0406f31e4ea..4f07fff49de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ exclude = ["python", "java/lance-jni"] # Python package needs to be built by maturin. resolver = "2" + [workspace.package] version = "0.40.0-beta.1" edition = "2021" From f6a36f683bcceb5d7acff0560559ba13c59cd838 Mon Sep 17 00:00:00 2001 From: "Colin P. McCabe" Date: Fri, 7 Nov 2025 10:14:22 -0800 Subject: [PATCH 12/12] fix java exception catching --- .../java/com/lancedb/lance/FileReaderWriterTest.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/java/src/test/java/com/lancedb/lance/FileReaderWriterTest.java b/java/src/test/java/com/lancedb/lance/FileReaderWriterTest.java index 0e52e351614..04f570b569a 100644 --- a/java/src/test/java/com/lancedb/lance/FileReaderWriterTest.java +++ b/java/src/test/java/com/lancedb/lance/FileReaderWriterTest.java @@ -219,14 +219,18 @@ void testWriteNoData(@TempDir Path tempDir) throws Exception { } @Test - void testWriteWithStorage(@TempDir Path tempDir) { + void testWriteWithStorage(@TempDir Path tempDir) throws IOException { String filePath = "az://fail_bucket" + tempDir.resolve("test_write_with_storage"); BufferAllocator allocator = new RootAllocator(); Map storageOptions = new HashMap<>(); try { LanceFileWriter.open(filePath, allocator, null, storageOptions); - } catch (IOException e) { - assertTrue(e.getMessage().contains("Account must be specified")); + } catch (IllegalArgumentException e) { + assertTrue( + e.getMessage() + .contains( + "Unable to find object store prefix: no Azure account " + + "name in URI, and no storage account configured.")); } storageOptions.put("account_name", "some_account");