From 86ed76374f005b33af79efdbde468f79cf3cc71a Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 14 Nov 2025 13:36:23 -0800 Subject: [PATCH 1/4] feat: add inline optimization for dir namespace --- Cargo.lock | 1 + rust/lance-namespace-impls/Cargo.toml | 1 + rust/lance-namespace-impls/src/dir.rs | 21 +++ .../lance-namespace-impls/src/dir/manifest.rs | 163 +++++++++++++++++- 4 files changed, 181 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1bd98c9145f..e6f0b73b826 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4847,6 +4847,7 @@ dependencies = [ "futures", "lance", "lance-core", + "lance-index", "lance-io", "lance-namespace", "log", diff --git a/rust/lance-namespace-impls/Cargo.toml b/rust/lance-namespace-impls/Cargo.toml index 40628b1fb4b..3af76709aa8 100644 --- a/rust/lance-namespace-impls/Cargo.toml +++ b/rust/lance-namespace-impls/Cargo.toml @@ -37,6 +37,7 @@ reqwest = { version = "0.12", optional = true, default-features = false, feature # Directory implementation dependencies (always enabled) url = { workspace = true } lance = { workspace = true } +lance-index = { workspace = true } lance-io = { workspace = true } object_store = { workspace = true } arrow = { workspace = true } diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 0cf014be27d..9f01169e241 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -74,6 +74,7 @@ pub struct DirectoryNamespaceBuilder { session: Option>, manifest_enabled: bool, dir_listing_enabled: bool, + inline_optimization_enabled: bool, } impl DirectoryNamespaceBuilder { @@ -89,6 +90,7 @@ impl DirectoryNamespaceBuilder { session: None, manifest_enabled: true, dir_listing_enabled: true, // Default to enabled for backwards compatibility + inline_optimization_enabled: true, // Default to enabled } } @@ -110,6 +112,16 @@ impl DirectoryNamespaceBuilder { self } + /// Enable or disable inline optimization of the __manifest table. + /// + /// When enabled (default), performs compaction and indexing on the __manifest table + /// after every write operation to maintain optimal performance. + /// When disabled, manual optimization must be performed separately. + pub fn inline_optimization_enabled(mut self, enabled: bool) -> Self { + self.inline_optimization_enabled = enabled; + self + } + /// Create a DirectoryNamespaceBuilder from properties HashMap. /// /// This method parses a properties map into builder configuration. @@ -117,6 +129,7 @@ impl DirectoryNamespaceBuilder { /// - `root`: The root directory path (required) /// - `manifest_enabled`: Enable manifest-based table tracking (optional, default: true) /// - `dir_listing_enabled`: Enable directory listing for table discovery (optional, default: true) + /// - `inline_optimization_enabled`: Enable inline optimization of __manifest table (optional, default: true) /// - `storage.*`: Storage options (optional, prefix will be stripped) /// /// # Arguments @@ -190,12 +203,19 @@ impl DirectoryNamespaceBuilder { .and_then(|v| v.parse::().ok()) .unwrap_or(true); + // Extract inline_optimization_enabled (default: true) + let inline_optimization_enabled = properties + .get("inline_optimization_enabled") + .and_then(|v| v.parse::().ok()) + .unwrap_or(true); + Ok(Self { root: root.trim_end_matches('/').to_string(), storage_options, session, manifest_enabled, dir_listing_enabled, + inline_optimization_enabled, }) } @@ -262,6 +282,7 @@ impl DirectoryNamespaceBuilder { object_store.clone(), base_path.clone(), self.dir_listing_enabled, + self.inline_optimization_enabled, ) .await { diff --git a/rust/lance-namespace-impls/src/dir/manifest.rs b/rust/lance-namespace-impls/src/dir/manifest.rs index 2c785b598a5..f53e1c430a8 100644 --- a/rust/lance-namespace-impls/src/dir/manifest.rs +++ b/rust/lance-namespace-impls/src/dir/manifest.rs @@ -12,10 +12,15 @@ use arrow_ipc::reader::StreamReader; use async_trait::async_trait; use bytes::Bytes; use futures::stream::StreamExt; +use lance::dataset::optimize::{compact_files, CompactionOptions}; use lance::dataset::WriteParams; use lance::session::Session; use lance::{dataset::scanner::Scanner, Dataset}; use lance_core::{box_error, Error, Result}; +use lance_index::optimize::OptimizeOptions; +use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams}; +use lance_index::traits::DatasetIndexExt; +use lance_index::IndexType; use lance_io::object_store::ObjectStore; use lance_namespace::models::{ CreateEmptyTableRequest, CreateEmptyTableResponse, CreateNamespaceRequest, @@ -42,6 +47,14 @@ use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; const MANIFEST_TABLE_NAME: &str = "__manifest"; const DELIMITER: &str = "$"; +// Index names for the __manifest table +/// BTREE index on the object_id column for fast lookups +const OBJECT_ID_INDEX_NAME: &str = "object_id_btree"; +/// Bitmap index on the object_type column for filtering by type +const OBJECT_TYPE_INDEX_NAME: &str = "object_type_bitmap"; +/// LabelList index on the base_objects column for materialized view dependencies +const BASE_OBJECTS_INDEX_NAME: &str = "base_objects_label_list"; + /// Object types that can be stored in the manifest #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ObjectType { @@ -226,6 +239,9 @@ pub struct ManifestNamespace { /// If true, root namespace tables use {table_name}.lance naming /// If false, they use namespace-prefixed names dir_listing_enabled: bool, + /// Whether to perform inline optimization (compaction and indexing) on the __manifest table + /// after every write. Defaults to true. + inline_optimization_enabled: bool, } impl ManifestNamespace { @@ -237,6 +253,7 @@ impl ManifestNamespace { object_store: Arc, base_path: Path, dir_listing_enabled: bool, + inline_optimization_enabled: bool, ) -> Result { let manifest_dataset = Self::create_or_get_manifest(&root, object_store.clone(), session.clone()).await?; @@ -249,6 +266,7 @@ impl ManifestNamespace { base_path, manifest_dataset, dir_listing_enabled, + inline_optimization_enabled, }) } @@ -333,6 +351,121 @@ impl ManifestNamespace { Ok(full_url.to_string()) } + /// Perform inline optimization on the __manifest table. + /// + /// This method: + /// 1. Creates three indexes on the manifest table: + /// - BTREE index on object_id for fast lookups + /// - Bitmap index on object_type for filtering by type + /// - LabelList index on base_objects for materialized view dependencies + /// 2. Runs file compaction to merge small files + /// 3. Optimizes existing indices + /// + /// This is called automatically after writes when inline_optimization_enabled is true. + async fn run_inline_optimization(&self) -> Result<()> { + if !self.inline_optimization_enabled { + return Ok(()); + } + + // Get a mutable reference to the dataset to perform optimization + let mut dataset_guard = self.manifest_dataset.get_mut().await?; + let dataset: &mut Dataset = &mut *dataset_guard; + + // Step 1: Create indexes if they don't already exist + let indices = dataset.load_indices().await.map_err(|e| Error::IO { + source: box_error(std::io::Error::other(format!("Failed to load indices: {}", e))), + location: location!(), + })?; + + // Check which indexes already exist + let has_object_id_index = indices.iter().any(|idx| idx.name == OBJECT_ID_INDEX_NAME); + let has_object_type_index = indices.iter().any(|idx| idx.name == OBJECT_TYPE_INDEX_NAME); + let has_base_objects_index = indices.iter().any(|idx| idx.name == BASE_OBJECTS_INDEX_NAME); + + // Create BTREE index on object_id + if !has_object_id_index { + log::info!("Creating BTREE index '{}' on object_id for __manifest table", OBJECT_ID_INDEX_NAME); + let params = ScalarIndexParams::for_builtin(BuiltinIndexType::BTree); + dataset + .create_index(&["object_id"], IndexType::BTree, Some(OBJECT_ID_INDEX_NAME.to_string()), ¶ms, true) + .await + .map_err(|e| Error::IO { + source: box_error(std::io::Error::other(format!( + "Failed to create BTREE index on object_id: {}", + e + ))), + location: location!(), + })?; + } + + // Create Bitmap index on object_type + if !has_object_type_index { + log::info!("Creating Bitmap index '{}' on object_type for __manifest table", OBJECT_TYPE_INDEX_NAME); + let params = ScalarIndexParams::default(); + dataset + .create_index(&["object_type"], IndexType::Bitmap, Some(OBJECT_TYPE_INDEX_NAME.to_string()), ¶ms, true) + .await + .map_err(|e| Error::IO { + source: box_error(std::io::Error::other(format!( + "Failed to create Bitmap index on object_type: {}", + e + ))), + location: location!(), + })?; + } + + // Create LabelList index on base_objects + if !has_base_objects_index { + log::info!("Creating LabelList index '{}' on base_objects for __manifest table", BASE_OBJECTS_INDEX_NAME); + let params = ScalarIndexParams::default(); + dataset + .create_index(&["base_objects"], IndexType::LabelList, Some(BASE_OBJECTS_INDEX_NAME.to_string()), ¶ms, true) + .await + .map_err(|e| Error::IO { + source: box_error(std::io::Error::other(format!( + "Failed to create LabelList index on base_objects: {}", + e + ))), + location: location!(), + })?; + } + + // Step 2: Run file compaction + log::debug!("Running file compaction on __manifest table"); + let compaction_metrics = compact_files(dataset, CompactionOptions::default(), None) + .await + .map_err(|e| Error::IO { + source: box_error(std::io::Error::other(format!( + "Failed to compact files: {}", + e + ))), + location: location!(), + })?; + + if compaction_metrics.fragments_removed > 0 { + log::info!( + "Compacted __manifest table: removed {} fragments, added {} fragments", + compaction_metrics.fragments_removed, + compaction_metrics.fragments_added + ); + } + + // Step 3: Optimize indices + log::debug!("Optimizing indices on __manifest table"); + dataset + .optimize_indices(&OptimizeOptions::default()) + .await + .map_err(|e| Error::IO { + source: box_error(std::io::Error::other(format!( + "Failed to optimize indices: {}", + e + ))), + location: location!(), + })?; + + Ok(()) + } + /// Get the manifest schema fn manifest_schema() -> Arc { Arc::new(ArrowSchema::new(vec![ @@ -511,30 +644,43 @@ impl ManifestNamespace { object_type: ObjectType, location: Option, ) -> Result<()> { - self.insert_into_manifest_with_metadata(object_id, object_type, location, None) + self.insert_into_manifest_with_metadata(object_id, object_type, location, None, None) .await } - /// Insert an entry into the manifest table with metadata + /// Insert an entry into the manifest table with metadata and base_objects async fn insert_into_manifest_with_metadata( &self, object_id: String, object_type: ObjectType, location: Option, metadata: Option, + base_objects: Option>, ) -> Result<()> { use arrow::array::builder::{ListBuilder, StringBuilder}; let schema = Self::manifest_schema(); - // Create empty base_objects array + // Create base_objects array from the provided list let string_builder = StringBuilder::new(); let mut list_builder = ListBuilder::new(string_builder).with_field(Arc::new(Field::new( "object_id", DataType::Utf8, true, ))); - list_builder.append_null(); + + match base_objects { + Some(objects) => { + for obj in objects { + list_builder.values().append_value(obj); + } + list_builder.append(true); + } + None => { + list_builder.append_null(); + } + } + let base_objects_array = list_builder.finish(); // Create arrays with optional values @@ -621,6 +767,9 @@ impl ManifestNamespace { let new_dataset = Arc::try_unwrap(new_dataset_arc).unwrap_or_else(|arc| (*arc).clone()); self.manifest_dataset.set_latest(new_dataset).await; + // Run inline optimization after write + self.run_inline_optimization().await?; + Ok(()) } @@ -639,6 +788,10 @@ impl ManifestNamespace { } // Drop the guard here self.manifest_dataset.reload().await?; + + // Run inline optimization after delete + self.run_inline_optimization().await?; + Ok(()) } @@ -1194,7 +1347,7 @@ impl LanceNamespace for ManifestNamespace { } }); - self.insert_into_manifest_with_metadata(object_id, ObjectType::Namespace, None, metadata) + self.insert_into_manifest_with_metadata(object_id, ObjectType::Namespace, None, metadata, None) .await?; Ok(CreateNamespaceResponse { From 8062c923ab655897670f779aae2e294e061251b6 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 14 Nov 2025 14:27:53 -0800 Subject: [PATCH 2/4] improve testing --- Cargo.lock | 1 + python/Cargo.lock | 1 + rust/lance-namespace-impls/Cargo.toml | 1 + rust/lance-namespace-impls/src/dir.rs | 2 +- .../lance-namespace-impls/src/dir/manifest.rs | 272 ++++++++++++------ 5 files changed, 194 insertions(+), 83 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e6f0b73b826..cc68a400af1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4854,6 +4854,7 @@ dependencies = [ "object_store", "rand 0.9.2", "reqwest", + "rstest", "serde", "serde_json", "snafu", diff --git a/python/Cargo.lock b/python/Cargo.lock index f9b7e015187..275014f9d3b 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -4257,6 +4257,7 @@ dependencies = [ "futures", "lance", "lance-core", + "lance-index", "lance-io", "lance-namespace", "log", diff --git a/rust/lance-namespace-impls/Cargo.toml b/rust/lance-namespace-impls/Cargo.toml index 3af76709aa8..a973f24c55b 100644 --- a/rust/lance-namespace-impls/Cargo.toml +++ b/rust/lance-namespace-impls/Cargo.toml @@ -66,6 +66,7 @@ tempfile.workspace = true wiremock.workspace = true arrow = { workspace = true } arrow-ipc = { workspace = true } +rstest.workspace = true [lints] workspace = true diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 9f01169e241..3b9e1807172 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -90,7 +90,7 @@ impl DirectoryNamespaceBuilder { session: None, manifest_enabled: true, dir_listing_enabled: true, // Default to enabled for backwards compatibility - inline_optimization_enabled: true, // Default to enabled + inline_optimization_enabled: true, } } diff --git a/rust/lance-namespace-impls/src/dir/manifest.rs b/rust/lance-namespace-impls/src/dir/manifest.rs index f53e1c430a8..a6daa35eade 100644 --- a/rust/lance-namespace-impls/src/dir/manifest.rs +++ b/rust/lance-namespace-impls/src/dir/manifest.rs @@ -52,7 +52,7 @@ const DELIMITER: &str = "$"; const OBJECT_ID_INDEX_NAME: &str = "object_id_btree"; /// Bitmap index on the object_type column for filtering by type const OBJECT_TYPE_INDEX_NAME: &str = "object_type_bitmap"; -/// LabelList index on the base_objects column for materialized view dependencies +/// LabelList index on the base_objects column for view dependencies const BASE_OBJECTS_INDEX_NAME: &str = "base_objects_label_list"; /// Object types that can be stored in the manifest @@ -357,7 +357,7 @@ impl ManifestNamespace { /// 1. Creates three indexes on the manifest table: /// - BTREE index on object_id for fast lookups /// - Bitmap index on object_type for filtering by type - /// - LabelList index on base_objects for materialized view dependencies + /// - LabelList index on base_objects for view dependencies /// 2. Runs file compaction to merge small files /// 3. Optimizes existing indices /// @@ -369,99 +369,132 @@ impl ManifestNamespace { // Get a mutable reference to the dataset to perform optimization let mut dataset_guard = self.manifest_dataset.get_mut().await?; - let dataset: &mut Dataset = &mut *dataset_guard; + let dataset: &mut Dataset = &mut dataset_guard; // Step 1: Create indexes if they don't already exist let indices = dataset.load_indices().await.map_err(|e| Error::IO { - source: box_error(std::io::Error::other(format!("Failed to load indices: {}", e))), + source: box_error(std::io::Error::other(format!( + "Failed to load indices: {}", + e + ))), location: location!(), })?; // Check which indexes already exist let has_object_id_index = indices.iter().any(|idx| idx.name == OBJECT_ID_INDEX_NAME); let has_object_type_index = indices.iter().any(|idx| idx.name == OBJECT_TYPE_INDEX_NAME); - let has_base_objects_index = indices.iter().any(|idx| idx.name == BASE_OBJECTS_INDEX_NAME); + let has_base_objects_index = indices + .iter() + .any(|idx| idx.name == BASE_OBJECTS_INDEX_NAME); // Create BTREE index on object_id if !has_object_id_index { - log::info!("Creating BTREE index '{}' on object_id for __manifest table", OBJECT_ID_INDEX_NAME); + log::debug!( + "Creating BTREE index '{}' on object_id for __manifest table", + OBJECT_ID_INDEX_NAME + ); let params = ScalarIndexParams::for_builtin(BuiltinIndexType::BTree); - dataset - .create_index(&["object_id"], IndexType::BTree, Some(OBJECT_ID_INDEX_NAME.to_string()), ¶ms, true) + if let Err(e) = dataset + .create_index( + &["object_id"], + IndexType::BTree, + Some(OBJECT_ID_INDEX_NAME.to_string()), + ¶ms, + true, + ) .await - .map_err(|e| Error::IO { - source: box_error(std::io::Error::other(format!( - "Failed to create BTREE index on object_id: {}", - e - ))), - location: location!(), - })?; + { + log::warn!("Failed to create BTREE index on object_id for __manifest table: {}. Query performance may be impacted.", e); + } else { + log::info!( + "Created BTREE index '{}' on object_id for __manifest table", + OBJECT_ID_INDEX_NAME + ); + } } // Create Bitmap index on object_type if !has_object_type_index { - log::info!("Creating Bitmap index '{}' on object_type for __manifest table", OBJECT_TYPE_INDEX_NAME); + log::debug!( + "Creating Bitmap index '{}' on object_type for __manifest table", + OBJECT_TYPE_INDEX_NAME + ); let params = ScalarIndexParams::default(); - dataset - .create_index(&["object_type"], IndexType::Bitmap, Some(OBJECT_TYPE_INDEX_NAME.to_string()), ¶ms, true) + if let Err(e) = dataset + .create_index( + &["object_type"], + IndexType::Bitmap, + Some(OBJECT_TYPE_INDEX_NAME.to_string()), + ¶ms, + true, + ) .await - .map_err(|e| Error::IO { - source: box_error(std::io::Error::other(format!( - "Failed to create Bitmap index on object_type: {}", - e - ))), - location: location!(), - })?; + { + log::warn!("Failed to create Bitmap index on object_type for __manifest table: {}. Query performance may be impacted.", e); + } else { + log::info!( + "Created Bitmap index '{}' on object_type for __manifest table", + OBJECT_TYPE_INDEX_NAME + ); + } } // Create LabelList index on base_objects if !has_base_objects_index { - log::info!("Creating LabelList index '{}' on base_objects for __manifest table", BASE_OBJECTS_INDEX_NAME); + log::debug!( + "Creating LabelList index '{}' on base_objects for __manifest table", + BASE_OBJECTS_INDEX_NAME + ); let params = ScalarIndexParams::default(); - dataset - .create_index(&["base_objects"], IndexType::LabelList, Some(BASE_OBJECTS_INDEX_NAME.to_string()), ¶ms, true) + if let Err(e) = dataset + .create_index( + &["base_objects"], + IndexType::LabelList, + Some(BASE_OBJECTS_INDEX_NAME.to_string()), + ¶ms, + true, + ) .await - .map_err(|e| Error::IO { - source: box_error(std::io::Error::other(format!( - "Failed to create LabelList index on base_objects: {}", - e - ))), - location: location!(), - })?; + { + log::warn!("Failed to create LabelList index on base_objects for __manifest table: {}. Query performance may be impacted.", e); + } else { + log::info!( + "Created LabelList index '{}' on base_objects for __manifest table", + BASE_OBJECTS_INDEX_NAME + ); + } } // Step 2: Run file compaction log::debug!("Running file compaction on __manifest table"); - let compaction_metrics = compact_files(dataset, CompactionOptions::default(), None) - .await - .map_err(|e| Error::IO { - source: box_error(std::io::Error::other(format!( - "Failed to compact files: {}", - e - ))), - location: location!(), - })?; - - if compaction_metrics.fragments_removed > 0 { - log::info!( - "Compacted __manifest table: removed {} fragments, added {} fragments", - compaction_metrics.fragments_removed, - compaction_metrics.fragments_added - ); + match compact_files(dataset, CompactionOptions::default(), None).await { + Ok(compaction_metrics) => { + if compaction_metrics.fragments_removed > 0 { + log::info!( + "Compacted __manifest table: removed {} fragments, added {} fragments", + compaction_metrics.fragments_removed, + compaction_metrics.fragments_added + ); + } + } + Err(e) => { + log::warn!("Failed to compact files for __manifest table: {}. Continuing with optimization.", e); + } } // Step 3: Optimize indices log::debug!("Optimizing indices on __manifest table"); - dataset - .optimize_indices(&OptimizeOptions::default()) - .await - .map_err(|e| Error::IO { - source: box_error(std::io::Error::other(format!( - "Failed to optimize indices: {}", + match dataset.optimize_indices(&OptimizeOptions::default()).await { + Ok(_) => { + log::info!("Successfully optimized indices on __manifest table"); + } + Err(e) => { + log::warn!( + "Failed to optimize indices on __manifest table: {}. Continuing anyway.", e - ))), - location: location!(), - })?; + ); + } + } Ok(()) } @@ -471,13 +504,13 @@ impl ManifestNamespace { Arc::new(ArrowSchema::new(vec![ Field::new("object_id", DataType::Utf8, false), Field::new("object_type", DataType::Utf8, false), - Field::new("location", DataType::Utf8, true), // Optional: namespaces don't have location - Field::new("metadata", DataType::Utf8, true), // Optional: tables don't have metadata + Field::new("location", DataType::Utf8, true), + Field::new("metadata", DataType::Utf8, true), Field::new( "base_objects", DataType::List(Arc::new(Field::new("object_id", DataType::Utf8, true))), true, - ), // Optional: mainly for objects like view to record dependency + ), ])) } @@ -1347,8 +1380,14 @@ impl LanceNamespace for ManifestNamespace { } }); - self.insert_into_manifest_with_metadata(object_id, ObjectType::Namespace, None, metadata, None) - .await?; + self.insert_into_manifest_with_metadata( + object_id, + ObjectType::Namespace, + None, + metadata, + None, + ) + .await?; Ok(CreateNamespaceResponse { properties: request.properties, @@ -1669,6 +1708,7 @@ mod tests { TableExistsRequest, }; use lance_namespace::LanceNamespace; + use rstest::rstest; fn create_test_ipc_data() -> Vec { use arrow::array::{Int32Array, StringArray}; @@ -1700,13 +1740,17 @@ mod tests { buffer } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_manifest_namespace_basic_create_and_list() { + async fn test_manifest_namespace_basic_create_and_list(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); // Create a DirectoryNamespace with manifest enabled (default) let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1735,12 +1779,16 @@ mod tests { assert_eq!(response.tables[0], "test_table"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_manifest_namespace_table_exists() { + async fn test_manifest_namespace_table_exists(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1767,12 +1815,16 @@ mod tests { assert!(result.is_ok()); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_manifest_namespace_describe_table() { + async fn test_manifest_namespace_describe_table(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1800,12 +1852,16 @@ mod tests { assert!(response.location.unwrap().contains("test_table")); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_manifest_namespace_drop_table() { + async fn test_manifest_namespace_drop_table(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1837,12 +1893,16 @@ mod tests { assert_eq!(response.tables.len(), 0); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_manifest_namespace_multiple_tables() { + async fn test_manifest_namespace_multiple_tables(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1868,14 +1928,18 @@ mod tests { assert!(response.tables.contains(&"table3".to_string())); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_directory_only_mode() { + async fn test_directory_only_mode(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); // Create a DirectoryNamespace with manifest disabled let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) .manifest_enabled(false) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1905,8 +1969,11 @@ mod tests { assert_eq!(response.tables[0], "test_table"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_dual_mode_merge() { + async fn test_dual_mode_merge(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); @@ -1914,6 +1981,7 @@ mod tests { let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) .manifest_enabled(true) .dir_listing_enabled(true) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1935,8 +2003,11 @@ mod tests { assert_eq!(response.tables[0], "table1"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_manifest_only_mode() { + async fn test_manifest_only_mode(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); @@ -1944,6 +2015,7 @@ mod tests { let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) .manifest_enabled(true) .dir_listing_enabled(false) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1965,12 +2037,16 @@ mod tests { assert_eq!(response.tables[0], "test_table"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_drop_nonexistent_table() { + async fn test_drop_nonexistent_table(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -1982,12 +2058,16 @@ mod tests { assert!(result.is_err()); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_create_duplicate_table_fails() { + async fn test_create_duplicate_table_fails(#[case] inline_optimization: bool) { let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -2010,8 +2090,11 @@ mod tests { assert!(result.is_err()); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_create_child_namespace() { + async fn test_create_child_namespace(#[case] inline_optimization: bool) { use lance_namespace::models::{ CreateNamespaceRequest, ListNamespacesRequest, NamespaceExistsRequest, }; @@ -2020,6 +2103,7 @@ mod tests { let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -2054,8 +2138,11 @@ mod tests { assert_eq!(namespaces.namespaces[0], "ns1"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_create_nested_namespace() { + async fn test_create_nested_namespace(#[case] inline_optimization: bool) { use lance_namespace::models::{ CreateNamespaceRequest, ListNamespacesRequest, NamespaceExistsRequest, }; @@ -2064,6 +2151,7 @@ mod tests { let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -2103,14 +2191,18 @@ mod tests { assert_eq!(namespaces.namespaces[0], "child"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_create_namespace_without_parent_fails() { + async fn test_create_namespace_without_parent_fails(#[case] inline_optimization: bool) { use lance_namespace::models::CreateNamespaceRequest; let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -2122,8 +2214,11 @@ mod tests { assert!(result.is_err(), "Should fail when parent doesn't exist"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_drop_child_namespace() { + async fn test_drop_child_namespace(#[case] inline_optimization: bool) { use lance_namespace::models::{ CreateNamespaceRequest, DropNamespaceRequest, NamespaceExistsRequest, }; @@ -2132,6 +2227,7 @@ mod tests { let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -2159,14 +2255,18 @@ mod tests { assert!(result.is_err(), "Namespace should not exist after drop"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_drop_namespace_with_children_fails() { + async fn test_drop_namespace_with_children_fails(#[case] inline_optimization: bool) { use lance_namespace::models::{CreateNamespaceRequest, DropNamespaceRequest}; let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -2187,8 +2287,11 @@ mod tests { assert!(result.is_err(), "Should fail when namespace has children"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_create_table_in_child_namespace() { + async fn test_create_table_in_child_namespace(#[case] inline_optimization: bool) { use lance_namespace::models::{ CreateNamespaceRequest, CreateTableRequest, ListTablesRequest, }; @@ -2197,6 +2300,7 @@ mod tests { let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); @@ -2232,14 +2336,18 @@ mod tests { assert_eq!(tables.tables[0], "table1"); } + #[rstest] + #[case::with_optimization(true)] + #[case::without_optimization(false)] #[tokio::test] - async fn test_describe_child_namespace() { + async fn test_describe_child_namespace(#[case] inline_optimization: bool) { use lance_namespace::models::{CreateNamespaceRequest, DescribeNamespaceRequest}; let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); let dir_namespace = DirectoryNamespaceBuilder::new(temp_path) + .inline_optimization_enabled(inline_optimization) .build() .await .unwrap(); From 74e594ea866cefff09a1e49522971504c3438a88 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 14 Nov 2025 14:37:53 -0800 Subject: [PATCH 3/4] use debug for error logging --- rust/lance-namespace-impls/src/dir/manifest.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/lance-namespace-impls/src/dir/manifest.rs b/rust/lance-namespace-impls/src/dir/manifest.rs index a6daa35eade..c4de415f36a 100644 --- a/rust/lance-namespace-impls/src/dir/manifest.rs +++ b/rust/lance-namespace-impls/src/dir/manifest.rs @@ -404,7 +404,7 @@ impl ManifestNamespace { ) .await { - log::warn!("Failed to create BTREE index on object_id for __manifest table: {}. Query performance may be impacted.", e); + log::warn!("Failed to create BTREE index on object_id for __manifest table: {:?}. Query performance may be impacted.", e); } else { log::info!( "Created BTREE index '{}' on object_id for __manifest table", @@ -430,7 +430,7 @@ impl ManifestNamespace { ) .await { - log::warn!("Failed to create Bitmap index on object_type for __manifest table: {}. Query performance may be impacted.", e); + log::warn!("Failed to create Bitmap index on object_type for __manifest table: {:?}. Query performance may be impacted.", e); } else { log::info!( "Created Bitmap index '{}' on object_type for __manifest table", @@ -456,7 +456,7 @@ impl ManifestNamespace { ) .await { - log::warn!("Failed to create LabelList index on base_objects for __manifest table: {}. Query performance may be impacted.", e); + log::warn!("Failed to create LabelList index on base_objects for __manifest table: {:?}. Query performance may be impacted.", e); } else { log::info!( "Created LabelList index '{}' on base_objects for __manifest table", @@ -478,7 +478,7 @@ impl ManifestNamespace { } } Err(e) => { - log::warn!("Failed to compact files for __manifest table: {}. Continuing with optimization.", e); + log::warn!("Failed to compact files for __manifest table: {:?}. Continuing with optimization.", e); } } @@ -490,7 +490,7 @@ impl ManifestNamespace { } Err(e) => { log::warn!( - "Failed to optimize indices on __manifest table: {}. Continuing anyway.", + "Failed to optimize indices on __manifest table: {:?}. Continuing anyway.", e ); } From a0073d4bb8c0338cb47460eba5ae9eeed50b33bf Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 14 Nov 2025 14:47:41 -0800 Subject: [PATCH 4/4] ensure optimize don't fail --- .../lance-namespace-impls/src/dir/manifest.rs | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/rust/lance-namespace-impls/src/dir/manifest.rs b/rust/lance-namespace-impls/src/dir/manifest.rs index c4de415f36a..0944c629d61 100644 --- a/rust/lance-namespace-impls/src/dir/manifest.rs +++ b/rust/lance-namespace-impls/src/dir/manifest.rs @@ -372,13 +372,7 @@ impl ManifestNamespace { let dataset: &mut Dataset = &mut dataset_guard; // Step 1: Create indexes if they don't already exist - let indices = dataset.load_indices().await.map_err(|e| Error::IO { - source: box_error(std::io::Error::other(format!( - "Failed to load indices: {}", - e - ))), - location: location!(), - })?; + let indices = dataset.load_indices().await?; // Check which indexes already exist let has_object_id_index = indices.iter().any(|idx| idx.name == OBJECT_ID_INDEX_NAME); @@ -801,7 +795,12 @@ impl ManifestNamespace { self.manifest_dataset.set_latest(new_dataset).await; // Run inline optimization after write - self.run_inline_optimization().await?; + if let Err(e) = self.run_inline_optimization().await { + log::warn!( + "Unexpected failure when running inline optimization: {:?}", + e + ); + } Ok(()) } @@ -823,7 +822,12 @@ impl ManifestNamespace { self.manifest_dataset.reload().await?; // Run inline optimization after delete - self.run_inline_optimization().await?; + if let Err(e) = self.run_inline_optimization().await { + log::warn!( + "Unexpected failure when running inline optimization: {:?}", + e + ); + } Ok(()) }