From 236840cc905c1f72bd8c110ff52ab7d39d340de0 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 27 Nov 2025 13:50:08 +0800 Subject: [PATCH 1/2] chore: add more information while error happened Signed-off-by: Xuanwo --- rust/lance/src/dataset/write/merge_insert.rs | 32 ++++++++++++++------ rust/lance/src/io/exec/take.rs | 32 +++++++++++++++++--- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 48b40034ec0..69222e90903 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -106,6 +106,7 @@ use std::{ time::Duration, }; use tokio::task::JoinSet; +use tracing::error; mod assign_action; mod exec; @@ -1131,16 +1132,27 @@ impl MergeInsertJob { match frag_id.first() { Some(ScalarValue::UInt64(Some(frag_id))) => { let frag_id = *frag_id; - let fragment = - dataset - .get_fragment(frag_id as usize) - .ok_or_else(|| Error::Internal { - message: format!( - "Got non-existent fragment id from merge result: {}", - frag_id - ), - location: location!(), - })?; + let fragment = dataset.get_fragment(frag_id as usize).ok_or_else(|| { + error!( + fragment_id = frag_id, + dataset_uri = %dataset.uri(), + manifest_version = dataset.manifest().version, + manifest_path = %dataset.manifest_location().path, + branch = ?dataset.manifest().branch, + "Non-existent fragment id returned from merge result", + ); + Error::Internal { + message: format!( + "Got non-existent fragment id from merge result: {} (uri={}, version={}, manifest={}, branch={})", + frag_id, + dataset.uri(), + dataset.manifest().version, + dataset.manifest_location().path, + dataset.manifest().branch.as_deref().unwrap_or("main"), + ), + location: location!(), + } + })?; let metadata = fragment.metadata.clone(); let fut = handle_fragment( diff --git a/rust/lance/src/io/exec/take.rs b/rust/lance/src/io/exec/take.rs index 6f4a2ebcf91..25ca45e2335 100644 --- 
a/rust/lance/src/io/exec/take.rs +++ b/rust/lance/src/io/exec/take.rs @@ -30,6 +30,7 @@ use lance_core::utils::address::RowAddress; use lance_core::utils::tokio::get_num_compute_intensive_cpus; use lance_core::{ROW_ADDR, ROW_ID}; use lance_io::scheduler::{ScanScheduler, SchedulerConfig}; +use tracing::error; use crate::dataset::fragment::{FragReadConfig, FragmentReader}; use crate::dataset::rowids::get_row_id_index; @@ -101,11 +102,32 @@ impl TakeStream { async fn do_open_reader(&self, fragment_id: u32) -> DataFusionResult<Arc<FragmentReader>> { let fragment = self - .dataset - .get_fragment(fragment_id as usize) - .ok_or_else(|| { - DataFusionError::Execution(format!("The input to a take operation specified fragment id {} but this fragment does not exist in the dataset", fragment_id)) - })?; + .dataset + .get_fragment(fragment_id as usize) + .ok_or_else(|| { + let branch = self + .dataset + .manifest() + .branch + .as_deref() + .unwrap_or("main"); + error!( + fragment_id, + dataset_uri = %self.dataset.uri(), + manifest_version = self.dataset.manifest().version, + manifest_path = %self.dataset.manifest_location().path, + branch = ?self.dataset.manifest().branch, + "Missing fragment id during take operation", + ); + DataFusionError::Execution(format!( + "The input to a take operation specified fragment id {} but this fragment does not exist in the dataset (uri={}, version={}, manifest={}, branch={})", + fragment_id, + self.dataset.uri(), + self.dataset.manifest().version, + self.dataset.manifest_location().path, + branch + )) + })?; let reader = Arc::new( fragment From 1926cb6e0348be8aa3f0a7a602015bc8dee3a860 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 27 Nov 2025 13:59:13 +0800 Subject: [PATCH 2/2] Allow Signed-off-by: Xuanwo --- deny.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/deny.toml b/deny.toml index e192267b97b..367674c1b4b 100644 --- a/deny.toml +++ b/deny.toml @@ -113,6 +113,7 @@ allow = [ "Zlib", "CC0-1.0", "CDLA-Permissive-2.0", + "Apache-2.0 WITH
LLVM-exception", ] # The confidence threshold for detecting a license from license text. # The higher the value, the more closely the license text must be to the