diff --git a/deny.toml b/deny.toml index 948349e9f7c..677a87794e1 100644 --- a/deny.toml +++ b/deny.toml @@ -113,6 +113,7 @@ allow = [ "Zlib", "CC0-1.0", "CDLA-Permissive-2.0", + "Apache-2.0 WITH LLVM-exception", "bzip2-1.0.6", ] # The confidence threshold for detecting a license from license text. diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 48b40034ec0..69222e90903 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -106,6 +106,7 @@ use std::{ time::Duration, }; use tokio::task::JoinSet; +use tracing::error; mod assign_action; mod exec; @@ -1131,16 +1132,27 @@ impl MergeInsertJob { match frag_id.first() { Some(ScalarValue::UInt64(Some(frag_id))) => { let frag_id = *frag_id; - let fragment = - dataset - .get_fragment(frag_id as usize) - .ok_or_else(|| Error::Internal { - message: format!( - "Got non-existent fragment id from merge result: {}", - frag_id - ), - location: location!(), - })?; + let fragment = dataset.get_fragment(frag_id as usize).ok_or_else(|| { + error!( + fragment_id = frag_id, + dataset_uri = %dataset.uri(), + manifest_version = dataset.manifest().version, + manifest_path = %dataset.manifest_location().path, + branch = ?dataset.manifest().branch, + "Non-existent fragment id returned from merge result", + ); + Error::Internal { + message: format!( + "Got non-existent fragment id from merge result: {} (uri={}, version={}, manifest={}, branch={})", + frag_id, + dataset.uri(), + dataset.manifest().version, + dataset.manifest_location().path, + dataset.manifest().branch.as_deref().unwrap_or("main"), + ), + location: location!(), + } + })?; let metadata = fragment.metadata.clone(); let fut = handle_fragment( diff --git a/rust/lance/src/io/exec/take.rs b/rust/lance/src/io/exec/take.rs index 6f4a2ebcf91..25ca45e2335 100644 --- a/rust/lance/src/io/exec/take.rs +++ b/rust/lance/src/io/exec/take.rs @@ -30,6 +30,7 @@ use lance_core::utils::address::RowAddress; use lance_core::utils::tokio::get_num_compute_intensive_cpus; use lance_core::{ROW_ADDR, ROW_ID}; use lance_io::scheduler::{ScanScheduler, SchedulerConfig}; +use tracing::error; use crate::dataset::fragment::{FragReadConfig, FragmentReader}; use crate::dataset::rowids::get_row_id_index; @@ -101,11 +102,32 @@ impl TakeStream { async fn do_open_reader(&self, fragment_id: u32) -> DataFusionResult> { let fragment = self - .dataset - .get_fragment(fragment_id as usize) - .ok_or_else(|| { - DataFusionError::Execution(format!("The input to a take operation specified fragment id {} but this fragment does not exist in the dataset", fragment_id)) - })?; + .dataset + .get_fragment(fragment_id as usize) + .ok_or_else(|| { + let branch = self + .dataset + .manifest() + .branch + .as_deref() + .unwrap_or("main"); + error!( + fragment_id, + dataset_uri = %self.dataset.uri(), + manifest_version = self.dataset.manifest().version, + manifest_path = %self.dataset.manifest_location().path, + branch = ?self.dataset.manifest().branch, + "Missing fragment id during take operation", + ); + DataFusionError::Execution(format!( + "The input to a take operation specified fragment id {} but this fragment does not exist in the dataset (uri={}, version={}, manifest={}, branch={})", + fragment_id, + self.dataset.uri(), + self.dataset.manifest().version, + self.dataset.manifest_location().path, + branch + )) + })?; let reader = Arc::new( fragment