From da6afdafc1b14324ac3077af2429134a9b17f006 Mon Sep 17 00:00:00 2001 From: codedump Date: Fri, 2 Jan 2026 17:56:23 +0800 Subject: [PATCH] feat: refactor use of Error::io --- java/lance-jni/src/ffi.rs | 8 ++-- java/lance-jni/src/sql.rs | 2 +- python/src/dataset.rs | 21 +++++----- rust/lance-core/src/error.rs | 20 ++++++++++ rust/lance-datafusion/src/projection.rs | 4 +- rust/lance-datafusion/src/sql.rs | 5 ++- .../src/previous/format/metadata.rs | 2 +- rust/lance-file/src/previous/reader.rs | 6 +-- rust/lance-file/src/previous/writer/mod.rs | 8 ++-- rust/lance-file/src/reader.rs | 4 +- .../src/scalar/inverted/tokenizer/jieba.rs | 17 ++++---- .../src/scalar/inverted/tokenizer/lindera.rs | 4 +- rust/lance-index/src/vector/utils.rs | 4 +- rust/lance-io/src/encodings/binary.rs | 4 +- rust/lance-io/src/encodings/plain.rs | 2 +- rust/lance-io/src/utils.rs | 2 +- rust/lance-table/src/format.rs | 2 +- rust/lance-table/src/format/index.rs | 2 +- rust/lance-table/src/format/manifest.rs | 2 +- rust/lance-table/src/io/commit/dynamodb.rs | 40 +++++++++---------- rust/lance-table/src/io/manifest.rs | 10 +++-- rust/lance/src/dataset/blob.rs | 24 +++++------ rust/lance/src/dataset/fragment.rs | 31 +++++++------- rust/lance/src/dataset/hash_joiner.rs | 22 +++++++--- rust/lance/src/dataset/scanner.rs | 4 +- rust/lance/src/dataset/transaction.rs | 4 +- rust/lance/src/dataset/updater.rs | 4 +- rust/lance/src/index/vector/builder.rs | 12 ++++-- 28 files changed, 151 insertions(+), 119 deletions(-) diff --git a/java/lance-jni/src/ffi.rs b/java/lance-jni/src/ffi.rs index 371e44563ed..d1f656873d8 100644 --- a/java/lance-jni/src/ffi.rs +++ b/java/lance-jni/src/ffi.rs @@ -296,7 +296,7 @@ impl JNIEnvExt for JNIEnv<'_> { .l()?; let operator_str = self.get_string_from_method(&operator_obj, "name")?; Operator::try_from(operator_str.as_str()) - .map_err(|e| Error::io_error(format!("Invalid operator: {:?}", e))) + .map_err(|e| Error::input_error(format!("Invalid operator: {:?}", e))) } fn get_occur_from_method(&mut self, obj: &JObject) -> Result { @@ -310,7 +310,7 @@ impl JNIEnvExt for JNIEnv<'_> { .l()?; let occur_str = self.get_string_from_method(&occur_obj, "name")?; Occur::try_from(occur_str.as_str()) - .map_err(|e| Error::io_error(format!("Invalid occur: {:?}", e))) + .map_err(|e| Error::input_error(format!("Invalid occur: {:?}", e))) } fn get_string_from_method(&mut self, obj: &JObject, method_name: &str) -> Result { @@ -388,7 +388,7 @@ impl JNIEnvExt for JNIEnv<'_> { self.get_optional_from_method(obj, method_name, |env, inner_jobj| { let inner_value = env.call_method(&inner_jobj, "intValue", "()I", &[])?.i()?; T::try_from(inner_value).map_err(|e| { - Error::io_error(format!("Failed to convert from i32 to rust type: {:?}", e)) + Error::input_error(format!("Failed to convert from i32 to rust type: {:?}", e)) }) }) } @@ -421,7 +421,7 @@ impl JNIEnvExt for JNIEnv<'_> { self.get_optional_from_method(obj, method_name, |env, inner_jobj| { let inner_value = env.call_method(&inner_jobj, "longValue", "()J", &[])?.j()?; T::try_from(inner_value).map_err(|e| { - Error::io_error(format!("Failed to convert from i32 to rust type: {:?}", e)) + Error::input_error(format!("Failed to convert from i32 to rust type: {:?}", e)) }) }) } diff --git a/java/lance-jni/src/sql.rs b/java/lance-jni/src/sql.rs index e667f6c7128..07bc3d8e2fa 100644 --- a/java/lance-jni/src/sql.rs +++ b/java/lance-jni/src/sql.rs @@ -35,7 +35,7 @@ pub extern "system" fn Java_org_lance_SqlQuery_intoBatchRecords( with_row_addr, stream_addr, ) - .map_err(|e| Error::io_error(e.to_string())) + .map_err(|e| Error::input_error(e.to_string())) ) } diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 80be4d98b35..b85c110f01d 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -344,11 +344,14 @@ pub fn transforms_from_python(transforms: &Bound<'_, PyAny>) -> PyResult = result .extract(py) - .map_err(|err| lance::Error::io(err.to_string(), location!()))?; + .map_err(|err| lance::Error::invalid_input(err.to_string(), location!()))?; Ok(result_batch.0) }) }; @@ -3419,7 +3422,7 @@ impl WriteFragmentProgress for PyWriteProgress { Ok(()) }) .map_err(|e| { - lance::Error::io( + lance::Error::invalid_input( format!("Failed to call begin() on WriteFragmentProgress: {}", e), location!(), ) @@ -3436,7 +3439,7 @@ impl WriteFragmentProgress for PyWriteProgress { Ok(()) }) .map_err(|e| { - lance::Error::io( + lance::Error::invalid_input( format!("Failed to call complete() on WriteFragmentProgress: {}", e), location!(), ) @@ -3480,7 +3483,7 @@ impl UDFCheckpointStore for PyBatchUDFCheckpointWrapper { Ok(batch.map(|b| b.0)) }) .map_err(|err: PyErr| { - lance_core::Error::io( + lance_core::Error::invalid_input( format!("Failed to call get_batch() on UDFCheckpointer: {}", err), location!(), ) @@ -3496,7 +3499,7 @@ impl UDFCheckpointStore for PyBatchUDFCheckpointWrapper { Ok(fragment) }) .map_err(|err: PyErr| { - lance_core::Error::io( + lance_core::Error::invalid_input( format!("Failed to call get_fragment() on UDFCheckpointer: {}", err), location!(), ) @@ -3504,7 +3507,7 @@ impl UDFCheckpointStore for PyBatchUDFCheckpointWrapper { fragment_data .map(|data| { serde_json::from_str(&data).map_err(|err| { - lance::Error::io( + lance_core::Error::invalid_input( format!("Failed to deserialize fragment data: {}", err), location!(), ) @@ -3521,7 +3524,7 @@ impl UDFCheckpointStore for PyBatchUDFCheckpointWrapper { Ok(()) }) .map_err(|err: PyErr| { - lance_core::Error::io( + lance_core::Error::invalid_input( format!("Failed to call insert_batch() on UDFCheckpointer: {}", err), location!(), ) @@ -3541,7 +3544,7 @@ impl UDFCheckpointStore for PyBatchUDFCheckpointWrapper { Ok(()) }) .map_err(|err: PyErr| { - lance_core::Error::io( + lance_core::Error::invalid_input( format!( "Failed to call insert_fragment() on UDFCheckpointer: {}", err diff --git a/rust/lance-core/src/error.rs b/rust/lance-core/src/error.rs index f80dbca4a7b..128789f9e73 100644 --- a/rust/lance-core/src/error.rs +++ b/rust/lance-core/src/error.rs @@ -164,6 +164,26 @@ impl Error { location, } } + + pub fn not_found(uri: impl Into) -> Self { + Self::NotFound { + uri: uri.into(), + location: std::panic::Location::caller().to_snafu_location(), + } + } + + pub fn schema(message: impl Into, location: Location) -> Self { + let message: String = message.into(); + Self::Schema { message, location } + } + + pub fn not_supported(message: impl Into, location: Location) -> Self { + let message: String = message.into(); + Self::NotSupported { + source: message.into(), + location, + } + } } pub trait LanceOptionExt { diff --git a/rust/lance-datafusion/src/projection.rs b/rust/lance-datafusion/src/projection.rs index 1c504615ed5..f586ac4bb20 100644 --- a/rust/lance-datafusion/src/projection.rs +++ b/rust/lance-datafusion/src/projection.rs @@ -70,7 +70,7 @@ impl ProjectionBuilder { fn check_duplicate_column(&self, name: &str) -> Result<()> { if self.output.contains_key(name) { - return Err(Error::io( + return Err(Error::invalid_input( format!("Duplicate column name: {}", name), location!(), )); @@ -280,7 +280,7 @@ impl ProjectionPlan { } else { // Regular data column - validate it exists in base schema if base.schema().field(&field.name).is_none() { - return Err(Error::io( + return Err(Error::invalid_input( format!("Column '{}' not found in schema", field.name), location!(), )); diff --git a/rust/lance-datafusion/src/sql.rs b/rust/lance-datafusion/src/sql.rs index 0f9e342c138..547badfdbfa 100644 --- a/rust/lance-datafusion/src/sql.rs +++ b/rust/lance-datafusion/src/sql.rs @@ -80,8 +80,9 @@ pub(crate) fn parse_sql_expr(expr: &str) -> Result { } else { None }; - let expr = selection - .ok_or_else(|| Error::io(format!("Expression is not valid: {expr}"), location!()))?; + let expr = selection.ok_or_else(|| { + Error::invalid_input(format!("Expression is not valid: {expr}"), location!()) + })?; Ok(expr.clone()) } diff --git a/rust/lance-file/src/previous/format/metadata.rs b/rust/lance-file/src/previous/format/metadata.rs index 025ed33d427..4724506218e 100644 --- a/rust/lance-file/src/previous/format/metadata.rs +++ b/rust/lance-file/src/previous/format/metadata.rs @@ -169,7 +169,7 @@ impl Metadata { // TODO: pub(crate) pub fn range_to_batches(&self, range: Range) -> Result)>> { if range.end > *(self.batch_offsets.last().unwrap()) as usize { - return Err(Error::io( + return Err(Error::invalid_input( format!( "Range {:?} is out of bounds {}", range, diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs index 985906698b2..b6d70aafad4 100644 --- a/rust/lance-file/src/previous/reader.rs +++ b/rust/lance-file/src/previous/reader.rs @@ -516,7 +516,7 @@ fn get_page_info<'a>( batch_id: i32, ) -> Result<&'a PageInfo> { page_table.get(field.id, batch_id).ok_or_else(|| { - Error::io( + Error::invalid_input( format!( "No page info found for field: {}, field_id={} batch={}", field.name, field.id, batch_id @@ -560,7 +560,7 @@ fn read_null_array( } else { let idx_max = *indices.values().iter().max().unwrap() as u64; if idx_max >= page_info.length as u64 { - return Err(Error::io( + return Err(Error::invalid_input( format!( "NullArray Reader: request([{}]) out of range: [0..{}]", idx_max, page_info.length @@ -580,7 +580,7 @@ fn read_null_array( _ => unreachable!(), }; if idx_end > page_info.length { - return Err(Error::io( + return Err(Error::invalid_input( format!( "NullArray Reader: request([{}..{}]) out of range: [0..{}]", // and wrap it in here. diff --git a/rust/lance-file/src/previous/writer/mod.rs b/rust/lance-file/src/previous/writer/mod.rs index 3bef0a73455..7006442e08c 100644 --- a/rust/lance-file/src/previous/writer/mod.rs +++ b/rust/lance-file/src/previous/writer/mod.rs @@ -204,7 +204,7 @@ impl FileWriter { .iter() .map(|batch| { batch.column_by_name(&field.name).ok_or_else(|| { - Error::io( + Error::invalid_input( format!("FileWriter::write: Field '{}' not found", field.name), location!(), ) @@ -639,9 +639,9 @@ impl FileWriter { })?; let value_arr = dict_info.values.as_ref().ok_or_else(|| { - Error::io( + Error::invalid_input( format!( - "Lance field {} is dictionary type, but misses the dictionary value array", + "Lance field {} is dictionary type, but misses the dictionary value array", field.name), location!(), ) @@ -658,7 +658,7 @@ impl FileWriter { encoder.encode(&[value_arr]).await? } _ => { - return Err(Error::io( + return Err(Error::schema( format!( "Does not support {} as dictionary value type", value_arr.data_type() diff --git a/rust/lance-file/src/reader.rs b/rust/lance-file/src/reader.rs index 354e0f920d0..ddc866e36db 100644 --- a/rust/lance-file/src/reader.rs +++ b/rust/lance-file/src/reader.rs @@ -445,7 +445,7 @@ impl FileReader { fn decode_footer(footer_bytes: &Bytes) -> Result