From e382e83a35e9b2932c1cc1c6194a6fcd8ceaa831 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 2 Dec 2025 19:14:40 +0800
Subject: [PATCH 1/2] chore: add a test case for variable packed struct

Signed-off-by: Xuanwo
---
 .../src/encodings/physical/packed.rs | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/rust/lance-encoding/src/encodings/physical/packed.rs b/rust/lance-encoding/src/encodings/physical/packed.rs
index 88f31be412e..29c33df6a95 100644
--- a/rust/lance-encoding/src/encodings/physical/packed.rs
+++ b/rust/lance-encoding/src/encodings/physical/packed.rs
@@ -735,13 +735,17 @@ mod tests {
     use crate::{
         compression::CompressionStrategy,
         compression::{DefaultCompressionStrategy, DefaultDecompressionStrategy},
+        constants::PACKED_STRUCT_META_KEY,
         statistics::ComputeStat,
+        testing::{check_round_trip_encoding_of_data, TestCases},
         version::LanceFileVersion,
     };
     use arrow_array::{
         Array, ArrayRef, BinaryArray, Int32Array, Int64Array, LargeStringArray, StringArray,
+        StructArray, UInt32Array,
     };
     use arrow_schema::{DataType, Field as ArrowField, Fields};
+    use std::collections::HashMap;
     use std::sync::Arc;

     fn fixed_block_from_array(array: Int64Array) -> FixedWidthDataBlock {
@@ -947,6 +951,40 @@ mod tests {
         Ok(())
     }

+    #[tokio::test]
+    async fn variable_packed_struct_utf8_round_trip() {
+        // schema: Struct<id: UInt32, uri: Utf8>
+        let fields = Fields::from(vec![
+            Arc::new(ArrowField::new("id", DataType::UInt32, false)),
+            Arc::new(ArrowField::new("uri", DataType::Utf8, false)),
+        ]);
+
+        // mark struct as packed
+        let mut meta = HashMap::new();
+        meta.insert(PACKED_STRUCT_META_KEY.to_string(), "true".to_string());
+
+        let array = Arc::new(StructArray::from(vec![
+            (
+                fields[0].clone(),
+                Arc::new(UInt32Array::from(vec![1, 2, 3])) as ArrayRef,
+            ),
+            (
+                fields[1].clone(),
+                Arc::new(StringArray::from(vec![
+                    Some("a"),
+                    Some("b"),
+                    Some("/tmp/x"),
+                ])) as ArrayRef,
+            ),
+        ]));
+
+        let test_cases = TestCases::default()
+            .with_min_file_version(LanceFileVersion::V2_2)
+            .with_expected_encoding("variable_packed_struct");
+
+        check_round_trip_encoding_of_data(vec![array], &test_cases, meta).await;
+    }
+
     #[test]
     fn variable_packed_struct_multi_variable_round_trip() -> Result<()> {
         let arrow_fields: Fields = vec![

From 2c9c4e3fda6778e3d36c28713f262b415875d231 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Fri, 5 Dec 2025 16:52:15 +0800
Subject: [PATCH 2/2] Polish tests

Signed-off-by: Xuanwo
---
 rust/lance-encoding/src/encodings/physical/packed.rs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/rust/lance-encoding/src/encodings/physical/packed.rs b/rust/lance-encoding/src/encodings/physical/packed.rs
index 29c33df6a95..31ce7acd1e4 100644
--- a/rust/lance-encoding/src/encodings/physical/packed.rs
+++ b/rust/lance-encoding/src/encodings/physical/packed.rs
@@ -953,10 +953,11 @@ mod tests {

     #[tokio::test]
     async fn variable_packed_struct_utf8_round_trip() {
-        // schema: Struct<id: UInt32, uri: Utf8>
+        // schema: Struct<id: UInt32, uri: Utf8, long_text: LargeUtf8>
         let fields = Fields::from(vec![
             Arc::new(ArrowField::new("id", DataType::UInt32, false)),
             Arc::new(ArrowField::new("uri", DataType::Utf8, false)),
+            Arc::new(ArrowField::new("long_text", DataType::LargeUtf8, false)),
         ]);

         // mark struct as packed
@@ -976,6 +977,14 @@ mod tests {
                     Some("/tmp/x"),
                 ])) as ArrayRef,
             ),
+            (
+                fields[2].clone(),
+                Arc::new(LargeStringArray::from(vec![
+                    Some("alpha"),
+                    Some("a considerably longer payload for testing"),
+                    Some("mid"),
+                ])) as ArrayRef,
+            ),
         ]));

         let test_cases = TestCases::default()