diff --git a/rust/lance-encoding/src/compression.rs b/rust/lance-encoding/src/compression.rs index 24c2c71d654..af1bd41766f 100644 --- a/rust/lance-encoding/src/compression.rs +++ b/rust/lance-encoding/src/compression.rs @@ -1728,6 +1728,33 @@ mod tests { } } + /* Requests a general-purpose codec ("lz4" when that feature is enabled, otherwise "zstd") for the "dict_values" column, pins DefaultCompressionStrategy to LanceFileVersion::V2_1, and asserts that the encoding returned by create_block_compressor for a FixedSizeBinary(3) field does not report Compression::General. Compiled only when the lz4 or zstd feature is enabled. */ #[test] + #[cfg(any(feature = "lz4", feature = "zstd"))] + fn test_general_compression_not_selected_for_v2_1_even_if_requested() { + let mut params = CompressionParams::new(); + params.columns.insert( + "dict_values".to_string(), + CompressionFieldParams { + compression: Some(if cfg!(feature = "lz4") { "lz4" } else { "zstd" }.to_string()), + ..Default::default() + }, + ); + + let strategy = + DefaultCompressionStrategy::with_params(params).with_version(LanceFileVersion::V2_1); + let field = create_test_field("dict_values", DataType::FixedSizeBinary(3)); + /* NOTE(review): 24 is presumably the bits per value (3 bytes, matching FixedSizeBinary(3)) and 1024 the value count - confirm against create_fixed_width_block. */ let data = create_fixed_width_block(24, 1024); + + let (_compressor, encoding) = strategy + .create_block_compressor(&field, &data) + .expect("block compressor selection should succeed"); + + assert!( + !matches!(encoding.compression.as_ref(), Some(Compression::General(_))), + "general compression should not be selected for V2.1" + ); + } + #[test] fn test_rle_block_used_for_version_v2_2() { let field = create_test_field("test_repdef", DataType::UInt16); diff --git a/rust/lance-encoding/src/encoder.rs b/rust/lance-encoding/src/encoder.rs index 203b3b99642..0286a80737d 100644 --- a/rust/lance-encoding/src/encoder.rs +++ b/rust/lance-encoding/src/encoder.rs @@ -790,6 +790,7 @@ pub async fn encode_batch( mod tests { use super::*; use crate::compression_config::{CompressionFieldParams, CompressionParams}; + use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, Fields as ArrowFields}; #[test] fn test_configured_encoding_strategy() { @@ -829,4 +830,38 @@ mod tests { .to_string() .contains("only supported in Lance file version 2.1")); } + + /* Builds a nullable FixedSizeList of length 2 whose item is a nullable struct with a single nullable Int32 child "x", then asserts that a StructuralEncodingStrategy created for LanceFileVersion::V2_1 fails to create a field encoder for it, with an error message containing "FixedSizeList is only supported in Lance file format 2.2+". */ #[test] + fn test_fixed_size_list_struct_requires_v2_2() { + let list_item = ArrowField::new( + "item", 
ArrowDataType::Struct(ArrowFields::from(vec![ArrowField::new( + "x", + ArrowDataType::Int32, + true, + )])), + true, + ); + let arrow_field = ArrowField::new( + "list_struct", + ArrowDataType::FixedSizeList(Arc::new(list_item), 2), + true, + ); + let field = Field::try_from(&arrow_field).unwrap(); + + let strategy = StructuralEncodingStrategy::with_version(LanceFileVersion::V2_1); + let mut column_index = ColumnIndexSequence::default(); + let options = EncodingOptions::default(); + + let result = strategy.create_field_encoder(&strategy, &field, &mut column_index, &options); + assert!( + result.is_err(), + "FixedSizeList should be rejected for file version 2.1" + ); + let err = result.err().unwrap(); + + assert!(err + .to_string() + .contains("FixedSizeList is only supported in Lance file format 2.2+")); + } } diff --git a/rust/lance/src/dataset/blob.rs b/rust/lance/src/dataset/blob.rs index b50247925f1..a482972850c 100644 --- a/rust/lance/src/dataset/blob.rs +++ b/rust/lance/src/dataset/blob.rs @@ -1191,6 +1191,42 @@ mod tests { assert_eq!(second.as_ref(), b"world"); } + /* Writes a one-row batch (UInt32 "id" plus a blob column produced by BlobArrayBuilder) through Dataset::write with data_storage_version set to LanceFileVersion::V2_1, and asserts that the write fails with an error message containing "Blob v2 requires file version >= 2.2". */ #[tokio::test] + async fn test_blob_v2_requires_v2_2() { + let test_dir = TempStrDir::default(); + + let mut blob_builder = BlobArrayBuilder::new(1); + blob_builder.push_bytes(b"hello").unwrap(); + let blob_array: arrow_array::ArrayRef = blob_builder.finish().unwrap(); + + let id_array: arrow_array::ArrayRef = Arc::new(UInt32Array::from(vec![0])); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::UInt32, false), + blob_field("blob", true), + ])); + let batch = RecordBatch::try_new(schema.clone(), vec![id_array, blob_array]).unwrap(); + let reader = RecordBatchIterator::new(vec![batch].into_iter().map(Ok), schema); + + let result = Dataset::write( + reader, + &test_dir, + Some(WriteParams { + data_storage_version: Some(LanceFileVersion::V2_1), + ..Default::default() + }), + ) + .await; + + assert!( + result.is_err(), + "Blob v2 should be rejected for file version 2.1" + ); + 
assert!(result + .unwrap_err() + .to_string() + .contains("Blob v2 requires file version >= 2.2")); + } + async fn preprocess_kind_with_schema_metadata(metadata_value: &str, data_len: usize) -> u8 { let (object_store, base_path) = ObjectStore::from_uri_and_params( Arc::new(ObjectStoreRegistry::default()),