Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions rust/lance-encoding/src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1728,6 +1728,33 @@ mod tests {
}
}

/// Requesting a general-purpose codec (lz4 or zstd, whichever feature is compiled in)
/// for a column must not make the block compressor chosen under file version 2.1
/// report general compression.
#[test]
#[cfg(any(feature = "lz4", feature = "zstd"))]
fn test_general_compression_not_selected_for_v2_1_even_if_requested() {
    // Prefer lz4 when that feature is enabled; otherwise fall back to zstd.
    let requested_scheme = if cfg!(feature = "lz4") { "lz4" } else { "zstd" };
    let column_params = CompressionFieldParams {
        compression: Some(requested_scheme.to_string()),
        ..Default::default()
    };

    let mut params = CompressionParams::new();
    params
        .columns
        .insert("dict_values".to_string(), column_params);

    let strategy =
        DefaultCompressionStrategy::with_params(params).with_version(LanceFileVersion::V2_1);

    let field = create_test_field("dict_values", DataType::FixedSizeBinary(3));
    let data = create_fixed_width_block(24, 1024);

    let (_compressor, encoding) = strategy
        .create_block_compressor(&field, &data)
        .expect("block compressor selection should succeed");

    // The explicit per-column request must be ignored for this file version.
    let picked_general = matches!(
        encoding.compression.as_ref(),
        Some(Compression::General(_))
    );
    assert!(
        !picked_general,
        "general compression should not be selected for V2.1"
    );
}

#[test]
fn test_rle_block_used_for_version_v2_2() {
let field = create_test_field("test_repdef", DataType::UInt16);
Expand Down
35 changes: 35 additions & 0 deletions rust/lance-encoding/src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,7 @@ pub async fn encode_batch(
mod tests {
use super::*;
use crate::compression_config::{CompressionFieldParams, CompressionParams};
use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, Fields as ArrowFields};

#[test]
fn test_configured_encoding_strategy() {
Expand Down Expand Up @@ -829,4 +830,38 @@ mod tests {
.to_string()
.contains("only supported in Lance file version 2.1"));
}

/// Creating a field encoder for a `FixedSizeList<Struct>` column with a strategy
/// pinned to file version 2.1 must fail, and the error text must name the
/// 2.2+ requirement.
#[test]
fn test_fixed_size_list_struct_requires_v2_2() {
    // Build the Arrow type inside-out: struct { x: Int32 } -> fixed-size list of 2 items.
    let struct_children =
        ArrowFields::from(vec![ArrowField::new("x", ArrowDataType::Int32, true)]);
    let list_item = ArrowField::new("item", ArrowDataType::Struct(struct_children), true);
    let list_type = ArrowDataType::FixedSizeList(Arc::new(list_item), 2);
    let arrow_field = ArrowField::new("list_struct", list_type, true);
    let field = Field::try_from(&arrow_field).unwrap();

    let strategy = StructuralEncodingStrategy::with_version(LanceFileVersion::V2_1);
    let options = EncodingOptions::default();
    let mut column_index = ColumnIndexSequence::default();

    // The success value is discarded; only the error is inspected.
    let err = match strategy.create_field_encoder(&strategy, &field, &mut column_index, &options) {
        Ok(_) => panic!("FixedSizeList<Struct> should be rejected for file version 2.1"),
        Err(err) => err,
    };

    let message = err.to_string();
    assert!(message.contains("FixedSizeList<Struct> is only supported in Lance file format 2.2+"));
}
}
36 changes: 36 additions & 0 deletions rust/lance/src/dataset/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1191,6 +1191,42 @@ mod tests {
assert_eq!(second.as_ref(), b"world");
}

/// Writing a batch that contains a blob column while requesting data storage
/// version 2.1 must fail with an error stating that file version >= 2.2 is required.
#[tokio::test]
async fn test_blob_v2_requires_v2_2() {
    let test_dir = TempStrDir::default();

    // Schema with an `id` column and a blob column produced by `blob_field`.
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::UInt32, false),
        blob_field("blob", true),
    ]));

    // One row: id 0 paired with the blob payload b"hello".
    let id_array: arrow_array::ArrayRef = Arc::new(UInt32Array::from(vec![0]));
    let mut blob_builder = BlobArrayBuilder::new(1);
    blob_builder.push_bytes(b"hello").unwrap();
    let blob_array: arrow_array::ArrayRef = blob_builder.finish().unwrap();

    let batch = RecordBatch::try_new(schema.clone(), vec![id_array, blob_array]).unwrap();
    let reader = RecordBatchIterator::new(vec![batch].into_iter().map(Ok), schema);

    let write_params = WriteParams {
        data_storage_version: Some(LanceFileVersion::V2_1),
        ..Default::default()
    };
    let result = Dataset::write(reader, &test_dir, Some(write_params)).await;

    assert!(
        result.is_err(),
        "Blob v2 should be rejected for file version 2.1"
    );
    let message = result.unwrap_err().to_string();
    assert!(message.contains("Blob v2 requires file version >= 2.2"));
}

async fn preprocess_kind_with_schema_metadata(metadata_value: &str, data_len: usize) -> u8 {
let (object_store, base_path) = ObjectStore::from_uri_and_params(
Arc::new(ObjectStoreRegistry::default()),
Expand Down