Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions python/python/tests/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,3 +757,23 @@ def test_session_contains(tmp_path):

assert session.contains("subdir/nested.lance")
assert not session.contains("subdir/nonexistent.lance")


def test_struct_null_regression():
import lance

# Create struct array where 2nd element is null
tag_array = pa.array(["valid", "null_struct", "valid", "valid"])
struct_array = pa.StructArray.from_arrays(
[tag_array],
fields=[pa.field("tag", pa.string(), nullable=True)],
mask=pa.array([True, False, True, True]), # False = null struct element
)

# Create list containing these structs
offsets = pa.array([0, 4], type=pa.int32())
list_array = pa.ListArray.from_arrays(offsets, struct_array)
batch = pa.record_batch([pa.array([0]), list_array], names=["id", "value"])

ds = lance.write_dataset(batch, "memory://", data_storage_version="2.2")
ds.to_table()
34 changes: 34 additions & 0 deletions rust/lance-encoding/src/encodings/logical/struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,40 @@ mod tests {
check_basic_random(field).await;
}

#[test_log::test(tokio::test)]
async fn test_list_of_struct_with_null_struct_element() {
// Regression: a list containing structs where most struct elements are null
// causes a length mismatch during decoding with V2_2 encoding.
use arrow_array::StringArray;

let tag_array = StringArray::from(vec![
Some("valid"),
Some("null_struct"),
Some("valid"),
Some("valid"),
]);
let struct_fields = Fields::from(vec![Field::new("tag", DataType::Utf8, true)]);
// 3 out of 4 struct elements are null
let struct_validity = NullBuffer::from(vec![false, true, false, false]);
let struct_array = StructArray::new(
struct_fields.clone(),
vec![Arc::new(tag_array)],
Some(struct_validity),
);

let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 4]));
let list_field = Field::new("item", DataType::Struct(struct_fields), true);
let list_array =
ListArray::new(Arc::new(list_field), offsets, Arc::new(struct_array), None);

check_round_trip_encoding_of_data(
vec![Arc::new(list_array)],
&TestCases::default().with_min_file_version(LanceFileVersion::V2_2),
HashMap::new(),
)
.await;
}

#[test_log::test(tokio::test)]
async fn test_ragged_scheduling() {
// This test covers scheduling when batches straddle page boundaries
Expand Down
36 changes: 35 additions & 1 deletion rust/lance-encoding/src/repdef.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1278,7 +1278,7 @@ impl RepDefUnraveler {
// This is the highest def level that is still visible. Once we hit a list then
// we stop looking because any null / empty list (or list masked by a higher level
// null) will not be visible
let mut max_level = null_level.max(empty_level);
let mut max_level = null_level.max(empty_level).max(valid_level);
// Anything higher than this (but less than max_level) is a null struct masking our
// list. We will materialize this is a null list.
let upper_null = max_level;
Expand Down Expand Up @@ -2692,6 +2692,40 @@ mod tests {
assert_eq!(val, Some(validity(&[true, false, true, true])));
}

#[test]
fn test_repdef_null_struct_valid_list() {
// This regresses a bug

let rep = vec![1, 0, 0, 0];
let def = vec![2, 0, 2, 2];
// AllValidList<NullableStruct<NullableItem>>
let def_meaning = vec![
DefinitionInterpretation::NullableItem,
DefinitionInterpretation::NullableItem,
DefinitionInterpretation::AllValidList,
];
let num_items = 4;

let mut unraveler = CompositeRepDefUnraveler::new(vec![RepDefUnraveler::new(
Some(rep),
Some(def),
def_meaning.into(),
num_items,
)]);

assert_eq!(
unraveler.unravel_validity(4),
Some(validity(&[false, true, false, false]))
);
assert_eq!(
unraveler.unravel_validity(4),
Some(validity(&[false, true, false, false]))
);
let (off, val) = unraveler.unravel_offsets::<i32>().unwrap();
assert_eq!(off.inner(), offsets_32(&[0, 4]).inner());
assert_eq!(val, None);
}

#[test]
fn test_repdef_no_rep() {
let mut builder = RepDefBuilder::default();
Expand Down