diff --git a/rust/lance-core/src/utils/mask/nullable.rs b/rust/lance-core/src/utils/mask/nullable.rs
index 1f4b5b7dad3..3575731e017 100644
--- a/rust/lance-core/src/utils/mask/nullable.rs
+++ b/rust/lance-core/src/utils/mask/nullable.rs
@@ -237,14 +237,17 @@ impl std::ops::BitOr for NullableRowAddrMask {
             }
             (Self::AllowList(allow), Self::BlockList(block))
             | (Self::BlockList(block), Self::AllowList(allow)) => {
+                let allow_true = allow.selected - &allow.nulls; // allow side: TRUE rows
+                let block_false = block.selected - &block.nulls; // block side: FALSE rows
+
                 let nulls = if allow.nulls.is_empty() && block.nulls.is_empty() {
                     RowAddrTreeMap::new() // Fast path
                 } else {
-                    // null or null -> null (excluding rows that are true in either)
-                    let allow_true = allow.selected.clone() - &allow.nulls;
-                    ((allow.nulls | block.nulls) & block.selected.clone()) - allow_true
+                    // NULL|FALSE=NULL, FALSE|NULL=NULL, NULL|NULL=NULL, TRUE|NULL=TRUE.
+                    // So NULL rows are: (allow NULL & block FALSE) or (block NULL & allow not TRUE).
+                    (allow.nulls & &block_false) | (block.nulls - &allow_true)
                 };
-                let selected = (block.selected - allow.selected) | &nulls;
+                let selected = (block_false - &allow_true) | &nulls;
                 Self::BlockList(NullableRowAddrSet { selected, nulls })
             }
             (Self::BlockList(a), Self::BlockList(b)) => {
@@ -363,6 +366,30 @@ mod tests {
         assert_mask_selects(&result, &[], &[0, 1, 2, 3]);
     }
 
+    #[test]
+    fn test_or_allow_block_keeps_block_nulls() {
+        // Allow|Block OR must preserve NULLs from block even when block.selected is empty.
+        // allow: TRUE=[1], NULL=[0]; block: FALSE=[], NULL=[0]
+        let allow_mask = allow(&[1], &[0]);
+        let block_mask = block(&[], &[0]);
+        let result = allow_mask | block_mask;
+
+        // Row 1 is TRUE; row 0 remains NULL (not selected)
+        assert_mask_selects(&result, &[1], &[0]);
+    }
+
+    #[test]
+    fn test_or_allow_block_keeps_block_nulls_with_false_rows() {
+        // Ensure FALSE stays FALSE and NULL stays NULL when both appear on the block side.
+        // allow: TRUE=[2], NULL=[]; block: FALSE=[1], NULL=[0]
+        let allow_mask = allow(&[2], &[]);
+        let block_mask = block(&[1], &[0]);
+        let result = allow_mask | block_mask;
+
+        // Row 2 is TRUE; row 1 is FALSE; row 0 remains NULL (not selected)
+        assert_mask_selects(&result, &[2], &[0, 1]);
+    }
+
     #[test]
     fn test_row_selection_bit_or() {
         // [T, N, T, N, F, F, F]
diff --git a/rust/lance/tests/query/primitives.rs b/rust/lance/tests/query/primitives.rs
index b2b8b9db5c1..a6173bf83b7 100644
--- a/rust/lance/tests/query/primitives.rs
+++ b/rust/lance/tests/query/primitives.rs
@@ -79,6 +79,8 @@ async fn test_query_integer(#[case] data_type: DataType) {
             test_filter(&original, &ds, "NOT (value > 20)").await;
             test_filter(&original, &ds, "value is null").await;
             test_filter(&original, &ds, "value is not null").await;
+            test_filter(&original, &ds, "(value != 0) OR (value < 20)").await;
+            test_filter(&original, &ds, "NOT ((value != 0) OR (value < 20))").await;
         })
         .await
 }