lance-format · wjones127 · Feb 10, 2026 · Jan 31, 2026 · Jan 31, 2026 · Feb 6, 2026
diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
@@ -2054,6 +2054,102 @@ def test_label_list_index_array_contains(tmp_path: Path):
     assert "ScalarIndexQuery" not in explain
 
 
+def test_label_list_index_null_element_match(tmp_path: Path):
+    """Covers NULL elements inside non-NULL lists (list itself is never NULL)."""
+    tbl = pa.table(
+        {"labels": [["foo", None], ["foo"], ["bar", None], [None], ["bar"], []]}
+    )
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+
+    filters = [
+        "array_has_any(labels, ['foo'])",
+        "array_has_all(labels, ['foo'])",
+        "array_contains(labels, 'foo')",
+        "NOT array_has_any(labels, ['foo'])",
+        "NOT array_has_all(labels, ['foo'])",
+        "NOT array_contains(labels, 'foo')",
+    ]
+    expected = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    actual = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+    assert actual == expected
+
+
+def test_label_list_index_null_list_match(tmp_path: Path):
+    """Covers NULL lists (list itself is NULL, elements are not NULL)."""
+    tbl = pa.table({"labels": [["foo"], ["bar"], None, []]})
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+
+    filters = [
+        "array_has_any(labels, ['foo'])",
+        "array_has_all(labels, ['foo'])",
+        "array_contains(labels, 'foo')",
+        # TODO(issue #5904): Enable after fixing NOT filters with whole-list NULLs
+        # "NOT array_has_any(labels, ['foo'])",
+        # "NOT array_has_all(labels, ['foo'])",
+        # "NOT array_contains(labels, 'foo')",
+    ]
+    expected = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    actual = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+    assert actual == expected
+
+
+def test_label_list_index_null_literal_filters(tmp_path: Path):
+    """Ensure filters with NULL literal needles produce consistent results with scan."""
+    tbl = pa.table(
+        {"labels": [["foo", None], ["bar", None], [None], ["foo"], ["bar"], []]}
+    )
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+
+    filters = [
+        "array_has_any(labels, [NULL])",
+        "array_has_all(labels, [NULL])",
+        "array_contains(labels, NULL)",
+        "NOT array_has_any(labels, [NULL])",
+        "NOT array_has_all(labels, [NULL])",
+        "NOT array_contains(labels, NULL)",
+    ]
+    expected = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    actual = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+    assert actual == expected
+
+
+def test_label_list_index_explain_null_literals(tmp_path: Path):
+    tbl = pa.table({"labels": [["foo", None], ["foo"]]})
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    # explain_plan should not panic when list literals include NULLs.
+    for expr in [
+        "array_has_any(labels, [NULL])",
+        "array_has_all(labels, [NULL])",
+        "array_has_any(labels, ['foo', NULL])",
+        "array_has_all(labels, ['foo', NULL])",
+    ]:
+        explain = dataset.scanner(filter=expr).explain_plan()
+        assert isinstance(explain, str)
+
+
 def test_create_index_empty_dataset(tmp_path: Path):
     # Creating an index on an empty dataset is (currently) not terribly useful but
     # we shouldn't return strange errors.

diff --git a/rust/lance-index/src/scalar.rs b/rust/lance-index/src/scalar.rs
@@ -549,7 +549,7 @@ impl AnyQuery for LabelListQuery {
                 let offsets_buffer =
                     OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, labels_arr.len() as i32]));
                 let labels_list = ListArray::try_new(
-                    Arc::new(Field::new("item", labels_arr.data_type().clone(), false)),
+                    Arc::new(Field::new("item", labels_arr.data_type().clone(), true)),
                     offsets_buffer,
                     labels_arr,
                     None,
@@ -569,7 +569,7 @@ impl AnyQuery for LabelListQuery {
                 let offsets_buffer =
                     OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, labels_arr.len() as i32]));
                 let labels_list = ListArray::try_new(
-                    Arc::new(Field::new("item", labels_arr.data_type().clone(), false)),
+                    Arc::new(Field::new("item", labels_arr.data_type().clone(), true)),
                     offsets_buffer,
                     labels_arr,
                     None,

diff --git a/rust/lance-index/src/scalar/label_list.rs b/rust/lance-index/src/scalar/label_list.rs
@@ -13,7 +13,7 @@ use datafusion_common::ScalarValue;
 use deepsize::DeepSizeOf;
 use futures::{stream::BoxStream, StreamExt, TryStream, TryStreamExt};
 use lance_core::cache::LanceCache;
-use lance_core::utils::mask::NullableRowAddrSet;
+use lance_core::utils::mask::{NullableRowAddrSet, RowAddrTreeMap};
 use lance_core::{Error, Result};
 use roaring::RoaringBitmap;
 use snafu::location;
@@ -45,7 +45,12 @@ trait LabelListSubIndex: ScalarIndex + DeepSizeOf {
     ) -> Result<NullableRowAddrSet> {
         let result = self.search(query, metrics).await?;
         match result {
-            SearchResult::Exact(row_ids) => Ok(row_ids),
+            SearchResult::Exact(row_ids) => {
+                // Label list semantics treat NULL elements as non-matches, so only TRUE/FALSE
+                // results should remain for array_has_any/array_has_all when the list itself
+                // is non-NULL. Clear nulls to avoid propagating element-level NULLs.
+                Ok(row_ids.with_nulls(RowAddrTreeMap::new()))
+            }
             _ => Err(Error::Internal {
                 message: "Label list sub-index should return exact results".to_string(),
                 location: location!(),

diff --git a/rust/lance-index/src/scalar/lance_format.rs b/rust/lance-index/src/scalar/lance_format.rs
@@ -1551,7 +1551,7 @@ pub mod tests {
 
         // Test: Search for lists containing value 1
         // Row 0: [1, 2] - contains 1 → TRUE
-        // Row 1: [3, null] - has null item, unknown if it matches → NULL
+        // Row 1: [3, null] - null elements are ignored → FALSE
         // Row 2: [4] - doesn't contain 1 → FALSE
         let query = LabelListQuery::HasAnyLabel(vec![ScalarValue::UInt8(Some(1))]);
         let result = index.search(&query, &NoOpMetricsCollector).await.unwrap();
@@ -1570,17 +1570,9 @@ pub mod tests {
                     "Should find row 0 where list contains 1"
                 );
 
-                let null_row_ids = row_ids.null_rows();
                 assert!(
-                    !null_row_ids.is_empty(),
-                    "null_row_ids should not be empty - row 1 has null item"
-                );
-                let null_rows: Vec<u64> =
-                    null_row_ids.row_addrs().unwrap().map(u64::from).collect();
-                assert_eq!(
-                    null_rows,
-                    vec![1],
-                    "Should report row 1 as null because it contains a null item"
+                    row_ids.null_rows().is_empty(),
+                    "null_row_ids should be empty when null elements are ignored"
                 );
             }
             _ => panic!("Expected Exact search result"),