From 625924cbc6de40622a1485f1d46118660ab84c04 Mon Sep 17 00:00:00 2001
From: fenfeng9 <fenfeng9@qq.com>
Date: Sun, 1 Feb 2026 02:08:43 +0800
Subject: [PATCH 1/5] Fix label list explain for NULL literals

---
 python/python/tests/test_scalar_index.py | 16 ++++++++++++++++
 rust/lance-index/src/scalar.rs           |  4 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
index 75ec01d9a82..f43e59b1f46 100644
--- a/python/python/tests/test_scalar_index.py
+++ b/python/python/tests/test_scalar_index.py
@@ -2054,6 +2054,22 @@ def test_label_list_index_array_contains(tmp_path: Path):
     assert "ScalarIndexQuery" not in explain
 
 
+def test_label_list_index_explain_null_literals(tmp_path: Path):
+    tbl = pa.table({"labels": [["foo", None], ["foo"]]})
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    # explain_plan should not panic when list literals include NULLs.
+    for expr in [
+        "array_has_any(labels, [NULL])",
+        "array_has_all(labels, [NULL])",
+        "array_has_any(labels, ['foo', NULL])",
+        "array_has_all(labels, ['foo', NULL])",
+    ]:
+        explain = dataset.scanner(filter=expr).explain_plan()
+        assert isinstance(explain, str)
+
+
 def test_create_index_empty_dataset(tmp_path: Path):
     # Creating an index on an empty dataset is (currently) not terribly useful but
     # we shouldn't return strange errors.
diff --git a/rust/lance-index/src/scalar.rs b/rust/lance-index/src/scalar.rs
index 6d07b5b8218..98aebe96e8c 100644
--- a/rust/lance-index/src/scalar.rs
+++ b/rust/lance-index/src/scalar.rs
@@ -549,7 +549,7 @@ impl AnyQuery for LabelListQuery {
                 let offsets_buffer =
                     OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, labels_arr.len() as i32]));
                 let labels_list = ListArray::try_new(
-                    Arc::new(Field::new("item", labels_arr.data_type().clone(), false)),
+                    Arc::new(Field::new("item", labels_arr.data_type().clone(), true)),
                     offsets_buffer,
                     labels_arr,
                     None,
@@ -569,7 +569,7 @@ impl AnyQuery for LabelListQuery {
                 let offsets_buffer =
                     OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, labels_arr.len() as i32]));
                 let labels_list = ListArray::try_new(
-                    Arc::new(Field::new("item", labels_arr.data_type().clone(), false)),
+                    Arc::new(Field::new("item", labels_arr.data_type().clone(), true)),
                     offsets_buffer,
                     labels_arr,
                     None,

From b3b5b46f35a8375997f427db6eb6f5c8f84476a9 Mon Sep 17 00:00:00 2001
From: fenfeng9 <fenfeng9@qq.com>
Date: Sun, 1 Feb 2026 02:46:09 +0800
Subject: [PATCH 2/5] Fix label list NULL overlap in bitmap index

---
 python/python/tests/test_scalar_index.py | 22 ++++++++++++++++++++++
 rust/lance-index/src/scalar/bitmap.rs    |  7 ++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
index f43e59b1f46..fa1922a4cfd 100644
--- a/python/python/tests/test_scalar_index.py
+++ b/python/python/tests/test_scalar_index.py
@@ -2054,6 +2054,28 @@ def test_label_list_index_array_contains(tmp_path: Path):
     assert "ScalarIndexQuery" not in explain
 
 
+def test_label_list_index_null_element_match(tmp_path: Path):
+    """Ensure LABEL_LIST index keeps scan semantics when lists contain NULLs."""
+    tbl = pa.table({"labels": [["foo", None], ["foo"], None]})
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+
+    filters = [
+        "array_has_any(labels, ['foo'])",
+        "array_has_all(labels, ['foo'])",
+        "array_contains(labels, 'foo')",
+    ]
+    expected = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    actual = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+    assert actual == expected
+
+
 def test_label_list_index_explain_null_literals(tmp_path: Path):
     tbl = pa.table({"labels": [["foo", None], ["foo"]]})
     dataset = lance.write_dataset(tbl, tmp_path / "dataset")
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index 4fb9fc3334c..66d749da0df 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -546,7 +546,12 @@ impl ScalarIndex for BitmapIndex {
             }
         };
 
-        let selection = NullableRowAddrSet::new(row_ids, null_row_ids.unwrap_or_default());
+        let mut null_rows = null_row_ids.unwrap_or_default();
+        if !null_rows.is_empty() {
+            // A row can be both TRUE and NULL after list flattening; treat it as TRUE.
+            null_rows -= &row_ids;
+        }
+        let selection = NullableRowAddrSet::new(row_ids, null_rows);
         Ok(SearchResult::Exact(selection))
     }
 

From d04587c7988b013cb25f9e95908e99df850fbacc Mon Sep 17 00:00:00 2001
From: fenfeng9 <36840213+fenfeng9@users.noreply.github.com>
Date: Fri, 6 Feb 2026 13:27:22 +0800
Subject: [PATCH 3/5] test: add NOT filters to label list NULL element test

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 python/python/tests/test_scalar_index.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
index fa1922a4cfd..741d7128203 100644
--- a/python/python/tests/test_scalar_index.py
+++ b/python/python/tests/test_scalar_index.py
@@ -2063,6 +2063,9 @@ def test_label_list_index_null_element_match(tmp_path: Path):
         "array_has_any(labels, ['foo'])",
         "array_has_all(labels, ['foo'])",
         "array_contains(labels, 'foo')",
+        "NOT array_has_any(labels, ['foo'])",
+        "NOT array_has_all(labels, ['foo'])",
+        "NOT array_contains(labels, 'foo')",
     ]
     expected = {
         f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters

From bc7897bdd9db1a66a5d2d0baf0d0b44553f0c154 Mon Sep 17 00:00:00 2001
From: fenfeng9 <fenfeng9@qq.com>
Date: Sat, 7 Feb 2026 15:10:05 +0800
Subject: [PATCH 4/5] test: extend label list NULL cases, disable NOT filters
 (#5904)

---
 python/python/tests/test_scalar_index.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
index 741d7128203..39e14f2ce3c 100644
--- a/python/python/tests/test_scalar_index.py
+++ b/python/python/tests/test_scalar_index.py
@@ -2056,16 +2056,19 @@ def test_label_list_index_array_contains(tmp_path: Path):
 
 def test_label_list_index_null_element_match(tmp_path: Path):
     """Ensure LABEL_LIST index keeps scan semantics when lists contain NULLs."""
-    tbl = pa.table({"labels": [["foo", None], ["foo"], None]})
+    tbl = pa.table(
+        {"labels": [["foo", None], ["foo"], ["bar", None], ["bar"], None, []]}
+    )
     dataset = lance.write_dataset(tbl, tmp_path / "dataset")
 
     filters = [
         "array_has_any(labels, ['foo'])",
         "array_has_all(labels, ['foo'])",
         "array_contains(labels, 'foo')",
-        "NOT array_has_any(labels, ['foo'])",
-        "NOT array_has_all(labels, ['foo'])",
-        "NOT array_contains(labels, 'foo')",
+        # TODO(issue #5904): Enable after fixing NOT filters with NULL lists/elements
+        # "NOT array_has_any(labels, ['foo'])",
+        # "NOT array_has_all(labels, ['foo'])",
+        # "NOT array_contains(labels, 'foo')",
     ]
     expected = {
         f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters

From 690fe230386db13665d7482c3ea60f609608c558 Mon Sep 17 00:00:00 2001
From: fenfeng9 <fenfeng9@qq.com>
Date: Sat, 7 Feb 2026 19:47:23 +0800
Subject: [PATCH 5/5] fix(lance-index): ignore null elements in label_list
 matching

- Clear element-level nulls in label_list searches
- Update null-handling tests for label_list

---
 python/python/tests/test_scalar_index.py    | 58 +++++++++++++++++++--
 rust/lance-index/src/scalar/bitmap.rs       |  7 +--
 rust/lance-index/src/scalar/label_list.rs   |  9 +++-
 rust/lance-index/src/scalar/lance_format.rs | 14 ++---
 4 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
index 39e14f2ce3c..bc4aa803399 100644
--- a/python/python/tests/test_scalar_index.py
+++ b/python/python/tests/test_scalar_index.py
@@ -2055,9 +2055,9 @@ def test_label_list_index_array_contains(tmp_path: Path):
 
 
 def test_label_list_index_null_element_match(tmp_path: Path):
-    """Ensure LABEL_LIST index keeps scan semantics when lists contain NULLs."""
+    """Covers NULL elements inside non-NULL lists (list itself is never NULL)."""
     tbl = pa.table(
-        {"labels": [["foo", None], ["foo"], ["bar", None], ["bar"], None, []]}
+        {"labels": [["foo", None], ["foo"], ["bar", None], [None], ["bar"], []]}
     )
     dataset = lance.write_dataset(tbl, tmp_path / "dataset")
 
@@ -2065,7 +2065,32 @@ def test_label_list_index_null_element_match(tmp_path: Path):
         "array_has_any(labels, ['foo'])",
         "array_has_all(labels, ['foo'])",
         "array_contains(labels, 'foo')",
-        # TODO(issue #5904): Enable after fixing NOT filters with NULL lists/elements
+        "NOT array_has_any(labels, ['foo'])",
+        "NOT array_has_all(labels, ['foo'])",
+        "NOT array_contains(labels, 'foo')",
+    ]
+    expected = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    actual = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+    assert actual == expected
+
+
+def test_label_list_index_null_list_match(tmp_path: Path):
+    """Covers NULL lists (list itself is NULL, elements are not NULL)."""
+    tbl = pa.table({"labels": [["foo"], ["bar"], None, []]})
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+
+    filters = [
+        "array_has_any(labels, ['foo'])",
+        "array_has_all(labels, ['foo'])",
+        "array_contains(labels, 'foo')",
+        # TODO(issue #5904): Enable after fixing NOT filters with whole-list NULLs
         # "NOT array_has_any(labels, ['foo'])",
         # "NOT array_has_all(labels, ['foo'])",
         # "NOT array_contains(labels, 'foo')",
@@ -2082,6 +2107,33 @@ def test_label_list_index_null_element_match(tmp_path: Path):
     assert actual == expected
 
 
+def test_label_list_index_null_literal_filters(tmp_path: Path):
+    """Ensure filters with NULL literal needles produce consistent results with scan."""
+    tbl = pa.table(
+        {"labels": [["foo", None], ["bar", None], [None], ["foo"], ["bar"], []]}
+    )
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+
+    filters = [
+        "array_has_any(labels, [NULL])",
+        "array_has_all(labels, [NULL])",
+        "array_contains(labels, NULL)",
+        "NOT array_has_any(labels, [NULL])",
+        "NOT array_has_all(labels, [NULL])",
+        "NOT array_contains(labels, NULL)",
+    ]
+    expected = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+
+    dataset.create_scalar_index("labels", index_type="LABEL_LIST")
+
+    actual = {
+        f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
+    }
+    assert actual == expected
+
+
 def test_label_list_index_explain_null_literals(tmp_path: Path):
     tbl = pa.table({"labels": [["foo", None], ["foo"]]})
     dataset = lance.write_dataset(tbl, tmp_path / "dataset")
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index 66d749da0df..4fb9fc3334c 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -546,12 +546,7 @@ impl ScalarIndex for BitmapIndex {
             }
         };
 
-        let mut null_rows = null_row_ids.unwrap_or_default();
-        if !null_rows.is_empty() {
-            // A row can be both TRUE and NULL after list flattening; treat it as TRUE.
-            null_rows -= &row_ids;
-        }
-        let selection = NullableRowAddrSet::new(row_ids, null_rows);
+        let selection = NullableRowAddrSet::new(row_ids, null_row_ids.unwrap_or_default());
         Ok(SearchResult::Exact(selection))
     }
 
diff --git a/rust/lance-index/src/scalar/label_list.rs b/rust/lance-index/src/scalar/label_list.rs
index 0cfd00d4866..e971c45fa97 100644
--- a/rust/lance-index/src/scalar/label_list.rs
+++ b/rust/lance-index/src/scalar/label_list.rs
@@ -13,7 +13,7 @@ use datafusion_common::ScalarValue;
 use deepsize::DeepSizeOf;
 use futures::{stream::BoxStream, StreamExt, TryStream, TryStreamExt};
 use lance_core::cache::LanceCache;
-use lance_core::utils::mask::NullableRowAddrSet;
+use lance_core::utils::mask::{NullableRowAddrSet, RowAddrTreeMap};
 use lance_core::{Error, Result};
 use roaring::RoaringBitmap;
 use snafu::location;
@@ -45,7 +45,12 @@ trait LabelListSubIndex: ScalarIndex + DeepSizeOf {
     ) -> Result<NullableRowAddrSet> {
         let result = self.search(query, metrics).await?;
         match result {
-            SearchResult::Exact(row_ids) => Ok(row_ids),
+            SearchResult::Exact(row_ids) => {
+                // Label list semantics treat NULL elements as non-matches, so only TRUE/FALSE
+                // results should remain for array_has_any/array_has_all when the list itself
+                // is non-NULL. Clear nulls to avoid propagating element-level NULLs.
+                Ok(row_ids.with_nulls(RowAddrTreeMap::new()))
+            }
             _ => Err(Error::Internal {
                 message: "Label list sub-index should return exact results".to_string(),
                 location: location!(),
diff --git a/rust/lance-index/src/scalar/lance_format.rs b/rust/lance-index/src/scalar/lance_format.rs
index 817fb803c64..cdb3f73db84 100644
--- a/rust/lance-index/src/scalar/lance_format.rs
+++ b/rust/lance-index/src/scalar/lance_format.rs
@@ -1551,7 +1551,7 @@ pub mod tests {
 
         // Test: Search for lists containing value 1
         // Row 0: [1, 2] - contains 1 → TRUE
-        // Row 1: [3, null] - has null item, unknown if it matches → NULL
+        // Row 1: [3, null] - null elements are ignored → FALSE
         // Row 2: [4] - doesn't contain 1 → FALSE
         let query = LabelListQuery::HasAnyLabel(vec![ScalarValue::UInt8(Some(1))]);
         let result = index.search(&query, &NoOpMetricsCollector).await.unwrap();
@@ -1570,17 +1570,9 @@ pub mod tests {
                     "Should find row 0 where list contains 1"
                 );
 
-                let null_row_ids = row_ids.null_rows();
                 assert!(
-                    !null_row_ids.is_empty(),
-                    "null_row_ids should not be empty - row 1 has null item"
-                );
-                let null_rows: Vec<u64> =
-                    null_row_ids.row_addrs().unwrap().map(u64::from).collect();
-                assert_eq!(
-                    null_rows,
-                    vec![1],
-                    "Should report row 1 as null because it contains a null item"
+                    row_ids.null_rows().is_empty(),
+                    "null_row_ids should be empty when null elements are ignored"
                 );
             }
             _ => panic!("Expected Exact search result"),