Description
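Follow-up issue for #5867. After a LABEL_LIST scalar index is created on a list column, negated `array_has_any` / `array_has_all` / `array_contains` filters also return rows whose list value is NULL, whereas the same filters without the index exclude those rows.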
Reproduce
import lance
import pyarrow as pa
# Reproduction script: compare negated list filters before and after a
# LABEL_LIST scalar index is created on the "labels" column.

# Four rows: two non-empty lists, one NULL list, and one empty list.
tbl = pa.table({"labels": [["foo"], ["bar"], None, []]})
dataset = lance.write_dataset(tbl, "/tmp/dataset", mode="overwrite")

filters = [
    "NOT array_has_any(labels, ['foo'])",
    "NOT array_has_all(labels, ['foo'])",
    "NOT array_contains(labels, 'foo')",
]

# Baseline: evaluate every filter before the scalar index is created.
expected = {
    f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
}

dataset.create_scalar_index("labels", index_type="LABEL_LIST")

# Same filters again, now that the LABEL_LIST index exists on the column.
actual = {
    f: dataset.to_table(filter=f).column("labels").to_pylist() for f in filters
}

# Side-by-side report; a row is a mismatch when the two results differ.
print(f"{'Filter':<35} {'Expected':<25} {'Actual':<25} {'Match':<6}")
print("=" * 95)
for f in filters:
    exp_str = str(expected[f])
    act_str = str(actual[f])
    match = "✓" if expected[f] == actual[f] else "✗"
    print(f"{f:<35} {exp_str:<25} {act_str:<25} {match:<6}")
Result
Filter                              Expected                  Actual                    Match
===============================================================================================
NOT array_has_any(labels, ['foo'])  [['bar'], []]             [['bar'], None, []]       ✗
NOT array_has_all(labels, ['foo'])  [['bar'], []]             [['bar'], None, []]       ✗
NOT array_contains(labels, 'foo')   [['bar'], []]             [['bar'], None, []]       ✗
Description
Follow-up issue for #5867.
Reproduce
Result