From 828ccff954eaeb47d985836e6eb1741239903532 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Sat, 7 Dec 2024 08:38:13 -0500 Subject: [PATCH 1/3] Make scalar and array handling for array_has consistent --- datafusion/functions-nested/src/array_has.rs | 11 ++++-- datafusion/sqllogictest/test_files/array.slt | 39 +++++++++++++++++--- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 499b07dafccf3..0ac1578b6d485 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -215,7 +215,11 @@ fn array_has_dispatch_for_array( let needle_row = Scalar::new(needle.slice(i, 1)); let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?; let is_contained = eq_array.true_count() > 0; - boolean_builder.append_value(is_contained) + if is_contained || eq_array.null_count() == 0 { + boolean_builder.append_value(is_contained); + } else { + boolean_builder.append_null(); + } } Ok(Arc::new(boolean_builder.finish())) @@ -249,8 +253,9 @@ fn array_has_dispatch_for_scalar( } let sliced_array = eq_array.slice(start, length); // For nested list, check number of nulls - if sliced_array.null_count() != length { - final_contained[i] = Some(sliced_array.true_count() > 0); + let is_contained = sliced_array.true_count() > 0; + if is_contained || sliced_array.null_count() == 0 { + final_contained[i] = Some(is_contained); } } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 7578692ddb0f8..714d1cfb0ec93 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -352,6 +352,16 @@ AS VALUES (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')) ; +statement ok +CREATE TABLE array_has_table_null +AS VALUES + (make_array(1, 2), 1), + (make_array(1, NULL), 1), + (make_array(3, 4, 5), 2), + (make_array(3, NULL, 5), 2), + (make_array(NULL, NULL, NULL), 2) +; + statement ok CREATE TABLE array_distinct_table_1D AS VALUES @@ -5260,6 +5270,13 @@ select array_has([], null), ---- NULL NULL NULL +# If lhs is has any Nulls, we return Null instead of false +query BB +select array_has([1, null, 2], 3), + array_has([null, null, null], 3); +---- +NULL NULL + #TODO: array_has_all and array_has_any cannot handle NULL #query BBBB #select array_has_any([], null), @@ -5338,6 +5355,16 @@ from array_has_table_1D; true true true false false false +query B +select array_has(column1, column2) +from array_has_table_null; +---- +true +true +false +NULL +NULL + query B select array_has(column1, column2) from fixed_size_array_has_table_1D; @@ -5541,9 +5568,9 @@ select array_has(column1, make_array(5, 6)), from arrays; ---- false false false true -true false true false +true false true NULL true false false true -false true false false +false true NULL false NULL NULL false false false false NULL false false false false NULL @@ -5556,9 +5583,9 @@ select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(5, 6) from arrays; ---- false false false true -true false true false +true false true NULL true false false true -false true false false +false true NULL false NULL NULL false false false false NULL false false false false NULL @@ -5571,9 +5598,9 @@ select array_has(column1, make_array(5, 6)), from fixed_size_arrays; ---- false false false true -true false true false +true false true NULL true false false true -false true false false +false true NULL false NULL NULL false false false false NULL false false false false NULL From 40781a3d4ecb30764c2682a141d23e1b0e879bd0 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Mon, 16 Dec 2024 12:27:13 -0500 Subject: [PATCH 2/3] Ignore null elements for scalars and arrays --- datafusion/functions-nested/src/array_has.rs | 13 ++--------- datafusion/sqllogictest/test_files/array.slt | 24 ++++++++++---------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 0ac1578b6d485..719b257aca964 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -214,12 +214,7 @@ fn array_has_dispatch_for_array( let is_nested = arr.data_type().is_nested(); let needle_row = Scalar::new(needle.slice(i, 1)); let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?; - let is_contained = eq_array.true_count() > 0; - if is_contained || eq_array.null_count() == 0 { - boolean_builder.append_value(is_contained); - } else { - boolean_builder.append_null(); - } + boolean_builder.append_value(eq_array.true_count() > 0); } Ok(Arc::new(boolean_builder.finish())) @@ -252,11 +247,7 @@ fn array_has_dispatch_for_scalar( continue; } let sliced_array = eq_array.slice(start, length); - // For nested list, check number of nulls - let is_contained = sliced_array.true_count() > 0; - if is_contained || sliced_array.null_count() == 0 { - final_contained[i] = Some(is_contained); - } + final_contained[i] = Some(sliced_array.true_count() > 0); } Ok(Arc::new(BooleanArray::from(final_contained))) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 714d1cfb0ec93..7df178bbb6445 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5275,7 +5275,7 @@ query BB select array_has([1, null, 2], 3), array_has([null, null, null], 3); ---- -NULL NULL +false false #TODO: array_has_all and array_has_any cannot handle NULL #query BBBB @@ -5362,8 +5362,8 @@ from array_has_table_null; true true false -NULL -NULL +false +false query B select array_has(column1, column2) @@ -5568,9 +5568,9 @@ select array_has(column1, make_array(5, 6)), from arrays; ---- false false false true -true false true NULL +true false true false true false false true -false true NULL false +false true false false NULL NULL false false false false NULL false false false false NULL @@ -5583,9 +5583,9 @@ select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(5, 6) from arrays; ---- false false false true -true false true NULL +true false true false true false false true -false true NULL false +false true false false NULL NULL false false false false NULL false false false false NULL @@ -5598,12 +5598,12 @@ select array_has(column1, make_array(5, 6)), from fixed_size_arrays; ---- false false false true -true false true NULL +true false true false true false false true -false true NULL false -NULL NULL false false -false false NULL false -false false false NULL +false true false false +false false false false +false false false false +false false false false query BBBBBBBBBBBBB select array_has_all(make_array(1,2,3), make_array(1,3)), From f5d5244dafcef12440e96c5f8df8354b76df16c4 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Mon, 16 Dec 2024 12:29:04 -0500 Subject: [PATCH 3/3] Upate comment --- datafusion/sqllogictest/test_files/array.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 7df178bbb6445..443270e643256 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5270,7 +5270,7 @@ select array_has([], null), ---- NULL NULL NULL -# If lhs is has any Nulls, we return Null instead of false +# Always return false if not contained even if list has null elements query BB select array_has([1, null, 2], 3), array_has([null, null, null], 3);