From 06de93338b60b7f9228d3bd9b8989a234e8917f0 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Mon, 9 Mar 2026 22:47:16 +0300 Subject: [PATCH 1/5] initial implementation --- datafusion/sql/src/expr/mod.rs | 27 ++++++---- datafusion/sqllogictest/test_files/array.slt | 53 +++++++++++++++++++- 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 7902eed1e6922..edc6b6422253d 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -40,7 +40,7 @@ use datafusion_expr::{ }; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_functions_nested::expr_fn::array_has; +use datafusion_functions_nested::expr_fn::{array_has, array_max, array_min}; mod binary_op; mod function; @@ -612,16 +612,21 @@ impl SqlToRel<'_, S> { planner_context, ), _ => { - if compare_op != BinaryOperator::Eq { - plan_err!( - "Unsupported AnyOp: '{compare_op}', only '=' is supported" - ) - } else { - let left_expr = - self.sql_to_expr(*left, schema, planner_context)?; - let right_expr = - self.sql_to_expr(*right, schema, planner_context)?; - Ok(array_has(right_expr, left_expr)) + let left_expr = self.sql_to_expr(*left, schema, planner_context)?; + let right_expr = self.sql_to_expr(*right, schema, planner_context)?; + match compare_op { + BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)), + BinaryOperator::Gt => Ok(array_min(right_expr).lt(left_expr)), + BinaryOperator::Lt => Ok(array_max(right_expr).gt(left_expr)), + BinaryOperator::GtEq => { + Ok(array_min(right_expr).lt_eq(left_expr)) + } + BinaryOperator::LtEq => { + Ok(array_max(right_expr).gt_eq(left_expr)) + } + _ => plan_err!( + "Unsupported AnyOp: '{compare_op}', only '=', '>', '<', '>=', '<=' are supported" + ), } } }, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 112351c5efa73..abb06a5bad51a 100644 --- 
a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7003,8 +7003,57 @@ select count(*) from arrays where 'X'=any(column3); ---- 0 -query error DataFusion error: Error during planning: Unsupported AnyOp: '>', only '=' is supported -select count(*) from arrays where 'X'>any(column3); +query error DataFusion error: Error during planning: Unsupported AnyOp: '<>', only '=', '>', '<', '>=', '<=' are supported +select count(*) from arrays where 'X'<>any(column3); + +# any operator with comparison operators +# Use inline arrays so the test data is visible and the needle (5) +# falls within the range of some arrays but not others. +statement ok +CREATE TABLE any_op_test AS VALUES + (1, make_array(1, 2, 3)), + (2, make_array(4, 5, 6)), + (3, make_array(7, 8, 9)), + (4, make_array(3, 5, 7)); + +# 5 > ANY(arr): true when array_min < 5 +# row1: min=1 < 5 ✓, row2: min=4 < 5 ✓, row3: min=7 < 5 ✗, row4: min=3 < 5 ✓ +query I? +select column1, column2 from any_op_test where 5 > any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +4 [3, 5, 7] + +# 5 >= ANY(arr): true when array_min <= 5 +# row1: min=1 <= 5 ✓, row2: min=4 <= 5 ✓, row3: min=7 <= 5 ✗, row4: min=3 <= 5 ✓ +query I? +select column1, column2 from any_op_test where 5 >= any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +4 [3, 5, 7] + +# 5 < ANY(arr): true when array_max > 5 +# row1: max=3 > 5 ✗, row2: max=6 > 5 ✓, row3: max=9 > 5 ✓, row4: max=7 > 5 ✓ +query I? +select column1, column2 from any_op_test where 5 < any(column2) order by column1; +---- +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# 5 <= ANY(arr): true when array_max >= 5 +# row1: max=3 >= 5 ✗, row2: max=6 >= 5 ✓, row3: max=9 >= 5 ✓, row4: max=7 >= 5 ✓ +query I? 
+select column1, column2 from any_op_test where 5 <= any(column2) order by column1; +---- +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +statement ok +DROP TABLE any_op_test; ## array_distinct From fbcf5e8444561a4cd5a6d5150e4519aba6835aed Mon Sep 17 00:00:00 2001 From: buraksenn Date: Mon, 9 Mar 2026 23:01:56 +0300 Subject: [PATCH 2/5] add <> support --- datafusion/sql/src/expr/mod.rs | 5 +++- datafusion/sqllogictest/test_files/array.slt | 25 +++++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index edc6b6422253d..aeb983fd9084e 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -616,6 +616,9 @@ impl SqlToRel<'_, S> { let right_expr = self.sql_to_expr(*right, schema, planner_context)?; match compare_op { BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)), + BinaryOperator::NotEq => Ok(array_min(right_expr.clone()) + .not_eq(left_expr.clone()) + .or(array_max(right_expr).not_eq(left_expr))), BinaryOperator::Gt => Ok(array_min(right_expr).lt(left_expr)), BinaryOperator::Lt => Ok(array_max(right_expr).gt(left_expr)), BinaryOperator::GtEq => { @@ -625,7 +628,7 @@ impl SqlToRel<'_, S> { Ok(array_max(right_expr).gt_eq(left_expr)) } _ => plan_err!( - "Unsupported AnyOp: '{compare_op}', only '=', '>', '<', '>=', '<=' are supported" + "Unsupported AnyOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported" ), } } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index abb06a5bad51a..f5a53afaf95f6 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7003,9 +7003,6 @@ select count(*) from arrays where 'X'=any(column3); ---- 0 -query error DataFusion error: Error during planning: Unsupported AnyOp: '<>', only '=', '>', '<', '>=', '<=' are supported -select count(*) from arrays where 'X'<>any(column3); - # any operator 
with comparison operators # Use inline arrays so the test data is visible and the needle (5) # falls within the range of some arrays but not others. @@ -7052,6 +7049,28 @@ select column1, column2 from any_op_test where 5 <= any(column2) order by column 3 [7, 8, 9] 4 [3, 5, 7] +# 5 <> ANY(arr): true when array_min != 5 OR array_max != 5 +# row1: [1,2,3] min=1!=5 ✓, row2: [4,5,6] min=4!=5 ✓, row3: [7,8,9] min=7!=5 ✓, row4: [3,5,7] min=3!=5 ✓ +query I? +select column1, column2 from any_op_test where 5 <> any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# For a single-element array where the element equals the needle, <> should return false +query B +select 5 <> any(make_array(5)); +---- +false + +# For a uniform array [5,5,5], <> should also return false +query B +select 5 <> any(make_array(5, 5, 5)); +---- +false + statement ok DROP TABLE any_op_test; From 223d69e2da2d872a551eb5029013de195f97a29b Mon Sep 17 00:00:00 2001 From: buraksenn Date: Tue, 10 Mar 2026 09:59:40 +0300 Subject: [PATCH 3/5] address reviews --- datafusion/sql/src/expr/mod.rs | 15 ++++-- datafusion/sqllogictest/test_files/array.slt | 57 ++++++++++++++++++++ 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index aeb983fd9084e..400316157f94d 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -618,14 +618,19 @@ impl SqlToRel<'_, S> { BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)), BinaryOperator::NotEq => Ok(array_min(right_expr.clone()) .not_eq(left_expr.clone()) - .or(array_max(right_expr).not_eq(left_expr))), - BinaryOperator::Gt => Ok(array_min(right_expr).lt(left_expr)), - BinaryOperator::Lt => Ok(array_max(right_expr).gt(left_expr)), + .or(array_max(right_expr).not_eq(left_expr)) + .is_true()), + BinaryOperator::Gt => { + Ok(array_min(right_expr).lt(left_expr).is_true()) + } + BinaryOperator::Lt => { + 
Ok(array_max(right_expr).gt(left_expr).is_true()) + } BinaryOperator::GtEq => { - Ok(array_min(right_expr).lt_eq(left_expr)) + Ok(array_min(right_expr).lt_eq(left_expr).is_true()) } BinaryOperator::LtEq => { - Ok(array_max(right_expr).gt_eq(left_expr)) + Ok(array_max(right_expr).gt_eq(left_expr).is_true()) } _ => plan_err!( "Unsupported AnyOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported" diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index f5a53afaf95f6..86f21da28cf22 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7071,6 +7071,63 @@ select 5 <> any(make_array(5, 5, 5)); ---- false +# Empty array: all operators should return false (no elements satisfy the condition) +query B +select 5 = any(make_array()); +---- +false + +query B +select 5 <> any(make_array()); +---- +false + +query B +select 5 > any(make_array()); +---- +false + +query B +select 5 < any(make_array()); +---- +false + +query B +select 5 >= any(make_array()); +---- +false + +query B +select 5 <= any(make_array()); +---- +false + +# All-NULL array: all operators should return false +query B +select 5 > any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 < any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 >= any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 <= any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 <> any(make_array(NULL::INT, NULL::INT)); +---- +false + statement ok DROP TABLE any_op_test; From 4a2705a2902a688d344487e0ed21b613f04012d9 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Tue, 10 Mar 2026 10:19:28 +0300 Subject: [PATCH 4/5] mixed non-null and null tests --- datafusion/sqllogictest/test_files/array.slt | 53 ++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt index 86f21da28cf22..b448aaa271151 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7102,6 +7102,59 @@ select 5 <= any(make_array()); ---- false +# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition +# These return false (NULLs are skipped by array_min/array_max) +query B +select 5 > any(make_array(6, NULL)); +---- +false + +query B +select 5 < any(make_array(3, NULL)); +---- +false + +query B +select 5 >= any(make_array(6, NULL)); +---- +false + +query B +select 5 <= any(make_array(3, NULL)); +---- +false + +# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition +query B +select 5 > any(make_array(3, NULL)); +---- +true + +query B +select 5 < any(make_array(6, NULL)); +---- +true + +query B +select 5 >= any(make_array(5, NULL)); +---- +true + +query B +select 5 <= any(make_array(5, NULL)); +---- +true + +query B +select 5 <> any(make_array(3, NULL)); +---- +true + +query B +select 5 <> any(make_array(5, NULL)); +---- +false + # All-NULL array: all operators should return false query B select 5 > any(make_array(NULL::INT, NULL::INT)); From 1ed97ebbc0b73e35c442e173d0c87981ebdcae81 Mon Sep 17 00:00:00 2001 From: buraksenn Date: Thu, 19 Mar 2026 00:52:07 +0300 Subject: [PATCH 5/5] address all comments --- datafusion/sql/src/expr/mod.rs | 79 ++++++++++++++------ datafusion/sqllogictest/test_files/array.slt | 58 ++++++++++++++ 2 files changed, 114 insertions(+), 23 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 400316157f94d..accc3ac0e5b79 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -36,7 +36,7 @@ use datafusion_expr::expr::SetQuantifier; use datafusion_expr::expr::{InList, WildcardOptions}; use datafusion_expr::{ Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal, - Operator, TryCast, lit, + 
Operator, TryCast, lit, when, }; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; @@ -614,28 +614,7 @@ impl SqlToRel<'_, S> { _ => { let left_expr = self.sql_to_expr(*left, schema, planner_context)?; let right_expr = self.sql_to_expr(*right, schema, planner_context)?; - match compare_op { - BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)), - BinaryOperator::NotEq => Ok(array_min(right_expr.clone()) - .not_eq(left_expr.clone()) - .or(array_max(right_expr).not_eq(left_expr)) - .is_true()), - BinaryOperator::Gt => { - Ok(array_min(right_expr).lt(left_expr).is_true()) - } - BinaryOperator::Lt => { - Ok(array_max(right_expr).gt(left_expr).is_true()) - } - BinaryOperator::GtEq => { - Ok(array_min(right_expr).lt_eq(left_expr).is_true()) - } - BinaryOperator::LtEq => { - Ok(array_max(right_expr).gt_eq(left_expr).is_true()) - } - _ => plan_err!( - "Unsupported AnyOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported" - ), - } + plan_any_op(left_expr, right_expr, &compare_op) } }, SQLExpr::AllOp { @@ -1264,6 +1243,60 @@ impl SqlToRel<'_, S> { } } +/// Builds a CASE expression that handles NULL semantics for `x <op> ANY(arr)`: +/// +/// ```text +/// CASE +/// WHEN <min_or_max>(arr) IS NOT NULL THEN <comparison> +/// WHEN arr IS NOT NULL THEN FALSE -- empty or all-null array +/// ELSE NULL -- NULL array +/// END +/// ``` +fn any_op_with_null_handling(bound: Expr, comparison: Expr, arr: Expr) -> Result { + when(bound.is_not_null(), comparison) + .when(arr.is_not_null(), lit(false)) + .otherwise(lit(ScalarValue::Boolean(None))) +} + +/// Plans a `<left> <op> ANY(<right>)` expression for non-subquery operands. 
+fn plan_any_op( + left_expr: Expr, + right_expr: Expr, + compare_op: &BinaryOperator, +) -> Result { + match compare_op { + BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)), + BinaryOperator::NotEq => { + let min = array_min(right_expr.clone()); + let max = array_max(right_expr.clone()); + // NOT EQ is true when either bound differs from left + let comparison = min + .not_eq(left_expr.clone()) + .or(max.clone().not_eq(left_expr)); + any_op_with_null_handling(max, comparison, right_expr) + } + BinaryOperator::Gt => { + let min = array_min(right_expr.clone()); + any_op_with_null_handling(min.clone(), min.lt(left_expr), right_expr) + } + BinaryOperator::Lt => { + let max = array_max(right_expr.clone()); + any_op_with_null_handling(max.clone(), max.gt(left_expr), right_expr) + } + BinaryOperator::GtEq => { + let min = array_min(right_expr.clone()); + any_op_with_null_handling(min.clone(), min.lt_eq(left_expr), right_expr) + } + BinaryOperator::LtEq => { + let max = array_max(right_expr.clone()); + any_op_with_null_handling(max.clone(), max.gt_eq(left_expr), right_expr) + } + _ => plan_err!( + "Unsupported AnyOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported" + ), + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index b448aaa271151..4394a967b7264 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7181,6 +7181,64 @@ select 5 <> any(make_array(NULL::INT, NULL::INT)); ---- false +# NULL left operand: should return NULL for non-empty arrays +query B +select NULL > any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL < any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL >= any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL <= any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL <> any(make_array(1, 2, 3)); +---- +NULL + +# 
NULL left operand with empty array: should return false +query B +select NULL > any(make_array()); +---- +false + +# NULL array: should return NULL +query B +select 5 > any(NULL::INT[]); +---- +NULL + +query B +select 5 < any(NULL::INT[]); +---- +NULL + +query B +select 5 >= any(NULL::INT[]); +---- +NULL + +query B +select 5 <= any(NULL::INT[]); +---- +NULL + +query B +select 5 <> any(NULL::INT[]); +---- +NULL + statement ok DROP TABLE any_op_test;