From 1550c84479625eab9b801b62b30b8ba9c88adf60 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 20 Mar 2025 08:32:28 +0800 Subject: [PATCH 1/3] include some BinaryOperator from sqlparser Signed-off-by: Ruihang Xia --- datafusion/expr-common/src/operator.rs | 72 ++++++++++++++++++- .../expr-common/src/type_coercion/binary.rs | 10 +-- .../physical-expr/src/expressions/binary.rs | 6 +- datafusion/sql/src/expr/binary_op.rs | 16 ++++- datafusion/sql/src/unparser/expr.rs | 24 +++++++ .../substrait/src/logical_plan/producer.rs | 11 +++ 6 files changed, 128 insertions(+), 11 deletions(-) diff --git a/datafusion/expr-common/src/operator.rs b/datafusion/expr-common/src/operator.rs index 6ca0f04897aca..107d9c472d8ba 100644 --- a/datafusion/expr-common/src/operator.rs +++ b/datafusion/expr-common/src/operator.rs @@ -86,6 +86,28 @@ pub enum Operator { AtArrow, /// Arrow at, like `<@` ArrowAt, + /// Arrow, like `->` + Arrow, + /// Long arrow, like `->>` + LongArrow, + /// Hash arrow, like `#>` + HashArrow, + /// Hash long arrow, like `#>>` + HashLongArrow, + /// At at, like `@@` + AtAt, + /// Integer division operator, like `DIV` from MySQL or `//` from DuckDB + IntegerDivide, + /// Hash Minis, like `#-` + HashMinus, + /// At question, like `@?` + AtQuestion, + /// Question, like `?` + Question, + /// Question and, like `?&` + QuestionAnd, + /// Question pipe, like `?|` + QuestionPipe, } impl Operator { @@ -123,7 +145,18 @@ impl Operator { | Operator::BitwiseShiftLeft | Operator::StringConcat | Operator::AtArrow - | Operator::ArrowAt => None, + | Operator::ArrowAt + | Operator::Arrow + | Operator::LongArrow + | Operator::HashArrow + | Operator::HashLongArrow + | Operator::AtAt + | Operator::IntegerDivide + | Operator::HashMinus + | Operator::AtQuestion + | Operator::Question + | Operator::QuestionAnd + | Operator::QuestionPipe => None, } } @@ -216,7 +249,18 @@ impl Operator { | Operator::BitwiseXor | Operator::BitwiseShiftRight | Operator::BitwiseShiftLeft - | 
Operator::StringConcat => None, + | Operator::StringConcat + | Operator::Arrow + | Operator::LongArrow + | Operator::HashArrow + | Operator::HashLongArrow + | Operator::AtAt + | Operator::IntegerDivide + | Operator::HashMinus + | Operator::AtQuestion + | Operator::Question + | Operator::QuestionAnd + | Operator::QuestionPipe => None, } } @@ -245,7 +289,18 @@ impl Operator { | Operator::BitwiseXor | Operator::StringConcat | Operator::AtArrow - | Operator::ArrowAt => 30, + | Operator::ArrowAt + | Operator::Arrow + | Operator::LongArrow + | Operator::HashArrow + | Operator::HashLongArrow + | Operator::AtAt + | Operator::IntegerDivide + | Operator::HashMinus + | Operator::AtQuestion + | Operator::Question + | Operator::QuestionAnd + | Operator::QuestionPipe => 30, Operator::Plus | Operator::Minus => 40, Operator::Multiply | Operator::Divide | Operator::Modulo => 45, } @@ -286,6 +341,17 @@ impl fmt::Display for Operator { Operator::StringConcat => "||", Operator::AtArrow => "@>", Operator::ArrowAt => "<@", + Operator::Arrow => "->", + Operator::LongArrow => "->>", + Operator::HashArrow => "#>", + Operator::HashLongArrow => "#>>", + Operator::AtAt => "@@", + Operator::IntegerDivide => "DIV", + Operator::HashMinus => "#-", + Operator::AtQuestion => "@?", + Operator::Question => "?", + Operator::QuestionAnd => "?&", + Operator::QuestionPipe => "?|", }; write!(f, "{display}") } diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index fb559e163bb1a..7993a3f97ae9d 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -181,17 +181,17 @@ impl<'a> BinaryTypeCoercer<'a> { ) }) } - AtArrow | ArrowAt => { - // ArrowAt and AtArrow check for whether one array is contained in another. 
+ AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt | HashMinus | + AtQuestion | Question | QuestionAnd | QuestionPipe |IntegerDivide=> { + // These operators check for whether one array is contained in another or other JSON operations. // The result type is boolean. Signature::comparison defines this signature. - // Operation has nothing to do with comparison array_coercion(self.lhs, self.rhs).map(Signature::comparison).ok_or_else(|| { plan_datafusion_err!( - "Cannot infer common array type for arrow operation {} {} {}", self.lhs, self.op, self.rhs + "Cannot infer common array type for operation {} {} {}", self.lhs, self.op, self.rhs ) }) } - Plus | Minus | Multiply | Divide | Modulo => { + Plus | Minus | Multiply | Divide | Modulo => { let get_result = |lhs, rhs| { use arrow::compute::kernels::numeric::*; let l = new_empty_array(lhs); diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index a00d135ef3c1b..c96d37075666b 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -793,8 +793,10 @@ impl BinaryExpr { BitwiseShiftRight => bitwise_shift_right_dyn(left, right), BitwiseShiftLeft => bitwise_shift_left_dyn(left, right), StringConcat => concat_elements(left, right), - AtArrow | ArrowAt => { - unreachable!("ArrowAt and AtArrow should be rewritten to function") + AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt + | HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe + | IntegerDivide => { + unreachable!("These operators should be rewritten to functions") } } } diff --git a/datafusion/sql/src/expr/binary_op.rs b/datafusion/sql/src/expr/binary_op.rs index 729304066d208..1c06f5ee926f9 100644 --- a/datafusion/sql/src/expr/binary_op.rs +++ b/datafusion/sql/src/expr/binary_op.rs @@ -46,6 +46,7 @@ impl SqlToRel<'_, S> { BinaryOperator::PGNotILikeMatch => 
Ok(Operator::NotILikeMatch), BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd), BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr), + BinaryOperator::Xor => Ok(Operator::BitwiseXor), BinaryOperator::BitwiseXor => Ok(Operator::BitwiseXor), BinaryOperator::PGBitwiseXor => Ok(Operator::BitwiseXor), BinaryOperator::PGBitwiseShiftRight => Ok(Operator::BitwiseShiftRight), @@ -53,8 +54,21 @@ impl SqlToRel<'_, S> { BinaryOperator::StringConcat => Ok(Operator::StringConcat), BinaryOperator::ArrowAt => Ok(Operator::ArrowAt), BinaryOperator::AtArrow => Ok(Operator::AtArrow), + BinaryOperator::Arrow => Ok(Operator::Arrow), + BinaryOperator::LongArrow => Ok(Operator::LongArrow), + BinaryOperator::HashArrow => Ok(Operator::HashArrow), + BinaryOperator::HashLongArrow => Ok(Operator::HashLongArrow), + BinaryOperator::AtAt => Ok(Operator::AtAt), BinaryOperator::Spaceship => Ok(Operator::IsNotDistinctFrom), - _ => not_impl_err!("Unsupported SQL binary operator {op:?}"), + BinaryOperator::DuckIntegerDivide | BinaryOperator::MyIntegerDivide => { + Ok(Operator::IntegerDivide) + } + BinaryOperator::HashMinus => Ok(Operator::HashMinus), + BinaryOperator::AtQuestion => Ok(Operator::AtQuestion), + BinaryOperator::Question => Ok(Operator::Question), + BinaryOperator::QuestionAnd => Ok(Operator::QuestionAnd), + BinaryOperator::QuestionPipe => Ok(Operator::QuestionPipe), + _ => not_impl_err!("Unsupported binary operator: {:?}", op), } } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 5e74849cd9989..1472a7b76005f 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -914,6 +914,19 @@ impl Unparser<'_> { BinaryOperator::StringConcat => Ok(Operator::StringConcat), BinaryOperator::AtArrow => Ok(Operator::AtArrow), BinaryOperator::ArrowAt => Ok(Operator::ArrowAt), + BinaryOperator::Arrow => Ok(Operator::Arrow), + BinaryOperator::LongArrow => Ok(Operator::LongArrow), + BinaryOperator::HashArrow => 
Ok(Operator::HashArrow), + BinaryOperator::HashLongArrow => Ok(Operator::HashLongArrow), + BinaryOperator::AtAt => Ok(Operator::AtAt), + BinaryOperator::DuckIntegerDivide | BinaryOperator::MyIntegerDivide => { + Ok(Operator::IntegerDivide) + } + BinaryOperator::HashMinus => Ok(Operator::HashMinus), + BinaryOperator::AtQuestion => Ok(Operator::AtQuestion), + BinaryOperator::Question => Ok(Operator::Question), + BinaryOperator::QuestionAnd => Ok(Operator::QuestionAnd), + BinaryOperator::QuestionPipe => Ok(Operator::QuestionPipe), _ => not_impl_err!("unsupported operation: {op:?}"), } } @@ -951,6 +964,17 @@ impl Unparser<'_> { Operator::StringConcat => Ok(BinaryOperator::StringConcat), Operator::AtArrow => Ok(BinaryOperator::AtArrow), Operator::ArrowAt => Ok(BinaryOperator::ArrowAt), + Operator::Arrow => Ok(BinaryOperator::Arrow), + Operator::LongArrow => Ok(BinaryOperator::LongArrow), + Operator::HashArrow => Ok(BinaryOperator::HashArrow), + Operator::HashLongArrow => Ok(BinaryOperator::HashLongArrow), + Operator::AtAt => Ok(BinaryOperator::AtAt), + Operator::IntegerDivide => Ok(BinaryOperator::DuckIntegerDivide), + Operator::HashMinus => Ok(BinaryOperator::HashMinus), + Operator::AtQuestion => Ok(BinaryOperator::AtQuestion), + Operator::Question => Ok(BinaryOperator::Question), + Operator::QuestionAnd => Ok(BinaryOperator::QuestionAnd), + Operator::QuestionPipe => Ok(BinaryOperator::QuestionPipe), } } diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 44baf277786df..07bf0cb96aa33 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -1127,6 +1127,17 @@ pub fn operator_to_name(op: Operator) -> &'static str { Operator::StringConcat => "str_concat", Operator::AtArrow => "at_arrow", Operator::ArrowAt => "arrow_at", + Operator::Arrow => "arrow", + Operator::LongArrow => "long_arrow", + Operator::HashArrow => "hash_arrow", + 
Operator::HashLongArrow => "hash_long_arrow", + Operator::AtAt => "at_at", + Operator::IntegerDivide => "integer_divide", + Operator::HashMinus => "hash_minus", + Operator::AtQuestion => "at_question", + Operator::Question => "question", + Operator::QuestionAnd => "question_and", + Operator::QuestionPipe => "question_pipe", Operator::BitwiseXor => "bitwise_xor", Operator::BitwiseShiftRight => "bitwise_shift_right", Operator::BitwiseShiftLeft => "bitwise_shift_left", From 2fe7da8e10bc4a75d7bdabce1aa6a11c33a04051 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Fri, 21 Mar 2025 05:32:05 +0800 Subject: [PATCH 2/3] slt for new operators Signed-off-by: Ruihang Xia --- .../expr-common/src/type_coercion/binary.rs | 27 +++++++--- .../physical-expr/src/expressions/binary.rs | 7 ++- datafusion/sqllogictest/test_files/expr.slt | 50 +++++++++++++++++++ 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 7993a3f97ae9d..c8331dfe98c48 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -30,8 +30,8 @@ use arrow::datatypes::{ }; use datafusion_common::types::NativeType; use datafusion_common::{ - exec_err, internal_err, plan_datafusion_err, plan_err, Diagnostic, Result, Span, - Spans, + exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Diagnostic, + Result, Span, Spans, }; use itertools::Itertools; @@ -181,13 +181,20 @@ impl<'a> BinaryTypeCoercer<'a> { ) }) } - AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt | HashMinus | - AtQuestion | Question | QuestionAnd | QuestionPipe |IntegerDivide=> { - // These operators check for whether one array is contained in another or other JSON operations. - // The result type is boolean. Signature::comparison defines this signature. 
- array_coercion(self.lhs, self.rhs).map(Signature::comparison).ok_or_else(|| { + AtArrow | ArrowAt => { + // Array contains or search (similar to LIKE) operation + array_coercion(self.lhs, self.rhs) + .or_else(|| like_coercion(self.lhs, self.rhs)).map(Signature::comparison).ok_or_else(|| { + plan_datafusion_err!( + "Cannot infer common argument type for operation {} {} {}", self.lhs, self.op, self.rhs + ) + }) + } + AtAt => { + // text search has similar signature to LIKE + like_coercion(self.lhs, self.rhs).map(Signature::comparison).ok_or_else(|| { plan_datafusion_err!( - "Cannot infer common array type for operation {} {} {}", self.lhs, self.op, self.rhs + "Cannot infer common argument type for AtAt operation {} {} {}", self.lhs, self.op, self.rhs ) }) } @@ -248,6 +255,10 @@ impl<'a> BinaryTypeCoercer<'a> { "Cannot coerce arithmetic expression {} {} {} to valid types", self.lhs, self.op, self.rhs ) } + }, + IntegerDivide | Arrow | LongArrow | HashArrow | HashLongArrow + | HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe => { + not_impl_err!("Operator {} is not yet supported", self.op) } }; result.map_err(|err| { diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index c96d37075666b..f2c68729c56a4 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -33,7 +33,7 @@ use arrow::compute::{cast, ilike, like, nilike, nlike}; use arrow::datatypes::*; use arrow::error::ArrowError; use datafusion_common::cast::as_boolean_array; -use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_common::{internal_err, not_impl_err, Result, ScalarValue}; use datafusion_expr::binary::BinaryTypeCoercer; use datafusion_expr::interval_arithmetic::{apply_operator, Interval}; use datafusion_expr::sort_properties::ExprProperties; @@ -796,7 +796,10 @@ impl BinaryExpr { AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | 
HashLongArrow | AtAt | HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe | IntegerDivide => { - unreachable!("These operators should be rewritten to functions") + not_impl_err!( + "Binary operator '{:?}' is not supported in the physical expr", + self.op + ) } } } diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 24f7c3ea15c6f..e4d0b72338569 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -1648,6 +1648,56 @@ true true true +#### Other binary operators + +# ArrowAt for strings +query error +select 'foo' <@ 'bar' + +# AtArrow for strings +query error +select 'foo' @> 'bar' + +# AtAt for strings +query error +select 'foo' @@ 'bar' + +# Arrow for arrays +query error +select make_array(1,2,3) -> 2 + +# LongArrow for arrays +query error +select make_array(1,2,3) ->> 2 + +# HashArrow for structs +query error +select struct(1,2,3) #> 0 + +# HashLongArrow for structs +query error +select struct(1,2,3) #>> 0 + +# HashMinus for structs +query error +select struct(1,2,3) #- 0 + +# AtQuestion for JSON/structs +query error +select struct(1,2,3) @? 'a.b.c' + +# Question for JSON/structs +query error +select struct(1,2,3) ? 'a.b.c' + +# QuestionPipe for JSON/structs +query error +select struct(1,2,3) ?| array['a','b','c'] + +# QuestionAnd for JSON/structs +query error +select struct(1,2,3) ?& array['a','b','c'] + #### binary_mathematical_operator_with_null_lt # 1. 
Integer and NULL From 5828cba5f9f1295cf40dfcbeef7ea9690bfbe9ca Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Fri, 21 Mar 2025 07:19:09 +0800 Subject: [PATCH 3/3] add document about availability Signed-off-by: Ruihang Xia --- datafusion/expr-common/src/operator.rs | 38 ++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/datafusion/expr-common/src/operator.rs b/datafusion/expr-common/src/operator.rs index 107d9c472d8ba..19fc6b80745e2 100644 --- a/datafusion/expr-common/src/operator.rs +++ b/datafusion/expr-common/src/operator.rs @@ -82,31 +82,63 @@ pub enum Operator { BitwiseShiftLeft, /// String concat StringConcat, - /// At arrow, like `@>` + /// At arrow, like `@>`. + /// + /// Currently only supported to be used with lists: + /// ```sql + /// select [1,2,3] @> [1,3] + /// ``` AtArrow, - /// Arrow at, like `<@` + /// Arrow at, like `<@`. + /// + /// Currently only supported to be used with lists: + /// ```sql + /// select [1,3] <@ [1,2,3] + /// ``` ArrowAt, - /// Arrow, like `->` + /// Arrow, like `->`. + /// + /// Not implemented in DataFusion yet. Arrow, /// Long arrow, like `->>` + /// + /// Not implemented in DataFusion yet. LongArrow, /// Hash arrow, like `#>` + /// + /// Not implemented in DataFusion yet. HashArrow, /// Hash long arrow, like `#>>` + /// + /// Not implemented in DataFusion yet. HashLongArrow, /// At at, like `@@` + /// + /// Not implemented in DataFusion yet. AtAt, /// Integer division operator, like `DIV` from MySQL or `//` from DuckDB + /// + /// Not implemented in DataFusion yet. IntegerDivide, /// Hash Minis, like `#-` + /// + /// Not implemented in DataFusion yet. HashMinus, /// At question, like `@?` + /// + /// Not implemented in DataFusion yet. AtQuestion, /// Question, like `?` + /// + /// Not implemented in DataFusion yet. Question, /// Question and, like `?&` + /// + /// Not implemented in DataFusion yet. QuestionAnd, /// Question pipe, like `?|` + /// + /// Not implemented in DataFusion yet. 
QuestionPipe, }