Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions datafusion/core/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2016,14 +2016,6 @@ mod tests {
col("c1").eq(bool_expr.clone()),
// u32 AND bool
col("c2").and(bool_expr),
// utf8 LIKE u32
col("c1").like(col("c2")),
// utf8 NOT LIKE u32
col("c1").not_like(col("c2")),
// utf8 ILIKE u32
col("c1").ilike(col("c2")),
// utf8 NOT ILIKE u32
col("c1").not_ilike(col("c2")),
];
for case in cases {
let logical_plan = test_csv_scan().await?.project(vec![case.clone()]);
Expand Down
11 changes: 4 additions & 7 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,6 @@ impl BinaryExpr {
match self.op {
Operator::Or => 5,
Operator::And => 10,
Operator::Like | Operator::NotLike | Operator::ILike | Operator::NotILike => {
19
}
Operator::NotEq
| Operator::Eq
| Operator::Lt
Expand Down Expand Up @@ -685,22 +682,22 @@ impl Expr {

/// Return `self LIKE other`
pub fn like(self, other: Expr) -> Expr {
binary_expr(self, Operator::Like, other)
Expr::Like(Like::new(false, Box::new(self), Box::new(other), None))
}

/// Return `self NOT LIKE other`
pub fn not_like(self, other: Expr) -> Expr {
binary_expr(self, Operator::NotLike, other)
Expr::Like(Like::new(true, Box::new(self), Box::new(other), None))
}

/// Return `self ILIKE other`
pub fn ilike(self, other: Expr) -> Expr {
binary_expr(self, Operator::ILike, other)
Expr::ILike(Like::new(false, Box::new(self), Box::new(other), None))
}

/// Return `self NOT ILIKE other`
pub fn not_ilike(self, other: Expr) -> Expr {
binary_expr(self, Operator::NotILike, other)
Expr::ILike(Like::new(true, Box::new(self), Box::new(other), None))
}

/// Return `self AS name` alias expression
Expand Down
22 changes: 1 addition & 21 deletions datafusion/expr/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ pub enum Operator {
And,
/// Logical OR, like `||`
Or,
/// Matches a wildcard pattern
Like,
/// Does not match a wildcard pattern
NotLike,
/// Matches a wildcard pattern, ignores case
ILike,
/// Does not match a wildcard pattern, ignores case
NotILike,
/// IS DISTINCT FROM
IsDistinctFrom,
/// IS NOT DISTINCT FROM
Expand Down Expand Up @@ -96,10 +88,6 @@ impl Operator {
Operator::LtEq => Some(Operator::Gt),
Operator::Gt => Some(Operator::LtEq),
Operator::GtEq => Some(Operator::Lt),
Operator::Like => Some(Operator::NotLike),
Operator::NotLike => Some(Operator::Like),
Operator::ILike => Some(Operator::NotILike),
Operator::NotILike => Some(Operator::ILike),
Operator::IsDistinctFrom => Some(Operator::IsNotDistinctFrom),
Operator::IsNotDistinctFrom => Some(Operator::IsDistinctFrom),
Operator::Plus
Expand Down Expand Up @@ -133,11 +121,7 @@ impl Operator {
Operator::LtEq => Some(Operator::GtEq),
Operator::Gt => Some(Operator::Lt),
Operator::GtEq => Some(Operator::LtEq),
Operator::Like
| Operator::NotLike
| Operator::ILike
| Operator::NotILike
| Operator::IsDistinctFrom
Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom
| Operator::Plus
| Operator::Minus
Expand Down Expand Up @@ -176,10 +160,6 @@ impl fmt::Display for Operator {
Operator::Modulo => "%",
Operator::And => "AND",
Operator::Or => "OR",
Operator::Like => "LIKE",
Operator::NotLike => "NOT LIKE",
Operator::ILike => "ILIKE",
Operator::NotILike => "NOT ILIKE",
Operator::RegexMatch => "~",
Operator::RegexIMatch => "~*",
Operator::RegexNotMatch => "!~",
Expand Down
38 changes: 5 additions & 33 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ pub fn binary_operator_data_type(
| Operator::NotEq
| Operator::And
| Operator::Or
| Operator::Like
| Operator::NotLike
| Operator::ILike
| Operator::NotILike
| Operator::Lt
| Operator::Gt
| Operator::GtEq
Expand Down Expand Up @@ -117,10 +113,6 @@ pub fn coerce_types(
| Operator::Gt
| Operator::GtEq
| Operator::LtEq => comparison_coercion(lhs_type, rhs_type),
// "like" operators operate on strings and always return a boolean
Operator::Like | Operator::NotLike | Operator::ILike | Operator::NotILike => {
like_coercion(lhs_type, rhs_type)
}
Operator::Plus | Operator::Minus
if is_date(lhs_type) || is_timestamp(lhs_type) =>
{
Expand Down Expand Up @@ -525,7 +517,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>

/// coercion rules for like operations.
/// This is a union of string coercion rules and dictionary coercion rules
fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
string_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
.or_else(|| null_coercion(lhs_type, rhs_type))
Expand Down Expand Up @@ -879,30 +871,10 @@ mod tests {

#[test]
fn test_type_coercion() -> Result<()> {
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::Like,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::NotLike,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::ILike,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::NotILike,
DataType::Utf8
);
// test like coercion rule
let result = like_coercion(&DataType::Utf8, &DataType::Utf8);
assert_eq!(result, Some(DataType::Utf8));

test_coercion_binary_rule!(
DataType::Utf8,
DataType::Date32,
Expand Down
16 changes: 15 additions & 1 deletion datafusion/optimizer/src/simplify_expressions/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_expr::{
expr::{Between, BinaryExpr},
expr_fn::{and, concat_ws, or},
lit, BuiltinScalarFunction, Expr, Operator,
lit, BuiltinScalarFunction, Expr, Like, Operator,
};

pub static POWS_OF_TEN: [i128; 38] = [
Expand Down Expand Up @@ -232,6 +232,20 @@ pub fn negate_clause(expr: Expr) -> Expr {
between.low,
between.high,
)),
// not (A like B) ===> A not like B
Expr::Like(like) => Expr::Like(Like::new(
!like.negated,
like.expr,
like.pattern,
like.escape_char,
)),
// not (A ilike B) ===> A not ilike B
Expr::ILike(like) => Expr::ILike(Like::new(
!like.negated,
like.expr,
like.pattern,
like.escape_char,
)),
// use not clause
_ => Expr::Not(Box::new(expr)),
}
Expand Down
22 changes: 16 additions & 6 deletions datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ use datafusion_common::{
use datafusion_expr::expr::{self, Between, BinaryExpr, Case, Like, WindowFunction};
use datafusion_expr::expr_rewriter::{ExprRewriter, RewriteRecursion};
use datafusion_expr::logical_plan::Subquery;
use datafusion_expr::type_coercion::binary::{coerce_types, comparison_coercion};
use datafusion_expr::type_coercion::binary::{
coerce_types, comparison_coercion, like_coercion,
};
use datafusion_expr::type_coercion::functions::data_types;
use datafusion_expr::type_coercion::other::{
get_coerce_type_for_case_when, get_coerce_type_for_list,
Expand Down Expand Up @@ -175,8 +177,11 @@ impl ExprRewriter for TypeCoercionRewriter {
}) => {
let left_type = expr.get_type(&self.schema)?;
let right_type = pattern.get_type(&self.schema)?;
let coerced_type =
coerce_types(&left_type, &Operator::Like, &right_type)?;
let coerced_type = like_coercion(&left_type, &right_type).ok_or_else(|| {
DataFusionError::Plan(format!(
"There isn't a common type to coerce {left_type} and {right_type} in LIKE expression"
))
})?;
let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?);
let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?);
let expr = Expr::Like(Like::new(negated, expr, pattern, escape_char));
Expand All @@ -190,8 +195,11 @@ impl ExprRewriter for TypeCoercionRewriter {
}) => {
let left_type = expr.get_type(&self.schema)?;
let right_type = pattern.get_type(&self.schema)?;
let coerced_type =
coerce_types(&left_type, &Operator::ILike, &right_type)?;
let coerced_type = like_coercion(&left_type, &right_type).ok_or_else(|| {
DataFusionError::Plan(format!(
"There isn't a common type to coerce {left_type} and {right_type} in ILIKE expression"
))
})?;
let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?);
let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?);
let expr = Expr::ILike(Like::new(negated, expr, pattern, escape_char));
Expand Down Expand Up @@ -932,7 +940,9 @@ mod test {
let plan = LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty)?);
let err = assert_optimized_plan_eq(&plan, expected);
assert!(err.is_err());
assert!(err.unwrap_err().to_string().contains("'Int64 LIKE Utf8' can't be evaluated because there isn't a common type to coerce the types to"));
assert!(err.unwrap_err().to_string().contains(
"There isn't a common type to coerce Int64 and Utf8 in LIKE expression"
));
Ok(())
}

Expand Down
86 changes: 2 additions & 84 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ use arrow::compute::kernels::arithmetic::{
multiply_scalar, subtract, subtract_scalar,
};
use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
use arrow::compute::kernels::comparison::regexp_is_match_utf8;
use arrow::compute::kernels::comparison::regexp_is_match_utf8_scalar;
use arrow::compute::kernels::comparison::{
eq_dyn_binary_scalar, gt_dyn_binary_scalar, gt_eq_dyn_binary_scalar,
lt_dyn_binary_scalar, lt_eq_dyn_binary_scalar, neq_dyn_binary_scalar,
Expand All @@ -47,13 +49,6 @@ use arrow::compute::kernels::comparison::{
use arrow::compute::kernels::comparison::{
eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar,
};
use arrow::compute::kernels::comparison::{
ilike_utf8, like_utf8, nilike_utf8, nlike_utf8, regexp_is_match_utf8,
};
use arrow::compute::kernels::comparison::{
ilike_utf8_scalar, like_utf8_scalar, nilike_utf8_scalar, nlike_utf8_scalar,
regexp_is_match_utf8_scalar,
};

use adapter::{eq_dyn, gt_dyn, gt_eq_dyn, lt_dyn, lt_eq_dyn, neq_dyn};
use arrow::compute::kernels::concat_elements::concat_elements_utf8;
Expand Down Expand Up @@ -344,19 +339,6 @@ macro_rules! compute_op {
}};
}

macro_rules! binary_string_array_op_scalar {
($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{
let result: Result<Arc<dyn Array>> = match $LEFT.data_type() {
DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray, $OP_TYPE),
other => Err(DataFusionError::Internal(format!(
"Data type {:?} not supported for scalar operation '{}' on string array",
other, stringify!($OP)
))),
};
Some(result)
}};
}

macro_rules! binary_string_array_op {
($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
match $LEFT.data_type() {
Expand Down Expand Up @@ -941,18 +923,6 @@ impl BinaryExpr {
Operator::NotEq => {
binary_array_op_dyn_scalar!(array, scalar.clone(), neq, bool_type)
}
Operator::Like => {
binary_string_array_op_scalar!(array, scalar.clone(), like, bool_type)
}
Operator::NotLike => {
binary_string_array_op_scalar!(array, scalar.clone(), nlike, bool_type)
}
Operator::ILike => {
binary_string_array_op_scalar!(array, scalar.clone(), ilike, bool_type)
}
Operator::NotILike => {
binary_string_array_op_scalar!(array, scalar.clone(), nilike, bool_type)
}
Operator::Plus => {
binary_primitive_array_op_scalar!(array, scalar.clone(), add)
}
Expand Down Expand Up @@ -1053,10 +1023,6 @@ impl BinaryExpr {
right_data_type: &DataType,
) -> Result<ArrayRef> {
match &self.op {
Operator::Like => binary_string_array_op!(left, right, like),
Operator::NotLike => binary_string_array_op!(left, right, nlike),
Operator::ILike => binary_string_array_op!(left, right, ilike),
Operator::NotILike => binary_string_array_op!(left, right, nilike),
Operator::Lt => lt_dyn(&left, &right),
Operator::LtEq => lt_eq_dyn(&left, &right),
Operator::Gt => gt_dyn(&left, &right),
Expand Down Expand Up @@ -1347,54 +1313,6 @@ mod tests {
DataType::Float32,
vec![2f32],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hello world", "world"],
StringArray,
DataType::Utf8,
vec!["%hello%", "%hello%"],
Operator::Like,
BooleanArray,
DataType::Boolean,
vec![true, false],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hello world", "world"],
StringArray,
DataType::Utf8,
vec!["%hello%", "%hello%"],
Operator::NotLike,
BooleanArray,
DataType::Boolean,
vec![false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hEllo world", "world"],
StringArray,
DataType::Utf8,
vec!["%helLo%", "%helLo%"],
Operator::ILike,
BooleanArray,
DataType::Boolean,
vec![true, false],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hEllo world", "world"],
StringArray,
DataType::Utf8,
vec!["%helLo%", "%helLo%"],
Operator::NotILike,
BooleanArray,
DataType::Boolean,
vec![false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
Expand Down
Loading