diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 023d319d78d7..6f96ddbeaf3d 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -95,6 +95,9 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> { | Expr::TryCast { .. } | Expr::BinaryExpr { .. } | Expr::Between { .. } + | Expr::Like { .. } + | Expr::ILike { .. } + | Expr::SimilarTo { .. } | Expr::InList { .. } | Expr::Exists { .. } | Expr::InSubquery { .. } diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 747cd1a204b5..ce8722a819a1 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -270,6 +270,66 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Ok(format!("{} BETWEEN {} AND {}", expr, low, high)) } } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + // The leading space separates the escape clause from the pattern. + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT LIKE {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} LIKE {}{}", expr, pattern, escape)) + } + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + // The leading space separates the escape clause from the pattern. + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT ILIKE {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} ILIKE {}{}", expr, pattern, escape)) + } + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + // The leading space separates the escape clause from the pattern. + let escape = 
if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT SIMILAR TO {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} SIMILAR TO {}{}", expr, pattern, escape)) + } + } Expr::Sort { .. } => Err(DataFusionError::Internal( "Create physical name does not support sort expression".to_string(), )), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index f52573b2e78e..daf7670be8c8 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -100,6 +100,27 @@ pub enum Expr { /// Right-hand side of the expression right: Box, }, + /// LIKE expression + Like { + negated: bool, + expr: Box, + pattern: Box, + escape_char: Option, + }, + /// Case-insensitive LIKE expression + ILike { + negated: bool, + expr: Box, + pattern: Box, + escape_char: Option, + }, + /// LIKE expression that uses regular expressions + SimilarTo { + negated: bool, + expr: Box, + pattern: Box, + escape_char: Option, + }, /// Negation of an expression. The expression's type must be a boolean to make sense. Not(Box), /// Whether an expression is not Null. This expression is never null. @@ -347,6 +368,9 @@ impl Expr { Expr::InSubquery { .. } => "InSubquery", Expr::IsNotNull(..) => "IsNotNull", Expr::IsNull(..) => "IsNull", + Expr::Like { .. } => "Like", + Expr::ILike { .. } => "ILike", + Expr::SimilarTo { .. } => "SimilarTo", Expr::IsTrue(..) => "IsTrue", Expr::IsFalse(..) => "IsFalse", Expr::IsUnknown(..) 
=> "IsUnknown", @@ -483,7 +507,42 @@ impl Not for Expr { type Output = Self; fn not(self) -> Self::Output { - Expr::Not(Box::new(self)) + match self { + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => Expr::Like { + negated: !negated, + expr, + pattern, + escape_char, + }, + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => Expr::ILike { + negated: !negated, + expr, + pattern, + escape_char, + }, + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => Expr::SimilarTo { + negated: !negated, + expr, + pattern, + escape_char, + }, + _ => Expr::Not(Box::new(self)), + } } } @@ -662,6 +721,54 @@ impl fmt::Debug for Expr { write!(f, "{:?} BETWEEN {:?} AND {:?}", expr, low, high) } } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + write!(f, "{:?}", expr)?; + if *negated { + write!(f, " NOT")?; + } + if let Some(char) = escape_char { + write!(f, " LIKE {:?} ESCAPE '{}'", pattern, char) + } else { + write!(f, " LIKE {:?}", pattern) + } + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + write!(f, "{:?}", expr)?; + if *negated { + write!(f, " NOT")?; + } + if let Some(char) = escape_char { + write!(f, " ILIKE {:?} ESCAPE '{}'", pattern, char) + } else { + write!(f, " ILIKE {:?}", pattern) + } + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + write!(f, "{:?}", expr)?; + if *negated { + write!(f, " NOT")?; + } + if let Some(char) = escape_char { + write!(f, " SIMILAR TO {:?} ESCAPE '{}'", pattern, char) + } else { + write!(f, " SIMILAR TO {:?}", pattern) + } + } Expr::InList { expr, list, 
@@ -777,6 +884,66 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { let right = create_name(right, input_schema)?; Ok(format!("{} {} {}", left, op, right)) } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + // Name operands recursively; the escape clause brings its own leading space. + let expr = create_name(expr, input_schema)?; + let pattern = create_name(pattern, input_schema)?; + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT LIKE {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} LIKE {}{}", expr, pattern, escape)) + } + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + // Name operands recursively; the escape clause brings its own leading space. + let expr = create_name(expr, input_schema)?; + let pattern = create_name(pattern, input_schema)?; + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT ILIKE {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} ILIKE {}{}", expr, pattern, escape)) + } + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + // Name operands recursively; the escape clause brings its own leading space. + let expr = create_name(expr, input_schema)?; + let pattern = create_name(pattern, input_schema)?; + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT SIMILAR TO {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} SIMILAR TO {}{}", expr, pattern, escape)) + } + } Expr::Case { expr, when_then_expr, diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index d2e44f3519e9..e046bd3757c8 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -128,6 +128,39 @@ impl ExprRewritable for Expr { op, right: rewrite_boxed(right, rewriter)?, }, + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => Expr::Like { + negated, + expr: rewrite_boxed(expr, rewriter)?, + pattern: rewrite_boxed(pattern, rewriter)?, + escape_char, + }, + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => Expr::ILike { + negated, + expr: rewrite_boxed(expr, rewriter)?, + pattern: rewrite_boxed(pattern, rewriter)?, + escape_char, + }, + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => Expr::SimilarTo { + negated, + expr: rewrite_boxed(expr, rewriter)?, + pattern: rewrite_boxed(pattern, rewriter)?, + escape_char, + }, Expr::Not(expr) => Expr::Not(rewrite_boxed(expr, rewriter)?), Expr::IsNotNull(expr) => Expr::IsNotNull(rewrite_boxed(expr, rewriter)?), Expr::IsNull(expr) => Expr::IsNull(rewrite_boxed(expr, rewriter)?), diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 0900a29993f6..58b46cc2d868 100644 --- 
a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -123,6 +123,9 @@ impl ExprSchemable for Expr { op, &right.get_type(schema)?, ), + Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { + Ok(DataType::Boolean) + } Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), @@ -207,6 +210,12 @@ impl ExprSchemable for Expr { ref right, .. } => Ok(left.nullable(input_schema)? || right.nullable(input_schema)?), + // A pattern match is NULL when either the value or the pattern is NULL. + Expr::Like { expr, pattern, .. } + | Expr::ILike { expr, pattern, .. } + | Expr::SimilarTo { expr, pattern, .. } => { + Ok(expr.nullable(input_schema)? || pattern.nullable(input_schema)?) + } Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs index 3166ced7c24b..0eea32b51544 100644 --- a/datafusion/expr/src/expr_visitor.rs +++ b/datafusion/expr/src/expr_visitor.rs @@ -134,6 +134,18 @@ impl ExprVisitable for Expr { let visitor = left.accept(visitor)?; right.accept(visitor) } + Expr::Like { expr, pattern, .. } => { + let visitor = expr.accept(visitor)?; + pattern.accept(visitor) + } + Expr::ILike { expr, pattern, .. } => { + let visitor = expr.accept(visitor)?; + pattern.accept(visitor) + } + Expr::SimilarTo { expr, pattern, .. } => { + let visitor = expr.accept(visitor)?; + pattern.accept(visitor) + } Expr::Between { expr, low, high, .. } => { diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 7d3f78b8fa31..b30311a9c6b6 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -79,6 +79,9 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> { Expr::Alias(_, _) | Expr::Literal(_) | Expr::BinaryExpr { .. } + | Expr::Like { .. } + | Expr::ILike { .. } + | Expr::SimilarTo { .. 
} | Expr::Not(_) | Expr::IsNotNull(_) | Expr::IsNull(_) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 305283d9943d..aad9eadb5b8e 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -439,6 +439,18 @@ impl ExprIdentifierVisitor<'_> { desc.push_str("Between-"); desc.push_str(&negated.to_string()); } + Expr::Like { negated, .. } => { + desc.push_str("Like-"); + desc.push_str(&negated.to_string()); + } + Expr::ILike { negated, .. } => { + desc.push_str("ILike-"); + desc.push_str(&negated.to_string()); + } + Expr::SimilarTo { negated, .. } => { + desc.push_str("SimilarTo-"); + desc.push_str(&negated.to_string()); + } Expr::Case { .. } => { desc.push_str("Case-"); } diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 1735e80ef780..e731db6b4d11 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -470,6 +470,9 @@ impl<'a> ConstEvaluator<'a> { | Expr::IsNotUnknown(_) | Expr::Negative(_) | Expr::Between { .. } + | Expr::Like { .. } + | Expr::ILike { .. } + | Expr::SimilarTo { .. } | Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. 
} diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index b3ea22f43601..aa6d07928d2e 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -319,6 +319,10 @@ message LogicalExprNode { IsNotTrue is_not_true = 28; IsNotFalse is_not_false = 29; IsNotUnknown is_not_unknown = 30; + LikeNode like = 31; + ILikeNode ilike = 32; + SimilarToNode similar_to = 33; + } } @@ -554,6 +558,27 @@ message BetweenNode { LogicalExprNode high = 4; } +message LikeNode { + bool negated = 1; + LogicalExprNode expr = 2; + LogicalExprNode pattern = 3; + string escape_char = 4; +} + +message ILikeNode { + bool negated = 1; + LogicalExprNode expr = 2; + LogicalExprNode pattern = 3; + string escape_char = 4; +} + +message SimilarToNode { + bool negated = 1; + LogicalExprNode expr = 2; + LogicalExprNode pattern = 3; + string escape_char = 4; +} + message CaseNode { LogicalExprNode expr = 1; repeated WhenThen when_then_expr = 2; diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 39544dca9485..0a4e0e68bbe1 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -930,6 +930,24 @@ pub fn parse_expr( low: Box::new(parse_required_expr(&between.low, registry, "expr")?), high: Box::new(parse_required_expr(&between.high, registry, "expr")?), }), + ExprType::Like(like) => Ok(Expr::Like { + expr: Box::new(parse_required_expr(&like.expr, registry, "expr")?), + negated: like.negated, + pattern: Box::new(parse_required_expr(&like.pattern, registry, "pattern")?), + escape_char: parse_escape_char(&like.escape_char)?, + }), + ExprType::Ilike(like) => Ok(Expr::ILike { + expr: Box::new(parse_required_expr(&like.expr, registry, "expr")?), + negated: like.negated, + pattern: Box::new(parse_required_expr(&like.pattern, registry, "pattern")?), + escape_char: parse_escape_char(&like.escape_char)?, + }), + ExprType::SimilarTo(like) => Ok(Expr::SimilarTo { 
+ expr: Box::new(parse_required_expr(&like.expr, registry, "expr")?), + negated: like.negated, + pattern: Box::new(parse_required_expr(&like.pattern, registry, "pattern")?), + escape_char: parse_escape_char(&like.escape_char)?, + }), ExprType::Case(case) => { let when_then_expr = case .when_then_expr @@ -1214,6 +1232,24 @@ pub fn parse_expr( } } +/// Parse an optional escape_char for Like, ILike, SimilarTo. +/// +/// An empty string means no escape character, a string holding exactly one +/// `char` (of any UTF-8 byte length) yields that character, and anything longer +/// is rejected with `DataFusionError::Internal`. +fn parse_escape_char(s: &str) -> Result<Option<char>, DataFusionError> { + // Count `char`s, not bytes: `str::len` is a byte length, so a non-ASCII + // escape character serialized via `char::to_string` would be rejected. + let mut chars = s.chars(); + match (chars.next(), chars.next()) { + (None, _) => Ok(None), + (Some(ch), None) => Ok(Some(ch)), + _ => Err(DataFusionError::Internal( + "Invalid length for escape char".to_string(), + )), + } +} + impl TryFrom for WindowFrame { type Error = Error; diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index b98e73a74009..e64096a4e482 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -454,6 +454,60 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { expr_type: Some(ExprType::BinaryExpr(binary_expr)), } } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + let pb = Box::new(protobuf::LikeNode { + negated: *negated, + expr: Some(Box::new(expr.as_ref().try_into()?)), + pattern: Some(Box::new(pattern.as_ref().try_into()?)), + escape_char: escape_char + .map(|ch| ch.to_string()) + .unwrap_or_else(|| "".to_string()), + }); + Self { + expr_type: Some(ExprType::Like(pb)), + } + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + let pb = Box::new(protobuf::ILikeNode { + negated: *negated, + expr: Some(Box::new(expr.as_ref().try_into()?)), + pattern: Some(Box::new(pattern.as_ref().try_into()?)), + escape_char: escape_char + .map(|ch| ch.to_string()) + .unwrap_or_else(|| "".to_string()), + }); + Self { + expr_type: Some(ExprType::Ilike(pb)), + } + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + let pb = Box::new(protobuf::SimilarToNode { + negated: *negated, + expr: Some(Box::new(expr.as_ref().try_into()?)), + 
pattern: Some(Box::new(pattern.as_ref().try_into()?)), + escape_char: escape_char + .map(|ch| ch.to_string()) + .unwrap_or_else(|| "".to_string()), + }); + Self { + expr_type: Some(ExprType::SimilarTo(pb)), + } + } Expr::WindowFunction { ref fun, ref args, diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index ebe850bdff56..a04dff4f3460 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -232,6 +232,39 @@ where op: *op, right: Box::new(clone_with_replacement(right, replacement_fn)?), }), + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => Ok(Expr::Like { + negated: *negated, + expr: Box::new(clone_with_replacement(expr, replacement_fn)?), + pattern: Box::new(clone_with_replacement(pattern, replacement_fn)?), + escape_char: *escape_char, + }), + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => Ok(Expr::ILike { + negated: *negated, + expr: Box::new(clone_with_replacement(expr, replacement_fn)?), + pattern: Box::new(clone_with_replacement(pattern, replacement_fn)?), + escape_char: *escape_char, + }), + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => Ok(Expr::SimilarTo { + negated: *negated, + expr: Box::new(clone_with_replacement(expr, replacement_fn)?), + pattern: Box::new(clone_with_replacement(pattern, replacement_fn)?), + escape_char: *escape_char, + }), Expr::Case { expr: case_expr_opt, when_then_expr,