From 5b5717a5ebf9bd820450357c60e8b593c2f658e8 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 30 Aug 2022 08:32:34 -0600 Subject: [PATCH] Add top-level Like, ILike, SimilarTo expressions in logical plan --- .../core/src/datasource/listing/helpers.rs | 3 + datafusion/core/src/physical_plan/planner.rs | 57 ++++++ datafusion/expr/src/expr.rs | 170 +++++++++++++++++- datafusion/expr/src/expr_rewriter.rs | 33 ++++ datafusion/expr/src/expr_schema.rs | 6 + datafusion/expr/src/expr_visitor.rs | 12 ++ datafusion/expr/src/utils.rs | 3 + .../optimizer/src/common_subexpr_eliminate.rs | 12 ++ .../optimizer/src/simplify_expressions.rs | 3 + datafusion/proto/proto/datafusion.proto | 25 +++ datafusion/proto/src/from_proto.rs | 29 +++ datafusion/proto/src/to_proto.rs | 54 ++++++ datafusion/sql/src/utils.rs | 33 ++++ 13 files changed, 439 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 6c018eda3e76..57579c721f09 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -89,6 +89,9 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> { | Expr::TryCast { .. } | Expr::BinaryExpr { .. } | Expr::Between { .. } + | Expr::Like { .. } + | Expr::ILike { .. } + | Expr::SimilarTo { .. } | Expr::InList { .. } | Expr::Exists { .. } | Expr::InSubquery { .. 
} diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 658da6a0fef6..ddbae40fcd16 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -246,6 +246,63 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Ok(format!("{} BETWEEN {} AND {}", expr, low, high)) } } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT LIKE {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} LIKE {}{}", expr, pattern, escape)) + } + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT ILIKE {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} ILIKE {}{}", expr, pattern, escape)) + } + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + let escape = if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + }; + if *negated { + Ok(format!("{} NOT SIMILAR TO {}{}", expr, pattern, escape)) + } else { + Ok(format!("{} SIMILAR TO {}{}", expr, pattern, escape)) + } + } Expr::Sort { .. 
} => Err(DataFusionError::Internal( "Create physical name does not support sort expression".to_string(), )), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ba6f7a96c29d..57fe98681054 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -100,6 +100,27 @@ pub enum Expr { /// Right-hand side of the expression right: Box<Expr>, }, + /// LIKE expression + Like { + negated: bool, + expr: Box<Expr>, + pattern: Box<Expr>, + escape_char: Option<char>, + }, + /// Case-insensitive LIKE expression + ILike { + negated: bool, + expr: Box<Expr>, + pattern: Box<Expr>, + escape_char: Option<char>, + }, + /// LIKE expression that uses regular expressions + SimilarTo { + negated: bool, + expr: Box<Expr>, + pattern: Box<Expr>, + escape_char: Option<char>, + }, /// Negation of an expression. The expression's type must be a boolean to make sense. Not(Box<Expr>), /// Whether an expression is not Null. This expression is never null. @@ -335,6 +356,9 @@ impl Expr { Expr::InSubquery { .. } => "InSubquery", Expr::IsNotNull(..) => "IsNotNull", Expr::IsNull(..) => "IsNull", + Expr::Like { .. } => "Like", + Expr::ILike { .. } => "ILike", + Expr::SimilarTo { .. } => "SimilarTo", Expr::Literal(..) => "Literal", Expr::Negative(..) => "Negative", Expr::Not(..) 
=> "Not", @@ -465,7 +489,42 @@ impl Not for Expr { type Output = Self; fn not(self) -> Self::Output { - Expr::Not(Box::new(self)) + match self { + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => Expr::Like { + negated: !negated, + expr, + pattern, + escape_char, + }, + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => Expr::ILike { + negated: !negated, + expr, + pattern, + escape_char, + }, + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => Expr::SimilarTo { + negated: !negated, + expr, + pattern, + escape_char, + }, + _ => Expr::Not(Box::new(self)), + } } } @@ -638,6 +697,54 @@ impl fmt::Debug for Expr { write!(f, "{:?} BETWEEN {:?} AND {:?}", expr, low, high) } } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + write!(f, "{:?}", expr)?; + if *negated { + write!(f, " NOT")?; + } + if let Some(char) = escape_char { + write!(f, " LIKE {:?} ESCAPE '{}'", pattern, char) + } else { + write!(f, " LIKE {:?}", pattern) + } + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + write!(f, "{:?}", expr)?; + if *negated { + write!(f, " NOT")?; + } + if let Some(char) = escape_char { + write!(f, " ILIKE {:?} ESCAPE '{}'", pattern, char) + } else { + write!(f, " ILIKE {:?}", pattern) + } + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + write!(f, "{:?}", expr)?; + if *negated { + write!(f, " NOT")?; + } + if let Some(char) = escape_char { + write!(f, " SIMILAR TO {:?} ESCAPE '{}'", pattern, char) + } else { + write!(f, " SIMILAR TO {:?}", pattern) + } + } Expr::InList { expr, list, @@ -753,6 +860,67 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { let right = create_name(right, input_schema)?; Ok(format!("{} {} {}", left, op, right)) } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + let s = format!( + "{} {} {}{}", + expr, + if *negated { "NOT LIKE" } else { "LIKE" }, + pattern, + if let Some(char) = escape_char { 
+ format!(" CHAR '{}'", char) + } else { + "".to_string() + } + ); + Ok(s) + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + let s = format!( + "{} {} {}{}", + expr, + if *negated { "NOT ILIKE" } else { "ILIKE" }, + pattern, + if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + } + ); + Ok(s) + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + let s = format!( + "{} {} {}{}", + expr, + if *negated { + "NOT SIMILAR TO" + } else { + "SIMILAR TO" + }, + pattern, + if let Some(char) = escape_char { + format!(" CHAR '{}'", char) + } else { + "".to_string() + } + ); + Ok(s) + } Expr::Case { expr, when_then_expr, diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index e8cf049dde6f..7521ae70ab84 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -127,6 +127,39 @@ impl ExprRewritable for Expr { op, right: rewrite_boxed(right, rewriter)?, }, + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => Expr::Like { + negated, + expr: rewrite_boxed(expr, rewriter)?, + pattern: rewrite_boxed(pattern, rewriter)?, + escape_char, + }, + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => Expr::ILike { + negated, + expr: rewrite_boxed(expr, rewriter)?, + pattern: rewrite_boxed(pattern, rewriter)?, + escape_char, + }, + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => Expr::SimilarTo { + negated, + expr: rewrite_boxed(expr, rewriter)?, + pattern: rewrite_boxed(pattern, rewriter)?, + escape_char, + }, Expr::Not(expr) => Expr::Not(rewrite_boxed(expr, rewriter)?), Expr::IsNotNull(expr) => Expr::IsNotNull(rewrite_boxed(expr, rewriter)?), Expr::IsNull(expr) => Expr::IsNull(rewrite_boxed(expr, rewriter)?), diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index bbb414655c26..29f2a908cde9 100644 --- a/datafusion/expr/src/expr_schema.rs +++ 
b/datafusion/expr/src/expr_schema.rs @@ -117,6 +117,9 @@ impl ExprSchemable for Expr { op, &right.get_type(schema)?, ), + Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { + Ok(DataType::Boolean) + } Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), @@ -193,6 +196,9 @@ impl ExprSchemable for Expr { ref right, .. } => Ok(left.nullable(input_schema)? || right.nullable(input_schema)?), + Expr::Like { expr, pattern, .. } + | Expr::ILike { expr, pattern, .. } + | Expr::SimilarTo { expr, pattern, .. } => Ok(expr.nullable(input_schema)? || pattern.nullable(input_schema)?), Expr::Wildcard => Err(DataFusionError::Internal( "Wildcard expressions are not valid in a logical query plan".to_owned(), )), diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs index 162db60a03c9..20bf14512999 100644 --- a/datafusion/expr/src/expr_visitor.rs +++ b/datafusion/expr/src/expr_visitor.rs @@ -128,6 +128,18 @@ impl ExprVisitable for Expr { let visitor = left.accept(visitor)?; right.accept(visitor) } + Expr::Like { expr, pattern, .. } => { + let visitor = expr.accept(visitor)?; + pattern.accept(visitor) + } + Expr::ILike { expr, pattern, .. } => { + let visitor = expr.accept(visitor)?; + pattern.accept(visitor) + } + Expr::SimilarTo { expr, pattern, .. } => { + let visitor = expr.accept(visitor)?; + pattern.accept(visitor) + } Expr::Between { expr, low, high, .. } => { diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 367c722d220a..a43b6a28c0d2 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -79,6 +79,9 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> { Expr::Alias(_, _) | Expr::Literal(_) | Expr::BinaryExpr { .. } + | Expr::Like { .. } + | Expr::ILike { .. } + | Expr::SimilarTo { .. 
} | Expr::Not(_) | Expr::IsNotNull(_) | Expr::IsNull(_) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 8627b404dce8..19d40555c803 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -421,6 +421,18 @@ impl ExprIdentifierVisitor<'_> { desc.push_str("Between-"); desc.push_str(&negated.to_string()); } + Expr::Like { negated, escape_char, .. } => { + desc.push_str("Like-"); + desc.push_str(&format!("{}-{:?}", negated, escape_char)); + } + Expr::ILike { negated, escape_char, .. } => { + desc.push_str("ILike-"); + desc.push_str(&format!("{}-{:?}", negated, escape_char)); + } + Expr::SimilarTo { negated, escape_char, .. } => { + desc.push_str("SimilarTo-"); + desc.push_str(&format!("{}-{:?}", negated, escape_char)); + } Expr::Case { .. } => { desc.push_str("Case-"); } diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 384fd09ae13b..5e05e5dadc85 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -464,6 +464,9 @@ impl<'a> ConstEvaluator<'a> { | Expr::IsNull(_) | Expr::Negative(_) | Expr::Between { .. } + | Expr::Like { .. } + | Expr::ILike { .. } + | Expr::SimilarTo { .. } | Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. 
} diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 0b4a43e83e71..790a3dc010da 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -312,6 +312,10 @@ message LogicalExprNode { CubeNode cube = 23; RollupNode rollup = 24; + + LikeNode like = 25; + ILikeNode ilike = 26; + SimilarToNode similar_to = 27; } } @@ -523,6 +527,27 @@ message BetweenNode { LogicalExprNode high = 4; } +message LikeNode { + bool negated = 1; + LogicalExprNode expr = 2; + LogicalExprNode pattern = 3; + string escape_char = 4; +} + +message ILikeNode { + bool negated = 1; + LogicalExprNode expr = 2; + LogicalExprNode pattern = 3; + string escape_char = 4; +} + +message SimilarToNode { + bool negated = 1; + LogicalExprNode expr = 2; + LogicalExprNode pattern = 3; + string escape_char = 4; +} + message CaseNode { LogicalExprNode expr = 1; repeated WhenThen when_then_expr = 2; diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 12f94ce3620e..823573a1c22e 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -914,6 +914,24 @@ pub fn parse_expr( low: Box::new(parse_required_expr(&between.low, registry, "expr")?), high: Box::new(parse_required_expr(&between.high, registry, "expr")?), }), + ExprType::Like(like) => Ok(Expr::Like { + expr: Box::new(parse_required_expr(&like.expr, registry, "expr")?), + negated: like.negated, + pattern: Box::new(parse_required_expr(&like.pattern, registry, "pattern")?), + escape_char: parse_escape_char(&like.escape_char)?, + }), + ExprType::Ilike(like) => Ok(Expr::ILike { + expr: Box::new(parse_required_expr(&like.expr, registry, "expr")?), + negated: like.negated, + pattern: Box::new(parse_required_expr(&like.pattern, registry, "pattern")?), + escape_char: parse_escape_char(&like.escape_char)?, + }), + ExprType::SimilarTo(like) => Ok(Expr::SimilarTo { + expr: 
Box::new(parse_required_expr(&like.expr, registry, "expr")?), + negated: like.negated, + pattern: Box::new(parse_required_expr(&like.pattern, registry, "pattern")?), + escape_char: parse_escape_char(&like.escape_char)?, + }), ExprType::Case(case) => { let when_then_expr = case .when_then_expr @@ -1198,6 +1216,17 @@ pub fn parse_expr( } } +/// Parse an optional escape_char for Like, ILike, SimilarTo: an empty string is `None`, exactly one character (counted in chars, not bytes) is `Some`, anything longer is an error +fn parse_escape_char(s: &str) -> Result<Option<char>, DataFusionError> { + match s.chars().count() { + 0 => Ok(None), + 1 => Ok(s.chars().next()), + _ => Err(DataFusionError::Internal( + "Invalid length for escape char".to_string(), + )), + } +} + impl TryFrom for WindowFrame { type Error = Error; diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index d3f68b3b4276..fde803532406 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -454,6 +454,60 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { expr_type: Some(ExprType::BinaryExpr(binary_expr)), } } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + let pb = Box::new(protobuf::LikeNode { + negated: *negated, + expr: Some(Box::new(expr.as_ref().try_into()?)), + pattern: Some(Box::new(pattern.as_ref().try_into()?)), + escape_char: escape_char + .map(|ch| ch.to_string()) + .unwrap_or_else(|| "".to_string()), + }); + Self { + expr_type: Some(ExprType::Like(pb)), + } + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + let pb = Box::new(protobuf::ILikeNode { + negated: *negated, + expr: Some(Box::new(expr.as_ref().try_into()?)), + pattern: Some(Box::new(pattern.as_ref().try_into()?)), + escape_char: escape_char + .map(|ch| ch.to_string()) + .unwrap_or_else(|| "".to_string()), + }); + Self { + expr_type: Some(ExprType::Ilike(pb)), + } + } + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => { + let pb = Box::new(protobuf::SimilarToNode { + negated: *negated, + expr: Some(Box::new(expr.as_ref().try_into()?)), + 
pattern: Some(Box::new(pattern.as_ref().try_into()?)), + escape_char: escape_char + .map(|ch| ch.to_string()) + .unwrap_or_else(|| "".to_string()), + }); + Self { + expr_type: Some(ExprType::SimilarTo(pb)), + } + } Expr::WindowFunction { ref fun, ref args, diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 81ea34de187b..e44494b1915e 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -232,6 +232,39 @@ where op: *op, right: Box::new(clone_with_replacement(right, replacement_fn)?), }), + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => Ok(Expr::Like { + negated: *negated, + expr: Box::new(clone_with_replacement(expr, replacement_fn)?), + pattern: Box::new(clone_with_replacement(pattern, replacement_fn)?), + escape_char: *escape_char, + }), + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => Ok(Expr::ILike { + negated: *negated, + expr: Box::new(clone_with_replacement(expr, replacement_fn)?), + pattern: Box::new(clone_with_replacement(pattern, replacement_fn)?), + escape_char: *escape_char, + }), + Expr::SimilarTo { + negated, + expr, + pattern, + escape_char, + } => Ok(Expr::SimilarTo { + negated: *negated, + expr: Box::new(clone_with_replacement(expr, replacement_fn)?), + pattern: Box::new(clone_with_replacement(pattern, replacement_fn)?), + escape_char: *escape_char, + }), Expr::Case { expr: case_expr_opt, when_then_expr,