From 4d61a3c5e312f3faad5010a00991378d4deefdc4 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sun, 28 Apr 2024 20:19:58 -0500 Subject: [PATCH 1/9] feat: better name for cast --- datafusion/expr/src/expr.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index e310eaa7e48f..1dbc12a48aa9 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1047,6 +1047,10 @@ impl Expr { match self { // call Expr::display_name() on a Expr::Sort will throw an error Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(), + Expr::Cast(Cast { expr, data_type }) => { + let name = expr.name_for_alias()?; + Ok(format!("CAST({} AS {})", name, data_type)) + }, expr => expr.display_name(), } } From aa12a887b0d8864d0103b40b7ff9c71d235d95d3 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sun, 28 Apr 2024 20:20:11 -0500 Subject: [PATCH 2/9] fmt --- datafusion/expr/src/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 1dbc12a48aa9..0941f93e2dbb 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1050,7 +1050,7 @@ impl Expr { Expr::Cast(Cast { expr, data_type }) => { let name = expr.name_for_alias()?; Ok(format!("CAST({} AS {})", name, data_type)) - }, + } expr => expr.display_name(), } } From d0512c9507e8fa2bfbf6c2c8229943f150b405d2 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sun, 28 Apr 2024 20:33:22 -0500 Subject: [PATCH 3/9] fix test --- datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index d15d12b690da..b94979b9e908 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -458,7 +458,7 @@ mod tests { .project(proj)? .build()?; - let expected = "Projection: Int32(0) AS Utf8(\"0\")\ + let expected = "Projection: Int32(0) AS CAST(Utf8(\"0\") AS Int32)\ \n TableScan: test"; let actual = get_optimized_plan_formatted(plan, &Utc::now()); assert_eq!(expected, actual); From 3740abecb3eacf8059bb2c5c865518efa3b256e7 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sun, 28 Apr 2024 23:00:10 -0500 Subject: [PATCH 4/9] fix test --- datafusion/core/tests/simplification.rs | 2 +- datafusion/expr/src/expr.rs | 21 ++++++++------------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/datafusion/core/tests/simplification.rs b/datafusion/core/tests/simplification.rs index 880c294bb7aa..d2dbc52de313 100644 --- a/datafusion/core/tests/simplification.rs +++ b/datafusion/core/tests/simplification.rs @@ -289,7 +289,7 @@ fn select_date_plus_interval() -> Result<()> { // Note that constant folder runs and folds the entire // expression down to a single constant (true) - let expected = r#"Projection: Date32("18636") AS to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("528280977408") + let expected = r#"Projection: Date32("18636") AS CAST(to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) AS Date32) + IntervalDayTime("528280977408") TableScan: test"#; let actual = get_optimized_plan_formatted(plan, &time); diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0941f93e2dbb..9bd132d32233 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -910,8 +910,7 @@ impl PartialOrd for Expr { } impl Expr { - /// Returns the name of this expression as it should appear in a schema. This name - /// will not include any CAST expressions. + /// Returns the name of this expression as it should appear in a schema. pub fn display_name(&self) -> Result { create_name(self) } @@ -1047,10 +1046,6 @@ impl Expr { match self { // call Expr::display_name() on a Expr::Sort will throw an error Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(), - Expr::Cast(Cast { expr, data_type }) => { - let name = expr.name_for_alias()?; - Ok(format!("CAST({} AS {})", name, data_type)) - } expr => expr.display_name(), } } @@ -1742,13 +1737,13 @@ fn create_name(e: &Expr) -> Result { name += "END"; Ok(name) } - Expr::Cast(Cast { expr, .. }) => { - // CAST does not change the expression name - create_name(expr) - } - Expr::TryCast(TryCast { expr, .. }) => { - // CAST does not change the expression name - create_name(expr) + Expr::TryCast(TryCast { expr, data_type }) + | Expr::Cast(Cast { expr, data_type }) => { + let name = match **expr { + Expr::Column(_) => create_name(expr)?, + _ => format!("CAST({expr} AS {data_type:?})"), + }; + Ok(name) } Expr::Not(expr) => { let expr = create_name(expr)?; From c24fbb99960d4a2e4c986da28fd1f8c8ac45fe91 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sun, 28 Apr 2024 23:08:35 -0500 Subject: [PATCH 5/9] fix test --- datafusion/expr/src/expr.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 9bd132d32233..37b1733013f9 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1977,9 +1977,7 @@ mod test { let expected_canonical = "CAST(Float32(1.23) AS Utf8)"; assert_eq!(expected_canonical, expr.canonical_name()); assert_eq!(expected_canonical, format!("{expr}")); - // note that CAST intentionally has a name that is different from its `Display` - // representation. CAST does not change the name of expressions. - assert_eq!("Float32(1.23)", expr.display_name()?); + assert_eq!("CAST(Float32(1.23) AS Utf8)", expr.display_name()?); Ok(()) } From 2f958995e951340068146b44ebb07ee06817c085 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Mon, 29 Apr 2024 11:42:29 -0500 Subject: [PATCH 6/9] only change display_name --- datafusion/expr/src/expr.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 37b1733013f9..0a2dbf060c45 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -912,7 +912,16 @@ impl PartialOrd for Expr { impl Expr { /// Returns the name of this expression as it should appear in a schema. pub fn display_name(&self) -> Result { - create_name(self) + match self { + Expr::TryCast(TryCast { expr, data_type }) | Expr::Cast(Cast { expr, data_type }) => { + let name = match **expr { + Expr::Column(_) => create_name(expr)?, + _ => format!("CAST({expr} AS {data_type:?})"), + }; + Ok(name) + } + _ => create_name(self), + } } /// Returns a full and complete string representation of this expression. @@ -1046,6 +1055,7 @@ impl Expr { match self { // call Expr::display_name() on a Expr::Sort will throw an error Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(), + // Expr::Cast(Cast { expr, .. }) => expr.name_for_alias(), expr => expr.display_name(), } } @@ -1737,13 +1747,9 @@ fn create_name(e: &Expr) -> Result { name += "END"; Ok(name) } - Expr::TryCast(TryCast { expr, data_type }) - | Expr::Cast(Cast { expr, data_type }) => { - let name = match **expr { - Expr::Column(_) => create_name(expr)?, - _ => format!("CAST({expr} AS {data_type:?})"), - }; - Ok(name) + Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { + // CAST does not change the expression name + create_name(expr) } Expr::Not(expr) => { let expr = create_name(expr)?; From 82170760a5c3325538ec81465b50f03a38d0f101 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Mon, 29 Apr 2024 11:45:53 -0500 Subject: [PATCH 7/9] revert test and fmt --- datafusion/core/tests/simplification.rs | 2 +- datafusion/expr/src/expr.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/simplification.rs b/datafusion/core/tests/simplification.rs index d2dbc52de313..880c294bb7aa 100644 --- a/datafusion/core/tests/simplification.rs +++ b/datafusion/core/tests/simplification.rs @@ -289,7 +289,7 @@ fn select_date_plus_interval() -> Result<()> { // Note that constant folder runs and folds the entire // expression down to a single constant (true) - let expected = r#"Projection: Date32("18636") AS CAST(to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) AS Date32) + IntervalDayTime("528280977408") + let expected = r#"Projection: Date32("18636") AS to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("528280977408") TableScan: test"#; let actual = get_optimized_plan_formatted(plan, &time); diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0a2dbf060c45..4c1eb5dac00a 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -913,7 +913,8 @@ impl Expr { /// Returns the name of this expression as it should appear in a schema. pub fn display_name(&self) -> Result { match self { - Expr::TryCast(TryCast { expr, data_type }) | Expr::Cast(Cast { expr, data_type }) => { + Expr::TryCast(TryCast { expr, data_type }) + | Expr::Cast(Cast { expr, data_type }) => { let name = match **expr { Expr::Column(_) => create_name(expr)?, _ => format!("CAST({expr} AS {data_type:?})"), From 4e3cd7e02eff2d0a49a990156c4c057b0092f7d9 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Mon, 29 Apr 2024 12:02:21 -0500 Subject: [PATCH 8/9] fix integration test --- datafusion/sql/tests/sql_integration.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 319aa5b5fd30..9b48c1c7ceae 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2211,12 +2211,12 @@ fn sorted_union_with_different_types_and_group_by() { fn union_with_binary_expr_and_cast() { let sql = "SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1)"; let expected = "Union\ - \n Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a\ + \n Projection: CAST(CAST(Float64(0) + x.a AS Int32) AS Float64) AS CAST(Float64(0) + x.a AS Int32)\ \n Aggregate: groupBy=[[CAST(Float64(0) + x.a AS Int32)]], aggr=[[]]\ \n SubqueryAlias: x\ \n Projection: Int64(1) AS a\ \n EmptyRelation\ - \n Projection: Float64(2.1) + x.a AS Float64(0) + x.a\ + \n Projection: Float64(2.1) + x.a AS CAST(Float64(0) + x.a AS Int32)\ \n Aggregate: groupBy=[[Float64(2.1) + x.a]], aggr=[[]]\ \n SubqueryAlias: x\ \n Projection: Int64(1) AS a\ From 713f6756bafc7011d63dfd05a0b1df0b46146641 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Mon, 29 Apr 2024 20:35:17 -0500 Subject: [PATCH 9/9] latest code --- datafusion/expr/src/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 4c1eb5dac00a..3bc49ec2136f 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1057,7 +1057,7 @@ impl Expr { // call Expr::display_name() on a Expr::Sort will throw an error Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(), // Expr::Cast(Cast { expr, .. }) => expr.name_for_alias(), - expr => expr.display_name(), + expr => create_name(expr), } }