diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index e95fcdd128ede..2e53a682854ce 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -20,7 +20,6 @@ use std::any::Any; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; -use std::iter::zip; use std::sync::Arc; use crate::dml::CopyTo; @@ -36,7 +35,7 @@ use crate::logical_plan::{ Projection, Repartition, Sort, SubqueryAlias, TableScan, Union, Unnest, Values, Window, }; -use crate::type_coercion::binary::{comparison_coercion, values_coercion}; +use crate::type_coercion::binary::values_coercion; use crate::utils::{ can_hash, columnize_expr, compare_sort_expr, expr_to_columns, find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, @@ -1338,96 +1337,14 @@ pub fn validate_unique_names<'a>( }) } -pub fn project_with_column_index( - expr: Vec, - input: Arc, - schema: DFSchemaRef, -) -> Result { - let alias_expr = expr - .into_iter() - .enumerate() - .map(|(i, e)| match e { - Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { - e.unalias().alias(schema.field(i).name()) - } - Expr::Column(Column { - relation: _, - ref name, - }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), - Expr::Alias { .. } | Expr::Column { .. } => e, - Expr::Wildcard { .. } => e, - _ => e.alias(schema.field(i).name()), - }) - .collect::>(); - - Projection::try_new_with_schema(alias_expr, input, schema) - .map(LogicalPlan::Projection) -} - /// Union two logical plans. pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result { - let left_col_num = left_plan.schema().fields().len(); - - // check union plan length same. - let right_col_num = right_plan.schema().fields().len(); - if right_col_num != left_col_num { - return plan_err!( - "Union queries must have the same number of columns, (left is {left_col_num}, right is {right_col_num})"); - } - - // create union schema - let union_qualified_fields = - zip(left_plan.schema().iter(), right_plan.schema().iter()) - .map( - |((left_qualifier, left_field), (_right_qualifier, right_field))| { - let nullable = left_field.is_nullable() || right_field.is_nullable(); - let data_type = comparison_coercion( - left_field.data_type(), - right_field.data_type(), - ) - .ok_or_else(|| { - plan_datafusion_err!( - "UNION Column {} (type: {}) is not compatible with column {} (type: {})", - right_field.name(), - right_field.data_type(), - left_field.name(), - left_field.data_type() - ) - })?; - Ok(( - left_qualifier.cloned(), - Arc::new(Field::new(left_field.name(), data_type, nullable)), - )) - }, - ) - .collect::>>()?; - let union_schema = - DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())?; - - let inputs = vec![left_plan, right_plan] - .into_iter() - .map(|p| { - let plan = coerce_plan_expr_for_schema(&p, &union_schema)?; - match plan { - LogicalPlan::Projection(Projection { expr, input, .. }) => { - Ok(Arc::new(project_with_column_index( - expr, - input, - Arc::new(union_schema.clone()), - )?)) - } - other_plan => Ok(Arc::new(other_plan)), - } - }) - .collect::>>()?; - - if inputs.is_empty() { - return plan_err!("Empty UNION"); - } - + // Temporarily use the schema from the left input and later rely on the analyzer to + // coerce the two schemas into a common one. + let schema = Arc::clone(left_plan.schema()); Ok(LogicalPlan::Union(Union { - inputs, - schema: Arc::new(union_schema), + inputs: vec![Arc::new(left_plan), Arc::new(right_plan)], + schema, })) } @@ -1850,23 +1767,6 @@ mod tests { Ok(()) } - #[test] - fn plan_builder_union_different_num_columns_error() -> Result<()> { - let plan1 = - table_scan(TableReference::none(), &employee_schema(), Some(vec![3]))?; - let plan2 = - table_scan(TableReference::none(), &employee_schema(), Some(vec![3, 4]))?; - - let expected = "Error during planning: Union queries must have the same number of columns, (left is 1, right is 2)"; - let err_msg1 = plan1.clone().union(plan2.clone().build()?).unwrap_err(); - let err_msg2 = plan1.union_distinct(plan2.build()?).unwrap_err(); - - assert_eq!(err_msg1.strip_backtrace(), expected); - assert_eq!(err_msg2.strip_backtrace(), expected); - - Ok(()) - } - #[test] fn plan_builder_simple_distinct() -> Result<()> { let plan = diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 7392028ba7aba..40efbba6de7a5 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -17,22 +17,24 @@ //! Optimizer rule for type validation and coercion +use std::collections::HashMap; use std::sync::Arc; -use arrow::datatypes::{DataType, IntervalUnit}; +use itertools::izip; + +use arrow::datatypes::{DataType, Field, IntervalUnit}; use crate::analyzer::AnalyzerRule; use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ - exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, - DataFusionError, Result, ScalarValue, + exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::builder::project_with_column_index; use datafusion_expr::expr::{ - self, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, - WindowFunction, + self, Alias, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, + ScalarFunction, WindowFunction, }; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; @@ -51,7 +53,7 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, Operator, + AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, Join, LogicalPlan, Operator, Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, }; @@ -121,9 +123,8 @@ fn analyze_internal( expr.rewrite(&mut expr_rewrite)? .map_data(|expr| original_name.restore(expr)) })? - // coerce join expressions specially - .map_data(|plan| expr_rewrite.coerce_joins(plan))? - .map_data(|plan| expr_rewrite.coerce_union(plan))? + // some plans need extra coercion after their expressions are coerced + .map_data(|plan| expr_rewrite.coerce_plan(plan))? // recompute the schema after the expressions have been rewritten as the types may have changed .map_data(|plan| plan.recompute_schema()) } @@ -137,6 +138,14 @@ impl<'a> TypeCoercionRewriter<'a> { Self { schema } } + fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { + match plan { + LogicalPlan::Join(join) => self.coerce_join(join), + LogicalPlan::Union(union) => Self::coerce_union(union), + _ => Ok(plan), + } + } + /// Coerce join equality expressions and join filter /// /// Joins must be treated specially as their equality expressions are stored @@ -145,11 +154,7 @@ impl<'a> TypeCoercionRewriter<'a> { /// /// For example, on_exprs like `t1.a = t2.b AND t1.x = t2.y` will be stored /// as a list of `(t1.a, t2.b), (t1.x, t2.y)` - fn coerce_joins(&mut self, plan: LogicalPlan) -> Result { - let LogicalPlan::Join(mut join) = plan else { - return Ok(plan); - }; - + fn coerce_join(&mut self, mut join: Join) -> Result { join.on = join .on .into_iter() @@ -170,36 +175,30 @@ impl<'a> TypeCoercionRewriter<'a> { Ok(LogicalPlan::Join(join)) } - /// Corece the union inputs after expanding the wildcard expressions - /// - /// Union inputs must have the same schema, so we coerce the expressions to match the schema - /// after expanding the wildcard expressions - fn coerce_union(&self, plan: LogicalPlan) -> Result { - let LogicalPlan::Union(union) = plan else { - return Ok(plan); - }; - - let inputs = union + /// Coerce the union’s inputs to a common schema compatible with all inputs. + /// This occurs after wildcard expansion and the coercion of the input expressions. + fn coerce_union(union_plan: Union) -> Result { + let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?); + let new_inputs = union_plan .inputs - .into_iter() + .iter() .map(|p| { - let plan = coerce_plan_expr_for_schema(&p, &union.schema)?; + let plan = coerce_plan_expr_for_schema(p, &union_schema)?; match plan { LogicalPlan::Projection(Projection { expr, input, .. }) => { Ok(Arc::new(project_with_column_index( expr, input, - Arc::clone(&union.schema), + Arc::clone(&union_schema), )?)) } other_plan => Ok(Arc::new(other_plan)), } }) .collect::>>()?; - Ok(LogicalPlan::Union(Union { - inputs, - schema: Arc::clone(&union.schema), + inputs: new_inputs, + schema: union_schema, })) } @@ -809,6 +808,92 @@ fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result { Ok(Case::new(case_expr, when_then, else_expr)) } +/// Get a common schema that is compatible with all inputs of UNION. +fn coerce_union_schema(inputs: &[Arc]) -> Result { + let base_schema = inputs[0].schema(); + let mut union_datatypes = base_schema + .fields() + .iter() + .map(|f| f.data_type().clone()) + .collect::>(); + let mut union_nullabilities = base_schema + .fields() + .iter() + .map(|f| f.is_nullable()) + .collect::>(); + + for (i, plan) in inputs.iter().enumerate().skip(1) { + let plan_schema = plan.schema(); + if plan_schema.fields().len() != base_schema.fields().len() { + return plan_err!( + "Union schemas have different number of fields: \ + query 1 has {} fields whereas query {} has {} fields", + base_schema.fields().len(), + i + 1, + plan_schema.fields().len() + ); + } + // coerce data type and nullablity for each field + for (union_datatype, union_nullable, plan_field) in izip!( + union_datatypes.iter_mut(), + union_nullabilities.iter_mut(), + plan_schema.fields() + ) { + let coerced_type = + comparison_coercion(union_datatype, plan_field.data_type()).ok_or_else( + || { + plan_datafusion_err!( + "Incompatible inputs for Union: Previous inputs were \ + of type {}, but got incompatible type {} on column '{}'", + union_datatype, + plan_field.data_type(), + plan_field.name() + ) + }, + )?; + *union_datatype = coerced_type; + *union_nullable = *union_nullable || plan_field.is_nullable(); + } + } + let union_qualified_fields = izip!( + base_schema.iter(), + union_datatypes.into_iter(), + union_nullabilities + ) + .map(|((qualifier, field), datatype, nullable)| { + let field = Arc::new(Field::new(field.name().clone(), datatype, nullable)); + (qualifier.cloned(), field) + }) + .collect::>(); + DFSchema::new_with_metadata(union_qualified_fields, HashMap::new()) +} + +/// See `` +fn project_with_column_index( + expr: Vec, + input: Arc, + schema: DFSchemaRef, +) -> Result { + let alias_expr = expr + .into_iter() + .enumerate() + .map(|(i, e)| match e { + Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { + e.unalias().alias(schema.field(i).name()) + } + Expr::Column(Column { + relation: _, + ref name, + }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), + Expr::Alias { .. } | Expr::Column { .. } => e, + _ => e.alias(schema.field(i).name()), + }) + .collect::>(); + + Projection::try_new_with_schema(alias_expr, input, schema) + .map(LogicalPlan::Projection) +} + #[cfg(test)] mod test { use std::any::Any; diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index cc8cf1f56c184..5f41e4f137b15 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -114,8 +114,11 @@ fn extract_plan_from_distinct(plan: Arc) -> Arc { #[cfg(test)] mod tests { use super::*; + use crate::analyzer::type_coercion::TypeCoercion; + use crate::analyzer::Analyzer; use crate::test::*; use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{col, logical_plan::table_scan}; fn schema() -> Schema { @@ -127,7 +130,14 @@ mod tests { } fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { - assert_optimized_plan_eq(Arc::new(EliminateNestedUnion::new()), plan, expected) + let options = ConfigOptions::default(); + let analyzed_plan = Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())]) + .execute_and_check(plan, &options, |_, _| {})?; + assert_optimized_plan_eq( + Arc::new(EliminateNestedUnion::new()), + analyzed_plan, + expected, + ) } #[test] diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 15efe2d2f03ce..7ce3565fa29f6 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2160,148 +2160,6 @@ fn union_all() { quick_test(sql, expected); } -#[test] -fn union_with_different_column_names() { - let sql = "SELECT order_id from orders UNION ALL SELECT customer_id FROM orders"; - let expected = "Union\ - \n Projection: orders.order_id\ - \n TableScan: orders\ - \n Projection: orders.customer_id AS order_id\ - \n TableScan: orders"; - quick_test(sql, expected); -} - -#[test] -fn union_values_with_no_alias() { - let sql = "SELECT 1, 2 UNION ALL SELECT 3, 4"; - let expected = "Union\ - \n Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2)\ - \n EmptyRelation\ - \n Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2)\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_incompatible_data_type() { - let sql = "SELECT interval '1 year 1 day' UNION ALL SELECT 1"; - let err = logical_plan(sql) - .expect_err("query should have failed") - .strip_backtrace(); - assert_eq!( - "Error during planning: UNION Column Int64(1) (type: Int64) is not compatible with column IntervalMonthDayNano(\"IntervalMonthDayNano { months: 12, days: 1, nanoseconds: 0 }\") (type: Interval(MonthDayNano))", - err - ); -} - -#[test] -fn union_with_different_decimal_data_types() { - let sql = "SELECT 1 a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: CAST(Int64(1) AS Float64) AS a\ - \n EmptyRelation\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_null() { - let sql = "SELECT NULL a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: CAST(NULL AS Float64) AS a\ - \n EmptyRelation\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_float_and_string() { - let sql = "SELECT 'a' a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: Utf8(\"a\") AS a\ - \n EmptyRelation\ - \n Projection: CAST(Float64(1.1) AS Utf8) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_multiply_cols() { - let sql = "SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b"; - let expected = "Union\ - \n Projection: Utf8(\"a\") AS a, CAST(Int64(1) AS Float64) AS b\ - \n EmptyRelation\ - \n Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn sorted_union_with_different_types_and_group_by() { - let sql = "SELECT a FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1"; - let expected = "Sort: x.a ASC NULLS LAST\ - \n Union\ - \n Projection: CAST(x.a AS Float64) AS a\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: x.a\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_binary_expr_and_cast() { - let sql = "SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1)"; - let expected = "Union\ - \n Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a\ - \n Aggregate: groupBy=[[CAST(Float64(0) + x.a AS Int32)]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: Float64(2.1) + x.a AS Float64(0) + x.a\ - \n Aggregate: groupBy=[[Float64(2.1) + x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_aliases() { - let sql = "SELECT a as a1 FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1)"; - let expected = "Union\ - \n Projection: CAST(x.a AS Float64) AS a1\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: x.a AS a1\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_incompatible_data_types() { - let sql = "SELECT 'a' a UNION ALL SELECT true a"; - let err = logical_plan(sql) - .expect_err("query should have failed") - .strip_backtrace(); - assert_eq!( - "Error during planning: UNION Column a (type: Boolean) is not compatible with column a (type: Utf8)", - err - ); -} - #[test] fn empty_over() { let sql = "SELECT order_id, MAX(order_id) OVER () from orders"; diff --git a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index aa1e6826eca55..e420c0cc71554 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -49,3 +49,179 @@ select interval '1 month' - '2023-05-01'::date; # interval - timestamp query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; + + +#################################### +## Test type coercion with UNIONs ## +#################################### + +# Disable optimizer to test only the analyzer with type coercion +statement ok +set datafusion.optimizer.max_passes = 0; + +statement ok +set datafusion.explain.logical_plan_only = true; + +# Create test table +statement ok +CREATE TABLE orders( + order_id INT UNSIGNED NOT NULL, + customer_id INT UNSIGNED NOT NULL, + o_item_id VARCHAR NOT NULL, + qty INT NOT NULL, + price DOUBLE NOT NULL, + delivered BOOLEAN NOT NULL +); + +# union_different_num_columns_error() / UNION +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION SELECT customer_id, o_item_id FROM orders + +# union_different_num_columns_error() / UNION ALL +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION ALL SELECT customer_id, o_item_id FROM orders + +# union_with_different_column_names() +query TT +EXPLAIN SELECT order_id from orders UNION ALL SELECT customer_id FROM orders +---- +logical_plan +01)Union +02)--Projection: orders.order_id +03)----TableScan: orders +04)--Projection: orders.customer_id AS order_id +05)----TableScan: orders + +# union_values_with_no_alias() +query TT +EXPLAIN SELECT 1, 2 UNION ALL SELECT 3, 4 +---- +logical_plan +01)Union +02)--Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2) +03)----EmptyRelation +04)--Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2) +05)----EmptyRelation + +# union_with_incompatible_data_type() +query error Incompatible inputs for Union: Previous inputs were of type Interval\(MonthDayNano\), but got incompatible type Int64 on column 'Int64\(1\)' +SELECT interval '1 year 1 day' UNION ALL SELECT 1 + +# union_with_different_decimal_data_types() +query TT +EXPLAIN SELECT 1 a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: CAST(Int64(1) AS Float64) AS a +03)----EmptyRelation +04)--Projection: Float64(1.1) AS a +05)----EmptyRelation + +# union_with_null() +query TT +EXPLAIN SELECT NULL a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: CAST(NULL AS Float64) AS a +03)----EmptyRelation +04)--Projection: Float64(1.1) AS a +05)----EmptyRelation + +# union_with_float_and_string() +query TT +EXPLAIN SELECT 'a' a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: Utf8("a") AS a +03)----EmptyRelation +04)--Projection: CAST(Float64(1.1) AS Utf8) AS a +05)----EmptyRelation + +# union_with_multiply_cols() +query TT +EXPLAIN SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b +---- +logical_plan +01)Union +02)--Projection: Utf8("a") AS a, CAST(Int64(1) AS Float64) AS b +03)----EmptyRelation +04)--Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b +05)----EmptyRelation + +# sorted_union_with_different_types_and_group_by() +query TT +EXPLAIN SELECT a FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1 +---- +logical_plan +01)Sort: x.a ASC NULLS LAST +02)--Union +03)----Projection: CAST(x.a AS Float64) AS a +04)------Aggregate: groupBy=[[x.a]], aggr=[[]] +05)--------SubqueryAlias: x +06)----------Projection: Int64(1) AS a +07)------------EmptyRelation +08)----Projection: x.a +09)------Aggregate: groupBy=[[x.a]], aggr=[[]] +10)--------SubqueryAlias: x +11)----------Projection: Float64(1.1) AS a +12)------------EmptyRelation + +# union_with_binary_expr_and_cast() +query TT +EXPLAIN SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1) +---- +logical_plan +01)Union +02)--Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a +03)----Aggregate: groupBy=[[CAST(Float64(0) + CAST(x.a AS Float64) AS Int32)]], aggr=[[]] +04)------SubqueryAlias: x +05)--------Projection: Int64(1) AS a +06)----------EmptyRelation +07)--Projection: Float64(2.1) + x.a AS Float64(0) + x.a +08)----Aggregate: groupBy=[[Float64(2.1) + CAST(x.a AS Float64)]], aggr=[[]] +09)------SubqueryAlias: x +10)--------Projection: Int64(1) AS a +11)----------EmptyRelation + +# union_with_aliases() +query TT +EXPLAIN SELECT a as a1 FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1) +---- +logical_plan +01)Union +02)--Projection: CAST(x.a AS Float64) AS a1 +03)----Aggregate: groupBy=[[x.a]], aggr=[[]] +04)------SubqueryAlias: x +05)--------Projection: Int64(1) AS a +06)----------EmptyRelation +07)--Projection: x.a AS a1 +08)----Aggregate: groupBy=[[x.a]], aggr=[[]] +09)------SubqueryAlias: x +10)--------Projection: Float64(1.1) AS a +11)----------EmptyRelation + +# union_with_incompatible_data_types() +query error Incompatible inputs for Union: Previous inputs were of type Utf8, but got incompatible type Boolean on column 'a' +SELECT 'a' a UNION ALL SELECT true a + +statement ok +SET datafusion.optimizer.max_passes = 3; + +statement ok +SET datafusion.explain.logical_plan_only = false; + +statement ok +DROP TABLE orders; + +######################################## +## Test type coercion with UNIONs end ## +######################################## diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index ffbf54c4d93aa..288f99d82c107 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -677,3 +677,14 @@ DROP TABLE t3; statement ok DROP TABLE t4; + +# Test issue: https://github.com/apache/datafusion/issues/11742 +query R rowsort +WITH + tt(v1) AS (VALUES (1::INT),(NULL::INT)) +SELECT NVL(v1, 0.5) FROM tt + UNION ALL +SELECT NULL WHERE FALSE; +---- +0.5 +1