diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index 0e3f337071be3..aa66d6e1da4b4 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -76,23 +76,23 @@ statement error DataFusion error: Schema error: Schema contains duplicate unqual SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100 # csv_query_approx_percentile_cont_with_weight -statement error Error during planning: The function ApproxPercentileContWithWeight does not support inputs of type Utf8. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Utf8, Int8, Float64\)'. You might need to add explicit type casts. SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100 -statement error Error during planning: The weight argument for ApproxPercentileContWithWeight does not support inputs of type Utf8 +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Int16, Utf8, Float64\)'\. You might need to add explicit type casts\. SELECT approx_percentile_cont_with_weight(c3, c1, 0.95) FROM aggregate_test_100 -statement error Error during planning: The percentile argument for ApproxPercentileContWithWeight must be Float64, not Utf8. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Int16, Int8, Utf8\)'\. You might need to add explicit type casts\. SELECT approx_percentile_cont_with_weight(c3, c2, c1) FROM aggregate_test_100 # csv_query_approx_percentile_cont_with_histogram_bins statement error This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\). SELECT c1, approx_percentile_cont(c3, 0.95, -1000) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 -statement error Error during planning: The percentile sample points count for ApproxPercentileCont must be integer, not Utf8. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'APPROX_PERCENTILE_CONT\(Int16, Float64, Utf8\)'\. You might need to add explicit type casts\. SELECT approx_percentile_cont(c3, 0.95, c1) FROM aggregate_test_100 -statement error Error during planning: The percentile sample points count for ApproxPercentileCont must be integer, not Float64. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'APPROX_PERCENTILE_CONT\(Int16, Float64, Float64\)'\. You might need to add explicit type casts\. SELECT approx_percentile_cont(c3, 0.95, 111.1) FROM aggregate_test_100 # array agg can use order by @@ -1644,10 +1644,10 @@ NULL NULL NULL NULL Row 2 Y # aggregate_timestamps_sum -statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'SUM\(Timestamp\(Nanosecond, None\)\)'\. You might need to add explicit type casts\. SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t; -statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'SUM\(Timestamp\(Nanosecond, None\)\)'\. You might need to add explicit type casts\. SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag; # aggregate_timestamps_count @@ -1688,10 +1688,10 @@ Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-0 # aggregate_timestamps_avg -statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'AVG\(Timestamp\(Nanosecond, None\)\)'\. You might need to add explicit type casts\. SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t -statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'AVG\(Timestamp\(Nanosecond, None\)\)'\. You might need to add explicit type casts\. SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag; @@ -1738,10 +1738,10 @@ NULL NULL Row 2 Y # aggregate_timestamps_sum -statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'SUM\(Date32\)'\. You might need to add explicit type casts\. SELECT sum(date32), sum(date64) FROM t; -statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'SUM\(Date32\)'\. You might need to add explicit type casts\. SELECT tag, sum(date32), sum(date64) FROM t GROUP BY tag ORDER BY tag; # aggregate_timestamps_count @@ -1782,10 +1782,10 @@ Y 2021-01-01 2021-01-01T00:00:00 # aggregate_timestamps_avg -statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'AVG\(Date32\)'\. You might need to add explicit type casts\. SELECT avg(date32), avg(date64) FROM t -statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'AVG\(Date32\)'\. You might need to add explicit type casts\. SELECT tag, avg(date32), avg(date64) FROM t GROUP BY tag ORDER BY tag; @@ -1835,10 +1835,10 @@ select * from t; 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3 B # aggregate_times_sum -statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\). +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'SUM\(Time64\(Nanosecond\)\)'\. You might need to add explicit type casts\. SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t -statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\)\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'SUM\(Time64\(Nanosecond\)\)'\. You might need to add explicit type casts\. SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag # aggregate_times_count @@ -1880,10 +1880,10 @@ B 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 # aggregate_times_avg -statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\). +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'AVG\(Time64\(Nanosecond\)\)'\. You might need to add explicit type casts\. SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t -statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\)\. +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'AVG\(Time64\(Nanosecond\)\)'\. You might need to add explicit type casts\. SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag; statement ok diff --git a/datafusion/core/tests/sqllogictests/test_files/errors.slt b/datafusion/core/tests/sqllogictests/test_files/errors.slt index 938209d21cc8e..bdbf525abef9b 100644 --- a/datafusion/core/tests/sqllogictests/test_files/errors.slt +++ b/datafusion/core/tests/sqllogictests/test_files/errors.slt @@ -46,7 +46,9 @@ statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'c SELECT CAST(c1 AS INT) FROM aggregate_test_100 # aggregation_with_bad_arguments -statement error Error during planning: The function Count expects at least one argument +statement error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: Candidate"\) + Candidate functions: + COUNT\(Any, \.\., Any\) SELECT COUNT(DISTINCT) FROM aggregate_test_100 # query_cte_incorrect @@ -72,3 +74,61 @@ SELECT COUNT(*) FROM nonexistentcatalog.public.aggregate_test_100 statement error Error during planning: Unsupported compound identifier '\[Ident \{ value: "way", quote_style: None \}, Ident \{ value: "too", quote_style: None \}, Ident \{ value: "many", quote_style: None \}, Ident \{ value: "namespaces", quote_style: None \}, Ident \{ value: "as", quote_style: None \}, Ident \{ value: "ident", quote_style: None \}, Ident \{ value: "prefixes", quote_style: None \}, Ident \{ value: "aggregate_test_100", quote_style: None \}\]' SELECT COUNT(*) FROM way.too.many.namespaces.as.ident.prefixes.aggregate_test_100 + + + +# +# Wrong scalar function signature +# + +# error message for wrong function signature (Variadic: arbitrary number of args all from some common types) +statement error Error during planning: No function matches the given name and argument types 'concat\(\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tconcat\(Utf8, ..\) +SELECT concat(); + +# error message for wrong function signature (Uniform: t args all from some common types) +statement error Error during planning: No function matches the given name and argument types 'nullif\(Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tnullif\(Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8, Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8\) +SELECT nullif(1); + +# error message for wrong function signature (Exact: exact number of args of an exact type) +statement error Error during planning: No function matches the given name and argument types 'pi\(Float64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tpi\(\) +SELECT pi(3.14); + +# error message for wrong function signature (Any: fixed number of args of arbitrary types) +statement error Error during planning: No function matches the given name and argument types 'arrow_typeof\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarrow_typeof\(Any\) +SELECT arrow_typeof(1, 1); + +# error message for wrong function signature (OneOf: fixed number of args of arbitrary types) +statement error Error during planning: No function matches the given name and argument types 'power\(Int64, Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tpower\(Int64, Int64\)\n\tpower\(Float64, Float64\) +SELECT power(1, 2, 3); + +# +# Wrong window/aggregate function signature +# + +# AggregateFunction with wrong number of arguments +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'COUNT\(\)'\. You might need to add explicit type casts\.\n\tCandidate functions:\n\tCOUNT\(Any, \.\., Any\) +select count(); + +# AggregateFunction with wrong number of arguments +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'AVG\(Utf8, Float64\)'\. You might need to add explicit type casts\.\n\tCandidate functions:\n\tAVG\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\) +select avg(c1, c12) from aggregate_test_100; + +# AggregateFunction with wrong argument type +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'REGR_SLOPE\(Int64, Utf8\)'\. You might need to add explicit type casts\.\n\tCandidate functions:\n\tREGR_SLOPE\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64, Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\) +select regr_slope(1, '2'); + +# WindowFunction using AggregateFunction wrong signature +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'REGR_SLOPE\(Float32, Utf8\)'\. You might need to add explicit type casts\.\n\tCandidate functions:\n\tREGR_SLOPE\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64, Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\) +select +c9, +regr_slope(c11, '2') over () as min1 +from aggregate_test_100 +order by c9 + +# WindowFunction with BuiltInWindowFunction wrong signature +statement error DataFusion error: Error during planning: No function matches the given name and argument types 'NTH_VALUE\(Int32, Int64, Int64\)'\. You might need to add explicit type casts\.\n\tCandidate functions:\n\tNTH_VALUE\(Any, Any\) +select +c9, +nth_value(c5, 2, 3) over (order by c9) as nv1 +from aggregate_test_100 +order by c9 diff --git a/datafusion/expr/src/aggregate_function.rs b/datafusion/expr/src/aggregate_function.rs index ec854a6b7db38..fe689b87fafc3 100644 --- a/datafusion/expr/src/aggregate_function.rs +++ b/datafusion/expr/src/aggregate_function.rs @@ -17,6 +17,7 @@ //! Aggregate function module contains all built-in aggregate functions definitions +use crate::utils; use crate::{type_coercion::aggregates::*, Signature, TypeSignature, Volatility}; use arrow::datatypes::{DataType, Field}; use datafusion_common::{plan_err, DataFusionError, Result}; @@ -197,7 +198,16 @@ impl AggregateFunction { self, input_expr_types, &self.signature(), - )?; + ) + // original errors are all related to wrong function signature + // aggregate them for better error message + .map_err(|_| { + DataFusionError::Plan(utils::generate_signature_error_msg( + &format!("{self}"), + self.signature(), + input_expr_types, + )) + })?; match self { AggregateFunction::Count | AggregateFunction::ApproxDistinct => { diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 061d0689cd970..73d3939e0d1af 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -20,7 +20,8 @@ use crate::nullif::SUPPORTED_NULLIF_TYPES; use crate::type_coercion::functions::data_types; use crate::{ - conditional_expressions, struct_expressions, Signature, TypeSignature, Volatility, + conditional_expressions, struct_expressions, utils, Signature, TypeSignature, + Volatility, }; use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit}; use datafusion_common::{plan_err, DataFusionError, Result}; @@ -429,33 +430,6 @@ impl BuiltinScalarFunction { } } - /// Creates a detailed error message for a function with wrong signature. - /// - /// For example, a query like `select round(3.14, 1.1);` would yield: - /// ```text - /// Error during planning: No function matches 'round(Float64, Float64)'. You might need to add explicit type casts. - /// Candidate functions: - /// round(Float64, Int64) - /// round(Float32, Int64) - /// round(Float64) - /// round(Float32) - /// ``` - fn generate_signature_error_msg(&self, input_expr_types: &[DataType]) -> String { - let candidate_signatures = self - .signature() - .type_signature - .to_string_repr() - .iter() - .map(|args_str| format!("\t{self}({args_str})")) - .collect::>() - .join("\n"); - - format!( - "No function matches the given name and argument types '{}({})'. You might need to add explicit type casts.\n\tCandidate functions:\n{}", - self, TypeSignature::join_types(input_expr_types, ", "), candidate_signatures - ) - } - /// Returns the dimension [`DataType`] of [`DataType::List`] if /// treated as a N-dimensional array. /// @@ -488,12 +462,23 @@ impl BuiltinScalarFunction { // or the execution panics. if input_expr_types.is_empty() && !self.supports_zero_argument() { - return plan_err!("{}", self.generate_signature_error_msg(input_expr_types)); + return plan_err!( + "{}", + utils::generate_signature_error_msg( + &format!("{self}"), + self.signature(), + input_expr_types + ) + ); } // verify that this is a valid set of data types for this function data_types(input_expr_types, &self.signature()).map_err(|_| { - DataFusionError::Plan(self.generate_signature_error_msg(input_expr_types)) + DataFusionError::Plan(utils::generate_signature_error_msg( + &format!("{self}"), + self.signature(), + input_expr_types, + )) })?; // the return type of the built in function. diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 8940eeed01236..33a8cd6165139 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -24,6 +24,7 @@ use crate::logical_plan::{ Projection, Repartition, Sort as SortPlan, Subquery, SubqueryAlias, Union, Unnest, Values, Window, }; +use crate::signature::{Signature, TypeSignature}; use crate::{ BinaryExpr, Cast, CreateMemoryTable, CreateView, DdlStatement, DmlStatement, Expr, ExprSchemable, GroupingSet, LogicalPlan, LogicalPlanBuilder, Operator, TableScan, @@ -1286,6 +1287,36 @@ pub fn find_valid_equijoin_key_pair( Ok(join_key_pair) } +/// Creates a detailed error message for a function with wrong signature. +/// +/// For example, a query like `select round(3.14, 1.1);` would yield: +/// ```text +/// Error during planning: No function matches 'round(Float64, Float64)'. You might need to add explicit type casts. +/// Candidate functions: +/// round(Float64, Int64) +/// round(Float32, Int64) +/// round(Float64) +/// round(Float32) +/// ``` +pub fn generate_signature_error_msg( + func_name: &str, + func_signature: Signature, + input_expr_types: &[DataType], +) -> String { + let candidate_signatures = func_signature + .type_signature + .to_string_repr() + .iter() + .map(|args_str| format!("\t{func_name}({args_str})")) + .collect::>() + .join("\n"); + + format!( + "No function matches the given name and argument types '{}({})'. You might need to add explicit type casts.\n\tCandidate functions:\n{}", + func_name, TypeSignature::join_types(input_expr_types, ", "), candidate_signatures + ) +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs index 89c59baa4c299..1f36ebdd6b549 100644 --- a/datafusion/expr/src/window_function.rs +++ b/datafusion/expr/src/window_function.rs @@ -22,6 +22,7 @@ use crate::aggregate_function::AggregateFunction; use crate::type_coercion::functions::data_types; +use crate::utils; use crate::{AggregateUDF, Signature, TypeSignature, Volatility, WindowUDF}; use arrow::datatypes::DataType; use datafusion_common::{plan_err, DataFusionError, Result}; @@ -187,7 +188,16 @@ impl BuiltInWindowFunction { // or the execution panics. // verify that this is a valid set of data types for this function - data_types(input_expr_types, &self.signature())?; + data_types(input_expr_types, &self.signature()) + // original errors are all related to wrong function signature + // aggregate them for better error message + .map_err(|_| { + DataFusionError::Plan(utils::generate_signature_error_msg( + &format!("{self}"), + self.signature(), + input_expr_types, + )) + })?; match self { BuiltInWindowFunction::RowNumber diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 9d313e847433a..f0e5f114b7bb0 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -994,7 +994,7 @@ mod test { )); let err = Projection::try_new(vec![agg_expr], empty).err().unwrap(); assert_eq!( - "Plan(\"The function Avg does not support inputs of type Utf8.\")", + "Plan(\"No function matches the given name and argument types 'AVG(Utf8)'. You might need to add explicit type casts.\\n\\tCandidate functions:\\n\\tAVG(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64)\")", &format!("{err:?}") ); Ok(())