From e542a1784aa7ad6ad629eb59db6a38f420955801 Mon Sep 17 00:00:00 2001 From: comphead Date: Tue, 15 Nov 2022 13:19:11 -0800 Subject: [PATCH 1/3] Percentile coerce to f64 from decimal --- .../expr/src/type_coercion/aggregates.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr/src/type_coercion/aggregates.rs index 72d6dc3983b29..7510cf6c9f874 100644 --- a/datafusion/expr/src/type_coercion/aggregates.rs +++ b/datafusion/expr/src/type_coercion/aggregates.rs @@ -23,6 +23,8 @@ use std::ops::Deref; use crate::{AggregateFunction, Signature, TypeSignature}; +use super::functions::can_coerce_from; + pub static STRINGS: &[DataType] = &[DataType::Utf8, DataType::LargeUtf8]; pub static NUMERICS: &[DataType] = &[ @@ -166,19 +168,22 @@ pub fn coerce_types( agg_fun, input_types[0] ))); } - if !matches!(input_types[1], DataType::Float64) { - return Err(DataFusionError::Plan(format!( - "The percentile argument for {:?} must be Float64, not {:?}.", - agg_fun, input_types[1] - ))); - } if input_types.len() == 3 && !is_integer_arg_type(&input_types[2]) { return Err(DataFusionError::Plan(format!( "The percentile sample points count for {:?} must be integer, not {:?}.", agg_fun, input_types[2] ))); } - Ok(input_types.to_vec()) + let mut result = input_types.to_vec(); + if can_coerce_from(&DataType::Float64, &input_types[1]) { + result[1] = DataType::Float64; + } else { + return Err(DataFusionError::Plan(format!( + "The percentile argument for {:?} must be Float64, not {:?}.", + agg_fun, input_types[1] + ))); + } + Ok(result) } AggregateFunction::ApproxPercentileContWithWeight => { if !is_approx_percentile_cont_supported_arg_type(&input_types[0]) { From ee0a1be9be2e75d22379ed9a0eecbe07c6279636 Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 16 Nov 2022 16:29:41 -0800 Subject: [PATCH 2/3] Added test --- datafusion/core/tests/sql/aggregates.rs | 21 +++++++++++++++++++ .../physical-expr/src/aggregate/tdigest.rs | 1 - 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 4b8a158fb4ff7..1c93c9954a0ce 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -2314,3 +2314,24 @@ async fn aggregate_with_alias() -> Result<()> { ); Ok(()) } + +#[tokio::test] +async fn test_approx_percentile_cont_decimal_support() -> Result<()> { + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx).await?; + let sql = "SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----+-----+", + "| c1 | apc |", + "+----+-----+", + "| a | 4 |", + "| b | 5 |", + "| c | 4 |", + "| d | 4 |", + "| e | 4 |", + "+----+-----+", + ]; + assert_batches_eq!(expected, &actual); + Ok(()) +} diff --git a/datafusion/physical-expr/src/aggregate/tdigest.rs b/datafusion/physical-expr/src/aggregate/tdigest.rs index 6457f7025fc7a..56bf04bae4433 100644 --- a/datafusion/physical-expr/src/aggregate/tdigest.rs +++ b/datafusion/physical-expr/src/aggregate/tdigest.rs @@ -231,7 +231,6 @@ impl TDigest { } pub(crate) fn merge_sorted_f64(&self, sorted_values: &[f64]) -> TDigest { - dbg!(&sorted_values); #[cfg(debug_assertions)] debug_assert!(is_sorted(sorted_values), "unsorted input to TDigest"); From ee9c97dc16640e09be74e1092a1cdc8b1d3cfbc6 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 18 Nov 2022 14:12:59 -0800 Subject: [PATCH 3/3] fix message --- datafusion/expr/src/type_coercion/aggregates.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr/src/type_coercion/aggregates.rs index 7510cf6c9f874..14f691d979c76 100644 --- a/datafusion/expr/src/type_coercion/aggregates.rs +++ b/datafusion/expr/src/type_coercion/aggregates.rs @@ -179,7 +179,7 @@ pub fn coerce_types( result[1] = DataType::Float64; } else { return Err(DataFusionError::Plan(format!( - "The percentile argument for {:?} must be Float64, not {:?}.", + "Could not coerce the percent argument for {:?} to Float64. Was {:?}.", agg_fun, input_types[1] ))); }