diff --git a/datafusion-examples/examples/advanced_udf.rs b/datafusion-examples/examples/advanced_udf.rs index c8063c0eb1e33..795946422d26b 100644 --- a/datafusion-examples/examples/advanced_udf.rs +++ b/datafusion-examples/examples/advanced_udf.rs @@ -187,7 +187,7 @@ impl ScalarUDFImpl for PowUdf { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![Some(true)])) + Ok(Some(FuncMonotonicity::Increasing)) } } diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index e2505d6fd65f8..e3dcc4d99bc21 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -346,13 +346,60 @@ impl Signature { } } -/// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments. -/// Each element of this vector corresponds to an argument and indicates whether -/// the function's behavior is monotonic, or non-monotonic/unknown for that argument, namely: -/// - `None` signifies unknown monotonicity or non-monotonicity. -/// - `Some(true)` indicates that the function is monotonically increasing w.r.t. the argument in question. -/// - Some(false) indicates that the function is monotonically decreasing w.r.t. the argument in question. -pub type FuncMonotonicity = Vec>; +/// Monotonicity of a function with respect to its arguments. +/// +/// A function is [monotonic] if it preserves the relative order of its inputs. +/// +/// [monotonic]: https://en.wikipedia.org/wiki/Monotonic_function +#[derive(Debug, Clone)] +pub enum FuncMonotonicity { + /// not monotonic or unknown monotonicity + None, + /// Increasing with respect to all of its arguments + Increasing, + /// Decreasing with respect to all of its arguments + Decreasing, + /// Each element of this vector corresponds to an argument and indicates whether + /// the function's behavior is monotonic, or non-monotonic/unknown for that argument, namely: + /// - `None` signifies unknown monotonicity or non-monotonicity. + /// - `Some(true)` indicates that the function is monotonically increasing w.r.t. the argument in question. + /// - Some(false) indicates that the function is monotonically decreasing w.r.t. the argument in question. + Mixed(Vec>), +} + +impl PartialEq for FuncMonotonicity { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (FuncMonotonicity::None, FuncMonotonicity::None) => true, + (FuncMonotonicity::Increasing, FuncMonotonicity::Increasing) => true, + (FuncMonotonicity::Decreasing, FuncMonotonicity::Decreasing) => true, + (FuncMonotonicity::Mixed(vec1), FuncMonotonicity::Mixed(vec2)) => { + vec1 == vec2 + } + _ => false, + } + } +} + +impl FuncMonotonicity { + pub fn matches(&self, other: &Self) -> bool { + match (self, other) { + (FuncMonotonicity::None, FuncMonotonicity::Mixed(inner_vec)) + | (FuncMonotonicity::Mixed(inner_vec), FuncMonotonicity::None) => { + inner_vec.iter().all(|&x| x.is_none()) + } + (FuncMonotonicity::Increasing, FuncMonotonicity::Mixed(inner_vec)) + | (FuncMonotonicity::Mixed(inner_vec), FuncMonotonicity::Increasing) => { + inner_vec.iter().all(|&x| x == Some(true)) + } + (FuncMonotonicity::Decreasing, FuncMonotonicity::Mixed(inner_vec)) + | (FuncMonotonicity::Mixed(inner_vec), FuncMonotonicity::Decreasing) => { + inner_vec.iter().all(|&x| x == Some(false)) + } + _ => self == other, + } + } +} #[cfg(test)] mod tests { diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index da1797cdae817..df3c7aff1881c 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -147,7 +147,7 @@ impl ScalarUDFImpl for DateBinFunc { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![None, Some(true)])) + Ok(Some(FuncMonotonicity::Mixed(vec![None, Some(true)]))) } } diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 0414bf9c2a264..29d318c8752dd 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -206,7 +206,7 @@ impl ScalarUDFImpl for DateTruncFunc { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![None, Some(true)])) + Ok(Some(FuncMonotonicity::Mixed(vec![None, Some(true)]))) } } diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index f451321ea1201..5009d900007b8 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -84,7 +84,7 @@ impl ScalarUDFImpl for LogFunc { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![Some(true), Some(false)])) + Ok(Some(FuncMonotonicity::Mixed(vec![Some(true), Some(false)]))) } // Support overloaded log(base, x) and log(x) which defaults to log(10, x) diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index b6e8d26b64607..cd899782e24de 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -38,29 +38,83 @@ pub mod trunc; // Create UDFs make_udf_function!(abs::AbsFunc, ABS, abs); make_math_unary_udf!(AcosFunc, ACOS, acos, acos, None); -make_math_unary_udf!(AcoshFunc, ACOSH, acosh, acosh, Some(vec![Some(true)])); +make_math_unary_udf!( + AcoshFunc, + ACOSH, + acosh, + acosh, + Some(FuncMonotonicity::Increasing) +); make_math_unary_udf!(AsinFunc, ASIN, asin, asin, None); -make_math_unary_udf!(AsinhFunc, ASINH, asinh, asinh, Some(vec![Some(true)])); -make_math_unary_udf!(AtanFunc, ATAN, atan, atan, Some(vec![Some(true)])); -make_math_unary_udf!(AtanhFunc, ATANH, atanh, atanh, Some(vec![Some(true)])); -make_math_binary_udf!(Atan2, ATAN2, atan2, atan2, Some(vec![Some(true)])); +make_math_unary_udf!( + AsinhFunc, + ASINH, + asinh, + asinh, + Some(FuncMonotonicity::Increasing) +); +make_math_unary_udf!( + AtanFunc, + ATAN, + atan, + atan, + Some(FuncMonotonicity::Increasing) +); +make_math_unary_udf!( + AtanhFunc, + ATANH, + atanh, + atanh, + Some(FuncMonotonicity::Increasing) +); +make_math_binary_udf!( + Atan2, + ATAN2, + atan2, + atan2, + Some(FuncMonotonicity::Increasing) +); make_math_unary_udf!(CbrtFunc, CBRT, cbrt, cbrt, None); -make_math_unary_udf!(CeilFunc, CEIL, ceil, ceil, Some(vec![Some(true)])); +make_math_unary_udf!( + CeilFunc, + CEIL, + ceil, + ceil, + Some(FuncMonotonicity::Increasing) +); make_math_unary_udf!(CosFunc, COS, cos, cos, None); make_math_unary_udf!(CoshFunc, COSH, cosh, cosh, None); make_udf_function!(cot::CotFunc, COT, cot); make_math_unary_udf!(DegreesFunc, DEGREES, degrees, to_degrees, None); -make_math_unary_udf!(ExpFunc, EXP, exp, exp, Some(vec![Some(true)])); +make_math_unary_udf!(ExpFunc, EXP, exp, exp, Some(FuncMonotonicity::Increasing)); make_udf_function!(factorial::FactorialFunc, FACTORIAL, factorial); -make_math_unary_udf!(FloorFunc, FLOOR, floor, floor, Some(vec![Some(true)])); +make_math_unary_udf!( + FloorFunc, + FLOOR, + floor, + floor, + Some(FuncMonotonicity::Increasing) +); make_udf_function!(log::LogFunc, LOG, log); make_udf_function!(gcd::GcdFunc, GCD, gcd); make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(iszero::IsZeroFunc, ISZERO, iszero); make_udf_function!(lcm::LcmFunc, LCM, lcm); -make_math_unary_udf!(LnFunc, LN, ln, ln, Some(vec![Some(true)])); -make_math_unary_udf!(Log2Func, LOG2, log2, log2, Some(vec![Some(true)])); -make_math_unary_udf!(Log10Func, LOG10, log10, log10, Some(vec![Some(true)])); +make_math_unary_udf!(LnFunc, LN, ln, ln, Some(FuncMonotonicity::Increasing)); +make_math_unary_udf!( + Log2Func, + LOG2, + log2, + log2, + Some(FuncMonotonicity::Increasing) +); +make_math_unary_udf!( + Log10Func, + LOG10, + log10, + log10, + Some(FuncMonotonicity::Increasing) +); make_udf_function!(nanvl::NanvlFunc, NANVL, nanvl); make_udf_function!(pi::PiFunc, PI, pi); make_udf_function!(power::PowerFunc, POWER, power); diff --git a/datafusion/functions/src/math/pi.rs b/datafusion/functions/src/math/pi.rs index f9403e411fe26..a840baf93bcf7 100644 --- a/datafusion/functions/src/math/pi.rs +++ b/datafusion/functions/src/math/pi.rs @@ -71,6 +71,6 @@ impl ScalarUDFImpl for PiFunc { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![Some(true)])) + Ok(Some(FuncMonotonicity::Increasing)) } } diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs index f4a163137a35b..7865fa1255c14 100644 --- a/datafusion/functions/src/math/round.rs +++ b/datafusion/functions/src/math/round.rs @@ -81,7 +81,7 @@ impl ScalarUDFImpl for RoundFunc { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![Some(true)])) + Ok(Some(FuncMonotonicity::Increasing)) } } diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index 6f88099889cc4..d5bbf5a9a74ed 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -87,7 +87,7 @@ impl ScalarUDFImpl for TruncFunc { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![Some(true)])) + Ok(Some(FuncMonotonicity::Increasing)) } } diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 6b84b81e9faef..383e7ace70d01 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -251,10 +251,23 @@ pub fn out_ordering( func: &FuncMonotonicity, arg_orderings: &[SortProperties], ) -> SortProperties { - func.iter().zip(arg_orderings).fold( + arg_orderings.iter().enumerate().fold( SortProperties::Singleton, - |prev_sort, (item, arg)| { - let current_sort = func_order_in_one_dimension(item, arg); + |prev_sort, (index, arg)| { + let arg_monotonicity: Option = match func { + FuncMonotonicity::None => None, + FuncMonotonicity::Increasing => Some(true), + FuncMonotonicity::Decreasing => Some(false), + FuncMonotonicity::Mixed(inner_vec) => { + if inner_vec.len() > index { + inner_vec[index] + } else { + None + } + } + }; + + let current_sort = func_order_in_one_dimension(&arg_monotonicity, arg); match (prev_sort, current_sort) { (_, SortProperties::Unordered) => SortProperties::Unordered, @@ -299,3 +312,39 @@ fn func_order_in_one_dimension( } } } + +#[cfg(test)] +mod tests { + use arrow_schema::Schema; + + use datafusion_common::{DFSchema, Result}; + use datafusion_expr::{FuncMonotonicity, ScalarUDF}; + + use crate::utils::tests::TestScalarUDF; + use crate::ScalarFunctionExpr; + + use super::create_physical_expr; + + #[test] + fn test_function_expr() -> Result<()> { + let udf = ScalarUDF::from(TestScalarUDF::new()); + + let e = crate::expressions::lit(1.1); + let p_expr = + create_physical_expr(&udf, &[e], &Schema::empty(), &[], &DFSchema::empty())?; + let expr_monotonicity = p_expr + .as_any() + .downcast_ref::() + .unwrap() + .monotonicity(); + + assert_eq!(expr_monotonicity, &Some(FuncMonotonicity::Increasing)); + + assert!(expr_monotonicity + .as_ref() + .unwrap() + .matches(&FuncMonotonicity::Mixed(vec![Some(true)]))); + + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index 76cee3a1a786f..c2f884b5fad7e 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -310,7 +310,7 @@ pub(crate) mod tests { } fn monotonicity(&self) -> Result> { - Ok(Some(vec![Some(true)])) + Ok(Some(FuncMonotonicity::Increasing)) } fn invoke(&self, args: &[ColumnarValue]) -> Result {