From febb41a5c540c0b71d1d3015a13a69707f521175 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Tue, 23 Sep 2025 08:44:15 -0400
Subject: [PATCH] Improve documentation for ordered set aggregate functions

---
 datafusion/expr/src/udaf.rs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
index 2243682cfd021..389635daf8b7f 100644
--- a/datafusion/expr/src/udaf.rs
+++ b/datafusion/expr/src/udaf.rs
@@ -740,7 +740,20 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync {
     }
 
     /// If this function is ordered-set aggregate function, return true
-    /// If the function is not, return false
+    /// otherwise, return false
+    ///
+    /// Ordered-set aggregate functions require an explicit `ORDER BY` clause
+    /// because the calculation performed by these functions is dependent on the
+    /// specific sequence of the input rows, unlike other aggregate functions
+    /// like `SUM`, `AVG`, or `COUNT`.
+    ///
+    /// An example of an ordered-set aggregate function is `percentile_cont`
+    /// which computes a specific percentile value from a sorted list of values, and
+    /// is only meaningful when the input data is ordered.
+    ///
+    /// In SQL syntax, ordered-set aggregate functions are used with the
+    /// `WITHIN GROUP (ORDER BY ...)` clause to specify the ordering of the input
+    /// data.
     fn is_ordered_set_aggregate(&self) -> bool {
         false
     }