From 46b9e4575d46f2ee5d5858ebbd339fe3e947a694 Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Wed, 13 Aug 2025 10:40:40 +0100 Subject: [PATCH 1/2] fix: Implement AggregateUDFImpl::reverse_expr for StringAgg --- .../functions-aggregate/src/string_agg.rs | 4 ++++ .../sqllogictest/test_files/aggregate.slt | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/datafusion/functions-aggregate/src/string_agg.rs b/datafusion/functions-aggregate/src/string_agg.rs index a3a040da3ff7b..3986984b26304 100644 --- a/datafusion/functions-aggregate/src/string_agg.rs +++ b/datafusion/functions-aggregate/src/string_agg.rs @@ -178,6 +178,10 @@ impl AggregateUDFImpl for StringAgg { ))) } + fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF { + datafusion_expr::ReversedUDAF::Reversed(string_agg_udaf()) + } + fn documentation(&self) -> Option<&Documentation> { self.doc() } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 5c6b104157273..b9b5e4ea004ee 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -6162,6 +6162,28 @@ from t; ---- a,c,d,b +query TT +explain select string_agg(k, ',' order by v) from t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v ASC NULLS LAST]]] +02)--TableScan: t projection=[k, v] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v ASC NULLS LAST]] +02)--SortExec: expr=[v@1 ASC NULLS LAST], preserve_partitioning=[false] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +query TT +explain select string_agg(k, ',' order by v desc) from t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]] +02)--TableScan: t projection=[k, v] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]] +02)--SortExec: expr=[v@1 DESC], preserve_partitioning=[false] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + statement ok drop table t; From 84d58ac9380b925b7883ae915a7bcfec70674130 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 5 Sep 2025 14:29:54 -0400 Subject: [PATCH 2/2] Add a test with two invocations of aggregateion --- .../sqllogictest/test_files/aggregate.slt | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 08cca1529f9af..caf8d637ec45e 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -6203,6 +6203,7 @@ from t; ---- a,c,d,b +# Test explain / reverse_expr for string_agg query TT explain select string_agg(k, ',' order by v) from t; ---- @@ -6214,6 +6215,11 @@ physical_plan 02)--SortExec: expr=[v@1 ASC NULLS LAST], preserve_partitioning=[false] 03)----DataSourceExec: partitions=1, partition_sizes=[1] +query T +select string_agg(k, ',' order by v) from t; +---- +c,a,b,d + query TT explain select string_agg(k, ',' order by v desc) from t; ---- @@ -6225,6 +6231,30 @@ physical_plan 02)--SortExec: expr=[v@1 DESC], preserve_partitioning=[false] 03)----DataSourceExec: partitions=1, partition_sizes=[1] +query T +select string_agg(k, ',' order by v desc) from t; +---- +d,b,a,c + +# Call string_agg with both ASC and DESC orderings, and expect only one sort +# (because the aggregate can handle reversed inputs) +query TT +explain select string_agg(k, ',' order by v asc), string_agg(k, ',' order by v desc) from t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v ASC NULLS LAST], string_agg(t.k, Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]] +02)--TableScan: t projection=[k, v] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v ASC NULLS LAST], string_agg(t.k,Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]] +02)--SortExec: expr=[v@1 ASC NULLS LAST], preserve_partitioning=[false] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +query TT +select string_agg(k, ',' order by v asc), string_agg(k, ',' order by v desc) from t; +---- +c,a,b,d d,b,a,c + + statement ok drop table t; @@ -7466,4 +7496,3 @@ NULL NULL statement ok drop table distinct_avg; -