From bd22ef0c18f03be160ec1df3e039cf33dff9e3c9 Mon Sep 17 00:00:00 2001 From: geetanshjuneja Date: Fri, 25 Jul 2025 00:25:45 +0530 Subject: [PATCH 1/6] Change AsyncScalarUDFImpl::invoke_async_with_args return type to ColumnarValue --- datafusion-examples/examples/async_udf.rs | 4 ++-- datafusion/expr/src/async_udf.rs | 5 ++--- datafusion/physical-expr/src/async_scalar_function.rs | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/datafusion-examples/examples/async_udf.rs b/datafusion-examples/examples/async_udf.rs index f1fc3f88852c2..3f640a7fb77ef 100644 --- a/datafusion-examples/examples/async_udf.rs +++ b/datafusion-examples/examples/async_udf.rs @@ -199,7 +199,7 @@ impl AsyncScalarUDFImpl for AskLLM { &self, args: ScalarFunctionArgs, _option: &ConfigOptions, - ) -> Result { + ) -> Result { // in a real UDF you would likely want to special case constant // arguments to improve performance, but this example converts the // arguments to arrays for simplicity. @@ -234,6 +234,6 @@ impl AsyncScalarUDFImpl for AskLLM { }) .collect(); - Ok(Arc::new(result_array)) + Ok(ColumnarValue::from(Arc::new(result_array) as ArrayRef)) } } diff --git a/datafusion/expr/src/async_udf.rs b/datafusion/expr/src/async_udf.rs index 24ed124bb2fdf..86fb1579ceb7b 100644 --- a/datafusion/expr/src/async_udf.rs +++ b/datafusion/expr/src/async_udf.rs @@ -16,7 +16,6 @@ // under the License. use crate::{ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl}; -use arrow::array::ArrayRef; use arrow::datatypes::{DataType, FieldRef}; use async_trait::async_trait; use datafusion_common::config::ConfigOptions; @@ -50,7 +49,7 @@ pub trait AsyncScalarUDFImpl: ScalarUDFImpl { &self, args: ScalarFunctionArgs, option: &ConfigOptions, - ) -> Result; + ) -> Result; } /// A scalar UDF that must be invoked using async methods @@ -83,7 +82,7 @@ impl AsyncScalarUDF { &self, args: ScalarFunctionArgs, option: &ConfigOptions, - ) -> Result { + ) -> Result { self.inner.invoke_async_with_args(args, option).await } } diff --git a/datafusion/physical-expr/src/async_scalar_function.rs b/datafusion/physical-expr/src/async_scalar_function.rs index 547b9c13da622..184b24fed7196 100644 --- a/datafusion/physical-expr/src/async_scalar_function.rs +++ b/datafusion/physical-expr/src/async_scalar_function.rs @@ -196,7 +196,7 @@ impl AsyncFuncExpr { ); } - let datas = result_batches + let datas = ColumnarValue::values_to_arrays(&result_batches)? .iter() .map(|b| b.to_data()) .collect::>(); From 522ea9ab9fb0386396ea9dce811b917ab9a04a0f Mon Sep 17 00:00:00 2001 From: geetanshjuneja Date: Fri, 25 Jul 2025 11:33:20 +0530 Subject: [PATCH 2/6] fix docs --- .../library-user-guide/functions/adding-udfs.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/source/library-user-guide/functions/adding-udfs.md b/docs/source/library-user-guide/functions/adding-udfs.md index 5c95cb3301796..5e695600c6d7c 100644 --- a/docs/source/library-user-guide/functions/adding-udfs.md +++ b/docs/source/library-user-guide/functions/adding-udfs.md @@ -444,13 +444,12 @@ impl AsyncScalarUDFImpl for AsyncUpper { } /// This method is called to execute the async UDF and is similar - /// to the normal `invoke_with_args` except it returns an `ArrayRef` - /// instead of `ColumnarValue` and is `async`. + /// to the normal `invoke_with_args` except it is `async`. async fn invoke_async_with_args( &self, args: ScalarFunctionArgs, _option: &ConfigOptions, - ) -> Result { + ) -> Result { let value = &args.args[0]; // This function simply implements a simple string to uppercase conversion // but can be used for any async operation such as network calls. @@ -465,7 +464,7 @@ impl AsyncScalarUDFImpl for AsyncUpper { } _ => return internal_err!("Expected a string argument, got {:?}", value), }; - Ok(result) + Ok(ColumnarValue::from(result)) } } ``` @@ -550,7 +549,7 @@ We can now transfer the async UDF into the normal scalar using `into_scalar_udf` # &self, # args: ScalarFunctionArgs, # _option: &ConfigOptions, -# ) -> Result { +# ) -> Result { # trace!("Invoking async_upper with args: {:?}", args); # let value = &args.args[0]; # let result = match value { @@ -564,7 +563,7 @@ We can now transfer the async UDF into the normal scalar using `into_scalar_udf` # } # _ => return internal_err!("Expected a string argument, got {:?}", value), # }; -# Ok(result) +# Ok(ColumnarValue::from(result)) # } # } use datafusion::execution::context::SessionContext; From 51a103f7e77e2cc556805fc68d63573478f123c4 Mon Sep 17 00:00:00 2001 From: geetanshjuneja Date: Mon, 28 Jul 2025 21:12:57 +0530 Subject: [PATCH 3/6] cargo fmt --- datafusion/expr/src/async_udf.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/async_udf.rs b/datafusion/expr/src/async_udf.rs index e4a0b872654fd..5a42bfa74edc1 100644 --- a/datafusion/expr/src/async_udf.rs +++ b/datafusion/expr/src/async_udf.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - use crate::utils::{arc_ptr_eq, arc_ptr_hash}; use crate::{ udf_equals_hash, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, From 2ee1ccd7baf2ceac388c03cd6a18aee87cf0c58a Mon Sep 17 00:00:00 2001 From: geetanshjuneja Date: Mon, 28 Jul 2025 21:36:38 +0530 Subject: [PATCH 4/6] cargo clippy --- datafusion/expr/src/async_udf.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/async_udf.rs b/datafusion/expr/src/async_udf.rs index 5a42bfa74edc1..02199e065af8c 100644 --- a/datafusion/expr/src/async_udf.rs +++ b/datafusion/expr/src/async_udf.rs @@ -19,7 +19,6 @@ use crate::utils::{arc_ptr_eq, arc_ptr_hash}; use crate::{ udf_equals_hash, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, }; -use arrow::array::ArrayRef; use arrow::datatypes::{DataType, FieldRef}; use async_trait::async_trait; use datafusion_common::config::ConfigOptions; From a405e58b152d87366b5e4993f9e1dcef9904789a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 29 Jul 2025 14:03:13 -0400 Subject: [PATCH 5/6] Add a note in the upgrade guide --- docs/source/library-user-guide/upgrading.md | 42 +++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 268b156608dc2..4042bc2ace213 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -24,6 +24,48 @@ **Note:** DataFusion `50.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version. You can see the current [status of the `50.0.0 `release here](https://github.com/apache/datafusion/issues/16799) +### `AsyncScalarUDFImpl::invoke_async_with_args` returns `ColumnarValue` + +In order to enable single value optimizations and be consistent with other +user defined function APIs, the `AsyncScalarUDFImpl::invoke_async_with_args` method now +returns a `ColumnarValue` instead of a `ArrayRef`. + +To upgrade, change the return type of your implementation + +```rust +# /* comment to avoid running +impl AsyncScalarUDFImpl for AskLLM { + async fn invoke_async_with_args( + &self, + args: ScalarFunctionArgs, + _option: &ConfigOptions, + ) -> Result { + .. + return array_ref; // old code + } +} +# */ +``` + +To return a `ColumnarValue` + +```rust +# /* comment to avoid running +impl AsyncScalarUDFImpl for AskLLM { + async fn invoke_async_with_args( + &self, + args: ScalarFunctionArgs, + _option: &ConfigOptions, + ) -> Result { + .. + return ColumnarValue::from(array_ref); // new code + } +} +# */ +``` + +See [#16896](https://github.com/apache/datafusion/issues/16896) for more details. + ## DataFusion `49.0.0` ### `MSRV` updated to 1.85.1 From 418e82ea8085eb5c0e1eeadc93ee6c93cb7099bc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 5 Aug 2025 15:36:18 -0400 Subject: [PATCH 6/6] Fix merge blunder --- docs/source/library-user-guide/upgrading.md | 70 +++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index ecfbbbbc0bc18..e18b6682b8e98 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -66,6 +66,76 @@ impl AsyncScalarUDFImpl for AskLLM { See [#16896](https://github.com/apache/datafusion/issues/16896) for more details. +### `SessionState`, `SessionConfig`, and `OptimizerConfig` returns `&Arc` instead of `&ConfigOptions` + +To provide broader access to `ConfigOptions` and reduce required clones, some +APIs have been changed to return a `&Arc` instead of a +`&ConfigOptions`. This allows sharing the same `ConfigOptions` across multiple +threads without needing to clone the entire `ConfigOptions` structure unless it +is modified. + +Most users will not be impacted by this change since the Rust compiler typically +automatically dereference the `Arc` when needed. However, in some cases you may +have to change your code to explicitly call `as_ref()` for example, from + +```rust +# /* comment to avoid running +let optimizer_config: &ConfigOptions = state.options(); +# */ +``` + +To + +```rust +# /* comment to avoid running +let optimizer_config: &ConfigOptions = state.options().as_ref(); +# */ +``` + +See PR [#16970](https://github.com/apache/datafusion/pull/16970) + +### API Change to `AsyncScalarUDFImpl::invoke_async_with_args` + +The `invoke_async_with_args` method of the `AsyncScalarUDFImpl` trait has been +updated to remove the `_option: &ConfigOptions` parameter to simplify the API +now that the `ConfigOptions` can be accessed through the `ScalarFunctionArgs` +parameter. + +You can change your code like this + +```rust +# /* comment to avoid running +impl AsyncScalarUDFImpl for AskLLM { + async fn invoke_async_with_args( + &self, + args: ScalarFunctionArgs, + _option: &ConfigOptions, + ) -> Result { + .. + } + ... +} +# */ +``` + +To this: + +```rust +# /* comment to avoid running + +impl AsyncScalarUDFImpl for AskLLM { + async fn invoke_async_with_args( + &self, + args: ScalarFunctionArgs, + ) -> Result { + let options = &args.config_options; + .. + } + ... +} +# */ +``` + ### Upgrade to arrow `56.0.0` and parquet `56.0.0` This version of DataFusion upgrades the underlying Apache Arrow implementation