From b8c9f9852361f8dbe4e9e4505b1351943dac9d00 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Thu, 17 Jul 2025 19:07:38 +0800 Subject: [PATCH 1/5] Update upgrade md for new unified config for sql string mapping to utf8view --- docs/source/library-user-guide/upgrading.md | 47 +++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index a28686e01fc39..ddedc8c56f1ae 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -120,6 +120,53 @@ SET datafusion.execution.spill_compression = 'zstd'; For more details about this configuration option, including performance trade-offs between different compression codecs, see the [Configuration Settings](../user-guide/configs.md) documentation. +### Deprecated `map_varchar_to_utf8view` +The old configuration + +```text +datafusion.sql_parser.map_varchar_to_utf8view +``` + +is now **deprecated** in favor of the unified option below.\ +If you previously used this to control only `VARCHAR`→`Utf8View` mapping, please migrate to `map_string_types_to_utf8view`. + +--- + +### New `map_string_types_to_utf8view` configuration option + +To unify **all** SQL string types (`CHAR`, `VARCHAR`, `TEXT`, `STRING`) to Arrow’s zero‑copy `Utf8View`, DataFusion 49.0.0 introduces: + +- **Key**: `datafusion.sql_parser.map_string_types_to_utf8view` +- **Default**: `true` + +**Description:** + +- When **true** (default), **all** SQL string types are mapped to `Utf8View`, avoiding full‑copy UTF‑8 allocations and improving performance. +- When **false**, DataFusion falls back to the legacy `Utf8` mapping for **all** string types. 
+ +#### Examples + +```rust +# /* comment to avoid running +// Disable Utf8View mapping for all SQL string types +let opts = datafusion::sql::planner::ParserOptions::new() + .with_map_string_types_to_utf8view(false); + +// Verify the setting is applied +assert!(!opts.map_string_types_to_utf8view); +# */ +``` +--- + +```sql +-- Disable Utf8View mapping globally +SET datafusion.sql_parser.map_string_types_to_utf8view = false; + +-- Now VARCHAR, CHAR, TEXT, STRING all use Utf8 rather than Utf8View +CREATE TABLE my_table (a VARCHAR, b TEXT, c STRING); +DESCRIBE my_table; +``` + ## DataFusion `48.0.1` ### `datafusion.execution.collect_statistics` now defaults to `true` From eec79313fc49f7cb86f602f171bf9169fbe5194b Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Thu, 17 Jul 2025 19:14:16 +0800 Subject: [PATCH 2/5] fix format --- docs/source/library-user-guide/upgrading.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index ae4276680785d..41a561e48b89d 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -121,6 +121,7 @@ SET datafusion.execution.spill_compression = 'zstd'; For more details about this configuration option, including performance trade-offs between different compression codecs, see the [Configuration Settings](../user-guide/configs.md) documentation. 
### Deprecated `map_varchar_to_utf8view` + See [issue #16290](https://github.com/apache/datafusion/pull/16290) for more information The old configuration From 2ea9074859350216e74f9e1f8397d59c7f99a774 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Thu, 17 Jul 2025 19:20:00 +0800 Subject: [PATCH 3/5] fix --- docs/source/library-user-guide/upgrading.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 41a561e48b89d..01b7985d1c8fc 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -158,6 +158,7 @@ let opts = datafusion::sql::planner::ParserOptions::new() assert!(!opts.map_string_types_to_utf8view); # */ ``` + --- ```sql From e97273a47892cf6c3c55b5282d1958a1d44e2336 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Thu, 17 Jul 2025 19:34:23 +0800 Subject: [PATCH 4/5] format --- docs/source/library-user-guide/upgrading.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 01b7985d1c8fc..be2521c575571 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -120,7 +120,7 @@ SET datafusion.execution.spill_compression = 'zstd'; For more details about this configuration option, including performance trade-offs between different compression codecs, see the [Configuration Settings](../user-guide/configs.md) documentation. 
-### Deprecated `map_varchar_to_utf8view` +### Deprecated `map_varchar_to_utf8view` See [issue #16290](https://github.com/apache/datafusion/pull/16290) for more information The old configuration From 6803a3af57e50fc699bb7b81c691dabdd4c4b17b Mon Sep 17 00:00:00 2001 From: Oleks V Date: Thu, 17 Jul 2025 08:34:47 -0700 Subject: [PATCH 5/5] Update docs/source/library-user-guide/upgrading.md --- docs/source/library-user-guide/upgrading.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index be2521c575571..e6a2f06305c1b 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -120,7 +120,7 @@ SET datafusion.execution.spill_compression = 'zstd'; For more details about this configuration option, including performance trade-offs between different compression codecs, see the [Configuration Settings](../user-guide/configs.md) documentation. -### Deprecated `map_varchar_to_utf8view` +### Deprecated `map_varchar_to_utf8view` configuration option See [issue #16290](https://github.com/apache/datafusion/pull/16290) for more information The old configuration