From 390b98a98a11ef9489e5f842985a550c45c12abe Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sat, 18 Jan 2025 02:27:38 +0530 Subject: [PATCH 1/4] deprecate max_statistics_size --- datafusion/common/src/config.rs | 12 ++++++++++++ datafusion/common/src/file_options/parquet_writer.rs | 9 ++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 8d2742aaafe5b..5df97aacca10f 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -112,19 +112,23 @@ use crate::{DataFusionError, Result}; macro_rules! config_namespace { ( $(#[doc = $struct_d:tt])* + $(#[allow($($struct_depr:tt)*)])? $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* + $(#[allow($($field_depr:tt)*)])? $field_vis:vis $field_name:ident : $field_type:ty, $(warn = $warn: expr,)? $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { $(#[doc = $struct_d])* + $(#[allow($($struct_depr)*)])? #[derive(Debug, Clone, PartialEq)] $vis struct $struct_name{ $( $(#[doc = $d])* + $(#[allow($($field_depr)*)])? $field_vis $field_name : $field_type, )* } @@ -378,6 +382,7 @@ config_namespace! { /// See also: [`SessionConfig`] /// /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html + // #[derive(Debug)] pub struct ParquetOptions { // The following options affect reading parquet files @@ -467,6 +472,7 @@ config_namespace! { /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting + #[allow(deprecated)] pub max_statistics_size: Option<usize>, default = Some(4096) /// (writing) Target maximum number of rows in each row group (defaults to 1M @@ -1598,19 +1604,23 @@ impl ConfigField for TableParquetOptions { macro_rules! config_namespace_with_hashmap { ( $(#[doc = $struct_d:tt])* + $(#[allow($($struct_depr:tt)*)])? 
$vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* + $(#[allow($($field_depr:tt)*)])? $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { $(#[doc = $struct_d])* + $(#[allow($($struct_depr)*)])? #[derive(Debug, Clone, PartialEq)] $vis struct $struct_name{ $( $(#[doc = $d])* + $(#[allow($($field_depr)*)])? $field_vis $field_name : $field_type, )* } @@ -1669,6 +1679,7 @@ macro_rules! config_namespace_with_hashmap { $( let key = format!("{}.{field}::{}", key_prefix, column_name, field = stringify!($field_name)); let desc = concat!($($d),*).trim(); + #[allow(deprecated)] col_options.$field_name.visit(v, key.as_str(), desc); )* } @@ -1720,6 +1731,7 @@ config_namespace_with_hashmap! { /// Sets max statistics size for the column path. If NULL, uses /// default parquet options + #[allow(deprecated)] pub max_statistics_size: Option<usize>, default = None } } diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 46bce06470f38..a4e24384ca6fd 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -156,9 +156,14 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv); } + //max_statistics_size is deprecated as per latest arrow version. 
+ #[allow(deprecated)] if let Some(max_statistics_size) = options.max_statistics_size { builder = - builder.set_column_max_statistics_size(path, max_statistics_size); + { + #[allow(deprecated)] + builder.set_column_max_statistics_size(path, max_statistics_size) + } } } @@ -207,6 +212,8 @@ impl ParquetOptions { dictionary_enabled, dictionary_page_size_limit, statistics_enabled, + + #[allow(deprecated)] max_statistics_size, max_row_group_size, created_by, From 4a43991cecd5b8dfc88b2e9b6cd20da4e1bef222 Mon Sep 17 00:00:00 2001 From: logan-keede <68557630+logan-keede@users.noreply.github.com> Date: Sat, 18 Jan 2025 02:41:45 +0530 Subject: [PATCH 2/4] Update parquet_writer.rs --- datafusion/common/src/file_options/parquet_writer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index a4e24384ca6fd..d528588adac82 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -161,7 +161,7 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { if let Some(max_statistics_size) = options.max_statistics_size { builder = { - #[allow(deprecated)] + #[allow(deprecated)] builder.set_column_max_statistics_size(path, max_statistics_size) } } From 79969b33bca63ec094c8432dc4488a9729c000de Mon Sep 17 00:00:00 2001 From: logan-keede <68557630+logan-keede@users.noreply.github.com> Date: Sat, 18 Jan 2025 02:45:07 +0530 Subject: [PATCH 3/4] resolving conflict --- datafusion/common/src/file_options/parquet_writer.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index d528588adac82..b080ec3d22bc9 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -159,8 +159,7 @@ impl 
TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { //max_statistics_size is deprecated as per latest arrow version. #[allow(deprecated)] if let Some(max_statistics_size) = options.max_statistics_size { - builder = - { + builder = { #[allow(deprecated)] builder.set_column_max_statistics_size(path, max_statistics_size) } From 41fe7c33665250221b3ae853c856df0c783855c1 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sat, 18 Jan 2025 11:47:01 +0530 Subject: [PATCH 4/4] formatting fix --- datafusion/common/src/file_options/parquet_writer.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index a4e24384ca6fd..b080ec3d22bc9 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -159,9 +159,8 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { //max_statistics_size is deprecated as per latest arrow version. #[allow(deprecated)] if let Some(max_statistics_size) = options.max_statistics_size { - builder = - { - #[allow(deprecated)] + builder = { + #[allow(deprecated)] builder.set_column_max_statistics_size(path, max_statistics_size) } }