From d52d085f273cc32e3fb6e46f6c4c247cfec78361 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 2 Feb 2025 19:11:13 -0800 Subject: [PATCH 1/2] [SPARK-51064][SQL] Enable `spark.sql.sources.v2.bucketing.enabled` by default --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../org/apache/spark/sql/DynamicPartitionPruningSuite.scala | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ef1a6521eb913..b3c7da3685956 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1729,7 +1729,7 @@ object SQLConf { "avoid shuffle if necessary.") .version("3.3.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val V2_BUCKETING_PUSH_PART_VALUES_ENABLED = buildConf("spark.sql.sources.v2.bucketing.pushPartValues.enabled") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index 2c24cc7d570ba..1ed5ea4216a9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1217,7 +1217,8 @@ abstract class DynamicPartitionPruningSuiteBase test("SPARK-32509: Unused Dynamic Pruning filter shouldn't affect " + "canonicalization and exchange reuse") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.V2_BUCKETING_ENABLED.key -> "false") { val df = sql( """ WITH view1 as ( | SELECT f.store_id FROM fact_stats f WHERE f.units_sold = 70 From 658b4003bade30d4039d2efb5b551a4ef716b816 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 3 Feb 2025 10:34:36 -0800 Subject: [PATCH 2/2] Update doc too --- docs/sql-performance-tuning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index 12b79828e44cb..39efdc3df3645 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -441,7 +441,7 @@ The following SQL properties enable Storage Partition Join in different join que Property NameDefaultMeaningSince Version spark.sql.sources.v2.bucketing.enabled - false + true When true, try to eliminate shuffle by using the partitioning reported by a compatible V2 data source.