-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Unify most of SessionConfig settings into ConfigOptions
#4492
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,28 @@ use std::env; | |
| use std::fmt::{Debug, Formatter}; | ||
| use std::sync::Arc; | ||
|
|
||
/// Configuration option "datafusion.execution.target_partitions"
///
/// Number of partitions used for query execution.
pub const OPT_TARGET_PARTITIONS: &str = "datafusion.execution.target_partitions";

/// Configuration option "datafusion.catalog.create_default_catalog_and_schema"
///
/// Whether the default catalog and schema are created automatically.
pub const OPT_CREATE_DEFAULT_CATALOG_AND_SCHEMA: &str =
    "datafusion.catalog.create_default_catalog_and_schema";

/// Configuration option "datafusion.catalog.information_schema"
///
/// Whether the `information_schema` virtual tables are made available.
pub const OPT_INFORMATION_SCHEMA: &str = "datafusion.catalog.information_schema";

/// Configuration option "datafusion.optimizer.repartition_joins"
///
/// Whether data is repartitioned on the join keys so joins can run in parallel.
pub const OPT_REPARTITION_JOINS: &str = "datafusion.optimizer.repartition_joins";

/// Configuration option "datafusion.optimizer.repartition_aggregations"
///
/// Whether data is repartitioned on the aggregate keys so aggregates can run in parallel.
pub const OPT_REPARTITION_AGGREGATIONS: &str =
    "datafusion.optimizer.repartition_aggregations";

/// Configuration option "datafusion.optimizer.repartition_windows"
///
/// Whether data is repartitioned on the partition keys so window functions can run in
/// parallel.
pub const OPT_REPARTITION_WINDOWS: &str = "datafusion.optimizer.repartition_windows";

/// Configuration option "datafusion.execution.collect_statistics"
///
/// Whether statistics are collected after listing files. The key lives in the
/// `datafusion.execution` namespace, alongside [`OPT_TARGET_PARTITIONS`].
pub const OPT_COLLECT_STATISTICS: &str = "datafusion.execution.collect_statistics";

/// Configuration option "datafusion.optimizer.filter_null_join_keys"
///
/// Whether the optimizer inserts null filters ahead of a join between a nullable and a
/// non-nullable column.
pub const OPT_FILTER_NULL_JOIN_KEYS: &str = "datafusion.optimizer.filter_null_join_keys";
|
Comment on lines
+31
to
53
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there any reason we couldn't just make these an enum? Then we could make the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🤔 that is an excellent idea -- I don't think there is any reason we could not do so. I will file a follow-on ticket to do so
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
|
|
@@ -199,7 +221,54 @@ impl BuiltInConfigs { | |
| /// configuration options | ||
| pub fn new() -> Self { | ||
| Self { | ||
| config_definitions: vec![ConfigDefinition::new_bool( | ||
| config_definitions: vec![ConfigDefinition::new_u64( | ||
| OPT_TARGET_PARTITIONS, | ||
| "Number of partitions for query execution. Increasing partitions can increase \ | ||
| concurrency. Defaults to the number of cpu cores on the system.", | ||
| num_cpus::get() as u64, | ||
| ), | ||
|
|
||
| ConfigDefinition::new_bool( | ||
| OPT_CREATE_DEFAULT_CATALOG_AND_SCHEMA, | ||
| "Whether the default catalog and schema should be created automatically.", | ||
| true | ||
| ), | ||
|
|
||
| ConfigDefinition::new_bool( | ||
| OPT_INFORMATION_SCHEMA, | ||
| "Should DataFusion provide access to `information_schema` \ | ||
| virtual tables for displaying schema information", | ||
| false | ||
| ), | ||
|
|
||
| ConfigDefinition::new_bool( | ||
| OPT_REPARTITION_JOINS, | ||
| "Should DataFusion repartition data using the join keys to execute joins in parallel \ | ||
| using the provided `target_partitions` level", | ||
| true | ||
| ), | ||
|
|
||
| ConfigDefinition::new_bool( | ||
| OPT_REPARTITION_AGGREGATIONS, | ||
| "Should DataFusion repartition data using the aggregate keys to execute aggregates \ | ||
| in parallel using the provided `target_partitions` level", | ||
| true | ||
| ), | ||
|
|
||
| ConfigDefinition::new_bool( | ||
| OPT_REPARTITION_WINDOWS, | ||
| "Should DataFusion repartition data using the partitions keys to execute window \ | ||
| functions in parallel using the provided `target_partitions` level", | ||
| true | ||
| ), | ||
|
|
||
| ConfigDefinition::new_bool( | ||
| OPT_COLLECT_STATISTICS, | ||
| "Should DataFusion collect statistics after listing files", | ||
| false | ||
| ), | ||
|
|
||
| ConfigDefinition::new_bool( | ||
| OPT_FILTER_NULL_JOIN_KEYS, | ||
| "When set to true, the optimizer will insert filters before a join between \ | ||
| a nullable and non-nullable column to filter out nulls on the nullable side. This \ | ||
|
|
@@ -336,11 +405,14 @@ impl BuiltInConfigs { | |
| let configs = Self::new(); | ||
| let mut docs = "| key | type | default | description |\n".to_string(); | ||
| docs += "|-----|------|---------|-------------|\n"; | ||
| for config in configs | ||
|
|
||
| let config_definitions: Vec<_> = configs | ||
| .config_definitions | ||
| .iter() | ||
| .sorted_by_key(|c| c.key.as_str()) | ||
| { | ||
| .into_iter() | ||
| .map(normalize_for_display) | ||
| .collect(); | ||
|
|
||
| for config in config_definitions.iter().sorted_by_key(|c| c.key.as_str()) { | ||
| let _ = writeln!( | ||
| &mut docs, | ||
| "| {} | {} | {} | {} |", | ||
|
|
@@ -351,6 +423,16 @@ impl BuiltInConfigs { | |
| } | ||
| } | ||
|
|
||
| /// Normalizes a config definition prior to markdown display | ||
| fn normalize_for_display(mut v: ConfigDefinition) -> ConfigDefinition { | ||
| // Since the default value of target_partitions depends on the number of cores, | ||
| // set the default value to 0 in the docs. | ||
| if v.key == OPT_TARGET_PARTITIONS { | ||
| v.default_value = ScalarValue::UInt64(Some(0)) | ||
| } | ||
| v | ||
| } | ||
|
|
||
| /// Configuration options struct. This can contain values for built-in and custom options | ||
| #[derive(Clone)] | ||
| pub struct ConfigOptions { | ||
|
|
@@ -437,6 +519,12 @@ impl ConfigOptions { | |
| self.set(key, ScalarValue::UInt64(Some(value))) | ||
| } | ||
|
|
||
| /// set a `usize` configuration option | ||
| pub fn set_usize(&mut self, key: &str, value: usize) { | ||
| let value: u64 = value.try_into().expect("convert u64 to usize"); | ||
| self.set(key, ScalarValue::UInt64(Some(value))) | ||
| } | ||
|
|
||
| /// set a `String` configuration option | ||
| pub fn set_string(&mut self, key: &str, value: impl Into<String>) { | ||
| self.set(key, ScalarValue::Utf8(Some(value.into()))) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this illustrates the major API change for DataFusion users