diff --git a/datafusion/common/src/format.rs b/datafusion/common/src/format.rs index a6bd42be691a..ea88eca4a65b 100644 --- a/datafusion/common/src/format.rs +++ b/datafusion/common/src/format.rs @@ -23,6 +23,8 @@ use arrow::util::display::{DurationFormat, FormatOptions}; use crate::config::{ConfigField, Visit}; use crate::error::{DataFusionError, Result}; +#[cfg(feature = "sql")] +use sqlparser::ast::{Expr, UtilityOption, Value, ValueWithSpan}; /// The default [`FormatOptions`] to use within DataFusion /// Also see [`crate::config::FormatOptions`] @@ -430,3 +432,470 @@ impl ConfigField for ExplainAnalyzeCategories { Ok(()) } } + +/// Normalized options for a single `EXPLAIN` statement. +/// +/// This collects the knobs that can be set per-statement from either the +/// legacy keyword form (`EXPLAIN ANALYZE VERBOSE FORMAT tree ...`) or the +/// Postgres-style `EXPLAIN (option [arg], ...) ...` form supported on +/// dialects whose +/// [`Dialect::supports_explain_with_utility_options`](https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html#method.supports_explain_with_utility_options) +/// returns `true`. +/// +/// Fields that are `None` / `false` mean "not set at the statement level" — +/// the physical planner falls back to the corresponding session config +/// value. +#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +pub struct ExplainStatementOptions { + /// Whether to actually execute the plan and gather metrics. + /// + /// Corresponds to the `ANALYZE` keyword or the `ANALYZE` option. + pub analyze: bool, + /// Whether to include extra detail in the output. + /// + /// Corresponds to the `VERBOSE` keyword or the `VERBOSE` option. + pub verbose: bool, + /// Output format for the plan. When `None`, the session-config + /// default (`datafusion.explain.format`) is used. + pub format: Option, + /// Override for [`MetricType`] (summary / dev) when running + /// `EXPLAIN ANALYZE`. 
+ pub analyze_level: Option, + /// Override for [`ExplainAnalyzeCategories`] (rows / bytes / timing + /// / uncategorized) when running `EXPLAIN ANALYZE`. + pub analyze_categories: Option, + /// Override for `datafusion.explain.show_statistics`. + pub show_statistics: Option, +} + +#[cfg(feature = "sql")] +impl ExplainStatementOptions { + /// Parse a list of [`UtilityOption`] values (produced by sqlparser's + /// `parse_utility_options`) into a normalized [`ExplainStatementOptions`]. + /// + /// Argument grammar accepted: + /// - `OPTION` — bare, implies `TRUE` for boolean options. + /// - `OPTION TRUE` / `OPTION FALSE` + /// - `OPTION ON` / `OPTION OFF` + /// - `OPTION 1` / `OPTION 0` + /// - `OPTION <ident>` or `OPTION '<string>'` for format / level / metrics. + /// + /// Options recognized by DataFusion are: `ANALYZE`, `VERBOSE`, `FORMAT`, + /// `METRICS`, `LEVEL`, `TIMING`, `SUMMARY`, `COSTS`. + /// + /// Postgres-only options (`BUFFERS`, `WAL`, `SETTINGS`, `GENERIC_PLAN`, + /// `MEMORY`) return a helpful "not supported" error. Any other option + /// name produces an `unknown EXPLAIN option` error. + pub fn from_utility_options(opts: &[UtilityOption]) -> Result { + let mut out = ExplainStatementOptions::default(); + // Track whether METRICS was explicitly set so TIMING can merge + // into it rather than overwrite. 
+ let mut metrics_explicit = false; + + for opt in opts { + let name = opt.name.value.to_ascii_lowercase(); + match name.as_str() { + "analyze" => { + out.analyze = parse_bool_arg(&opt.arg, &name)?; + } + "verbose" => { + out.verbose = parse_bool_arg(&opt.arg, &name)?; + } + "format" => { + let s = parse_ident_or_string_arg(&opt.arg, &name)?; + out.format = Some(ExplainFormat::from_str(&s)?); + } + "metrics" => { + let s = parse_ident_or_string_arg(&opt.arg, &name)?; + out.analyze_categories = + Some(ExplainAnalyzeCategories::from_str(&s)?); + metrics_explicit = true; + } + "level" => { + let s = parse_ident_or_string_arg(&opt.arg, &name)?; + out.analyze_level = Some(MetricType::from_str(&s)?); + } + "timing" => { + let enable = parse_bool_arg(&opt.arg, &name)?; + out.analyze_categories = Some(adjust_timing( + out.analyze_categories.take(), + enable, + metrics_explicit, + )); + } + "summary" => { + let summary = parse_bool_arg(&opt.arg, &name)?; + out.analyze_level = Some(if summary { + MetricType::Summary + } else { + MetricType::Dev + }); + } + "costs" => { + out.show_statistics = Some(parse_bool_arg(&opt.arg, &name)?); + } + // Postgres options DataFusion does not model. Give a helpful + // pointer rather than silently accepting them. + "buffers" | "wal" | "settings" | "generic_plan" | "memory" => { + let upper = name.to_ascii_uppercase(); + return Err(DataFusionError::NotImplemented(format!( + "EXPLAIN option {upper} is not supported by DataFusion; \ + see METRICS for category filtering" + ))); + } + _ => { + return Err(DataFusionError::Plan(format!( + "unknown EXPLAIN option: {}", + opt.name.value + ))); + } + } + } + + Ok(out) + } +} + +/// Parse a boolean argument for an EXPLAIN option. +/// +/// `None` (bare option, e.g. `ANALYZE`) is treated as `true`. Accepts boolean +/// literals, the numeric literals `0` / `1`, and `TRUE`/`FALSE`/`ON`/`OFF` +/// (case-insensitive) as identifiers or quoted strings (also `'0'` / `'1'`). 
+#[cfg(feature = "sql")] +fn parse_bool_arg(arg: &Option, name: &str) -> Result { + let Some(expr) = arg else { + return Ok(true); + }; + match expr { + Expr::Identifier(ident) => match ident.value.to_ascii_lowercase().as_str() { + "true" | "on" => Ok(true), + "false" | "off" => Ok(false), + other => Err(DataFusionError::Plan(format!( + "expected boolean for EXPLAIN option {name}, got '{other}'" + ))), + }, + Expr::Value(ValueWithSpan { value, .. }) => match value { + Value::Boolean(b) => Ok(*b), + Value::Number(n, _) => match n.as_str() { + "0" => Ok(false), + "1" => Ok(true), + other => Err(DataFusionError::Plan(format!( + "expected boolean (0 or 1) for EXPLAIN option {name}, got '{other}'" + ))), + }, + Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) => { + match s.to_ascii_lowercase().as_str() { + "true" | "on" | "1" => Ok(true), + "false" | "off" | "0" => Ok(false), + other => Err(DataFusionError::Plan(format!( + "expected boolean for EXPLAIN option {name}, got '{other}'" + ))), + } + } + other => Err(DataFusionError::Plan(format!( + "expected boolean for EXPLAIN option {name}, got '{other}'" + ))), + }, + other => Err(DataFusionError::Plan(format!( + "expected boolean for EXPLAIN option {name}, got '{other}'" + ))), + } +} + +/// Parse an identifier-or-string argument (used for `FORMAT`, `METRICS`, +/// `LEVEL`). +#[cfg(feature = "sql")] +fn parse_ident_or_string_arg(arg: &Option, name: &str) -> Result { + let expr = arg.as_ref().ok_or_else(|| { + DataFusionError::Plan(format!( + "EXPLAIN option {} requires an argument", + name.to_ascii_uppercase() + )) + })?; + match expr { + Expr::Identifier(ident) => Ok(ident.value.clone()), + Expr::Value(ValueWithSpan { value, .. 
}) => match value { + Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) => Ok(s.clone()), + other => Err(DataFusionError::Plan(format!( + "expected identifier or string for EXPLAIN option {name}, got '{other}'" + ))), + }, + other => Err(DataFusionError::Plan(format!( + "expected identifier or string for EXPLAIN option {name}, got '{other}'" + ))), + } +} + +/// Merge a `TIMING on/off` option into an existing `METRICS` selection. +/// +/// If METRICS was already specified, we only add/remove the Timing category +/// within that selection. If METRICS was not specified, TIMING effectively +/// means "All" when on, or "show everything except timing" when off. +#[cfg(feature = "sql")] +fn adjust_timing( + current: Option, + enable: bool, + metrics_explicit: bool, +) -> ExplainAnalyzeCategories { + // METRICS was not specified — TIMING alone shapes the selection. + if !metrics_explicit { + return if enable { + ExplainAnalyzeCategories::All + } else { + ExplainAnalyzeCategories::Only(vec![ + MetricCategory::Rows, + MetricCategory::Bytes, + MetricCategory::Uncategorized, + ]) + }; + } + + // METRICS was specified explicitly earlier — merge into its list. When + // METRICS was explicit, `current` is always `Some(_)`; fall back to All + // to be safe. + match current.unwrap_or(ExplainAnalyzeCategories::All) { + ExplainAnalyzeCategories::All if enable => ExplainAnalyzeCategories::All, + ExplainAnalyzeCategories::All => { + // Everything except timing: rows, bytes, uncategorized. 
+ ExplainAnalyzeCategories::Only(vec![ + MetricCategory::Rows, + MetricCategory::Bytes, + MetricCategory::Uncategorized, + ]) + } + ExplainAnalyzeCategories::Only(mut cats) if enable => { + if !cats.contains(&MetricCategory::Timing) { + cats.push(MetricCategory::Timing); + } + ExplainAnalyzeCategories::Only(cats) + } + ExplainAnalyzeCategories::Only(cats) => ExplainAnalyzeCategories::Only( + cats.into_iter() + .filter(|c| *c != MetricCategory::Timing) + .collect(), + ), + } +} + +#[cfg(all(test, feature = "sql"))] +mod explain_options_tests { + use super::*; + use sqlparser::ast::Ident; + use sqlparser::tokenizer::Span; + + fn bare(name: &str) -> UtilityOption { + UtilityOption { + name: Ident { + value: name.to_string(), + quote_style: None, + span: Span::empty(), + }, + arg: None, + } + } + + fn with_ident_arg(name: &str, arg: &str) -> UtilityOption { + UtilityOption { + name: Ident { + value: name.to_string(), + quote_style: None, + span: Span::empty(), + }, + arg: Some(Expr::Identifier(Ident { + value: arg.to_string(), + quote_style: None, + span: Span::empty(), + })), + } + } + + fn with_string_arg(name: &str, arg: &str) -> UtilityOption { + UtilityOption { + name: Ident { + value: name.to_string(), + quote_style: None, + span: Span::empty(), + }, + arg: Some(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString(arg.to_string()), + span: Span::empty(), + })), + } + } + + fn with_bool_arg(name: &str, b: bool) -> UtilityOption { + UtilityOption { + name: Ident { + value: name.to_string(), + quote_style: None, + span: Span::empty(), + }, + arg: Some(Expr::Value(ValueWithSpan { + value: Value::Boolean(b), + span: Span::empty(), + })), + } + } + + fn with_number_arg(name: &str, n: &str) -> UtilityOption { + UtilityOption { + name: Ident { + value: name.to_string(), + quote_style: None, + span: Span::empty(), + }, + arg: Some(Expr::Value(ValueWithSpan { + value: Value::Number(n.to_string(), false), + span: Span::empty(), + })), + } + } + + #[test] + fn 
bare_analyze_and_verbose() { + let opts = ExplainStatementOptions::from_utility_options(&[ + bare("ANALYZE"), + bare("VERBOSE"), + ]) + .unwrap(); + assert!(opts.analyze); + assert!(opts.verbose); + assert!(opts.format.is_none()); + } + + #[test] + fn format_from_ident_and_string() { + let opts = ExplainStatementOptions::from_utility_options(&[with_ident_arg( + "FORMAT", "tree", + )]) + .unwrap(); + assert_eq!(opts.format, Some(ExplainFormat::Tree)); + + let opts = ExplainStatementOptions::from_utility_options(&[with_string_arg( + "FORMAT", "pgjson", + )]) + .unwrap(); + assert_eq!(opts.format, Some(ExplainFormat::PostgresJSON)); + } + + #[test] + fn metrics_and_level() { + let opts = ExplainStatementOptions::from_utility_options(&[ + with_string_arg("METRICS", "rows,bytes"), + with_ident_arg("LEVEL", "dev"), + ]) + .unwrap(); + assert_eq!( + opts.analyze_categories, + Some(ExplainAnalyzeCategories::Only(vec![ + MetricCategory::Rows, + MetricCategory::Bytes, + ])) + ); + assert_eq!(opts.analyze_level, Some(MetricType::Dev)); + } + + #[test] + fn on_off_numeric_bool() { + let opts = ExplainStatementOptions::from_utility_options(&[ + with_ident_arg("ANALYZE", "ON"), + with_ident_arg("VERBOSE", "off"), + with_bool_arg("COSTS", true), + ]) + .unwrap(); + assert!(opts.analyze); + assert!(!opts.verbose); + assert_eq!(opts.show_statistics, Some(true)); + + let opts = ExplainStatementOptions::from_utility_options(&[ + with_number_arg("ANALYZE", "1"), + with_number_arg("VERBOSE", "0"), + ]) + .unwrap(); + assert!(opts.analyze); + assert!(!opts.verbose); + } + + #[test] + fn summary_sugar_sets_level() { + let opts = ExplainStatementOptions::from_utility_options(&[with_ident_arg( + "SUMMARY", "ON", + )]) + .unwrap(); + assert_eq!(opts.analyze_level, Some(MetricType::Summary)); + + let opts = ExplainStatementOptions::from_utility_options(&[with_bool_arg( + "SUMMARY", false, + )]) + .unwrap(); + assert_eq!(opts.analyze_level, Some(MetricType::Dev)); + } + + #[test] + fn 
timing_merges_with_metrics() { + // METRICS then TIMING off → timing is removed from the list + let opts = ExplainStatementOptions::from_utility_options(&[ + with_string_arg("METRICS", "rows,timing"), + with_bool_arg("TIMING", false), + ]) + .unwrap(); + assert_eq!( + opts.analyze_categories, + Some(ExplainAnalyzeCategories::Only(vec![MetricCategory::Rows])) + ); + + // METRICS 'rows' then TIMING on → timing is appended + let opts = ExplainStatementOptions::from_utility_options(&[ + with_string_arg("METRICS", "rows"), + with_bool_arg("TIMING", true), + ]) + .unwrap(); + assert_eq!( + opts.analyze_categories, + Some(ExplainAnalyzeCategories::Only(vec![ + MetricCategory::Rows, + MetricCategory::Timing, + ])) + ); + } + + #[test] + fn timing_alone() { + let opts = ExplainStatementOptions::from_utility_options(&[with_bool_arg( + "TIMING", false, + )]) + .unwrap(); + assert_eq!( + opts.analyze_categories, + Some(ExplainAnalyzeCategories::Only(vec![ + MetricCategory::Rows, + MetricCategory::Bytes, + MetricCategory::Uncategorized, + ])) + ); + } + + #[test] + fn unknown_option_rejected() { + let err = + ExplainStatementOptions::from_utility_options(&[bare("FOO")]).unwrap_err(); + assert!( + err.to_string().contains("unknown EXPLAIN option: FOO"), + "got: {err}" + ); + } + + #[test] + fn postgres_only_options_rejected() { + for pg_only in ["BUFFERS", "WAL", "SETTINGS", "GENERIC_PLAN", "MEMORY"] { + let err = ExplainStatementOptions::from_utility_options(&[bare(pg_only)]) + .unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains(pg_only), + "msg did not include {pg_only}: {msg}" + ); + assert!(msg.contains("not supported"), "msg: {msg}"); + } + } +} diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index a5749e70ceaa..69ab8d972513 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -682,6 +682,7 @@ impl SessionState { stringified_plans, 
schema: Arc::clone(&e.schema), logical_optimization_succeeded: false, + show_statistics: e.show_statistics, })); } Err(e) => return Err(e), @@ -719,6 +720,7 @@ impl SessionState { stringified_plans, schema: Arc::clone(&e.schema), logical_optimization_succeeded, + show_statistics: e.show_statistics, })) } else { let analyzed_plan = self.analyzer.execute_and_check( diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index a025446aa37e..5f1954aeaa4e 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2528,6 +2528,8 @@ impl DefaultPhysicalPlanner { let config = &session_state.config_options().explain; let explain_format = &e.explain_format; + // Statement-level override wins over session config for show_statistics. + let show_statistics = e.show_statistics.unwrap_or(config.show_statistics); if !e.logical_optimization_succeeded { return Ok(Arc::new(ExplainExec::new( @@ -2600,7 +2602,7 @@ impl DefaultPhysicalPlanner { stringified_plans.push(StringifiedPlan::new( InitialPhysicalPlan, displayable(input.as_ref()) - .set_show_statistics(config.show_statistics) + .set_show_statistics(show_statistics) .set_show_schema(config.show_schema) .indent(e.verbose) .to_string(), @@ -2609,7 +2611,7 @@ impl DefaultPhysicalPlanner { // Show statistics + schema in verbose output even if not // explicitly requested if e.verbose { - if !config.show_statistics { + if !show_statistics { stringified_plans.push(StringifiedPlan::new( InitialPhysicalPlanWithStats, displayable(input.as_ref()) @@ -2638,7 +2640,7 @@ impl DefaultPhysicalPlanner { stringified_plans.push(StringifiedPlan::new( plan_type, displayable(plan) - .set_show_statistics(config.show_statistics) + .set_show_statistics(show_statistics) .set_show_schema(config.show_schema) .indent(e.verbose) .to_string(), @@ -2651,7 +2653,7 @@ impl DefaultPhysicalPlanner { stringified_plans.push(StringifiedPlan::new( FinalPhysicalPlan, 
displayable(input.as_ref()) - .set_show_statistics(config.show_statistics) + .set_show_statistics(show_statistics) .set_show_schema(config.show_schema) .indent(e.verbose) .to_string(), @@ -2660,7 +2662,7 @@ impl DefaultPhysicalPlanner { // Show statistics + schema in verbose output even if not // explicitly requested if e.verbose { - if !config.show_statistics { + if !show_statistics { stringified_plans.push(StringifiedPlan::new( FinalPhysicalPlanWithStats, displayable(input.as_ref()) @@ -2714,13 +2716,18 @@ impl DefaultPhysicalPlanner { let input = self.create_physical_plan(&a.input, session_state).await?; let schema = Arc::clone(a.schema.inner()); let show_statistics = session_state.config_options().explain.show_statistics; - let analyze_level = session_state.config_options().explain.analyze_level; + // Statement-level overrides take precedence over the session config. + let analyze_level = a + .analyze_level + .unwrap_or(session_state.config_options().explain.analyze_level); let metric_types = analyze_level.included_types(); - let analyze_categories = session_state - .config_options() - .explain - .analyze_categories - .clone(); + let analyze_categories = a.analyze_categories.clone().unwrap_or_else(|| { + session_state + .config_options() + .explain + .analyze_categories + .clone() + }); let metric_categories = match analyze_categories { ExplainAnalyzeCategories::All => None, ExplainAnalyzeCategories::Only(cats) => Some(cats), @@ -3844,6 +3851,7 @@ mod tests { stringified_plans, schema: schema.to_dfschema_ref().unwrap(), logical_optimization_succeeded: false, + show_statistics: None, }; let plan = planner .handle_explain(&explain, &ctx.state()) diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 8ab0d150a727..402c86cfc2ce 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -1267,3 +1267,115 @@ async fn explain_analyze_categories() { ); } } + 
+/// Returns a [`SessionContext`] configured with the PostgreSQL dialect so +/// that `EXPLAIN (option, ...)` utility-option syntax is accepted. +fn session_ctx_with_pg_dialect() -> SessionContext { + use std::str::FromStr; + let mut config = SessionConfig::new(); + let options = config.options_mut(); + options.sql_parser.dialect = + datafusion::config::Dialect::from_str("PostgreSQL").unwrap(); + SessionContext::new_with_config(config) +} + +async fn collect_explain(ctx: &SessionContext, sql: &str) -> String { + let dataframe = ctx.sql(sql).await.unwrap(); + let batches = dataframe.collect().await.unwrap(); + arrow::util::pretty::pretty_format_batches(&batches) + .unwrap() + .to_string() +} + +/// Verifies that the Postgres-style `EXPLAIN (METRICS '...')` form produces +/// the same category filtering as `SET datafusion.explain.analyze_categories`. +#[tokio::test] +async fn explain_analyze_paren_metrics_filtering() { + let ctx = session_ctx_with_pg_dialect(); + let sql = "EXPLAIN (ANALYZE, METRICS 'rows') \ + SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC"; + let plan = collect_explain(&ctx, sql).await; + assert!( + plan.contains("output_rows"), + "rows category should include output_rows:\n{plan}" + ); + assert!( + !plan.contains("elapsed_compute"), + "rows-only METRICS should exclude elapsed_compute:\n{plan}" + ); + assert!( + !plan.contains("output_bytes"), + "rows-only METRICS should exclude output_bytes:\n{plan}" + ); +} + +/// Verifies that a statement-level METRICS overrides the session config. +#[tokio::test] +async fn explain_analyze_paren_metrics_overrides_session_config() { + let ctx = session_ctx_with_pg_dialect(); + // Session default: show only `rows` via config. 
+ { + let state = ctx.state_ref(); + let mut state = state.write(); + state.config_mut().options_mut().explain.analyze_categories = + ExplainAnalyzeCategories::Only(vec![MetricCategory::Rows]); + } + // Statement overrides with 'bytes' — we should see output_bytes but not + // output_rows. The substring checks below cannot collide because neither + // metric name contains the other. + let sql = "EXPLAIN (ANALYZE, METRICS 'bytes') \ + SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC"; + let plan = collect_explain(&ctx, sql).await; + assert!( + plan.contains("output_bytes"), + "statement-level METRICS='bytes' should show output_bytes:\n{plan}" + ); + assert!( + !plan.contains("output_rows"), + "statement-level METRICS='bytes' should hide output_rows:\n{plan}" + ); +} + +/// Verifies that `EXPLAIN (ANALYZE, LEVEL summary)` only shows summary metrics, +/// overriding the session default of `dev`. +#[tokio::test] +async fn explain_analyze_paren_level_overrides_session_config() { + let ctx = session_ctx_with_pg_dialect(); + // Session default: Dev + { + let state = ctx.state_ref(); + let mut state = state.write(); + state.config_mut().options_mut().explain.analyze_level = MetricType::Dev; + } + let sql = "EXPLAIN (ANALYZE, LEVEL summary) \ + SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC"; + let plan = collect_explain(&ctx, sql).await; + // `spill_count` is Dev-only; `output_rows` is Summary. + assert!( + plan.contains("output_rows"), + "summary should still show output_rows:\n{plan}" + ); + assert!( + !plan.contains("spill_count"), + "summary should hide Dev-only spill_count:\n{plan}" + ); +} + +/// Verifies that `EXPLAIN (ANALYZE, BUFFERS)` returns a helpful error. 
+#[tokio::test] +async fn explain_paren_buffers_rejected() { + let ctx = session_ctx_with_pg_dialect(); + let err = ctx + .sql("EXPLAIN (ANALYZE, BUFFERS) SELECT 1") + .await + .unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("BUFFERS"), + "error should mention BUFFERS: {msg}" + ); + assert!( + msg.contains("not supported"), + "error should say not supported: {msg}" + ); +} diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 017a123eb035..7635c0bfe945 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1329,6 +1329,8 @@ impl LogicalPlanBuilder { verbose: explain_option.verbose, input: self.plan, schema, + analyze_level: None, + analyze_categories: None, }))) } else { let stringified_plans = @@ -1341,6 +1343,7 @@ impl LogicalPlanBuilder { stringified_plans, schema, logical_optimization_succeeded: false, + show_statistics: explain_option.show_statistics, }))) } } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index d86024295a06..9ab227ebe841 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -52,7 +52,7 @@ use crate::{ use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; use datafusion_common::cse::{NormalizeEq, Normalizeable}; -use datafusion_common::format::ExplainFormat; +use datafusion_common::format::{ExplainAnalyzeCategories, ExplainFormat, MetricType}; use datafusion_common::metadata::check_metadata_with_storage_equal; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, @@ -1094,6 +1094,8 @@ impl LogicalPlan { verbose: a.verbose, schema: Arc::clone(&a.schema), input: Arc::new(input), + analyze_level: a.analyze_level, + analyze_categories: a.analyze_categories.clone(), })) } LogicalPlan::Explain(e) => { @@ -1106,6 +1108,7 @@ impl LogicalPlan { stringified_plans: 
e.stringified_plans.clone(), schema: Arc::clone(&e.schema), logical_optimization_succeeded: e.logical_optimization_succeeded, + show_statistics: e.show_statistics, })) } LogicalPlan::Statement(Statement::Prepare(Prepare { @@ -3205,6 +3208,9 @@ pub struct ExplainOption { pub analyze: bool, /// Output syntax/format pub format: ExplainFormat, + /// Statement-level override for `datafusion.explain.show_statistics`. + /// `None` means "fall back to session config". + pub show_statistics: Option, } impl Default for ExplainOption { @@ -3213,6 +3219,7 @@ impl Default for ExplainOption { verbose: false, analyze: false, format: ExplainFormat::Indent, + show_statistics: None, } } } @@ -3235,6 +3242,13 @@ impl ExplainOption { self.format = format; self } + + /// Builder-style setter for a statement-level override of + /// `datafusion.explain.show_statistics`. + pub fn with_show_statistics(mut self, show_statistics: Option) -> Self { + self.show_statistics = show_statistics; + self + } } /// Produces a relation with string representations of @@ -3258,6 +3272,9 @@ pub struct Explain { pub schema: DFSchemaRef, /// Used by physical planner to check if should proceed with planning pub logical_optimization_succeeded: bool, + /// Statement-level override for `datafusion.explain.show_statistics`. + /// When `None`, the session-config value is used. + pub show_statistics: Option, } // Manual implementation needed because of `schema` field. Comparison excludes this field. 
@@ -3273,18 +3290,22 @@ impl PartialOrd for Explain { pub stringified_plans: &'a Vec, /// Used by physical planner to check if should proceed with planning pub logical_optimization_succeeded: &'a bool, + /// Statement-level override for show_statistics + pub show_statistics: &'a Option, } let comparable_self = ComparableExplain { verbose: &self.verbose, plan: &self.plan, stringified_plans: &self.stringified_plans, logical_optimization_succeeded: &self.logical_optimization_succeeded, + show_statistics: &self.show_statistics, }; let comparable_other = ComparableExplain { verbose: &other.verbose, plan: &other.plan, stringified_plans: &other.stringified_plans, logical_optimization_succeeded: &other.logical_optimization_succeeded, + show_statistics: &other.show_statistics, }; comparable_self .partial_cmp(&comparable_other) @@ -3303,9 +3324,18 @@ pub struct Analyze { pub input: Arc, /// The output schema of the explain (2 columns of text) pub schema: DFSchemaRef, + /// Statement-level override for `datafusion.explain.analyze_level`. + /// When `None`, the session-config value is used. + pub analyze_level: Option, + /// Statement-level override for `datafusion.explain.analyze_categories`. + /// When `None`, the session-config value is used. + pub analyze_categories: Option, } -// Manual implementation needed because of `schema` field. Comparison excludes this field. +// Manual implementation needed because of `schema` field and the lack of +// `PartialOrd` on `MetricType` / `ExplainAnalyzeCategories`. Ordering is +// defined over `(verbose, input)` and then falls back to `==` for the +// remaining statement-level override fields. 
impl PartialOrd for Analyze { fn partial_cmp(&self, other: &Self) -> Option { match self.verbose.partial_cmp(&other.verbose) { diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index a1285510da56..78e020fd4512 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -203,6 +203,7 @@ impl TreeNode for LogicalPlan { stringified_plans, schema, logical_optimization_succeeded, + show_statistics, }) => plan.map_elements(f)?.update_data(|plan| { LogicalPlan::Explain(Explain { verbose, @@ -211,17 +212,22 @@ impl TreeNode for LogicalPlan { stringified_plans, schema, logical_optimization_succeeded, + show_statistics, }) }), LogicalPlan::Analyze(Analyze { verbose, input, schema, + analyze_level, + analyze_categories, }) => input.map_elements(f)?.update_data(|input| { LogicalPlan::Analyze(Analyze { verbose, input, schema, + analyze_level, + analyze_categories, }) }), LogicalPlan::Dml(DmlStatement { diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 9715ecf8d97c..bcf0bc754958 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -1667,6 +1667,11 @@ impl AsLogicalPlan for LogicalPlanNode { )), }), LogicalPlan::Analyze(a) => { + // TODO: propagate statement-level `analyze_level` and + // `analyze_categories` overrides through the proto so round-trips + // preserve them. For now, these fields default to `None` on the + // other side (falling back to session config), which matches the + // previous behavior. let input = LogicalPlanNode::try_from_logical_plan( a.input.as_ref(), extension_codec, @@ -1681,6 +1686,10 @@ impl AsLogicalPlan for LogicalPlanNode { }) } LogicalPlan::Explain(a) => { + // TODO: propagate the statement-level `show_statistics` override + // through the proto so round-trips preserve it. 
For now this + // field defaults to `None` on the other side (falling back to + // session config), which matches the previous behavior. let input = LogicalPlanNode::try_from_logical_plan( a.plan.as_ref(), extension_codec, diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 1ecf90b7947c..e987247478f5 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -22,6 +22,7 @@ use datafusion_common::DataFusionError; use datafusion_common::config::SqlParserOptions; +use datafusion_common::format::{ExplainFormat, ExplainStatementOptions}; use datafusion_common::{Diagnostic, Span, sql_err}; use sqlparser::ast::{ExprWithAlias, Ident, OrderByOptions}; use sqlparser::tokenizer::TokenWithSpan; @@ -36,6 +37,7 @@ use sqlparser::{ }; use std::collections::VecDeque; use std::fmt; +use std::str::FromStr; // Use `Parser::expected` instead, if possible macro_rules! parser_err { @@ -55,18 +57,25 @@ fn parse_file_type(s: &str) -> Result { /// DataFusion specific `EXPLAIN` /// -/// Syntax: +/// Supports both the legacy keyword form and, on dialects whose +/// [`Dialect::supports_explain_with_utility_options`] returns `true` +/// (PostgreSQL, DuckDB, etc.), the Postgres-style parenthesized option list: +/// /// ```sql +/// -- Legacy keyword form (any dialect) /// EXPLAIN [FORMAT format] statement +/// +/// -- Postgres-style option form (dialect-gated) +/// EXPLAIN (option [arg] [, ...]) statement /// ``` +/// +/// See [`ExplainStatementOptions`] for the list of supported options in the +/// parenthesized form. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ExplainStatement { - /// `EXPLAIN ANALYZE ..` - pub analyze: bool, - /// `EXPLAIN .. VERBOSE ..` - pub verbose: bool, - /// `EXPLAIN .. FORMAT ` - pub format: Option, + /// Normalized options parsed from either the legacy keyword form or the + /// parenthesized option list. + pub options: ExplainStatementOptions, /// The statement to analyze. 
Note this is a DataFusion [`Statement`] (not a /// [`sqlparser::ast::Statement`] so that we can use `EXPLAIN`, `COPY`, and other /// DataFusion specific statements @@ -75,22 +84,47 @@ pub struct ExplainStatement { impl fmt::Display for ExplainStatement { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { - analyze, - verbose, - format, - statement, - } = self; + let Self { options, statement } = self; + + // If only the legacy-era fields are set, print the legacy keyword + // form so existing round-trip tests continue to pass. + let uses_parenthesized = options.analyze_level.is_some() + || options.analyze_categories.is_some() + || options.show_statistics.is_some(); write!(f, "EXPLAIN ")?; - if *analyze { - write!(f, "ANALYZE ")?; - } - if *verbose { - write!(f, "VERBOSE ")?; - } - if let Some(format) = format.as_ref() { - write!(f, "FORMAT {format} ")?; + if uses_parenthesized { + // Emit a parenthesized option list. + let mut parts: Vec = Vec::new(); + if options.analyze { + parts.push("ANALYZE".to_string()); + } + if options.verbose { + parts.push("VERBOSE".to_string()); + } + if let Some(format) = &options.format { + parts.push(format!("FORMAT {format}")); + } + if let Some(level) = options.analyze_level { + parts.push(format!("LEVEL {level}")); + } + if let Some(cats) = &options.analyze_categories { + parts.push(format!("METRICS '{cats}'")); + } + if let Some(stats) = options.show_statistics { + parts.push(format!("COSTS {}", if stats { "ON" } else { "OFF" })); + } + write!(f, "({}) ", parts.join(", "))?; + } else { + if options.analyze { + write!(f, "ANALYZE ")?; + } + if options.verbose { + write!(f, "VERBOSE ")?; + } + if let Some(format) = &options.format { + write!(f, "FORMAT {format} ")?; + } } write!(f, "{statement}") @@ -325,6 +359,10 @@ fn ensure_not_set(field: &Option, name: &str) -> Result<(), DataFusionErro pub struct DFParser<'a> { pub parser: Parser<'a>, options: SqlParserOptions, + /// Whether the configured dialect supports 
Postgres-style + /// `EXPLAIN (option, ...)` utility-option syntax. Cached here because + /// sqlparser's [`Parser::dialect`] field is private. + supports_explain_with_utility_options: bool, } /// Same as `sqlparser` @@ -437,10 +475,34 @@ impl<'a, 'b> DFParserBuilder<'a, 'b> { recursion_limit: self.recursion_limit, ..Default::default() }, + supports_explain_with_utility_options: self + .dialect + .supports_explain_with_utility_options(), }) } } +/// Returns true when `tok` is the start of a query / parenthesized query +/// group. Used to disambiguate `EXPLAIN (SELECT ...)` (a parenthesized query) +/// from `EXPLAIN (ANALYZE) SELECT ...` (a Postgres-style option list). +fn token_starts_query(tok: &Token) -> bool { + match tok { + Token::LParen => true, + Token::Word(Word { keyword, .. }) => matches!( + keyword, + Keyword::SELECT + | Keyword::WITH + | Keyword::VALUES + | Keyword::TABLE + | Keyword::INSERT + | Keyword::UPDATE + | Keyword::DELETE + | Keyword::MERGE + ), + _ => false, + } +} + impl<'a> DFParser<'a> { #[deprecated(since = "46.0.0", note = "DFParserBuilder")] pub fn new(sql: &'a str) -> Result { @@ -758,18 +820,46 @@ impl<'a> DFParser<'a> { } /// Parse a SQL `EXPLAIN` + /// + /// After the `EXPLAIN` keyword, if the dialect supports the Postgres-style + /// option list and the next non-whitespace token is `(`, we must + /// disambiguate between an option list (`EXPLAIN (ANALYZE) SELECT ...`) + /// and a parenthesized query (`EXPLAIN (SELECT ...)` or + /// `EXPLAIN (q1 EXCEPT q2) UNION ALL ...`). 
pub fn parse_explain(&mut self) -> Result { + if self.supports_explain_with_utility_options + && self.parser.peek_token().token == Token::LParen + && !token_starts_query(&self.parser.peek_nth_token(1).token) + { + let raw = self.parser.parse_utility_options()?; + let options = ExplainStatementOptions::from_utility_options(&raw)?; + let statement = self.parse_statement()?; + return Ok(Statement::Explain(ExplainStatement { + statement: Box::new(statement), + options, + })); + } + + // Legacy keyword form. let analyze = self.parser.parse_keyword(Keyword::ANALYZE); let verbose = self.parser.parse_keyword(Keyword::VERBOSE); - let format = self.parse_explain_format()?; + let format = self + .parse_explain_format()? + .map(|s| ExplainFormat::from_str(&s)) + .transpose()?; let statement = self.parse_statement()?; - Ok(Statement::Explain(ExplainStatement { - statement: Box::new(statement), + let options = ExplainStatementOptions { analyze, verbose, format, + ..Default::default() + }; + + Ok(Statement::Explain(ExplainStatement { + statement: Box::new(statement), + options, })) } @@ -1873,9 +1963,14 @@ mod tests { options: vec![], }); let expected = Statement::Explain(ExplainStatement { - analyze, - verbose, - format: None, + options: ExplainStatementOptions { + analyze, + verbose, + format: None, + analyze_level: None, + analyze_categories: None, + show_statistics: None, + }, statement: Box::new(expected_copy), }); assert_eq!(verified_stmt(sql), expected); @@ -2203,4 +2298,164 @@ mod tests { "Expected: end of expression, found: bar", ) } + + // ------------------------------------------------------------------ + // Postgres-style `EXPLAIN (option, ...)` tests + // ------------------------------------------------------------------ + + fn parse_with_pg(sql: &str) -> Result { + let dialect = sqlparser::dialect::PostgreSqlDialect {}; + let mut statements = DFParser::parse_sql_with_dialect(sql, &dialect)?; + assert_eq!(statements.len(), 1, "Expected exactly one statement"); + 
Ok(statements.pop_front().unwrap()) + } + + fn parse_with_generic(sql: &str) -> Result { + let mut statements = DFParser::parse_sql(sql)?; + assert_eq!(statements.len(), 1, "Expected exactly one statement"); + Ok(statements.pop_front().unwrap()) + } + + #[test] + fn explain_legacy_keyword_form_postgres_dialect() { + // The legacy keyword form still works under PostgreSQL dialect. + let stmt = parse_with_pg("EXPLAIN ANALYZE VERBOSE SELECT 1").unwrap(); + let Statement::Explain(ExplainStatement { options, .. }) = stmt else { + panic!("Expected Statement::Explain"); + }; + assert!(options.analyze); + assert!(options.verbose); + assert!(options.format.is_none()); + assert!(options.analyze_level.is_none()); + } + + #[test] + fn explain_paren_form_on_generic_supports_utility_options() { + // sqlparser's GenericDialect also declares + // `supports_explain_with_utility_options = true`, so DataFusion's + // default parser accepts the parenthesized form too. + let stmt = parse_with_generic("EXPLAIN (FORMAT TREE) SELECT 1").unwrap(); + let Statement::Explain(ExplainStatement { options, .. }) = stmt else { + panic!("Expected Statement::Explain"); + }; + assert_eq!(options.format, Some(ExplainFormat::Tree)); + } + + #[test] + fn explain_paren_form_on_non_supporting_dialect_is_parse_error() { + // Dialects that do NOT declare support for utility options (e.g. + // Snowflake) must still error on the parenthesized form — proving + // the dialect gate itself works. + use sqlparser::dialect::SnowflakeDialect; + let dialect = SnowflakeDialect {}; + let res = + DFParser::parse_sql_with_dialect("EXPLAIN (FORMAT TREE) SELECT 1", &dialect); + assert!( + res.is_err(), + "expected parse error under non-supporting dialect" + ); + } + + #[test] + fn explain_paren_grouping_query_is_not_mistaken_for_options() { + // Historic DataFusion behavior allows parentheses around the + // query after EXPLAIN (e.g. `EXPLAIN (SELECT ...)` or + // `EXPLAIN (q1 EXCEPT q2) UNION ALL (q3 EXCEPT q4)`). 
The dialect + // gate for Postgres-style options must not swallow these. + for sql in [ + "EXPLAIN (SELECT 1)", + "EXPLAIN (WITH t AS (SELECT 1) SELECT * FROM t)", + "EXPLAIN (VALUES (1), (2))", + "EXPLAIN ((SELECT 1))", + ] { + let stmt = parse_with_pg(sql).unwrap_or_else(|e| { + panic!("{sql} failed under PG dialect: {e}"); + }); + let Statement::Explain(ExplainStatement { options, .. }) = stmt else { + panic!("Expected Statement::Explain for {sql}"); + }; + assert!(!options.analyze, "{sql} should not be ANALYZE"); + assert!(!options.verbose, "{sql} should not be VERBOSE"); + assert!(options.format.is_none(), "{sql} should have no FORMAT"); + } + } + + #[test] + fn explain_paren_form_analyze_verbose() { + let stmt = parse_with_pg("EXPLAIN (ANALYZE, VERBOSE) SELECT 1").unwrap(); + let Statement::Explain(ExplainStatement { options, .. }) = stmt else { + panic!("Expected Statement::Explain"); + }; + assert!(options.analyze); + assert!(options.verbose); + } + + #[test] + fn explain_paren_form_format_tree() { + let stmt = parse_with_pg("EXPLAIN (FORMAT tree) SELECT 1").unwrap(); + let Statement::Explain(ExplainStatement { options, .. }) = stmt else { + panic!("Expected Statement::Explain"); + }; + assert!(!options.analyze); + assert_eq!(options.format, Some(ExplainFormat::Tree)); + } + + #[test] + fn explain_paren_form_metrics_level() { + use datafusion_common::format::{ + ExplainAnalyzeCategories, MetricCategory, MetricType, + }; + let stmt = + parse_with_pg("EXPLAIN (ANALYZE, METRICS 'rows,bytes', LEVEL dev) SELECT 1") + .unwrap(); + let Statement::Explain(ExplainStatement { options, .. 
}) = stmt else { + panic!("Expected Statement::Explain"); + }; + assert!(options.analyze); + assert_eq!(options.analyze_level, Some(MetricType::Dev)); + assert_eq!( + options.analyze_categories, + Some(ExplainAnalyzeCategories::Only(vec![ + MetricCategory::Rows, + MetricCategory::Bytes, + ])) + ); + } + + #[test] + fn explain_paren_form_bool_spellings() { + let stmt = + parse_with_pg("EXPLAIN (ANALYZE ON, VERBOSE OFF, COSTS TRUE) SELECT 1") + .unwrap(); + let Statement::Explain(ExplainStatement { options, .. }) = stmt else { + panic!("Expected Statement::Explain"); + }; + assert!(options.analyze); + assert!(!options.verbose); + assert_eq!(options.show_statistics, Some(true)); + } + + #[test] + fn explain_paren_form_buffers_rejected() { + let err = parse_with_pg("EXPLAIN (BUFFERS) SELECT 1").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("BUFFERS"), + "error should mention BUFFERS: {msg}" + ); + assert!( + msg.contains("not supported"), + "error should say not supported: {msg}" + ); + } + + #[test] + fn explain_paren_form_unknown_option_rejected() { + let err = parse_with_pg("EXPLAIN (ASDF) SELECT 1").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("unknown EXPLAIN option"), + "error should describe unknown option: {msg}" + ); + } } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 587ed02d1318..dfb1293f0fd1 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -31,6 +31,7 @@ use crate::utils::normalize_ident; use arrow::datatypes::{Field, FieldRef, Fields}; use datafusion_common::error::_plan_err; +use datafusion_common::format::ExplainStatementOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ Column, Constraint, Constraints, DFSchema, DFSchemaRef, DataFusionError, Result, @@ -227,12 +228,9 @@ impl SqlToRel<'_, S> { DFStatement::CreateExternalTable(s) => self.external_table_to_plan(s), DFStatement::Statement(s) => 
self.sql_statement_to_plan(*s), DFStatement::CopyTo(s) => self.copy_to_plan(s), - DFStatement::Explain(ExplainStatement { - verbose, - analyze, - format, - statement, - }) => self.explain_to_plan(verbose, analyze, format, *statement), + DFStatement::Explain(ExplainStatement { options, statement }) => { + self.explain_to_plan(options, *statement) + } DFStatement::Reset(statement) => self.reset_statement_to_plan(statement), } } @@ -283,9 +281,19 @@ impl SqlToRel<'_, S> { describe_alias: _, .. } => { - let format = format.map(|format| format.to_string()); + let format = format + .map(|format| ExplainFormat::from_str(&format.to_string())) + .transpose()?; let statement = DFStatement::Statement(statement); - self.explain_to_plan(verbose, analyze, format, statement) + let options = ExplainStatementOptions { + analyze, + verbose, + format, + analyze_level: None, + analyze_categories: None, + show_statistics: None, + }; + self.explain_to_plan(options, statement) } Statement::Query(query) => self.query_to_plan(*query, planner_context), Statement::ShowVariable { variable } => self.show_variable_to_plan(&variable), @@ -1922,9 +1930,7 @@ impl SqlToRel<'_, S> { /// datafusion `EXPLAIN` statement. 
fn explain_to_plan( &self, - verbose: bool, - analyze: bool, - format: Option, + opts: ExplainStatementOptions, statement: DFStatement, ) -> Result { let plan = self.statement_to_plan(statement)?; @@ -1936,9 +1942,30 @@ impl SqlToRel<'_, S> { let schema = LogicalPlan::explain_schema(); let schema = schema.to_dfschema_ref()?; + let ExplainStatementOptions { + analyze, + verbose, + format, + analyze_level, + analyze_categories, + show_statistics, + } = opts; + + // Mutual exclusivity checks if verbose && format.is_some() { return plan_err!("EXPLAIN VERBOSE with FORMAT is not supported"); } + if !analyze { + if analyze_level.is_some() { + return plan_err!("EXPLAIN option LEVEL requires ANALYZE"); + } + if analyze_categories.is_some() { + return plan_err!("EXPLAIN option METRICS requires ANALYZE"); + } + } + if analyze && show_statistics.is_some() { + return plan_err!("EXPLAIN option COSTS cannot be combined with ANALYZE"); + } if analyze { if format.is_some() { @@ -1948,6 +1975,8 @@ impl SqlToRel<'_, S> { verbose, input: plan, schema, + analyze_level, + analyze_categories, })) } else { let stringified_plans = @@ -1959,7 +1988,7 @@ impl SqlToRel<'_, S> { let format = if verbose { ExplainFormat::Indent } else if let Some(format) = format { - ExplainFormat::from_str(&format)? + format } else { options.explain.format.clone() }; @@ -1971,6 +2000,7 @@ impl SqlToRel<'_, S> { stringified_plans, schema, logical_optimization_succeeded: false, + show_statistics, })) } } diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 2e8a65385541..796cc410a265 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -687,3 +687,69 @@ logical_plan statement ok drop table foo; + +# ------------------------------------------------------------------ +# Postgres-style `EXPLAIN (option, ...)` tests (dialect-gated). 
+# +# These require a dialect whose `supports_explain_with_utility_options()` +# returns true. DataFusion's default Generic dialect also declares this +# (mirroring sqlparser-rs 0.61.0), so the parenthesized form works there +# too. We set PostgreSQL explicitly for clarity. +# ------------------------------------------------------------------ + +statement ok +set datafusion.sql_parser.dialect = 'PostgreSQL'; + +# `EXPLAIN (FORMAT tree)` matches the legacy `EXPLAIN FORMAT tree` form. +query TT +EXPLAIN (FORMAT tree) SELECT 1; +---- +physical_plan +01)┌───────────────────────────┐ +02)│ ProjectionExec │ +03)│ -------------------- │ +04)│ Int64(1): 1 │ +05)└─────────────┬─────────────┘ +06)┌─────────────┴─────────────┐ +07)│ PlaceholderRowExec │ +08)└───────────────────────────┘ + +# Unknown options are rejected with a clear error. +statement error DataFusion error: Error during planning: unknown EXPLAIN option: FOO +EXPLAIN (FOO) SELECT 1; + +# Postgres-only options return a "not supported" message pointing at METRICS. +statement error DataFusion error: This feature is not implemented: EXPLAIN option BUFFERS is not supported by DataFusion +EXPLAIN (BUFFERS) SELECT 1; + +statement error DataFusion error: This feature is not implemented: EXPLAIN option WAL is not supported by DataFusion +EXPLAIN (WAL) SELECT 1; + +# LEVEL / METRICS / TIMING / SUMMARY all require ANALYZE. +statement error DataFusion error: Error during planning: EXPLAIN option LEVEL requires ANALYZE +EXPLAIN (LEVEL dev) SELECT 1; + +statement error DataFusion error: Error during planning: EXPLAIN option METRICS requires ANALYZE +EXPLAIN (METRICS 'rows') SELECT 1; + +# COSTS and ANALYZE are mutually exclusive (COSTS only applies to plan-only +# EXPLAIN). +statement error DataFusion error: Error during planning: EXPLAIN option COSTS cannot be combined with ANALYZE +EXPLAIN (ANALYZE, COSTS ON) SELECT 1; + +# Legacy keyword form still works on PostgreSQL dialect. 
+query TT +EXPLAIN FORMAT tree SELECT 1; +---- +physical_plan +01)┌───────────────────────────┐ +02)│ ProjectionExec │ +03)│ -------------------- │ +04)│ Int64(1): 1 │ +05)└─────────────┬─────────────┘ +06)┌─────────────┴─────────────┐ +07)│ PlaceholderRowExec │ +08)└───────────────────────────┘ + +statement ok +reset datafusion.sql_parser.dialect; diff --git a/docs/source/user-guide/explain-usage.md b/docs/source/user-guide/explain-usage.md index 5ea7a250b415..9661e386aa68 100644 --- a/docs/source/user-guide/explain-usage.md +++ b/docs/source/user-guide/explain-usage.md @@ -240,6 +240,46 @@ When predicate pushdown is enabled, `DataSourceExec` with `ParquetSource` gains - `row_pushdown_eval_time`: time spent evaluating row-level filters - `page_index_eval_time`: time required to evaluate the page index filters +## Postgres-style `EXPLAIN (...)` options + +In addition to the legacy keyword form (`EXPLAIN ANALYZE VERBOSE FORMAT tree SELECT ...`), +DataFusion accepts a Postgres-style option list on dialects whose +[`supports_explain_with_utility_options`](https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html#method.supports_explain_with_utility_options) +returns `true`. This includes the default `GenericDialect`, `PostgreSqlDialect`, and +`DuckDbDialect`, among others. + +```sql +EXPLAIN (ANALYZE, VERBOSE, METRICS 'rows,bytes', LEVEL dev) +SELECT ... ; +``` + +The recognized options are: + +| Option | Argument | Effect | +| --------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------ | +| `ANALYZE` | boolean, optional | Execute the plan and collect metrics. Defaults to `TRUE` when bare. Equivalent to the `ANALYZE` keyword. | +| `VERBOSE` | boolean, optional | Show per-partition metrics and additional detail. Equivalent to the `VERBOSE` keyword. | +| `FORMAT` | identifier/string | One of `indent`, `tree`, `pgjson`, `graphviz`. 
Equivalent to the `FORMAT ` clause. | +| `METRICS` | string | Filter `ANALYZE` metrics by category. Accepts `'all'`, `'none'`, or any comma-separated subset of `rows,bytes,timing,uncategorized`. | +| `LEVEL` | identifier/string | `summary` or `dev`. Controls metric verbosity for `ANALYZE`. | +| `TIMING` | boolean | Sugar over `METRICS`: toggles inclusion of the `timing` category. | +| `SUMMARY` | boolean | Sugar over `LEVEL`: `TRUE` → `summary`, `FALSE` → `dev`. | +| `COSTS` | boolean | Include statistics in plain `EXPLAIN` output (equivalent to `SET datafusion.explain.show_statistics`). Not valid with `ANALYZE`. | + +Boolean arguments can be written bare (`ANALYZE` → `true`), as `TRUE`/`FALSE`, +`ON`/`OFF`, or `0`/`1`. + +The statement-level options take precedence over session config, so you can leave +the session defaults alone and override just for the current query: + +```sql +EXPLAIN (ANALYZE, LEVEL dev, METRICS 'rows,bytes') SELECT ...; +``` + +Postgres options that DataFusion does not model (`BUFFERS`, `WAL`, `SETTINGS`, +`GENERIC_PLAN`, `MEMORY`) return a clear error rather than being silently +accepted — use `METRICS` to filter what appears in the output. + ## Partitions and Execution DataFusion determines the optimal number of cores to use as part of query