diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs index 6593e22e6cc01..799fedd7eb85f 100644 --- a/datafusion/core/src/execution/context.rs +++ b/datafusion/core/src/execution/context.rs @@ -1698,30 +1698,39 @@ impl SessionState { } let mut visitor = RelationVisitor(&mut relations); - match statement { - DFStatement::Statement(s) => { - let _ = s.as_ref().visit(&mut visitor); - } - DFStatement::CreateExternalTable(table) => { - visitor - .0 - .insert(ObjectName(vec![Ident::from(table.name.as_str())])); - } - DFStatement::DescribeTableStmt(table) => visitor.insert(&table.table_name), - DFStatement::CopyTo(CopyToStatement { - source, - target: _, - options: _, - }) => match source { - CopyToSource::Relation(table_name) => { - visitor.insert(table_name); + fn visit_statement(statement: &DFStatement, visitor: &mut RelationVisitor<'_>) { + match statement { + DFStatement::Statement(s) => { + let _ = s.as_ref().visit(visitor); } - CopyToSource::Query(query) => { - query.visit(&mut visitor); + DFStatement::CreateExternalTable(table) => { + visitor + .0 + .insert(ObjectName(vec![Ident::from(table.name.as_str())])); } - }, + DFStatement::DescribeTableStmt(table) => { + visitor.insert(&table.table_name) + } + DFStatement::CopyTo(CopyToStatement { + source, + target: _, + options: _, + }) => match source { + CopyToSource::Relation(table_name) => { + visitor.insert(table_name); + } + CopyToSource::Query(query) => { + query.visit(visitor); + } + }, + DFStatement::Explain(explain) => { + visit_statement(&explain.statement, visitor) + } + } } + visit_statement(statement, &mut visitor); + // Always include information_schema if available if self.config.information_schema() { for s in INFORMATION_SCHEMA_TABLES { diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 0d2d601a6851b..09db4e281abe8 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -44,6 +44,35 @@ fn parse_file_type(s: &str) -> Result { Ok(s.to_uppercase()) } +/// DataFusion specific EXPLAIN (needed so we can EXPLAIN datafusion +/// specific COPY and other statements) +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ExplainStatement { + pub analyze: bool, + pub verbose: bool, + pub statement: Box, +} + +impl fmt::Display for ExplainStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Self { + analyze, + verbose, + statement, + } = self; + + write!(f, "EXPLAIN ")?; + if *analyze { + write!(f, "ANALYZE ")?; + } + if *verbose { + write!(f, "VERBOSE ")?; + } + + write!(f, "{statement}") + } +} + /// DataFusion extension DDL for `COPY` /// /// # Syntax: @@ -74,7 +103,7 @@ pub struct CopyToStatement { /// The URL to where the data is heading pub target: String, /// Target specific options - pub options: HashMap, + pub options: Vec<(String, Value)>, } impl fmt::Display for CopyToStatement { @@ -88,10 +117,8 @@ impl fmt::Display for CopyToStatement { write!(f, "COPY {source} TO {target}")?; if !options.is_empty() { - let mut opts: Vec<_> = - options.iter().map(|(k, v)| format!("{k} {v}")).collect(); + let opts: Vec<_> = options.iter().map(|(k, v)| format!("{k} {v}")).collect(); // print them in sorted order - opts.sort_unstable(); write!(f, " ({})", opts.join(", "))?; } @@ -208,6 +235,8 @@ pub enum Statement { DescribeTableStmt(DescribeTableStmt), /// Extension: `COPY TO` CopyTo(CopyToStatement), + /// EXPLAIN for extensions + Explain(ExplainStatement), } impl fmt::Display for Statement { @@ -217,11 +246,12 @@ impl fmt::Display for Statement { Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"), Statement::DescribeTableStmt(_) => write!(f, "DESCRIBE TABLE ..."), Statement::CopyTo(stmt) => write!(f, "{stmt}"), + Statement::Explain(stmt) => write!(f, "{stmt}"), } } } -/// DataFusion SQL Parser based on [`sqlparser`] +/// Datafusion SQL Parser based on [`sqlparser`] /// /// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s /// [`Parser`](sqlparser::parser::Parser). @@ -307,24 +337,24 @@ impl<'a> DFParser<'a> { Token::Word(w) => { match w.keyword { Keyword::CREATE => { - // move one token forward - self.parser.next_token(); - // use custom parsing + self.parser.next_token(); // CREATE self.parse_create() } Keyword::COPY => { - // move one token forward - self.parser.next_token(); + self.parser.next_token(); // COPY self.parse_copy() } Keyword::DESCRIBE => { - // move one token forward - self.parser.next_token(); - // use custom parsing + self.parser.next_token(); // DESCRIBE self.parse_describe() } + Keyword::EXPLAIN => { + // (TODO parse all supported statements) + self.parser.next_token(); // EXPLAIN + self.parse_explain() + } _ => { - // use the native parser + // use sqlparser-rs parser Ok(Statement::Statement(Box::from( self.parser.parse_statement()?, ))) @@ -369,7 +399,7 @@ impl<'a> DFParser<'a> { let options = if self.parser.peek_token().token == Token::LParen { self.parse_value_options()? } else { - HashMap::new() + vec![] }; Ok(Statement::CopyTo(CopyToStatement { @@ -421,6 +451,19 @@ impl<'a> DFParser<'a> { } } + /// Parse a SQL `EXPLAIN` + pub fn parse_explain(&mut self) -> Result { + let analyze = self.parser.parse_keyword(Keyword::ANALYZE); + let verbose = self.parser.parse_keyword(Keyword::VERBOSE); + let statement = self.parse_statement()?; + + Ok(Statement::Explain(ExplainStatement { + statement: Box::new(statement), + analyze, + verbose, + })) + } + /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE` pub fn parse_create(&mut self) -> Result { if self.parser.parse_keyword(Keyword::EXTERNAL) { @@ -758,14 +801,14 @@ impl<'a> DFParser<'a> { /// Unlike [`Self::parse_string_options`], this method supports /// keywords as key names as well as multiple value types such as /// Numbers as well as Strings. - fn parse_value_options(&mut self) -> Result, ParserError> { - let mut options = HashMap::new(); + fn parse_value_options(&mut self) -> Result, ParserError> { + let mut options = vec![]; self.parser.expect_token(&Token::LParen)?; loop { let key = self.parse_option_key()?; let value = self.parse_option_value()?; - options.insert(key, value); + options.push((key, value)); let comma = self.parser.consume_token(&Token::Comma); if self.parser.consume_token(&Token::RParen) { // allow a trailing comma, even though it's not in standard @@ -1285,13 +1328,39 @@ mod tests { let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), target: "bar".to_string(), - options: HashMap::new(), + options: vec![], }); assert_eq!(verified_stmt(sql), expected); Ok(()) } + #[test] + fn explain_copy_to_table_to_table() -> Result<(), ParserError> { + let cases = vec![ + ("EXPLAIN COPY foo TO bar", false, false), + ("EXPLAIN ANALYZE COPY foo TO bar", true, false), + ("EXPLAIN VERBOSE COPY foo TO bar", false, true), + ("EXPLAIN ANALYZE VERBOSE COPY foo TO bar", true, true), + ]; + for (sql, analyze, verbose) in cases { + println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}"); + + let expected_copy = Statement::CopyTo(CopyToStatement { + source: object_name("foo"), + target: "bar".to_string(), + options: vec![], + }); + let expected = Statement::Explain(ExplainStatement { + analyze, + verbose, + statement: Box::new(expected_copy), + }); + assert_eq!(verified_stmt(sql), expected); + } + Ok(()) + } + #[test] fn copy_to_query_to_table() -> Result<(), ParserError> { let statement = verified_stmt("SELECT 1"); @@ -1313,7 +1382,7 @@ mod tests { let expected = Statement::CopyTo(CopyToStatement { source: CopyToSource::Query(query), target: "bar".to_string(), - options: HashMap::new(), + options: vec![], }); assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1325,10 +1394,10 @@ mod tests { let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), target: "bar".to_string(), - options: HashMap::from([( + options: vec![( "row_group_size".to_string(), Value::Number("55".to_string(), false), - )]), + )], }); assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1336,17 +1405,11 @@ mod tests { #[test] fn copy_to_multi_options() -> Result<(), ParserError> { + // order of options is preserved let sql = "COPY foo TO bar (format parquet, row_group_size 55, compression snappy)"; - // canonical order is alphabetical - let canonical = - "COPY foo TO bar (compression snappy, format parquet, row_group_size 55)"; - let expected_options = HashMap::from([ - ( - "compression".to_string(), - Value::UnQuotedString("snappy".to_string()), - ), + let expected_options = vec![ ( "format".to_string(), Value::UnQuotedString("parquet".to_string()), @@ -1355,14 +1418,17 @@ mod tests { "row_group_size".to_string(), Value::Number("55".to_string(), false), ), - ]); + ( + "compression".to_string(), + Value::UnQuotedString("snappy".to_string()), + ), + ]; - let options = - if let Statement::CopyTo(copy_to) = one_statement_parses_to(sql, canonical) { - copy_to.options - } else { - panic!("Expected copy"); - }; + let options = if let Statement::CopyTo(copy_to) = verified_stmt(sql) { + copy_to.options + } else { + panic!("Expected copy"); + }; assert_eq!(options, expected_options); diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index c9a34bfaf2203..f21b80937ea39 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -17,7 +17,7 @@ use crate::parser::{ CopyToSource, CopyToStatement, CreateExternalTable, DFParser, DescribeTableStmt, - LexOrdering, Statement as DFStatement, + ExplainStatement, LexOrdering, Statement as DFStatement, }; use crate::planner::{ object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel, @@ -93,6 +93,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { DFStatement::Statement(s) => self.sql_statement_to_plan(*s), DFStatement::DescribeTableStmt(s) => self.describe_table_to_plan(s), DFStatement::CopyTo(s) => self.copy_to_plan(s), + DFStatement::Explain(ExplainStatement { + verbose, + analyze, + statement, + }) => self.explain_to_plan(verbose, analyze, *statement), } } @@ -116,7 +121,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { format: _, describe_alias: _, .. - } => self.explain_statement_to_plan(verbose, analyze, *statement), + } => { + self.explain_to_plan(verbose, analyze, DFStatement::Statement(statement)) + } Statement::Query(query) => self.query_to_plan(*query, planner_context), Statement::ShowVariable { variable } => self.show_variable_to_plan(&variable), Statement::SetVariable { @@ -706,13 +713,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Generate a plan for EXPLAIN ... that will print out a plan /// - fn explain_statement_to_plan( + /// Note this is the sqlparser explain statement, not the + /// datafusion `EXPLAIN` statement. + fn explain_to_plan( &self, verbose: bool, analyze: bool, - statement: Statement, + statement: DFStatement, ) -> Result { - let plan = self.sql_statement_to_plan(statement)?; + let plan = self.statement_to_plan(statement)?; + if matches!(plan, LogicalPlan::Explain(_)) { + return plan_err!("Nested EXPLAINs are not supported"); + } let plan = Arc::new(plan); let schema = LogicalPlan::explain_schema(); let schema = schema.to_dfschema_ref()?; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index d46fbd65d82a2..c315266b3cd89 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -336,6 +336,18 @@ CopyTo: format=csv output_url=output.csv per_thread_output=false options: () quick_test(sql, plan); } +#[test] +fn plan_explain_copy_to() { + let sql = "EXPLAIN COPY test_decimal to 'output.csv'"; + let plan = r#" +Explain + CopyTo: format=csv output_url=output.csv per_thread_output=false options: () + TableScan: test_decimal + "# + .trim(); + quick_test(sql, plan); +} + #[test] fn plan_copy_to_query() { let sql = "COPY (select * from test_decimal limit 10) to 'output.csv'"; diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 364459fa2df1a..a44a662ada239 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -25,12 +25,19 @@ COPY source_table TO 'test_files/scratch/table' (format parquet, per_thread_outp ---- 2 -#Explain copy queries not currently working -query error DataFusion error: This feature is not implemented: Unsupported SQL statement: Some\("COPY source_table TO 'test_files/scratch/table'"\) +# Error case +query error DataFusion error: Error during planning: Copy To format not explicitly set and unable to get file extension! EXPLAIN COPY source_table to 'test_files/scratch/table' -query error DataFusion error: SQL error: ParserError\("Expected end of statement, found: source_table"\) +query TT EXPLAIN COPY source_table to 'test_files/scratch/table' (format parquet, per_thread_output true) +---- +logical_plan +CopyTo: format=parquet output_url=test_files/scratch/table per_thread_output=true options: (format parquet, per_thread_output true) +--TableScan: source_table projection=[col1, col2] +physical_plan +InsertExec: sink=ParquetSink(writer_mode=PutMultipart, file_groups=[]) +--MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0] # Copy more files to directory via query query IT diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 7aa7870630dfd..ad9b2be40e9e3 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -101,13 +101,17 @@ set datafusion.explain.physical_plan_only = false ## explain nested -statement error Explain must be root of the plan +query error DataFusion error: Error during planning: Nested EXPLAINs are not supported EXPLAIN explain select 1 +## explain nested +statement error DataFusion error: Error during planning: Nested EXPLAINs are not supported +EXPLAIN EXPLAIN explain select 1 + statement ok set datafusion.explain.physical_plan_only = true -statement error Explain must be root of the plan +statement error DataFusion error: Error during planning: Nested EXPLAINs are not supported EXPLAIN explain select 1 statement ok