From 4c0114da1e9245a69cf617a147f519ebca25ff59 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Fri, 11 Apr 2025 02:02:16 +0530 Subject: [PATCH 01/12] ParserError->DataFusionError+attach a diagnostic --- datafusion/sql/src/parser.rs | 129 +++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 59 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 822b651eae864..95aef267ca5cd 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -22,7 +22,8 @@ use std::collections::VecDeque; use std::fmt; - +use datafusion_common::DataFusionError; +use datafusion_common::{Diagnostic, Span, sql_err}; use sqlparser::ast::{ExprWithAlias, OrderByOptions}; use sqlparser::tokenizer::TokenWithSpan; use sqlparser::{ @@ -42,7 +43,7 @@ macro_rules! parser_err { }; } -fn parse_file_type(s: &str) -> Result { +fn parse_file_type(s: &str) -> Result { Ok(s.to_uppercase()) } @@ -266,11 +267,11 @@ impl fmt::Display for Statement { } } -fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { +fn ensure_not_set(field: &Option, name: &str) -> Result<(), DataFusionError> { if field.is_some() { - return Err(ParserError::ParserError(format!( + return parser_err!(format!( "{name} specified more than once", - ))); + ))?; } Ok(()) } @@ -356,9 +357,11 @@ impl<'a> DFParserBuilder<'a> { self } - pub fn build(self) -> Result, ParserError> { + pub fn build(self) -> Result, DataFusionError> { let mut tokenizer = Tokenizer::new(self.dialect, self.sql); - let tokens = tokenizer.tokenize_with_location()?; + // Convert TokenizerError -> ParserError + let tokens = tokenizer.tokenize_with_location() + .map_err(|e| ParserError::TokenizerError(e.to_string()))?; Ok(DFParser { parser: Parser::new(self.dialect) @@ -370,7 +373,7 @@ impl<'a> DFParserBuilder<'a> { impl<'a> DFParser<'a> { #[deprecated(since = "46.0.0", note = "DFParserBuilder")] - pub fn new(sql: &'a str) -> Result { + pub fn new(sql: &'a str) -> Result { DFParserBuilder::new(sql).build() } @@ -378,13 +381,13 @@ impl<'a> DFParser<'a> { pub fn new_with_dialect( sql: &'a str, dialect: &'a dyn Dialect, - ) -> Result { + ) -> Result { DFParserBuilder::new(sql).with_dialect(dialect).build() } /// Parse a sql string into one or [`Statement`]s using the /// [`GenericDialect`]. - pub fn parse_sql(sql: &'a str) -> Result, ParserError> { + pub fn parse_sql(sql: &'a str) -> Result, DataFusionError> { let mut parser = DFParserBuilder::new(sql).build()?; parser.parse_statements() @@ -395,7 +398,7 @@ impl<'a> DFParser<'a> { pub fn parse_sql_with_dialect( sql: &str, dialect: &dyn Dialect, - ) -> Result, ParserError> { + ) -> Result, DataFusionError> { let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?; parser.parse_statements() } @@ -403,14 +406,14 @@ impl<'a> DFParser<'a> { pub fn parse_sql_into_expr_with_dialect( sql: &str, dialect: &dyn Dialect, - ) -> Result { + ) -> Result { let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?; parser.parse_expr() } /// Parse a sql string into one or [`Statement`]s - pub fn parse_statements(&mut self) -> Result, ParserError> { + pub fn parse_statements(&mut self) -> Result, DataFusionError> { let mut stmts = VecDeque::new(); let mut expecting_statement_delimiter = false; loop { @@ -438,12 +441,21 @@ impl<'a> DFParser<'a> { &self, expected: &str, found: TokenWithSpan, - ) -> Result { - parser_err!(format!("Expected {expected}, found: {found}")) + ) -> Result { + let sql_parser_span = found.span; + sql_err!( + parser_err!(format!("Expected {expected}, found: {found}"))? + ) + .map_err(|e| { + let span = Span::try_from_sqlparser_span(sql_parser_span); + let mut diagnostic= Diagnostic::new_error(e.to_string(), span); + diagnostic.add_note(format!("Expected {expected}, found: {found}"), span); + e.with_diagnostic(diagnostic) + }) } /// Parse a new expression - pub fn parse_statement(&mut self) -> Result { + pub fn parse_statement(&mut self) -> Result { match self.parser.peek_token().token { Token::Word(w) => { match w.keyword { @@ -484,21 +496,21 @@ impl<'a> DFParser<'a> { } } - pub fn parse_expr(&mut self) -> Result { + pub fn parse_expr(&mut self) -> Result { if let Token::Word(w) = self.parser.peek_token().token { match w.keyword { Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => { - return parser_err!("Unsupported command in expression"); + return parser_err!("Unsupported command in expression")?; } _ => {} } } - self.parser.parse_expr_with_alias() + Ok(self.parser.parse_expr_with_alias()?) } /// Parse a SQL `COPY TO` statement - pub fn parse_copy(&mut self) -> Result { + pub fn parse_copy(&mut self) -> Result { // parse as a query let source = if self.parser.consume_token(&Token::LParen) { let query = self.parser.parse_query()?; @@ -541,7 +553,7 @@ impl<'a> DFParser<'a> { Keyword::WITH => { self.parser.expect_keyword(Keyword::HEADER)?; self.parser.expect_keyword(Keyword::ROW)?; - return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"); + return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')")?; } Keyword::PARTITIONED => { self.parser.expect_keyword(Keyword::BY)?; @@ -561,17 +573,17 @@ impl<'a> DFParser<'a> { if token == Token::EOF || token == Token::SemiColon { break; } else { - return Err(ParserError::ParserError(format!( + return parser_err!(format!( "Unexpected token {token}" - ))); + ))?; } } } let Some(target) = builder.target else { - return Err(ParserError::ParserError( - "Missing TO clause in COPY statement".into(), - )); + return parser_err!( + format!("Missing TO clause in COPY statement") + )?; }; Ok(Statement::CopyTo(CopyToStatement { @@ -589,7 +601,7 @@ impl<'a> DFParser<'a> { /// because it allows keywords as well as other non words /// /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string - pub fn parse_option_key(&mut self) -> Result { + pub fn parse_option_key(&mut self) -> Result { let next_token = self.parser.next_token(); match next_token.token { Token::Word(Word { value, .. }) => { @@ -602,7 +614,7 @@ impl<'a> DFParser<'a> { // Unquoted namespaced keys have to conform to the syntax // "[\.]*". If we have a key that breaks this // pattern, error out: - return self.parser.expected("key name", next_token); + return self.expected("key name", next_token); } } Ok(parts.join(".")) @@ -610,7 +622,7 @@ impl<'a> DFParser<'a> { Token::SingleQuotedString(s) => Ok(s), Token::DoubleQuotedString(s) => Ok(s), Token::EscapedStringLiteral(s) => Ok(s), - _ => self.parser.expected("key name", next_token), + _ => self.expected("key name", next_token), } } @@ -620,7 +632,7 @@ impl<'a> DFParser<'a> { /// word or keyword in this location. /// /// [`parse_value`]: sqlparser::parser::Parser::parse_value - pub fn parse_option_value(&mut self) -> Result { + pub fn parse_option_value(&mut self) -> Result { let next_token = self.parser.next_token(); match next_token.token { // e.g. things like "snappy" or "gzip" that may be keywords @@ -629,12 +641,12 @@ impl<'a> DFParser<'a> { Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)), Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)), Token::Number(n, l) => Ok(Value::Number(n, l)), - _ => self.parser.expected("string or numeric value", next_token), + _ => self.expected("string or numeric value", next_token), } } /// Parse a SQL `EXPLAIN` - pub fn parse_explain(&mut self) -> Result { + pub fn parse_explain(&mut self) -> Result { let analyze = self.parser.parse_keyword(Keyword::ANALYZE); let verbose = self.parser.parse_keyword(Keyword::VERBOSE); let format = self.parse_explain_format()?; @@ -649,7 +661,7 @@ impl<'a> DFParser<'a> { })) } - pub fn parse_explain_format(&mut self) -> Result, ParserError> { + pub fn parse_explain_format(&mut self) -> Result, DataFusionError> { if !self.parser.parse_keyword(Keyword::FORMAT) { return Ok(None); } @@ -660,14 +672,13 @@ impl<'a> DFParser<'a> { Token::SingleQuotedString(w) => Ok(w), Token::DoubleQuotedString(w) => Ok(w), _ => self - .parser .expected("an explain format such as TREE", next_token), }?; Ok(Some(format)) } /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE` - pub fn parse_create(&mut self) -> Result { + pub fn parse_create(&mut self) -> Result { if self.parser.parse_keyword(Keyword::EXTERNAL) { self.parse_create_external_table(false) } else if self.parser.parse_keyword(Keyword::UNBOUNDED) { @@ -678,7 +689,7 @@ impl<'a> DFParser<'a> { } } - fn parse_partitions(&mut self) -> Result, ParserError> { + fn parse_partitions(&mut self) -> Result, DataFusionError> { let mut partitions: Vec = vec![]; if !self.parser.consume_token(&Token::LParen) || self.parser.consume_token(&Token::RParen) @@ -708,7 +719,7 @@ impl<'a> DFParser<'a> { } /// Parse the ordering clause of a `CREATE EXTERNAL TABLE` SQL statement - pub fn parse_order_by_exprs(&mut self) -> Result, ParserError> { + pub fn parse_order_by_exprs(&mut self) -> Result, DataFusionError> { let mut values = vec![]; self.parser.expect_token(&Token::LParen)?; loop { @@ -721,7 +732,7 @@ impl<'a> DFParser<'a> { } /// Parse an ORDER BY sub-expression optionally followed by ASC or DESC. - pub fn parse_order_by_expr(&mut self) -> Result { + pub fn parse_order_by_expr(&mut self) -> Result { let expr = self.parser.parse_expr()?; let asc = if self.parser.parse_keyword(Keyword::ASC) { @@ -753,7 +764,7 @@ impl<'a> DFParser<'a> { // This is a copy of the equivalent implementation in sqlparser. fn parse_columns( &mut self, - ) -> Result<(Vec, Vec), ParserError> { + ) -> Result<(Vec, Vec), DataFusionError> { let mut columns = vec![]; let mut constraints = vec![]; if !self.parser.consume_token(&Token::LParen) @@ -789,7 +800,7 @@ impl<'a> DFParser<'a> { Ok((columns, constraints)) } - fn parse_column_def(&mut self) -> Result { + fn parse_column_def(&mut self) -> Result { let name = self.parser.parse_identifier()?; let data_type = self.parser.parse_data_type()?; let mut options = vec![]; @@ -820,7 +831,7 @@ impl<'a> DFParser<'a> { fn parse_create_external_table( &mut self, unbounded: bool, - ) -> Result { + ) -> Result { let temporary = self .parser .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) @@ -868,15 +879,15 @@ impl<'a> DFParser<'a> { } else { self.parser.expect_keyword(Keyword::HEADER)?; self.parser.expect_keyword(Keyword::ROW)?; - return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)"); + return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)")?; } } Keyword::DELIMITER => { - return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')"); + return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')")?; } Keyword::COMPRESSION => { self.parser.expect_keyword(Keyword::TYPE)?; - return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)"); + return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)")?; } Keyword::PARTITIONED => { self.parser.expect_keyword(Keyword::BY)?; @@ -899,7 +910,7 @@ impl<'a> DFParser<'a> { columns.extend(cols); if !cons.is_empty() { - return Err(ParserError::ParserError( + return sql_err!(ParserError::ParserError( "Constraints on Partition Columns are not supported" .to_string(), )); @@ -919,7 +930,7 @@ impl<'a> DFParser<'a> { if token == Token::EOF || token == Token::SemiColon { break; } else { - return Err(ParserError::ParserError(format!( + return sql_err!(ParserError::ParserError(format!( "Unexpected token {token}" ))); } @@ -928,12 +939,12 @@ impl<'a> DFParser<'a> { // Validations: location and file_type are required if builder.file_type.is_none() { - return Err(ParserError::ParserError( + return sql_err!(ParserError::ParserError( "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(), )); } if builder.location.is_none() { - return Err(ParserError::ParserError( + return sql_err!(ParserError::ParserError( "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(), )); } @@ -955,7 +966,7 @@ impl<'a> DFParser<'a> { } /// Parses the set of valid formats - fn parse_file_format(&mut self) -> Result { + fn parse_file_format(&mut self) -> Result { let token = self.parser.next_token(); match &token.token { Token::Word(w) => parse_file_type(&w.value), @@ -967,7 +978,7 @@ impl<'a> DFParser<'a> { /// /// This method supports keywords as key names as well as multiple /// value types such as Numbers as well as Strings. - fn parse_value_options(&mut self) -> Result, ParserError> { + fn parse_value_options(&mut self) -> Result, DataFusionError> { let mut options = vec![]; self.parser.expect_token(&Token::LParen)?; @@ -999,7 +1010,7 @@ mod tests { use sqlparser::dialect::SnowflakeDialect; use sqlparser::tokenizer::Span; - fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> { + fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> { let statements = DFParser::parse_sql(sql)?; assert_eq!( statements.len(), @@ -1041,7 +1052,7 @@ mod tests { } #[test] - fn create_external_table() -> Result<(), ParserError> { + fn create_external_table() -> Result<(), DataFusionError> { // positive case let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'"; let display = None; @@ -1514,7 +1525,7 @@ mod tests { } #[test] - fn copy_to_table_to_table() -> Result<(), ParserError> { + fn copy_to_table_to_table() -> Result<(), DataFusionError> { // positive case let sql = "COPY foo TO bar STORED AS CSV"; let expected = Statement::CopyTo(CopyToStatement { @@ -1530,7 +1541,7 @@ mod tests { } #[test] - fn skip_copy_into_snowflake() -> Result<(), ParserError> { + fn skip_copy_into_snowflake() -> Result<(), DataFusionError> { let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');"; let dialect = Box::new(SnowflakeDialect); let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?; @@ -1547,7 +1558,7 @@ mod tests { } #[test] - fn explain_copy_to_table_to_table() -> Result<(), ParserError> { + fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> { let cases = vec![ ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false), ( @@ -1588,7 +1599,7 @@ mod tests { } #[test] - fn copy_to_query_to_table() -> Result<(), ParserError> { + fn copy_to_query_to_table() -> Result<(), DataFusionError> { let statement = verified_stmt("SELECT 1"); // unwrap the various layers @@ -1621,7 +1632,7 @@ mod tests { } #[test] - fn copy_to_options() -> Result<(), ParserError> { + fn copy_to_options() -> Result<(), DataFusionError> { let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')"; let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), @@ -1638,7 +1649,7 @@ mod tests { } #[test] - fn copy_to_partitioned_by() -> Result<(), ParserError> { + fn copy_to_partitioned_by() -> Result<(), DataFusionError> { let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')"; let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), @@ -1655,7 +1666,7 @@ mod tests { } #[test] - fn copy_to_multi_options() -> Result<(), ParserError> { + fn copy_to_multi_options() -> Result<(), DataFusionError> { // order of options is preserved let sql = "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)"; From fc22ac27ea25132a82763f33336727ad81d4e9e6 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sun, 13 Apr 2025 17:19:37 +0530 Subject: [PATCH 02/12] fix: ci --- datafusion-examples/examples/sql_dialect.rs | 6 +-- datafusion/sql/src/parser.rs | 41 +++++++++------------ 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/datafusion-examples/examples/sql_dialect.rs b/datafusion-examples/examples/sql_dialect.rs index 12141847ca361..840faa63b1a48 100644 --- a/datafusion-examples/examples/sql_dialect.rs +++ b/datafusion-examples/examples/sql_dialect.rs @@ -17,10 +17,10 @@ use std::fmt::Display; -use datafusion::error::Result; +use datafusion::error::{DataFusionError, Result}; use datafusion::sql::{ parser::{CopyToSource, CopyToStatement, DFParser, DFParserBuilder, Statement}, - sqlparser::{keywords::Keyword, parser::ParserError, tokenizer::Token}, + sqlparser::{keywords::Keyword, tokenizer::Token}, }; /// This example demonstrates how to use the DFParser to parse a statement in a custom way @@ -62,7 +62,7 @@ impl<'a> MyParser<'a> { /// This is the entry point to our parser -- it handles `COPY` statements specially /// but otherwise delegates to the existing DataFusion parser. - pub fn parse_statement(&mut self) -> Result { + pub fn parse_statement(&mut self) -> Result { if self.is_copy() { self.df_parser.parser.next_token(); // COPY let df_statement = self.df_parser.parse_copy()?; diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 95aef267ca5cd..3dba4c9615492 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -20,10 +20,8 @@ //! This parser implements DataFusion specific statements such as //! `CREATE EXTERNAL TABLE` -use std::collections::VecDeque; -use std::fmt; use datafusion_common::DataFusionError; -use datafusion_common::{Diagnostic, Span, sql_err}; +use datafusion_common::{sql_err, Diagnostic, Span}; use sqlparser::ast::{ExprWithAlias, OrderByOptions}; use sqlparser::tokenizer::TokenWithSpan; use sqlparser::{ @@ -35,6 +33,8 @@ use sqlparser::{ parser::{Parser, ParserError}, tokenizer::{Token, Tokenizer, Word}, }; +use std::collections::VecDeque; +use std::fmt; // Use `Parser::expected` instead, if possible macro_rules! parser_err { @@ -269,9 +269,7 @@ impl fmt::Display for Statement { fn ensure_not_set(field: &Option, name: &str) -> Result<(), DataFusionError> { if field.is_some() { - return parser_err!(format!( - "{name} specified more than once", - ))?; + return parser_err!(format!("{name} specified more than once",))?; } Ok(()) } @@ -360,8 +358,9 @@ impl<'a> DFParserBuilder<'a> { pub fn build(self) -> Result, DataFusionError> { let mut tokenizer = Tokenizer::new(self.dialect, self.sql); // Convert TokenizerError -> ParserError - let tokens = tokenizer.tokenize_with_location() - .map_err(|e| ParserError::TokenizerError(e.to_string()))?; + let tokens = tokenizer + .tokenize_with_location() + .map_err(|e| ParserError::TokenizerError(e.to_string()))?; Ok(DFParser { parser: Parser::new(self.dialect) @@ -443,15 +442,14 @@ impl<'a> DFParser<'a> { found: TokenWithSpan, ) -> Result { let sql_parser_span = found.span; - sql_err!( - parser_err!(format!("Expected {expected}, found: {found}"))? + sql_err!(parser_err!(format!("Expected {expected}, found: {found}"))?).map_err( + |e| { + let span = Span::try_from_sqlparser_span(sql_parser_span); + let mut diagnostic = Diagnostic::new_error(e.to_string(), span); + diagnostic.add_note(format!("Expected {expected}, found: {found}"), span); + e.with_diagnostic(diagnostic) + }, ) - .map_err(|e| { - let span = Span::try_from_sqlparser_span(sql_parser_span); - let mut diagnostic= Diagnostic::new_error(e.to_string(), span); - diagnostic.add_note(format!("Expected {expected}, found: {found}"), span); - e.with_diagnostic(diagnostic) - }) } /// Parse a new expression @@ -573,17 +571,13 @@ impl<'a> DFParser<'a> { if token == Token::EOF || token == Token::SemiColon { break; } else { - return parser_err!(format!( - "Unexpected token {token}" - ))?; + return parser_err!(format!("Unexpected token {token}"))?; } } } let Some(target) = builder.target else { - return parser_err!( - format!("Missing TO clause in COPY statement") - )?; + return parser_err!(format!("Missing TO clause in COPY statement"))?; }; Ok(Statement::CopyTo(CopyToStatement { @@ -671,8 +665,7 @@ impl<'a> DFParser<'a> { Token::Word(w) => Ok(w.value), Token::SingleQuotedString(w) => Ok(w), Token::DoubleQuotedString(w) => Ok(w), - _ => self - .expected("an explain format such as TREE", next_token), + _ => self.expected("an explain format such as TREE", next_token), }?; Ok(Some(format)) } From 30b21ead445e1ae91712767f4cc35951ea6f1c15 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sun, 13 Apr 2025 18:03:52 +0530 Subject: [PATCH 03/12] fix: fmt --- datafusion/sql/src/parser.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 3dba4c9615492..d473a4f53d449 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -1266,13 +1266,13 @@ mod tests { "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'"; expect_parse_error( sql, - "sql parser error: Expected: a data type name, found: )", + "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")", ); // negative case: mixed column defs and column names in `PARTITIONED BY` clause let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'"; - expect_parse_error(sql, "sql parser error: Expected ',' or ')' after partition definition, found: int"); + expect_parse_error(sql, "SQL error: ParserError(\"Expected ',' or ')' after partition definition, found: int\")"); // positive case: additional options (one entry) can be specified let sql = @@ -1756,9 +1756,6 @@ mod tests { .parse_statements() .unwrap_err(); - assert_contains!( - err.to_string(), - "sql parser error: recursion limit exceeded" - ); + assert_contains!(err.to_string(), "SQL error: RecursionLimitExceeded"); } } From d2bace2af4e64a58702bceaceb3571fcd9e80d7f Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sun, 13 Apr 2025 21:34:04 +0530 Subject: [PATCH 04/12] fix:clippy --- datafusion/sql/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index d473a4f53d449..ac6b0fb7b00b8 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -269,7 +269,7 @@ impl fmt::Display for Statement { fn ensure_not_set(field: &Option, name: &str) -> Result<(), DataFusionError> { if field.is_some() { - return parser_err!(format!("{name} specified more than once",))?; + parser_err!(format!("{name} specified more than once",))? } Ok(()) } From a0139d3b9ad523f2364ffaac9bcd0980ad0930dc Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sun, 13 Apr 2025 21:43:38 +0530 Subject: [PATCH 05/12] does this fix ci test? --- datafusion/sql/src/parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index ac6b0fb7b00b8..a97a8c03b2945 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -442,11 +442,11 @@ impl<'a> DFParser<'a> { found: TokenWithSpan, ) -> Result { let sql_parser_span = found.span; - sql_err!(parser_err!(format!("Expected {expected}, found: {found}"))?).map_err( + sql_err!(parser_err!(format!("Expected {expected}, found: {}",found.span.start))?).map_err( |e| { let span = Span::try_from_sqlparser_span(sql_parser_span); let mut diagnostic = Diagnostic::new_error(e.to_string(), span); - diagnostic.add_note(format!("Expected {expected}, found: {found}"), span); + diagnostic.add_note(format!("Expected {expected}, found: {}", found.span.start), span); e.with_diagnostic(diagnostic) }, ) From 3150feefed71076751d09e196930b3d133ef6ef0 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sun, 13 Apr 2025 21:53:27 +0530 Subject: [PATCH 06/12] this fixes sqllogictest --- datafusion/sql/src/parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index a97a8c03b2945..6571487ffd0da 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -442,11 +442,11 @@ impl<'a> DFParser<'a> { found: TokenWithSpan, ) -> Result { let sql_parser_span = found.span; - sql_err!(parser_err!(format!("Expected {expected}, found: {}",found.span.start))?).map_err( + sql_err!(parser_err!(format!("Expected: {expected}, found: {found}{}",found.span.start))?).map_err( |e| { let span = Span::try_from_sqlparser_span(sql_parser_span); let mut diagnostic = Diagnostic::new_error(e.to_string(), span); - diagnostic.add_note(format!("Expected {expected}, found: {}", found.span.start), span); + diagnostic.add_note(format!("Expected: {expected}, found: {found}{}", found.span.start), span); e.with_diagnostic(diagnostic) }, ) From f2d483aa075b5e8465765822ebf5fa59c1a67dfc Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sun, 13 Apr 2025 23:05:13 +0530 Subject: [PATCH 07/12] fix: cargo test --- datafusion/sql/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 6571487ffd0da..76f2aa530418e 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -1272,7 +1272,7 @@ mod tests { // negative case: mixed column defs and column names in `PARTITIONED BY` clause let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'"; - expect_parse_error(sql, "SQL error: ParserError(\"Expected ',' or ')' after partition definition, found: int\")"); + expect_parse_error(sql, "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")"); // positive case: additional options (one entry) can be specified let sql = From 07878dfcae16e5ce44f60c4c105959dd2735341f Mon Sep 17 00:00:00 2001 From: logan-keede Date: Sun, 13 Apr 2025 23:30:38 +0530 Subject: [PATCH 08/12] fix: fmt --- datafusion/sql/src/parser.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 76f2aa530418e..3174384c1b92b 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -442,14 +442,19 @@ impl<'a> DFParser<'a> { found: TokenWithSpan, ) -> Result { let sql_parser_span = found.span; - sql_err!(parser_err!(format!("Expected: {expected}, found: {found}{}",found.span.start))?).map_err( - |e| { - let span = Span::try_from_sqlparser_span(sql_parser_span); - let mut diagnostic = Diagnostic::new_error(e.to_string(), span); - diagnostic.add_note(format!("Expected: {expected}, found: {found}{}", found.span.start), span); - e.with_diagnostic(diagnostic) - }, - ) + sql_err!(parser_err!(format!( + "Expected: {expected}, found: {found}{}", + found.span.start + ))?) + .map_err(|e| { + let span = Span::try_from_sqlparser_span(sql_parser_span); + let mut diagnostic = Diagnostic::new_error(e.to_string(), span); + diagnostic.add_note( + format!("Expected: {expected}, found: {found}{}", found.span.start), + span, + ); + e.with_diagnostic(diagnostic) + }) } /// Parse a new expression From 8ab8c7fcad7449284f8e70019dfd4ae4bee0ec52 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Wed, 16 Apr 2025 02:11:41 +0530 Subject: [PATCH 09/12] add tests --- datafusion/sql/src/parser.rs | 9 +++--- datafusion/sql/tests/cases/diagnostic.rs | 36 ++++++++++++++++++++---- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 3174384c1b92b..69af1355b52e7 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -442,17 +442,18 @@ impl<'a> DFParser<'a> { found: TokenWithSpan, ) -> Result { let sql_parser_span = found.span; - sql_err!(parser_err!(format!( + parser_err!(format!( "Expected: {expected}, found: {found}{}", found.span.start - ))?) + )) .map_err(|e| { + let e: DataFusionError = e.into(); let span = Span::try_from_sqlparser_span(sql_parser_span); - let mut diagnostic = Diagnostic::new_error(e.to_string(), span); - diagnostic.add_note( + let diagnostic = Diagnostic::new_error( format!("Expected: {expected}, found: {found}{}", found.span.start), span, ); + e.with_diagnostic(diagnostic) }) } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index d08fe787948aa..8d4d1c6466e74 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -20,16 +20,18 @@ use insta::assert_snapshot; use std::{collections::HashMap, sync::Arc}; use datafusion_common::{Diagnostic, Location, Result, Span}; -use datafusion_sql::planner::{ParserOptions, SqlToRel}; +use datafusion_sql::{ + parser::{DFParser, DFParserBuilder}, + planner::{ParserOptions, SqlToRel}, +}; use regex::Regex; -use sqlparser::{dialect::GenericDialect, parser::Parser}; use crate::{MockContextProvider, MockSessionState}; fn do_query(sql: &'static str) -> Diagnostic { - let dialect = GenericDialect {}; - let statement = Parser::new(&dialect) - .try_with_sql(sql) + // let dialect = GenericDialect {}; + let statement = DFParserBuilder::new(sql) + .build() .expect("unable to create parser") .parse_statement() .expect("unable to parse query"); @@ -41,7 +43,7 @@ fn do_query(sql: &'static str) -> Diagnostic { .with_scalar_function(Arc::new(string::concat().as_ref().clone())); let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new_with_options(&context, options); - match sql_to_rel.sql_statement_to_plan(statement) { + match sql_to_rel.statement_to_plan(statement) { Ok(_) => panic!("expected error"), Err(err) => match err.diagnostic() { Some(diag) => diag.clone(), @@ -366,3 +368,25 @@ fn test_unary_op_plus_with_non_column() -> Result<()> { assert_eq!(diag.span, None); Ok(()) } + +#[test] +fn test_syntax_error() -> Result<()> { + // create a table with a column of type varchar + let query = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 /*int*/int/*int*/) LOCATION 'foo.csv'"; + let spans = get_spans(query); + match DFParser::parse_sql(query) { + Ok(_) => panic!("expected error"), + Err(err) => match err.diagnostic() { + Some(diag) => { + let diag = diag.clone(); + assert_snapshot!(diag.message, @"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 77"); + println!("{:?}", spans); + assert_eq!(diag.span, Some(spans["int"])); + Ok(()) + } + None => { + panic!("expected diagnostic") + } + }, + } +} From 52467c29312202acf0ce81beb7ae81cdcd3a8709 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Wed, 16 Apr 2025 03:08:41 +0530 Subject: [PATCH 10/12] cleanup --- datafusion/sql/tests/cases/diagnostic.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 8d4d1c6466e74..e2e4ada9036b7 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -29,7 +29,6 @@ use regex::Regex; use crate::{MockContextProvider, MockSessionState}; fn do_query(sql: &'static str) -> Diagnostic { - // let dialect = GenericDialect {}; let statement = DFParserBuilder::new(sql) .build() .expect("unable to create parser") From 8aefb89464620212092e619a62e3c0ce035b8ac0 Mon Sep 17 00:00:00 2001 From: logan-keede Date: Wed, 16 Apr 2025 10:33:06 +0530 Subject: [PATCH 11/12] suggestions + expect EOF nicely --- datafusion/sql/src/parser.rs | 12 +++++------- datafusion/sqllogictest/test_files/copy.slt | 2 +- .../test_files/create_external_table.slt | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 69af1355b52e7..21338b8c39982 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -360,7 +360,7 @@ impl<'a> DFParserBuilder<'a> { // Convert TokenizerError -> ParserError let tokens = tokenizer .tokenize_with_location() - .map_err(|e| ParserError::TokenizerError(e.to_string()))?; + .map_err(|e| ParserError::from(e))?; Ok(DFParser { parser: Parser::new(self.dialect) @@ -447,7 +447,7 @@ impl<'a> DFParser<'a> { found.span.start )) .map_err(|e| { - let e: DataFusionError = e.into(); + let e = DataFusionError::from(e); let span = Span::try_from_sqlparser_span(sql_parser_span); let diagnostic = Diagnostic::new_error( format!("Expected: {expected}, found: {found}{}", found.span.start), @@ -577,13 +577,13 @@ impl<'a> DFParser<'a> { if token == Token::EOF || token == Token::SemiColon { break; } else { - return parser_err!(format!("Unexpected token {token}"))?; + return self.expected("end of statement or ;", token)?; } } } let Some(target) = builder.target else { - return parser_err!(format!("Missing TO clause in COPY statement"))?; + return parser_err!("Missing TO clause in COPY statement")?; }; Ok(Statement::CopyTo(CopyToStatement { @@ -929,9 +929,7 @@ impl<'a> DFParser<'a> { if token == Token::EOF || token == Token::SemiColon { break; } else { - return sql_err!(ParserError::ParserError(format!( - "Unexpected token {token}" - ))); + return self.expected("end of statement or ;", token)?; } } } diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 925f96bd4ac0c..5eeb05e814ace 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -637,7 +637,7 @@ query error DataFusion error: SQL error: ParserError\("Expected: \), found: EOF" COPY (select col2, sum(col1) from source_table # Copy from table with non literal -query error DataFusion error: SQL error: ParserError\("Unexpected token \("\) +query error DataFusion error: SQL error: ParserError\("Expected: end of statement or ;, found: \( at Line: 1, Column: 44"\) COPY source_table to '/tmp/table.parquet' (row_group_size 55 + 102); # Copy using execution.keep_partition_by_columns with an invalid value diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index bb66aef2514c9..03cb5edb5fcce 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -77,7 +77,7 @@ statement error DataFusion error: SQL error: ParserError\("Expected: HEADER, fou CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH LOCATION 'foo.csv'; # Unrecognized random clause -statement error DataFusion error: SQL error: ParserError\("Unexpected token FOOBAR"\) +statement error DataFusion error: SQL error: ParserError\("Expected: end of statement or ;, found: FOOBAR at Line: 1, Column: 47"\) CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV FOOBAR BARBAR BARFOO LOCATION 'foo.csv'; # Missing partition column From 6656f9d33dbc7b20765aba304bdef4e95a8674ab Mon Sep 17 00:00:00 2001 From: logan-keede Date: Wed, 16 Apr 2025 14:39:16 +0530 Subject: [PATCH 12/12] fix: clippy --- datafusion/sql/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 21338b8c39982..a5883783dc968 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -360,7 +360,7 @@ impl<'a> DFParserBuilder<'a> { // Convert TokenizerError -> ParserError let tokens = tokenizer .tokenize_with_location() - .map_err(|e| ParserError::from(e))?; + .map_err(ParserError::from)?; Ok(DFParser { parser: Parser::new(self.dialect)