diff --git a/datafusion/core/tests/sqllogictests/test_files/create_external_table.slt b/datafusion/core/tests/sqllogictests/test_files/create_external_table.slt new file mode 100644 index 0000000000000..b67c6f20b3d8e --- /dev/null +++ b/datafusion/core/tests/sqllogictests/test_files/create_external_table.slt @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +############### +# Error tests # +############### + +# Missing `STORED AS` and `LOCATION` clauses +statement error DataFusion error: SQL error: ParserError\("Missing STORED AS clause in CREATE EXTERNAL TABLE statement"\) +CREATE EXTERNAL TABLE t + +# Missing `STORED AS` clause +statement error DataFusion error: SQL error: ParserError\("Missing STORED AS clause in CREATE EXTERNAL TABLE statement"\) +CREATE EXTERNAL TABLE t LOCATION 'foo.csv' + +# Missing `LOCATION` clause +statement error DataFusion error: SQL error: ParserError\("Missing LOCATION clause in CREATE EXTERNAL TABLE statement"\) +CREATE EXTERNAL TABLE t STORED AS CSV + +# Option value is missing +statement error DataFusion error: SQL error: ParserError\("Expected literal string, found: \)"\) +CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2, k3) LOCATION 'blahblah' + +# Missing `(` in WITH ORDER clause +statement error DataFusion error: SQL error: ParserError\("Expected \(, found: c1"\) +CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER c1 LOCATION 'foo.csv' + +# Missing `)` in WITH ORDER clause +statement error DataFusion error: SQL error: ParserError\("Expected \), found: LOCATION"\) +CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 LOCATION 'foo.csv' + +# Missing `ROW` in WITH HEADER clause +statement error DataFusion error: SQL error: ParserError\("Expected ROW, found: LOCATION"\) +CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER LOCATION 'abc' + +# Missing `BY` in PARTITIONED clause +statement error DataFusion error: SQL error: ParserError\("Expected BY, found: LOCATION"\) +CREATE EXTERNAL TABLE t STORED AS CSV PARTITIONED LOCATION 'abc' + +# Missing `TYPE` in COMPRESSION clause +statement error DataFusion error: SQL error: ParserError\("Expected TYPE, found: LOCATION"\) +CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION LOCATION 'abc' + +# Invalid compression type +statement error DataFusion error: SQL error: ParserError\("Unsupported file compression type ZZZ"\) +CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION TYPE ZZZ LOCATION 'blahblah' + +# Duplicate `STORED AS` clause +statement error DataFusion error: SQL error: ParserError\("STORED AS specified more than once"\) +CREATE EXTERNAL TABLE t STORED AS CSV STORED AS PARQUET LOCATION 'foo.parquet' + +# Duplicate `LOCATION` clause +statement error DataFusion error: SQL error: ParserError\("LOCATION specified more than once"\) +CREATE EXTERNAL TABLE t STORED AS CSV LOCATION 'foo.csv' LOCATION 'bar.csv' + +# Duplicate `WITH HEADER ROW` clause +statement error DataFusion error: SQL error: ParserError\("WITH HEADER ROW specified more than once"\) +CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER ROW WITH HEADER ROW LOCATION 'foo.csv' + +# Duplicate `WITH ORDER` clause +statement error DataFusion error: SQL error: ParserError\("WITH ORDER specified more than once"\) +CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) WITH ORDER (c1) LOCATION 'foo.csv' + +# Duplicate `DELIMITER` clause +statement error DataFusion error: SQL error: ParserError\("DELIMITER specified more than once"\) +CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV DELIMITER '-' DELIMITER '+' LOCATION 'foo.csv' + +# Duplicate `COMPRESSION TYPE` clause +statement error DataFusion error: SQL error: ParserError\("COMPRESSION TYPE specified more than once"\) +CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION TYPE BZIP2 COMPRESSION TYPE XZ COMPRESSION TYPE ZSTD COMPRESSION TYPE GZIP LOCATION 'foo.csv' + +# Duplicate `PARTITIONED BY` clause +statement error DataFusion error: SQL error: ParserError\("PARTITIONED BY specified more than once"\) +create EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV PARTITIONED BY (c1) partitioned by (c2) LOCATION 'foo.csv' + +# Duplicate `OPTIONS` clause +statement error DataFusion error: SQL error: ParserError\("OPTIONS specified more than once"\) +CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS ('k1' 'v1', 'k2' 'v2') OPTIONS ('k3' 'v3') LOCATION 'foo.csv' diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 27bfa32501026..1706991c19b0d 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -379,59 +379,92 @@ impl<'a> DFParser<'a> { .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parser.parse_object_name()?; let (columns, _) = self.parse_columns()?; - self.parser - .expect_keywords(&[Keyword::STORED, Keyword::AS])?; - // THIS is the main difference: we parse a different file format. - let file_type = self.parse_file_format()?; - - let has_header = self.parse_csv_has_header(); - - let has_delimiter = self.parse_has_delimiter(); - let delimiter = match has_delimiter { - true => self.parse_delimiter()?, - false => ',', - }; - - let file_compression_type = if self.parse_has_file_compression_type() { - self.parse_file_compression_type()? - } else { - CompressionTypeVariant::UNCOMPRESSED - }; - - let table_partition_cols = if self.parse_has_partition() { - self.parse_partitions()? - } else { - vec![] - }; + #[derive(Default)] + struct Builder { + file_type: Option, + location: Option, + has_header: Option, + delimiter: Option, + file_compression_type: Option, + table_partition_cols: Option>, + order_exprs: Option>, + options: Option>, + } + let mut builder = Builder::default(); - let order_exprs = if self.parse_has_order() { - self.parse_order_by_exprs()? - } else { - vec![] - }; + fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { + if field.is_some() { + return Err(ParserError::ParserError(format!( + "{name} specified more than once", + ))); + } + Ok(()) + } - let options = if self.parse_has_options() { - self.parse_options()? - } else { - HashMap::new() - }; + loop { + if self.parser.parse_keyword(Keyword::STORED) { + self.parser.expect_keyword(Keyword::AS)?; + ensure_not_set(&builder.file_type, "STORED AS")?; + builder.file_type = Some(self.parse_file_format()?); + } else if self.parser.parse_keyword(Keyword::LOCATION) { + ensure_not_set(&builder.location, "LOCATION")?; + builder.location = Some(self.parser.parse_literal_string()?); + } else if self + .parser + .parse_keywords(&[Keyword::WITH, Keyword::HEADER]) + { + self.parser.expect_keyword(Keyword::ROW)?; + ensure_not_set(&builder.has_header, "WITH HEADER ROW")?; + builder.has_header = Some(true); + } else if self.parser.parse_keywords(&[Keyword::WITH, Keyword::ORDER]) { + ensure_not_set(&builder.order_exprs, "WITH ORDER")?; + builder.order_exprs = Some(self.parse_order_by_exprs()?); + } else if self.parser.parse_keyword(Keyword::DELIMITER) { + ensure_not_set(&builder.delimiter, "DELIMITER")?; + builder.delimiter = Some(self.parse_delimiter()?); + } else if self.parser.parse_keyword(Keyword::COMPRESSION) { + self.parser.expect_keyword(Keyword::TYPE)?; + ensure_not_set(&builder.file_compression_type, "COMPRESSION TYPE")?; + builder.file_compression_type = Some(self.parse_file_compression_type()?); + } else if self.parser.parse_keyword(Keyword::PARTITIONED) { + self.parser.expect_keyword(Keyword::BY)?; + ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?; + builder.table_partition_cols = Some(self.parse_partitions()?) + } else if self.parser.parse_keyword(Keyword::OPTIONS) { + ensure_not_set(&builder.options, "OPTIONS")?; + builder.options = Some(self.parse_options()?); + } else { + break; + } + } - self.parser.expect_keyword(Keyword::LOCATION)?; - let location = self.parser.parse_literal_string()?; + // Validations: location and file_type are required + if builder.file_type.is_none() { + return Err(ParserError::ParserError( + "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(), + )); + } + if builder.location.is_none() { + return Err(ParserError::ParserError( + "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(), + )); + } let create = CreateExternalTable { name: table_name.to_string(), columns, - file_type, - has_header, - delimiter, - location, - table_partition_cols, - order_exprs, + file_type: builder.file_type.unwrap(), + has_header: builder.has_header.unwrap_or(false), + delimiter: builder.delimiter.unwrap_or(','), + location: builder.location.unwrap(), + table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]), + order_exprs: builder.order_exprs.unwrap_or(vec![]), if_not_exists, - file_compression_type, - options, + file_compression_type: builder + .file_compression_type + .unwrap_or(CompressionTypeVariant::UNCOMPRESSED), + options: builder.options.unwrap_or(HashMap::new()), }; Ok(Statement::CreateExternalTable(create)) } @@ -456,11 +489,6 @@ impl<'a> DFParser<'a> { } } - fn parse_has_options(&mut self) -> bool { - self.parser.parse_keyword(Keyword::OPTIONS) - } - - // fn parse_options(&mut self) -> Result, ParserError> { let mut options: HashMap = HashMap::new(); self.parser.expect_token(&Token::LParen)?; @@ -483,20 +511,6 @@ impl<'a> DFParser<'a> { Ok(options) } - fn parse_has_file_compression_type(&mut self) -> bool { - self.parser - .parse_keywords(&[Keyword::COMPRESSION, Keyword::TYPE]) - } - - fn parse_csv_has_header(&mut self) -> bool { - self.parser - .parse_keywords(&[Keyword::WITH, Keyword::HEADER, Keyword::ROW]) - } - - fn parse_has_delimiter(&mut self) -> bool { - self.parser.parse_keyword(Keyword::DELIMITER) - } - fn parse_delimiter(&mut self) -> Result { let token = self.parser.parse_literal_string()?; match token.len() { @@ -506,15 +520,6 @@ impl<'a> DFParser<'a> { )), } } - - fn parse_has_partition(&mut self) -> bool { - self.parser - .parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) - } - - fn parse_has_order(&mut self) -> bool { - self.parser.parse_keywords(&[Keyword::WITH, Keyword::ORDER]) - } } #[cfg(test)] @@ -900,48 +905,55 @@ mod tests { }); expect_parse_ok(sql, expected)?; - // Error cases: partition column does not support type - let sql = - "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2, k3) LOCATION 'blahblah'"; - expect_parse_error(sql, "sql parser error: Expected literal string, found: )"); - - // Error cases: partition column does not support type - let sql = - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER c1 LOCATION 'foo.csv'"; - expect_parse_error(sql, "sql parser error: Expected (, found: c1"); - - // Error cases: partition column does not support type - let sql = - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 LOCATION 'foo.csv'"; - expect_parse_error(sql, "sql parser error: Expected ), found: LOCATION"); - - // Error case: `with header` is an invalid syntax - let sql = "CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER LOCATION 'abc'"; - expect_parse_error(sql, "sql parser error: Expected LOCATION, found: WITH"); - - // Error case: a single word `partitioned` is invalid - let sql = "CREATE EXTERNAL TABLE t STORED AS CSV PARTITIONED LOCATION 'abc'"; - expect_parse_error( - sql, - "sql parser error: Expected LOCATION, found: PARTITIONED", - ); + // Most complete CREATE EXTERNAL TABLE statement possible + let sql = " + CREATE EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float) + STORED AS PARQUET + DELIMITER '*' + WITH HEADER ROW + WITH ORDER (c1 - c2 ASC) + COMPRESSION TYPE zstd + PARTITIONED BY (c1) + LOCATION 'foo.parquet' + OPTIONS (ROW_GROUP_SIZE '1024', 'TRUNCATE' 'NO') + "; + let expected = Statement::CreateExternalTable(CreateExternalTable { + name: "t".into(), + columns: vec![ + make_column_def("c1", DataType::Int(None)), + make_column_def("c2", DataType::Float(None)), + ], + file_type: "PARQUET".to_string(), + has_header: true, + delimiter: '*', + location: "foo.parquet".into(), + table_partition_cols: vec!["c1".into()], + order_exprs: vec![OrderByExpr { + expr: Expr::BinaryOp { + left: Box::new(Identifier(Ident { + value: "c1".to_owned(), + quote_style: None, + })), + op: BinaryOperator::Minus, + right: Box::new(Identifier(Ident { + value: "c2".to_owned(), + quote_style: None, + })), + }, + asc: Some(true), + nulls_first: None, + }], + if_not_exists: true, + file_compression_type: CompressionTypeVariant::ZSTD, + options: HashMap::from([ + ("ROW_GROUP_SIZE".into(), "1024".into()), + ("TRUNCATE".into(), "NO".into()), + ]), + }); + expect_parse_ok(sql, expected)?; - // Error case: a single word `compression` is invalid - let sql = "CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION LOCATION 'abc'"; - expect_parse_error( - sql, - "sql parser error: Expected LOCATION, found: COMPRESSION", - ); + // For error cases, see: `create_external_table.slt` Ok(()) } - - #[test] - fn invalid_compression_type() { - let sql = "CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION TYPE ZZZ LOCATION 'blahblah'"; - expect_parse_error( - sql, - "sql parser error: Unsupported file compression type ZZZ", - ) - } }