diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 20cb0967022f8..2fc8bb45888e9 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -47,4 +47,4 @@ ordered-float = "3.0" parquet = { version = "20.0.0", features = ["arrow"], optional = true } pyo3 = { version = "0.16", optional = true } serde_json = "1.0" -sqlparser = "0.20" +sqlparser = "0.21" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 189e53d637df4..121857028ae55 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -85,7 +85,7 @@ pyo3 = { version = "0.16", optional = true } rand = "0.8" rayon = { version = "1.5", optional = true } smallvec = { version = "1.6", features = ["union"] } -sqlparser = "0.20" +sqlparser = "0.21" tempfile = "3" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } tokio-stream = "0.1" diff --git a/datafusion/core/tests/sql/functions.rs b/datafusion/core/tests/sql/functions.rs index 88c00b45ef1cf..e780e7e0f81a0 100644 --- a/datafusion/core/tests/sql/functions.rs +++ b/datafusion/core/tests/sql/functions.rs @@ -130,17 +130,17 @@ async fn query_array() -> Result<()> { let ctx = SessionContext::new(); ctx.register_table("test", Arc::new(table))?; - let sql = "SELECT array(c1, cast(c2 as varchar)) FROM test"; + let sql = "SELECT make_array(c1, cast(c2 as varchar)) FROM test"; let actual = execute_to_batches(&ctx, sql).await; let expected = vec![ - "+--------------------------------------+", - "| array(test.c1,CAST(test.c2 AS Utf8)) |", - "+--------------------------------------+", - "| [, 0] |", - "| [a, 1] |", - "| [aa, ] |", - "| [aaa, 3] |", - "+--------------------------------------+", + "+------------------------------------------+", + "| makearray(test.c1,CAST(test.c2 AS Utf8)) |", + "+------------------------------------------+", + "| [, 0] |", + "| [a, 1] |", + "| [aa, ] |", + "| [aaa, 3] |", + 
"+------------------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) @@ -150,14 +150,14 @@ async fn query_array() -> Result<()> { async fn query_array_scalar() -> Result<()> { let ctx = SessionContext::new(); - let sql = "SELECT array(1, 2, 3);"; + let sql = "SELECT make_array(1, 2, 3);"; let actual = execute_to_batches(&ctx, sql).await; let expected = vec![ - "+-----------------------------------+", - "| array(Int64(1),Int64(2),Int64(3)) |", - "+-----------------------------------+", - "| [1, 2, 3] |", - "+-----------------------------------+", + "+---------------------------------------+", + "| makearray(Int64(1),Int64(2),Int64(3)) |", + "+---------------------------------------+", + "| [1, 2, 3] |", + "+---------------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 6d368c63cb3ad..3fcd7d5d1e306 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -38,4 +38,4 @@ path = "src/lib.rs" ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { version = "20.0.0", features = ["prettyprint"] } datafusion-common = { path = "../common", version = "11.0.0" } -sqlparser = "0.20" +sqlparser = "0.21" diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 532699a37cbbb..fa3adbab681cd 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -73,7 +73,7 @@ pub enum BuiltinScalarFunction { // string functions /// construct an array from columns - Array, + MakeArray, /// ascii Ascii, /// bit_length @@ -204,7 +204,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Sqrt => Volatility::Immutable, BuiltinScalarFunction::Tan => Volatility::Immutable, BuiltinScalarFunction::Trunc => Volatility::Immutable, - BuiltinScalarFunction::Array => Volatility::Immutable, + BuiltinScalarFunction::MakeArray => 
Volatility::Immutable, BuiltinScalarFunction::Ascii => Volatility::Immutable, BuiltinScalarFunction::BitLength => Volatility::Immutable, BuiltinScalarFunction::Btrim => Volatility::Immutable, @@ -297,8 +297,10 @@ impl FromStr for BuiltinScalarFunction { // conditional functions "coalesce" => BuiltinScalarFunction::Coalesce, + // array functions + "make_array" => BuiltinScalarFunction::MakeArray, + // string functions - "array" => BuiltinScalarFunction::Array, "ascii" => BuiltinScalarFunction::Ascii, "bit_length" => BuiltinScalarFunction::BitLength, "btrim" => BuiltinScalarFunction::Btrim, diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 09ac0c2870413..d23a14ced17b1 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -384,7 +384,7 @@ unary_scalar_expr!(ArrowTypeof, arrow_typeof, "data type"); /// Returns an array of fixed size with each argument on it. pub fn array(args: Vec) -> Expr { Expr::ScalarFunction { - fun: built_in_function::BuiltinScalarFunction::Array, + fun: built_in_function::BuiltinScalarFunction::MakeArray, args, } } diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 5cf42fbd21243..263c4540f37ca 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -96,7 +96,7 @@ pub fn return_type( // the return type of the built in function. // Some built-in functions' return type depends on the incoming type. match fun { - BuiltinScalarFunction::Array => Ok(DataType::FixedSizeList( + BuiltinScalarFunction::MakeArray => Ok(DataType::FixedSizeList( Box::new(Field::new("item", input_expr_types[0].clone(), true)), input_expr_types.len() as i32, )), @@ -269,7 +269,7 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature { // for now, the list is small, as we do not have many built-in functions. 
match fun { - BuiltinScalarFunction::Array => Signature::variadic( + BuiltinScalarFunction::MakeArray => Signature::variadic( array_expressions::SUPPORTED_ARRAY_TYPES.to_vec(), fun.volatility(), ), diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index dde0ee0a06bef..a34c1ab90c5d2 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -322,7 +322,7 @@ pub fn create_physical_fun( } // string functions - BuiltinScalarFunction::Array => Arc::new(array_expressions::array), + BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::array), BuiltinScalarFunction::Struct => Arc::new(struct_expressions::struct_expr), BuiltinScalarFunction::Ascii => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -2737,7 +2737,7 @@ mod tests { let execution_props = ExecutionProps::new(); let expr = create_physical_expr( - &BuiltinScalarFunction::Array, + &BuiltinScalarFunction::MakeArray, &[col("a", &schema)?, col("b", &schema)?], &schema, &execution_props, diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 524b03bd69333..97b21c4cdcbb0 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -431,7 +431,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Ltrim => Self::Ltrim, ScalarFunction::Rtrim => Self::Rtrim, ScalarFunction::ToTimestamp => Self::ToTimestamp, - ScalarFunction::Array => Self::Array, + ScalarFunction::Array => Self::MakeArray, ScalarFunction::NullIf => Self::NullIf, ScalarFunction::DatePart => Self::DatePart, ScalarFunction::DateTrunc => Self::DateTrunc, diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index 045b97a3188df..a022769dcab19 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -1083,7 +1083,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { 
BuiltinScalarFunction::Ltrim => Self::Ltrim, BuiltinScalarFunction::Rtrim => Self::Rtrim, BuiltinScalarFunction::ToTimestamp => Self::ToTimestamp, - BuiltinScalarFunction::Array => Self::Array, + BuiltinScalarFunction::MakeArray => Self::Array, BuiltinScalarFunction::NullIf => Self::NullIf, BuiltinScalarFunction::DatePart => Self::DatePart, BuiltinScalarFunction::DateTrunc => Self::DateTrunc, diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 6ad1da9e7fef1..1b0c9446e247f 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -42,5 +42,5 @@ arrow = { version = "20.0.0", features = ["prettyprint"] } datafusion-common = { path = "../common", version = "11.0.0" } datafusion-expr = { path = "../expr", version = "11.0.0" } hashbrown = "0.12" -sqlparser = "0.20" +sqlparser = "0.21" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 28c82f80246f9..aaab12e267f66 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -241,6 +241,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { })) } + Statement::ShowTables { + extended, + full, + db_name, + filter, + } => self.show_tables_to_plan(extended, full, db_name, filter), + Statement::ShowColumns { extended, full, @@ -254,6 +261,35 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } + /// Generate a logical plan from a "SHOW TABLES" query + fn show_tables_to_plan( + &self, + extended: bool, + full: bool, + db_name: Option, + filter: Option, + ) -> Result { + if self.has_table("information_schema", "tables") { + // we only support the basic "SHOW TABLES" + // https://github.com/apache/arrow-datafusion/issues/3188 + if db_name.is_some() || filter.is_some() || full || extended { + Err(DataFusionError::Plan( + "Unsupported parameters to SHOW TABLES".to_string(), + )) + } else { + let query = "SELECT * FROM 
information_schema.tables;"; + let mut rewrite = DFParser::parse_sql(query)?; + assert_eq!(rewrite.len(), 1); + self.statement_to_plan(rewrite.pop_front().unwrap()) + } + } else { + Err(DataFusionError::Plan( + "SHOW TABLES is not supported unless information_schema is enabled" + .to_string(), + )) + } + } + /// Generate a logical plan from an SQL query pub fn query_to_plan( &self, @@ -1550,8 +1586,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { BinaryOperator::Modulo => Ok(Operator::Modulo), BinaryOperator::And => Ok(Operator::And), BinaryOperator::Or => Ok(Operator::Or), - BinaryOperator::Like => Ok(Operator::Like), - BinaryOperator::NotLike => Ok(Operator::NotLike), BinaryOperator::PGRegexMatch => Ok(Operator::RegexMatch), BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch), BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch), @@ -1896,6 +1930,33 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }) } + SQLExpr::Like { negated, expr, pattern, escape_char } => { + match escape_char { + Some(_) => { + // to support this we will need to introduce `Expr::Like` instead + // of treating it like a binary expression + Err(DataFusionError::NotImplemented("LIKE with ESCAPE is not yet supported".to_string())) + }, + _ => { + Ok(Expr::BinaryExpr { + left: Box::new(self.sql_expr_to_logical_expr(*expr, schema, ctes)?), + op: if negated { Operator::NotLike } else { Operator::Like }, + right: Box::new(self.sql_expr_to_logical_expr(*pattern, schema, ctes)?), + }) + } + } + } + + SQLExpr::ILike { .. } => { + // https://github.com/apache/arrow-datafusion/issues/3099 + Err(DataFusionError::NotImplemented("ILIKE is not yet supported".to_string())) + } + + SQLExpr::SimilarTo { .. 
} => { + // https://github.com/apache/arrow-datafusion/issues/3099 + Err(DataFusionError::NotImplemented("SIMILAR TO is not yet supported".to_string())) + } + SQLExpr::BinaryOp { left, op, @@ -1960,21 +2021,21 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { )) } - SQLExpr::Trim { expr, trim_where } => { - let (fun, where_expr) = match trim_where { - Some((TrimWhereField::Leading, expr)) => { - (BuiltinScalarFunction::Ltrim, Some(expr)) + SQLExpr::Trim { expr, trim_where, trim_what } => { + let fun = match trim_where { + Some(TrimWhereField::Leading) => { + BuiltinScalarFunction::Ltrim } - Some((TrimWhereField::Trailing, expr)) => { - (BuiltinScalarFunction::Rtrim, Some(expr)) + Some(TrimWhereField::Trailing) => { + BuiltinScalarFunction::Rtrim } - Some((TrimWhereField::Both, expr)) => { - (BuiltinScalarFunction::Btrim, Some(expr)) + Some(TrimWhereField::Both) => { + BuiltinScalarFunction::Btrim } - None => (BuiltinScalarFunction::Trim, None), + None => BuiltinScalarFunction::Trim }; let arg = self.sql_expr_to_logical_expr(*expr, schema, ctes)?; - let args = match where_expr { + let args = match trim_what { Some(to_trim) => { let to_trim = self.sql_expr_to_logical_expr(*to_trim, schema, ctes)?; vec![arg, to_trim] @@ -2261,26 +2322,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } fn show_variable_to_plan(&self, variable: &[Ident]) -> Result { - // Special case SHOW TABLES let variable = ObjectName(variable.to_vec()).to_string(); - if variable.as_str().eq_ignore_ascii_case("tables") { - if self.has_table("information_schema", "tables") { - let query = "SELECT * FROM information_schema.tables;"; - let mut rewrite = DFParser::parse_sql(query)?; - assert_eq!(rewrite.len(), 1); - self.statement_to_plan(rewrite.pop_front().unwrap()) - } else { - Err(DataFusionError::Plan( - "SHOW TABLES is not supported unless information_schema is enabled" - .to_string(), - )) - } - } else { - Err(DataFusionError::NotImplemented(format!( - "SHOW {} not implemented. 
Supported syntax: SHOW <TABLES>", - variable - ))) - } + Err(DataFusionError::NotImplemented(format!( + "SHOW {} not implemented. Supported syntax: SHOW <TABLES>", + variable + ))) } fn show_columns_to_plan(