From dc53f30ae7079a987d4f2dded6d06818486fd17f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 30 Apr 2025 14:06:52 -0400 Subject: [PATCH 01/18] Intermediate work on setting up sql feature flags throughout repo --- Cargo.lock | 11 ------- Cargo.toml | 4 +-- benchmarks/Cargo.toml | 2 +- datafusion/catalog/Cargo.toml | 1 - datafusion/common/Cargo.toml | 1 + datafusion/core/Cargo.toml | 2 +- datafusion/core/src/dataframe/mod.rs | 8 ++--- datafusion/core/src/execution/context/mod.rs | 3 ++ .../core/src/execution/session_state.rs | 23 ++++++++++++--- datafusion/core/src/lib.rs | 3 ++ datafusion/core/src/physical_planner.rs | 5 +--- datafusion/core/src/test_util/mod.rs | 1 + datafusion/core/tests/expr_api/mod.rs | 2 +- datafusion/execution/Cargo.toml | 7 +++-- datafusion/expr/Cargo.toml | 6 ++-- datafusion/expr/src/expr.rs | 29 ++++++++++++++++++- datafusion/expr/src/expr_fn.rs | 5 ++-- datafusion/expr/src/lib.rs | 2 ++ datafusion/expr/src/planner.rs | 3 +- datafusion/optimizer/Cargo.toml | 2 +- datafusion/physical-expr-common/Cargo.toml | 2 +- datafusion/physical-expr/Cargo.toml | 2 +- datafusion/physical-optimizer/Cargo.toml | 2 +- datafusion/physical-plan/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 2 +- datafusion/session/Cargo.toml | 10 ------- datafusion/sql/Cargo.toml | 2 +- datafusion/sql/src/expr/function.rs | 8 ++--- datafusion/wasmtest/Cargo.toml | 2 +- 29 files changed, 91 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 933a0ee44a76d..10310734029ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1957,7 +1957,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -2639,22 +2638,12 @@ dependencies = [ name = "datafusion-session" version = "49.0.2" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", 
"datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 988f382515ce3..375c5d6a8839b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,8 +121,8 @@ datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "49. datafusion-datasource-json = { path = "datafusion/datasource-json", version = "49.0.0", default-features = false } datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "49.0.0", default-features = false } datafusion-doc = { path = "datafusion/doc", version = "49.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "49.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "49.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "49.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "49.0.0", default-features = false } datafusion-expr-common = { path = "datafusion/expr-common", version = "49.0.0" } datafusion-ffi = { path = "datafusion/ffi", version = "49.0.0" } datafusion-functions = { path = "datafusion/functions", version = "49.0.0" } diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 5a846cb49e0c7..01c657c1f71b9 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -38,7 +38,7 @@ mimalloc_extended = ["libmimalloc-sys/extended"] [dependencies] arrow = { workspace = true } datafusion = { workspace = true, default-features = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } libmimalloc-sys = { version = "0.1", optional = true } diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml index 7307c4de87a8a..5edb1c4a68786 100644 --- a/datafusion/catalog/Cargo.toml +++ b/datafusion/catalog/Cargo.toml @@ -42,7 +42,6 @@ 
datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-session = { workspace = true } -datafusion-sql = { workspace = true } futures = { workspace = true } itertools = { workspace = true } log = { workspace = true } diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index aea5e51befb09..052c858b4a8bc 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -49,6 +49,7 @@ pyarrow = ["pyo3", "arrow/pyarrow", "parquet"] force_hash_collisions = [] recursive_protection = ["dep:recursive"] parquet = ["dep:parquet"] +sql = ["sqlparser"] [dependencies] ahash = { workspace = true } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 21c6e38945ee7..2083428139d95 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -133,7 +133,7 @@ datafusion-physical-expr-common = { workspace = true } datafusion-physical-optimizer = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-session = { workspace = true } -datafusion-sql = { workspace = true } +datafusion-sql = { workspace = true, optional = true } flate2 = { version = "1.1.2", optional = true } futures = { workspace = true } hex = { workspace = true, optional = true } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 7a3739d36cb2e..b0bdc5b7bed16 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -50,10 +50,7 @@ use arrow::array::{Array, ArrayRef, Int64Array, StringArray}; use arrow::compute::{cast, concat}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::config::{CsvOptions, JsonOptions}; -use datafusion_common::{ - exec_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, - DataFusionError, ParamValues, ScalarValue, SchemaError, UnnestOptions, -}; +use datafusion_common::{exec_err, 
not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError, TableReference, UnnestOptions}; use datafusion_expr::select_expr::SelectExpr; use datafusion_expr::{ case, @@ -70,7 +67,6 @@ use datafusion_functions_aggregate::expr_fn::{ use async_trait::async_trait; use datafusion_catalog::Session; -use datafusion_sql::TableReference; /// Contains options that control how data is /// written out from a DataFrame @@ -267,6 +263,7 @@ impl DataFrame { /// # Ok(()) /// # } /// ``` + #[cfg(feature = "sql")] pub fn parse_sql_expr(&self, sql: &str) -> Result { let df_schema = self.schema(); @@ -333,6 +330,7 @@ impl DataFrame { /// # Ok(()) /// # } /// ``` + #[cfg(feature = "sql")] pub fn select_exprs(self, exprs: &[&str]) -> Result { let expr_list = exprs .iter() diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index de1d40dda3493..e7a66c4f9e8e2 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -585,6 +585,7 @@ impl SessionContext { /// # Ok(()) /// # } /// ``` + #[cfg(feature = "sql")] pub async fn sql(&self, sql: &str) -> Result { self.sql_with_options(sql, SQLOptions::new()).await } @@ -615,6 +616,7 @@ impl SessionContext { /// # Ok(()) /// # } /// ``` + #[cfg(feature = "sql")] pub async fn sql_with_options( &self, sql: &str, @@ -648,6 +650,7 @@ impl SessionContext { /// # Ok(()) /// # } /// ``` + #[cfg(feature = "sql")] pub fn parse_sql_expr(&self, sql: &str, df_schema: &DFSchema) -> Result { self.state.read().create_logical_expr(sql, df_schema) } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index a7b3bdeeace84..cde92fe526a91 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -69,16 +69,22 @@ use datafusion_physical_optimizer::optimizer::PhysicalOptimizer; use 
datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::ExecutionPlan; use datafusion_session::Session; -use datafusion_sql::parser::{DFParserBuilder, Statement}; -use datafusion_sql::planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel}; +#[cfg(feature = "sql")] +use datafusion_sql::{ + parser::{DFParserBuilder, Statement}, + planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel}, +}; use async_trait::async_trait; use chrono::{DateTime, Utc}; use itertools::Itertools; use log::{debug, info}; use object_store::ObjectStore; -use sqlparser::ast::{Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias}; -use sqlparser::dialect::dialect_from_str; +#[cfg(feature = "sql")] +use sqlparser::{ + ast::{Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias}, + dialect::dialect_from_str, +}; use url::Url; use uuid::Uuid; @@ -358,6 +364,7 @@ impl SessionState { /// [`Statement`]. See [`SessionContext::sql`] for running queries. /// /// [`SessionContext::sql`]: crate::execution::context::SessionContext::sql + #[cfg(feature = "sql")] pub fn sql_to_statement( &self, sql: &str, @@ -394,6 +401,7 @@ impl SessionState { /// parse a sql string into a sqlparser-rs AST [`SQLExpr`]. /// /// See [`Self::create_logical_expr`] for parsing sql to [`Expr`]. + #[cfg(feature = "sql")] pub fn sql_to_expr( &self, sql: &str, @@ -405,6 +413,7 @@ impl SessionState { /// parse a sql string into a sqlparser-rs AST [`SQLExprWithAlias`]. /// /// See [`Self::create_logical_expr`] for parsing sql to [`Expr`]. + #[cfg(feature = "sql")] pub fn sql_to_expr_with_alias( &self, sql: &str, @@ -433,6 +442,7 @@ impl SessionState { /// See [`datafusion_sql::resolve::resolve_table_references`] for more information. 
/// /// [`datafusion_sql::resolve::resolve_table_references`]: datafusion_sql::resolve::resolve_table_references + #[cfg(feature = "sql")] pub fn resolve_table_references( &self, statement: &Statement, @@ -447,6 +457,7 @@ impl SessionState { } /// Convert an AST Statement into a LogicalPlan + #[cfg(feature = "sql")] pub async fn statement_to_plan( &self, statement: Statement, @@ -474,6 +485,7 @@ impl SessionState { query.statement_to_plan(statement) } + #[cfg(feature = "sql")] fn get_parser_options(&self) -> ParserOptions { let sql_parser_options = &self.config.options().sql_parser; @@ -504,6 +516,7 @@ impl SessionState { /// /// [`SessionContext::sql`]: crate::execution::context::SessionContext::sql /// [`SessionContext::sql_with_options`]: crate::execution::context::SessionContext::sql_with_options + #[cfg(feature = "sql")] pub async fn create_logical_plan( &self, sql: &str, @@ -517,6 +530,7 @@ impl SessionState { /// Creates a datafusion style AST [`Expr`] from a SQL string. /// /// See example on [SessionContext::parse_sql_expr](crate::execution::context::SessionContext::parse_sql_expr) + #[cfg(feature = "sql")] pub fn create_logical_expr( &self, sql: &str, @@ -1639,6 +1653,7 @@ struct SessionContextProvider<'a> { tables: HashMap>, } +#[cfg(feature = "sql")] impl ContextProvider for SessionContextProvider<'_> { fn get_expr_planners(&self) -> &[Arc] { self.state.expr_planners() diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index bb69d0c85a5c4..6640d4158c6b3 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -734,6 +734,8 @@ pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION"); extern crate core; + +#[cfg(feature = "sql")] extern crate sqlparser; pub mod dataframe; @@ -820,6 +822,7 @@ pub use datafusion_common::assert_batches_eq; pub use datafusion_common::assert_batches_sorted_eq; /// re-export of [`datafusion_sql`] crate +#[cfg(feature = "sql")] pub mod sql { pub use datafusion_sql::*; } diff --git 
a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6618d9495d78f..0fae3aacd5b10 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -71,10 +71,7 @@ use datafusion_common::{ use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::memory::MemorySourceConfig; use datafusion_expr::dml::{CopyTo, InsertOp}; -use datafusion_expr::expr::{ - physical_name, AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, - WindowFunction, WindowFunctionParams, -}; +use datafusion_expr::expr::{physical_name, AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, NullTreatment, WindowFunction, WindowFunctionParams}; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; use datafusion_expr::{ diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 2f8e66a2bbfbb..299b73ccbe40c 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -135,6 +135,7 @@ pub async fn test_table() -> Result { } /// Execute SQL and return results +#[cfg(feature = "sql")] pub async fn plan_and_collect( ctx: &SessionContext, sql: &str, diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index a9cf7f04bb3a2..cc48c4312e599 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -31,9 +31,9 @@ use datafusion_functions_aggregate::first_last::first_value_udaf; use datafusion_functions_aggregate::sum::sum_udaf; use datafusion_functions_nested::expr_ext::{IndexAccessor, SliceAccessor}; use datafusion_optimizer::simplify_expressions::ExprSimplifier; -use sqlparser::ast::NullTreatment; /// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan use std::sync::{Arc, LazyLock}; +use datafusion_expr::expr::NullTreatment; mod parse_sql_expr; 
mod simplification; diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index afe9039f8baed..67a37a86c7066 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -38,16 +38,19 @@ workspace = true name = "datafusion_execution" [features] +default = ["sql"] + parquet_encryption = [ "parquet/encryption", ] +sql = [] [dependencies] arrow = { workspace = true } async-trait = { workspace = true } dashmap = { workspace = true } -datafusion-common = { workspace = true, default-features = true } -datafusion-expr = { workspace = true } +datafusion-common = { workspace = true, default-features = false } +datafusion-expr = { workspace = true, default-features = false } futures = { workspace = true } log = { workspace = true } object_store = { workspace = true, features = ["fs"] } diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 812544587bf9d..108e1ed30fa86 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -38,13 +38,15 @@ workspace = true name = "datafusion_expr" [features] +default = ["sql"] recursive_protection = ["dep:recursive"] +sql = ["sqlparser"] [dependencies] arrow = { workspace = true } async-trait = { workspace = true } chrono = { workspace = true } -datafusion-common = { workspace = true } +datafusion-common = { workspace = true, default-features = false } datafusion-doc = { workspace = true } datafusion-expr-common = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } @@ -54,7 +56,7 @@ indexmap = { workspace = true } paste = "^1.0" recursive = { workspace = true, optional = true } serde_json = { workspace = true } -sqlparser = { workspace = true } +sqlparser = { workspace = true, optional = true } [dev-dependencies] ctor = { workspace = true } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index f2493abbdea4b..4d88b37217045 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ 
-41,9 +41,36 @@ use datafusion_common::{ use datafusion_functions_window_common::field::WindowUDFFieldArgs; use sqlparser::ast::{ display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, - NullTreatment, RenameSelectItem, ReplaceSelectElement, + RenameSelectItem, ReplaceSelectElement, }; +// This mirrors sqlparser::ast::NullTreatment but we need our own variant +// for when the sql feature is disabled. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] +pub enum NullTreatment { + IgnoreNulls, + RespectNulls, +} + +impl Display for NullTreatment { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str(match self { + NullTreatment::IgnoreNulls => "IGNORE NULLS", + NullTreatment::RespectNulls => "RESPECT NULLS", + }) + } +} + +impl From for NullTreatment { + fn from(value: sqlparser::ast::NullTreatment) -> Self { + match value { + sqlparser::ast::NullTreatment::IgnoreNulls => Self::IgnoreNulls, + sqlparser::ast::NullTreatment::RespectNulls => Self::RespectNulls, + } + } +} + + /// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`. 
/// /// For example the expression `A + 1` will be represented as diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 63b4333f218ae..9da97720235d2 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,7 +19,7 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, + NullTreatment, Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, @@ -42,7 +42,6 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{plan_err, Column, Result, ScalarValue, Spans, TableReference}; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; -use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; use std::hash::Hash; @@ -716,8 +715,8 @@ pub fn interval_month_day_nano_lit(value: &str) -> Expr { /// # Example /// ```no_run /// # use datafusion_common::Result; +/// # use datafusion_expr::expr::NullTreatment; /// # use datafusion_expr::test::function_stub::count; -/// # use sqlparser::ast::NullTreatment; /// # use datafusion_expr::{ExprFunctionExt, lit, Expr, col}; /// # // first_value is an aggregate function in another crate /// # fn first_value(_arg: Expr) -> Expr { diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index b4ad8387215ec..c35654345e9fa 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -34,6 +34,8 @@ //! //! The [expr_fn] module contains functions for creating expressions. 
+extern crate core; + mod literal; mod operation; mod partition_evaluator; diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 669931d7bae7b..d2aa7ab43d8c0 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -25,12 +25,13 @@ use datafusion_common::{ config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema, Result, TableReference, }; -use sqlparser::ast::{self, NullTreatment}; +use sqlparser::ast; use crate::{ AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame, WindowFunctionDefinition, WindowUDF, }; +use crate::expr::NullTreatment; /// Provides the `SQL` query planner meta-data about tables and /// functions referenced in SQL statements, without a direct dependency on the diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index f10510e0973c3..2b799f437bdad 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -43,7 +43,7 @@ recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } chrono = { workspace = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-expr-common = { workspace = true } datafusion-physical-expr = { workspace = true } diff --git a/datafusion/physical-expr-common/Cargo.toml b/datafusion/physical-expr-common/Cargo.toml index a5a12b5527b7d..58dc767dbad2a 100644 --- a/datafusion/physical-expr-common/Cargo.toml +++ b/datafusion/physical-expr-common/Cargo.toml @@ -40,7 +40,7 @@ name = "datafusion_physical_expr_common" [dependencies] ahash = { workspace = true } arrow = { workspace = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-expr-common = { workspace = true } hashbrown = { workspace = true } itertools = { workspace = true } diff --git 
a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index c0eaa3e205280..194822a269216 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -40,7 +40,7 @@ name = "datafusion_physical_expr" [dependencies] ahash = { workspace = true } arrow = { workspace = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-expr-common = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index c7795418bf10a..0b024fe920c8d 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -39,7 +39,7 @@ recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-expr-common = { workspace = true, default-features = true } diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 830a159d5eb15..fbb0a08cff8f5 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -49,7 +49,7 @@ arrow-ord = { workspace = true } arrow-schema = { workspace = true } async-trait = { workspace = true } chrono = { workspace = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-common-runtime = { workspace = true, default-features = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index c95f392a051a5..b64c175891088 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ 
-47,7 +47,7 @@ avro = ["datafusion/avro", "datafusion-common/avro"] arrow = { workspace = true } chrono = { workspace = true } datafusion = { workspace = true, default-features = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-proto-common = { workspace = true } object_store = { workspace = true } diff --git a/datafusion/session/Cargo.toml b/datafusion/session/Cargo.toml index c6e268735a7b3..09bfbe1c48427 100644 --- a/datafusion/session/Cargo.toml +++ b/datafusion/session/Cargo.toml @@ -31,22 +31,12 @@ version.workspace = true all-features = true [dependencies] -arrow = { workspace = true } async-trait = { workspace = true } -dashmap = { workspace = true } datafusion-common = { workspace = true } -datafusion-common-runtime = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } -datafusion-physical-expr = { workspace = true } datafusion-physical-plan = { workspace = true } -datafusion-sql = { workspace = true } -futures = { workspace = true } -itertools = { workspace = true } -log = { workspace = true } -object_store = { workspace = true } parking_lot = { workspace = true } -tokio = { workspace = true } [lints] workspace = true diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index eca40c553280b..78603a1b2fd13 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -50,7 +50,7 @@ recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } bigdecimal = { workspace = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-expr = { workspace = true } indexmap = { workspace = true } log = { workspace = true } diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 5a47c56f3488a..5c4a55e46ed5a 100644 --- 
a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -22,13 +22,13 @@ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Diagnostic, Result, Span, }; -use datafusion_expr::expr::{ScalarFunction, Unnest, WildcardOptions, WindowFunction}; +use datafusion_expr::expr::{NullTreatment, ScalarFunction, Unnest, WildcardOptions, WindowFunction}; use datafusion_expr::planner::{PlannerResult, RawAggregateExpr, RawWindowExpr}; use datafusion_expr::{expr, Expr, ExprSchemable, WindowFrame, WindowFunctionDefinition}; use sqlparser::ast::{ DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, - NullTreatment, ObjectName, OrderByExpr, Spanned, WindowType, + ObjectName, OrderByExpr, Spanned, WindowType, }; /// Suggest a valid function based on an invalid input function name @@ -115,7 +115,7 @@ impl FunctionArgs { order_by: vec![], over, filter, - null_treatment, + null_treatment: null_treatment.map(|v| v.into()), distinct: false, within_group, function_without_parentheses: matches!(args, FunctionArguments::None), @@ -197,7 +197,7 @@ impl FunctionArgs { order_by, over, filter, - null_treatment, + null_treatment: null_treatment.map(|v| v.into()), distinct, within_group, function_without_parentheses: false, diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml index b43c34f197601..2fc9f0df30841 100644 --- a/datafusion/wasmtest/Cargo.toml +++ b/datafusion/wasmtest/Cargo.toml @@ -46,7 +46,7 @@ chrono = { version = "0.4", features = ["wasmbind"] } # code size when deploying. 
console_error_panic_hook = { version = "0.1.1", optional = true } datafusion = { workspace = true, features = ["parquet"] } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-optimizer = { workspace = true, default-features = true } From 61654305443fbd4d942be5bba4443cd4a392f167 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 30 Apr 2025 14:53:44 -0400 Subject: [PATCH 02/18] More intermediate work, but does not yet compile --- datafusion/common/src/column.rs | 10 ++++++- datafusion/common/src/error.rs | 6 ++++ .../common/src/file_options/json_writer.rs | 2 +- datafusion/common/src/parsers.rs | 8 ++--- datafusion/common/src/spans.rs | 2 ++ datafusion/common/src/table_reference.rs | 29 ++++++++++++++++++- datafusion/common/src/utils/mod.rs | 7 +++-- datafusion/expr/src/expr.rs | 2 ++ datafusion/expr/src/lib.rs | 1 + datafusion/expr/src/planner.rs | 7 +++-- datafusion/expr/src/window_frame.rs | 23 ++++++++++----- 11 files changed, 77 insertions(+), 20 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index 78b45a1306167..f64ab76d5a129 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -18,7 +18,9 @@ //! Column use crate::error::{_schema_err, add_possible_columns_to_diag}; -use crate::utils::{parse_identifiers_normalized, quote_identifier}; +#[cfg(feature = "sql")] +use crate::utils::parse_identifiers_normalized; +use crate::utils::quote_identifier; use crate::{DFSchema, Diagnostic, Result, SchemaError, Spans, TableReference}; use arrow::datatypes::{Field, FieldRef}; use std::collections::HashSet; @@ -128,6 +130,7 @@ impl Column { /// Treats the name as a SQL identifier. 
For example /// `foo.BAR` would be parsed to a reference to relation `foo`, column name `bar` (lower case) /// where `"foo.BAR"` would be parsed to a reference to column named `foo.BAR` + #[cfg(feature = "sql")] pub fn from_qualified_name(flat_name: impl Into) -> Self { let flat_name = flat_name.into(); Self::from_idents(parse_identifiers_normalized(&flat_name, false)).unwrap_or_else( @@ -140,6 +143,7 @@ impl Column { } /// Deserialize a fully qualified name string into a column preserving column text case + #[cfg(feature = "sql")] pub fn from_qualified_name_ignore_case(flat_name: impl Into) -> Self { let flat_name = flat_name.into(); Self::from_idents(parse_identifiers_normalized(&flat_name, true)).unwrap_or_else( @@ -322,6 +326,7 @@ impl Column { } } +#[cfg(feature = "sql")] impl From<&str> for Column { fn from(c: &str) -> Self { Self::from_qualified_name(c) @@ -329,6 +334,7 @@ impl From<&str> for Column { } /// Create a column, cloning the string +#[cfg(feature = "sql")] impl From<&String> for Column { fn from(c: &String) -> Self { Self::from_qualified_name(c) @@ -336,6 +342,7 @@ impl From<&String> for Column { } /// Create a column, reusing the existing string +#[cfg(feature = "sql")] impl From for Column { fn from(c: String) -> Self { Self::from_qualified_name(c) @@ -356,6 +363,7 @@ impl From<(Option<&TableReference>, &FieldRef)> for Column { } } +#[cfg(feature = "sql")] impl FromStr for Column { type Err = Infallible; diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 8b64c0a6a4435..8d0f89b7bd10d 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -35,6 +35,7 @@ use apache_avro::Error as AvroError; use arrow::error::ArrowError; #[cfg(feature = "parquet")] use parquet::errors::ParquetError; +#[cfg(feature = "sql")] use sqlparser::parser::ParserError; use tokio::task::JoinError; @@ -68,6 +69,7 @@ pub enum DataFusionError { /// Error when SQL is syntactically incorrect. 
/// /// 2nd argument is for optional backtrace + #[cfg(feature = "sql")] SQL(Box, Option), /// Error when a feature is not yet implemented. /// @@ -329,6 +331,7 @@ impl From for DataFusionError { } } +#[cfg(feature = "sql")] impl From for DataFusionError { fn from(e: ParserError) -> Self { DataFusionError::SQL(Box::new(e), None) @@ -369,6 +372,7 @@ impl Error for DataFusionError { #[cfg(feature = "object_store")] DataFusionError::ObjectStore(e) => Some(e.as_ref()), DataFusionError::IoError(e) => Some(e), + #[cfg(feature = "sql")] DataFusionError::SQL(e, _) => Some(e.as_ref()), DataFusionError::NotImplemented(_) => None, DataFusionError::Internal(_) => None, @@ -497,6 +501,7 @@ impl DataFusionError { #[cfg(feature = "object_store")] DataFusionError::ObjectStore(_) => "Object Store error: ", DataFusionError::IoError(_) => "IO error: ", + #[cfg(feature = "sql")] DataFusionError::SQL(_, _) => "SQL error: ", DataFusionError::NotImplemented(_) => { "This feature is not implemented: " @@ -534,6 +539,7 @@ impl DataFusionError { #[cfg(feature = "avro")] DataFusionError::AvroError(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::IoError(ref desc) => Cow::Owned(desc.to_string()), + #[cfg(feature = "sql")] DataFusionError::SQL(ref desc, ref backtrace) => { let backtrace: String = backtrace.clone().unwrap_or_else(|| "".to_owned()); diff --git a/datafusion/common/src/file_options/json_writer.rs b/datafusion/common/src/file_options/json_writer.rs index 750d2972329bb..889aec236af42 100644 --- a/datafusion/common/src/file_options/json_writer.rs +++ b/datafusion/common/src/file_options/json_writer.rs @@ -20,7 +20,7 @@ use crate::{ config::JsonOptions, error::{DataFusionError, Result}, - parsers::CompressionTypeVariant, + parsers::CompressionTypeVariant }; /// Options for writing JSON files diff --git a/datafusion/common/src/parsers.rs b/datafusion/common/src/parsers.rs index 41571ebb8576c..cd3d607dacd88 100644 --- a/datafusion/common/src/parsers.rs +++ 
b/datafusion/common/src/parsers.rs @@ -20,7 +20,7 @@ use std::fmt::Display; use std::str::FromStr; -use sqlparser::parser::ParserError; +use crate::DataFusionError; /// Readable file compression type #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -38,9 +38,9 @@ pub enum CompressionTypeVariant { } impl FromStr for CompressionTypeVariant { - type Err = ParserError; + type Err = DataFusionError; - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> Result { let s = s.to_uppercase(); match s.as_str() { "GZIP" | "GZ" => Ok(Self::GZIP), @@ -48,7 +48,7 @@ impl FromStr for CompressionTypeVariant { "XZ" => Ok(Self::XZ), "ZST" | "ZSTD" => Ok(Self::ZSTD), "" | "UNCOMPRESSED" => Ok(Self::UNCOMPRESSED), - _ => Err(ParserError::ParserError(format!( + _ => Err(DataFusionError::NotImplemented(format!( "Unsupported file compression type {s}" ))), } diff --git a/datafusion/common/src/spans.rs b/datafusion/common/src/spans.rs index 5111e264123ce..c0b52977e14a9 100644 --- a/datafusion/common/src/spans.rs +++ b/datafusion/common/src/spans.rs @@ -39,6 +39,7 @@ impl fmt::Debug for Location { } } +#[cfg(feature = "sql")] impl From for Location { fn from(value: sqlparser::tokenizer::Location) -> Self { Self { @@ -70,6 +71,7 @@ impl Span { /// Convert a [`Span`](sqlparser::tokenizer::Span) from the parser, into a /// DataFusion [`Span`]. If the input span is empty (line 0 column 0, to /// line 0 column 0), then [`None`] is returned. + #[cfg(feature = "sql")] pub fn try_from_sqlparser_span(span: sqlparser::tokenizer::Span) -> Option { if span == sqlparser::tokenizer::Span::empty() { None diff --git a/datafusion/common/src/table_reference.rs b/datafusion/common/src/table_reference.rs index 9b6f9696c00bb..91cd7a0862e7c 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-use crate::utils::{parse_identifiers_normalized, quote_identifier}; +#[cfg(feature = "sql")] +use crate::utils::parse_identifiers_normalized; +use crate::utils::quote_identifier; use std::sync::Arc; /// A fully resolved path to a table of the form "catalog.schema.table" @@ -269,6 +271,7 @@ impl TableReference { /// Forms a [`TableReference`] by parsing `s` as a multipart SQL /// identifier. See docs on [`TableReference`] for more details. + #[cfg(feature = "sql")] pub fn parse_str(s: &str) -> Self { let mut parts = parse_identifiers_normalized(s, false); @@ -312,24 +315,48 @@ impl TableReference { /// Parse a string into a TableReference, normalizing where appropriate /// /// See full details on [`TableReference::parse_str`] +#[cfg(feature = "sql")] impl<'a> From<&'a str> for TableReference { fn from(s: &'a str) -> Self { Self::parse_str(s) } } +#[cfg(feature = "sql")] impl<'a> From<&'a String> for TableReference { fn from(s: &'a String) -> Self { Self::parse_str(s) } } +#[cfg(feature = "sql")] impl From for TableReference { fn from(s: String) -> Self { Self::parse_str(&s) } } +#[cfg(not(feature = "sql"))] +impl<'a> From<&'a str> for TableReference { + fn from(s: &'a str) -> Self { + Self::Bare { table: s.into() } + } +} + +#[cfg(not(feature = "sql"))] +impl<'a> From<&'a String> for TableReference { + fn from(s: &'a String) -> Self { + Self::Bare { table: s.as_str().into() } + } +} + +#[cfg(not(feature = "sql"))] +impl From for TableReference { + fn from(s: String) -> Self { + Self::Bare { table: s.into() } + } +} + impl From for TableReference { fn from(resolved: ResolvedTableReference) -> Self { Self::Full { diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 3f776a44bc59f..9a733bbb1e2b6 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -31,9 +31,8 @@ use arrow::array::{ use arrow::buffer::OffsetBuffer; use arrow::compute::{partition, SortColumn, SortOptions}; use 
arrow::datatypes::{DataType, Field, SchemaRef}; -use sqlparser::ast::Ident; -use sqlparser::dialect::GenericDialect; -use sqlparser::parser::Parser; +#[cfg(feature = "sql")] +use sqlparser::{ast::Ident, dialect::GenericDialect, parser::Parser}; use std::borrow::{Borrow, Cow}; use std::cmp::{min, Ordering}; use std::collections::HashSet; @@ -282,6 +281,7 @@ fn needs_quotes(s: &str) -> bool { !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') } +#[cfg(feature = "sql")] pub(crate) fn parse_identifiers(s: &str) -> Result> { let dialect = GenericDialect; let mut parser = Parser::new(&dialect).try_with_sql(s)?; @@ -289,6 +289,7 @@ pub(crate) fn parse_identifiers(s: &str) -> Result> { Ok(idents) } +#[cfg(feature = "sql")] pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec { parse_identifiers(s) .unwrap_or_default() diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 4d88b37217045..a0e9e7d330303 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -39,6 +39,7 @@ use datafusion_common::{ Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; +#[cfg(feature = "sql")] use sqlparser::ast::{ display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, RenameSelectItem, ReplaceSelectElement, @@ -61,6 +62,7 @@ impl Display for NullTreatment { } } +#[cfg(feature = "sql")] impl From for NullTreatment { fn from(value: sqlparser::ast::NullTreatment) -> Self { match value { diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index c35654345e9fa..f5c0501911df8 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -104,6 +104,7 @@ pub use literal::{ }; pub use logical_plan::*; pub use partition_evaluator::PartitionEvaluator; +#[cfg(feature = "sql")] pub use sqlparser; pub use table_source::{TableProviderFilterPushDown, TableSource, TableType}; pub 
use udaf::{ diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index d2aa7ab43d8c0..c8fe6fd9eacea 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -25,8 +25,7 @@ use datafusion_common::{ config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema, Result, TableReference, }; -use sqlparser::ast; - +use datafusion_expr_common::operator::Operator; use crate::{ AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame, WindowFunctionDefinition, WindowUDF, @@ -86,6 +85,7 @@ pub trait ContextProvider { } /// Return [`TypePlanner`] extensions for planning data types + #[cfg(feature = "sql")] fn get_type_planner(&self) -> Option> { None } @@ -262,7 +262,7 @@ pub trait ExprPlanner: Debug + Send + Sync { /// custom expressions. #[derive(Debug, Clone)] pub struct RawBinaryExpr { - pub op: ast::BinaryOperator, + pub op: Operator, pub left: Expr, pub right: Expr, } @@ -323,6 +323,7 @@ pub enum PlannerResult { } /// Customize planning SQL types to DataFusion (Arrow) types. 
+#[cfg(feature = "sql")] pub trait TypePlanner: Debug + Send + Sync { /// Plan SQL [`ast::DataType`] to DataFusion [`DataType`] /// diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index b91bbddd8bac3..44d0a1b04323e 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -28,9 +28,14 @@ use arrow::datatypes::DataType; use std::fmt::{self, Formatter}; use std::hash::Hash; -use datafusion_common::{plan_err, sql_err, DataFusionError, Result, ScalarValue}; -use sqlparser::ast::{self, ValueWithSpan}; -use sqlparser::parser::ParserError::ParserError; +use datafusion_common::{ + exec_err, plan_err, DataFusionError, Result, ScalarValue, +}; +#[cfg(feature = "sql")] +use sqlparser::{ + ast::{self, ValueWithSpan}, + parser::ParserError::ParserError, +}; /// The frame specification determines which output rows are read by an aggregate /// window function. The ending frame boundary can be omitted if the `BETWEEN` @@ -115,6 +120,7 @@ impl fmt::Debug for WindowFrame { } } +#[cfg(feature = "sql")] impl TryFrom for WindowFrame { type Error = DataFusionError; @@ -343,6 +349,7 @@ impl WindowFrameBound { } impl WindowFrameBound { + #[cfg(feature = "sql")] fn try_parse( value: ast::WindowFrameBound, units: &ast::WindowFrameUnits, @@ -365,6 +372,7 @@ impl WindowFrameBound { } } +#[cfg(feature = "sql")] fn convert_frame_bound_to_scalar_value( v: ast::Expr, units: &ast::WindowFrameUnits, @@ -385,9 +393,9 @@ fn convert_frame_bound_to_scalar_value( let value = match *value { ast::Expr::Value(ValueWithSpan{value: ast::Value::SingleQuotedString(item), span: _}) => item, e => { - return sql_err!(ParserError(format!( + return exec_err!( "INTERVAL expression cannot be {e:?}" - ))); + ); } }; Ok(ScalarValue::try_from_string(value, &DataType::UInt64)?) 
@@ -408,9 +416,9 @@ fn convert_frame_bound_to_scalar_value( let result = match *value { ast::Expr::Value(ValueWithSpan{value: ast::Value::SingleQuotedString(item), span: _}) => item, e => { - return sql_err!(ParserError(format!( + return exec_err!( "INTERVAL expression cannot be {e:?}" - ))); + ); } }; if let Some(leading_field) = leading_field { @@ -477,6 +485,7 @@ impl fmt::Display for WindowFrameUnits { } } +#[cfg(feature = "sql")] impl From for WindowFrameUnits { fn from(value: ast::WindowFrameUnits) -> Self { match value { From 7c466de3fc861b1732be1aae2cac81d8adb53e91 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 1 May 2025 14:59:06 -0400 Subject: [PATCH 03/18] Working through more points of friction to remove sql as feature --- datafusion/common/src/column.rs | 17 ++- .../core/src/execution/session_state.rs | 37 +++-- datafusion/expr/Cargo.toml | 1 + datafusion/expr/src/expr.rs | 129 +++++++++++++++++- datafusion/expr/src/logical_plan/ddl.rs | 3 + datafusion/expr/src/planner.rs | 5 +- datafusion/expr/src/utils.rs | 3 + datafusion/proto/Cargo.toml | 2 +- datafusion/sql/Cargo.toml | 4 +- 9 files changed, 182 insertions(+), 19 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index f64ab76d5a129..4d4fbef24832d 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -141,7 +141,14 @@ impl Column { }, ) } - + #[cfg(not(feature = "sql"))] + pub fn from_qualified_name(flat_name: impl Into) -> Self { + Self { + relation: None, + name: flat_name.into(), + spans: Spans::new(), + } + } /// Deserialize a fully qualified name string into a column preserving column text case #[cfg(feature = "sql")] pub fn from_qualified_name_ignore_case(flat_name: impl Into) -> Self { @@ -154,7 +161,10 @@ impl Column { }, ) } - + #[cfg(not(feature = "sql"))] + pub fn from_qualified_name_ignore_case(flat_name: impl Into) -> Self { + Self::from_qualified_name(flat_name) + } /// return the column's name. 
/// /// Note: This ignores the relation and returns the column name only. @@ -326,7 +336,6 @@ impl Column { } } -#[cfg(feature = "sql")] impl From<&str> for Column { fn from(c: &str) -> Self { Self::from_qualified_name(c) @@ -334,7 +343,6 @@ impl From<&str> for Column { } /// Create a column, cloning the string -#[cfg(feature = "sql")] impl From<&String> for Column { fn from(c: &String) -> Self { Self::from_qualified_name(c) @@ -342,7 +350,6 @@ impl From<&String> for Column { } /// Create a column, reusing the existing string -#[cfg(feature = "sql")] impl From for Column { fn from(c: String) -> Self { Self::from_qualified_name(c) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index cde92fe526a91..5a27c769f1eb1 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -51,7 +51,9 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr_rewriter::FunctionRewrite; -use datafusion_expr::planner::{ExprPlanner, TypePlanner}; +use datafusion_expr::planner::ExprPlanner; +#[cfg(feature = "sql")] +use datafusion_expr::planner::TypePlanner; use datafusion_expr::registry::{FunctionRegistry, SerializerRegistry}; use datafusion_expr::simplify::SimplifyInfo; use datafusion_expr::var_provider::{is_system_variables, VarType}; @@ -138,6 +140,7 @@ pub struct SessionState { /// Provides support for customizing the SQL planner, e.g. 
to add support for custom operators like `->>` or `?` expr_planners: Vec>, /// Provides support for customizing the SQL type planning + #[cfg(feature = "sql")] type_planner: Option>, /// Responsible for optimizing a logical plan optimizer: Optimizer, @@ -191,7 +194,8 @@ impl Debug for SessionState { /// Prefer having short fields at the top and long vector fields near the end /// Group fields by fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("SessionState") + let mut debug_struct = f.debug_struct("SessionState"); + let ret = debug_struct .field("session_id", &self.session_id) .field("config", &self.config) .field("runtime_env", &self.runtime_env) @@ -202,9 +206,12 @@ impl Debug for SessionState { .field("table_options", &self.table_options) .field("table_factories", &self.table_factories) .field("function_factory", &self.function_factory) - .field("expr_planners", &self.expr_planners) - .field("type_planner", &self.type_planner) - .field("query_planners", &self.query_planner) + .field("expr_planners", &self.expr_planners); + + #[cfg(feature = "sql")] + let ret = ret.field("type_planner", &self.type_planner); + + ret.field("query_planners", &self.query_planner) .field("analyzer", &self.analyzer) .field("optimizer", &self.optimizer) .field("physical_optimizers", &self.physical_optimizers) @@ -906,6 +913,7 @@ pub struct SessionStateBuilder { session_id: Option, analyzer: Option, expr_planners: Option>>, + #[cfg(feature = "sql")] type_planner: Option>, optimizer: Option, physical_optimizers: Option, @@ -942,6 +950,7 @@ impl SessionStateBuilder { session_id: None, analyzer: None, expr_planners: None, + #[cfg(feature = "sql")] type_planner: None, optimizer: None, physical_optimizers: None, @@ -991,6 +1000,7 @@ impl SessionStateBuilder { session_id: None, analyzer: Some(existing.analyzer), expr_planners: Some(existing.expr_planners), + #[cfg(feature = "sql")] type_planner: existing.type_planner, optimizer: Some(existing.optimizer), 
physical_optimizers: Some(existing.physical_optimizers), @@ -1132,6 +1142,7 @@ impl SessionStateBuilder { } /// Set the [`TypePlanner`] used to customize the behavior of the SQL planner. + #[cfg(feature = "sql")] pub fn with_type_planner(mut self, type_planner: Arc) -> Self { self.type_planner = Some(type_planner); self @@ -1343,6 +1354,7 @@ impl SessionStateBuilder { session_id, analyzer, expr_planners, + #[cfg(feature = "sql")] type_planner, optimizer, physical_optimizers, @@ -1372,6 +1384,7 @@ impl SessionStateBuilder { session_id: session_id.unwrap_or_else(|| Uuid::new_v4().to_string()), analyzer: analyzer.unwrap_or_default(), expr_planners: expr_planners.unwrap_or_default(), + #[cfg(feature = "sql")] type_planner, optimizer: optimizer.unwrap_or_default(), physical_optimizers: physical_optimizers.unwrap_or_default(), @@ -1490,6 +1503,7 @@ impl SessionStateBuilder { } /// Returns the current type_planner value + #[cfg(feature = "sql")] pub fn type_planner(&mut self) -> &mut Option> { &mut self.type_planner } @@ -1604,7 +1618,9 @@ impl Debug for SessionStateBuilder { /// Prefer having short fields at the top and long vector fields near the end /// Group fields by fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("SessionStateBuilder") + let mut debug_struct = f + .debug_struct("SessionStateBuilder"); + let ret = debug_struct .field("session_id", &self.session_id) .field("config", &self.config) .field("runtime_env", &self.runtime_env) @@ -1615,9 +1631,10 @@ impl Debug for SessionStateBuilder { .field("table_options", &self.table_options) .field("table_factories", &self.table_factories) .field("function_factory", &self.function_factory) - .field("expr_planners", &self.expr_planners) - .field("type_planner", &self.type_planner) - .field("query_planners", &self.query_planner) + .field("expr_planners", &self.expr_planners); + #[cfg(feature = "sql")] + let ret = ret.field("type_planner", &self.type_planner); + 
ret.field("query_planners", &self.query_planner) .field("analyzer_rules", &self.analyzer_rules) .field("analyzer", &self.analyzer) .field("optimizer_rules", &self.optimizer_rules) @@ -2011,8 +2028,10 @@ mod tests { use std::sync::Arc; #[test] + #[cfg(feature = "sql")] fn test_session_state_with_default_features() { // test array planners with and without builtin planners + #[cfg(feature = "sql")] fn sql_to_expr(state: &SessionState) -> Result { let provider = SessionContextProvider { state, diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 108e1ed30fa86..b80cf23e104fe 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -57,6 +57,7 @@ paste = "^1.0" recursive = { workspace = true, optional = true } serde_json = { workspace = true } sqlparser = { workspace = true, optional = true } +itertools = "0.14.0" [dev-dependencies] ctor = { workspace = true } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index a0e9e7d330303..df05bee5d3665 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -41,7 +41,7 @@ use datafusion_common::{ use datafusion_functions_window_common::field::WindowUDFFieldArgs; #[cfg(feature = "sql")] use sqlparser::ast::{ - display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, + display_comma_separated, ExceptSelectItem, ExcludeSelectItem, Ident, IdentWithAlias, IlikeSelectItem, RenameSelectItem, ReplaceSelectElement, }; @@ -1410,6 +1410,133 @@ impl GroupingSet { } } +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +#[cfg(not(feature = "sql"))] +pub struct IlikeSelectItem{ + pub pattern: String, +} +#[cfg(not(feature = "sql"))] +impl Display for IlikeSelectItem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "ILIKE '{}'", + &self.pattern + )?; + Ok(()) + } +} +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +#[cfg(not(feature = "sql"))] +pub enum ExcludeSelectItem { + Single(Ident), + 
Multiple(Vec), +} +#[cfg(not(feature = "sql"))] +impl Display for ExcludeSelectItem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "EXCLUDE")?; + match self { + Self::Single(column) => { + write!(f, " {column}")?; + } + Self::Multiple(columns) => { + write!(f, " ({})", display_comma_separated(columns))?; + } + } + Ok(()) + } +} +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +#[cfg(not(feature = "sql"))] +pub struct ExceptSelectItem { + pub first_element: Ident, + pub additional_elements: Vec, +} +#[cfg(not(feature = "sql"))] +impl Display for ExceptSelectItem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "EXCEPT ")?; + if self.additional_elements.is_empty() { + write!(f, "({})", self.first_element)?; + } else { + write!( + f, + "({}, {})", + self.first_element, + display_comma_separated(&self.additional_elements) + )?; + } + Ok(()) + } +} + +#[cfg(not(feature = "sql"))] +pub fn display_comma_separated(slice: &[T]) -> String +where + T: Display { + use itertools::Itertools; + slice.iter().map(|v| format!("{v}")).join(", ") +} + +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +#[cfg(not(feature = "sql"))] +pub enum RenameSelectItem { + Single(String), + Multiple(Vec), +} +#[cfg(not(feature = "sql"))] +impl Display for RenameSelectItem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "RENAME")?; + match self { + Self::Single(column) => { + write!(f, " {column}")?; + } + Self::Multiple(columns) => { + write!(f, " ({})", display_comma_separated(columns))?; + } + } + Ok(()) + } +} + +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +#[cfg(not(feature = "sql"))] +pub struct Ident { + /// The value of the identifier without quotes. + pub value: String, + /// The starting quote if any. Valid quote characters are the single quote, + /// double quote, backtick, and opening square bracket. + pub quote_style: Option, + /// The span of the identifier in the original SQL string. 
+ pub span: String, +} +#[cfg(not(feature = "sql"))] +impl Display for Ident { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "[{}]", self.value) + } +} + +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +#[cfg(not(feature = "sql"))] +pub struct ReplaceSelectElement { + pub expr: String, + pub column_name: Ident, + pub as_keyword: bool, +} +#[cfg(not(feature = "sql"))] +impl Display for ReplaceSelectElement { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if self.as_keyword { + write!(f, "{} AS {}", self.expr, self.column_name) + } else { + write!(f, "{} {}", self.expr, self.column_name) + } + } +} + /// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`. #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)] pub struct WildcardOptions { diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 827e2812ecae1..0c2c7f1dc2794 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -30,7 +30,10 @@ use datafusion_common::tree_node::{Transformed, TreeNodeContainer, TreeNodeRecur use datafusion_common::{ Constraints, DFSchemaRef, Result, SchemaReference, TableReference, }; +#[cfg(feature = "sql")] use sqlparser::ast::Ident; +#[cfg(not(feature = "sql"))] +use crate::expr::Ident; /// Various types of DDL (CREATE / DROP) catalog manipulation #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index c8fe6fd9eacea..d69bbbdddf2ea 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -262,7 +262,10 @@ pub trait ExprPlanner: Debug + Send + Sync { /// custom expressions. 
#[derive(Debug, Clone)] pub struct RawBinaryExpr { + #[cfg(not(feature = "sql"))] pub op: Operator, + #[cfg(feature = "sql")] + pub op: sqlparser::ast::BinaryOperator, pub left: Expr, pub right: Expr, } @@ -328,7 +331,7 @@ pub trait TypePlanner: Debug + Send + Sync { /// Plan SQL [`ast::DataType`] to DataFusion [`DataType`] /// /// Returns None if not possible - fn plan_type(&self, _sql_type: &ast::DataType) -> Result> { + fn plan_type(&self, _sql_type: &sqlparser::ast::DataType) -> Result> { Ok(None) } } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 2e364d0d2b80f..7361c77cb5b59 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -39,7 +39,10 @@ use datafusion_common::{ }; use indexmap::IndexSet; +#[cfg(feature = "sql")] use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem}; +#[cfg(not(feature = "sql"))] +use crate::expr::{ExceptSelectItem, ExcludeSelectItem}; pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index b64c175891088..956887e1bb170 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -46,7 +46,7 @@ avro = ["datafusion/avro", "datafusion-common/avro"] [dependencies] arrow = { workspace = true } chrono = { workspace = true } -datafusion = { workspace = true, default-features = true } +datafusion = { workspace = true, default-features = false } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-proto-common = { workspace = true } diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 78603a1b2fd13..7bd775fb5393c 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -50,8 +50,8 @@ recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } bigdecimal = { workspace = true } -datafusion-common = { workspace = true } -datafusion-expr = { workspace = true } 
+datafusion-common = { workspace = true, features = ["sql"] } +datafusion-expr = { workspace = true, features = ["sql"] } indexmap = { workspace = true } log = { workspace = true } recursive = { workspace = true, optional = true } From 4a6ca20bed5cd13ef5b0ced70655aebf236baf53 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 1 May 2025 19:52:40 -0400 Subject: [PATCH 04/18] Switch optimizer to compare full logical plan instead of hash of the plan in an effort to remove the hash functions from the final wasm blob. Probably need to revert or put under a feature flag. --- datafusion/optimizer/src/optimizer.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 49806d6db344a..6a697289bd907 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -325,8 +325,8 @@ impl Optimizer { let options = config.options(); let mut new_plan = plan; - let mut previous_plans = HashSet::with_capacity(16); - previous_plans.insert(LogicalPlanSignature::new(&new_plan)); + let mut previous_plans = Vec::with_capacity(16); + previous_plans.push(new_plan.clone()); let starting_schema = Arc::clone(new_plan.schema()); @@ -412,10 +412,10 @@ impl Optimizer { } log_plan(&format!("Optimized plan (pass {i})"), &new_plan); - // HashSet::insert returns, whether the value was newly inserted. 
- let plan_is_fresh = - previous_plans.insert(LogicalPlanSignature::new(&new_plan)); - if !plan_is_fresh { + let plan_is_fresh = previous_plans.contains(&new_plan); + if plan_is_fresh { + previous_plans.push(new_plan.clone()); + } else { // plan did not change, so no need to continue trying to optimize debug!("optimizer pass {i} did not make changes"); break; From f2a988939e2acea3ae667b4454e13cdf5f90f604 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 08:17:56 -0400 Subject: [PATCH 05/18] Corrections after rebase --- Cargo.lock | 1 + datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 8 +++++++- datafusion/core/src/physical_planner.rs | 1 - 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10310734029ed..c6d594ca9087d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2271,6 +2271,7 @@ dependencies = [ "env_logger", "indexmap 2.11.0", "insta", + "itertools 0.14.0", "paste", "recursive", "serde_json", diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 052c858b4a8bc..22916925c116c 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -74,7 +74,7 @@ parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" pyo3 = { version = "0.25", optional = true } recursive = { workspace = true, optional = true } -sqlparser = { workspace = true } +sqlparser = { workspace = true, optional = true } tokio = { workspace = true } [target.'cfg(target_family = "wasm")'.dependencies] diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 2083428139d95..80542efb3d4b9 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -63,6 +63,7 @@ default = [ "compression", "parquet", "recursive_protection", + "sql", ] encoding_expressions = ["datafusion-functions/encoding_expressions"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) @@ -94,6 +95,11 @@ serde = [ # 
statements in `arrow-schema` crate "arrow-schema/serde", ] +sql = [ + "datafusion-common/sql", + "datafusion-sql", + "sqlparser", +] string_expressions = ["datafusion-functions/string_expressions"] unicode_expressions = [ "datafusion-sql/unicode_expressions", @@ -145,7 +151,7 @@ parquet = { workspace = true, optional = true, default-features = true } rand = { workspace = true } regex = { workspace = true } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } -sqlparser = { workspace = true } +sqlparser = { workspace = true, optional = true } tempfile = { workspace = true } tokio = { workspace = true } url = { workspace = true } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 0fae3aacd5b10..1570d959efaed 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -91,7 +91,6 @@ use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::recursive_query::RecursiveQueryExec; use datafusion_physical_plan::unnest::ListUnnest; use datafusion_sql::TableReference; -use sqlparser::ast::NullTreatment; use async_trait::async_trait; use datafusion_physical_plan::async_func::{AsyncFuncExec, AsyncMapper}; From e3ad2f11510abeb9182bb82f5ec4b812f8f2ac70 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 13:10:30 -0400 Subject: [PATCH 06/18] Resolve errors and clippy warnings after rebase --- benchmarks/Cargo.toml | 2 +- datafusion/common/src/column.rs | 7 +++---- .../common/src/file_options/json_writer.rs | 2 +- datafusion/common/src/table_reference.rs | 4 +++- datafusion/core/src/dataframe/mod.rs | 6 +++++- datafusion/core/src/execution/session_state.rs | 3 +-- datafusion/core/src/physical_planner.rs | 5 ++++- datafusion/core/tests/dataframe/mod.rs | 5 +++-- datafusion/core/tests/expr_api/mod.rs | 2 +- datafusion/expr/src/expr.rs | 14 +++++--------- datafusion/expr/src/logical_plan/ddl.rs | 4 ++-- 
datafusion/expr/src/planner.rs | 18 ++++++++++-------- datafusion/expr/src/utils.rs | 4 ++-- datafusion/expr/src/window_frame.rs | 14 +++++--------- datafusion/optimizer/Cargo.toml | 2 +- datafusion/optimizer/src/optimizer.rs | 12 ++++++------ datafusion/sql/src/expr/function.rs | 4 +++- 17 files changed, 56 insertions(+), 52 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 01c657c1f71b9..5a846cb49e0c7 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -38,7 +38,7 @@ mimalloc_extended = ["libmimalloc-sys/extended"] [dependencies] arrow = { workspace = true } datafusion = { workspace = true, default-features = true } -datafusion-common = { workspace = true } +datafusion-common = { workspace = true, default-features = true } env_logger = { workspace = true } futures = { workspace = true } libmimalloc-sys = { version = "0.1", optional = true } diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index 4d4fbef24832d..83b68865d2838 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -24,9 +24,7 @@ use crate::utils::quote_identifier; use crate::{DFSchema, Diagnostic, Result, SchemaError, Spans, TableReference}; use arrow::datatypes::{Field, FieldRef}; use std::collections::HashSet; -use std::convert::Infallible; use std::fmt; -use std::str::FromStr; /// A named reference to a qualified field in a schema. 
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -90,6 +88,7 @@ impl Column { /// /// For example, `foo.bar` would be represented as a two element vector /// `["foo", "bar"]` + #[cfg(feature = "sql")] fn from_idents(mut idents: Vec) -> Option { let (relation, name) = match idents.len() { 1 => (None, idents.remove(0)), @@ -371,8 +370,8 @@ impl From<(Option<&TableReference>, &FieldRef)> for Column { } #[cfg(feature = "sql")] -impl FromStr for Column { - type Err = Infallible; +impl std::str::FromStr for Column { + type Err = std::convert::Infallible; fn from_str(s: &str) -> Result { Ok(s.into()) diff --git a/datafusion/common/src/file_options/json_writer.rs b/datafusion/common/src/file_options/json_writer.rs index 889aec236af42..750d2972329bb 100644 --- a/datafusion/common/src/file_options/json_writer.rs +++ b/datafusion/common/src/file_options/json_writer.rs @@ -20,7 +20,7 @@ use crate::{ config::JsonOptions, error::{DataFusionError, Result}, - parsers::CompressionTypeVariant + parsers::CompressionTypeVariant, }; /// Options for writing JSON files diff --git a/datafusion/common/src/table_reference.rs b/datafusion/common/src/table_reference.rs index 91cd7a0862e7c..b05e6e0967a22 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -346,7 +346,9 @@ impl<'a> From<&'a str> for TableReference { #[cfg(not(feature = "sql"))] impl<'a> From<&'a String> for TableReference { fn from(s: &'a String) -> Self { - Self::Bare { table: s.as_str().into() } + Self::Bare { + table: s.as_str().into(), + } } } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index b0bdc5b7bed16..9832c0e9db1e5 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -50,7 +50,11 @@ use arrow::array::{Array, ArrayRef, Int64Array, StringArray}; use arrow::compute::{cast, concat}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use 
datafusion_common::config::{CsvOptions, JsonOptions}; -use datafusion_common::{exec_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError, TableReference, UnnestOptions}; +use datafusion_common::{ + exec_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, + DataFusionError, ParamValues, ScalarValue, SchemaError, TableReference, + UnnestOptions, +}; use datafusion_expr::select_expr::SelectExpr; use datafusion_expr::{ case, diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 5a27c769f1eb1..5f2e8f391e634 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -1618,8 +1618,7 @@ impl Debug for SessionStateBuilder { /// Prefer having short fields at the top and long vector fields near the end /// Group fields by fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut debug_struct = f - .debug_struct("SessionStateBuilder"); + let mut debug_struct = f.debug_struct("SessionStateBuilder"); let ret = debug_struct .field("session_id", &self.session_id) .field("config", &self.config) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 1570d959efaed..32df06b9195c1 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -71,7 +71,10 @@ use datafusion_common::{ use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::memory::MemorySourceConfig; use datafusion_expr::dml::{CopyTo, InsertOp}; -use datafusion_expr::expr::{physical_name, AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, NullTreatment, WindowFunction, WindowFunctionParams}; +use datafusion_expr::expr::{ + physical_name, AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, + NullTreatment, WindowFunction, WindowFunctionParams, +}; use 
datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; use datafusion_expr::{ diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index a563459f42a11..aa984775e4574 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -41,7 +41,6 @@ use datafusion_functions_nested::make_array::make_array_udf; use datafusion_functions_window::expr_fn::{first_value, row_number}; use insta::assert_snapshot; use object_store::local::LocalFileSystem; -use sqlparser::ast::NullTreatment; use std::collections::HashMap; use std::fs; use std::sync::Arc; @@ -71,7 +70,9 @@ use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::file_format::format_as_file_type; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_expr::expr::{FieldMetadata, GroupingSet, Sort, WindowFunction}; +use datafusion_expr::expr::{ + FieldMetadata, GroupingSet, NullTreatment, Sort, WindowFunction, +}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder, diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index cc48c4312e599..4aee274de9083 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -24,6 +24,7 @@ use arrow::util::pretty::{pretty_format_batches, pretty_format_columns}; use datafusion::prelude::*; use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::expr::NullTreatment; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::ExprFunctionExt; use datafusion_functions::core::expr_ext::FieldAccessor; @@ -33,7 +34,6 @@ use datafusion_functions_nested::expr_ext::{IndexAccessor, SliceAccessor}; use 
datafusion_optimizer::simplify_expressions::ExprSimplifier; /// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan use std::sync::{Arc, LazyLock}; -use datafusion_expr::expr::NullTreatment; mod parse_sql_expr; mod simplification; diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index df05bee5d3665..7a8ddba7be6df 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -41,7 +41,7 @@ use datafusion_common::{ use datafusion_functions_window_common::field::WindowUDFFieldArgs; #[cfg(feature = "sql")] use sqlparser::ast::{ - display_comma_separated, ExceptSelectItem, ExcludeSelectItem, Ident, IdentWithAlias, IlikeSelectItem, + display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, RenameSelectItem, ReplaceSelectElement, }; @@ -72,7 +72,6 @@ impl From for NullTreatment { } } - /// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`. /// /// For example the expression `A + 1` will be represented as @@ -1412,17 +1411,13 @@ impl GroupingSet { #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] #[cfg(not(feature = "sql"))] -pub struct IlikeSelectItem{ +pub struct IlikeSelectItem { pub pattern: String, } #[cfg(not(feature = "sql"))] impl Display for IlikeSelectItem { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "ILIKE '{}'", - &self.pattern - )?; + write!(f, "ILIKE '{}'", &self.pattern)?; Ok(()) } } @@ -1474,7 +1469,8 @@ impl Display for ExceptSelectItem { #[cfg(not(feature = "sql"))] pub fn display_comma_separated(slice: &[T]) -> String where - T: Display { + T: Display, +{ use itertools::Itertools; slice.iter().map(|v| format!("{v}")).join(", ") } diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 0c2c7f1dc2794..4ee394731e5b6 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -24,6 +24,8 @@ use std::{ hash::{Hash, Hasher}, }; 
+#[cfg(not(feature = "sql"))] +use crate::expr::Ident; use crate::expr::Sort; use arrow::datatypes::DataType; use datafusion_common::tree_node::{Transformed, TreeNodeContainer, TreeNodeRecursion}; @@ -32,8 +34,6 @@ use datafusion_common::{ }; #[cfg(feature = "sql")] use sqlparser::ast::Ident; -#[cfg(not(feature = "sql"))] -use crate::expr::Ident; /// Various types of DDL (CREATE / DROP) catalog manipulation #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index d69bbbdddf2ea..955414df0d7e8 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -20,17 +20,16 @@ use std::fmt::Debug; use std::sync::Arc; +use crate::expr::NullTreatment; +use crate::{ + AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame, + WindowFunctionDefinition, WindowUDF, +}; use arrow::datatypes::{DataType, Field, SchemaRef}; use datafusion_common::{ config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema, Result, TableReference, }; -use datafusion_expr_common::operator::Operator; -use crate::{ - AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame, - WindowFunctionDefinition, WindowUDF, -}; -use crate::expr::NullTreatment; /// Provides the `SQL` query planner meta-data about tables and /// functions referenced in SQL statements, without a direct dependency on the @@ -263,7 +262,7 @@ pub trait ExprPlanner: Debug + Send + Sync { #[derive(Debug, Clone)] pub struct RawBinaryExpr { #[cfg(not(feature = "sql"))] - pub op: Operator, + pub op: datafusion_expr_common::operator::Operator, #[cfg(feature = "sql")] pub op: sqlparser::ast::BinaryOperator, pub left: Expr, @@ -331,7 +330,10 @@ pub trait TypePlanner: Debug + Send + Sync { /// Plan SQL [`ast::DataType`] to DataFusion [`DataType`] /// /// Returns None if not possible - fn plan_type(&self, _sql_type: &sqlparser::ast::DataType) -> Result> { + fn plan_type( 
+ &self, + _sql_type: &sqlparser::ast::DataType, + ) -> Result> { Ok(None) } } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 7361c77cb5b59..48d07c643f2ca 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -38,11 +38,11 @@ use datafusion_common::{ Result, TableReference, }; +#[cfg(not(feature = "sql"))] +use crate::expr::{ExceptSelectItem, ExcludeSelectItem}; use indexmap::IndexSet; #[cfg(feature = "sql")] use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem}; -#[cfg(not(feature = "sql"))] -use crate::expr::{ExceptSelectItem, ExcludeSelectItem}; pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 44d0a1b04323e..f72dc10a6950f 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -24,18 +24,12 @@ //! - An EXCLUDE clause. use crate::{expr::Sort, lit}; -use arrow::datatypes::DataType; use std::fmt::{self, Formatter}; use std::hash::Hash; -use datafusion_common::{ - exec_err, plan_err, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{plan_err, Result, ScalarValue}; #[cfg(feature = "sql")] -use sqlparser::{ - ast::{self, ValueWithSpan}, - parser::ParserError::ParserError, -}; +use sqlparser::ast::{self, ValueWithSpan}; /// The frame specification determines which output rows are read by an aggregate /// window function. 
The ending frame boundary can be omitted if the `BETWEEN` @@ -122,7 +116,7 @@ impl fmt::Debug for WindowFrame { #[cfg(feature = "sql")] impl TryFrom for WindowFrame { - type Error = DataFusionError; + type Error = datafusion_common::error::DataFusionError; fn try_from(value: ast::WindowFrame) -> Result { let start_bound = WindowFrameBound::try_parse(value.start_bound, &value.units)?; @@ -377,6 +371,8 @@ fn convert_frame_bound_to_scalar_value( v: ast::Expr, units: &ast::WindowFrameUnits, ) -> Result { + use arrow::datatypes::DataType; + use datafusion_common::exec_err; match units { // For ROWS and GROUPS we are sure that the ScalarValue must be a non-negative integer ... ast::WindowFrameUnits::Rows | ast::WindowFrameUnits::Groups => match v { diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 2b799f437bdad..f10510e0973c3 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -43,7 +43,7 @@ recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } chrono = { workspace = true } -datafusion-common = { workspace = true } +datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } datafusion-expr-common = { workspace = true } datafusion-physical-expr = { workspace = true } diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 6a697289bd907..49806d6db344a 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -325,8 +325,8 @@ impl Optimizer { let options = config.options(); let mut new_plan = plan; - let mut previous_plans = Vec::with_capacity(16); - previous_plans.push(new_plan.clone()); + let mut previous_plans = HashSet::with_capacity(16); + previous_plans.insert(LogicalPlanSignature::new(&new_plan)); let starting_schema = Arc::clone(new_plan.schema()); @@ -412,10 +412,10 @@ impl Optimizer { } log_plan(&format!("Optimized plan (pass {i})"), 
&new_plan); - let plan_is_fresh = previous_plans.contains(&new_plan); - if plan_is_fresh { - previous_plans.push(new_plan.clone()); - } else { + // HashSet::insert returns, whether the value was newly inserted. + let plan_is_fresh = + previous_plans.insert(LogicalPlanSignature::new(&new_plan)); + if !plan_is_fresh { // plan did not change, so no need to continue trying to optimize debug!("optimizer pass {i} did not make changes"); break; diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 5c4a55e46ed5a..772742850237b 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -22,7 +22,9 @@ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Diagnostic, Result, Span, }; -use datafusion_expr::expr::{NullTreatment, ScalarFunction, Unnest, WildcardOptions, WindowFunction}; +use datafusion_expr::expr::{ + NullTreatment, ScalarFunction, Unnest, WildcardOptions, WindowFunction, +}; use datafusion_expr::planner::{PlannerResult, RawAggregateExpr, RawWindowExpr}; use datafusion_expr::{expr, Expr, ExprSchemable, WindowFrame, WindowFunctionDefinition}; use sqlparser::ast::{ From 00e22024b7eaf8cf9c28fdcce71f3b5b963a2435 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 13:59:11 -0400 Subject: [PATCH 07/18] Remove unused imports when not using sql feature --- .../core/src/execution/session_state.rs | 37 ++++++++++++------- datafusion/core/src/physical_planner.rs | 2 +- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 5f2e8f391e634..f6582909041a0 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -24,8 +24,8 @@ use std::fmt::Debug; use std::sync::Arc; use crate::catalog::{CatalogProviderList, SchemaProvider, TableProviderFactory}; 
-use crate::datasource::cte_worktable::CteWorkTable; -use crate::datasource::file_format::{format_as_file_type, FileFormatFactory}; +use crate::datasource::file_format::FileFormatFactory; +#[cfg(feature = "sql")] use crate::datasource::provider_as_source; use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; @@ -34,16 +34,15 @@ use datafusion_catalog::information_schema::{ InformationSchemaProvider, INFORMATION_SCHEMA, }; -use arrow::datatypes::{DataType, SchemaRef}; +use arrow::datatypes::DataType; use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::{TableFunction, TableFunctionImpl}; use datafusion_common::alias::AliasGenerator; use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; -use datafusion_common::file_options::file_type::FileType; use datafusion_common::tree_node::TreeNode; use datafusion_common::{ - config_err, exec_err, not_impl_err, plan_datafusion_err, DFSchema, DataFusionError, + config_err, exec_err, plan_datafusion_err, DFSchema, DataFusionError, ResolvedTableReference, TableReference, }; use datafusion_execution::config::SessionConfig; @@ -56,10 +55,10 @@ use datafusion_expr::planner::ExprPlanner; use datafusion_expr::planner::TypePlanner; use datafusion_expr::registry::{FunctionRegistry, SerializerRegistry}; use datafusion_expr::simplify::SimplifyInfo; -use datafusion_expr::var_provider::{is_system_variables, VarType}; +#[cfg(feature = "sql")] +use datafusion_expr::TableSource; use datafusion_expr::{ - AggregateUDF, Explain, Expr, ExprSchemable, LogicalPlan, ScalarUDF, TableSource, - WindowUDF, + AggregateUDF, Explain, Expr, ExprSchemable, LogicalPlan, ScalarUDF, WindowUDF, }; use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_optimizer::{ @@ -394,7 +393,7 @@ impl SessionState { .parse_statements()?; if 
statements.len() > 1 { - return not_impl_err!( + return datafusion_common::not_impl_err!( "The context currently only supports a single SQL statement" ); } @@ -1664,6 +1663,7 @@ impl From for SessionStateBuilder { /// /// This is used so the SQL planner can access the state of the session without /// having a direct dependency on the [`SessionState`] struct (and core crate) +#[cfg(feature = "sql")] struct SessionContextProvider<'a> { state: &'a SessionState, tables: HashMap>, @@ -1723,9 +1723,11 @@ impl ContextProvider for SessionContextProvider<'_> { fn create_cte_work_table( &self, name: &str, - schema: SchemaRef, + schema: arrow::datatypes::SchemaRef, ) -> datafusion_common::Result> { - let table = Arc::new(CteWorkTable::new(name, schema)); + let table = Arc::new(crate::datasource::cte_worktable::CteWorkTable::new( + name, schema, + )); Ok(provider_as_source(table)) } @@ -1742,6 +1744,8 @@ impl ContextProvider for SessionContextProvider<'_> { } fn get_variable_type(&self, variable_names: &[String]) -> Option { + use datafusion_expr::var_provider::{is_system_variables, VarType}; + if variable_names.is_empty() { return None; } @@ -1775,14 +1779,21 @@ impl ContextProvider for SessionContextProvider<'_> { self.state.window_functions().keys().cloned().collect() } - fn get_file_type(&self, ext: &str) -> datafusion_common::Result> { + fn get_file_type( + &self, + ext: &str, + ) -> datafusion_common::Result< + Arc, + > { self.state .file_formats .get(&ext.to_lowercase()) .ok_or(plan_datafusion_err!( "There is no registered file format with ext {ext}" )) - .map(|file_type| format_as_file_type(Arc::clone(file_type))) + .map(|file_type| { + crate::datasource::file_format::format_as_file_type(Arc::clone(file_type)) + }) } } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 32df06b9195c1..c58c155dd7caf 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -64,6 +64,7 @@ use 
datafusion_common::display::ToStringifiedPlan; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeVisitor, }; +use datafusion_common::TableReference; use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, ScalarValue, @@ -93,7 +94,6 @@ use datafusion_physical_plan::execution_plan::InvariantLevel; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::recursive_query::RecursiveQueryExec; use datafusion_physical_plan::unnest::ListUnnest; -use datafusion_sql::TableReference; use async_trait::async_trait; use datafusion_physical_plan::async_func::{AsyncFuncExec, AsyncMapper}; From 36272cf96f3bc25efd9d1835f1e94628623d5184 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 15:36:33 -0400 Subject: [PATCH 08/18] Working through more feature gating --- Cargo.lock | 1 + datafusion/common/src/config.rs | 4 +++- datafusion/common/src/table_reference.rs | 6 +++--- datafusion/common/src/utils/mod.rs | 3 ++- datafusion/functions-nested/Cargo.toml | 7 ++++++- datafusion/functions-nested/src/planner.rs | 18 +++++++++--------- datafusion/substrait/Cargo.toml | 2 +- 7 files changed, 25 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6d594ca9087d..c909130f039c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2384,6 +2384,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index cdd8e72a06cc9..269c44661c415 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -2671,9 +2671,11 @@ impl Display for OutputFormat { #[cfg(test)] mod tests { + #[cfg(feature = "parquet")] + use crate::config::TableParquetOptions; use crate::config::{ 
ConfigEntry, ConfigExtension, ConfigField, ConfigFileType, ExtensionOptions, - Extensions, TableOptions, TableParquetOptions, + Extensions, TableOptions, }; use std::any::Any; use std::collections::HashMap; diff --git a/datafusion/common/src/table_reference.rs b/datafusion/common/src/table_reference.rs index b05e6e0967a22..efc2908ae395f 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -406,16 +406,16 @@ mod tests { #[test] fn test_table_reference_to_vector() { - let table_reference = TableReference::parse_str("table"); + let table_reference = TableReference::from("table"); assert_eq!(vec!["table".to_string()], table_reference.to_vec()); - let table_reference = TableReference::parse_str("schema.table"); + let table_reference = TableReference::from("schema.table"); assert_eq!( vec!["schema".to_string(), "table".to_string()], table_reference.to_vec() ); - let table_reference = TableReference::parse_str("catalog.schema.table"); + let table_reference = TableReference::from("catalog.schema.table"); assert_eq!( vec![ "catalog".to_string(), diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 9a733bbb1e2b6..b934d7494f021 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -886,11 +886,12 @@ pub fn take_function_args( }) } -#[cfg(test)] +#[cfg(all(test, feature = "sql"))] mod tests { use super::*; use crate::ScalarValue::Null; use arrow::array::Float64Array; + use sqlparser::ast::Ident; use sqlparser::tokenizer::Span; #[test] diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 87a480e16003c..15af9129b7123 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -37,13 +37,18 @@ workspace = true [lib] name = "datafusion_functions_nested" +[features] +default = ["sql"] +sql = ["datafusion-common/sql"] + [dependencies] arrow = { workspace = true } arrow-ord 
= { workspace = true } datafusion-common = { workspace = true } datafusion-doc = { workspace = true } datafusion-execution = { workspace = true } -datafusion-expr = { workspace = true } +datafusion-expr = { workspace = true, default-features = false } +datafusion-expr-common = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index 369eaecb1905f..b7a9b878c69e5 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -22,11 +22,15 @@ use datafusion_common::ExprSchema; use datafusion_common::{plan_err, utils::list_ndims, DFSchema, Result}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams}; +#[cfg(feature = "sql")] +use datafusion_expr::sqlparser::ast::BinaryOperator; use datafusion_expr::AggregateUDF; use datafusion_expr::{ planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr}, - sqlparser, Expr, ExprSchemable, GetFieldAccess, + Expr, ExprSchemable, GetFieldAccess, }; +#[cfg(not(feature = "sql"))] +use datafusion_expr_common::operator::Operator as BinaryOperator; use datafusion_functions::core::get_field as get_field_inner; use datafusion_functions::expr_fn::get_field; use datafusion_functions_aggregate::nth_value::nth_value_udaf; @@ -51,7 +55,7 @@ impl ExprPlanner for NestedFunctionPlanner { ) -> Result> { let RawBinaryExpr { op, left, right } = expr; - if op == sqlparser::ast::BinaryOperator::StringConcat { + if op == BinaryOperator::StringConcat { let left_type = left.get_type(schema)?; let right_type = right.get_type(schema)?; let left_list_ndims = list_ndims(&left_type); @@ -75,18 +79,14 @@ impl ExprPlanner for NestedFunctionPlanner { } else if left_list_ndims < right_list_ndims { return 
Ok(PlannerResult::Planned(array_prepend(left, right))); } - } else if matches!( - op, - sqlparser::ast::BinaryOperator::AtArrow - | sqlparser::ast::BinaryOperator::ArrowAt - ) { + } else if matches!(op, BinaryOperator::AtArrow | BinaryOperator::ArrowAt) { let left_type = left.get_type(schema)?; let right_type = right.get_type(schema)?; let left_list_ndims = list_ndims(&left_type); let right_list_ndims = list_ndims(&right_type); // if both are list if left_list_ndims > 0 && right_list_ndims > 0 { - if op == sqlparser::ast::BinaryOperator::AtArrow { + if op == BinaryOperator::AtArrow { // array1 @> array2 -> array_has_all(array1, array2) return Ok(PlannerResult::Planned(array_has_all(left, right))); } else { @@ -123,7 +123,7 @@ impl ExprPlanner for NestedFunctionPlanner { } fn plan_any(&self, expr: RawBinaryExpr) -> Result> { - if expr.op == sqlparser::ast::BinaryOperator::Eq { + if expr.op == BinaryOperator::Eq { Ok(PlannerResult::Planned(Expr::ScalarFunction( ScalarFunction::new_udf( array_has_udf(), diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 63a69b48866b4..dfdf4899fc9bc 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -34,7 +34,7 @@ workspace = true async-recursion = "1.0" async-trait = { workspace = true } chrono = { workspace = true } -datafusion = { workspace = true } +datafusion = { workspace = true, features = ["sql"] } itertools = { workspace = true } object_store = { workspace = true } pbjson-types = { workspace = true } From 6139746d36505692d1be3763852852b834393c99 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 16:48:01 -0400 Subject: [PATCH 09/18] Working through more feature gating --- .github/workflows/rust.yml | 2 +- Cargo.toml | 4 ++-- datafusion-cli/Cargo.toml | 3 ++- datafusion/common/src/utils/mod.rs | 2 +- datafusion/expr/Cargo.toml | 2 +- datafusion/functions-nested/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 4 +++- 7 files changed, 11 
insertions(+), 8 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 05a6d70f0278a..abd42f1707123 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -579,7 +579,7 @@ jobs: with: rust-version: stable - name: Run datafusion-common tests - run: cargo test --profile ci -p datafusion-common --features=pyarrow + run: cargo test --profile ci -p datafusion-common --features=pyarrow,sql vendor: name: Verify Vendored Code diff --git a/Cargo.toml b/Cargo.toml index 375c5d6a8839b..b29cf5d7e3763 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,8 +121,8 @@ datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "49. datafusion-datasource-json = { path = "datafusion/datasource-json", version = "49.0.0", default-features = false } datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "49.0.0", default-features = false } datafusion-doc = { path = "datafusion/doc", version = "49.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "49.0.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "49.0.0", default-features = false } +datafusion-execution = { path = "datafusion/execution", version = "49.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "49.0.0", default-features = false } datafusion-expr-common = { path = "datafusion/expr-common", version = "49.0.0" } datafusion-ffi = { path = "datafusion/ffi", version = "49.0.0" } datafusion-functions = { path = "datafusion/functions", version = "49.0.0" } diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 394adbb6105f3..052fcf69b2595 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -42,6 +42,7 @@ aws-credential-types = "1.2.6" clap = { version = "4.5.47", features = ["derive", "cargo"] } datafusion = { workspace = true, features = [ "avro", + "compression", "crypto_expressions", 
"datetime_expressions", "encoding_expressions", @@ -50,8 +51,8 @@ datafusion = { workspace = true, features = [ "parquet_encryption", "recursive_protection", "regex_expressions", + "sql", "unicode_expressions", - "compression", ] } dirs = "6.0.0" env_logger = { workspace = true } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index b934d7494f021..fbd009e5779d3 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -886,7 +886,7 @@ pub fn take_function_args( }) } -#[cfg(all(test, feature = "sql"))] +#[cfg(test)] mod tests { use super::*; use crate::ScalarValue::Null; diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index b80cf23e104fe..2a8e5ecfb1ab9 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -53,11 +53,11 @@ datafusion-functions-aggregate-common = { workspace = true } datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } indexmap = { workspace = true } +itertools = "0.14.0" paste = "^1.0" recursive = { workspace = true, optional = true } serde_json = { workspace = true } sqlparser = { workspace = true, optional = true } -itertools = "0.14.0" [dev-dependencies] ctor = { workspace = true } diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 15af9129b7123..9c0b7a16f9a9b 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -39,7 +39,7 @@ name = "datafusion_functions_nested" [features] default = ["sql"] -sql = ["datafusion-common/sql"] +sql = ["datafusion-expr/sql"] [dependencies] arrow = { workspace = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 956887e1bb170..9dc433f7efcc9 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -46,7 +46,7 @@ avro = ["datafusion/avro", "datafusion-common/avro"] [dependencies] arrow = { workspace = 
true } chrono = { workspace = true } -datafusion = { workspace = true, default-features = false } +datafusion = { workspace = true, default-features = false, features = ["parquet", "nested_expressions"] } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-proto-common = { workspace = true } @@ -55,7 +55,9 @@ pbjson = { workspace = true, optional = true } prost = { workspace = true } serde = { version = "1.0", optional = true } serde_json = { workspace = true, optional = true } + [dev-dependencies] +datafusion = { workspace = true, default-features = false, features = ["sql"] } datafusion-functions = { workspace = true, default-features = true } datafusion-functions-aggregate = { workspace = true } datafusion-functions-window-common = { workspace = true } From d5b04ea18fcd2d8c9bbc27e005b1bd49cb160f79 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 16:54:42 -0400 Subject: [PATCH 10/18] wasmtest required sql --- datafusion/wasmtest/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml index 2fc9f0df30841..8de9534980d8e 100644 --- a/datafusion/wasmtest/Cargo.toml +++ b/datafusion/wasmtest/Cargo.toml @@ -45,7 +45,7 @@ chrono = { version = "0.4", features = ["wasmbind"] } # all the `std::fmt` and `std::panicking` infrastructure, so isn't great for # code size when deploying. 
console_error_panic_hook = { version = "0.1.1", optional = true } -datafusion = { workspace = true, features = ["parquet"] } +datafusion = { workspace = true, features = ["parquet", "sql"] } datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } From ddaf8a38dc9b970b32ade2eb8a622f24ec46acc7 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 17:07:39 -0400 Subject: [PATCH 11/18] working on CI --- datafusion/common/Cargo.toml | 1 + datafusion/common/src/utils/mod.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 22916925c116c..bb5f53084a412 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -84,3 +84,4 @@ web-time = "1.1.0" chrono = { workspace = true } insta = { workspace = true } rand = { workspace = true } +sqlparser = { workspace = true } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index fbd009e5779d3..c7f57980abc16 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -1088,6 +1088,7 @@ mod tests { Ok(()) } + #[cfg(feature = "sql")] #[test] fn test_quote_identifier() -> Result<()> { let cases = vec![ From 95eff0df7d0305b5071dd354ef433ec3a8435081 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 26 Aug 2025 19:04:59 -0400 Subject: [PATCH 12/18] Update docs --- datafusion/expr/src/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 955414df0d7e8..25a0f83947eee 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -327,7 +327,7 @@ pub enum PlannerResult { /// Customize planning SQL types to DataFusion (Arrow) types. 
#[cfg(feature = "sql")] pub trait TypePlanner: Debug + Send + Sync { - /// Plan SQL [`ast::DataType`] to DataFusion [`DataType`] + /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`] /// /// Returns None if not possible fn plan_type( From 28a0d0111a777a16529f4163785f20e4e6fd034d Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 5 Sep 2025 09:06:33 -0400 Subject: [PATCH 13/18] Remove some duplicate code by doing a bit of sql-like parsing of identifiers --- datafusion/common/src/column.rs | 14 ++----- datafusion/common/src/table_reference.rs | 46 ++++++-------------- datafusion/common/src/utils/mod.rs | 53 ++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 45 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index 83b68865d2838..c7f0b5a4f4881 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -18,7 +18,6 @@ //! Column use crate::error::{_schema_err, add_possible_columns_to_diag}; -#[cfg(feature = "sql")] use crate::utils::parse_identifiers_normalized; use crate::utils::quote_identifier; use crate::{DFSchema, Diagnostic, Result, SchemaError, Spans, TableReference}; @@ -88,7 +87,6 @@ impl Column { /// /// For example, `foo.bar` would be represented as a two element vector /// `["foo", "bar"]` - #[cfg(feature = "sql")] fn from_idents(mut idents: Vec) -> Option { let (relation, name) = match idents.len() { 1 => (None, idents.remove(0)), @@ -129,7 +127,6 @@ impl Column { /// Treats the name as a SQL identifier.
For example /// `foo.BAR` would be parsed to a reference to relation `foo`, column name `bar` (lower case) /// where `"foo.BAR"` would be parsed to a reference to column named `foo.BAR` - #[cfg(feature = "sql")] pub fn from_qualified_name(flat_name: impl Into) -> Self { let flat_name = flat_name.into(); Self::from_idents(parse_identifiers_normalized(&flat_name, false)).unwrap_or_else( @@ -140,14 +137,7 @@ impl Column { }, ) } - #[cfg(not(feature = "sql"))] - pub fn from_qualified_name(flat_name: impl Into) -> Self { - Self { - relation: None, - name: flat_name.into(), - spans: Spans::new(), - } - } + /// Deserialize a fully qualified name string into a column preserving column text case #[cfg(feature = "sql")] pub fn from_qualified_name_ignore_case(flat_name: impl Into) -> Self { @@ -160,10 +150,12 @@ impl Column { }, ) } + #[cfg(not(feature = "sql"))] pub fn from_qualified_name_ignore_case(flat_name: impl Into) -> Self { Self::from_qualified_name(flat_name) } + /// return the column's name. /// /// Note: This ignores the relation and returns the column name only. diff --git a/datafusion/common/src/table_reference.rs b/datafusion/common/src/table_reference.rs index efc2908ae395f..7cf8e7af1a794 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -#[cfg(feature = "sql")] use crate::utils::parse_identifiers_normalized; use crate::utils::quote_identifier; use std::sync::Arc; @@ -271,7 +270,6 @@ impl TableReference { /// Forms a [`TableReference`] by parsing `s` as a multipart SQL /// identifier. See docs on [`TableReference`] for more details. 
- #[cfg(feature = "sql")] pub fn parse_str(s: &str) -> Self { let mut parts = parse_identifiers_normalized(s, false); @@ -315,50 +313,24 @@ impl TableReference { /// Parse a string into a TableReference, normalizing where appropriate /// /// See full details on [`TableReference::parse_str`] -#[cfg(feature = "sql")] impl<'a> From<&'a str> for TableReference { fn from(s: &'a str) -> Self { Self::parse_str(s) } } -#[cfg(feature = "sql")] impl<'a> From<&'a String> for TableReference { fn from(s: &'a String) -> Self { Self::parse_str(s) } } -#[cfg(feature = "sql")] impl From for TableReference { fn from(s: String) -> Self { Self::parse_str(&s) } } -#[cfg(not(feature = "sql"))] -impl<'a> From<&'a str> for TableReference { - fn from(s: &'a str) -> Self { - Self::Bare { table: s.into() } - } -} - -#[cfg(not(feature = "sql"))] -impl<'a> From<&'a String> for TableReference { - fn from(s: &'a String) -> Self { - Self::Bare { - table: s.as_str().into(), - } - } -} - -#[cfg(not(feature = "sql"))] -impl From for TableReference { - fn from(s: String) -> Self { - Self::Bare { table: s.into() } - } -} - impl From for TableReference { fn from(resolved: ResolvedTableReference) -> Self { Self::Full { @@ -396,12 +368,18 @@ mod tests { let actual = TableReference::from("TABLE"); assert_eq!(expected, actual); - // if fail to parse, take entire input string as identifier - let expected = TableReference::Bare { - table: "TABLE()".into(), - }; - let actual = TableReference::from("TABLE()"); - assert_eq!(expected, actual); + // Disable this test for non-sql features so that we don't need to reproduce + // things like table function upper case conventions, since those will not + // be used if SQL is not selected. 
+ #[cfg(feature = "sql")] + { + // if fail to parse, take entire input string as identifier + let expected = TableReference::Bare { + table: "TABLE()".into(), + }; + let actual = TableReference::from("TABLE()"); + assert_eq!(expected, actual); + } } #[test] diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index c7f57980abc16..7443625276d04 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -302,6 +302,59 @@ pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec>() } +#[cfg(not(feature = "sql"))] +pub(crate) fn parse_identifiers(s: &str) -> Result> { + let mut result = Vec::new(); + let mut current = String::new(); + let mut in_quotes = false; + + for ch in s.chars() { + match ch { + '"' => { + in_quotes = !in_quotes; + current.push(ch); + } + '.' if !in_quotes => { + result.push(current.clone()); + current.clear(); + } + _ => { + current.push(ch); + } + } + } + + // Push the last part if it's not empty + if !current.is_empty() { + result.push(current); + } + + Ok(result) +} + +#[cfg(not(feature = "sql"))] +pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec { + parse_identifiers(s) + .unwrap_or_default() + .into_iter() + .map(|id| { + let is_double_quoted = if id.len() > 2 { + let mut chars = id.chars(); + chars.next() == Some('"') && chars.last() == Some('"') + } else { + false + }; + if is_double_quoted { + id[1..id.len() - 1].to_string().replace("\"\"", "\"") + } else if ignore_case { + id + } else { + id.to_ascii_lowercase() + } + }) + .collect::>() +} + /// This function "takes" the elements at `indices` from the slice `items`. 
pub fn get_at_indices>( items: &[T], From d40eb8be7c6a8d17fa2870bbe8ae045f53938745 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 5 Sep 2025 14:55:11 -0400 Subject: [PATCH 14/18] Conditionally import sql features --- datafusion/core/Cargo.toml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 80542efb3d4b9..e61ab2f990059 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -97,12 +97,13 @@ serde = [ ] sql = [ "datafusion-common/sql", + "datafusion-functions-nested?/sql", "datafusion-sql", "sqlparser", ] string_expressions = ["datafusion-functions/string_expressions"] unicode_expressions = [ - "datafusion-sql/unicode_expressions", + "datafusion-sql?/unicode_expressions", "datafusion-functions/unicode_expressions", ] extended_tests = [] @@ -125,11 +126,11 @@ datafusion-datasource-csv = { workspace = true } datafusion-datasource-json = { workspace = true } datafusion-datasource-parquet = { workspace = true, optional = true } datafusion-execution = { workspace = true } -datafusion-expr = { workspace = true } +datafusion-expr = { workspace = true, default-features = false } datafusion-expr-common = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } -datafusion-functions-nested = { workspace = true, optional = true } +datafusion-functions-nested = { workspace = true, default-features = false, optional = true } datafusion-functions-table = { workspace = true } datafusion-functions-window = { workspace = true } datafusion-optimizer = { workspace = true } From 7066b0ea65838dd431ee691fa0d75e7d9c141ed4 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 5 Sep 2025 15:22:29 -0400 Subject: [PATCH 15/18] Set default features to false so we do not always pull in sql --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 
b29cf5d7e3763..cf7e03db03911 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -128,7 +128,7 @@ datafusion-ffi = { path = "datafusion/ffi", version = "49.0.0" } datafusion-functions = { path = "datafusion/functions", version = "49.0.0" } datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "49.0.0" } datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "49.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "49.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "49.0.0", default-features = false } datafusion-functions-table = { path = "datafusion/functions-table", version = "49.0.0" } datafusion-functions-window = { path = "datafusion/functions-window", version = "49.0.0" } datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "49.0.0" } From fdac9bb9175db5b9ae1105944e0435e029cedec6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 16 Sep 2025 07:45:23 -0400 Subject: [PATCH 16/18] Add check for just sql feature --- .github/workflows/rust.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 921f668a120cb..a17bc0d14b130 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -217,6 +217,8 @@ jobs: run: cargo check --profile ci --no-default-features -p datafusion --features=recursive_protection - name: Check datafusion (serde) run: cargo check --profile ci --no-default-features -p datafusion --features=serde + - name: Check datafusion (sql) + run: cargo check --profile ci --no-default-features -p datafusion --features=sql - name: Check datafusion (string_expressions) run: cargo check --profile ci --no-default-features -p datafusion --features=string_expressions - name: Check datafusion (unicode_expressions) From 495ad575859e22a3e1ac27437d44b54d1f3a6263 Mon Sep 17 00:00:00 2001 From: Andrew Lamb 
Date: Tue, 16 Sep 2025 07:49:01 -0400 Subject: [PATCH 17/18] Update readme for features --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2d0e3f70943b0..4c4b955176b2b 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,7 @@ Default features: - `datetime_expressions`: date and time functions such as `to_timestamp` - `encoding_expressions`: `encode` and `decode` functions - `parquet`: support for reading the [Apache Parquet] format +- `sql`: Support for sql parsing / planning - `regex_expressions`: regular expression functions, such as `regexp_match` - `unicode_expressions`: Include unicode aware functions such as `character_length` - `unparser`: enables support to reverse LogicalPlans back into SQL From 80e0436cb54ffd8ca387ad271c21c4a15cf268cf Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 16 Sep 2025 10:24:13 -0400 Subject: [PATCH 18/18] Taplo format --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 22186d8d319e1..6ce6a3d1b2167 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -122,7 +122,7 @@ datafusion-datasource-json = { path = "datafusion/datasource-json", version = "5 datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.0.0", default-features = false } datafusion-doc = { path = "datafusion/doc", version = "50.0.0" } datafusion-execution = { path = "datafusion/execution", version = "50.0.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "50.0.0", default-features = false} +datafusion-expr = { path = "datafusion/expr", version = "50.0.0", default-features = false } datafusion-expr-common = { path = "datafusion/expr-common", version = "50.0.0" } datafusion-ffi = { path = "datafusion/ffi", version = "50.0.0" } datafusion-functions = { path = "datafusion/functions", version = "50.0.0" }