From 26176e24b5efd0a9481f2be99056f9577a4f2c85 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Aug 2023 15:37:21 -0400 Subject: [PATCH 1/3] Minor: Clarify documentation for read only queries --- datafusion/core/src/execution/context.rs | 67 ++++++++++++++++++++---- datafusion/core/tests/sql/mod.rs | 1 + 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs index 6593e22e6cc01..2284b31eaff50 100644 --- a/datafusion/core/src/execution/context.rs +++ b/datafusion/core/src/execution/context.rs @@ -163,12 +163,14 @@ where /// * Register a custom data source that can be referenced from a SQL query. /// * Execution a SQL query /// +/// # Example: DataFrame API +/// /// The following example demonstrates how to use the context to execute a query against a CSV /// data source using the DataFrame API: /// /// ``` /// use datafusion::prelude::*; -/// # use datafusion::error::Result; +/// # use datafusion::{error::Result, assert_batches_eq}; /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); @@ -176,22 +178,47 @@ where /// let df = df.filter(col("a").lt_eq(col("b")))? /// .aggregate(vec![col("a")], vec![min(col("b"))])? 
/// .limit(0, Some(100))?; -/// let results = df.collect(); +/// let results = df +/// .collect() +/// .await?; +/// assert_batches_eq!( +/// &[ +/// "+---+----------------+", +/// "| a | MIN(example.b) |", +/// "+---+----------------+", +/// "| 1 | 2 |", +/// "+---+----------------+", +/// &results +/// ); /// # Ok(()) /// # } /// ``` /// +/// # Example: SQL APU +/// /// The following example demonstrates how to execute the same query using SQL: /// /// ``` /// use datafusion::prelude::*; -/// -/// # use datafusion::error::Result; +/// # use datafusion::{error::Result, assert_batches_eq}; /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = SessionContext::new(); /// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; -/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; +/// let results = ctx +/// .sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100") +/// .await? +/// .collect() +/// .await?; +/// assert_batches_eq!( +/// &[ +/// "+---+----------------+", +/// "| a | MIN(example.b) |", +/// "+---+----------------+", +/// "| 1 | 2 |", +/// "+---+----------------+", +/// &results +/// ); /// # Ok(()) /// # } /// ``` @@ -342,16 +369,34 @@ impl SessionContext { self.state.read().config.clone() } - /// Creates a [`DataFrame`] that will execute a SQL query. + /// Creates a [`DataFrame`] from SQL query text. /// /// Note: This API implements DDL statements such as `CREATE TABLE` and /// `CREATE VIEW` and DML statements such as `INSERT INTO` with in-memory /// default implementations. /// - /// If this is not desirable, consider using [`SessionState::create_logical_plan()`] which - /// does not mutate the state based on such statements. + /// For read only SQL, use [`create_logical_plan()`] to create a + /// [`LogicalPlan`] and [`SessionState::create_physical_plan`] to + /// execute it. 
+ /// + /// # Example: Running SQL queries + /// + /// See the example on [`Self`] + /// + /// # Example: Run SQL supporting DDL + /// ``` + /// use datafusion::prelude::*; + /// # + /// # use datafusion::error::Result; + /// # #[tokio::main] + /// # async fn main() -> Result<()> { + /// let mut ctx = SessionContext::new(); + /// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; + /// # Ok(()) + /// # } + /// ``` + pub async fn sql(&self, sql: &str) -> Result { - // create a query planner let plan = self.state().create_logical_plan(sql).await?; self.execute_logical_plan(plan).await @@ -1304,7 +1349,7 @@ impl FunctionRegistry for SessionContext { /// A planner used to add extensions to DataFusion logical and physical plans. #[async_trait] pub trait QueryPlanner { - /// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution + /// Given a `LogicalPlan`, create an [`ExecutionPlan`] suitable for execution async fn create_physical_plan( &self, logical_plan: &LogicalPlan, @@ -1317,7 +1362,7 @@ struct DefaultQueryPlanner {} #[async_trait] impl QueryPlanner for DefaultQueryPlanner { - /// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution + /// Given a `LogicalPlan`, create an [`ExecutionPlan`] suitable for execution async fn create_physical_plan( &self, logical_plan: &LogicalPlan, diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index c1adcf9d0a966..c295d8f1326ec 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -99,6 +99,7 @@ pub mod select; pub mod subqueries; pub mod timestamp; pub mod udf; +mod readonly; fn assert_float_eq(expected: &[Vec], received: &[Vec]) where From ce32471d81d37ed90102cbe47aa60d81d2eeb2de Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Aug 2023 15:38:49 -0400 Subject: [PATCH 2/3] fix comment --- datafusion/core/src/execution/context.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff 
--git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs index 2284b31eaff50..ccbaa14635d0c 100644 --- a/datafusion/core/src/execution/context.rs +++ b/datafusion/core/src/execution/context.rs @@ -184,17 +184,18 @@ where /// assert_batches_eq!( /// &[ /// "+---+----------------+", -/// "| a | MIN(example.b) |", +/// "| a | MIN(?table?.b) |", /// "+---+----------------+", /// "| 1 | 2 |", /// "+---+----------------+", +/// ], /// &results /// ); /// # Ok(()) /// # } /// ``` /// -/// # Example: SQL APU +/// # Example: SQL API /// /// The following example demonstrates how to execute the same query using SQL: /// @@ -217,6 +218,7 @@ where /// "+---+----------------+", /// "| 1 | 2 |", /// "+---+----------------+", +/// ], /// &results /// ); /// # Ok(()) From a65b6afc636d06dca8a20db484443e511d0eb2ed Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 18 Aug 2023 08:23:09 -0400 Subject: [PATCH 3/3] Update docs to show how to do "read only" SQL --- datafusion/core/src/execution/context.rs | 45 +++++++++++++++++++++--- datafusion/core/tests/sql/mod.rs | 1 - 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs index ccbaa14635d0c..bc65bb7a13c5c 100644 --- a/datafusion/core/src/execution/context.rs +++ b/datafusion/core/src/execution/context.rs @@ -377,7 +377,7 @@ impl SessionContext { /// `CREATE VIEW` and DML statements such as `INSERT INTO` with in-memory /// default implementations. /// - /// For read only SQL, use [`create_logical_plan()`] to create a + /// For read only SQL, use [`SessionState::create_logical_plan()`] to create a /// [`LogicalPlan`] and [`SessionState::create_physical_plan`] to /// execute it. 
/// @@ -385,15 +385,50 @@ impl SessionContext { /// /// See the example on [`Self`] /// - /// # Example: Run SQL supporting DDL + /// # Example: Creating a Table with SQL + /// /// ``` /// use datafusion::prelude::*; - /// # - /// # use datafusion::error::Result; + /// # use datafusion::{error::Result, assert_batches_eq}; + /// # #[tokio::main] + /// # async fn main() -> Result<()> { + /// let mut ctx = SessionContext::new(); + /// ctx + /// .sql("CREATE TABLE foo (x INTEGER)") + /// .await? + /// .collect() + /// .await?; + /// assert!(ctx.table_exist("foo").unwrap()); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Example: Preventing Creating a Table with SQL + /// + /// If you want to avoid creating tables, you must use a different + /// API: + /// + /// ``` + /// use datafusion::prelude::*; + /// # use datafusion::{error::Result}; + /// # use datafusion::physical_plan::collect; /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = SessionContext::new(); - /// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; + /// let plan = ctx + /// .state() + /// .create_logical_plan("CREATE TABLE foo (x INTEGER)") + /// .await?; + /// // Cannot create an ExecutionPlan suitable for running + /// let err = ctx + /// .state() + /// .create_physical_plan(&plan) + /// .await + /// .unwrap_err(); + /// assert_eq!( + /// err.to_string(), + /// "This feature is not implemented: Unsupported logical plan: CreateMemoryTable" + /// ); /// # Ok(()) /// # } /// ``` diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index c295d8f1326ec..c1adcf9d0a966 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -99,7 +99,6 @@ pub mod select; pub mod subqueries; pub mod timestamp; pub mod udf; -mod readonly; fn assert_float_eq(expected: &[Vec], received: &[Vec]) where