From c0112e2f47cfb70f531abb1c3295a21fc8f708cf Mon Sep 17 00:00:00 2001 From: Fedomn Date: Sun, 23 Oct 2022 17:31:34 +0800 Subject: [PATCH] refactor(binder): refactor PlanNode output columns Signed-off-by: Fedomn --- src/binder/expression/mod.rs | 51 +++++++----------- src/binder/table/subquery.rs | 5 +- src/db.rs | 10 ++-- src/optimizer/core/rule.rs | 3 +- src/optimizer/heuristic/optimizer.rs | 14 ++--- src/optimizer/input_ref_rewriter.rs | 5 +- src/optimizer/plan_node/dummy.rs | 6 +-- src/optimizer/plan_node/logical_agg.rs | 10 ++-- src/optimizer/plan_node/logical_filter.rs | 10 ++-- src/optimizer/plan_node/logical_join.rs | 53 ++++++++----------- src/optimizer/plan_node/logical_limit.rs | 10 ++-- src/optimizer/plan_node/logical_order.rs | 10 ++-- src/optimizer/plan_node/logical_project.rs | 10 ++-- src/optimizer/plan_node/logical_table_scan.rs | 6 +-- src/optimizer/plan_node/mod.rs | 51 +----------------- .../plan_node/physical_cross_join.rs | 10 ++-- src/optimizer/plan_node/physical_filter.rs | 10 ++-- src/optimizer/plan_node/physical_hash_agg.rs | 10 ++-- src/optimizer/plan_node/physical_hash_join.rs | 10 ++-- src/optimizer/plan_node/physical_limit.rs | 10 ++-- src/optimizer/plan_node/physical_order.rs | 10 ++-- src/optimizer/plan_node/physical_project.rs | 10 ++-- .../plan_node/physical_simple_agg.rs | 10 ++-- .../plan_node/physical_table_scan.rs | 10 ++-- src/optimizer/rules/column_pruning.rs | 26 +++------ src/optimizer/rules/combine_operators.rs | 5 +- src/optimizer/rules/mod.rs | 1 - src/optimizer/rules/physical_rewrite.rs | 3 +- src/optimizer/rules/pushdown_limit.rs | 15 +++--- src/optimizer/rules/pushdown_predicates.rs | 13 +++-- src/optimizer/rules/simplification.rs | 3 +- src/planner/mod.rs | 28 +--------- src/planner/select.rs | 3 -- tests/planner/column-pruning.planner.sql | 31 +++++++++++ tests/planner/column-pruning.yml | 5 ++ 35 files changed, 165 insertions(+), 312 deletions(-) diff --git a/src/binder/expression/mod.rs b/src/binder/expression/mod.rs index 967dee7..ff34cf6 100644 --- a/src/binder/expression/mod.rs +++ b/src/binder/expression/mod.rs @@ -50,20 +50,6 @@ impl BoundExpr { } } - pub fn contains_column_ref(&self) -> bool { - match self { - BoundExpr::Constant(_) => false, - BoundExpr::InputRef(_) => false, - BoundExpr::ColumnRef(_) => true, - BoundExpr::BinaryOp(binary_op) => { - binary_op.left.contains_column_ref() || binary_op.right.contains_column_ref() - } - BoundExpr::TypeCast(tc) => tc.expr.contains_column_ref(), - BoundExpr::AggFunc(agg) => agg.exprs.iter().any(|arg| arg.contains_column_ref()), - BoundExpr::Alias(alias) => alias.expr.contains_column_ref(), - } - } - pub fn get_referenced_column_catalog(&self) -> Vec { match self { BoundExpr::Constant(_) => vec![], @@ -85,48 +71,47 @@ impl BoundExpr { } } - /// Generate a new column catalog in table alias or subquery for outside referenced. + /// Generate a new column catalog for this expression. /// Such as `t.v` in subquery: select t.v from (select a as v from t1) t. - pub fn output_column_catalog_for_alias_table(&self, alias_table_id: String) -> ColumnCatalog { - let (column_id, data_type) = match self { - BoundExpr::Constant(e) => (e.to_string(), e.data_type()), + /// Constant and BinaryOp returns empty table_id. + pub fn output_column_catalog(&self) -> ColumnCatalog { + let (table_id, column_id, data_type) = match self { + BoundExpr::Constant(e) => (String::new(), e.to_string(), e.data_type()), BoundExpr::ColumnRef(e) => ( + e.column_catalog.table_id.clone(), e.column_catalog.column_id.clone(), e.column_catalog.desc.data_type.clone(), ), BoundExpr::InputRef(_) => unreachable!(), BoundExpr::BinaryOp(e) => { - let l = e - .left - .output_column_catalog_for_alias_table(alias_table_id.clone()); - let r = e - .right - .output_column_catalog_for_alias_table(alias_table_id.clone()); + let l = e.left.output_column_catalog(); + let r = e.right.output_column_catalog(); let column_id = format!("{}{}{}", l.column_id, e.op, r.column_id); let data_type = e.return_type.clone().unwrap(); - (column_id, data_type) + (String::new(), column_id, data_type) } BoundExpr::TypeCast(e) => { - let c = e - .expr - .output_column_catalog_for_alias_table(alias_table_id.clone()); + let c = e.expr.output_column_catalog(); + let table_id = c.table_id; let column_id = format!("{}({})", e.cast_type, c.column_id); let data_type = e.cast_type.clone(); - (column_id, data_type) + (table_id, column_id, data_type) } BoundExpr::AggFunc(agg) => { - let c = agg.exprs[0].output_column_catalog_for_alias_table(alias_table_id.clone()); + let c = agg.exprs[0].output_column_catalog(); + let table_id = c.table_id; let column_id = format!("{}({})", agg.func, c.column_id); let data_type = agg.return_type.clone(); - (column_id, data_type) + (table_id, column_id, data_type) } BoundExpr::Alias(e) => { + let table_id = e.table_id.clone(); let column_id = e.column_id.to_string(); let data_type = e.expr.return_type().unwrap(); - (column_id, data_type) + (table_id, column_id, data_type) } }; - ColumnCatalog::new(alias_table_id, column_id, self.nullable(), data_type) + ColumnCatalog::new(table_id, column_id, self.nullable(), data_type) } } diff --git a/src/binder/table/subquery.rs b/src/binder/table/subquery.rs index 0683add..09d2dae 100644 --- a/src/binder/table/subquery.rs +++ b/src/binder/table/subquery.rs @@ -17,7 +17,10 @@ impl BoundSubquery { self.query .select_list .iter() - .map(|expr| expr.output_column_catalog_for_alias_table(self.alias.clone())) + .map(|expr| { + expr.output_column_catalog() + .clone_with_table_id(self.alias.clone()) + }) .collect::>() } diff --git a/src/db.rs b/src/db.rs index 4fb5ae5..8eaab29 100644 --- a/src/db.rs +++ b/src/db.rs @@ -15,7 +15,7 @@ use crate::optimizer::{ RemoveNoopOperators, SimplifyCasts, }; use crate::parser::parse; -use crate::planner::{LogicalPlanError, Planner, PlannerContext}; +use crate::planner::{LogicalPlanError, Planner}; use crate::storage::{CsvStorage, Storage, StorageError, StorageImpl}; use crate::util::pretty_plan_tree_string; @@ -54,7 +54,7 @@ impl Database { Ok(data) } - fn default_optimizer(&self, root: PlanRef, planner_context: PlannerContext) -> HepOptimizer { + fn default_optimizer(&self, root: PlanRef) -> HepOptimizer { // the order of rules is important and affects the rule matching logic let batches = vec![ HepBatch::new( @@ -101,7 +101,7 @@ impl Database { ), ]; - HepOptimizer::new(batches, root, planner_context) + HepOptimizer::new(batches, root) } pub async fn run(&self, sql: &str) -> Result, DatabaseError> { @@ -131,7 +131,7 @@ impl Database { ); // 4. optimize logical plan to physical plan - let mut optimizer = self.default_optimizer(logical_plan, planner.context); + let mut optimizer = self.default_optimizer(logical_plan); let physical_plan = optimizer.find_best(); println!( "optimized_plan:\n{}\n", @@ -173,7 +173,7 @@ impl Database { pretty_plan_tree_string(&*logical_plan) ); - let mut optimizer = self.default_optimizer(logical_plan, planner.context); + let mut optimizer = self.default_optimizer(logical_plan); let physical_plan = optimizer.find_best(); _ = write!( explain_str, diff --git a/src/optimizer/core/rule.rs b/src/optimizer/core/rule.rs index e5feb89..a0654fa 100644 --- a/src/optimizer/core/rule.rs +++ b/src/optimizer/core/rule.rs @@ -1,7 +1,6 @@ use enum_dispatch::enum_dispatch; use super::{OptExpr, Pattern}; -use crate::planner::PlannerContext; /// A rule is to transform logically equivalent expression. There are two kinds of rules: /// @@ -14,7 +13,7 @@ pub trait Rule { /// Apply the rule and write the transformation result to `Substitute`. /// The pattern tree determines the opt_expr tree internal nodes type. - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, planner_context: &PlannerContext); + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute); } /// Define the transformed plans diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index 19c6011..dbd4e57 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -4,23 +4,17 @@ use super::matcher::HepMatcher; use crate::optimizer::core::{PatternMatcher, Rule, Substitute}; use crate::optimizer::rules::RuleImpl; use crate::optimizer::PlanRef; -use crate::planner::PlannerContext; use crate::util::pretty_plan_tree_string; pub struct HepOptimizer { batches: Vec, graph: HepGraph, - planner_context: PlannerContext, } impl HepOptimizer { - pub fn new(batches: Vec, root: PlanRef, planner_context: PlannerContext) -> Self { + pub fn new(batches: Vec, root: PlanRef) -> Self { let graph = HepGraph::new(root); - Self { - batches, - graph, - planner_context, - } + Self { batches, graph } } pub fn find_best(&mut self) -> PlanRef { @@ -103,7 +97,7 @@ impl HepOptimizer { if let Some(opt_expr) = matcher.match_opt_expr() { let mut substitute = Substitute::default(); let opt_expr_root = opt_expr.root.clone(); - rule.apply(opt_expr, &mut substitute, &self.planner_context); + rule.apply(opt_expr, &mut substitute); if !substitute.opt_exprs.is_empty() { assert!(substitute.opt_exprs.len() == 1); @@ -178,7 +172,7 @@ mod tests { HepBatchStrategy::once_topdown(), vec![PhysicalRewriteRule::create()], ); - let mut planner = HepOptimizer::new(vec![batch], root, Default::default()); + let mut planner = HepOptimizer::new(vec![batch], root); let new_plan = planner.find_best(); assert_eq!( new_plan.as_physical_project().unwrap().logical().exprs()[0], diff --git a/src/optimizer/input_ref_rewriter.rs b/src/optimizer/input_ref_rewriter.rs index 0509a4c..d600bff 100644 --- a/src/optimizer/input_ref_rewriter.rs +++ b/src/optimizer/input_ref_rewriter.rs @@ -29,9 +29,8 @@ impl InputRefRewriter { // Find alias expr in bindings. if let Some(idx) = self.bindings.iter().position(|e| { - if let BoundExpr::Alias(alias) = e { - let column_catalog = - e.output_column_catalog_for_alias_table(alias.table_id.clone()); + if let BoundExpr::Alias(_) = e { + let column_catalog = e.output_column_catalog(); let alias_expr = &BoundExpr::ColumnRef(BoundColumnRef { column_catalog }); if expr == alias_expr { return true; diff --git a/src/optimizer/plan_node/dummy.rs b/src/optimizer/plan_node/dummy.rs index cc46925..48faa61 100644 --- a/src/optimizer/plan_node/dummy.rs +++ b/src/optimizer/plan_node/dummy.rs @@ -22,13 +22,9 @@ impl PlanNode for Dummy { vec![] } - fn output_columns(&self, _base_table_id: String) -> Vec { + fn output_columns(&self) -> Vec { vec![] } - - fn get_based_table_id(&self) -> crate::catalog::TableId { - "Dummy".to_string() - } } impl PlanTreeNode for Dummy { diff --git a/src/optimizer/plan_node/logical_agg.rs b/src/optimizer/plan_node/logical_agg.rs index e9ce1b5..1c25b61 100644 --- a/src/optimizer/plan_node/logical_agg.rs +++ b/src/optimizer/plan_node/logical_agg.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{PlanNode, PlanRef, PlanTreeNode}; use crate::binder::BoundExpr; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct LogicalAgg { @@ -43,17 +43,13 @@ impl PlanNode for LogicalAgg { .collect::>() } - fn output_columns(&self, base_table_id: String) -> Vec { + fn output_columns(&self) -> Vec { self.group_by .iter() .chain(self.agg_funcs.iter()) - .map(|e| e.output_column_catalog_for_alias_table(base_table_id.clone())) + .map(|e| e.output_column_catalog()) .collect::>() } - - fn get_based_table_id(&self) -> TableId { - self.children()[0].get_based_table_id() - } } impl PlanTreeNode for LogicalAgg { diff --git a/src/optimizer/plan_node/logical_filter.rs b/src/optimizer/plan_node/logical_filter.rs index 1d2cbf5..8b9604b 100644 --- a/src/optimizer/plan_node/logical_filter.rs +++ b/src/optimizer/plan_node/logical_filter.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{PlanNode, PlanRef, PlanTreeNode}; use crate::binder::BoundExpr; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct LogicalFilter { @@ -32,12 +32,8 @@ impl PlanNode for LogicalFilter { self.expr.get_referenced_column_catalog() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.children()[0].output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.children()[0].get_based_table_id() + fn output_columns(&self) -> Vec { + self.children()[0].output_columns() } } diff --git a/src/optimizer/plan_node/logical_join.rs b/src/optimizer/plan_node/logical_join.rs index b8c0b85..3001b49 100644 --- a/src/optimizer/plan_node/logical_join.rs +++ b/src/optimizer/plan_node/logical_join.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{PlanNode, PlanRef, PlanTreeNode}; use crate::binder::{JoinCondition, JoinType}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct LogicalJoin { @@ -30,8 +30,7 @@ impl LogicalJoin { join_condition, join_output_columns: vec![], }; - let base_table_id = join.get_based_table_id(); - join.join_output_columns = join.join_output_columns_internal(base_table_id); + join.join_output_columns = join.join_output_columns_internal(); join } @@ -80,7 +79,7 @@ impl LogicalJoin { /// /// So in the left child schema, b's fields is nullable, therefore we should use left join /// schema directly, rather than set b's fields as non-nullable. - fn join_output_columns_internal(&self, base_table_id: String) -> Vec { + fn join_output_columns_internal(&self) -> Vec { let (left_join_keys_force_nullable, right_join_keys_force_nullable) = match self.join_type { JoinType::Inner => (false, false), JoinType::Left => (false, true), @@ -90,7 +89,7 @@ impl LogicalJoin { }; let left_fields = self .left - .output_columns(base_table_id.clone()) + .output_columns() .iter() .map(|c| { c.clone_with_nullable( @@ -102,7 +101,7 @@ impl LogicalJoin { .collect::>(); let right_fields = self .right - .output_columns(base_table_id) + .output_columns() .iter() .map(|c| { c.clone_with_nullable( @@ -140,12 +139,8 @@ impl PlanNode for LogicalJoin { } } - fn output_columns(&self, base_table_id: String) -> Vec { - self.join_output_columns_internal(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.children()[0].get_based_table_id() + fn output_columns(&self) -> Vec { + self.join_output_columns_internal() } } @@ -209,9 +204,8 @@ mod tests { let cond = build_join_condition_eq("t1", "b1", "t2", "b1"); let plan = LogicalJoin::new(t1.clone(), t2.clone(), JoinType::Inner, cond.clone()); - let based_table_id = plan.get_based_table_id(); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], false), build_columns_catalog("t2", vec!["a2", "b1", "c2"], false), @@ -221,7 +215,7 @@ mod tests { let plan = LogicalJoin::new(t1.clone(), t2.clone(), JoinType::Left, cond.clone()); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], false), build_columns_catalog("t2", vec!["a2", "b1", "c2"], true), @@ -231,7 +225,7 @@ mod tests { let plan = LogicalJoin::new(t1.clone(), t2.clone(), JoinType::Right, cond.clone()); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), build_columns_catalog("t2", vec!["a2", "b1", "c2"], false), @@ -241,7 +235,7 @@ mod tests { let plan = LogicalJoin::new(t1, t2, JoinType::Full, cond); assert_eq!( - plan.join_output_columns_internal(based_table_id), + plan.join_output_columns_internal(), vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), build_columns_catalog("t2", vec!["a2", "b1", "c2"], true), @@ -288,9 +282,8 @@ mod tests { JoinType::Inner, cond2.clone(), ); - let based_table_id = plan.get_based_table_id(); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], false), @@ -314,7 +307,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], false), @@ -338,7 +331,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), @@ -362,7 +355,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), @@ -387,7 +380,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], false), @@ -411,7 +404,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], false), @@ -435,7 +428,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), @@ -459,7 +452,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), @@ -484,7 +477,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), @@ -508,7 +501,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), @@ -532,7 +525,7 @@ mod tests { cond2.clone(), ); assert_eq!( - plan.join_output_columns_internal(based_table_id.clone()), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), @@ -551,7 +544,7 @@ mod tests { cond2, ); assert_eq!( - plan.join_output_columns_internal(based_table_id), + plan.join_output_columns_internal(), vec![ vec![ build_columns_catalog("t1", vec!["a1", "b1", "c1"], true), diff --git a/src/optimizer/plan_node/logical_limit.rs b/src/optimizer/plan_node/logical_limit.rs index 46bfd3d..7121f82 100644 --- a/src/optimizer/plan_node/logical_limit.rs +++ b/src/optimizer/plan_node/logical_limit.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{PlanNode, PlanRef, PlanTreeNode}; use crate::binder::BoundExpr; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct LogicalLimit { @@ -39,12 +39,8 @@ impl PlanNode for LogicalLimit { vec![] } - fn output_columns(&self, base_table_id: String) -> Vec { - self.children()[0].output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.children()[0].get_based_table_id() + fn output_columns(&self) -> Vec { + self.children()[0].output_columns() } } diff --git a/src/optimizer/plan_node/logical_order.rs b/src/optimizer/plan_node/logical_order.rs index b9555d7..f11f8cc 100644 --- a/src/optimizer/plan_node/logical_order.rs +++ b/src/optimizer/plan_node/logical_order.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{PlanNode, PlanRef, PlanTreeNode}; use crate::binder::BoundOrderBy; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct LogicalOrder { @@ -33,12 +33,8 @@ impl PlanNode for LogicalOrder { .collect::>() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.children()[0].output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.children()[0].get_based_table_id() + fn output_columns(&self) -> Vec { + self.children()[0].output_columns() } } diff --git a/src/optimizer/plan_node/logical_project.rs b/src/optimizer/plan_node/logical_project.rs index 3a56edc..6f372bd 100644 --- a/src/optimizer/plan_node/logical_project.rs +++ b/src/optimizer/plan_node/logical_project.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{PlanNode, PlanRef, PlanTreeNode}; use crate::binder::BoundExpr; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct LogicalProject { @@ -35,16 +35,12 @@ impl PlanNode for LogicalProject { .collect::>() } - fn output_columns(&self, base_table_id: String) -> Vec { + fn output_columns(&self) -> Vec { self.exprs .iter() - .map(|e| e.output_column_catalog_for_alias_table(base_table_id.clone())) + .map(|e| e.output_column_catalog()) .collect::>() } - - fn get_based_table_id(&self) -> TableId { - self.children()[0].get_based_table_id() - } } impl PlanTreeNode for LogicalProject { diff --git a/src/optimizer/plan_node/logical_table_scan.rs b/src/optimizer/plan_node/logical_table_scan.rs index 6335d36..96c5208 100644 --- a/src/optimizer/plan_node/logical_table_scan.rs +++ b/src/optimizer/plan_node/logical_table_scan.rs @@ -62,7 +62,7 @@ impl PlanNode for LogicalTableScan { self.columns() } - fn output_columns(&self, _: String) -> Vec { + fn output_columns(&self) -> Vec { if let Some(alias) = self.table_alias() { self.columns() .iter() @@ -72,10 +72,6 @@ impl PlanNode for LogicalTableScan { self.columns() } } - - fn get_based_table_id(&self) -> TableId { - self.table_id.clone() - } } impl PlanTreeNode for LogicalTableScan { diff --git a/src/optimizer/plan_node/mod.rs b/src/optimizer/plan_node/mod.rs index c6bda56..e5b4a9f 100644 --- a/src/optimizer/plan_node/mod.rs +++ b/src/optimizer/plan_node/mod.rs @@ -41,7 +41,7 @@ pub use physical_simple_agg::*; pub use physical_table_scan::*; pub use plan_node_traits::*; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; /// The common trait over all plan nodes. Used by optimizer framework which will treat all node as /// `dyn PlanNode`. Meanwhile, we split the trait into lots of sub-traits so that we can easily use @@ -53,11 +53,7 @@ pub trait PlanNode: fn referenced_columns(&self) -> Vec; /// Return output column catalog which converted from `BoundExpr`. - fn output_columns(&self, base_table_id: String) -> Vec; - - // Get this PlanNode based TableId which could be TableScan Id or Join left child based table - // id. - fn get_based_table_id(&self) -> TableId; + fn output_columns(&self) -> Vec; } impl_downcast!(PlanNode); @@ -92,49 +88,6 @@ impl dyn PlanNode { | PlanNodeType::PhysicalCrossJoin => false, } } - - pub fn contains_column_ref_expr(&self) -> bool { - match self.node_type() { - PlanNodeType::Dummy => false, - PlanNodeType::LogicalTableScan => false, - PlanNodeType::LogicalProject => self - .as_logical_project() - .unwrap() - .exprs() - .iter() - .any(|e| e.contains_column_ref()), - PlanNodeType::LogicalFilter => self - .as_logical_filter() - .unwrap() - .expr() - .contains_column_ref(), - PlanNodeType::LogicalAgg => { - let plan = self.as_logical_agg().unwrap(); - plan.group_by() - .iter() - .chain(plan.agg_funcs().iter()) - .any(|e| e.contains_column_ref()) - } - PlanNodeType::LogicalLimit => false, - PlanNodeType::LogicalOrder => { - let plan = self.as_logical_order().unwrap(); - plan.order_by().iter().any(|e| e.expr.contains_column_ref()) - } - PlanNodeType::LogicalJoin => { - let plan = self.as_logical_join().unwrap(); - plan.left().contains_column_ref_expr() || plan.right().contains_column_ref_expr() - } - PlanNodeType::PhysicalTableScan => false, - PlanNodeType::PhysicalProject => false, - PlanNodeType::PhysicalFilter => false, - PlanNodeType::PhysicalSimpleAgg => false, - PlanNodeType::PhysicalHashAgg => false, - PlanNodeType::PhysicalLimit => false, - PlanNodeType::PhysicalOrder => false, - PlanNodeType::PhysicalHashJoin => false, - PlanNodeType::PhysicalCrossJoin => false, - } - } } /// The type of reference to a plan node. diff --git a/src/optimizer/plan_node/physical_cross_join.rs b/src/optimizer/plan_node/physical_cross_join.rs index 137abff..0f5bc00 100644 --- a/src/optimizer/plan_node/physical_cross_join.rs +++ b/src/optimizer/plan_node/physical_cross_join.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{LogicalJoin, PlanNode, PlanRef, PlanTreeNode}; use crate::binder::JoinType; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalCrossJoin { @@ -41,12 +41,8 @@ impl PlanNode for PhysicalCrossJoin { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_filter.rs b/src/optimizer/plan_node/physical_filter.rs index bb27b81..2c3b4ce 100644 --- a/src/optimizer/plan_node/physical_filter.rs +++ b/src/optimizer/plan_node/physical_filter.rs @@ -2,7 +2,7 @@ use core::fmt; use std::sync::Arc; use super::{LogicalFilter, PlanNode, PlanRef, PlanTreeNode}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalFilter { @@ -24,12 +24,8 @@ impl PlanNode for PhysicalFilter { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_hash_agg.rs b/src/optimizer/plan_node/physical_hash_agg.rs index 977be58..06c5626 100644 --- a/src/optimizer/plan_node/physical_hash_agg.rs +++ b/src/optimizer/plan_node/physical_hash_agg.rs @@ -2,7 +2,7 @@ use std::fmt; use std::sync::Arc; use super::{LogicalAgg, PlanNode, PlanRef, PlanTreeNode}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalHashAgg { @@ -24,12 +24,8 @@ impl PlanNode for PhysicalHashAgg { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_hash_join.rs b/src/optimizer/plan_node/physical_hash_join.rs index fe60023..381b69b 100644 --- a/src/optimizer/plan_node/physical_hash_join.rs +++ b/src/optimizer/plan_node/physical_hash_join.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::{LogicalJoin, PlanNode, PlanRef, PlanTreeNode}; use crate::binder::{JoinCondition, JoinType}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalHashJoin { @@ -45,12 +45,8 @@ impl PlanNode for PhysicalHashJoin { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_limit.rs b/src/optimizer/plan_node/physical_limit.rs index 5f285d6..4df8d81 100644 --- a/src/optimizer/plan_node/physical_limit.rs +++ b/src/optimizer/plan_node/physical_limit.rs @@ -2,7 +2,7 @@ use core::fmt; use std::sync::Arc; use super::{LogicalLimit, PlanNode, PlanRef, PlanTreeNode}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalLimit { @@ -24,12 +24,8 @@ impl PlanNode for PhysicalLimit { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_order.rs b/src/optimizer/plan_node/physical_order.rs index e3298c4..c5ed713 100644 --- a/src/optimizer/plan_node/physical_order.rs +++ b/src/optimizer/plan_node/physical_order.rs @@ -2,7 +2,7 @@ use core::fmt; use std::sync::Arc; use super::{LogicalOrder, PlanNode, PlanRef, PlanTreeNode}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalOrder { @@ -24,12 +24,8 @@ impl PlanNode for PhysicalOrder { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_project.rs b/src/optimizer/plan_node/physical_project.rs index aa3e73b..9412e5e 100644 --- a/src/optimizer/plan_node/physical_project.rs +++ b/src/optimizer/plan_node/physical_project.rs @@ -2,7 +2,7 @@ use std::fmt; use std::sync::Arc; use super::{LogicalProject, PlanNode, PlanRef, PlanTreeNode}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalProject { @@ -24,12 +24,8 @@ impl PlanNode for PhysicalProject { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_simple_agg.rs b/src/optimizer/plan_node/physical_simple_agg.rs index 6b521d4..9921910 100644 --- a/src/optimizer/plan_node/physical_simple_agg.rs +++ b/src/optimizer/plan_node/physical_simple_agg.rs @@ -2,7 +2,7 @@ use std::fmt; use std::sync::Arc; use super::{LogicalAgg, PlanNode, PlanRef, PlanTreeNode}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalSimpleAgg { @@ -24,12 +24,8 @@ impl PlanNode for PhysicalSimpleAgg { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/plan_node/physical_table_scan.rs b/src/optimizer/plan_node/physical_table_scan.rs index 1633380..505034b 100644 --- a/src/optimizer/plan_node/physical_table_scan.rs +++ b/src/optimizer/plan_node/physical_table_scan.rs @@ -2,7 +2,7 @@ use std::fmt; use std::sync::Arc; use super::{LogicalTableScan, PlanNode, PlanRef, PlanTreeNode}; -use crate::catalog::{ColumnCatalog, TableId}; +use crate::catalog::ColumnCatalog; #[derive(Debug, Clone)] pub struct PhysicalTableScan { @@ -24,12 +24,8 @@ impl PlanNode for PhysicalTableScan { self.logical.referenced_columns() } - fn output_columns(&self, base_table_id: String) -> Vec { - self.logical().output_columns(base_table_id) - } - - fn get_based_table_id(&self) -> TableId { - self.logical().get_based_table_id() + fn output_columns(&self) -> Vec { + self.logical().output_columns() } } diff --git a/src/optimizer/rules/column_pruning.rs b/src/optimizer/rules/column_pruning.rs index c761548..0e9eb25 100644 --- a/src/optimizer/rules/column_pruning.rs +++ b/src/optimizer/rules/column_pruning.rs @@ -9,7 +9,6 @@ use crate::optimizer::core::{ }; use crate::optimizer::rules::util::is_superset_cols; use crate::optimizer::{Dummy, LogicalProject, LogicalTableScan, PlanNodeType}; -use crate::planner::PlannerContext; lazy_static! { static ref PUSH_PROJECT_INTO_TABLE_SCAN_RULE: Pattern = { @@ -58,7 +57,7 @@ impl Rule for PushProjectIntoTableScan { &PUSH_PROJECT_INTO_TABLE_SCAN_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let project_opt_expr_root = opt_expr.root; let table_scan_opt_expr = opt_expr.children[0].clone(); let project_node = project_opt_expr_root @@ -123,7 +122,7 @@ impl Rule for PushProjectThroughChild { &PUSH_PROJECT_THROUGH_CHILD_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let project_opt_expr_root = opt_expr.root; let child_opt_expr = opt_expr.children[0].clone(); @@ -135,13 +134,7 @@ impl Rule for PushProjectThroughChild { let mut child_children_cols = child_plan_ref .children() .iter() - .flat_map(|c| { - c.output_columns( - planner_context - .find_subquery_alias(c) - .unwrap_or_else(|| c.get_based_table_id()), - ) - }) + .flat_map(|c| c.output_columns()) .collect::>(); // distinct cols @@ -162,11 +155,7 @@ impl Rule for PushProjectThroughChild { // such as: select a, t2.v1 as max_b from t1 cross join (select max(b) as v1 // from t1) t2; // `t2.v1` should be resolved in child_child_plan output_columns. - let base_table_id = planner_context - .find_subquery_alias(child_child_plan) - .unwrap_or_else(|| child_child_plan.get_based_table_id()); - let mut child_child_output_cols = - child_child_plan.output_columns(base_table_id); + let mut child_child_output_cols = child_child_plan.output_columns(); // for child's child, filter corresponding required columns let mut required_cols_in_child_child = child_child_output_cols .clone() @@ -233,7 +222,7 @@ impl Rule for RemoveNoopOperators { &REMOVE_NOOP_OPERATORS_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { // eliminate no-op project for those children type: project{input: project/aggregate} let project_opt_expr_root = opt_expr.root; let project_plan_ref = project_opt_expr_root.get_plan_ref(); @@ -305,7 +294,7 @@ LogicalProject: exprs [t1.a:Nullable(Int32) + 1] HepBatchStrategy::fix_point_topdown(100), vec![PushProjectIntoTableScan::create()], ); - let mut optimizer = HepOptimizer::new(vec![batch], logical_plan, Default::default()); + let mut optimizer = HepOptimizer::new(vec![batch], logical_plan); let optimized_plan = optimizer.find_best(); @@ -370,8 +359,7 @@ LogicalProject: exprs [employee.id:Nullable(Int32), employee.first_name:Nullable HepBatchStrategy::fix_point_topdown(100), vec![RemoveNoopOperators::create()], ); - let mut optimizer = - HepOptimizer::new(vec![batch, final_batch], logical_plan, Default::default()); + let mut optimizer = HepOptimizer::new(vec![batch, final_batch], logical_plan); let optimized_plan = optimizer.find_best(); diff --git a/src/optimizer/rules/combine_operators.rs b/src/optimizer/rules/combine_operators.rs index 6bf0a58..a8eb194 100644 --- a/src/optimizer/rules/combine_operators.rs +++ b/src/optimizer/rules/combine_operators.rs @@ -6,7 +6,6 @@ use crate::optimizer::core::{ OptExpr, OptExprNode, Pattern, PatternChildrenPredicate, Rule, Substitute, }; use crate::optimizer::{Dummy, LogicalFilter, PlanNodeType}; -use crate::planner::PlannerContext; lazy_static! { static ref COLLAPSE_PROJECT_RULE: Pattern = { @@ -44,7 +43,7 @@ impl Rule for CollapseProject { &COLLAPSE_PROJECT_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { // TODO: handle column alias let project_opt_expr = opt_expr; let next_project_opt_expr = project_opt_expr.children[0].clone(); @@ -83,7 +82,7 @@ impl Rule for CombineFilter { &COMBINE_FILTERS } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { // TODO: handle column alias let filter_opt_expr = opt_expr; let next_filter_opt_expr = filter_opt_expr.children[0].clone(); diff --git a/src/optimizer/rules/mod.rs b/src/optimizer/rules/mod.rs index 52f5c3c..78a0aca 100644 --- a/src/optimizer/rules/mod.rs +++ b/src/optimizer/rules/mod.rs @@ -17,7 +17,6 @@ pub use simplification::*; use strum_macros::AsRefStr; use crate::optimizer::core::{OptExpr, Pattern, Rule, Substitute}; -use crate::planner::PlannerContext; #[enum_dispatch(Rule)] #[derive(Clone, AsRefStr)] diff --git a/src/optimizer/rules/physical_rewrite.rs b/src/optimizer/rules/physical_rewrite.rs index 6133cd3..6561369 100644 --- a/src/optimizer/rules/physical_rewrite.rs +++ b/src/optimizer/rules/physical_rewrite.rs @@ -1,7 +1,6 @@ use super::RuleImpl; use crate::optimizer::core::*; use crate::optimizer::{PhysicalRewriter, PlanRewriter}; -use crate::planner::PlannerContext; lazy_static! { static ref PATTERN: Pattern = { @@ -26,7 +25,7 @@ impl Rule for PhysicalRewriteRule { &PATTERN } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let mut rewriter = PhysicalRewriter::default(); let plan = opt_expr.to_plan_ref(); let new_plan = rewriter.rewrite(plan); diff --git a/src/optimizer/rules/pushdown_limit.rs b/src/optimizer/rules/pushdown_limit.rs index fc2ee2a..077f2e6 100644 --- a/src/optimizer/rules/pushdown_limit.rs +++ b/src/optimizer/rules/pushdown_limit.rs @@ -6,7 +6,6 @@ use crate::optimizer::core::{ OptExpr, OptExprNode, Pattern, PatternChildrenPredicate, Rule, Substitute, }; use crate::optimizer::{Dummy, LogicalLimit, LogicalTableScan, PlanNodeType}; -use crate::planner::PlannerContext; lazy_static! { static ref LIMIT_PROJECT_TRANSPOSE_RULE: Pattern = { @@ -62,7 +61,7 @@ impl Rule for LimitProjectTranspose { &LIMIT_PROJECT_TRANSPOSE_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let limit_opt_expr_root = opt_expr.root; let project_opt_expr = opt_expr.children[0].clone(); @@ -91,7 +90,7 @@ impl Rule for EliminateLimits { &ELIMINATE_LIMITS_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let limit_opt_expr_root = opt_expr.root; let next_limit_opt_expr = opt_expr.children[0].clone(); let next_limit_opt_expr_root = next_limit_opt_expr.root; @@ -157,7 +156,7 @@ impl Rule for PushLimitThroughJoin { &PUSH_LIMIT_THROUGH_JOIN_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let limit_opt_expr_root = opt_expr.root; let limit_node = limit_opt_expr_root .get_plan_ref() @@ -238,7 +237,7 @@ impl Rule for PushLimitIntoTableScan { &PUSH_LIMIT_INTO_TABLE_SCAN_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let limit_opt_expr_root = opt_expr.root; let limit_node = limit_opt_expr_root .get_plan_ref() @@ -307,7 +306,7 @@ LogicalProject: exprs [t1.a:Nullable(Int32)] HepBatchStrategy::fix_point_topdown(100), vec![LimitProjectTranspose::create()], ); - let mut optimizer = HepOptimizer::new(vec![batch], logical_plan, Default::default()); + let mut optimizer = HepOptimizer::new(vec![batch], logical_plan); let optimized_plan = optimizer.find_best(); @@ -365,7 +364,7 @@ LogicalProject: exprs [t1.a:Nullable(Int32)] EliminateLimits::create(), ], ); - let mut optimizer = HepOptimizer::new(vec![batch], logical_plan, Default::default()); + let mut optimizer = HepOptimizer::new(vec![batch], logical_plan); let optimized_plan = optimizer.find_best(); @@ -395,7 +394,7 @@ LogicalProject: exprs [t1.a:Nullable(Int32)] PushLimitIntoTableScan::create(), ], ); - let mut optimizer = HepOptimizer::new(vec![batch], logical_plan, Default::default()); + let mut optimizer = HepOptimizer::new(vec![batch], logical_plan); let optimized_plan = optimizer.find_best(); diff --git a/src/optimizer/rules/pushdown_predicates.rs b/src/optimizer/rules/pushdown_predicates.rs index a294ea2..e4f0a10 100644 --- a/src/optimizer/rules/pushdown_predicates.rs +++ b/src/optimizer/rules/pushdown_predicates.rs @@ -12,7 +12,6 @@ use crate::catalog::ColumnCatalog; use crate::optimizer::core::*; use crate::optimizer::expr_rewriter::ExprRewriter; use crate::optimizer::{Dummy, LogicalFilter, LogicalJoin, PlanNodeType}; -use crate::planner::PlannerContext; lazy_static! { static ref PUSH_PREDICATE_THROUGH_JOIN: Pattern = { @@ -106,7 +105,7 @@ impl Rule for PushPredicateThroughJoin { &PUSH_PREDICATE_THROUGH_JOIN } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let join_opt_expr = opt_expr.children[0].clone(); let join_node = join_opt_expr.root.get_plan_ref().as_logical_join().unwrap(); if !self.can_push_through(join_node.join_type()) { @@ -114,9 +113,9 @@ impl Rule for PushPredicateThroughJoin { } let left = join_node.left(); - let left_output_cols = left.output_columns(left.get_based_table_id()); + let left_output_cols = left.output_columns(); let right = join_node.right(); - let right_output_cols = right.output_columns(right.get_based_table_id()); + let right_output_cols = right.output_columns(); let filter_opt_expr = opt_expr; let join_left_opt_expr = join_opt_expr.children[0].clone(); @@ -206,7 +205,7 @@ impl Rule for PushPredicateThroughNonJoin { &PUSH_PREDICATE_THROUGH_NON_JOIN } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let filter_opt_expr = opt_expr; let child_opt_expr = filter_opt_expr.children[0].clone(); let child_node = child_opt_expr.root.get_plan_ref(); @@ -357,7 +356,7 @@ LogicalProject: exprs [t1.a:Nullable(Int32), t1.b:Nullable(Int32), t1.c:Nullable HepBatchStrategy::fix_point_topdown(100), vec![PushPredicateThroughJoin::create()], ); - let mut optimizer = HepOptimizer::new(vec![batch], logical_plan, Default::default()); + let mut optimizer = HepOptimizer::new(vec![batch], logical_plan); let optimized_plan = optimizer.find_best(); @@ -407,7 +406,7 @@ LogicalProject: exprs [t1.a:Nullable(Int32), t1.b:Nullable(Int32), t1.c:Nullable ), ]; - let mut optimizer = HepOptimizer::new(batches, logical_plan, Default::default()); + let mut optimizer = HepOptimizer::new(batches, logical_plan); let optimized_plan = optimizer.find_best(); diff --git a/src/optimizer/rules/simplification.rs b/src/optimizer/rules/simplification.rs index 450e71d..c1ee58c 100644 --- a/src/optimizer/rules/simplification.rs +++ b/src/optimizer/rules/simplification.rs @@ -8,7 +8,6 @@ use crate::optimizer::{ LogicalAgg, LogicalFilter, LogicalJoin, LogicalLimit, LogicalOrder, LogicalProject, PlanRef, PlanRewriter, }; -use crate::planner::PlannerContext; lazy_static! { static ref SIMPLIFY_CASTS_RULE: Pattern = { @@ -33,7 +32,7 @@ impl Rule for SimplifyCasts { &SIMPLIFY_CASTS_RULE } - fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, _planner_context: &PlannerContext) { + fn apply(&self, opt_expr: OptExpr, result: &mut Substitute) { let mut rewriter = SimplifyCastsRewriter::default(); let plan = opt_expr.to_plan_ref(); let new_plan = rewriter.rewrite(plan); diff --git a/src/planner/mod.rs b/src/planner/mod.rs index 2348b1e..7b334b5 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -1,32 +1,11 @@ mod select; mod util; -use std::collections::HashMap; - use crate::binder::BoundStatement; use crate::optimizer::PlanRef; #[derive(Default)] -pub struct Planner { - pub context: PlannerContext, -} - -#[derive(Default, Debug)] -pub struct PlannerContext { - // subquery alias to subquery plan - pub subquery_context: HashMap, -} - -impl PlannerContext { - pub fn find_subquery_alias(&self, plan_ref: &PlanRef) -> Option { - for (alias, p) in &self.subquery_context { - if p == plan_ref { - return Some(alias.clone()); - } - } - None - } -} +pub struct Planner {} impl Planner { pub fn plan(&mut self, stmt: BoundStatement) -> Result { @@ -134,10 +113,7 @@ mod planner_test { assert!(node.is_ok()); let plan_ref = node.unwrap(); assert_eq!(plan_ref.node_type(), PlanNodeType::LogicalLimit); - assert_eq!( - plan_ref.output_columns(plan_ref.get_based_table_id()).len(), - 1 - ); + assert_eq!(plan_ref.output_columns().len(), 1); dbg!(plan_ref); } diff --git a/src/planner/select.rs b/src/planner/select.rs index 655dc97..d00b54e 100644 --- a/src/planner/select.rs +++ b/src/planner/select.rs @@ -74,9 +74,6 @@ impl Planner { BoundTableRef::Subquery(subquery) => { let subquery = subquery.clone(); let plan_ref = self.plan_select(*subquery.query)?; - self.context - .subquery_context - .insert(subquery.alias, plan_ref.clone()); Ok(plan_ref) } } diff --git a/tests/planner/column-pruning.planner.sql b/tests/planner/column-pruning.planner.sql index 6c80419..c537aa7 100644 --- a/tests/planner/column-pruning.planner.sql +++ b/tests/planner/column-pruning.planner.sql @@ -116,3 +116,34 @@ PhysicalProject: exprs [t1.a:Int64, (t2.v1:Int64) as t1.max_b] PhysicalTableScan: table: #t1, columns: [b] */ +-- PushProjectThroughChild: column pruning across multiple subquery + +select t1.a, sub0.v0, sub1.v0 from t1 cross join (select max(b) as v0 from t1) sub0 cross join (select min(b) as v0 from t1) sub1; + +/* +original plan: +LogicalProject: exprs [t1.a:Int64, sub0.v0:Int64, sub1.v0:Int64] + LogicalJoin: type Cross, cond None + LogicalJoin: type Cross, cond None + LogicalTableScan: table: #t1, columns: [a, b, c] + LogicalProject: exprs [((Max(t1.b:Int64):Int64) as t1.v0) as sub0.v0] + LogicalAgg: agg_funcs [Max(t1.b:Int64):Int64] group_by [] + LogicalTableScan: table: #t1, columns: [a, b, c] + LogicalProject: exprs [((Min(t1.b:Int64):Int64) as t1.v0) as sub1.v0] + LogicalAgg: agg_funcs [Min(t1.b:Int64):Int64] group_by [] + LogicalTableScan: table: #t1, columns: [a, b, c] + +optimized plan: +PhysicalProject: exprs [t1.a:Int64, sub0.v0:Int64, sub1.v0:Int64] + PhysicalCrossJoin: type Cross + PhysicalProject: exprs [t1.a:Nullable(Int64), sub0.v0:Nullable(Int64)] + PhysicalCrossJoin: type Cross + PhysicalTableScan: table: #t1, columns: [a] + PhysicalProject: exprs [((Max(t1.b:Int64):Int64) as t1.v0) as sub0.v0] + PhysicalSimpleAgg: agg_funcs [Max(t1.b:Int64):Int64] group_by [] + PhysicalTableScan: table: #t1, columns: [b] + PhysicalProject: exprs [((Min(t1.b:Int64):Int64) as t1.v0) as sub1.v0] + PhysicalSimpleAgg: agg_funcs [Min(t1.b:Int64):Int64] group_by [] + PhysicalTableScan: table: #t1, columns: [b] +*/ + diff --git a/tests/planner/column-pruning.yml b/tests/planner/column-pruning.yml index a15986f..930aae9 100644 --- a/tests/planner/column-pruning.yml +++ b/tests/planner/column-pruning.yml @@ -29,3 +29,8 @@ select a, t2.v1 as max_b from t1 cross join (select max(b) as v1 from t1) t2 desc: | PushProjectThroughChild: column pruning across subquery + +- sql: | + select t1.a, sub0.v0, sub1.v0 from t1 cross join (select max(b) as v0 from t1) sub0 cross join (select min(b) as v0 from t1) sub1; + desc: | + PushProjectThroughChild: column pruning across multiple subquery