Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 18 additions & 33 deletions src/binder/expression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,6 @@ impl BoundExpr {
}
}

pub fn contains_column_ref(&self) -> bool {
match self {
BoundExpr::Constant(_) => false,
BoundExpr::InputRef(_) => false,
BoundExpr::ColumnRef(_) => true,
BoundExpr::BinaryOp(binary_op) => {
binary_op.left.contains_column_ref() || binary_op.right.contains_column_ref()
}
BoundExpr::TypeCast(tc) => tc.expr.contains_column_ref(),
BoundExpr::AggFunc(agg) => agg.exprs.iter().any(|arg| arg.contains_column_ref()),
BoundExpr::Alias(alias) => alias.expr.contains_column_ref(),
}
}

pub fn get_referenced_column_catalog(&self) -> Vec<ColumnCatalog> {
match self {
BoundExpr::Constant(_) => vec![],
Expand All @@ -85,48 +71,47 @@ impl BoundExpr {
}
}

/// Generate a new column catalog in table alias or subquery for outside referenced.
/// Generate a new column catalog for this expression.
/// Such as `t.v` in subquery: select t.v from (select a as v from t1) t.
pub fn output_column_catalog_for_alias_table(&self, alias_table_id: String) -> ColumnCatalog {
let (column_id, data_type) = match self {
BoundExpr::Constant(e) => (e.to_string(), e.data_type()),
/// Constant and BinaryOp returns empty table_id.
pub fn output_column_catalog(&self) -> ColumnCatalog {
let (table_id, column_id, data_type) = match self {
BoundExpr::Constant(e) => (String::new(), e.to_string(), e.data_type()),
BoundExpr::ColumnRef(e) => (
e.column_catalog.table_id.clone(),
e.column_catalog.column_id.clone(),
e.column_catalog.desc.data_type.clone(),
),
BoundExpr::InputRef(_) => unreachable!(),
BoundExpr::BinaryOp(e) => {
let l = e
.left
.output_column_catalog_for_alias_table(alias_table_id.clone());
let r = e
.right
.output_column_catalog_for_alias_table(alias_table_id.clone());
let l = e.left.output_column_catalog();
let r = e.right.output_column_catalog();
let column_id = format!("{}{}{}", l.column_id, e.op, r.column_id);
let data_type = e.return_type.clone().unwrap();
(column_id, data_type)
(String::new(), column_id, data_type)
}
BoundExpr::TypeCast(e) => {
let c = e
.expr
.output_column_catalog_for_alias_table(alias_table_id.clone());
let c = e.expr.output_column_catalog();
let table_id = c.table_id;
let column_id = format!("{}({})", e.cast_type, c.column_id);
let data_type = e.cast_type.clone();
(column_id, data_type)
(table_id, column_id, data_type)
}
BoundExpr::AggFunc(agg) => {
let c = agg.exprs[0].output_column_catalog_for_alias_table(alias_table_id.clone());
let c = agg.exprs[0].output_column_catalog();
let table_id = c.table_id;
let column_id = format!("{}({})", agg.func, c.column_id);
let data_type = agg.return_type.clone();
(column_id, data_type)
(table_id, column_id, data_type)
}
BoundExpr::Alias(e) => {
let table_id = e.table_id.clone();
let column_id = e.column_id.to_string();
let data_type = e.expr.return_type().unwrap();
(column_id, data_type)
(table_id, column_id, data_type)
}
};
ColumnCatalog::new(alias_table_id, column_id, self.nullable(), data_type)
ColumnCatalog::new(table_id, column_id, self.nullable(), data_type)
}
}

Expand Down
5 changes: 4 additions & 1 deletion src/binder/table/subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ impl BoundSubquery {
self.query
.select_list
.iter()
.map(|expr| expr.output_column_catalog_for_alias_table(self.alias.clone()))
.map(|expr| {
expr.output_column_catalog()
.clone_with_table_id(self.alias.clone())
})
.collect::<Vec<_>>()
}

Expand Down
10 changes: 5 additions & 5 deletions src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::optimizer::{
RemoveNoopOperators, SimplifyCasts,
};
use crate::parser::parse;
use crate::planner::{LogicalPlanError, Planner, PlannerContext};
use crate::planner::{LogicalPlanError, Planner};
use crate::storage::{CsvStorage, Storage, StorageError, StorageImpl};
use crate::util::pretty_plan_tree_string;

Expand Down Expand Up @@ -54,7 +54,7 @@ impl Database {
Ok(data)
}

fn default_optimizer(&self, root: PlanRef, planner_context: PlannerContext) -> HepOptimizer {
fn default_optimizer(&self, root: PlanRef) -> HepOptimizer {
// the order of rules is important and affects the rule matching logic
let batches = vec![
HepBatch::new(
Expand Down Expand Up @@ -101,7 +101,7 @@ impl Database {
),
];

HepOptimizer::new(batches, root, planner_context)
HepOptimizer::new(batches, root)
}

pub async fn run(&self, sql: &str) -> Result<Vec<RecordBatch>, DatabaseError> {
Expand Down Expand Up @@ -131,7 +131,7 @@ impl Database {
);

// 4. optimize logical plan to physical plan
let mut optimizer = self.default_optimizer(logical_plan, planner.context);
let mut optimizer = self.default_optimizer(logical_plan);
let physical_plan = optimizer.find_best();
println!(
"optimized_plan:\n{}\n",
Expand Down Expand Up @@ -173,7 +173,7 @@ impl Database {
pretty_plan_tree_string(&*logical_plan)
);

let mut optimizer = self.default_optimizer(logical_plan, planner.context);
let mut optimizer = self.default_optimizer(logical_plan);
let physical_plan = optimizer.find_best();
_ = write!(
explain_str,
Expand Down
3 changes: 1 addition & 2 deletions src/optimizer/core/rule.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use enum_dispatch::enum_dispatch;

use super::{OptExpr, Pattern};
use crate::planner::PlannerContext;

/// A rule is to transform logically equivalent expression. There are two kinds of rules:
///
Expand All @@ -14,7 +13,7 @@ pub trait Rule {

/// Apply the rule and write the transformation result to `Substitute`.
/// The pattern tree determines the opt_expr tree internal nodes type.
fn apply(&self, opt_expr: OptExpr, result: &mut Substitute, planner_context: &PlannerContext);
fn apply(&self, opt_expr: OptExpr, result: &mut Substitute);
}

/// Define the transformed plans
Expand Down
14 changes: 4 additions & 10 deletions src/optimizer/heuristic/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,17 @@ use super::matcher::HepMatcher;
use crate::optimizer::core::{PatternMatcher, Rule, Substitute};
use crate::optimizer::rules::RuleImpl;
use crate::optimizer::PlanRef;
use crate::planner::PlannerContext;
use crate::util::pretty_plan_tree_string;

pub struct HepOptimizer {
batches: Vec<HepBatch>,
graph: HepGraph,
planner_context: PlannerContext,
}

impl HepOptimizer {
pub fn new(batches: Vec<HepBatch>, root: PlanRef, planner_context: PlannerContext) -> Self {
pub fn new(batches: Vec<HepBatch>, root: PlanRef) -> Self {
let graph = HepGraph::new(root);
Self {
batches,
graph,
planner_context,
}
Self { batches, graph }
}

pub fn find_best(&mut self) -> PlanRef {
Expand Down Expand Up @@ -103,7 +97,7 @@ impl HepOptimizer {
if let Some(opt_expr) = matcher.match_opt_expr() {
let mut substitute = Substitute::default();
let opt_expr_root = opt_expr.root.clone();
rule.apply(opt_expr, &mut substitute, &self.planner_context);
rule.apply(opt_expr, &mut substitute);

if !substitute.opt_exprs.is_empty() {
assert!(substitute.opt_exprs.len() == 1);
Expand Down Expand Up @@ -178,7 +172,7 @@ mod tests {
HepBatchStrategy::once_topdown(),
vec![PhysicalRewriteRule::create()],
);
let mut planner = HepOptimizer::new(vec![batch], root, Default::default());
let mut planner = HepOptimizer::new(vec![batch], root);
let new_plan = planner.find_best();
assert_eq!(
new_plan.as_physical_project().unwrap().logical().exprs()[0],
Expand Down
5 changes: 2 additions & 3 deletions src/optimizer/input_ref_rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ impl InputRefRewriter {

// Find alias expr in bindings.
if let Some(idx) = self.bindings.iter().position(|e| {
if let BoundExpr::Alias(alias) = e {
let column_catalog =
e.output_column_catalog_for_alias_table(alias.table_id.clone());
if let BoundExpr::Alias(_) = e {
let column_catalog = e.output_column_catalog();
let alias_expr = &BoundExpr::ColumnRef(BoundColumnRef { column_catalog });
if expr == alias_expr {
return true;
Expand Down
6 changes: 1 addition & 5 deletions src/optimizer/plan_node/dummy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,9 @@ impl PlanNode for Dummy {
vec![]
}

fn output_columns(&self, _base_table_id: String) -> Vec<ColumnCatalog> {
fn output_columns(&self) -> Vec<ColumnCatalog> {
vec![]
}

fn get_based_table_id(&self) -> crate::catalog::TableId {
"Dummy".to_string()
}
}

impl PlanTreeNode for Dummy {
Expand Down
10 changes: 3 additions & 7 deletions src/optimizer/plan_node/logical_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;

use super::{PlanNode, PlanRef, PlanTreeNode};
use crate::binder::BoundExpr;
use crate::catalog::{ColumnCatalog, TableId};
use crate::catalog::ColumnCatalog;

#[derive(Debug, Clone)]
pub struct LogicalAgg {
Expand Down Expand Up @@ -43,17 +43,13 @@ impl PlanNode for LogicalAgg {
.collect::<Vec<_>>()
}

fn output_columns(&self, base_table_id: String) -> Vec<ColumnCatalog> {
fn output_columns(&self) -> Vec<ColumnCatalog> {
self.group_by
.iter()
.chain(self.agg_funcs.iter())
.map(|e| e.output_column_catalog_for_alias_table(base_table_id.clone()))
.map(|e| e.output_column_catalog())
.collect::<Vec<_>>()
}

fn get_based_table_id(&self) -> TableId {
self.children()[0].get_based_table_id()
}
}

impl PlanTreeNode for LogicalAgg {
Expand Down
10 changes: 3 additions & 7 deletions src/optimizer/plan_node/logical_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;

use super::{PlanNode, PlanRef, PlanTreeNode};
use crate::binder::BoundExpr;
use crate::catalog::{ColumnCatalog, TableId};
use crate::catalog::ColumnCatalog;

#[derive(Debug, Clone)]
pub struct LogicalFilter {
Expand Down Expand Up @@ -32,12 +32,8 @@ impl PlanNode for LogicalFilter {
self.expr.get_referenced_column_catalog()
}

fn output_columns(&self, base_table_id: String) -> Vec<ColumnCatalog> {
self.children()[0].output_columns(base_table_id)
}

fn get_based_table_id(&self) -> TableId {
self.children()[0].get_based_table_id()
fn output_columns(&self) -> Vec<ColumnCatalog> {
self.children()[0].output_columns()
}
}

Expand Down
Loading