From 97d5ac34f0f7ef8ee5434fc1d6d89e12deb28a6d Mon Sep 17 00:00:00 2001
From: Fedomn
Date: Tue, 20 Sep 2022 21:43:25 +0800
Subject: [PATCH] feat(distinct): convert select distinct to groupby with no aggregations

Signed-off-by: Fedomn
---
Reviewer notes (text in this section sits after the three-dash separator and
is not part of the commit message or of any hunk):

* Binder: `BoundSelect` gains a `select_distinct: bool` field, which the
  binder copies from `select.distinct`.
* Planner: when `stmt.select_distinct` is set, the current plan is wrapped in
  `LogicalAgg::new(vec![], stmt.select_list.clone(), plan)`, i.e. an aggregate
  node with an empty aggregate list whose group-by keys are the select list.
  The wrap happens after the existing `LogicalAgg` step and before the
  `LogicalOrder` step.
* Tests: the new planner test asserts that the root node is `LogicalProject`
  and that its first child is `LogicalAgg`; the slt file gains two
  `select distinct` cases.
* NOTE(review): the slt cases list their rows in a fixed order; presumably
  the executor emits groups in a stable order. Confirm, or sort the results.
* NOTE(review): for a query that combines DISTINCT with aggregates or an
  explicit GROUP BY, this node is stacked on top of the earlier `LogicalAgg`.
  Presumably the select-list expressions still resolve there. TODO confirm.
* NOTE(review): the context lines `pub limit: Option,`, `pub offset: Option,`
  and `pub order_by: Vec,` look like they lost their generic parameters in
  transit, and the author address is missing from the `From:` and
  `Signed-off-by:` lines. Verify against the repository before applying.
* NOTE(review): indentation inside the hunks was reconstructed; verify it
  against the target files if the patch is to be applied mechanically.

 src/binder/statement/mod.rs |  3 +++
 src/planner/mod.rs          | 30 ++++++++++++++++++++++++++++++
 src/planner/select.rs       |  5 +++++
 tests/slt/select.slt        | 15 +++++++++++++++
 4 files changed, 53 insertions(+)

diff --git a/src/binder/statement/mod.rs b/src/binder/statement/mod.rs
index 284661e..2e44836 100644
--- a/src/binder/statement/mod.rs
+++ b/src/binder/statement/mod.rs
@@ -19,6 +19,7 @@ pub struct BoundSelect {
     pub limit: Option,
     pub offset: Option,
     pub order_by: Vec,
+    pub select_distinct: bool,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -61,6 +62,7 @@ impl Binder {
                 }
             }
         }
+        let select_distinct = select.distinct;
 
         // bind where clause
         let where_clause = select
@@ -109,6 +111,7 @@ impl Binder {
             limit,
             offset,
             order_by,
+            select_distinct,
         })
     }
 
diff --git a/src/planner/mod.rs b/src/planner/mod.rs
index 93fa563..3ab0b8c 100644
--- a/src/planner/mod.rs
+++ b/src/planner/mod.rs
@@ -30,6 +30,22 @@ mod planner_test {
     };
     use crate::optimizer::PlanNodeType;
 
+    fn build_test_select_distinct_stmt() -> BoundStatement {
+        let c1 = build_bound_column_ref("t", "c1");
+        let t = build_table_ref("t", vec!["c1", "c2"]);
+
+        BoundStatement::Select(BoundSelect {
+            select_list: vec![c1],
+            from_table: Some(t),
+            where_clause: None,
+            group_by: vec![],
+            limit: None,
+            offset: None,
+            order_by: vec![],
+            select_distinct: true,
+        })
+    }
+
     fn build_test_select_stmt() -> BoundStatement {
         let c1 = build_bound_column_ref("t", "c1");
         let t = build_table_ref("t", vec!["c1", "c2"]);
@@ -49,6 +65,7 @@ mod planner_test {
             limit: Some(BoundExpr::Constant(10.into())),
             offset: None,
             order_by: vec![],
+            select_distinct: false,
         })
     }
 
@@ -83,6 +100,7 @@ mod planner_test {
             limit: None,
             offset: None,
             order_by: vec![],
+            select_distinct: false,
         })
     }
 
@@ -152,4 +170,16 @@ mod planner_test {
 
         dbg!(plan_ref);
     }
+
+    #[test]
+    fn test_plan_select_distinct_works() {
+        let stmt = build_test_select_distinct_stmt();
+        let p = Planner {};
+        let node = p.plan(stmt);
+        assert!(node.is_ok());
+        let plan_ref = node.unwrap();
+        assert_eq!(plan_ref.node_type(), PlanNodeType::LogicalProject);
+        assert_eq!(plan_ref.children()[0].node_type(), PlanNodeType::LogicalAgg);
+        dbg!(plan_ref);
+    }
 }
diff --git a/src/planner/select.rs b/src/planner/select.rs
index 66a1cf1..0d84657 100644
--- a/src/planner/select.rs
+++ b/src/planner/select.rs
@@ -26,6 +26,11 @@ impl Planner {
             plan = Arc::new(LogicalAgg::new(agg, stmt.group_by, plan));
         }
 
+        if stmt.select_distinct {
+            // convert distinct to groupby with no aggregations
+            plan = Arc::new(LogicalAgg::new(vec![], stmt.select_list.clone(), plan));
+        }
+
         // LogicalOrder should be below LogicalProject in tree due to it could contains column_ref
         if !stmt.order_by.is_empty() {
             plan = Arc::new(LogicalOrder::new(stmt.order_by, plan));
diff --git a/tests/slt/select.slt b/tests/slt/select.slt
index c946a3c..3d28ead 100644
--- a/tests/slt/select.slt
+++ b/tests/slt/select.slt
@@ -5,3 +5,18 @@ Bill CA 1 12000
 Gregg CO 2 10000
 John CO 3 11500
 Von (empty) 4 NULL
+
+query I
+select distinct state from employee
+----
+CA
+CO
+(empty)
+
+query II
+select distinct a, b from t2
+----
+10 2
+20 2
+30 3
+40 4