-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Optimizer: avoid every rule must recursive children in optimizer #4618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,6 +60,13 @@ pub trait OptimizerRule { | |
|
|
||
| /// A human readable name for this optimizer rule | ||
| fn name(&self) -> &str; | ||
|
|
||
| /// How should the rule be applied by the optimizer? See comments on [`ApplyOrder`] for details. | ||
| /// | ||
| /// If a rule use default None, its should traverse recursively plan inside itself | ||
| fn apply_order(&self) -> Option<ApplyOrder> { | ||
| None | ||
| } | ||
|
Comment on lines
+66
to
+69
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It' compatible with origin |
||
| } | ||
|
|
||
| /// Options to control the DataFusion Optimizer. | ||
|
|
@@ -180,6 +187,44 @@ pub struct Optimizer { | |
| pub rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>, | ||
| } | ||
|
|
||
| /// If a rule is with `ApplyOrder`, it means the optimizer will derive to handle children instead of | ||
| /// recursively handling in rule. | ||
| /// We just need handle a subtree pattern itself. | ||
| /// | ||
| /// Notice: **sometime** result after optimize still can be optimized, we need apply again. | ||
| /// | ||
| /// Usage Example: Merge Limit (subtree pattern is: Limit-Limit) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you |
||
| /// ```rust | ||
| /// use datafusion_expr::{Limit, LogicalPlan, LogicalPlanBuilder}; | ||
| /// use datafusion_common::Result; | ||
| /// fn merge_limit(parent: &Limit, child: &Limit) -> LogicalPlan { | ||
| /// // just for run | ||
| /// return parent.input.as_ref().clone(); | ||
| /// } | ||
| /// fn try_optimize(plan: &LogicalPlan) -> Result<Option<LogicalPlan>> { | ||
| /// match plan { | ||
| /// LogicalPlan::Limit(limit) => match limit.input.as_ref() { | ||
| /// LogicalPlan::Limit(child_limit) => { | ||
| /// // merge limit ... | ||
| /// let optimized_plan = merge_limit(limit, child_limit); | ||
| /// // due to optimized_plan may be optimized again, | ||
| /// // for example: plan is Limit-Limit-Limit | ||
| /// Ok(Some( | ||
| /// try_optimize(&optimized_plan)? | ||
| /// .unwrap_or_else(|| optimized_plan.clone()), | ||
| /// )) | ||
| /// } | ||
| /// _ => Ok(None), | ||
| /// }, | ||
| /// _ => Ok(None), | ||
| /// } | ||
| /// } | ||
| /// ``` | ||
| pub enum ApplyOrder { | ||
jackwener marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| TopDown, | ||
| BottomUp, | ||
| } | ||
|
|
||
| impl Default for Optimizer { | ||
| fn default() -> Self { | ||
| Self::new() | ||
|
|
@@ -253,8 +298,8 @@ impl Optimizer { | |
| debug!("Skipping rule {} due to optimizer config", rule.name()); | ||
| continue; | ||
| } | ||
| let result = self.optimize_recursively(rule, &new_plan, config); | ||
|
|
||
| let result = rule.try_optimize(&new_plan, config); | ||
| match result { | ||
| Ok(Some(plan)) => { | ||
| if !plan.schema().equivalent_names_and_types(new_plan.schema()) { | ||
|
|
@@ -315,6 +360,78 @@ impl Optimizer { | |
| debug!("Optimizer took {} ms", start_time.elapsed().as_millis()); | ||
| Ok(new_plan) | ||
| } | ||
|
|
||
| fn optimize_node( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What value does this function add? In other words, why not call
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I forgot to add future TODO: for rule in rules:
rule.try_optimize(rule) |
||
| &self, | ||
| rule: &Arc<dyn OptimizerRule + Send + Sync>, | ||
| plan: &LogicalPlan, | ||
| config: &dyn OptimizerConfig, | ||
| ) -> Result<Option<LogicalPlan>> { | ||
| // TODO: future feature: We can do Batch optimize | ||
| rule.try_optimize(plan, config) | ||
| } | ||
|
|
||
| fn optimize_inputs( | ||
| &self, | ||
| rule: &Arc<dyn OptimizerRule + Send + Sync>, | ||
| plan: &LogicalPlan, | ||
| config: &dyn OptimizerConfig, | ||
| ) -> Result<Option<LogicalPlan>> { | ||
| let inputs = plan.inputs(); | ||
| let result = inputs | ||
| .iter() | ||
| .map(|sub_plan| self.optimize_recursively(rule, sub_plan, config)) | ||
| .collect::<Result<Vec<_>>>()?; | ||
| if result.is_empty() || result.iter().all(|o| o.is_none()) { | ||
| return Ok(None); | ||
| } | ||
|
|
||
| let new_inputs = result | ||
| .into_iter() | ||
| .enumerate() | ||
| .map(|(i, o)| match o { | ||
| Some(plan) => plan, | ||
| None => (*(inputs.get(i).unwrap())).clone(), | ||
| }) | ||
| .collect::<Vec<_>>(); | ||
|
|
||
| Ok(Some(plan.with_new_inputs(new_inputs.as_slice())?)) | ||
| } | ||
|
|
||
| /// Use a rule to optimize the whole plan. | ||
| /// If the rule with `ApplyOrder`, we don't need to recursively handle children in rule. | ||
| pub fn optimize_recursively( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please add docstrings about what this API does? Also, I wonder if it needs to be
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sometime we need use it in UT😂 |
||
| &self, | ||
| rule: &Arc<dyn OptimizerRule + Send + Sync>, | ||
| plan: &LogicalPlan, | ||
| config: &dyn OptimizerConfig, | ||
| ) -> Result<Option<LogicalPlan>> { | ||
| match rule.apply_order() { | ||
| Some(order) => match order { | ||
| ApplyOrder::TopDown => { | ||
| let optimize_self_opt = self.optimize_node(rule, plan, config)?; | ||
| let optimize_inputs_opt = match &optimize_self_opt { | ||
| Some(optimized_plan) => { | ||
| self.optimize_inputs(rule, optimized_plan, config)? | ||
| } | ||
| _ => self.optimize_inputs(rule, plan, config)?, | ||
| }; | ||
| Ok(optimize_inputs_opt.or(optimize_self_opt)) | ||
| } | ||
| ApplyOrder::BottomUp => { | ||
| let optimize_inputs_opt = self.optimize_inputs(rule, plan, config)?; | ||
| let optimize_self_opt = match &optimize_inputs_opt { | ||
| Some(optimized_plan) => { | ||
| self.optimize_node(rule, optimized_plan, config)? | ||
| } | ||
| _ => self.optimize_node(rule, plan, config)?, | ||
| }; | ||
| Ok(optimize_self_opt.or(optimize_inputs_opt)) | ||
| } | ||
| }, | ||
| _ => rule.try_optimize(plan, config), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Log the plan in debug/tracing mode after some part of the optimizer runs | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.