From 80e9c1666f4a7fd8a177e66c0e440018d0b2bee8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Nov 2024 12:03:24 +0000 Subject: [PATCH 1/5] Merge pull request #71539 from ClickHouse/enable-merge-filters-optimization Enable merge filters optimization. --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 18 ++- .../QueryPlan/BuildQueryPipelineSettings.cpp | 8 +- .../QueryPlan/BuildQueryPipelineSettings.h | 2 + src/Processors/QueryPlan/FilterStep.cpp | 137 +++++++++++++++++- .../QueryPlanOptimizationSettings.h | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- .../MergeTree/MergeTreeBlockReadUtils.cpp | 2 +- src/Storages/MergeTree/MergeTreeIOSettings.h | 2 + .../MergeTree/MergeTreeSelectProcessor.cpp | 8 +- .../MergeTree/MergeTreeSelectProcessor.h | 3 +- .../MergeTreeSplitPrewhereIntoReadSteps.cpp | 112 +++++++------- .../01655_plan_optimizations.reference | 5 +- .../0_stateless/01655_plan_optimizations.sh | 4 +- .../02496_remove_redundant_sorting.reference | 13 +- ...rouping_sets_predicate_push_down.reference | 38 ++--- ...filter_push_down_equivalent_sets.reference | 68 +++++---- .../0_stateless/03199_merge_filters_bug.sql | 38 ++++- .../03262_filter_push_down_view.reference | 2 + .../03262_filter_push_down_view.sql | 36 +++++ 20 files changed, 368 insertions(+), 134 deletions(-) create mode 100644 tests/queries/0_stateless/03262_filter_push_down_view.reference create mode 100644 tests/queries/0_stateless/03262_filter_push_down_view.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b82733f2a44c..48aa8b04029c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -738,7 +738,7 @@ class IColumn; M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \ M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \ M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \ - M(Bool, query_plan_merge_filters, false, "Allow to merge filters in the query plan", 0) \ + M(Bool, query_plan_merge_filters, true, "Allow to merge filters in the query plan", 0) \ M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \ M(Bool, query_plan_convert_outer_join_to_inner_join, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values", 0) \ M(Bool, query_plan_optimize_prewhere, true, "Allow to push down filter to PREWHERE expression for supported storages", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 4b3d5f60a8ef..946dc4d0511e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -63,7 +63,23 @@ static std::initializer_listgetSettingsRef(); BuildQueryPipelineSettings settings; - settings.actions_settings = ExpressionActionsSettings::fromSettings(from->getSettingsRef(), CompileExpressions::yes); + settings.actions_settings = ExpressionActionsSettings::fromSettings(query_settings, CompileExpressions::yes); settings.process_list_element = from->getProcessListElement(); settings.progress_callback = from->getProgressCallback(); + + /// Setting query_plan_merge_filters is enabled by default. + /// But it can brake short-circuit without splitting filter step into smaller steps. + /// So, enable and disable this optimizations together. + settings.enable_multiple_filters_transforms_for_and_chain = query_settings.query_plan_merge_filters; return settings; } diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h index 3b5e4e06953c..89c7c0c2a6ae 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h @@ -15,6 +15,8 @@ using QueryStatusPtr = std::shared_ptr; struct BuildQueryPipelineSettings { + bool enable_multiple_filters_transforms_for_and_chain = true; + ExpressionActionsSettings actions_settings; QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index ee770d1077c7..08b5a31787e9 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -5,6 +5,11 @@ #include #include #include +#include +#include +#include +#include +#include namespace DB { @@ -33,6 +38,92 @@ static ITransformingStep::Traits getTraits(const ActionsDAG & expression, const }; } +static bool isTrivialSubtree(const ActionsDAG::Node * node) +{ + while (node->type == ActionsDAG::ActionType::ALIAS) + node = node->children.at(0); + + return node->type != ActionsDAG::ActionType::FUNCTION && node->type != ActionsDAG::ActionType::ARRAY_JOIN; +} + +struct ActionsAndName +{ + ActionsDAG dag; + std::string name; +}; + +static ActionsAndName splitSingleAndFilter(ActionsDAG & dag, const ActionsDAG::Node * filter_node) +{ + auto split_result = dag.split({filter_node}, true); + dag = std::move(split_result.second); + + const auto * split_filter_node = split_result.split_nodes_mapping[filter_node]; + auto filter_type = removeLowCardinality(split_filter_node->result_type); + if (!filter_type->onlyNull() && !isUInt8(removeNullable(filter_type))) + { + DataTypePtr cast_type = std::make_shared(); + if (filter_type->isNullable()) + cast_type = std::make_shared(std::move(cast_type)); + + split_filter_node = &split_result.first.addCast(*split_filter_node, cast_type, {}); + } + + split_result.first.getOutputs().emplace(split_result.first.getOutputs().begin(), split_filter_node); + auto name = split_filter_node->result_name; + return ActionsAndName{std::move(split_result.first), std::move(name)}; +} + +/// Try to split the left most AND atom to a separate DAG. +static std::optional trySplitSingleAndFilter(ActionsDAG & dag, const std::string & filter_name) +{ + const auto * filter = &dag.findInOutputs(filter_name); + while (filter->type == ActionsDAG::ActionType::ALIAS) + filter = filter->children.at(0); + + if (filter->type != ActionsDAG::ActionType::FUNCTION || filter->function_base->getName() != "and") + return {}; + + const ActionsDAG::Node * condition_to_split = nullptr; + std::stack nodes; + nodes.push(filter); + while (!nodes.empty()) + { + const auto * node = nodes.top(); + nodes.pop(); + + if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "and") + { + /// The order is important. We should take the left-most atom, so put conditions on stack in reverse order. + for (const auto * child : node->children | std::ranges::views::reverse) + nodes.push(child); + + continue; + } + + if (isTrivialSubtree(node)) + continue; + + /// Do not split subtree if it's the last non-trivial one. + /// So, split the first found condition only when there is a another one found. + if (condition_to_split) + return splitSingleAndFilter(dag, condition_to_split); + + condition_to_split = node; + } + + return {}; +} + +std::vector splitAndChainIntoMultipleFilters(ActionsDAG & dag, const std::string & filter_name) +{ + std::vector res; + + while (auto condition = trySplitSingleAndFilter(dag, filter_name)) + res.push_back(std::move(*condition)); + + return res; +} + FilterStep::FilterStep( const DataStream & input_stream_, ActionsDAG actions_dag_, @@ -59,6 +150,23 @@ FilterStep::FilterStep( void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { + std::vector and_atoms; + + /// Splitting AND filter condition to steps under the setting, which is enabled with merge_filters optimization. + /// This is needed to support short-circuit properly. + if (settings.enable_multiple_filters_transforms_for_and_chain && !actions_dag.hasStatefulFunctions()) + and_atoms = splitAndChainIntoMultipleFilters(actions_dag, filter_column_name); + + for (auto & and_atom : and_atoms) + { + auto expression = std::make_shared(std::move(and_atom.dag), settings.getActionsSettings()); + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) + { + bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals; + return std::make_shared(header, expression, and_atom.name, true, on_totals); + }); + } + auto expression = std::make_shared(std::move(actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) @@ -85,18 +193,45 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ void FilterStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, settings.indent_char); + + auto cloned_dag = actions_dag.clone(); + + std::vector and_atoms; + if (!actions_dag.hasStatefulFunctions()) + and_atoms = splitAndChainIntoMultipleFilters(cloned_dag, filter_column_name); + + for (auto & and_atom : and_atoms) + { + auto expression = std::make_shared(std::move(and_atom.dag)); + settings.out << prefix << "AND column: " << and_atom.name << '\n'; + expression->describeActions(settings.out, prefix); + } + settings.out << prefix << "Filter column: " << filter_column_name; if (remove_filter_column) settings.out << " (removed)"; settings.out << '\n'; - auto expression = std::make_shared(actions_dag.clone()); + auto expression = std::make_shared(std::move(cloned_dag)); expression->describeActions(settings.out, prefix); } void FilterStep::describeActions(JSONBuilder::JSONMap & map) const { + auto cloned_dag = actions_dag.clone(); + + std::vector and_atoms; + if (!actions_dag.hasStatefulFunctions()) + and_atoms = splitAndChainIntoMultipleFilters(cloned_dag, filter_column_name); + + for (auto & and_atom : and_atoms) + { + auto expression = std::make_shared(std::move(and_atom.dag)); + map.add("AND column", and_atom.name); + map.add("Expression", expression->toTree()); + } + map.add("Filter Column", filter_column_name); map.add("Removes Filter", remove_filter_column); diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index a2b224958006..69b02a70c86c 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -32,7 +32,7 @@ struct QueryPlanOptimizationSettings bool merge_expressions = true; /// If merge-filters optimization is enabled. - bool merge_filters = false; + bool merge_filters = true; /// If filter push down optimization is enabled. bool filter_push_down = true; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 5d3f823d847d..362c58adb86d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -131,7 +131,6 @@ namespace ProfileEvents namespace DB { - namespace ErrorCodes { extern const int INDEX_NOT_USED; @@ -155,6 +154,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree && (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1), .enable_multiple_prewhere_read_steps = settings.enable_multiple_prewhere_read_steps, + .force_short_circuit_execution = settings.query_plan_merge_filters }; } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 3dbb9d64f2f2..bd4029def45d 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -330,7 +330,7 @@ MergeTreeReadTaskColumns getReadTaskColumns( auto prewhere_actions = MergeTreeSelectProcessor::getPrewhereActions( prewhere_info, actions_settings, - reader_settings.enable_multiple_prewhere_read_steps); + reader_settings.enable_multiple_prewhere_read_steps, reader_settings.force_short_circuit_execution); for (const auto & step : prewhere_actions.steps) add_step(*step); diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index ac8001e0007f..64ae59602afa 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -46,6 +46,8 @@ struct MergeTreeReaderSettings bool use_asynchronous_read_from_pool = false; /// If PREWHERE has multiple conditions combined with AND, execute them in separate read/filtering steps. bool enable_multiple_prewhere_read_steps = false; + /// In case of multiple prewhere steps, execute filtering earlier to support short-circuit properly. + bool force_short_circuit_execution = false; /// If true, try to lower size of read buffer according to granule size and compressed block size. bool adjust_read_buffer_size = true; /// If true, it's allowed to read the whole part without reading marks. diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index a7ed91c8fc1e..eef12282ae43 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -92,7 +92,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( , algorithm(std::move(algorithm_)) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) - , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) + , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps, reader_settings_.force_short_circuit_execution)) , reader_settings(reader_settings_) , block_size_params(block_size_params_) , result_header(transformHeader(pool->getHeader(), prewhere_info)) @@ -126,9 +126,9 @@ String MergeTreeSelectProcessor::getName() const return fmt::format("MergeTreeSelect(pool: {}, algorithm: {})", pool->getName(), algorithm->getName()); } -bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere); +bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere, bool force_short_circuit_execution); -PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps) +PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps, bool force_short_circuit_execution) { PrewhereExprInfo prewhere_actions; if (prewhere_info) @@ -149,7 +149,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr } if (!enable_multiple_prewhere_read_steps || - !tryBuildPrewhereSteps(prewhere_info, actions_settings, prewhere_actions)) + !tryBuildPrewhereSteps(prewhere_info, actions_settings, prewhere_actions, force_short_circuit_execution)) { PrewhereExprStep prewhere_step { diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 8a9e3580a9f7..994d08c5f02f 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -74,7 +74,8 @@ class MergeTreeSelectProcessor : private boost::noncopyable static PrewhereExprInfo getPrewhereActions( PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, - bool enable_multiple_prewhere_read_steps); + bool enable_multiple_prewhere_read_steps, + bool force_short_circuit_execution); void addPartLevelToChunk(bool add_part_level_) { add_part_level = add_part_level_; } diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 9c82817e8cb6..1cc4006a285c 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -57,9 +58,9 @@ struct DAGNodeRef const ActionsDAG::Node * node; }; -/// Result name -> DAGNodeRef -using OriginalToNewNodeMap = std::unordered_map; -using NodeNameToLastUsedStepMap = std::unordered_map; +/// ResultNode -> DAGNodeRef +using OriginalToNewNodeMap = std::unordered_map; +using NodeNameToLastUsedStepMap = std::unordered_map; /// Clones the part of original DAG responsible for computing the original_dag_node and adds it to the new DAG. const ActionsDAG::Node & addClonedDAGToDAG( @@ -69,25 +70,28 @@ const ActionsDAG::Node & addClonedDAGToDAG( OriginalToNewNodeMap & node_remap, NodeNameToLastUsedStepMap & node_to_step_map) { - const String & node_name = original_dag_node->result_name; /// Look for the node in the map of already known nodes - if (node_remap.contains(node_name)) + if (node_remap.contains(original_dag_node)) { /// If the node is already in the new DAG, return it - const auto & node_ref = node_remap.at(node_name); + const auto & node_ref = node_remap.at(original_dag_node); if (node_ref.dag == new_dag.get()) return *node_ref.node; /// If the node is known from the previous steps, add it as an input, except for constants if (original_dag_node->type != ActionsDAG::ActionType::COLUMN) { - node_ref.dag->addOrReplaceInOutputs(*node_ref.node); + /// If the node was found in node_remap, it was not added to outputs yet. + /// The only exception is the filter node, which is always the first one. + if (node_ref.dag->getOutputs().at(0) != node_ref.node) + node_ref.dag->getOutputs().push_back(node_ref.node); + const auto & new_node = new_dag->addInput(node_ref.node->result_name, node_ref.node->result_type); - node_remap[node_name] = {new_dag.get(), &new_node}; /// TODO: here we update the node reference. Is it always correct? + node_remap[original_dag_node] = {new_dag.get(), &new_node}; /// Remember the index of the last step which reuses this node. /// We cannot remove this node from the outputs before that step. - node_to_step_map[node_name] = step; + node_to_step_map[original_dag_node] = step; return new_node; } } @@ -96,7 +100,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( if (original_dag_node->type == ActionsDAG::ActionType::INPUT) { const auto & new_node = new_dag->addInput(original_dag_node->result_name, original_dag_node->result_type); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -105,7 +109,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & new_node = new_dag->addColumn( ColumnWithTypeAndName(original_dag_node->column, original_dag_node->result_type, original_dag_node->result_name)); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -113,7 +117,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & alias_child = addClonedDAGToDAG(step, original_dag_node->children[0], new_dag, node_remap, node_to_step_map); const auto & new_node = new_dag->addAlias(alias_child, original_dag_node->result_name); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -128,7 +132,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( } const auto & new_node = new_dag->addFunction(original_dag_node->function_base, new_children, original_dag_node->result_name); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -138,11 +142,9 @@ const ActionsDAG::Node & addClonedDAGToDAG( const ActionsDAG::Node & addFunction( const ActionsDAGPtr & new_dag, const FunctionOverloadResolverPtr & function, - ActionsDAG::NodeRawConstPtrs children, - OriginalToNewNodeMap & node_remap) + ActionsDAG::NodeRawConstPtrs children) { const auto & new_node = new_dag->addFunction(function, children, ""); - node_remap[new_node.result_name] = {new_dag.get(), &new_node}; return new_node; } @@ -152,14 +154,12 @@ const ActionsDAG::Node & addFunction( const ActionsDAG::Node & addCast( const ActionsDAGPtr & dag, const ActionsDAG::Node & node_to_cast, - const DataTypePtr & to_type, - OriginalToNewNodeMap & node_remap) + const DataTypePtr & to_type) { if (!node_to_cast.result_type->equals(*to_type)) return node_to_cast; const auto & new_node = dag->addCast(node_to_cast, to_type, {}); - node_remap[new_node.result_name] = {dag.get(), &new_node}; return new_node; } @@ -169,8 +169,7 @@ const ActionsDAG::Node & addCast( /// 2. makes sure that the result contains only 0 or 1 values even if the source column contains non-boolean values. const ActionsDAG::Node & addAndTrue( const ActionsDAGPtr & dag, - const ActionsDAG::Node & filter_node_to_normalize, - OriginalToNewNodeMap & node_remap) + const ActionsDAG::Node & filter_node_to_normalize) { Field const_true_value(true); @@ -181,7 +180,7 @@ const ActionsDAG::Node & addAndTrue( const auto * const_true_node = &dag->addColumn(std::move(const_true_column)); ActionsDAG::NodeRawConstPtrs children = {&filter_node_to_normalize, const_true_node}; FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - return addFunction(dag, func_builder_and, children, node_remap); + return addFunction(dag, func_builder_and, children); } } @@ -206,7 +205,11 @@ const ActionsDAG::Node & addAndTrue( /// 6. Find all outputs of the original DAG /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 -bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere) +bool tryBuildPrewhereSteps( + PrewhereInfoPtr prewhere_info, + const ExpressionActionsSettings & actions_settings, + PrewhereExprInfo & prewhere, + bool force_short_circuit_execution) { if (!prewhere_info) return true; @@ -243,7 +246,10 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction struct Step { ActionsDAGPtr actions; - String column_name; + /// Original condition, in case if we have only one condition, and it was not casted + const ActionsDAG::Node * original_node; + /// Result condition node + const ActionsDAG::Node * result_node; }; std::vector steps; @@ -254,7 +260,8 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { const auto & condition_group = condition_groups[step_index]; ActionsDAGPtr step_dag = std::make_unique(); - String result_name; + const ActionsDAG::Node * original_node = nullptr; + const ActionsDAG::Node * result_node; std::vector new_condition_nodes; for (const auto * node : condition_group) @@ -267,48 +274,37 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { /// Add AND function to combine the conditions FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - const auto & and_function_node = addFunction(step_dag, func_builder_and, new_condition_nodes, node_remap); - step_dag->addOrReplaceInOutputs(and_function_node); - result_name = and_function_node.result_name; + const auto & and_function_node = addFunction(step_dag, func_builder_and, new_condition_nodes); + result_node = &and_function_node; } else { - const auto & result_node = *new_condition_nodes.front(); + result_node = new_condition_nodes.front(); /// Check if explicit cast is needed for the condition to serve as a filter. - const auto result_type_name = result_node.result_type->getName(); - if (result_type_name == "UInt8" || - result_type_name == "Nullable(UInt8)" || - result_type_name == "LowCardinality(UInt8)" || - result_type_name == "LowCardinality(Nullable(UInt8))") - { - /// No need to cast - step_dag->addOrReplaceInOutputs(result_node); - result_name = result_node.result_name; - } - else + if (!isUInt8(removeNullable(removeLowCardinality(result_node->result_type)))) { /// Build "condition AND True" expression to "cast" the condition to UInt8 or Nullable(UInt8) depending on its type. - const auto & cast_node = addAndTrue(step_dag, result_node, node_remap); - step_dag->addOrReplaceInOutputs(cast_node); - result_name = cast_node.result_name; + result_node = &addAndTrue(step_dag, *result_node); } } - steps.push_back({std::move(step_dag), result_name}); + step_dag->getOutputs().insert(step_dag->getOutputs().begin(), result_node); + steps.push_back({std::move(step_dag), original_node, result_node}); } /// 6. Find all outputs of the original DAG auto original_outputs = prewhere_info->prewhere_actions.getOutputs(); + steps.back().actions->getOutputs().clear(); /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 - NameSet all_output_names; + std::unordered_set all_outputs; for (const auto * output : original_outputs) { - all_output_names.insert(output->result_name); - if (node_remap.contains(output->result_name)) + all_outputs.insert(output); + if (node_remap.contains(output)) { - const auto & new_node_info = node_remap[output->result_name]; - new_node_info.dag->addOrReplaceInOutputs(*new_node_info.node); + const auto & new_node_info = node_remap[output]; + new_node_info.dag->getOutputs().push_back(new_node_info.node); } else if (output->result_name == prewhere_info->prewhere_column_name) { @@ -319,20 +315,21 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction /// 1. AND the last condition with constant True. This is needed to make sure that in the last step filter has UInt8 type /// but contains values other than 0 and 1 (e.g. if it is (number%5) it contains 2,3,4) /// 2. CAST the result to the exact type of the PREWHERE column from the original DAG - const auto & last_step_result_node_info = node_remap[steps.back().column_name]; auto & last_step_dag = steps.back().actions; + auto & last_step_result_node = steps.back().result_node; /// Build AND(last_step_result_node, true) - const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node_info.node, node_remap); + const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node); /// Build CAST(and_node, type of PREWHERE column) - const auto & cast_node = addCast(last_step_dag, and_node, output->result_type, node_remap); + const auto & cast_node = addCast(last_step_dag, and_node, output->result_type); /// Add alias for the result with the name of the PREWHERE column const auto & prewhere_result_node = last_step_dag->addAlias(cast_node, output->result_name); - last_step_dag->addOrReplaceInOutputs(prewhere_result_node); + last_step_dag->getOutputs().push_back(&prewhere_result_node); + steps.back().result_node = &prewhere_result_node; } else { const auto & node_in_new_dag = addClonedDAGToDAG(steps.size() - 1, output, steps.back().actions, node_remap, node_to_step); - steps.back().actions->addOrReplaceInOutputs(node_in_new_dag); + steps.back().actions->getOutputs().push_back(&node_in_new_dag); } } @@ -345,17 +342,18 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { .type = PrewhereExprStep::Filter, .actions = std::make_shared(std::move(*step.actions), actions_settings), - .filter_column_name = step.column_name, + .filter_column_name = step.result_node->result_name, /// Don't remove if it's in the list of original outputs .remove_filter_column = - !all_output_names.contains(step.column_name) && node_to_step[step.column_name] <= step_index, - .need_filter = false, + step.original_node && !all_outputs.contains(step.original_node) && node_to_step[step.original_node] <= step_index, + .need_filter = force_short_circuit_execution, .perform_alter_conversions = true, }; prewhere.steps.push_back(std::make_shared(std::move(new_step))); } + prewhere.steps.back()->remove_filter_column = prewhere_info->remove_prewhere_column; prewhere.steps.back()->need_filter = prewhere_info->need_filter; } diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 1581a269b274..9ccb2a0a26c5 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -82,12 +82,12 @@ Filter column: notEquals(__table1.y, 0_UInt8) 9 10 > one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column -FUNCTION and(minus(s, 8) :: 5, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) +FUNCTION and(minus(s, 8) :: 3, minus(s, 4) :: 5) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) Aggregating Filter column: notEquals(y, 0) > (analyzer) one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column -FUNCTION and(minus(__table1.s, 8_UInt8) :: 1, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8)) +FUNCTION and(minus(__table1.s, 8_UInt8) :: 3, minus(__table1.s, 4_UInt8) :: 5) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8)) Aggregating Filter column: notEquals(__table1.y, 0_UInt8) 0 1 @@ -163,7 +163,6 @@ Filter column: notEquals(__table1.y, 2_UInt8) > filter is pushed down before CreatingSets CreatingSets Filter -Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 7d21eea4c288..32aa8db4f683 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -89,14 +89,14 @@ $CLICKHOUSE_CLIENT --enable_analyzer=0 --convert_query_to_cnf=0 -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 5, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 3, minus(s, 4) :: 5) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" echo "> (analyzer) one condition of filter should be pushed down after aggregating, other two conditions are ANDed" $CLICKHOUSE_CLIENT --enable_analyzer=1 --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 8_UInt8) :: 1, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 8_UInt8) :: 3, minus(__table1.s, 4_UInt8) :: 5) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 4d004f2f78f8..b815a530e3dd 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,13 +332,12 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter ((WHERE + (Projection + Before ORDER BY))) - Filter (HAVING) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 9bb0c022752b..a382e14ce035 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -28,21 +28,17 @@ WHERE type_1 = \'all\' (Expression) ExpressionTransform × 2 (Filter) - FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + FilterTransform × 6 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -68,14 +64,10 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Filter) - FilterTransform - (Filter) - FilterTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference index 80f4e3095057..d0a3e7b02aed 100644 --- a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference @@ -163,17 +163,21 @@ Positions: 4 2 0 1 Filter (( + (JOIN actions + Change column names to column identifiers))) Header: __table1.id UInt64 __table1.value String - Filter column: and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) (removed) + AND column: equals(__table1.id, 5_UInt8) Actions: INPUT : 0 -> id UInt64 : 0 - INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1 + FUNCTION equals(id : 0, 5_UInt8 :: 1) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + Positions: 2 0 2 + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) (removed) + Actions: INPUT : 2 -> value String : 0 + INPUT : 1 -> id UInt64 : 1 COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 - COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 3 - ALIAS id : 0 -> __table1.id UInt64 : 4 - ALIAS value :: 1 -> __table1.value String : 5 - FUNCTION equals(id : 0, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 1 - FUNCTION equals(id :: 0, 5_UInt8 :: 3) -> equals(__table1.id, 5_UInt8) UInt8 : 2 - FUNCTION and(equals(__table1.id, 5_UInt8) :: 2, equals(__table1.id, 6_UInt8) :: 1) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 3 - Positions: 3 4 5 + INPUT : 0 -> equals(__table1.id, 5_UInt8) UInt8 : 3 + ALIAS value :: 0 -> __table1.value String : 4 + ALIAS id : 1 -> __table1.id UInt64 : 0 + FUNCTION equals(id :: 1, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 5 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 3, equals(__table1.id, 6_UInt8) :: 5) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 2 + Positions: 2 0 4 ReadFromMergeTree (default.test_table_1) Header: id UInt64 value String @@ -183,17 +187,21 @@ Positions: 4 2 0 1 Filter (( + (JOIN actions + Change column names to column identifiers))) Header: __table2.id UInt64 __table2.value String - Filter column: and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) (removed) + AND column: equals(__table2.id, 6_UInt8) Actions: INPUT : 0 -> id UInt64 : 0 - INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 1 + FUNCTION equals(id : 0, 6_UInt8 :: 1) -> equals(__table2.id, 6_UInt8) UInt8 : 2 + Positions: 2 0 2 + Filter column: and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) (removed) + Actions: INPUT : 2 -> value String : 0 + INPUT : 1 -> id UInt64 : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 3 - ALIAS id : 0 -> __table2.id UInt64 : 4 - ALIAS value :: 1 -> __table2.value String : 5 - FUNCTION equals(id : 0, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 - FUNCTION equals(id :: 0, 6_UInt8 :: 3) -> equals(__table2.id, 6_UInt8) UInt8 : 2 - FUNCTION and(equals(__table2.id, 6_UInt8) :: 2, equals(__table2.id, 5_UInt8) :: 1) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 3 - Positions: 3 4 5 + INPUT : 0 -> equals(__table2.id, 6_UInt8) UInt8 : 3 + ALIAS value :: 0 -> __table2.value String : 4 + ALIAS id : 1 -> __table2.id UInt64 : 0 + FUNCTION equals(id :: 1, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + FUNCTION and(equals(__table2.id, 6_UInt8) :: 3, equals(__table2.id, 5_UInt8) :: 5) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 2 + Positions: 2 0 4 ReadFromMergeTree (default.test_table_2) Header: id UInt64 value String @@ -656,17 +664,21 @@ Positions: 4 2 0 1 __table1.value String __table2.value String __table2.id UInt64 - Filter column: and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) (removed) + AND column: equals(__table1.id, 5_UInt8) Actions: INPUT : 0 -> __table1.id UInt64 : 0 - INPUT :: 1 -> __table1.value String : 1 - INPUT :: 2 -> __table2.value String : 2 - INPUT : 3 -> __table2.id UInt64 : 3 - COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 - COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 5 - FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 6 - FUNCTION equals(__table2.id : 3, 6_UInt8 :: 5) -> equals(__table2.id, 6_UInt8) UInt8 : 4 - FUNCTION and(equals(__table1.id, 5_UInt8) :: 6, equals(__table2.id, 6_UInt8) :: 4) -> and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) UInt8 : 5 - Positions: 5 0 1 2 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 1) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + Positions: 2 0 2 + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) (removed) + Actions: INPUT :: 1 -> __table1.id UInt64 : 0 + INPUT :: 2 -> __table1.value String : 1 + INPUT :: 3 -> __table2.value String : 2 + INPUT : 4 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 4 + INPUT : 0 -> equals(__table1.id, 5_UInt8) UInt8 : 5 + FUNCTION equals(__table2.id : 3, 6_UInt8 :: 4) -> equals(__table2.id, 6_UInt8) UInt8 : 6 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 5, equals(__table2.id, 6_UInt8) :: 6) -> and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) UInt8 : 4 + Positions: 4 0 1 2 3 Join (JOIN FillRightFirst) Header: __table1.id UInt64 __table1.value String diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.sql b/tests/queries/0_stateless/03199_merge_filters_bug.sql index ed2ec2ea2176..696856c91212 100644 --- a/tests/queries/0_stateless/03199_merge_filters_bug.sql +++ b/tests/queries/0_stateless/03199_merge_filters_bug.sql @@ -1,3 +1,5 @@ +set allow_reorder_prewhere_conditions=0; + drop table if exists t1; drop table if exists t2; @@ -49,7 +51,23 @@ tmp1 AS fs1 FROM t2 LEFT JOIN tmp1 USING (fs1) - WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0; + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0, query_plan_merge_filters=0; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 1, query_plan_merge_filters=1; optimize table t1 final; @@ -67,4 +85,20 @@ tmp1 AS fs1 FROM t2 LEFT JOIN tmp1 USING (fs1) - WHERE (fs1 IN ('test')); + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0, query_plan_merge_filters=0; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 1, query_plan_merge_filters=1; diff --git a/tests/queries/0_stateless/03262_filter_push_down_view.reference b/tests/queries/0_stateless/03262_filter_push_down_view.reference new file mode 100644 index 000000000000..275ff18f73be --- /dev/null +++ b/tests/queries/0_stateless/03262_filter_push_down_view.reference @@ -0,0 +1,2 @@ +Condition: and((materialize(auid) in [1, 1]), (_CAST(toDate(ts)) in (-Inf, 1703980800])) +Granules: 1/3 diff --git a/tests/queries/0_stateless/03262_filter_push_down_view.sql b/tests/queries/0_stateless/03262_filter_push_down_view.sql new file mode 100644 index 000000000000..8492d8c8ebdc --- /dev/null +++ b/tests/queries/0_stateless/03262_filter_push_down_view.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS alpha; +DROP TABLE IF EXISTS alpha__day; + +SET session_timezone = 'Etc/UTC'; + +CREATE TABLE alpha +( + `ts` DateTime64(6), + `auid` Int64, +) +ENGINE = MergeTree +ORDER BY (auid, ts) +SETTINGS index_granularity = 1; + +CREATE VIEW alpha__day +( + `ts_date` Date, + `auid` Int64, +) +AS SELECT + ts_date, + auid, +FROM +( + SELECT + toDate(ts) AS ts_date, + auid + FROM alpha +) +WHERE ts_date <= toDateTime('2024-01-01 00:00:00') - INTERVAL 1 DAY; + +INSERT INTO alpha VALUES (toDateTime64('2024-01-01 00:00:00.000', 3) - INTERVAL 3 DAY, 1); +INSERT INTO alpha VALUES (toDateTime64('2024-01-01 00:00:00.000', 3) - INTERVAL 3 DAY, 2); +INSERT INTO alpha VALUES (toDateTime64('2024-01-01 00:00:00.000', 3) - INTERVAL 3 DAY, 3); + +select trimLeft(explain) from (EXPLAIN indexes = 1 SELECT auid FROM alpha__day WHERE auid = 1) where explain like '%Condition:%' or explain like '%Granules:%' settings allow_experimental_analyzer = 1; From 3f379908a7150d55ab829b049bea30caed64ee8a Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 20 Feb 2025 08:50:29 +0000 Subject: [PATCH 2/5] Cleaned up settings changes history --- src/Core/SettingsChangesHistory.cpp | 40 ++++++----------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 946dc4d0511e..14998b0b960b 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,46 +57,22 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.12", + {"24.8.14", { + {"query_plan_merge_filters", false, true, "Allow to merge filters in the query plan. This is required to properly support filter-push-down with a new analyzer."}, // #71539 via https://github.com/Altinity/ClickHouse/pull/640 } }, - {"24.11", + {"24.8.11", { - {"push_external_roles_in_interserver_queries", false, true, "New setting."}, - {"enable_job_stack_trace", false, true, "Enable by default collecting stack traces from job's scheduling."}, - {"allow_suspicious_types_in_group_by", true, false, "Don't allow Variant/Dynamic types in GROUP BY by default"}, - {"allow_suspicious_types_in_order_by", true, false, "Don't allow Variant/Dynamic types in ORDER BY by default"}, - {"distributed_cache_discard_connection_if_unread_data", true, true, "New setting"}, - {"filesystem_cache_enable_background_download_for_metadata_files_in_packed_storage", true, true, "New setting"}, - {"filesystem_cache_enable_background_download_during_fetch", true, true, "New setting"}, - {"azure_check_objects_after_upload", false, false, "Check each uploaded object in azure blob storage to be sure that upload was successful"}, - {"backup_restore_keeper_max_retries", 20, 1000, "Should be big enough so the whole operation BACKUP or RESTORE operation won't fail because of a temporary [Zoo]Keeper failure in the middle of it."}, - {"backup_restore_failure_after_host_disconnected_for_seconds", 0, 3600, "New setting."}, - {"backup_restore_keeper_max_retries_while_initializing", 0, 20, "New setting."}, - {"backup_restore_keeper_max_retries_while_handling_error", 0, 20, "New setting."}, - {"backup_restore_finish_timeout_after_error_sec", 0, 180, "New setting."}, - {"query_plan_merge_filters", false, true, "Allow to merge filters in the query plan. This is required to properly support filter-push-down with a new analyzer."}, - {"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"}, - {"filesystem_cache_prefer_bigger_buffer_size", true, true, "New setting"}, - {"read_in_order_use_virtual_row", false, false, "Use virtual row while reading in order of primary key or its monotonic function fashion. It is useful when searching over multiple parts as only relevant ones are touched."}, - } - }, - {"24.10", - { - } - }, - {"24.9", - { - {"enable_parallel_replicas", false, false, "Parallel replicas with read tasks became the Beta tier feature."}, - {"parallel_replicas_mode", "read_tasks", "read_tasks", "This setting was introduced as a part of making parallel replicas feature Beta"}, - {"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"}, - {"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"} + {"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"}, // #68424 via https://github.com/Altinity/ClickHouse/pull/542 + {"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"}, // #64448 via https://github.com/Altinity/ClickHouse/pull/542 + {"parallel_replicas_mode", "read_tasks", "read_tasks", "This setting was introduced as a part of making parallel replicas feature Beta"}, // #63151 via https://github.com/Altinity/ClickHouse/pull/542 + {"push_external_roles_in_interserver_queries", false, true, "New setting."}, // #70332 via https://github.com/Altinity/ClickHouse/pull/542 } }, {"24.8", { - {"input_format_parquet_bloom_filter_push_down", false, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."}, + {"input_format_parquet_bloom_filter_push_down", false, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."}, {"enable_named_columns_in_function_tuple", false, false, "Retroactively disabled by default due to critical bugs."}, {"rows_before_aggregation", false, false, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"}, {"restore_replace_external_table_functions_to_null", false, false, "New setting."}, From 252854e49aa871db000f6e9ca2c04d918180c0dc Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 28 Feb 2025 00:03:04 +0100 Subject: [PATCH 3/5] Added enable_parallel_replicas to settings history --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 14998b0b960b..74710596aed7 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -64,6 +64,7 @@ static std::initializer_list Date: Mon, 3 Mar 2025 09:49:05 +0100 Subject: [PATCH 4/5] Added allow_reorder_prewhere_conditions setting --- src/Core/Settings.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 48aa8b04029c..a648d0c53833 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -169,6 +169,7 @@ class IColumn; M(Bool, move_all_conditions_to_prewhere, true, "Move all viable conditions from WHERE to PREWHERE", 0) \ M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ + M(Bool, allow_reorder_prewhere_conditions, true, "When moving conditions from WHERE to PREWHERE, allow reordering them to optimize filtering.", 0) /* #71539 via https://github.com/Altinity/ClickHouse/pull/640 */ \ \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ From bd4d269f9e4ec5e4180d6b917b2a24b45bf32731 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 12 Mar 2025 09:56:47 +0100 Subject: [PATCH 5/5] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 74710596aed7..a88be1d4c006 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -60,6 +60,7 @@ static std::initializer_list