Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,8 @@ DEFINE_Bool(exit_on_exception, "false");
DEFINE_String(doris_cgroup_cpu_path, "");
DEFINE_Bool(enable_cpu_hard_limit, "false");

// Remove predicate that is always true for a segment. Defaults to "true" (enabled).
DEFINE_Bool(ignore_always_true_predicate_for_segment, "true");

// clang-format off
#ifdef BE_TEST
// test s3
Expand Down
3 changes: 3 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,9 @@ DECLARE_mBool(exit_on_exception);
DECLARE_String(doris_cgroup_cpu_path);
DECLARE_Bool(enable_cpu_hard_limit);

// Remove predicate that is always true for a segment.
// When enabled, Segment::new_iterator drops such predicates (as decided by
// ColumnReader::prune_predicates_by_zone_map) before initializing the iterator;
// this is applied to ReaderType::READER_QUERY reads only.
DECLARE_Bool(ignore_always_true_predicate_for_segment);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@ class ColumnPredicate {
return true;
}

// Reports whether `statistic` (a {min, max} pair of fields) proves that every value
// in that range satisfies this predicate. The base implementation never makes that
// claim and returns false; subclasses override it when they can decide from min/max.
virtual bool is_always_true(const std::pair<WrapperField*, WrapperField*>& statistic) const {
return false;
}

// Default implementation: returns false for any `statistic` pair.
// NOTE(review): presumably evaluates a delete condition against min/max statistics;
// confirm against the overriding predicate classes.
virtual bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& statistic) const {
return false;
}
Expand Down
23 changes: 23 additions & 0 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,29 @@ class ComparisonPredicateBase : public ColumnPredicate {
}
}

// Zone-map shortcut: decides whether the [min, max] range carried by `statistic`
// is proof that every value in the range satisfies this comparison.
//   statistic.first  - field holding the range minimum
//   statistic.second - field holding the range maximum
// Returns false when either bound is null, and for any predicate type other than
// LT / LE / GT / GE.
bool is_always_true(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
    WrapperField* const lower_field = statistic.first;
    WrapperField* const upper_field = statistic.second;

    // A null bound carries no usable range information.
    const bool bounds_usable = !lower_field->is_null() && !upper_field->is_null();
    if (!bounds_usable) {
        return false;
    }

    // Copy the raw cell bytes into typed values so they can be compared with _value.
    T range_min {};
    T range_max {};
    memcpy(&range_min, lower_field->cell_ptr(), sizeof(WarpperFieldType));
    memcpy(&range_max, upper_field->cell_ptr(), sizeof(WarpperFieldType));

    bool holds_for_whole_range = false;
    if constexpr (PT == PredicateType::LT) {
        // "< value" holds everywhere once even the maximum is below value.
        holds_for_whole_range = _value > range_max;
    } else if constexpr (PT == PredicateType::LE) {
        holds_for_whole_range = _value >= range_max;
    } else if constexpr (PT == PredicateType::GT) {
        // "> value" holds everywhere once even the minimum is above value.
        holds_for_whole_range = _value < range_min;
    } else if constexpr (PT == PredicateType::GE) {
        holds_for_whole_range = _value <= range_min;
    }
    return holds_for_whole_range;
}

bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
if (statistic.first->is_null() || statistic.second->is_null()) {
return false;
Expand Down
26 changes: 26 additions & 0 deletions be/src/olap/rowset/segment_v2/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "io/fs/file_reader.h"
#include "olap/block_column_predicate.h"
#include "olap/column_predicate.h"
#include "olap/comparison_predicate.h"
#include "olap/decimal12.h"
#include "olap/inverted_index_parser.h"
#include "olap/iterators.h"
Expand Down Expand Up @@ -338,6 +339,31 @@ bool ColumnReader::match_condition(const AndBlockColumnPredicate* col_predicates
col_predicates);
}

// Erases from `predicates` every predicate that targets `column_id` and that this
// reader's segment zone map (min/max) proves is always true.
// Returns true if at least one predicate was erased; false otherwise, including
// when this column has no zone map index.
bool ColumnReader::prune_predicates_by_zone_map(std::vector<ColumnPredicate*>& predicates,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a config to disable this
and the prune should only be used in query, not in compaction.

const int column_id) const {
// Without a zone map index nothing can be proven, so nothing is pruned.
if (_zone_map_index == nullptr) {
return false;
}

// Materialize the segment-level min/max into two fields of the column's type.
FieldType type = _type_info->type();
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length));
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length));
_parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get());

auto pruned = false;
// Erase in place: erase() yields the iterator to the next element, so the
// iterator is only advanced manually when the current predicate is kept.
for (auto it = predicates.begin(); it != predicates.end();) {
auto predicate = *it;
if (predicate->column_id() == column_id &&
predicate->is_always_true({min_value.get(), max_value.get()})) {
pruned = true;
it = predicates.erase(it);
} else {
++it;
}
}
return pruned;
}

void ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* min_value_container,
WrapperField* max_value_container) const {
// min value and max value are valid if has_not_null is true
Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/rowset/segment_v2/column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ class ColumnReader {

// True when this reader's row count (_num_rows) is zero.
bool is_empty() const { return _num_rows == 0; }

// Erases from `predicates` those entries on `column_id` that this column's segment
// zone map proves are always true. The vector is modified in place.
// Returns true if any predicate was erased.
bool prune_predicates_by_zone_map(std::vector<ColumnPredicate*>& predicates,
const int column_id) const;

// Returns the compression type stored in _meta_compression.
CompressionTypePB get_compression() const { return _meta_compression; }

// Returns the row count stored in _num_rows.
uint64_t num_rows() const { return _num_rows; }
Expand Down
20 changes: 19 additions & 1 deletion be/src/olap/rowset/segment_v2/segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o
return Status::OK();
}
}

if (read_options.use_topn_opt) {
auto query_ctx = read_options.runtime_state->get_query_ctx();
auto runtime_predicate = query_ctx->get_runtime_predicate().get_predictate();
Expand Down Expand Up @@ -157,6 +156,25 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o
iter->reset(new SegmentIterator(this->shared_from_this(), schema));
}

if (config::ignore_always_true_predicate_for_segment &&
read_options.io_ctx.reader_type == ReaderType::READER_QUERY &&
!read_options.column_predicates.empty()) {
auto pruned_predicates = read_options.column_predicates;
auto pruned = false;
for (auto& it : _column_readers) {
if (it.second->prune_predicates_by_zone_map(pruned_predicates, it.first)) {
pruned = true;
}
}

if (pruned) {
auto options_with_pruned_predicates = read_options;
options_with_pruned_predicates.column_predicates = pruned_predicates;
LOG(INFO) << "column_predicates pruned from " << read_options.column_predicates.size()
<< " to " << pruned_predicates.size();
return iter->get()->init(options_with_pruned_predicates);
}
}
return iter->get()->init(read_options);
}

Expand Down
25 changes: 25 additions & 0 deletions regression-test/data/query_p0/test_select_with_predicate_prune.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select1 --
1 jerry 2020-10-01
2 tom 2020-10-02
3 jack 2020-10-01
4 tony 2020-10-02

-- !select2 --
1 jerry 2020-10-01
3 jack 2020-10-01

-- !select3 --

-- !select4 --
1 jerry 2020-10-01
2 tom 2020-10-02
3 jack 2020-10-01
4 tony 2020-10-02

-- !select5 --
2 tom 2020-10-02
4 tony 2020-10-02

-- !select6 --

Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression suite for predicate pruning: range predicates on `birthday` must return
// the same rows whether or not a predicate is always true for the data being read.
// Expected rows for each qt_ tag live in test_select_with_predicate_prune.out.
suite("test_select_with_predicate_prune") {
// Start from a clean table so reruns are deterministic.
sql """
drop table if exists `test_select_with_predicate_prune`;
"""
sql """
CREATE TABLE IF NOT EXISTS `test_select_with_predicate_prune` (
id int,
name string,
birthday date not null
)
duplicate key(`id`)
AUTO PARTITION BY LIST (`birthday`)()
DISTRIBUTED BY HASH(`id`) buckets 1
PROPERTIES
(
"replication_allocation" = "tag.location.default: 1"
);
"""

// Two separate inserts, each carrying both dates (2020-10-01 and 2020-10-02).
// NOTE(review): presumably each insert produces its own rowset/segment - confirm.
sql """
insert into test_select_with_predicate_prune values (1, 'jerry', '2020-10-01'), (2, 'tom', '2020-10-02');
"""
sql """
insert into test_select_with_predicate_prune values (3, 'jack', '2020-10-01'), (4, 'tony', '2020-10-02');
"""

// Upper bound above every stored date: all four rows expected.
qt_select1 """
select * from test_select_with_predicate_prune where birthday < '2020-10-03' order by id;
"""

// Upper bound equal to the largest date: only the 2020-10-01 rows (ids 1, 3) expected.
qt_select2 """
select * from test_select_with_predicate_prune where birthday < '2020-10-02' order by id;
"""

// Upper bound equal to the smallest date: no rows expected.
qt_select3 """
select * from test_select_with_predicate_prune where birthday < '2020-10-01' order by id;
"""


// Lower bound below every stored date: all four rows expected.
qt_select4 """
select * from test_select_with_predicate_prune where birthday > '2020-09-30' order by id;
"""

// Lower bound equal to the smallest date: only the 2020-10-02 rows (ids 2, 4) expected.
qt_select5 """
select * from test_select_with_predicate_prune where birthday > '2020-10-01' order by id;
"""

// Lower bound equal to the largest date: no rows expected.
qt_select6 """
select * from test_select_with_predicate_prune where birthday > '2020-10-02' order by id;
"""
}