Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,23 @@ class InListPredicateBase : public ColumnPredicate {
sizeof(decimal12_t))) {
return true;
}
} else if constexpr (Type == PrimitiveType::TYPE_DATE) {
const T* value = (const T*)(iter->get_value());
uint24_t date_value(value->to_olap_date());
if (bf->test_bytes(
const_cast<char*>(reinterpret_cast<const char*>(&date_value)),
sizeof(uint24_t))) {
return true;
}
// DatetimeV1 using int64_t in bloom filter
} else if constexpr (Type == PrimitiveType::TYPE_DATETIME) {
const T* value = (const T*)(iter->get_value());
int64_t datetime_value(value->to_olap_datetime());
if (bf->test_bytes(
const_cast<char*>(reinterpret_cast<const char*>(&datetime_value)),
sizeof(int64_t))) {
return true;
}
} else {
const T* value = (const T*)(iter->get_value());
if (bf->test_bytes(reinterpret_cast<const char*>(value), sizeof(*value))) {
Expand Down
140 changes: 140 additions & 0 deletions be/test/olap/date_bloom_filter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
#include <gtest/gtest.h>

#include "olap/comparison_predicate.h"
#include "olap/in_list_predicate.h"
#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/beta_rowset_writer.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
#include "olap/storage_engine.h"
#include "runtime/define_primitive_type.h"
#include "util/date_func.h"
#include "vec/runtime/vdatetime_value.h"

Expand Down Expand Up @@ -189,4 +191,142 @@ TEST_F(DateBloomFilterTest, query_index_test) {
test("2024-11-20 09:00:00", false);
}
}

// Writes two rows (2024-11-08 and 2024-11-09, the DATETIME column at 09:00:00)
// into a fresh rowset, loads the bloom filter of each column from segment 0 and
// checks that IN-list predicates built from DATE / DATETIME values evaluate
// against that bloom filter as expected:
//   * a list containing at least one inserted value must evaluate to true;
//   * a list containing only absent values is expected to evaluate to false.
//
// Every setup step whose result is dereferenced afterwards uses ASSERT_* so
// that a setup failure aborts this test with a readable message instead of
// crashing on a null pointer / empty result further down.
TEST_F(DateBloomFilterTest, in_list_predicate_test) {
    // Start from an empty tablet directory.
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());

    RowsetSharedPtr rowset;
    const auto& res =
            RowsetFactory::create_rowset_writer(*_engine_ref, rowset_writer_context(), false);
    // res.value() is accessed right below, so a missing value must be fatal.
    ASSERT_TRUE(res.has_value()) << res.error();
    const auto& rowset_writer = res.value();

    Block block = _tablet_schema->create_block();
    auto columns = block.mutate_columns();

    // Insert test data: column 0 receives the olap DATE encoding, column 1 the
    // olap DATETIME encoding of the same day.
    auto date = timestamp_from_date("2024-11-08");
    auto datetime = timestamp_from_datetime("2024-11-08 09:00:00");
    uint24_t olap_date_value(date.to_olap_date());
    uint64_t olap_datetime_value(datetime.to_olap_datetime());
    columns[0]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_date_value), 1);
    columns[1]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_datetime_value), 1);

    date = timestamp_from_date("2024-11-09");
    datetime = timestamp_from_datetime("2024-11-09 09:00:00");
    olap_date_value = date.to_olap_date();
    olap_datetime_value = datetime.to_olap_datetime();
    columns[0]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_date_value), 1);
    columns[1]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_datetime_value), 1);

    // `rowset` is only populated by a successful build(), and it is
    // dereferenced when loading the segment, hence the fatal assertions.
    ASSERT_TRUE(rowset_writer->add_block(&block).ok());
    ASSERT_TRUE(rowset_writer->flush().ok());
    ASSERT_TRUE(rowset_writer->build(rowset).ok());
    ASSERT_TRUE(_tablet->add_rowset(rowset).ok());

    segment_v2::SegmentSharedPtr segment;
    ASSERT_TRUE(static_cast<BetaRowset*>(rowset.get())->load_segment(0, &segment).ok());
    auto st = segment->_create_column_readers(*(segment->_footer_pb));
    ASSERT_TRUE(st.ok());

    // Test DATE column with IN predicate
    {
        using DateSet = HybridSet<PrimitiveType::TYPE_DATE>;
        using DateInPredicate =
                InListPredicateBase<PrimitiveType::TYPE_DATE, PredicateType::IN_LIST, DateSet>;

        const auto& reader = segment->_column_readers[0];
        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
        ASSERT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok());
        ASSERT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok());
        std::unique_ptr<BloomFilter> bf;
        ASSERT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());

        // Builds an IN-list predicate over `values` and compares its
        // bloom-filter evaluation with `expected`.
        auto check_in_list = [&](const std::vector<std::string>& values, bool expected) {
            auto hybrid_set = std::make_shared<DateSet>();
            for (const auto& value : values) {
                auto v = timestamp_from_date(value);
                hybrid_set->insert(&v);
            }
            auto date_pred = std::make_unique<DateInPredicate>(0, hybrid_set);
            EXPECT_EQ(date_pred->evaluate_and(bf.get()), expected);
        };

        // Positive cases: every listed value was inserted.
        check_in_list({"2024-11-08", "2024-11-09"}, true);
        check_in_list({"2024-11-08"}, true);
        check_in_list({"2024-11-09"}, true);

        // Negative cases: the result is true only if at least one listed value
        // was inserted.
        check_in_list({"2024-11-20"}, false);
        check_in_list({"2024-11-08", "2024-11-20"}, true);
        check_in_list({"2024-11-20", "2024-11-21"}, false);
    }

    // Test DATETIME column with IN predicate
    {
        using DatetimeSet = HybridSet<PrimitiveType::TYPE_DATETIME>;
        using DatetimeInPredicate =
                InListPredicateBase<PrimitiveType::TYPE_DATETIME, PredicateType::IN_LIST,
                                    DatetimeSet>;

        const auto& reader = segment->_column_readers[1];
        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
        ASSERT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok());
        ASSERT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok());
        std::unique_ptr<BloomFilter> bf;
        ASSERT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());

        // Builds an IN-list predicate over `values` and compares its
        // bloom-filter evaluation with `expected`.
        // NOTE(review): column id 0 is passed here as well, although the bloom
        // filter comes from column reader 1; presumably the id is not consulted
        // when evaluating directly against a bloom filter -- confirm.
        auto check_in_list = [&](const std::vector<std::string>& values, bool expected) {
            auto hybrid_set = std::make_shared<DatetimeSet>();
            for (const auto& value : values) {
                auto v = timestamp_from_datetime(value);
                hybrid_set->insert(&v);
            }
            auto datetime_pred = std::make_unique<DatetimeInPredicate>(0, hybrid_set);
            EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), expected);
        };

        // Positive cases: every listed value was inserted.
        check_in_list({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}, true);
        check_in_list({"2024-11-08 09:00:00"}, true);
        check_in_list({"2024-11-09 09:00:00"}, true);

        // Negative cases: the result is true only if at least one listed value
        // was inserted.
        check_in_list({"2024-11-20 09:00:00"}, false);
        check_in_list({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}, true);
        check_in_list({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}, false);
    }
}

} // namespace doris
21 changes: 21 additions & 0 deletions regression-test/data/bloom_filter_p0/test_bloom_filter.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_datetime_v1 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

-- !select_datetime_v2 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

-- !select_date_v1 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

-- !select_date_v2 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

35 changes: 35 additions & 0 deletions regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,39 @@ suite("test_bloom_filter") {
sql """ALTER TABLE ${test_json_tb} SET("bloom_filter_columns" = "k1,j1")"""
exception "not supported in bloom filter index"
}

// bloom filter index for datetime/date/decimal columns
def test_datetime_tb = "test_datetime_bloom_filter_tb"
sql """DROP TABLE IF EXISTS ${test_datetime_tb}"""
sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'false')"""
sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'false')"""
sql """CREATE TABLE IF NOT EXISTS ${test_datetime_tb} (
a int,
b int,
c int,
d DATETIMEV1,
d2 DATETIMEV2,
da DATEv1,
dav2 DATEV2,
dec decimal(10,2),
dec2 decimalv2(10,2)
) ENGINE=OLAP
DUPLICATE KEY(a)
DISTRIBUTED BY HASH(a) BUCKETS 5
PROPERTIES (
"replication_num" = "1"
)"""
sql """INSERT INTO ${test_datetime_tb} VALUES
(1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", "2024-12-17", "3.32", "3.32"),
(1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", "2024-12-17", "3.32", "3.32"),
(2,2,2,"2024-12-18 20:00:00", "2024-12-18 20:00:00", "2024-12-18", "2024-12-18", "3.33", "3.33"),
(3,3,3,"2024-12-22 20:00:00", "2024-12-22 20:00:00", "2024-12-22", "2024-12-22", "4.33", "4.33")"""
sql """ALTER TABLE ${test_datetime_tb} SET ("bloom_filter_columns" = "d,d2,da,dav2,dec,dec2")"""
Thread.sleep(3000)
qt_select_datetime_v1 """SELECT * FROM ${test_datetime_tb} WHERE d IN ("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
qt_select_datetime_v2 """SELECT * FROM ${test_datetime_tb} WHERE d2 IN ("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
qt_select_date_v1 """SELECT * FROM ${test_datetime_tb} WHERE da IN ("2024-12-17", "2024-12-18") order by a"""
qt_select_date_v2 """SELECT * FROM ${test_datetime_tb} WHERE dav2 IN ("2024-12-17", "2024-12-18") order by a"""
sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'true')"""
sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'true')"""
}
Loading