From fa3bc3fbbd3deff18d0621675228e2928f19a1ab Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 15 Jan 2025 10:56:08 +0800 Subject: [PATCH] [fix](bloomfilter) fix inlist support for date/datetimev1 in bloomfilter index (#46961) Problem Summary: #43351 fixed date/datetime v1 support in the comparison predicate; this PR applies the same fix to the in-list predicate. --- be/src/olap/in_list_predicate.h | 17 +++ be/test/olap/date_bloom_filter_test.cpp | 140 ++++++++++++++++++ .../bloom_filter_p0/test_bloom_filter.out | 21 +++ .../bloom_filter_p0/test_bloom_filter.groovy | 35 +++++ 4 files changed, 213 insertions(+) create mode 100644 regression-test/data/bloom_filter_p0/test_bloom_filter.out diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index bd91fe147fbb43..6ab4065fe3330c 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -321,6 +321,23 @@ class InListPredicateBase : public ColumnPredicate { sizeof(decimal12_t))) { return true; } + } else if constexpr (Type == PrimitiveType::TYPE_DATE) { + const T* value = (const T*)(iter->get_value()); + uint24_t date_value(value->to_olap_date()); + if (bf->test_bytes( + const_cast(reinterpret_cast(&date_value)), + sizeof(uint24_t))) { + return true; + } + // DatetimeV1 using int64_t in bloom filter + } else if constexpr (Type == PrimitiveType::TYPE_DATETIME) { + const T* value = (const T*)(iter->get_value()); + int64_t datetime_value(value->to_olap_datetime()); + if (bf->test_bytes( + const_cast(reinterpret_cast(&datetime_value)), + sizeof(int64_t))) { + return true; + } } else { const T* value = (const T*)(iter->get_value()); if (bf->test_bytes(reinterpret_cast(value), sizeof(*value))) { diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp index e4b9293021aecd..c528862c87bc28 100644 --- a/be/test/olap/date_bloom_filter_test.cpp +++ b/be/test/olap/date_bloom_filter_test.cpp @@ -18,11 +18,13 @@ #include #include 
"olap/comparison_predicate.h" +#include "olap/in_list_predicate.h" #include "olap/rowset/beta_rowset.h" #include "olap/rowset/beta_rowset_writer.h" #include "olap/rowset/rowset_factory.h" #include "olap/rowset/segment_v2/bloom_filter_index_reader.h" #include "olap/storage_engine.h" +#include "runtime/define_primitive_type.h" #include "util/date_func.h" #include "vec/runtime/vdatetime_value.h" @@ -189,4 +191,142 @@ TEST_F(DateBloomFilterTest, query_index_test) { test("2024-11-20 09:00:00", false); } } + +TEST_F(DateBloomFilterTest, in_list_predicate_test) { + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok()); + + RowsetSharedPtr rowset; + const auto& res = + RowsetFactory::create_rowset_writer(*_engine_ref, rowset_writer_context(), false); + EXPECT_TRUE(res.has_value()) << res.error(); + const auto& rowset_writer = res.value(); + + Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Insert test data + auto date = timestamp_from_date("2024-11-08"); + auto datetime = timestamp_from_datetime("2024-11-08 09:00:00"); + uint24_t olap_date_value(date.to_olap_date()); + uint64_t olap_datetime_value(datetime.to_olap_datetime()); + columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); + columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + + date = timestamp_from_date("2024-11-09"); + datetime = timestamp_from_datetime("2024-11-09 09:00:00"); + olap_date_value = date.to_olap_date(); + olap_datetime_value = datetime.to_olap_datetime(); + columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); + columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + + EXPECT_TRUE(rowset_writer->add_block(&block).ok()); + EXPECT_TRUE(rowset_writer->flush().ok()); + EXPECT_TRUE(rowset_writer->build(rowset).ok()); + 
EXPECT_TRUE(_tablet->add_rowset(rowset).ok()); + + segment_v2::SegmentSharedPtr segment; + EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok()); + auto st = segment->_create_column_readers(*(segment->_footer_pb)); + EXPECT_TRUE(st.ok()); + + // Test DATE column with IN predicate + { + const auto& reader = segment->_column_readers[0]; + std::unique_ptr bf_iter; + EXPECT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok()); + EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok()); + std::unique_ptr bf; + EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok()); + + // Test positive cases + auto test_positive = [&](const std::vector& values, bool result) { + auto hybrid_set = std::make_shared>(); + for (const auto& value : values) { + auto v = timestamp_from_date(value); + hybrid_set->insert(&v); + } + std::unique_ptr>> + date_pred(new InListPredicateBase>( + 0, hybrid_set)); + EXPECT_EQ(date_pred->evaluate_and(bf.get()), result); + }; + + test_positive({"2024-11-08", "2024-11-09"}, true); + test_positive({"2024-11-08"}, true); + test_positive({"2024-11-09"}, true); + + auto test_negative = [&](const std::vector& values, bool result) { + auto hybrid_set = std::make_shared>(); + + for (const auto& value : values) { + auto v = timestamp_from_date(value); + hybrid_set->insert(&v); + } + + std::unique_ptr>> + date_pred(new InListPredicateBase>( + 0, hybrid_set)); + + EXPECT_EQ(date_pred->evaluate_and(bf.get()), result); + }; + + test_negative({"2024-11-20"}, false); + test_negative({"2024-11-08", "2024-11-20"}, true); + test_negative({"2024-11-20", "2024-11-21"}, false); + } + + // Test DATETIME column with IN predicate + { + const auto& reader = segment->_column_readers[1]; + std::unique_ptr bf_iter; + EXPECT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok()); + EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok()); + std::unique_ptr bf; + 
EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok()); + + // Test positive cases + auto test_positive = [&](const std::vector& values, bool result) { + auto hybrid_set = std::make_shared>(); + for (const auto& value : values) { + auto v = timestamp_from_datetime(value); + hybrid_set->insert(&v); + } + std::unique_ptr>> + datetime_pred(new InListPredicateBase>( + 0, hybrid_set)); + EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result); + }; + + test_positive({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}, true); + test_positive({"2024-11-08 09:00:00"}, true); + test_positive({"2024-11-09 09:00:00"}, true); + + // Test negative cases + auto test_negative = [&](const std::vector& values, bool result) { + auto hybrid_set = std::make_shared>(); + for (const auto& value : values) { + auto v = timestamp_from_datetime(value); + hybrid_set->insert(&v); + } + std::unique_ptr>> + datetime_pred(new InListPredicateBase>( + 0, hybrid_set)); + EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result); + }; + + test_negative({"2024-11-20 09:00:00"}, false); + test_negative({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}, true); + test_negative({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}, false); + } +} + } // namespace doris diff --git a/regression-test/data/bloom_filter_p0/test_bloom_filter.out b/regression-test/data/bloom_filter_p0/test_bloom_filter.out new file mode 100644 index 00000000000000..9425c984f08e96 --- /dev/null +++ b/regression-test/data/bloom_filter_p0/test_bloom_filter.out @@ -0,0 +1,21 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select_datetime_v1 -- +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33 + +-- !select_datetime_v2 -- +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33 + +-- !select_date_v1 -- +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33 + +-- !select_date_v2 -- +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32 +2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33 + diff --git a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy index 23e1c7ed596f62..ff8710c5998fac 100644 --- a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy +++ b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy @@ -148,4 +148,39 @@ suite("test_bloom_filter") { sql """ALTER TABLE ${test_json_tb} SET("bloom_filter_columns" = "k1,j1")""" exception "not supported in bloom filter index" } + + // bloom filter index for datetime/date/decimal columns + def test_datetime_tb = "test_datetime_bloom_filter_tb" + sql """DROP TABLE IF EXISTS ${test_datetime_tb}""" + sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'false')""" + sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'false')""" + sql """CREATE TABLE IF NOT EXISTS ${test_datetime_tb} ( + a int, + b int, + c int, + d DATETIMEV1, + d2 DATETIMEV2, + da DATEv1, + dav2 DATEV2, + dec 
decimal(10,2), + dec2 decimalv2(10,2) + ) ENGINE=OLAP + DUPLICATE KEY(a) + DISTRIBUTED BY HASH(a) BUCKETS 5 + PROPERTIES ( + "replication_num" = "1" + )""" + sql """INSERT INTO ${test_datetime_tb} VALUES + (1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", "2024-12-17", "3.32", "3.32"), + (1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", "2024-12-17", "3.32", "3.32"), + (2,2,2,"2024-12-18 20:00:00", "2024-12-18 20:00:00", "2024-12-18", "2024-12-18", "3.33", "3.33"), + (3,3,3,"2024-12-22 20:00:00", "2024-12-22 20:00:00", "2024-12-22", "2024-12-22", "4.33", "4.33")""" + sql """ALTER TABLE ${test_datetime_tb} SET ("bloom_filter_columns" = "d,d2,da,dav2,dec,dec2")""" + Thread.sleep(3000) + qt_select_datetime_v1 """SELECT * FROM ${test_datetime_tb} WHERE d IN ("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a""" + qt_select_datetime_v2 """SELECT * FROM ${test_datetime_tb} WHERE d2 IN ("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a""" + qt_select_date_v1 """SELECT * FROM ${test_datetime_tb} WHERE da IN ("2024-12-17", "2024-12-18") order by a""" + qt_select_date_v2 """SELECT * FROM ${test_datetime_tb} WHERE dav2 IN ("2024-12-17", "2024-12-18") order by a""" + sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'true')""" + sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'true')""" }