Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 53 additions & 38 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,22 +905,25 @@ void TabletColumn::append_sparse_column(TabletColumn column) {
}

void TabletSchema::append_index(TabletIndex&& index) {
for (int32_t id : index.col_unique_ids()) {
_col_id_suffix_to_index.emplace(
std::make_tuple(index.index_type(), id, index.get_index_suffix()), _indexes.size());
}
_indexes.push_back(std::make_shared<TabletIndex>(index));
}

void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type,
TabletIndex&& index) {
int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
const std::string& suffix_path = escape_for_path_name(col.suffix_path());
for (auto& _indexe : _indexes) {
for (int32_t id : _indexe->col_unique_ids()) {
if (_indexe->index_type() == index_type && id == col_unique_id &&
_indexe->get_index_suffix() == suffix_path) {
_indexe = std::make_shared<TabletIndex>(std::move(index));
break;
}
}
IndexKey key(index_type, col_unique_id, suffix_path);
auto iter = _col_id_suffix_to_index.find(key);
if (iter != _col_id_suffix_to_index.end()) {
_indexes[iter->second] = std::make_shared<TabletIndex>(std::move(index));
return;
}
LOG(WARNING) << " failed to update_index: " << index_type << " " << col_unique_id << " "
<< suffix_path;
}

void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
Expand All @@ -938,17 +941,25 @@ void TabletSchema::clear_index_cache_handlers() {
void TabletSchema::clear_index() {
clear_index_cache_handlers();
_indexes.clear();
_col_id_suffix_to_index.clear();
}

void TabletSchema::remove_index(int64_t index_id) {
std::vector<TabletIndexPtr> indexes;
std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
for (auto index : _indexes) {
if (index->index_id() == index_id) {
continue;
}
for (int32_t col_uid : index->col_unique_ids()) {
col_id_suffix_to_index.emplace(
std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
indexes.size());
}
indexes.emplace_back(std::move(index));
}
_indexes = std::move(indexes);
_col_id_suffix_to_index = std::move(col_id_suffix_to_index);
}

void TabletSchema::clear_columns() {
Expand Down Expand Up @@ -979,6 +990,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac
_num_null_columns = 0;
_cols.clear();
_indexes.clear();
_col_id_suffix_to_index.clear();
_field_name_to_index.clear();
_field_id_to_index.clear();
_cluster_key_uids.clear();
Expand Down Expand Up @@ -1032,6 +1044,11 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac
index = std::make_shared<TabletIndex>();
index->init_from_pb(index_pb);
}
for (int32_t col_uid : index->col_unique_ids()) {
_col_id_suffix_to_index.emplace(
std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
_indexes.size());
}
_indexes.emplace_back(std::move(index));
}
_num_short_key_columns = schema.num_short_key_columns();
Expand Down Expand Up @@ -1150,6 +1167,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version
bool has_bf_columns = false;
_cols.clear();
_indexes.clear();
_col_id_suffix_to_index.clear();
_field_name_to_index.clear();
_field_id_to_index.clear();
_delete_sign_idx = -1;
Expand Down Expand Up @@ -1189,7 +1207,12 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version
_num_columns++;
}

for (auto& i : index->indexes) {
for (const auto& i : index->indexes) {
for (int32_t col_uid : i->col_unique_ids()) {
_col_id_suffix_to_index.emplace(
std::make_tuple(i->index_type(), col_uid, i->get_index_suffix()),
_indexes.size());
}
_indexes.emplace_back(std::make_shared<TabletIndex>(*i));
}

Expand Down Expand Up @@ -1371,6 +1394,15 @@ void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTabl
indexes.emplace_back(std::make_shared<TabletIndex>(std::move(index)));
}
_indexes = std::move(indexes);
std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
for (size_t i = 0; i < _indexes.size(); i++) {
for (int32_t col_uid : _indexes[i]->col_unique_ids()) {
col_id_suffix_to_index.emplace(std::make_tuple(_indexes[i]->index_type(), col_uid,
_indexes[i]->get_index_suffix()),
i);
}
}
_col_id_suffix_to_index = std::move(col_id_suffix_to_index);
}

bool TabletSchema::exist_column(const std::string& field_name) const {
Expand Down Expand Up @@ -1426,14 +1458,10 @@ bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
const std::string& suffix_path) const {
const std::string escaped_suffix = escape_for_path_name(suffix_path);
for (const auto& _index : _indexes) {
if (_index->index_type() == IndexType::INVERTED) {
for (int32_t id : _index->col_unique_ids()) {
if (id == col_unique_id && _index->get_index_suffix() == escaped_suffix) {
return _index.get();
}
}
}
auto it = _col_id_suffix_to_index.find(
std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix));
if (it != _col_id_suffix_to_index.end()) {
return _indexes[it->second].get();
}
return nullptr;
}
Expand All @@ -1450,30 +1478,17 @@ const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const {
}

bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
// TODO use more efficient impl
for (const auto& _index : _indexes) {
if (_index->index_type() == IndexType::NGRAM_BF) {
for (int32_t id : _index->col_unique_ids()) {
if (id == col_unique_id) {
return true;
}
}
}
}

return false;
IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
auto it = _col_id_suffix_to_index.find(index_key);
return it != _col_id_suffix_to_index.end();
}

const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
// TODO use more efficient impl
for (const auto& _index : _indexes) {
if (_index->index_type() == IndexType::NGRAM_BF) {
for (int32_t id : _index->col_unique_ids()) {
if (id == col_unique_id) {
return _index.get();
}
}
}
// Get the ngram bf index for the given column unique id
IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
auto it = _col_id_suffix_to_index.find(index_key);
if (it != _col_id_suffix_to_index.end()) {
return _indexes[it->second].get();
}
return nullptr;
}
Expand Down
17 changes: 17 additions & 0 deletions be/src/olap/tablet_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,23 @@ class TabletSchema : public MetadataAdder<TabletSchema> {
std::unordered_map<int32_t, int32_t> _field_id_to_index;
std::unordered_map<vectorized::PathInDataRef, int32_t, vectorized::PathInDataRef::Hash>
_field_path_to_index;

// index_type/col_unique_id/suffix -> idx in _indexes
using IndexKey = std::tuple<IndexType, int32_t, std::string>;
struct IndexKeyHash {
size_t operator()(const IndexKey& t) const {
std::size_t seed = 0;
seed = doris::HashUtil::hash((const char*)&std::get<0>(t), sizeof(std::get<0>(t)),
seed);
seed = doris::HashUtil::hash((const char*)&std::get<1>(t), sizeof(std::get<1>(t)),
seed);
seed = doris::HashUtil::hash((const char*)std::get<2>(t).c_str(), std::get<2>(t).size(),
seed);
return seed;
}
};
std::unordered_map<IndexKey, int32_t, IndexKeyHash> _col_id_suffix_to_index;

size_t _num_columns = 0;
size_t _num_variant_columns = 0;
size_t _num_key_columns = 0;
Expand Down
174 changes: 174 additions & 0 deletions be/test/olap/tablet_schema_index_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include "olap/tablet_schema.h"

namespace doris {

class TabletSchemaIndexTest : public testing::Test {
protected:
void SetUp() override {
// Setup common test data
_tablet_schema = std::make_shared<TabletSchema>();
}

TabletIndex create_test_index(int64_t index_id, IndexType type,
const std::vector<int32_t>& col_uids,
const std::string& suffix = "") {
TabletIndex index;
index._index_id = index_id;
index._index_type = type;
index._col_unique_ids = col_uids;
index.set_escaped_escaped_index_suffix_path(suffix);
return index;
}

std::shared_ptr<TabletSchema> _tablet_schema;
};

TEST_F(TabletSchemaIndexTest, TestAddInvertedIndex) {
// Add inverted index with suffix
TabletIndex index = create_test_index(1, IndexType::INVERTED, {100}, "suffix1");
_tablet_schema->append_index(std::move(index));

// Verify index mapping
auto* found_index = _tablet_schema->inverted_index(100, "suffix1");
ASSERT_NE(found_index, nullptr);
EXPECT_EQ(found_index->index_id(), 1);
EXPECT_EQ(found_index->get_index_suffix(), "suffix1");
}

TEST_F(TabletSchemaIndexTest, TestRemoveIndex) {
// Add multiple indexes
_tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "suffix1"));
_tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, {200}, "suffix2"));

// Remove index 1
_tablet_schema->remove_index(1);

// Verify index 1 removed
EXPECT_EQ(_tablet_schema->inverted_index(100, "suffix1"), nullptr);

// Verify index 2 still exists
auto* found_index = _tablet_schema->inverted_index(200, "suffix2");
ASSERT_NE(found_index, nullptr);
EXPECT_EQ(found_index->index_id(), 2);
}

TEST_F(TabletSchemaIndexTest, TestUpdateIndex) {
// Add initial index
_tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "old_suffix"));
ASSERT_NE(_tablet_schema->inverted_index(100, "old_suffix"), nullptr);

// Update index with new suffix
_tablet_schema->remove_index(1);
_tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "new_suffix"));

// Verify update
EXPECT_EQ(_tablet_schema->inverted_index(100, "old_suffix"), nullptr);
auto* found_index = _tablet_schema->inverted_index(100, "new_suffix");
ASSERT_NE(found_index, nullptr);
EXPECT_EQ(found_index->get_index_suffix(), "new%5Fsuffix");
}

TEST_F(TabletSchemaIndexTest, TestMultipleColumnsIndex) {
// Add index with multiple columns
TabletIndex index = create_test_index(1, IndexType::INVERTED, {100, 200}, "multi_col");
_tablet_schema->append_index(std::move(index));

// Verify both columns mapped
auto* index1 = _tablet_schema->inverted_index(100, "multi_col");
auto* index2 = _tablet_schema->inverted_index(200, "multi_col");
ASSERT_NE(index1, nullptr);
ASSERT_EQ(index1, index2); // Should point to same index
}

TEST_F(TabletSchemaIndexTest, TestDuplicateIndexKey) {
// Add two indexes with same (type,col,suffix)
_tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "suffix"));
_tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, {100}, "suffix"));

// The last added should override
auto* found_index = _tablet_schema->inverted_index(100, "suffix");
ASSERT_NE(found_index, nullptr);
EXPECT_EQ(found_index->index_id(), 1);
}

TEST_F(TabletSchemaIndexTest, TestClearIndexes) {
_tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}));
_tablet_schema->clear_index();

EXPECT_EQ(_tablet_schema->inverted_index(100, ""), nullptr);
EXPECT_TRUE(_tablet_schema->inverted_indexes().empty());
}

TEST_F(TabletSchemaIndexTest, TestUpdateIndexMethod) {
TabletColumn col;
col.set_parent_unique_id(100);
col.set_path_info(vectorized::PathInData("v2"));
_tablet_schema->append_column(col);

TabletIndex old_index = create_test_index(1, IndexType::INVERTED, {100}, "v2");
_tablet_schema->append_index(std::move(old_index));

TabletIndex new_index = create_test_index(1, IndexType::INVERTED, {100}, "v2");
new_index._properties["new_prop"] = "value";

_tablet_schema->update_index(col, IndexType::INVERTED, std::move(new_index));

const TabletIndex* updated_index = _tablet_schema->inverted_index(100, "v2");
ASSERT_NE(updated_index, nullptr);
EXPECT_EQ(updated_index->index_id(), 1);
EXPECT_EQ(updated_index->properties().at("new_prop"), "value");

auto key = std::make_tuple(IndexType::INVERTED, 100, "v2");
EXPECT_NE(_tablet_schema->_col_id_suffix_to_index.find(key),
_tablet_schema->_col_id_suffix_to_index.end());
}

TEST_F(TabletSchemaIndexTest, TestUpdateIndexAddNewWhenNotExist) {
// Not exist, return nullptr
TabletColumn col;
col.set_unique_id(200);

TabletIndex new_index = create_test_index(2, IndexType::INVERTED, {200}, "v3");
_tablet_schema->update_index(col, IndexType::INVERTED, std::move(new_index));

const TabletIndex* index = _tablet_schema->inverted_index(200, "v3");
ASSERT_EQ(index, nullptr);
}

TEST_F(TabletSchemaIndexTest, TestUpdateIndexWithMultipleColumns) {
TabletColumn col1, col2;
col1.set_unique_id(300);
col2.set_unique_id(400);
_tablet_schema->append_column(col1);
_tablet_schema->append_column(col2);

TabletIndex old_multi_index = create_test_index(3, IndexType::INVERTED, {300, 400}, "multi");
_tablet_schema->append_index(std::move(old_multi_index));

TabletIndex new_multi_index = create_test_index(3, IndexType::NGRAM_BF, {300, 400});
_tablet_schema->append_index(std::move(new_multi_index));

ASSERT_NE(_tablet_schema->inverted_index(300, "multi"), nullptr);
EXPECT_NE(_tablet_schema->get_ngram_bf_index(400), nullptr);
}

} // namespace doris
Loading