From 930c944e719f323fd711eb831669b712d3f2e3fb Mon Sep 17 00:00:00 2001 From: uchenily Date: Fri, 29 Aug 2025 14:39:43 +0800 Subject: [PATCH 1/8] create index --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 14 +++++++++++--- .../trees/plans/commands/info/CreateIndexOp.java | 6 ------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 717c62a2f41e3f..7d4fdeac65c946 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -804,9 +804,17 @@ Status SegmentIterator::_apply_ann_topn_predicate() { vectorized::IColumn::MutablePtr result_column; std::unique_ptr> result_row_ids; segment_v2::AnnIndexStats ann_index_stats; - RETURN_IF_ERROR(_ann_topn_runtime->evaluate_vector_ann_search(ann_index_iterator, &_row_bitmap, - rows_of_segment, result_column, - result_row_ids, ann_index_stats)); + Status st = _ann_topn_runtime->evaluate_vector_ann_search(ann_index_iterator, &_row_bitmap, + rows_of_segment, result_column, + result_row_ids, ann_index_stats); + if (!st.ok()) { + if (_downgrade_without_index(st)) { + // fallback + return Status::OK(); + } else { + return st; + } + } VLOG_DEBUG << fmt::format("Ann topn filtered {} - {} = {} rows", pre_size, _row_bitmap.cardinality(), pre_size - _row_bitmap.cardinality()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java index 9ecd3946678850..b2cefe7ca56489 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateIndexOp.java @@ -20,7 +20,6 @@ import org.apache.doris.alter.AlterOpType; import org.apache.doris.analysis.AlterTableClause; import org.apache.doris.analysis.CreateIndexClause; -import org.apache.doris.analysis.IndexDef; import org.apache.doris.catalog.Index; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; @@ -80,11 +79,6 @@ public void validate(ConnectContext ctx) throws UserException { tableName.analyze(ctx); } - if (indexDef.getIndexType() == IndexDef.IndexType.ANN) { - throw new AnalysisException( - "ANN index can only be created during table creation, not through CREATE INDEX."); - } - indexDef.validate(); index = indexDef.translateToCatalogStyle(); } From c32b5ffdf14d652f5f3e040414bfc7fef6ab8b86 Mon Sep 17 00:00:00 2001 From: uchenily Date: Tue, 2 Sep 2025 17:06:36 +0800 Subject: [PATCH 2/8] build index --- .../olap/rowset/segment_v2/column_writer.cpp | 8 +- be/src/olap/rowset/segment_v2/column_writer.h | 2 +- .../olap/rowset/segment_v2/index_writer.cpp | 114 ++++++----- .../rowset/segment_v2/segment_iterator.cpp | 10 +- be/src/olap/tablet_schema.cpp | 4 +- be/src/olap/tablet_schema.h | 2 + be/src/olap/task/index_builder.cpp | 184 +++++++++++++----- be/src/olap/task/index_builder.h | 2 +- .../doris/alter/SchemaChangeHandler.java | 6 +- .../java/org/apache/doris/catalog/Index.java | 9 +- .../plans/commands/info/BuildIndexOp.java | 15 +- 11 files changed, 233 insertions(+), 123 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 4d2f3f45f39f96..b165b2b766a6d1 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -938,7 +938,7 @@ Status ArrayColumnWriter::init() { if (_opts.need_inverted_index) { auto* writer = dynamic_cast(_item_writer.get()); if (writer != nullptr) { - RETURN_IF_ERROR(IndexColumnWriter::create(get_field(), &_inverted_index_builder, + RETURN_IF_ERROR(IndexColumnWriter::create(get_field(), &_inverted_index_writer, _opts.index_file_writer, _opts.inverted_indexes[0])); } @@ -956,7 +956,7 @@ Status ArrayColumnWriter::init() { Status ArrayColumnWriter::write_inverted_index() { if (_opts.need_inverted_index) { - return _inverted_index_builder->finish(); + return _inverted_index_writer->finish(); } return Status::OK(); } @@ -988,7 +988,7 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { // now only support nested type is scala if (writer != nullptr) { //NOTE: use array field name as index field, but item_writer size should be used when moving item_data_ptr - RETURN_IF_ERROR(_inverted_index_builder->add_array_values( + RETURN_IF_ERROR(_inverted_index_writer->add_array_values( _item_writer->get_field()->size(), reinterpret_cast(data), reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); } @@ -1025,7 +1025,7 @@ Status ArrayColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t RETURN_IF_ERROR(append_data(ptr, num_rows)); if (is_nullable()) { if (_opts.need_inverted_index) { - RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(null_map, num_rows)); + RETURN_IF_ERROR(_inverted_index_writer->add_array_nulls(null_map, num_rows)); } RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows)); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 05955840bacf39..9e39ef45bb4c00 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -492,7 +492,7 @@ class ArrayColumnWriter final : public ColumnWriter { std::unique_ptr _offset_writer; std::unique_ptr _null_writer; std::unique_ptr _item_writer; - std::unique_ptr _inverted_index_builder; + std::unique_ptr _inverted_index_writer; std::unique_ptr _ann_index_writer; ColumnWriterOptions _opts; }; diff --git a/be/src/olap/rowset/segment_v2/index_writer.cpp b/be/src/olap/rowset/segment_v2/index_writer.cpp index d5cf7f11b0332c..70c51b7be13eb1 100644 --- a/be/src/olap/rowset/segment_v2/index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/index_writer.cpp @@ -17,6 +17,7 @@ #include "common/exception.h" #include "olap/field.h" +#include "olap/rowset/segment_v2/ann_index/ann_index_writer.h" #include "olap/rowset/segment_v2/inverted_index_writer.h" namespace doris::segment_v2 { @@ -40,10 +41,11 @@ bool IndexColumnWriter::check_support_inverted_index(const TabletColumn& column) } bool IndexColumnWriter::check_support_ann_index(const TabletColumn& column) { - // bellow types are not supported in inverted index for extracted columns + // only array are supported in ann index return column.is_array_type(); } +// create index writer Status IndexColumnWriter::create(const Field* field, std::unique_ptr* res, IndexFileWriter* index_file_writer, const TabletIndex* index_meta) { @@ -62,64 +64,78 @@ Status IndexColumnWriter::create(const Field* field, std::unique_ptrunique_id()); } } - bool single_field = true; - if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) { - const auto* array_typeinfo = dynamic_cast(typeinfo); - DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_array_typeinfo_is_nullptr", - { array_typeinfo = nullptr; }) - if (array_typeinfo != nullptr) { - typeinfo = array_typeinfo->item_type_info(); - type = typeinfo->type(); - single_field = false; - } else { - return Status::NotSupported("unsupported array type for inverted index: " + - std::to_string(int(type))); + + if (index_meta->is_inverted_index()) { + bool single_field = true; + if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) { + const auto* array_typeinfo = dynamic_cast(typeinfo); + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_array_typeinfo_is_nullptr", + { array_typeinfo = nullptr; }) + if (array_typeinfo != nullptr) { + typeinfo = array_typeinfo->item_type_info(); + type = typeinfo->type(); + single_field = false; + } else { + return Status::NotSupported("unsupported array type for inverted index: " + + std::to_string(int(type))); + } } - } - DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_unsupported_type_for_inverted_index", - { type = FieldType::OLAP_FIELD_TYPE_JSONB; }) - switch (type) { + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_unsupported_type_for_inverted_index", + { type = FieldType::OLAP_FIELD_TYPE_JSONB; }) + switch (type) { #define M(TYPE) \ case TYPE: \ *res = std::make_unique>(field_name, index_file_writer, \ index_meta, single_field); \ break; - M(FieldType::OLAP_FIELD_TYPE_TINYINT) - M(FieldType::OLAP_FIELD_TYPE_SMALLINT) - M(FieldType::OLAP_FIELD_TYPE_INT) - M(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT) - M(FieldType::OLAP_FIELD_TYPE_BIGINT) - M(FieldType::OLAP_FIELD_TYPE_LARGEINT) - M(FieldType::OLAP_FIELD_TYPE_CHAR) - M(FieldType::OLAP_FIELD_TYPE_VARCHAR) - M(FieldType::OLAP_FIELD_TYPE_STRING) - M(FieldType::OLAP_FIELD_TYPE_DATE) - M(FieldType::OLAP_FIELD_TYPE_DATETIME) - M(FieldType::OLAP_FIELD_TYPE_DECIMAL) - M(FieldType::OLAP_FIELD_TYPE_DATEV2) - M(FieldType::OLAP_FIELD_TYPE_DATETIMEV2) - M(FieldType::OLAP_FIELD_TYPE_DECIMAL32) - M(FieldType::OLAP_FIELD_TYPE_DECIMAL64) - M(FieldType::OLAP_FIELD_TYPE_DECIMAL128I) - M(FieldType::OLAP_FIELD_TYPE_DECIMAL256) - M(FieldType::OLAP_FIELD_TYPE_BOOL) - M(FieldType::OLAP_FIELD_TYPE_IPV4) - M(FieldType::OLAP_FIELD_TYPE_IPV6) - M(FieldType::OLAP_FIELD_TYPE_FLOAT) - M(FieldType::OLAP_FIELD_TYPE_DOUBLE) + M(FieldType::OLAP_FIELD_TYPE_TINYINT) + M(FieldType::OLAP_FIELD_TYPE_SMALLINT) + M(FieldType::OLAP_FIELD_TYPE_INT) + M(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT) + M(FieldType::OLAP_FIELD_TYPE_BIGINT) + M(FieldType::OLAP_FIELD_TYPE_LARGEINT) + M(FieldType::OLAP_FIELD_TYPE_CHAR) + M(FieldType::OLAP_FIELD_TYPE_VARCHAR) + M(FieldType::OLAP_FIELD_TYPE_STRING) + M(FieldType::OLAP_FIELD_TYPE_DATE) + M(FieldType::OLAP_FIELD_TYPE_DATETIME) + M(FieldType::OLAP_FIELD_TYPE_DECIMAL) + M(FieldType::OLAP_FIELD_TYPE_DATEV2) + M(FieldType::OLAP_FIELD_TYPE_DATETIMEV2) + M(FieldType::OLAP_FIELD_TYPE_DECIMAL32) + M(FieldType::OLAP_FIELD_TYPE_DECIMAL64) + M(FieldType::OLAP_FIELD_TYPE_DECIMAL128I) + M(FieldType::OLAP_FIELD_TYPE_DECIMAL256) + M(FieldType::OLAP_FIELD_TYPE_BOOL) + M(FieldType::OLAP_FIELD_TYPE_IPV4) + M(FieldType::OLAP_FIELD_TYPE_IPV6) + M(FieldType::OLAP_FIELD_TYPE_FLOAT) + M(FieldType::OLAP_FIELD_TYPE_DOUBLE) #undef M - default: - return Status::NotSupported("unsupported type for inverted index: " + - std::to_string(int(type))); - } - if (*res != nullptr) { - auto st = (*res)->init(); - if (!st.ok()) { - (*res)->close_on_error(); - return st; + default: + return Status::NotSupported("unsupported type for inverted index: " + + std::to_string(int(type))); + } + if (*res != nullptr) { + auto st = (*res)->init(); + if (!st.ok()) { + (*res)->close_on_error(); + return st; + } + } + } else if (index_meta->is_ann_index()) { + DCHECK(type == FieldType::OLAP_FIELD_TYPE_ARRAY); + *res = std ::make_unique(index_file_writer, index_meta); + if (*res != nullptr) { + auto st = (*res)->init(); + if (!st.ok()) { + (*res)->close_on_error(); + return st; + } } } + return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 7d4fdeac65c946..7bbf97d1c4ac7d 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1690,22 +1690,22 @@ Status SegmentIterator::_seek_columns(const std::vector& column_ids, r * This is an estimate, if we want more precise cost, statistics collection is necessary(this is a todo). * In short, when returned non-pred columns contains string/hll/bitmap, we using Lazy Materialization. * Otherwise, we disable it. - * + * * When Lazy Materialization enable, we need to read column at least two times. * First time to read Pred col, second time to read non-pred. * Here's an interesting question to research, whether read Pred col once is the best plan. * (why not read Pred col twice or more?) * * When Lazy Materialization disable, we just need to read once. - * - * + * + * * 2 Whether the predicate type can be evaluate in a fast way(using SIMD to eval pred) * Such as integer type and float type, they can be eval fast. * But for BloomFilter/string/date, they eval slow. * If a type can be eval fast, we use vectorization to eval it. * Otherwise, we use short-circuit to eval it. - * - * + * + * */ // todo(wb) need a UT here diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 079cf9278ffbaa..b3331cdfb187a4 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1544,7 +1544,8 @@ void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema, bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const { for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i]->index_type() == IndexType::INVERTED && + if ((_indexes[i]->index_type() == IndexType::INVERTED || + _indexes[i]->index_type() == IndexType::ANN) && _indexes[i]->index_id() == index_id) { return true; } @@ -1645,7 +1646,6 @@ const TabletIndex* TabletSchema::ann_index(int32_t col_unique_id, } const TabletIndex* TabletSchema::ann_index(const TabletColumn& col) const { - // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index if (!segment_v2::IndexColumnWriter::check_support_ann_index(col)) { return nullptr; } diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 57463316cdf038..788038f476d420 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -341,6 +341,8 @@ class TabletIndex : public MetadataAdder { bool is_inverted_index() const { return _index_type == IndexType::INVERTED; } + bool is_ann_index() const { return _index_type == IndexType::ANN; } + void remove_parser_and_analyzer() { _properties.erase(INVERTED_INDEX_PARSER_KEY); _properties.erase(INVERTED_INDEX_PARSER_KEY_ALIAS); diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 9dd418f4ace203..2a4cd80857ca10 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -19,6 +19,7 @@ #include +#include "common/logging.h" #include "common/status.h" #include "olap/olap_define.h" #include "olap/rowset/beta_rowset.h" @@ -50,7 +51,7 @@ IndexBuilder::IndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, IndexBuilder::~IndexBuilder() { _olap_data_convertor.reset(); - _inverted_index_builders.clear(); + _index_column_writers.clear(); } Status IndexBuilder::init() { @@ -113,12 +114,9 @@ Status IndexBuilder::update_inverted_index_info() { } } auto column = output_rs_tablet_schema->column(column_idx); + + // inverted index auto index_metas = output_rs_tablet_schema->inverted_indexs(column); - if (index_metas.empty()) { - LOG(ERROR) << "failed to find column: " << column_name - << " index_id: " << t_inverted_index.index_id; - continue; - } for (const auto& index_meta : index_metas) { if (output_rs_tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { @@ -143,7 +141,36 @@ Status IndexBuilder::update_inverted_index_info() { // remove dropped index_meta from output rowset tablet schema output_rs_tablet_schema->remove_index(index_meta->index_id()); } + + // ann index + const auto* ann_index = output_rs_tablet_schema->ann_index(column); + if (!ann_index) { + continue; + } + if (output_rs_tablet_schema->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + const auto& fs = io::global_local_filesystem(); + + for (int seg_id = 0; seg_id < num_segments; seg_id++) { + auto seg_path = + local_segment_path(_tablet->tablet_path(), + input_rowset->rowset_id().to_string(), seg_id); + auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), + ann_index->index_id(), ann_index->get_index_suffix()); + int64_t index_size = 0; + RETURN_IF_ERROR(fs->file_size(index_path, &index_size)); + VLOG_DEBUG << "inverted index file:" << index_path + << " size:" << index_size; + drop_index_size += index_size; + } + } + _dropped_inverted_indexes.push_back(*ann_index); + // ATTN: DO NOT REMOVE INDEX AFTER OUTPUT_ROWSET_WRITER CREATED. + // remove dropped index_meta from output rowset tablet schema + output_rs_tablet_schema->remove_index(ann_index->index_id()); } + DBUG_EXECUTE_IF("index_builder.update_inverted_index_info.drop_index", { auto indexes_count = DebugPoints::instance()->get_debug_param_or_default( "index_builder.update_inverted_index_info.drop_index", "indexes_count", 0); @@ -172,6 +199,8 @@ Status IndexBuilder::update_inverted_index_info() { continue; } const TabletColumn& col = output_rs_tablet_schema->column_by_uid(column_uid); + + // inverted index auto exist_indexs = output_rs_tablet_schema->inverted_indexs(col); for (const auto& exist_index : exist_indexs) { if (exist_index->index_id() != index.index_id()) { @@ -188,6 +217,23 @@ Status IndexBuilder::update_inverted_index_info() { } } } + + // ann index + const auto* exist_index = output_rs_tablet_schema->ann_index(col); + if (exist_index && exist_index->index_id() != index.index_id()) { + if (exist_index->is_same_except_id(&index)) { + LOG(WARNING) << fmt::format( + "column: {} has a exist ann index, but the index id not " + "equal " + "request's index id, , exist index id: {}, request's index id: " + "{}, " + "remove exist index in new output_rs_tablet_schema", + column_uid, exist_index->index_id(), index.index_id()); + without_index_uids.insert(exist_index->index_id()); + output_rs_tablet_schema->remove_index(exist_index->index_id()); + } + } + output_rs_tablet_schema->append_index(std::move(index)); } } @@ -357,7 +403,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta LOG(INFO) << "all row nums. source_rows=" << output_rowset_meta->num_rows(); return Status::OK(); } else { - // create inverted index writer + // create inverted or ann index writer const auto& fs = output_rowset_meta->fs(); auto output_rowset_schema = output_rowset_meta->tablet_schema(); size_t inverted_index_size = 0; @@ -402,8 +448,10 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta fs, index_path_prefix, output_rowset_meta->rowset_id().to_string(), seg_ptr->id(), output_rowset_schema->get_inverted_index_storage_format()); } - // create inverted index writer + // create inverted index writer, or ann index writer for (auto inverted_index : _alter_inverted_indexes) { + DCHECK(inverted_index.index_type == TIndexType::INVERTED || + inverted_index.index_type == TIndexType::ANN); DCHECK_EQ(inverted_index.columns.size(), 1); auto index_id = inverted_index.index_id; auto column_name = inverted_index.columns[0]; @@ -425,44 +473,79 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta // variant column is not support for building index auto is_support_inverted_index = IndexColumnWriter::check_support_inverted_index(column); + auto is_support_ann_index = IndexColumnWriter::check_support_ann_index(column); DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_support_inverted_index", { is_support_inverted_index = false; }) - if (!is_support_inverted_index) { + if (!is_support_inverted_index && !is_support_ann_index) { continue; } DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id)); _olap_data_convertor->add_column_data_convertor(column); return_columns.emplace_back(column_idx); std::unique_ptr field(FieldFactory::create(column)); - auto index_metas = output_rowset_schema->inverted_indexs(column); - for (const auto& index_meta : index_metas) { - if (index_meta->index_id() != index_id) { - continue; - } - std::unique_ptr inverted_index_builder; - try { - RETURN_IF_ERROR(segment_v2::IndexColumnWriter::create( - field.get(), &inverted_index_builder, index_file_writer.get(), - index_meta)); - DBUG_EXECUTE_IF( - "IndexBuilder::handle_single_rowset_index_column_writer_create_" - "error", - { - _CLTHROWA(CL_ERR_IO, - "debug point: " - "handle_single_rowset_index_column_writer_create_" - "error"); - }) - } catch (const std::exception& e) { - return Status::Error( - "CLuceneError occured: {}", e.what()); + + if (inverted_index.index_type == TIndexType::INVERTED) { + // inverted index + auto index_metas = output_rowset_schema->inverted_indexs(column); + for (const auto& index_meta : index_metas) { + if (index_meta->index_id() != index_id) { + continue; + } + std::unique_ptr inverted_index_builder; + try { + RETURN_IF_ERROR(segment_v2::IndexColumnWriter::create( + field.get(), &inverted_index_builder, index_file_writer.get(), + index_meta)); + DBUG_EXECUTE_IF( + "IndexBuilder::handle_single_rowset_index_column_writer_create_" + "error", + { + _CLTHROWA(CL_ERR_IO, + "debug point: " + "handle_single_rowset_index_column_writer_create_" + "error"); + }) + } catch (const std::exception& e) { + return Status::Error( + "CLuceneError occured: {}", e.what()); + } + + if (inverted_index_builder) { + auto writer_sign = std::make_pair(seg_ptr->id(), index_id); + _index_column_writers.insert( + std::make_pair(writer_sign, std::move(inverted_index_builder))); + inverted_index_writer_signs.emplace_back(writer_sign); + } } + } else if (inverted_index.index_type == TIndexType::ANN) { + // ann index + const auto* index_meta = output_rowset_schema->ann_index(column); + if (index_meta && index_meta->index_id() == index_id) { + std::unique_ptr index_writer; + try { + RETURN_IF_ERROR(segment_v2::IndexColumnWriter::create( + field.get(), &index_writer, index_file_writer.get(), + index_meta)); + DBUG_EXECUTE_IF( + "IndexBuilder::handle_single_rowset_index_column_writer_create_" + "error", + { + _CLTHROWA(CL_ERR_IO, + "debug point: " + "handle_single_rowset_index_column_writer_create_" + "error"); + }) + } catch (const std::exception& e) { + return Status::Error( + "CLuceneError occured: {}", e.what()); + } - if (inverted_index_builder) { - auto writer_sign = std::make_pair(seg_ptr->id(), index_id); - _inverted_index_builders.insert( - std::make_pair(writer_sign, std::move(inverted_index_builder))); - inverted_index_writer_signs.emplace_back(writer_sign); + if (index_writer) { + auto writer_sign = std::make_pair(seg_ptr->id(), index_id); + _index_column_writers.insert( + std::make_pair(writer_sign, std::move(index_writer))); + inverted_index_writer_signs.emplace_back(writer_sign); + } } } } @@ -510,7 +593,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta return status; } - // write inverted index data + // write inverted index data, or ann index data status = _write_inverted_index_data(output_rowset_schema, iter->data_id(), block.get()); DBUG_EXECUTE_IF( @@ -529,8 +612,8 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta // finish write inverted index, flush data to compound file for (auto& writer_sign : inverted_index_writer_signs) { try { - if (_inverted_index_builders[writer_sign]) { - RETURN_IF_ERROR(_inverted_index_builders[writer_sign]->finish()); + if (_index_column_writers[writer_sign]) { + RETURN_IF_ERROR(_index_column_writers[writer_sign]->finish()); } DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_index_build_finish_error", { _CLTHROWA(CL_ERR_IO, @@ -556,7 +639,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta } inverted_index_size += index_file_writer->get_index_file_total_size(); } - _inverted_index_builders.clear(); + _index_column_writers.clear(); _index_file_writers.clear(); output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size()); output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() + @@ -571,7 +654,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta Status IndexBuilder::_write_inverted_index_data(TabletSchemaSPtr tablet_schema, int64_t segment_idx, vectorized::Block* block) { - VLOG_DEBUG << "begin to write inverted index"; + VLOG_DEBUG << "begin to write inverted/ann index"; // converter block data _olap_data_convertor->set_source_content(block, 0, block->rows()); for (auto i = 0; i < _alter_inverted_indexes.size(); ++i) { @@ -634,14 +717,14 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, try { auto data = *(data_ptr + 2); auto nested_null_map = *(data_ptr + 3); - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( + RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_array_values( field->get_sub_field(0)->size(), reinterpret_cast(data), reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", { _CLTHROWA(CL_ERR_IO, "debug point: _add_nullable_add_array_values_error"); }) - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_nulls(null_map, - num_rows)); + RETURN_IF_ERROR( + _index_column_writers[index_writer_sign]->add_array_nulls(null_map, num_rows)); } catch (const std::exception& e) { return Status::Error( "CLuceneError occured: {}", e.what()); @@ -665,11 +748,11 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, do { auto step = next_run_step(); if (null_map[offset]) { - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls( + RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_nulls( static_cast(step))); } else { - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values( - column_name, *ptr, step)); + RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_values(column_name, + *ptr, step)); } *ptr += field->size() * step; offset += step; @@ -699,13 +782,13 @@ Status IndexBuilder::_add_data(const std::string& column_name, if (element_cnt > 0) { auto data = *(data_ptr + 2); auto nested_null_map = *(data_ptr + 3); - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( + RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_array_values( field->get_sub_field(0)->size(), reinterpret_cast(data), reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); } } else { - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values( - column_name, *ptr, num_rows)); + RETURN_IF_ERROR(_index_column_writers[index_writer_sign]->add_values(column_name, *ptr, + num_rows)); } DBUG_EXECUTE_IF("IndexBuilder::_add_data_throw_exception", { _CLTHROWA(CL_ERR_IO, "debug point: _add_data_throw_exception"); }) @@ -785,6 +868,7 @@ Status IndexBuilder::do_build_inverted_index() { _tablet->tablet_id()); } + DCHECK(!_alter_index_ids.empty()); _input_rowsets = _tablet->pick_candidate_rowsets_to_build_inverted_index(_alter_index_ids, _is_drop_op); if (_input_rowsets.empty()) { diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h index 478e6557b93722..d87d88c5e76f63 100644 --- a/be/src/olap/task/index_builder.h +++ b/be/src/olap/task/index_builder.h @@ -83,7 +83,7 @@ class IndexBuilder { std::unique_ptr _olap_data_convertor; // "" -> IndexColumnWriter std::unordered_map, std::unique_ptr> - _inverted_index_builders; + _index_column_writers; std::unordered_map> _index_file_writers; // std::unordered_map, std::unique_ptr> diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 879a114117bf24..f018532f270cf6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -2116,7 +2116,7 @@ public int getAsInt() { lightSchemaChange = false; // ngram_bf index can do light_schema_change in both local and cloud mode - // inverted index can only do light_schema_change in local mode + // inverted index and ann index can only do light_schema_change in local mode if (index.isLightAddIndexSupported(enableAddIndexForNewData)) { alterIndexes.add(index); isDropIndex = false; @@ -2887,7 +2887,7 @@ public void addAlterJobV2(AlterJobV2 alterJob) throws AnalysisException { public void addIndexChangeJob(IndexChangeJob indexChangeJob) { indexChangeJobs.put(indexChangeJob.getJobId(), indexChangeJob); runnableIndexChangeJob.put(indexChangeJob.getJobId(), indexChangeJob); - LOG.info("add inverted index job {}", indexChangeJob.getJobId()); + LOG.info("add inverted/ann index change job {}", indexChangeJob.getJobId()); } private void clearFinishedOrCancelledSchemaChangeJobV2() { @@ -2906,7 +2906,7 @@ private void clearExpireFinishedOrCancelledIndexChangeJobs() { IndexChangeJob indexChangeJob = iterator.next().getValue(); if (indexChangeJob.isExpire()) { iterator.remove(); - LOG.info("remove expired inverted index job {}. finish at {}", + LOG.info("remove expired inverted/ann index change job {}. finish at {}", indexChangeJob.getJobId(), TimeUtils.longToTimeString(indexChangeJob.getFinishedTimeMs())); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java index 2bec0471602404..72742743fd6a6b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java @@ -189,15 +189,18 @@ public boolean getInvertedIndexSupportPhrase() { // Whether the index can be changed in light mode public boolean isLightIndexChangeSupported() { - return indexType == IndexDef.IndexType.INVERTED || indexType == IndexType.NGRAM_BF; + return indexType == IndexDef.IndexType.INVERTED + || indexType == IndexType.NGRAM_BF + || indexType == IndexType.ANN; } // Whether the index can be added in light mode // cloud mode supports light add for ngram_bf index and non-tokenized inverted index (parser="none") - // local mode supports light add for both inverted index and ngram_bf index + // local mode supports light add for inverted index, ann index and ngram_bf index // the rest of the index types do not support light add public boolean isLightAddIndexSupported(boolean enableAddIndexForNewData) { if (Config.isCloudMode()) { + // FIXME: ann index? if (indexType == IndexDef.IndexType.INVERTED) { return isInvertedIndexParserNone() && enableAddIndexForNewData; } else if (indexType == IndexDef.IndexType.NGRAM_BF) { @@ -206,7 +209,7 @@ public boolean isLightAddIndexSupported(boolean enableAddIndexForNewData) { return false; } return (indexType == IndexDef.IndexType.NGRAM_BF && enableAddIndexForNewData) - || (indexType == IndexDef.IndexType.INVERTED); + || (indexType == IndexDef.IndexType.INVERTED) || (indexType == IndexDef.IndexType.ANN); } public String getInvertedIndexCustomAnalyzer() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java index cf8330e7c6da1b..70779a7ef2cc0a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java @@ -144,7 +144,15 @@ public void validate(ConnectContext ctx) throws UserException { throw new AnalysisException(indexType + " index is not needed to build."); } - indexDef = new IndexDefinition(indexName, partitionNamesInfo, indexType); + if (indexType == IndexDef.IndexType.ANN) { + List columns = existedIdx.getColumns(); + Map properties = existedIdx.getProperties(); + String comment = existedIdx.getComment(); + indexDef = new IndexDefinition(indexName, false, columns, "ANN", properties, comment); + } else { + indexDef = new IndexDefinition(indexName, partitionNamesInfo, indexType); + } + if (!table.isPartitionedTable()) { List specifiedPartitions = indexDef.getPartitionNames(); if (!specifiedPartitions.isEmpty()) { @@ -152,10 +160,7 @@ public void validate(ConnectContext ctx) throws UserException { + " is not partitioned, cannot build index with partitions."); } } - if (indexDef.getIndexType() == IndexDef.IndexType.ANN) { - throw new AnalysisException( - "ANN index can only be created during table creation, not through BUILD INDEX."); - } + indexDef.validate(); this.index = existedIdx.clone(); } From 98fea6f409f47189e8f9c78a3a07de00f314e9ac Mon Sep 17 00:00:00 2001 From: uchenily Date: Tue, 2 Sep 2025 17:11:00 +0800 Subject: [PATCH 3/8] regression test --- .../ann_index_p0/build_ann_index_test.groovy | 108 ++++++++++++++++++ .../ann_index_p0/create_ann_index_test.groovy | 23 ++-- 2 files changed, 118 insertions(+), 13 deletions(-) create mode 100644 regression-test/suites/ann_index_p0/build_ann_index_test.groovy diff --git a/regression-test/suites/ann_index_p0/build_ann_index_test.groovy b/regression-test/suites/ann_index_p0/build_ann_index_test.groovy new file mode 100644 index 00000000000000..fe0c88ace11276 --- /dev/null +++ b/regression-test/suites/ann_index_p0/build_ann_index_test.groovy @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("build_ann_index_test") { + // prepare test table + def timeout = 30000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def wait_for_latest_op_on_table_finish = { tableName, opTimeout -> + for(int t = delta_time; t <= opTimeout; t += delta_time){ + alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${tableName}" ORDER BY CreateTime DESC LIMIT 1;""" + alter_res = alter_res.toString() + if(alter_res.contains("FINISHED")) { + sleep(3000) // wait change table state to normal + logger.info(tableName + " latest alter job finished, detail: " + alter_res) + break + } + useTime = t + sleep(delta_time) + } + assertTrue(useTime <= opTimeout, "wait_for_latest_op_on_table_finish timeout") + } + + def wait_for_last_build_index_on_table_finish = { tableName, opTimeout -> + for(int t = delta_time; t <= opTimeout; t += delta_time){ + alter_res = sql """SHOW BUILD INDEX WHERE TableName = "${tableName}" ORDER BY JobId """ + + if (alter_res.size() == 0) { + logger.info(tableName + " last index job finished") + return "SKIPPED" + } + if (alter_res.size() > 0) { + def last_job_state = alter_res[alter_res.size()-1][7]; + if (last_job_state == "FINISHED" || last_job_state == "CANCELLED") { + sleep(3000) // wait change table state to normal + logger.info(tableName + " last index job finished, state: " + last_job_state + ", detail: " + alter_res) + return last_job_state; + } + } + useTime = t + sleep(delta_time) + } + logger.info("wait_for_last_build_index_on_table_finish debug: " + alter_res) + assertTrue(useTime <= opTimeout, "wait_for_last_build_index_on_table_finish timeout") + return "wait_timeout" + } + + sql "set enable_common_expr_pushdown=true;" + sql "drop table if exists table_build_ann_index_test;" + def tableName = "table_build_ann_index_test" + + // case 1: create table -- insert data -- create index -- build index + sql """ + CREATE TABLE `table_build_ann_index_test` ( + `id` int NOT NULL COMMENT "", + `embedding` array NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`id`) COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + INSERT INTO table_build_ann_index_test (id, embedding) VALUES + (0, [39.906116, 10.495334, 54.08394, 88.67262, 55.243687, 10.162686, 36.335983, 38.684258]), + (1, [62.759315, 97.15586, 25.832521, 39.604908, 88.76715, 72.64085, 9.688437, 17.721428]), + (2, [15.447449, 59.7771, 65.54516, 12.973712, 99.685135, 72.080734, 85.71118, 99.35976]), + (3, [72.26747, 46.42257, 32.368374, 80.50209, 5.777631, 98.803314, 7.0915947, 68.62693]), + (4, [22.098177, 74.10027, 63.634556, 4.710955, 12.405106, 79.39356, 63.014366, 68.67834]), + (5, [27.53003, 72.1106, 50.891026, 38.459953, 68.30715, 20.610682, 94.806274, 45.181377]), + (6, [77.73215, 64.42907, 71.50025, 43.85641, 94.42648, 50.04773, 65.12575, 68.58207]), + (7, [2.1537063, 82.667885, 16.171143, 71.126656, 5.335274, 40.286068, 11.943586, 3.69409]), + (8, [54.435013, 56.800594, 59.335514, 55.829235, 85.46627, 33.388138, 11.076194, 20.480877]), + (9, [76.197945, 60.623528, 84.229805, 31.652937, 71.82595, 48.04684, 71.29212, 30.282396]); + """ + + // CREATE INDEX + sql """ + CREATE INDEX idx_test_ann ON table_build_ann_index_test(`embedding`) USING ANN PROPERTIES( + "index_type"="hnsw", + "metric_type"="l2_distance", + "dim"="8" + ); + """ + wait_for_latest_op_on_table_finish(tableName, timeout) + + // BUILD INDEX + sql "BUILD INDEX idx_test_ann ON table_build_ann_index_test;" + wait_for_last_build_index_on_table_finish(tableName, timeout) +} diff --git a/regression-test/suites/ann_index_p0/create_ann_index_test.groovy b/regression-test/suites/ann_index_p0/create_ann_index_test.groovy index 7313b452bcc7c8..ac5dd72220dc07 100644 --- a/regression-test/suites/ann_index_p0/create_ann_index_test.groovy +++ b/regression-test/suites/ann_index_p0/create_ann_index_test.groovy @@ -17,7 +17,6 @@ suite("create_ann_index_test") { sql "set enable_common_expr_pushdown=true;" - // Test that CREATE INDEX for ANN is not supported sql "drop table if exists tbl_not_null" sql """ CREATE TABLE `tbl_not_null` ( @@ -31,16 +30,14 @@ suite("create_ann_index_test") { ); """ - test { - sql """ - CREATE INDEX idx_test_ann ON tbl_not_null(`embedding`) USING ANN PROPERTIES( - "index_type"="hnsw", - "metric_type"="l2_distance", - "dim"="1" - ); - """ - exception "ANN index can only be created during table creation, not through CREATE INDEX" - } + // Test that CREATE INDEX for ANN is supported + sql """ + CREATE INDEX idx_test_ann ON tbl_not_null(`embedding`) USING ANN PROPERTIES( + "index_type"="hnsw", + "metric_type"="l2_distance", + "dim"="1" + ); + """ // Test cases for creating tables with ANN indexes @@ -343,7 +340,7 @@ suite("create_ann_index_test") { """ sql "drop table if exists tbl_efconstruction" - + test { sql """ CREATE TABLE tbl_efconstruction ( @@ -389,4 +386,4 @@ suite("create_ann_index_test") { exception "ANN index is not supported in index format V1" } -} \ No newline at end of file +} From 2aac5a8c2bc9969f26b9f8f65b926df3f019c152 Mon Sep 17 00:00:00 2001 From: uchenily Date: Wed, 3 Sep 2025 15:37:27 +0800 Subject: [PATCH 4/8] fix test --- be/test/olap/index_builder_test.cpp | 187 ++++++++++++++++++ .../vector_search/ann_index_writer_test.cpp | 55 ++++++ .../ann_index_p0/build_ann_index_test.groovy | 4 + 3 files changed, 246 insertions(+) diff --git a/be/test/olap/index_builder_test.cpp b/be/test/olap/index_builder_test.cpp index afe3a4b8e4dfcb..cb57b9ebc2c087 100644 --- a/be/test/olap/index_builder_test.cpp +++ b/be/test/olap/index_builder_test.cpp @@ -110,6 +110,31 @@ class IndexBuilderTest : public ::testing::Test { tablet_schema->append_column(column_2); } + TabletSchemaSPtr create_ann_tablet_schema() { + TabletSchemaSPtr tablet_schema = std::make_shared(); + TabletSchemaPB tablet_schema_pb; + tablet_schema_pb.set_keys_type(DUP_KEYS); + tablet_schema->init_from_pb(tablet_schema_pb); + // Set basic properties of TabletSchema directly + tablet_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2; + + TabletColumn array_column; + array_column.set_name("arr1"); + array_column.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY); + array_column.set_unique_id(1); + array_column.set_length(0); + array_column.set_index_length(0); + array_column.set_is_nullable(false); + + TabletColumn child_column; + child_column.set_name("arr_sub_float"); + child_column.set_type(FieldType::OLAP_FIELD_TYPE_FLOAT); + child_column.set_length(INT_MAX); + array_column.add_sub_column(child_column); + tablet_schema->append_column(array_column); + return tablet_schema; + } + TabletMetaSharedPtr create_tablet_meta() { TabletMetaPB tablet_meta_pb; tablet_meta_pb.set_table_id(1); @@ -253,6 +278,7 @@ TEST_F(IndexBuilderTest, DropIndexTest) { // 7. Prepare index for dropping TOlapTableIndex drop_index; + drop_index.index_type = TIndexType::INVERTED; drop_index.index_id = 1; drop_index.columns.emplace_back("k1"); _alter_indexes.push_back(drop_index); @@ -312,6 +338,167 @@ TEST_F(IndexBuilderTest, DropIndexTest) { //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1)); } +TEST_F(IndexBuilderTest, AnnIndexTest) { + // prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(15676); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + RowsetSharedPtr rowset; + + // Create test ann index properties + std::map properties; + properties["index_type"] = "hnsw"; + properties["metric_type"] = "l2_distance"; + properties["dim"] = "4"; + properties["max_degree"] = "16"; + + // First add an initial index to the schema (for arr1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "arr1_index"; + initial_index._index_type = IndexType::ANN; + initial_index._col_unique_ids.push_back(1); // unique_id for arr1 + initial_index._properties = properties; + + _tablet_schema = create_ann_tablet_schema(); + _tablet_schema->append_index(std::move(initial_index)); + + // 3. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15676); + writer_context.tablet_id = 15676; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // Write data to the rowset + { + vectorized::DataTypePtr inner_float = std::make_shared(); + vectorized::DataTypePtr array_type = + std::make_shared(inner_float); + + // create a MutableColumnPtr + vectorized::MutableColumnPtr col = array_type->create_column(); + // row0 + { + vectorized::Array arr; + arr.push_back(vectorized::Field::create_field(1.0F)); + arr.push_back(vectorized::Field::create_field(2.0F)); + arr.push_back(vectorized::Field::create_field(3.0F)); + arr.push_back(vectorized::Field::create_field(4.0F)); + col->insert(vectorized::Field::create_field(arr)); + } + // row1 + { + vectorized::Array arr; + arr.push_back(vectorized::Field::create_field(5.0F)); + arr.push_back(vectorized::Field::create_field(6.0F)); + arr.push_back(vectorized::Field::create_field(7.0F)); + arr.push_back(vectorized::Field::create_field(8.0F)); + col->insert(vectorized::Field::create_field(arr)); + } + // wrap the constructed column into a ColumnWithTypeAndName + vectorized::ColumnPtr column_array = std::move(col); + vectorized::ColumnWithTypeAndName type_and_name(column_array, array_type, "arr1"); + + // construct Block (containing only this column), with 2 rows + vectorized::Block block; + block.insert(type_and_name); + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // Verify index exists before dropping + EXPECT_TRUE(_tablet_schema->has_ann_index()); + EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1)); + + // Prepare index for dropping + TOlapTableIndex drop_index; + drop_index.index_type = TIndexType::type::ANN; + drop_index.index_id = 1; + drop_index.index_name = "arr1_index"; + drop_index.columns.emplace_back("arr1"); + _alter_indexes.clear(); + _alter_indexes.push_back(drop_index); + + // Create IndexBuilder with drop operation + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, true); + + // Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // Execute drop operation + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // Verify the index has been removed + // check old tablet path and new tablet path + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); + EXPECT_TRUE(exists); + + // Check files in old and new directories + std::vector files; + bool dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); + EXPECT_TRUE(dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : files) { + std::string filename = file.file_name; + if (filename.find("15676_0.idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find("15676_0.dat") != std::string::npos) { + old_dat_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.idx") != + std::string::npos) { + new_idx_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != + std::string::npos) { + new_dat_file_count++; + } + } + // The index should have been removed + EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; + EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop"; + EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; + EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; +} + TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14673); diff --git a/be/test/olap/vector_search/ann_index_writer_test.cpp b/be/test/olap/vector_search/ann_index_writer_test.cpp index 37d390379c239d..b49a7487b0dc21 100644 --- a/be/test/olap/vector_search/ann_index_writer_test.cpp +++ b/be/test/olap/vector_search/ann_index_writer_test.cpp @@ -26,6 +26,7 @@ #include #include +#include "olap/field.h" #include "olap/rowset/segment_v2/index_file_writer.h" #include "olap/rowset/segment_v2/inverted_index_fs_directory.h" #include "olap/tablet_schema.h" @@ -88,6 +89,7 @@ class AnnIndexWriterTest : public ::testing::Test { // Create tablet index _tablet_index = std::make_unique(); + _tablet_index->_index_type = IndexType::ANN; _tablet_index->_properties = _properties; _tablet_index->_index_id = 1; _tablet_index->_index_name = "test_ann_index"; @@ -397,6 +399,7 @@ TEST_F(AnnIndexWriterTest, TestInvalidMetricType) { properties["metric_type"] = "invalid_metric"; auto tablet_index = std::make_unique(); + tablet_index->_index_type = IndexType::ANN; tablet_index->_properties = properties; tablet_index->_index_id = 1; @@ -473,4 +476,56 @@ TEST_F(AnnIndexWriterTest, TestAddMoreThanChunkSize) { EXPECT_TRUE(status.ok()); } +TEST_F(AnnIndexWriterTest, TestCreateFromIndexColumnWriter) { + TabletSchemaSPtr tablet_schema = std::make_shared(); + TabletSchemaPB tablet_schema_pb; + tablet_schema_pb.set_keys_type(DUP_KEYS); + tablet_schema->init_from_pb(tablet_schema_pb); + + TabletColumn array_column; + array_column.set_name("arr1"); + array_column.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY); + array_column.set_length(0); + array_column.set_index_length(0); + array_column.set_is_nullable(false); + + TabletColumn child_column; + child_column.set_name("arr_sub_float"); + child_column.set_type(FieldType::OLAP_FIELD_TYPE_FLOAT); + child_column.set_length(INT_MAX); + array_column.add_sub_column(child_column); + tablet_schema->append_column(array_column); + + // Get field for array column + std::unique_ptr field(FieldFactory::create(array_column)); + ASSERT_NE(field.get(), nullptr); + + auto fs_dir = std::make_shared(); + fs_dir->init(doris::io::global_local_filesystem(), "./ut_dir/tmp_vector_search", nullptr); + EXPECT_CALL(*_index_file_writer, open(testing::_)).WillOnce(testing::Return(fs_dir)); + + // Create column writer + std::unique_ptr column_writer; + auto status = IndexColumnWriter::create(field.get(), &column_writer, _index_file_writer.get(), + _tablet_index.get()); + EXPECT_TRUE(status.ok()); + + // Prepare test data + const size_t num_rows = 3; + std::vector vectors = { + 1.0f, 2.0f, 3.0f, 4.0f, // Row 0 + 5.0f, 6.0f, 7.0f, 8.0f, // Row 1 + 9.0f, 10.0f, 11.0f, 12.0f // Row 2 + }; + + std::vector offsets = {0, 4, 8, 12}; // Each row has 4 elements + + status = column_writer->add_array_values(sizeof(float), vectors.data(), nullptr, + reinterpret_cast(offsets.data()), + num_rows); + EXPECT_TRUE(status.ok()); + + ASSERT_TRUE(column_writer->finish().ok()); +} + } // namespace doris::segment_v2 diff --git a/regression-test/suites/ann_index_p0/build_ann_index_test.groovy b/regression-test/suites/ann_index_p0/build_ann_index_test.groovy index fe0c88ace11276..e8de0d3d2d1702 100644 --- a/regression-test/suites/ann_index_p0/build_ann_index_test.groovy +++ b/regression-test/suites/ann_index_p0/build_ann_index_test.groovy @@ -16,6 +16,10 @@ // under the License. suite("build_ann_index_test") { + if (isCloudMode()) { + return // TODO enable this case after enable light index in cloud mode + } + // prepare test table def timeout = 30000 def delta_time = 1000 From e4717feb0ff755ab29b62d7c17262e050d184abe Mon Sep 17 00:00:00 2001 From: uchenily Date: Wed, 3 Sep 2025 22:51:22 +0800 Subject: [PATCH 5/8] temp --- be/src/olap/task/index_builder.cpp | 26 +--- be/test/olap/index_builder_test.cpp | 184 +++++++++++++++++++++++++++- 2 files changed, 185 insertions(+), 25 deletions(-) diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 2a4cd80857ca10..ecfbd2e007b951 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -147,24 +147,8 @@ Status IndexBuilder::update_inverted_index_info() { if (!ann_index) { continue; } - if (output_rs_tablet_schema->get_inverted_index_storage_format() == - InvertedIndexStorageFormatPB::V1) { - const auto& fs = io::global_local_filesystem(); - - for (int seg_id = 0; seg_id < num_segments; seg_id++) { - auto seg_path = - local_segment_path(_tablet->tablet_path(), - input_rowset->rowset_id().to_string(), seg_id); - auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( - InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), - ann_index->index_id(), ann_index->get_index_suffix()); - int64_t index_size = 0; - RETURN_IF_ERROR(fs->file_size(index_path, &index_size)); - VLOG_DEBUG << "inverted index file:" << index_path - << " size:" << index_size; - drop_index_size += index_size; - } - } + DCHECK(output_rs_tablet_schema->get_inverted_index_storage_format() != + InvertedIndexStorageFormatPB::V1); _dropped_inverted_indexes.push_back(*ann_index); // ATTN: DO NOT REMOVE INDEX AFTER OUTPUT_ROWSET_WRITER CREATED. // remove dropped index_meta from output rowset tablet schema @@ -224,10 +208,8 @@ Status IndexBuilder::update_inverted_index_info() { if (exist_index->is_same_except_id(&index)) { LOG(WARNING) << fmt::format( "column: {} has a exist ann index, but the index id not " - "equal " - "request's index id, , exist index id: {}, request's index id: " - "{}, " - "remove exist index in new output_rs_tablet_schema", + "equal request's index id, , exist index id: {}, request's index " + "id: {}, remove exist index in new output_rs_tablet_schema", column_uid, exist_index->index_id(), index.index_id()); without_index_uids.insert(exist_index->index_id()); output_rs_tablet_schema->remove_index(exist_index->index_id()); diff --git a/be/test/olap/index_builder_test.cpp b/be/test/olap/index_builder_test.cpp index cb57b9ebc2c087..469442791c345a 100644 --- a/be/test/olap/index_builder_test.cpp +++ b/be/test/olap/index_builder_test.cpp @@ -202,7 +202,7 @@ TEST_F(IndexBuilderTest, BasicBuildTest) { EXPECT_EQ(builder._alter_index_ids.size(), 1); } -TEST_F(IndexBuilderTest, DropIndexTest) { +TEST_F(IndexBuilderTest, DropInvertedIndexTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(15676); _tablet->_tablet_path = tablet_path; @@ -338,7 +338,7 @@ TEST_F(IndexBuilderTest, DropIndexTest) { //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1)); } -TEST_F(IndexBuilderTest, AnnIndexTest) { +TEST_F(IndexBuilderTest, DropAnnIndexTest) { // prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(15676); _tablet->_tablet_path = tablet_path; @@ -499,7 +499,7 @@ TEST_F(IndexBuilderTest, AnnIndexTest) { EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; } -TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) { +TEST_F(IndexBuilderTest, BuildInvertedIndexAfterWritingDataTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14673); _tablet->_tablet_path = tablet_path; @@ -646,6 +646,184 @@ TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) { //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(2)); } +TEST_F(IndexBuilderTest, BuildAnnIndexAfterWritingDataTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14686); + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 100; + + // 2. Use ANN schema with array column + auto ann_schema = create_ann_tablet_schema(); + + // 3. Update schema in tablet meta + TabletMetaPB tablet_meta_pb; + _tablet_meta->to_meta_pb(&tablet_meta_pb); + + TabletSchemaPB ann_schema_pb; + ann_schema->to_schema_pb(&ann_schema_pb); + tablet_meta_pb.mutable_schema()->CopyFrom(ann_schema_pb); + + _tablet_meta->init_from_pb(tablet_meta_pb); + + // 4. Reinitialize tablet to use new schema + _tablet = std::make_shared(*_engine_ref, _tablet_meta, _data_dir.get()); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(_tablet->init().ok()); + + _tablet_schema = ann_schema; + + // 3. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15686); + writer_context.tablet_id = 15686; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15686); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); + + // 4. Create a rowset writer + auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + // 5. Write data to the rowset with float arrays + { + vectorized::DataTypePtr inner_float = std::make_shared(); + vectorized::DataTypePtr array_type = + std::make_shared(inner_float); + + // create a MutableColumnPtr + vectorized::MutableColumnPtr col = array_type->create_column(); + + // Add data for each row - arrays of 4 floats (matching dim=4 in properties) + for (int i = 0; i < num_rows; ++i) { + vectorized::Array arr; + // Create 4-dimensional float vectors + arr.push_back(vectorized::Field::create_field(static_cast(i % 10))); + arr.push_back( + vectorized::Field::create_field(static_cast((i + 1) % 10))); + arr.push_back( + vectorized::Field::create_field(static_cast((i + 2) % 10))); + arr.push_back( + vectorized::Field::create_field(static_cast((i + 3) % 10))); + col->insert(vectorized::Field::create_field(arr)); + } + + // wrap the constructed column into a ColumnWithTypeAndName + vectorized::ColumnPtr column_array = std::move(col); + vectorized::ColumnWithTypeAndName type_and_name(column_array, array_type, "arr1"); + + // construct Block (containing only this column), with num_rows rows + vectorized::Block block; + block.insert(type_and_name); + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Prepare ANN index for building + std::map properties; + properties["index_type"] = "hnsw"; + properties["metric_type"] = "l2_distance"; + properties["dim"] = "4"; + properties["max_degree"] = "16"; + + TOlapTableIndex ann_index; + ann_index.__set_index_id(1); + ann_index.__set_columns({"arr1"}); + ann_index.__set_index_name("arr1_ann_index"); + ann_index.__set_index_type(TIndexType::ANN); + // NOTE: wrong way, it doesn't set __isset.properties flag + // ann_index.properties = properties; + ann_index.__set_properties(properties); + _alter_indexes.push_back(ann_index); + + // 7. Create IndexBuilder + IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, + _alter_indexes, false); + + // 8. Initialize and verify + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 9. Build ANN index + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15686); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14686); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // Check files in old and new directories + std::vector old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int idx_file_count = 0; + int dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + dat_file_count++; + } + } + EXPECT_EQ(idx_file_count, 0) << "Old directory should contain exactly 0 .idx file"; + EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; +} + TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14675); From 8946c29d5a6d86f9d0d1eb77461d34ae257bfdb5 Mon Sep 17 00:00:00 2001 From: uchenily Date: Fri, 5 Sep 2025 09:53:57 +0800 Subject: [PATCH 6/8] has ann index --- .../rowset/segment_v2/segment_iterator.cpp | 46 +++++++++++-------- .../olap/rowset/segment_v2/segment_iterator.h | 5 +- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 7bbf97d1c4ac7d..592211e016e500 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -734,6 +734,13 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() { return Status::OK(); } +bool SegmentIterator::_column_has_ann_index(int32_t cid) { + bool has_ann_index = _index_iterators[cid] != nullptr && + _index_iterators[cid]->get_reader(AnnIndexReaderType::ANN); + + return has_ann_index; +} + Status SegmentIterator::_apply_ann_topn_predicate() { if (_ann_topn_runtime == nullptr) { return Status::OK(); @@ -743,7 +750,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { size_t src_col_idx = _ann_topn_runtime->get_src_column_idx(); ColumnId src_cid = _schema->column_id(src_col_idx); IndexIterator* ann_index_iterator = _index_iterators[src_cid].get(); - bool has_ann_index = ann_index_iterator != nullptr; + bool has_ann_index = _column_has_ann_index(src_cid); bool has_common_expr_push_down = !_common_expr_ctxs_push_down.empty(); bool has_column_predicate = std::any_of(_is_pred_column.begin(), _is_pred_column.end(), [](bool is_pred) { return is_pred; }); @@ -804,17 +811,9 @@ Status SegmentIterator::_apply_ann_topn_predicate() { vectorized::IColumn::MutablePtr result_column; std::unique_ptr> result_row_ids; segment_v2::AnnIndexStats ann_index_stats; - Status st = _ann_topn_runtime->evaluate_vector_ann_search(ann_index_iterator, &_row_bitmap, - rows_of_segment, result_column, - result_row_ids, ann_index_stats); - if (!st.ok()) { - if (_downgrade_without_index(st)) { - // fallback - return Status::OK(); - } else { - return st; - } - } + RETURN_IF_ERROR(_ann_topn_runtime->evaluate_vector_ann_search(ann_index_iterator, &_row_bitmap, + rows_of_segment, result_column, + result_row_ids, ann_index_stats)); VLOG_DEBUG << fmt::format("Ann topn filtered {} - {} = {} rows", pre_size, _row_bitmap.cardinality(), pre_size - _row_bitmap.cardinality()); @@ -1427,6 +1426,13 @@ Status SegmentIterator::_init_bitmap_index_iterators() { return Status::OK(); } for (auto cid : _schema->column_ids()) { + const auto& col = _opts.tablet_schema->column(cid); + int col_uid = col.unique_id() >= 0 ? col.unique_id() : col.parent_unique_id(); + // The column is not in this segment + if (!_segment->_tablet_schema->has_column_unique_id(col_uid)) { + continue; + } + if (_bitmap_index_iterators[cid] == nullptr) { RETURN_IF_ERROR(_segment->new_bitmap_index_iterator( _opts.tablet_schema->column(cid), _opts, &_bitmap_index_iterators[cid])); @@ -1489,14 +1495,14 @@ Status SegmentIterator::_init_index_iterators() { for (auto cid : _schema->column_ids()) { if (_index_iterators[cid] == nullptr) { const auto& column = _opts.tablet_schema->column(cid); - int32_t col_unique_id = - column.is_extracted_column() ? column.parent_unique_id() : column.unique_id(); - RETURN_IF_ERROR(_segment->new_index_iterator( - column, - _segment->_tablet_schema->ann_index(col_unique_id, column.suffix_path()), _opts, - &_index_iterators[cid])); - if (_index_iterators[cid] != nullptr) { - _index_iterators[cid]->set_context(_index_query_context); + const auto* index_meta = _segment->_tablet_schema->ann_index(column); + if (index_meta) { + RETURN_IF_ERROR(_segment->new_index_iterator(column, index_meta, _opts, + &_index_iterators[cid])); + + if (_index_iterators[cid] != nullptr) { + _index_iterators[cid]->set_context(_index_query_context); + } } } } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 2fdc265bfe1e3a..3beb9a30d49d91 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -174,7 +174,6 @@ class SegmentIterator : public RowwiseIterator { [[nodiscard]] Status _init_bitmap_index_iterators(); [[nodiscard]] Status _init_index_iterators(); - Status _apply_ann_topn_predicate(); // calculate row ranges that fall into requested key ranges using short key index [[nodiscard]] Status _get_row_ranges_by_keys(); [[nodiscard]] Status _prepare_seek(const StorageReadOptions::KeyRange& key_range); @@ -192,13 +191,17 @@ class SegmentIterator : public RowwiseIterator { // calculate row ranges that satisfy requested column conditions using various column index [[nodiscard]] Status _get_row_ranges_by_column_conditions(); [[nodiscard]] Status _get_row_ranges_from_conditions(RowRanges* condition_row_ranges); + [[nodiscard]] Status _apply_bitmap_index(); [[nodiscard]] Status _apply_inverted_index(); [[nodiscard]] Status _apply_inverted_index_on_column_predicate( ColumnPredicate* pred, std::vector& remaining_predicates, bool* continue_apply); + [[nodiscard]] Status _apply_ann_topn_predicate(); [[nodiscard]] Status _apply_index_expr(); + bool _column_has_fulltext_index(int32_t cid); + bool _column_has_ann_index(int32_t cid); bool _downgrade_without_index(Status res, bool need_remaining = false); inline bool _inverted_index_not_support_pred_type(const PredicateType& type); bool _is_literal_node(const TExprNodeType::type& node_type); From bf63e18833b7f7195a49f7ead3f9953ff6b125cf Mon Sep 17 00:00:00 2001 From: uchenily Date: Tue, 11 Nov 2025 10:09:59 +0800 Subject: [PATCH 7/8] fix beut --- be/test/olap/index_builder_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/olap/index_builder_test.cpp b/be/test/olap/index_builder_test.cpp index 469442791c345a..b0af40ff1f0cd5 100644 --- a/be/test/olap/index_builder_test.cpp +++ b/be/test/olap/index_builder_test.cpp @@ -661,7 +661,7 @@ TEST_F(IndexBuilderTest, BuildAnnIndexAfterWritingDataTest) { // 3. Update schema in tablet meta TabletMetaPB tablet_meta_pb; - _tablet_meta->to_meta_pb(&tablet_meta_pb); + _tablet_meta->to_meta_pb(&tablet_meta_pb, false); TabletSchemaPB ann_schema_pb; ann_schema->to_schema_pb(&ann_schema_pb); From 5927b67472b01732739d8157771b0ad00109e082 Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Wed, 19 Nov 2025 20:05:37 +0800 Subject: [PATCH 8/8] fix compile --- fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java | 1 - .../doris/nereids/trees/plans/commands/info/BuildIndexOp.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java index 41893ae3b4774b..35f4c2a14e4350 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java @@ -25,7 +25,6 @@ import org.apache.doris.common.util.PrintableMap; import org.apache.doris.common.util.SqlUtils; import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition; -import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition.IndexType; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.proto.OlapFile; import org.apache.doris.thrift.TIndexType; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java index 1698e69594d411..8d54ba9bd7b231 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BuildIndexOp.java @@ -143,7 +143,7 @@ public void validate(ConnectContext ctx) throws UserException { throw new AnalysisException(indexType + " index is not needed to build."); } - if (indexType == IndexDef.IndexType.ANN) { + if (indexType == IndexDefinition.IndexType.ANN) { List columns = existedIdx.getColumns(); Map properties = existedIdx.getProperties(); String comment = existedIdx.getComment();