diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index d6dd7c34b8c411..e0e1a82e05f021 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -66,7 +66,7 @@ add_library(Olap STATIC reader.cpp row_block.cpp row_cursor.cpp - rowset.cpp + segment_group.cpp run_length_byte_reader.cpp run_length_byte_writer.cpp run_length_integer_reader.cpp diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index 59bbf18b7f4c1a..eb691d5fb009d9 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -28,7 +28,7 @@ #include "olap/column_data.h" #include "olap/olap_engine.h" #include "olap/olap_header.h" -#include "olap/rowset.h" +#include "olap/segment_group.h" #include "olap/olap_table.h" #include "olap/utils.h" #include "util/doris_metrics.h" @@ -119,7 +119,7 @@ OLAPStatus BaseCompaction::run() { DorisMetrics::base_compaction_deltas_total.increment(_need_merged_versions.size()); int64_t merge_bytes = 0; for (ColumnData* i_data : base_data_sources) { - merge_bytes += i_data->olap_index()->data_size(); + merge_bytes += i_data->segment_group()->data_size(); } DorisMetrics::base_compaction_bytes_total.increment(merge_bytes); } @@ -148,7 +148,7 @@ OLAPStatus BaseCompaction::run() { // 4. make new versions visable. // If success, remove files belong to old versions; // If fail, gc files belong to new versions. - vector unused_olap_indices; + vector unused_olap_indices; res = _update_header(row_count, &unused_olap_indices); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to update header. table=" << _table->full_name() << ", " @@ -323,12 +323,12 @@ OLAPStatus BaseCompaction::_do_base_compaction(VersionHash new_base_version_hash vector* base_data_sources, uint64_t* row_count) { // 1. 生成新base文件对应的olap index - Rowset* new_base = new (std::nothrow) Rowset(_table.get(), + SegmentGroup* new_base = new (std::nothrow) SegmentGroup(_table.get(), _new_base_version, new_base_version_hash, false, 0, 0); if (new_base == NULL) { - OLAP_LOG_WARNING("fail to new Rowset."); + OLAP_LOG_WARNING("fail to new SegmentGroup."); return OLAP_ERR_MALLOC_ERROR; } @@ -398,7 +398,7 @@ OLAPStatus BaseCompaction::_do_base_compaction(VersionHash new_base_version_hash // Check row num changes uint64_t source_rows = 0; for (ColumnData* i_data : *base_data_sources) { - source_rows += i_data->olap_index()->num_rows(); + source_rows += i_data->segment_group()->num_rows(); } bool row_nums_check = config::row_nums_check; if (row_nums_check) { @@ -423,7 +423,7 @@ OLAPStatus BaseCompaction::_do_base_compaction(VersionHash new_base_version_hash return OLAP_SUCCESS; } -OLAPStatus BaseCompaction::_update_header(uint64_t row_count, vector* unused_olap_indices) { +OLAPStatus BaseCompaction::_update_header(uint64_t row_count, vector* unused_olap_indices) { WriteLock wrlock(_table->get_header_lock_ptr()); vector unused_versions; _get_unused_versions(&unused_versions); @@ -464,11 +464,11 @@ OLAPStatus BaseCompaction::_update_header(uint64_t row_count, vector* u return OLAP_SUCCESS; } -void BaseCompaction::_delete_old_files(vector* unused_indices) { +void BaseCompaction::_delete_old_files(vector* unused_indices) { if (!unused_indices->empty()) { OLAPEngine* unused_index = OLAPEngine::get_instance(); - for (vector::iterator it = unused_indices->begin(); + for (vector::iterator it = unused_indices->begin(); it != unused_indices->end(); ++it) { unused_index->add_unused_index(*it); } @@ -477,7 +477,7 @@ void BaseCompaction::_delete_old_files(vector* unused_indices) { void BaseCompaction::_garbage_collection() { // 清理掉已生成的版本文件 - for (vector::iterator it = _new_olap_indices.begin(); + for (vector::iterator it = _new_olap_indices.begin(); it != _new_olap_indices.end(); ++it) { (*it)->delete_all_files(); SAFE_DELETE(*it); diff --git a/be/src/olap/base_compaction.h b/be/src/olap/base_compaction.h index 72bf40522af294..f0fc4510b721e4 100644 --- a/be/src/olap/base_compaction.h +++ b/be/src/olap/base_compaction.h @@ -28,7 +28,7 @@ namespace doris { class ColumnData; -class Rowset; +class SegmentGroup; // @brief 实现对START_BASE_COMPACTION命令的处理逻辑,并返回处理结果 class BaseCompaction { @@ -94,23 +94,23 @@ class BaseCompaction { // 更新Header使得修改对外可见 // 输出参数: - // - unused_olap_indices: 需要被物理删除的Rowset* + // - unused_olap_indices: 需要被物理删除的SegmentGroup* // // 返回值: // - 如果执行成功,则返回OLAP_SUCCESS; // - 其它情况下,返回相应的错误码 OLAPStatus _update_header(uint64_t row_count, - std::vector* unused_olap_indices); + std::vector* unused_olap_indices); - // 删除不再使用的Rowset文件 + // 删除不再使用的SegmentGroup文件 // // 输入参数: - // - unused_olap_indices: 需要被物理删除的Rowset* + // - unused_olap_indices: 需要被物理删除的SegmentGroup* // // 返回值: // - 如果执行成功,则返回OLAP_SUCCESS; // - 其它情况下,返回相应的错误码 - void _delete_old_files(std::vector* unused_indices); + void _delete_old_files(std::vector* unused_indices); // 其它函数执行失败时,调用该函数进行清理工作 void _garbage_collection(); @@ -173,8 +173,8 @@ class BaseCompaction { Version _latest_cumulative; // 在此次base compaction执行过程中,将被合并的cumulative文件版本 std::vector _need_merged_versions; - // 需要新增的版本对应的Rowset - std::vector _new_olap_indices; + // 需要新增的版本对应的SegmentGroup + std::vector _new_olap_indices; bool _base_compaction_locked; diff --git a/be/src/olap/column_data.cpp b/be/src/olap/column_data.cpp index 09d133ed36b968..00e70213a1a1c8 100644 --- a/be/src/olap/column_data.cpp +++ b/be/src/olap/column_data.cpp @@ -24,13 +24,13 @@ namespace doris { -ColumnData* ColumnData::create(Rowset* index) { +ColumnData* ColumnData::create(SegmentGroup* segment_group) { ColumnData* data = NULL; - DataFileType file_type = index->table()->data_file_type(); + DataFileType file_type = segment_group->table()->data_file_type(); switch (file_type) { case COLUMN_ORIENTED_FILE: - data = new(std::nothrow) ColumnData(index); + data = new(std::nothrow) ColumnData(segment_group); break; default: @@ -40,9 +40,9 @@ ColumnData* ColumnData::create(Rowset* index) { return data; } -ColumnData::ColumnData(Rowset* olap_index) +ColumnData::ColumnData(SegmentGroup* segment_group) : _data_file_type(COLUMN_ORIENTED_FILE), - _olap_index(olap_index), + _segment_group(segment_group), _eof(false), _conditions(NULL), _col_predicates(NULL), @@ -50,19 +50,19 @@ ColumnData::ColumnData(Rowset* olap_index) _runtime_state(NULL), _is_using_cache(false), _segment_reader(NULL) { - _table = olap_index->table(); + _table = segment_group->table(); _num_rows_per_block = _table->num_rows_per_row_block(); } ColumnData::~ColumnData() { - _olap_index->release(); + _segment_group->release(); SAFE_DELETE(_segment_reader); } OLAPStatus ColumnData::init() { - _olap_index->acquire(); + _segment_group->acquire(); - auto res = _short_key_cursor.init(_olap_index->short_key_fields()); + auto res = _short_key_cursor.init(_segment_group->short_key_fields()); if (res != OLAP_SUCCESS) { LOG(WARNING) << "key cursor init failed, table:" << _table->id() << ", res:" << res; @@ -105,7 +105,7 @@ OLAPStatus ColumnData::_next_row(const RowCursor** row, bool without_filter) { } else { DCHECK(_read_block->block_status() == DEL_PARTIAL_SATISFIED); bool row_del_filter = _delete_handler.is_filter_data( - _olap_index->version().second, _cursor); + _segment_group->version().second, _cursor); if (!row_del_filter) { *row = &_cursor; return OLAP_SUCCESS; @@ -130,16 +130,16 @@ OLAPStatus ColumnData::_seek_to_block(const RowBlockPosition& block_pos, bool wi // TODO(zc): _segment_readers??? // open segment reader if needed if (_segment_reader == nullptr || block_pos.segment != _current_segment) { - if (block_pos.segment >= _olap_index->num_segments() || + if (block_pos.segment >= _segment_group->num_segments() || (_end_key_is_set && block_pos.segment > _end_segment)) { _eof = true; return OLAP_ERR_DATA_EOF; } SAFE_DELETE(_segment_reader); std::string file_name; - file_name = olap_index()->construct_data_file_path(olap_index()->rowset_id(), block_pos.segment); + file_name = segment_group()->construct_data_file_path(segment_group()->segment_group_id(), block_pos.segment); _segment_reader = new(std::nothrow) SegmentReader( - file_name, _table, olap_index(), block_pos.segment, + file_name, _table, segment_group(), block_pos.segment, _seek_columns, _load_bf_columns, _conditions, _col_predicates, _delete_handler, _delete_status, _runtime_state, _stats); if (_segment_reader == nullptr) { @@ -170,7 +170,7 @@ OLAPStatus ColumnData::_seek_to_block(const RowBlockPosition& block_pos, bool wi OLAPStatus ColumnData::_find_position_by_short_key( const RowCursor& key, bool find_last_key, RowBlockPosition *position) { RowBlockPosition tmp_pos; - auto res = _olap_index->find_short_key(key, &_short_key_cursor, find_last_key, &tmp_pos); + auto res = _segment_group->find_short_key(key, &_short_key_cursor, find_last_key, &tmp_pos); if (res != OLAP_SUCCESS) { if (res == OLAP_ERR_INDEX_EOF) { res = OLAP_ERR_DATA_EOF; @@ -179,7 +179,7 @@ OLAPStatus ColumnData::_find_position_by_short_key( } return res; } - res = olap_index()->find_prev_point(tmp_pos, position); + res = segment_group()->find_prev_point(tmp_pos, position); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("find prev row block failed. [res=%d]", res); return res; @@ -190,7 +190,7 @@ OLAPStatus ColumnData::_find_position_by_short_key( OLAPStatus ColumnData::_find_position_by_full_key( const RowCursor& key, bool find_last_key, RowBlockPosition *position) { RowBlockPosition tmp_pos; - auto res = _olap_index->find_short_key(key, &_short_key_cursor, false, &tmp_pos); + auto res = _segment_group->find_short_key(key, &_short_key_cursor, false, &tmp_pos); if (res != OLAP_SUCCESS) { if (res == OLAP_ERR_INDEX_EOF) { res = OLAP_ERR_DATA_EOF; @@ -200,14 +200,14 @@ OLAPStatus ColumnData::_find_position_by_full_key( return res; } RowBlockPosition start_position; - res = olap_index()->find_prev_point(tmp_pos, &start_position); + res = segment_group()->find_prev_point(tmp_pos, &start_position); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("find prev row block failed. [res=%d]", res); return res; } RowBlockPosition end_position; - res = _olap_index->find_short_key(key, &_short_key_cursor, true, &end_position); + res = _segment_group->find_short_key(key, &_short_key_cursor, true, &end_position); if (res != OLAP_SUCCESS) { if (res == OLAP_ERR_INDEX_EOF) { res = OLAP_ERR_DATA_EOF; @@ -226,7 +226,7 @@ OLAPStatus ColumnData::_find_position_by_full_key( OLAPIndexOffset index_offset; index_offset.segment = _end_segment; index_offset.offset = _end_block; - res = olap_index()->get_row_block_position(index_offset, &end_position); + res = segment_group()->get_row_block_position(index_offset, &end_position); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to get row block position. [res=%d]", res); return res; @@ -235,7 +235,7 @@ OLAPStatus ColumnData::_find_position_by_full_key( } // ????end_position - uint32_t distance = olap_index()->compute_distance(start_position, end_position); + uint32_t distance = segment_group()->compute_distance(start_position, end_position); BinarySearchIterator it_start(0u); BinarySearchIterator it_end(distance + 1); @@ -243,7 +243,7 @@ OLAPStatus ColumnData::_find_position_by_full_key( ColumnDataComparator comparator( start_position, this, - olap_index()); + segment_group()); try { if (!find_last_key) { it_result = std::lower_bound(it_start, it_end, key, comparator); @@ -261,7 +261,7 @@ OLAPStatus ColumnData::_find_position_by_full_key( it_result -= 1; } - if (OLAP_SUCCESS != (res = olap_index()->advance_row_block(*it_result, + if (OLAP_SUCCESS != (res = segment_group()->advance_row_block(*it_result, &start_position))) { OLAP_LOG_WARNING("fail to advance row_block. [res=%d it_offset=%u " "start_pos='%s']", res, *it_result, @@ -490,16 +490,16 @@ OLAPStatus ColumnData::get_first_row_block(RowBlock** row_block) { return res; } - // to be same with OLAPData, we use olap_index. + // to be same with OLAPData, we use segment_group. RowBlockPosition block_pos; - res = olap_index()->find_first_row_block(&block_pos); + res = segment_group()->find_first_row_block(&block_pos); if (res != OLAP_SUCCESS) { if (res == OLAP_ERR_INDEX_EOF) { *row_block = nullptr; _eof = true; return res; } - OLAP_LOG_WARNING("fail to find first row block with Rowset."); + OLAP_LOG_WARNING("fail to find first row block with SegmentGroup."); return res; } @@ -545,11 +545,11 @@ bool ColumnData::delta_pruning_filter() { return true; } - if (!_olap_index->has_column_statistics()) { + if (!_segment_group->has_column_statistics()) { return false; } - return _conditions->delta_pruning_filter(_olap_index->get_column_statistics()); + return _conditions->delta_pruning_filter(_segment_group->get_column_statistics()); } int ColumnData::delete_pruning_filter() { @@ -559,9 +559,9 @@ int ColumnData::delete_pruning_filter() { return DEL_NOT_SATISFIED; } - if (false == _olap_index->has_column_statistics()) { + if (false == _segment_group->has_column_statistics()) { /* - * if olap_index has no column statistics, we cannot judge whether the data can be filtered or not + * if segment_group has no column statistics, we cannot judge whether the data can be filtered or not */ return DEL_PARTIAL_SATISFIED; } @@ -576,12 +576,12 @@ int ColumnData::delete_pruning_filter() { bool del_partial_stastified = false; bool del_stastified = false; for (auto& delete_condtion : _delete_handler.get_delete_conditions()) { - if (delete_condtion.filter_version <= _olap_index->version().first) { + if (delete_condtion.filter_version <= _segment_group->version().first) { continue; } Conditions* del_cond = delete_condtion.del_cond; - int del_ret = del_cond->delete_pruning_filter(_olap_index->get_column_statistics()); + int del_ret = del_cond->delete_pruning_filter(_segment_group->get_column_statistics()); if (DEL_SATISFIED == del_ret) { del_stastified = true; break; diff --git a/be/src/olap/column_data.h b/be/src/olap/column_data.h index 5865e53a50b8c2..daee11e4229473 100644 --- a/be/src/olap/column_data.h +++ b/be/src/olap/column_data.h @@ -26,7 +26,7 @@ #include "olap/delete_handler.h" #include "olap/olap_common.h" #include "olap/olap_cond.h" -#include "olap/rowset.h" +#include "olap/segment_group.h" #include "olap/row_block.h" #include "olap/row_cursor.h" #include "util/runtime_profile.h" @@ -41,22 +41,22 @@ class SegmentReader; // This class is column data reader. this class will be used in two case. class ColumnData { public: - static ColumnData* create(Rowset* olap_index); - explicit ColumnData(Rowset* olap_index); + static ColumnData* create(SegmentGroup* segment_group); + explicit ColumnData(SegmentGroup* segment_group); ~ColumnData(); // 为了与之前兼容, 暴露部分index的接口 Version version() const { - return _olap_index->version(); + return _segment_group->version(); } VersionHash version_hash() const { - return _olap_index->version_hash(); + return _segment_group->version_hash(); } bool delete_flag() const { - return _olap_index->delete_flag(); + return _segment_group->delete_flag(); } uint32_t num_segments() const { - return _olap_index->num_segments(); + return _segment_group->num_segments(); } // 查询数据文件类型 @@ -108,16 +108,16 @@ class ColumnData { void set_eof(bool eof) { _eof = eof; } bool* eof_ptr() { return &_eof; } - bool empty() const { return _olap_index->empty(); } - bool zero_num_rows() const { return _olap_index->zero_num_rows(); } + bool empty() const { return _segment_group->empty(); } + bool zero_num_rows() const { return _segment_group->zero_num_rows(); } bool delta_pruning_filter(); int delete_pruning_filter(); uint64_t get_filted_rows(); - Rowset* olap_index() const { return _olap_index; } - void set_olap_index(Rowset* olap_index) { _olap_index = olap_index; } - int64_t num_rows() const { return _olap_index->num_rows(); } + SegmentGroup* segment_group() const { return _segment_group; } + void set_segment_group(SegmentGroup* segment_group) { _segment_group = segment_group; } + int64_t num_rows() const { return _segment_group->num_rows(); } private: DISALLOW_COPY_AND_ASSIGN(ColumnData); @@ -160,7 +160,7 @@ class ColumnData { } private: DataFileType _data_file_type; - Rowset* _olap_index; + SegmentGroup* _segment_group; // 当到达文件末尾或者到达end key时设置此标志 bool _eof; const Conditions* _conditions; @@ -209,10 +209,10 @@ class ColumnDataComparator { ColumnDataComparator( RowBlockPosition position, ColumnData* olap_data, - const Rowset* index) + const SegmentGroup* segment_group) : _start_block_position(position), _olap_data(olap_data), - _index(index) {} + _segment_group(segment_group) {} ~ColumnDataComparator() {} @@ -232,7 +232,7 @@ class ColumnDataComparator { ComparatorEnum comparator_enum) const { OLAPStatus res = OLAP_SUCCESS; RowBlockPosition position = _start_block_position; - if (OLAP_SUCCESS != (res = _index->advance_row_block(index, &position))) { + if (OLAP_SUCCESS != (res = _segment_group->advance_row_block(index, &position))) { OLAP_LOG_FATAL("fail to advance row block. [res=%d]", res); throw ComparatorException(); } @@ -251,7 +251,7 @@ class ColumnDataComparator { const RowBlockPosition _start_block_position; ColumnData* _olap_data; - const Rowset* _index; + const SegmentGroup* _segment_group; }; } // namespace doris diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index aee0239ffea911..abdf555db07ab7 100755 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -128,18 +128,18 @@ OLAPStatus CumulativeCompaction::run() { DorisMetrics::cumulative_compaction_deltas_total.increment(_need_merged_versions.size()); int64_t merge_bytes = 0; for (ColumnData* i_data : _data_source) { - merge_bytes += i_data->olap_index()->data_size(); + merge_bytes += i_data->segment_group()->data_size(); } DorisMetrics::cumulative_compaction_bytes_total.increment(merge_bytes); } do { // 3. 生成新cumulative文件对应的olap index - _new_cumulative_index = new (nothrow) Rowset(_table.get(), + _new_segment_group = new (nothrow) SegmentGroup(_table.get(), _cumulative_version, _cumulative_version_hash, false, 0, 0); - if (_new_cumulative_index == NULL) { + if (_new_segment_group == NULL) { OLAP_LOG_WARNING("failed to malloc new cumulative olap index. " "[table=%s; cumulative_version=%d-%d]", _table->full_name().c_str(), @@ -161,9 +161,9 @@ OLAPStatus CumulativeCompaction::run() { } while (0); // 5. 如果出现错误,执行清理工作 - if (res != OLAP_SUCCESS && _new_cumulative_index != NULL) { - _new_cumulative_index->delete_all_files(); - SAFE_DELETE(_new_cumulative_index); + if (res != OLAP_SUCCESS && _new_segment_group != NULL) { + _new_segment_group->delete_all_files(); + SAFE_DELETE(_new_segment_group); } if (_data_source.size() != 0) { @@ -370,7 +370,7 @@ bool CumulativeCompaction::_find_previous_version(const Version current_version, OLAPStatus CumulativeCompaction::_do_cumulative_compaction() { OLAPStatus res = OLAP_SUCCESS; - Merger merger(_table, _new_cumulative_index, READER_CUMULATIVE_COMPACTION); + Merger merger(_table, _new_segment_group, READER_CUMULATIVE_COMPACTION); // 1. merge delta files into new cumulative file uint64_t merged_rows = 0; @@ -385,7 +385,7 @@ OLAPStatus CumulativeCompaction::_do_cumulative_compaction() { } // 2. load new cumulative file - res = _new_cumulative_index->load(); + res = _new_segment_group->load(); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("failed to load cumulative index. [table=%s; cumulative_version=%d-%d]", _table->full_name().c_str(), @@ -397,24 +397,24 @@ OLAPStatus CumulativeCompaction::_do_cumulative_compaction() { // Check row num changes uint64_t source_rows = 0; for (ColumnData* i_data : _data_source) { - source_rows += i_data->olap_index()->num_rows(); + source_rows += i_data->segment_group()->num_rows(); } bool row_nums_check = config::row_nums_check; if (row_nums_check) { - if (source_rows != _new_cumulative_index->num_rows() + merged_rows + filted_rows) { + if (source_rows != _new_segment_group->num_rows() + merged_rows + filted_rows) { OLAP_LOG_FATAL("fail to check row num! " "[source_rows=%lu merged_rows=%lu filted_rows=%lu new_index_rows=%lu]", - source_rows, merged_rows, filted_rows, _new_cumulative_index->num_rows()); + source_rows, merged_rows, filted_rows, _new_segment_group->num_rows()); return OLAP_ERR_CHECK_LINES_ERROR; } } else { OLAP_LOG_INFO("all row nums. " "[source_rows=%lu merged_rows=%lu filted_rows=%lu new_index_rows=%lu]", - source_rows, merged_rows, filted_rows, _new_cumulative_index->num_rows()); + source_rows, merged_rows, filted_rows, _new_segment_group->num_rows()); } // 3. add new cumulative file into table - vector unused_indices; + vector unused_indices; _obtain_header_wrlock(); res = _update_header(&unused_indices); if (res != OLAP_SUCCESS) { @@ -459,9 +459,9 @@ OLAPStatus CumulativeCompaction::_do_cumulative_compaction() { return res; } -OLAPStatus CumulativeCompaction::_update_header(vector* unused_indices) { - vector new_indices; - new_indices.push_back(_new_cumulative_index); +OLAPStatus CumulativeCompaction::_update_header(vector* unused_indices) { + vector new_indices; + new_indices.push_back(_new_segment_group); OLAPStatus res = OLAP_SUCCESS; res = _table->replace_data_sources(&_need_merged_versions, &new_indices, unused_indices); @@ -481,11 +481,11 @@ OLAPStatus CumulativeCompaction::_update_header(vector* unused_indices) return res; } -void CumulativeCompaction::_delete_unused_delta_files(vector* unused_indices) { +void CumulativeCompaction::_delete_unused_delta_files(vector* unused_indices) { if (!unused_indices->empty()) { OLAPEngine* unused_index = OLAPEngine::get_instance(); - for (vector::iterator it = unused_indices->begin(); + for (vector::iterator it = unused_indices->begin(); it != unused_indices->end(); ++it) { unused_index->add_unused_index(*it); } @@ -525,12 +525,12 @@ OLAPStatus CumulativeCompaction::_validate_delete_file_action() { return OLAP_SUCCESS; } -OLAPStatus CumulativeCompaction::_roll_back(const vector& old_olap_indices) { +OLAPStatus CumulativeCompaction::_roll_back(const vector& old_olap_indices) { vector need_remove_version; need_remove_version.push_back(_cumulative_version); // unused_indices will only contain new cumulative index // we don't need to delete it here; we will delete new cumulative index in the end. - vector unused_indices; + vector unused_indices; OLAPStatus res = OLAP_SUCCESS; res = _table->replace_data_sources(&need_remove_version, &old_olap_indices, &unused_indices); diff --git a/be/src/olap/cumulative_compaction.h b/be/src/olap/cumulative_compaction.h index fe2bbd8f28e08f..c9a923e017185f 100755 --- a/be/src/olap/cumulative_compaction.h +++ b/be/src/olap/cumulative_compaction.h @@ -30,7 +30,7 @@ namespace doris { -class Rowset; +class SegmentGroup; class CumulativeCompaction { public: @@ -40,7 +40,7 @@ class CumulativeCompaction { _old_cumulative_layer_point(0), _new_cumulative_layer_point(0), _max_delta_file_size(0), - _new_cumulative_index(NULL) {} + _new_segment_group(NULL) {} ~CumulativeCompaction() {} @@ -110,13 +110,13 @@ class CumulativeCompaction { // 返回值: // - 如果成功,返回OLAP_SUCCESS // - 如果不成功,返回相应错误码 - OLAPStatus _update_header(std::vector* unused_indices); + OLAPStatus _update_header(std::vector* unused_indices); // 删除不再使用的delta文件 // // 输入输出参数 // - unused_indices: 待删除的不再使用的delta文件对应的olap index - void _delete_unused_delta_files(std::vector* unused_indices); + void _delete_unused_delta_files(std::vector* unused_indices); // 验证得到的m_need_merged_versions是否正确 // @@ -133,7 +133,7 @@ class CumulativeCompaction { OLAPStatus _validate_delete_file_action(); // 恢复header头文件的文件版本和table的data source - OLAPStatus _roll_back(const std::vector& old_olap_indices); + OLAPStatus _roll_back(const std::vector& old_olap_indices); void _obtain_header_rdlock() { _table->obtain_header_rdlock(); @@ -170,7 +170,7 @@ class CumulativeCompaction { // 新cumulative文件的version hash VersionHash _cumulative_version_hash; // 新cumulative文件对应的olap index - Rowset* _new_cumulative_index; + SegmentGroup* _new_segment_group; // 可合并的delta文件的data文件 std::vector _data_source; // 可合并的delta文件的版本 diff --git a/be/src/olap/data_writer.cpp b/be/src/olap/data_writer.cpp index a2a5ebca7d45ef..e445d970ddd171 100644 --- a/be/src/olap/data_writer.cpp +++ b/be/src/olap/data_writer.cpp @@ -20,17 +20,17 @@ #include #include "olap/segment_writer.h" -#include "olap/rowset.h" +#include "olap/segment_group.h" #include "olap/row_block.h" namespace doris { -ColumnDataWriter* ColumnDataWriter::create(OLAPTablePtr table, Rowset *index, bool is_push_write) { +ColumnDataWriter* ColumnDataWriter::create(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write) { ColumnDataWriter* writer = NULL; switch (table->data_file_type()) { case COLUMN_ORIENTED_FILE: - writer = new (std::nothrow) ColumnDataWriter(table, index, is_push_write); + writer = new (std::nothrow) ColumnDataWriter(table, segment_group, is_push_write); break; default: LOG(WARNING) << "unknown data file type. type=" << DataFileType_Name(table->data_file_type()); @@ -40,13 +40,13 @@ ColumnDataWriter* ColumnDataWriter::create(OLAPTablePtr table, Rowset *index, bo return writer; } -ColumnDataWriter::ColumnDataWriter(OLAPTablePtr table, Rowset* index, bool is_push_write) +ColumnDataWriter::ColumnDataWriter(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write) : _is_push_write(is_push_write), _table(table), _column_statistics(_table->num_key_fields(), std::pair(NULL, NULL)), _row_index(0), - _index(index), + _segment_group(segment_group), _row_block(NULL), _segment_writer(NULL), _num_rows(0), @@ -120,7 +120,7 @@ OLAPStatus ColumnDataWriter::_init_segment() { return res; } - res = _index->add_segment(); + res = _segment_group->add_segment(); if (OLAP_SUCCESS != res) { OLAP_LOG_WARNING("fail to add index segment. [res=%d]", res); return res; @@ -187,7 +187,7 @@ void ColumnDataWriter::next(const char* row, const Schema* schema) { OLAPStatus ColumnDataWriter::finalize() { if (_all_num_rows == 0 && _row_index == 0) { - _index->set_empty(true); + _segment_group->set_empty(true); return OLAP_SUCCESS; } OLAPStatus res = _flush_row_block(true); @@ -202,7 +202,7 @@ OLAPStatus ColumnDataWriter::finalize() { return res; } - res = _index->add_column_statistics(_column_statistics); + res = _segment_group->add_column_statistics(_column_statistics); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("Fail to set delta pruning![res=%d]", res); return res; @@ -233,8 +233,8 @@ OLAPStatus ColumnDataWriter::_flush_row_block(bool finalize) { return OLAP_ERR_WRITER_DATA_WRITE_ERROR; } - // 在Rowset中记录的不是数据文件的偏移,而是block的编号 - if (OLAP_SUCCESS != _index->add_row_block(*_row_block, _block_id++)) { + // 在SegmentGroup中记录的不是数据文件的偏移,而是block的编号 + if (OLAP_SUCCESS != _segment_group->add_row_block(*_row_block, _block_id++)) { OLAP_LOG_WARNING("fail to update index."); return OLAP_ERR_WRITER_INDEX_WRITE_ERROR; } @@ -256,7 +256,7 @@ OLAPStatus ColumnDataWriter::_add_segment() { return OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED; } - file_name = _index->construct_data_file_path(_index->rowset_id(), _segment); + file_name = _segment_group->construct_data_file_path(_segment_group->segment_group_id(), _segment); _segment_writer = new(std::nothrow) SegmentWriter(file_name, _table, OLAP_DEFAULT_COLUMN_STREAM_BUFFER_SIZE); @@ -309,7 +309,7 @@ OLAPStatus ColumnDataWriter::_finalize_segment() { return OLAP_ERR_WRITER_DATA_WRITE_ERROR; } - if (OLAP_SUCCESS != _index->finalize_segment(data_segment_size, _num_rows)) { + if (OLAP_SUCCESS != _segment_group->finalize_segment(data_segment_size, _num_rows)) { OLAP_LOG_WARNING("fail to finish segment from olap_index."); return OLAP_ERR_WRITER_INDEX_WRITE_ERROR; } diff --git a/be/src/olap/data_writer.h b/be/src/olap/data_writer.h index c9d072d474ee98..d27c006daa911f 100644 --- a/be/src/olap/data_writer.h +++ b/be/src/olap/data_writer.h @@ -31,8 +31,8 @@ class ColumnDataWriter { public: // Factory function // 调用者获得新建的对象, 并负责delete释放 - static ColumnDataWriter* create(OLAPTablePtr table, Rowset* index, bool is_push_write); - ColumnDataWriter(OLAPTablePtr table, Rowset* index, bool is_push_write); + static ColumnDataWriter* create(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write); + ColumnDataWriter(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write); ~ColumnDataWriter(); OLAPStatus init(); OLAPStatus attached_by(RowCursor* row_cursor); @@ -55,7 +55,7 @@ class ColumnDataWriter { std::vector> _column_statistics; uint32_t _row_index; - Rowset* _index; + SegmentGroup* _segment_group; RowBlock* _row_block; // 使用RowBlcok缓存要写入的数据 RowCursor _cursor; SegmentWriter* _segment_writer; diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index ce13cf5e5bed4c..1c58dde220908a 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -18,7 +18,7 @@ #include "olap/delta_writer.h" #include "olap/schema.h" -#include "olap/rowset.h" +#include "olap/segment_group.h" namespace doris { @@ -29,10 +29,10 @@ OLAPStatus DeltaWriter::open(WriteRequest* req, DeltaWriter** writer) { DeltaWriter::DeltaWriter(WriteRequest* req) : _req(*req), _table(nullptr), - _cur_rowset(nullptr), _new_table(nullptr), + _cur_segment_group(nullptr), _new_table(nullptr), _writer(nullptr), _mem_table(nullptr), _schema(nullptr), _field_infos(nullptr), - _rowset_id(-1), _delta_written_success(false) {} + _segment_group_id(-1), _delta_written_success(false) {} DeltaWriter::~DeltaWriter() { if (!_delta_written_success) { @@ -46,16 +46,16 @@ DeltaWriter::~DeltaWriter() { void DeltaWriter::_garbage_collection() { OLAPEngine::get_instance()->delete_transaction(_req.partition_id, _req.transaction_id, _req.tablet_id, _req.schema_hash); - for (Rowset* rowset : _rowset_vec) { - rowset->release(); - OLAPEngine::get_instance()->add_unused_index(rowset); + for (SegmentGroup* segment_group : _segment_group_vec) { + segment_group->release(); + OLAPEngine::get_instance()->add_unused_index(segment_group); } if (_new_table != nullptr) { OLAPEngine::get_instance()->delete_transaction(_req.partition_id, _req.transaction_id, _new_table->tablet_id(), _new_table->schema_hash()); - for (Rowset* rowset : _new_rowset_vec) { - rowset->release(); - OLAPEngine::get_instance()->add_unused_index(rowset); + for (SegmentGroup* segment_group : _new_segment_group_vec) { + segment_group->release(); + OLAPEngine::get_instance()->add_unused_index(segment_group); } } } @@ -73,7 +73,7 @@ OLAPStatus DeltaWriter::init() { RETURN_NOT_OK(OLAPEngine::get_instance()->add_transaction( _req.partition_id, _req.transaction_id, _req.tablet_id, _req.schema_hash, _req.load_id)); - //_rowset_id = _table->current_pending_rowset_id(_req.transaction_id); + //_segment_group_id = _table->current_pending_segment_group_id(_req.transaction_id); if (_req.need_gen_rollup) { TTabletId new_tablet_id; TSchemaHash new_schema_hash; @@ -102,18 +102,18 @@ OLAPStatus DeltaWriter::init() { } } - ++_rowset_id; - _cur_rowset = new Rowset(_table.get(), false, _rowset_id, 0, true, + ++_segment_group_id; + _cur_segment_group = new SegmentGroup(_table.get(), false, _segment_group_id, 0, true, _req.partition_id, _req.transaction_id); - DCHECK(_cur_rowset != nullptr) << "failed to malloc Rowset"; - _cur_rowset->acquire(); - _cur_rowset->set_load_id(_req.load_id); - _rowset_vec.push_back(_cur_rowset); + DCHECK(_cur_segment_group != nullptr) << "failed to malloc SegmentGroup"; + _cur_segment_group->acquire(); + _cur_segment_group->set_load_id(_req.load_id); + _segment_group_vec.push_back(_cur_segment_group); - // New Writer to write data into Rowset + // New Writer to write data into SegmentGroup VLOG(3) << "init writer. table=" << _table->full_name() << ", " << "block_row_size=" << _table->num_rows_per_row_block(); - _writer = ColumnDataWriter::create(_table, _cur_rowset, true); + _writer = ColumnDataWriter::create(_table, _cur_segment_group, true); DCHECK(_writer != nullptr) << "memory error occur when creating writer"; const std::vector& slots = _req.tuple_desc->slots(); @@ -144,16 +144,16 @@ OLAPStatus DeltaWriter::write(Tuple* tuple) { if (_mem_table->memory_usage() >= config::write_buffer_size) { RETURN_NOT_OK(_mem_table->flush(_writer)); - ++_rowset_id; - _cur_rowset = new Rowset(_table.get(), false, _rowset_id, 0, true, + ++_segment_group_id; + _cur_segment_group = new SegmentGroup(_table.get(), false, _segment_group_id, 0, true, _req.partition_id, _req.transaction_id); - DCHECK(_cur_rowset != nullptr) << "failed to malloc Rowset"; - _cur_rowset->acquire(); - _cur_rowset->set_load_id(_req.load_id); - _rowset_vec.push_back(_cur_rowset); + DCHECK(_cur_segment_group != nullptr) << "failed to malloc SegmentGroup"; + _cur_segment_group->acquire(); + _cur_segment_group->set_load_id(_req.load_id); + _segment_group_vec.push_back(_cur_segment_group); SAFE_DELETE(_writer); - _writer = ColumnDataWriter::create(_table, _cur_rowset, true); + _writer = ColumnDataWriter::create(_table, _cur_segment_group, true); DCHECK(_writer != nullptr) << "memory error occur when creating writer"; SAFE_DELETE(_mem_table); @@ -175,9 +175,9 @@ OLAPStatus DeltaWriter::close(google::protobuf::RepeatedPtrField* t OLAPStatus res = OLAP_SUCCESS; //add pending data to tablet RETURN_NOT_OK(_table->add_pending_version(_req.partition_id, _req.transaction_id, nullptr)); - for (Rowset* rowset : _rowset_vec) { - RETURN_NOT_OK(_table->add_pending_rowset(rowset)); - RETURN_NOT_OK(rowset->load()); + for (SegmentGroup* segment_group : _segment_group_vec) { + RETURN_NOT_OK(_table->add_pending_segment_group(segment_group)); + RETURN_NOT_OK(segment_group->load()); } if (_new_table != nullptr) { LOG(INFO) << "convert version for schema change"; @@ -191,7 +191,7 @@ OLAPStatus DeltaWriter::close(google::protobuf::RepeatedPtrField* t } SchemaChangeHandler schema_change; res = schema_change.schema_version_convert( - _table, _new_table, &_rowset_vec, &_new_rowset_vec); + _table, _new_table, &_segment_group_vec, &_new_segment_group_vec); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to convert delta for new table in schema change." << "res: " << res << ", " << "new_table: " << _new_table->full_name(); @@ -199,9 +199,9 @@ OLAPStatus DeltaWriter::close(google::protobuf::RepeatedPtrField* t } RETURN_NOT_OK(_new_table->add_pending_version(_req.partition_id, _req.transaction_id, nullptr)); - for (Rowset* rowset : _new_rowset_vec) { - RETURN_NOT_OK(_new_table->add_pending_rowset(rowset)); - RETURN_NOT_OK(rowset->load()); + for (SegmentGroup* segment_group : _new_segment_group_vec) { + RETURN_NOT_OK(_new_table->add_pending_segment_group(segment_group)); + RETURN_NOT_OK(segment_group->load()); } } diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index 013df40593be72..4778893fa2441a 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -29,7 +29,7 @@ namespace doris { -class Rowset; +class SegmentGroup; enum WriteType { LOAD = 1, @@ -66,9 +66,9 @@ class DeltaWriter { bool _is_init = false; WriteRequest _req; OLAPTablePtr _table; - Rowset* _cur_rowset; - std::vector _rowset_vec; - std::vector _new_rowset_vec; + SegmentGroup* _cur_segment_group; + std::vector _segment_group_vec; + std::vector _new_segment_group_vec; OLAPTablePtr _new_table; ColumnDataWriter* _writer; MemTable* _mem_table; @@ -76,7 +76,7 @@ class DeltaWriter { std::vector* _field_infos; std::vector _col_ids; - int32_t _rowset_id; + int32_t _segment_group_id; bool _delta_written_success; }; diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index 48456ef0235f6c..e190738e7ab501 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -22,7 +22,7 @@ #include "olap/column_data.h" #include "olap/olap_define.h" -#include "olap/rowset.h" +#include "olap/segment_group.h" #include "olap/olap_table.h" #include "olap/reader.h" #include "olap/row_cursor.h" @@ -35,9 +35,9 @@ using std::vector; namespace doris { -Merger::Merger(OLAPTablePtr table, Rowset* index, ReaderType type) : +Merger::Merger(OLAPTablePtr table, SegmentGroup* segment_group, ReaderType type) : _table(table), - _index(index), + _segment_group(segment_group), _reader_type(type), _row_count(0) {} @@ -52,7 +52,7 @@ OLAPStatus Merger::merge(const vector& olap_data_arr, reader_params.olap_data_arr = olap_data_arr; if (_reader_type == READER_BASE_COMPACTION) { - reader_params.version = _index->version(); + reader_params.version = _segment_group->version(); } if (OLAP_SUCCESS != reader.init(reader_params)) { @@ -62,7 +62,7 @@ OLAPStatus Merger::merge(const vector& olap_data_arr, } // create and initiate writer for generating new index and data files. - unique_ptr writer(ColumnDataWriter::create(_table, _index, false)); + unique_ptr writer(ColumnDataWriter::create(_table, _segment_group, false)); if (NULL == writer) { OLAP_LOG_WARNING("fail to allocate writer."); diff --git a/be/src/olap/merger.h b/be/src/olap/merger.h index 4f587aaf93e087..e47469fd4be706 100644 --- a/be/src/olap/merger.h +++ b/be/src/olap/merger.h @@ -23,17 +23,17 @@ namespace doris { -class Rowset; +class SegmentGroup; class ColumnData; class Merger { public: // parameter index is created by caller, and it is empty. - Merger(OLAPTablePtr table, Rowset* index, ReaderType type); + Merger(OLAPTablePtr table, SegmentGroup* index, ReaderType type); virtual ~Merger() {}; - // @brief read from multiple OLAPData and Rowset, then write into single OLAPData and Rowset + // @brief read from multiple OLAPData and SegmentGroup, then write into single OLAPData and SegmentGroup // @return OLAPStatus: OLAP_SUCCESS or FAIL // @note it will take long time to finish. OLAPStatus merge(const std::vector& olap_data_arr, @@ -45,7 +45,7 @@ class Merger { } private: OLAPTablePtr _table; - Rowset* _index; + SegmentGroup* _segment_group; ReaderType _reader_type; uint64_t _row_count; Version _simple_merge_version; diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index f4a5e5ec6c019d..598fb75dd040ed 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -176,11 +176,11 @@ struct Vertex { class Field; class WrapperField; using KeyRange = std::pair; -struct RowSetEntity { - RowSetEntity(int32_t rowset_id, int32_t num_segments, +struct SegmentGroupEntity { + SegmentGroupEntity(int32_t segment_group_id, int32_t num_segments, int64_t num_rows, size_t data_size, size_t index_size, bool empty, const std::vector* column_statistics) - : rowset_id(rowset_id), num_segments(num_segments), num_rows(num_rows), + : segment_group_id(segment_group_id), num_segments(num_segments), num_rows(num_rows), data_size(data_size), index_size(index_size), empty(empty) { if (column_statistics != nullptr) { @@ -188,7 +188,7 @@ struct RowSetEntity { } } - int32_t rowset_id; + int32_t segment_group_id; int32_t num_segments; int64_t num_rows; size_t data_size; @@ -200,13 +200,13 @@ struct RowSetEntity { struct VersionEntity { VersionEntity(Version v, VersionHash version_hash) : version(v), version_hash(version_hash) { } - void add_rowset_entity(const RowSetEntity& rowset) { - rowset_vec.push_back(rowset); + void add_segment_group_entity(const SegmentGroupEntity& segment_group_entity) { + segment_group_vec.push_back(segment_group_entity); } Version version; VersionHash version_hash; - std::vector rowset_vec; + std::vector segment_group_vec; }; // ReaderStatistics used to collect statistics when scan data from storage diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index c471dde1738e6e..85469053192c86 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -237,7 +237,7 @@ enum OLAPStatus { OLAP_ERR_PUSH_INPUT_DATA_ERROR = -910, OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST = -911, - // Rowset + // SegmentGroup // [-1000, -1100) OLAP_ERR_INDEX_LOAD_ERROR = -1000, OLAP_ERR_INDEX_EOF = -1001, diff --git a/be/src/olap/olap_engine.cpp b/be/src/olap/olap_engine.cpp index a513ec06072173..7841dc902e70c1 100644 --- a/be/src/olap/olap_engine.cpp +++ b/be/src/olap/olap_engine.cpp @@ -170,7 +170,7 @@ OLAPStatus OLAPEngine::_load_store(OlapStore* store) { } for (const auto& tablet : tablets) { - // 遍历table目录寻找此table的所有indexedRollupTable,注意不是Rowset,而是OLAPTable + // 遍历table目录寻找此table的所有indexedRollupTable,注意不是SegmentGroup,而是OLAPTable set schema_hashes; string one_tablet_path = one_shard_path + '/' + tablet; if (dir_walk(one_tablet_path, &schema_hashes, NULL) != OLAP_SUCCESS) { @@ -317,7 +317,7 @@ OLAPStatus OLAPEngine::_check_none_row_oriented_table_in_store(OlapStore* store) } for (const auto& tablet : tablets) { - // 遍历table目录寻找此table的所有indexedRollupTable,注意不是Rowset,而是OLAPTable + // 遍历table目录寻找此table的所有indexedRollupTable,注意不是SegmentGroup,而是OLAPTable set schema_hashes; string one_tablet_path = one_shard_path + '/' + tablet; if (dir_walk(one_tablet_path, &schema_hashes, NULL) != OLAP_SUCCESS) { @@ -1508,9 +1508,9 @@ OLAPStatus OLAPEngine::create_init_version(TTabletId tablet_id, SchemaHash schem OLAPTablePtr table; ColumnDataWriter* writer = NULL; - Rowset* new_rowset = NULL; + SegmentGroup* new_segment_group = NULL; OLAPStatus res = OLAP_SUCCESS; - std::vector index_vec; + std::vector index_vec; do { if (version.first > version.second) { @@ -1528,15 +1528,15 @@ OLAPStatus OLAPEngine::create_init_version(TTabletId tablet_id, SchemaHash schem break; } - new_rowset = new(nothrow) Rowset(table.get(), version, version_hash, false, 0, 0); - if (new_rowset == NULL) { + new_segment_group = new(nothrow) SegmentGroup(table.get(), version, version_hash, false, 0, 0); + if (new_segment_group == NULL) { LOG(WARNING) << "fail to malloc index. [table=" << table->full_name() << "]"; res = OLAP_ERR_MALLOC_ERROR; break; } // Create writer, which write nothing to table, to generate empty data file - writer = ColumnDataWriter::create(table, new_rowset, false); + writer = ColumnDataWriter::create(table, new_segment_group, false); if (writer == NULL) { LOG(WARNING) << "fail to create writer. [table=" << table->full_name() << "]"; res = OLAP_ERR_MALLOC_ERROR; @@ -1550,14 +1550,14 @@ OLAPStatus OLAPEngine::create_init_version(TTabletId tablet_id, SchemaHash schem } // Load new index and add to table - res = new_rowset->load(); + res = new_segment_group->load(); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to load new index. [table=" << table->full_name() << "]"; break; } WriteLock wrlock(table->get_header_lock_ptr()); - index_vec.push_back(new_rowset); + index_vec.push_back(new_segment_group); res = table->register_data_source(index_vec); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to register index to data sources. [table=%s]", @@ -1574,12 +1574,12 @@ OLAPStatus OLAPEngine::create_init_version(TTabletId tablet_id, SchemaHash schem // Unregister index and delete files(index and data) if failed if (res != OLAP_SUCCESS && table.get() != NULL) { - std::vector unused_index; + std::vector unused_index; table->obtain_header_wrlock(); table->unregister_data_source(version, &unused_index); table->release_header_lock(); - for (Rowset* index : index_vec) { + for (SegmentGroup* index : index_vec) { index->delete_all_files(); SAFE_DELETE(index); } @@ -1915,7 +1915,7 @@ OLAPStatus OLAPEngine::start_trash_sweep(double* usage) { } } - // clear expire incremental rowset + // clear expire incremental segment_group _tablet_map_lock.rdlock(); for (const auto& item : _tablet_map) { for (OLAPTablePtr olap_table : item.second.table_arr) { @@ -2213,21 +2213,21 @@ void OLAPEngine::start_delete_unused_index() { _gc_mutex.unlock(); } -void OLAPEngine::add_unused_index(Rowset* olap_index) { +void OLAPEngine::add_unused_index(SegmentGroup* segment_group) { _gc_mutex.lock(); - auto it = _gc_files.find(olap_index); + auto it = _gc_files.find(segment_group); if (it == _gc_files.end()) { vector files; - int32_t rowset_id = olap_index->rowset_id(); - for (size_t seg_id = 0; seg_id < olap_index->num_segments(); ++seg_id) { - string index_file = olap_index->construct_index_file_path(rowset_id, seg_id); + int32_t segment_group_id = segment_group->segment_group_id(); + for (size_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { + string index_file = segment_group->construct_index_file_path(segment_group_id, seg_id); files.push_back(index_file); - string data_file = olap_index->construct_data_file_path(rowset_id, seg_id); + string data_file = segment_group->construct_data_file_path(segment_group_id, seg_id); files.push_back(data_file); } - _gc_files[olap_index] = files; + _gc_files[segment_group] = files; } _gc_mutex.unlock(); diff --git a/be/src/olap/olap_engine.h b/be/src/olap/olap_engine.h index ccd1b36f4ac3cb..9dc333c3d815d1 100644 --- a/be/src/olap/olap_engine.h +++ b/be/src/olap/olap_engine.h @@ -265,7 +265,7 @@ class OLAPEngine { void start_delete_unused_index(); - void add_unused_index(Rowset* olap_index); + void add_unused_index(SegmentGroup* olap_index); // ######################### ALTER TABLE BEGIN ######################### // The following interfaces are all about alter tablet operation, @@ -435,13 +435,13 @@ class OLAPEngine { const std::string& tablet_path_prefix, const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const; + int32_t segment_group_id, int32_t segment) const; std::string _construct_data_file_path( const std::string& tablet_path_prefix, const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const; + int32_t segment_group_id, int32_t segment) const; OLAPStatus _generate_new_header( OlapStore* store, @@ -565,7 +565,7 @@ class OLAPEngine { Mutex _snapshot_mutex; uint64_t _snapshot_base_id; - std::unordered_map> _gc_files; + std::unordered_map> _gc_files; Mutex _gc_mutex; // Thread functions diff --git a/be/src/olap/olap_header.cpp b/be/src/olap/olap_header.cpp index 7d8389171a3567..bd4ce1859a1afc 100644 --- a/be/src/olap/olap_header.cpp +++ b/be/src/olap/olap_header.cpp @@ -80,8 +80,8 @@ OLAPHeader::~OLAPHeader() { } void OLAPHeader::change_file_version_to_delta() { - // convert FileVersionMessage to PDelta and PRowSet in initialization. - // FileVersionMessage is used in previous code, and PDelta and PRowSet + // convert FileVersionMessage to PDelta and PSegmentGroup in initialization. + // FileVersionMessage is used in previous code, and PDelta and PSegmentGroup // is used in streaming load branch. for (int i = 0; i < file_version_size(); ++i) { PDelta* delta = add_delta(); @@ -136,7 +136,7 @@ OLAPStatus OLAPHeader::load_and_init() { } if (file_version_size() != 0) { - // convert FileVersionMessage to PDelta and PRowSet in initialization. + // convert FileVersionMessage to PDelta and PSegmentGroup in initialization. for (int i = 0; i < file_version_size(); ++i) { PDelta* delta = add_delta(); _convert_file_version_to_delta(file_version(i), delta); @@ -213,7 +213,7 @@ OLAPStatus OLAPHeader::save(const string& file_path) { } OLAPStatus OLAPHeader::add_version(Version version, VersionHash version_hash, - int32_t rowset_id, int32_t num_segments, + int32_t segment_group_id, int32_t num_segments, int64_t index_size, int64_t data_size, int64_t num_rows, bool empty, const std::vector* column_statistics) { // Check whether version is valid. @@ -227,8 +227,8 @@ OLAPStatus OLAPHeader::add_version(Version version, VersionHash version_hash, for (int i = 0; i < delta_size(); ++i) { if (delta(i).start_version() == version.first && delta(i).end_version() == version.second) { - for (const PRowSet& rowset : delta(i).rowset()) { - if (rowset.rowset_id() == rowset_id) { + for (const PSegmentGroup& segment_group : delta(i).segment_group()) { + if (segment_group.segment_group_id() == segment_group_id) { LOG(WARNING) << "the version is existed." << "version=" << version.first << ", " << version.second; @@ -240,14 +240,14 @@ OLAPStatus OLAPHeader::add_version(Version version, VersionHash version_hash, } } - // if rowset_id is greater or equal than zero, it is used + // if segment_group_id is greater or equal than zero, it is used // to streaming load // Try to add version to protobuf. PDelta* new_delta = nullptr; try { - if (rowset_id == -1 || rowset_id == 0) { - // snapshot will use rowset_id which equals minus one + if (segment_group_id == -1 || segment_group_id == 0) { + // snapshot will use segment_group_id which equals minus one new_delta = add_delta(); new_delta->set_start_version(version.first); new_delta->set_end_version(version.second); @@ -256,17 +256,17 @@ OLAPStatus OLAPHeader::add_version(Version version, VersionHash version_hash, } else { new_delta = const_cast(&delta(delta_id)); } - PRowSet* new_rowset = new_delta->add_rowset(); - new_rowset->set_rowset_id(rowset_id); - new_rowset->set_num_segments(num_segments); - new_rowset->set_index_size(index_size); - new_rowset->set_data_size(data_size); - new_rowset->set_num_rows(num_rows); - new_rowset->set_empty(empty); + PSegmentGroup* new_segment_group = new_delta->add_segment_group(); + new_segment_group->set_segment_group_id(segment_group_id); + new_segment_group->set_num_segments(num_segments); + new_segment_group->set_index_size(index_size); + new_segment_group->set_data_size(data_size); + new_segment_group->set_num_rows(num_rows); + new_segment_group->set_empty(empty); if (NULL != column_statistics) { for (size_t i = 0; i < column_statistics->size(); ++i) { ColumnPruning *column_pruning = - new_rowset->add_column_pruning(); + new_segment_group->add_column_pruning(); column_pruning->set_min(column_statistics->at(i).first->to_string()); column_pruning->set_max(column_statistics->at(i).second->to_string()); column_pruning->set_null_flag(column_statistics->at(i).first->is_null()); @@ -315,16 +315,16 @@ OLAPStatus OLAPHeader::add_pending_version( } } catch (...) { - LOG(WARNING) << "fail to add pending rowset to header protobf"; + LOG(WARNING) << "fail to add pending segment_group to header protobf"; return OLAP_ERR_HEADER_ADD_PENDING_DELTA; } return OLAP_SUCCESS; } -OLAPStatus OLAPHeader::add_pending_rowset( +OLAPStatus OLAPHeader::add_pending_segment_group( int64_t transaction_id, int32_t num_segments, - int32_t pending_rowset_id, const PUniqueId& load_id, + int32_t pending_segment_group_id, const PUniqueId& load_id, bool empty, const std::vector* column_statistics) { int32_t delta_id = 0; @@ -332,12 +332,12 @@ OLAPStatus OLAPHeader::add_pending_rowset( const PPendingDelta& delta = pending_delta(i); if (delta.transaction_id() == transaction_id) { delta_id = i; - for (int j = 0; j < delta.pending_rowset_size(); ++j) { - const PPendingRowSet& pending_rowset = delta.pending_rowset(j); - if (pending_rowset.pending_rowset_id() == pending_rowset_id) { - LOG(WARNING) << "pending rowset already exists in header." + for (int j = 0; j < delta.pending_segment_group_size(); ++j) { + const PPendingSegmentGroup& pending_segment_group = delta.pending_segment_group(j); + if (pending_segment_group.pending_segment_group_id() == pending_segment_group_id) { + LOG(WARNING) << "pending segment_group already exists in header." << "transaction_id:" << transaction_id << ", " - << "pending_rowset_id: " << pending_rowset_id; + << "pending_segment_group_id: " << pending_segment_group_id; return OLAP_ERR_HEADER_ADD_PENDING_DELTA; } } @@ -345,25 +345,24 @@ OLAPStatus OLAPHeader::add_pending_rowset( } try { - PPendingRowSet* new_pending_rowset - = const_cast(pending_delta(delta_id)).add_pending_rowset(); - new_pending_rowset->set_pending_rowset_id(pending_rowset_id); - new_pending_rowset->set_num_segments(num_segments); - new_pending_rowset->mutable_load_id()->set_hi(load_id.hi()); - new_pending_rowset->mutable_load_id()->set_lo(load_id.lo()); - new_pending_rowset->set_empty(empty); + PPendingSegmentGroup* new_pending_segment_group + = const_cast(pending_delta(delta_id)).add_pending_segment_group(); + new_pending_segment_group->set_pending_segment_group_id(pending_segment_group_id); + new_pending_segment_group->set_num_segments(num_segments); + new_pending_segment_group->mutable_load_id()->set_hi(load_id.hi()); + new_pending_segment_group->mutable_load_id()->set_lo(load_id.lo()); + new_pending_segment_group->set_empty(empty); if (NULL != column_statistics) { for (size_t i = 0; i < column_statistics->size(); ++i) { ColumnPruning *column_pruning = - new_pending_rowset->add_column_pruning(); + new_pending_segment_group->add_column_pruning(); column_pruning->set_min(column_statistics->at(i).first->to_string()); column_pruning->set_max(column_statistics->at(i).second->to_string()); column_pruning->set_null_flag(column_statistics->at(i).first->is_null()); } } - } catch (...) { - OLAP_LOG_WARNING("fail to add pending rowset to protobf"); + OLAP_LOG_WARNING("fail to add pending segment_group to protobf"); return OLAP_ERR_HEADER_ADD_PENDING_DELTA; } @@ -371,7 +370,7 @@ OLAPStatus OLAPHeader::add_pending_rowset( } OLAPStatus OLAPHeader::add_incremental_version(Version version, VersionHash version_hash, - int32_t rowset_id, int32_t num_segments, + int32_t segment_group_id, int32_t num_segments, int64_t index_size, int64_t data_size, int64_t num_rows, bool empty, const std::vector* column_statistics) { // Check whether version is valid. @@ -386,12 +385,12 @@ OLAPStatus OLAPHeader::add_incremental_version(Version version, VersionHash vers const PDelta& incre_delta = incremental_delta(i); if (incre_delta.start_version() == version.first) { delta_id = i; - for (int j = 0; j < incre_delta.rowset_size(); ++j) { - const PRowSet& incremental_rowset = incre_delta.rowset(j); - if (incremental_rowset.rowset_id() == rowset_id) { - LOG(WARNING) << "rowset already exists in header." + for (int j = 0; j < incre_delta.segment_group_size(); ++j) { + const PSegmentGroup& incremental_segment_group = incre_delta.segment_group(j); + if (incremental_segment_group.segment_group_id() == segment_group_id) { + LOG(WARNING) << "segment_group already exists in header." << "version: " << version.first << "-" << version.second << "," - << "rowset_id: " << rowset_id; + << "segment_group_id: " << segment_group_id; return OLAP_ERR_HEADER_ADD_PENDING_DELTA; } } @@ -401,7 +400,7 @@ OLAPStatus OLAPHeader::add_incremental_version(Version version, VersionHash vers // Try to add version to protobuf. try { PDelta* new_incremental_delta = nullptr; - if (rowset_id == 0) { + if (segment_group_id == 0) { new_incremental_delta = add_incremental_delta(); new_incremental_delta->set_start_version(version.first); new_incremental_delta->set_end_version(version.second); @@ -410,17 +409,17 @@ OLAPStatus OLAPHeader::add_incremental_version(Version version, VersionHash vers } else { new_incremental_delta = const_cast(&incremental_delta(delta_id)); } - PRowSet* new_incremental_rowset = new_incremental_delta->add_rowset(); - new_incremental_rowset->set_rowset_id(rowset_id); - new_incremental_rowset->set_num_segments(num_segments); - new_incremental_rowset->set_index_size(index_size); - new_incremental_rowset->set_data_size(data_size); - new_incremental_rowset->set_num_rows(num_rows); - new_incremental_rowset->set_empty(empty); + PSegmentGroup* new_incremental_segment_group = new_incremental_delta->add_segment_group(); + new_incremental_segment_group->set_segment_group_id(segment_group_id); + new_incremental_segment_group->set_num_segments(num_segments); + new_incremental_segment_group->set_index_size(index_size); + new_incremental_segment_group->set_data_size(data_size); + new_incremental_segment_group->set_num_rows(num_rows); + new_incremental_segment_group->set_empty(empty); if (NULL != column_statistics) { for (size_t i = 0; i < column_statistics->size(); ++i) { ColumnPruning *column_pruning = - new_incremental_rowset->add_column_pruning(); + new_incremental_segment_group->add_column_pruning(); column_pruning->set_min(column_statistics->at(i).first->to_string()); column_pruning->set_max(column_statistics->at(i).second->to_string()); column_pruning->set_null_flag(column_statistics->at(i).first->is_null()); @@ -470,14 +469,15 @@ const PPendingDelta* OLAPHeader::get_pending_delta(int64_t transaction_id) const return nullptr; } -const PPendingRowSet* OLAPHeader::get_pending_rowset(int64_t transaction_id, int32_t pending_rowset_id) const { +const PPendingSegmentGroup* OLAPHeader::get_pending_segment_group(int64_t transaction_id, + int32_t pending_segment_group_id) const { for (int i = 0; i < pending_delta_size(); i++) { if (pending_delta(i).transaction_id() == transaction_id) { const PPendingDelta& delta = pending_delta(i); - for (int j = 0; j < delta.pending_rowset_size(); ++j) { - const PPendingRowSet& pending_rowset = delta.pending_rowset(j); - if (pending_rowset.pending_rowset_id() == pending_rowset_id) { - return &pending_rowset; + for (int j = 0; j < delta.pending_segment_group_size(); ++j) { + const PPendingSegmentGroup& pending_segment_group = delta.pending_segment_group(j); + if (pending_segment_group.pending_segment_group_id() == pending_segment_group_id) { + return &pending_segment_group; } } } @@ -779,15 +779,15 @@ void OLAPHeader::_convert_file_version_to_delta(const FileVersionMessage& versio delta->set_version_hash(version.version_hash()); delta->set_creation_time(version.creation_time()); - PRowSet* rowset = delta->add_rowset(); - rowset->set_rowset_id(-1); - rowset->set_num_segments(version.num_segments()); - rowset->set_index_size(version.index_size()); - rowset->set_data_size(version.data_size()); - rowset->set_num_rows(version.num_rows()); + PSegmentGroup* segment_group = delta->add_segment_group(); + segment_group->set_segment_group_id(-1); + segment_group->set_num_segments(version.num_segments()); + segment_group->set_index_size(version.index_size()); + segment_group->set_data_size(version.data_size()); + segment_group->set_num_rows(version.num_rows()); if (version.has_delta_pruning()) { for (int i = 0; i < version.delta_pruning().column_pruning_size(); ++i) { - ColumnPruning* column_pruning = rowset->add_column_pruning(); + ColumnPruning* column_pruning = segment_group->add_column_pruning(); *column_pruning = version.delta_pruning().column_pruning(i); } } diff --git a/be/src/olap/olap_header.h b/be/src/olap/olap_header.h index 8bc6d04237d8b1..f29bffc1bbcf13 100644 --- a/be/src/olap/olap_header.h +++ b/be/src/olap/olap_header.h @@ -62,26 +62,26 @@ class OLAPHeader : public OLAPHeaderMessage { // Adds a new version to the header. Do not use the proto's // add_version() directly. OLAPStatus add_version(Version version, VersionHash version_hash, - int32_t rowset_id, int32_t num_segments, + int32_t segment_group_id, int32_t num_segments, int64_t index_size, int64_t data_size, int64_t num_rows, bool empty, const std::vector* column_statistics); OLAPStatus add_pending_version(int64_t partition_id, int64_t transaction_id, const std::vector* delete_conditions); - OLAPStatus add_pending_rowset(int64_t transaction_id, int32_t num_segments, - int32_t pending_rowset_id, const PUniqueId& load_id, + OLAPStatus add_pending_segment_group(int64_t transaction_id, int32_t num_segments, + int32_t pending_segment_group_id, const PUniqueId& load_id, bool empty, const std::vector* column_statistics); - // add incremental rowset into header like "9-9" "10-10", for incremental cloning + // add incremental segment_group into header like "9-9" "10-10", for incremental cloning OLAPStatus add_incremental_version(Version version, VersionHash version_hash, - int32_t rowset_id, int32_t num_segments, + int32_t segment_group_id, int32_t num_segments, int64_t index_size, int64_t data_size, int64_t num_rows, bool empty, const std::vector* column_statistics); void add_delete_condition(const DeleteConditionMessage& delete_condition, int64_t version); const PPendingDelta* get_pending_delta(int64_t transaction_id) const; - const PPendingRowSet* get_pending_rowset(int64_t transaction_id, int32_t pending_rowset_id) const; + const PPendingSegmentGroup* get_pending_segment_group(int64_t transaction_id, int32_t pending_segment_group_id) const; const PDelta* get_incremental_version(Version version) const; // Deletes a version from the header. diff --git a/be/src/olap/olap_index.h b/be/src/olap/olap_index.h index 073f2d50bcf918..4140819c4ee509 100644 --- a/be/src/olap/olap_index.h +++ b/be/src/olap/olap_index.h @@ -39,7 +39,7 @@ namespace doris { class IndexComparator; -class Rowset; +class SegmentGroup; class OLAPTable; class RowBlock; class RowCursor; @@ -167,7 +167,7 @@ struct SegmentMetaInfo { // In memory index structure, all index hold here class MemIndex { public: - friend class Rowset; + friend class SegmentGroup; friend class IndexComparator; friend class SegmentComparator; diff --git a/be/src/olap/olap_snapshot.cpp b/be/src/olap/olap_snapshot.cpp index ab2c7c0cae4f11..da553ddfed25bd 100644 --- a/be/src/olap/olap_snapshot.cpp +++ b/be/src/olap/olap_snapshot.cpp @@ -35,7 +35,6 @@ #include "olap/olap_common.h" #include "olap/column_data.h" #include "olap/olap_define.h" -#include "olap/rowset.h" #include "olap/olap_table.h" #include "olap/olap_header_manager.h" #include "olap/push_handler.h" @@ -169,14 +168,15 @@ void OLAPEngine::_update_header_file_info( for (const VersionEntity& entity : shortest_versions) { Version version = entity.version; VersionHash v_hash = entity.version_hash; - for (RowSetEntity rowset : entity.rowset_vec) { - int32_t rowset_id = rowset.rowset_id; + for (SegmentGroupEntity segment_group_entity : entity.segment_group_vec) { + int32_t segment_group_id = segment_group_entity.segment_group_id; const std::vector* column_statistics = nullptr; - if (!rowset.key_ranges.empty()) { - column_statistics = &(rowset.key_ranges); + if (!segment_group_entity.key_ranges.empty()) { + column_statistics = &(segment_group_entity.key_ranges); } - header->add_version(version, v_hash, rowset_id, rowset.num_segments, rowset.index_size, - rowset.data_size, rowset.num_rows, rowset.empty, column_statistics); + header->add_version(version, v_hash, segment_group_id, segment_group_entity.num_segments, + segment_group_entity.index_size, segment_group_entity.data_size, + segment_group_entity.num_rows, segment_group_entity.empty, column_statistics); } } } @@ -193,13 +193,13 @@ OLAPStatus OLAPEngine::_link_index_and_data_files( for (const VersionEntity& entity : version_entity_vec) { Version version = entity.version; VersionHash v_hash = entity.version_hash; - for (RowSetEntity rowset : entity.rowset_vec) { - int32_t rowset_id = rowset.rowset_id; - for (int seg_id = 0; seg_id < rowset.num_segments; ++seg_id) { + for (SegmentGroupEntity segment_group_entity : entity.segment_group_vec) { + int32_t segment_group_id = segment_group_entity.segment_group_id; + for (int seg_id = 0; seg_id < segment_group_entity.num_segments; ++seg_id) { std::string index_path = - _construct_index_file_path(tablet_path_prefix, version, v_hash, rowset_id, seg_id); + _construct_index_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); std::string ref_table_index_path = - ref_olap_table->construct_index_file_path(version, v_hash, rowset_id, seg_id); + ref_olap_table->construct_index_file_path(version, v_hash, segment_group_id, seg_id); res = _create_hard_link(ref_table_index_path, index_path); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to create hard link. " @@ -210,9 +210,9 @@ OLAPStatus OLAPEngine::_link_index_and_data_files( } std:: string data_path = - _construct_data_file_path(tablet_path_prefix, version, v_hash, rowset_id, seg_id); + _construct_data_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); std::string ref_table_data_path = - ref_olap_table->construct_data_file_path(version, v_hash, rowset_id, seg_id); + ref_olap_table->construct_data_file_path(version, v_hash, segment_group_id, seg_id); res = _create_hard_link(ref_table_data_path, data_path); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to create hard link." @@ -237,13 +237,13 @@ OLAPStatus OLAPEngine::_copy_index_and_data_files( for (VersionEntity& entity : version_entity_vec) { Version version = entity.version; VersionHash v_hash = entity.version_hash; - for (RowSetEntity rowset : entity.rowset_vec) { - int32_t rowset_id = rowset.rowset_id; - for (int seg_id = 0; seg_id < rowset.num_segments; ++seg_id) { + for (SegmentGroupEntity segment_group_entity : entity.segment_group_vec) { + int32_t segment_group_id = segment_group_entity.segment_group_id; + for (int seg_id = 0; seg_id < segment_group_entity.num_segments; ++seg_id) { string index_path = - _construct_index_file_path(tablet_path_prefix, version, v_hash, rowset_id, seg_id); + _construct_index_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); string ref_table_index_path = ref_olap_table->construct_index_file_path( - version, v_hash, rowset_id, seg_id); + version, v_hash, segment_group_id, seg_id); Status res = FileUtils::copy_file(ref_table_index_path, index_path); if (!res.ok()) { LOG(WARNING) << "fail to copy index file." @@ -253,9 +253,9 @@ OLAPStatus OLAPEngine::_copy_index_and_data_files( } string data_path = - _construct_data_file_path(tablet_path_prefix, version, v_hash, rowset_id, seg_id); + _construct_data_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); string ref_table_data_path = ref_olap_table->construct_data_file_path( - version, v_hash, rowset_id, seg_id); + version, v_hash, segment_group_id, seg_id); res = FileUtils::copy_file(ref_table_data_path, data_path); if (!res.ok()) { LOG(WARNING) << "fail to copy data file." @@ -504,11 +504,11 @@ OLAPStatus OLAPEngine::_create_incremental_snapshot_files( "begin to link files. [table=%ld schema_hash=%d version=%ld]", request.tablet_id, request.schema_hash, missing_version); // link files - for (uint32_t i = 0; i < incremental_delta->rowset(0).num_segments(); i++) { - int32_t rowset_id = incremental_delta->rowset(0).rowset_id(); + for (uint32_t i = 0; i < incremental_delta->segment_group(0).num_segments(); i++) { + int32_t segment_group_id = incremental_delta->segment_group(0).segment_group_id(); string from = ref_olap_table->construct_incremental_index_file_path( Version(missing_version, missing_version), - incremental_delta->version_hash(), rowset_id, i); + incremental_delta->version_hash(), segment_group_id, i); string to = schema_full_path + '/' + basename(from.c_str()); if ((res = _create_hard_link(from, to)) != OLAP_SUCCESS) { break; @@ -516,7 +516,7 @@ OLAPStatus OLAPEngine::_create_incremental_snapshot_files( from = ref_olap_table->construct_incremental_data_file_path( Version(missing_version, missing_version), - incremental_delta->version_hash(), rowset_id, i); + incremental_delta->version_hash(), segment_group_id, i); to = schema_full_path + '/' + basename(from.c_str()); if ((res = _create_hard_link(from, to)) != OLAP_SUCCESS) { break; @@ -608,16 +608,16 @@ string OLAPEngine::_construct_index_file_path( const string& tablet_path_prefix, const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const { - return OLAPTable::construct_file_path(tablet_path_prefix, version, version_hash, rowset_id, segment, "idx"); + int32_t segment_group_id, int32_t segment) const { + return OLAPTable::construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "idx"); } string OLAPEngine::_construct_data_file_path( const string& tablet_path_prefix, const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const { - return OLAPTable::construct_file_path(tablet_path_prefix, version, version_hash, rowset_id, segment, "dat"); + int32_t segment_group_id, int32_t segment) const { + return OLAPTable::construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "dat"); } OLAPStatus OLAPEngine::_create_hard_link(const string& from_path, const string& to_path) { diff --git a/be/src/olap/olap_table.cpp b/be/src/olap/olap_table.cpp index f7be73051e76d1..183624743d0741 100644 --- a/be/src/olap/olap_table.cpp +++ b/be/src/olap/olap_table.cpp @@ -33,7 +33,6 @@ #include "olap/olap_define.h" #include "olap/olap_engine.h" #include "olap/olap_index.h" -#include "olap/rowset.h" #include "olap/reader.h" #include "olap/store.h" #include "olap/row_cursor.h" @@ -247,11 +246,11 @@ OLAPTable::~OLAPTable() { return; // for convenience of mock test. } - // ensure that there is nobody using OLAPTable, like acquiring OLAPData(Rowset) + // ensure that there is nobody using OLAPTable, like acquiring OLAPData(SegmentGroup) obtain_header_wrlock(); for (auto& it : _data_sources) { - for (Rowset* rowset : it.second) { - SAFE_DELETE(rowset); + for (SegmentGroup* segment_group : it.second) { + SAFE_DELETE(segment_group); } } _data_sources.clear(); @@ -259,11 +258,11 @@ OLAPTable::~OLAPTable() { // clear the transactions in memory for (auto& it : _pending_data_sources) { // false means can't remove the transaction from header, also prevent the loading of tablet - for (Rowset* rowset : it.second) { + for (SegmentGroup* segment_group : it.second) { OLAPEngine::get_instance()->delete_transaction( - rowset->partition_id(), rowset->transaction_id(), + segment_group->partition_id(), segment_group->transaction_id(), _tablet_id, _schema_hash, false); - SAFE_DELETE(rowset); + SAFE_DELETE(segment_group); } } _pending_data_sources.clear(); @@ -356,34 +355,34 @@ OLAPStatus OLAPTable::load_indices() { Version version; version.first = delta.start_version(); version.second = delta.end_version(); - for (int j = 0; j < delta.rowset_size(); ++j) { - const PRowSet& prowset = delta.rowset(j); - Rowset* rowset = new Rowset(this, version, delta.version_hash(), - false, prowset.rowset_id(), prowset.num_segments()); - if (rowset == nullptr) { - LOG(WARNING) << "fail to create olap rowset. [version='" << version.first + for (int j = 0; j < delta.segment_group_size(); ++j) { + const PSegmentGroup& psegment_group = delta.segment_group(j); + SegmentGroup* segment_group = new SegmentGroup(this, version, delta.version_hash(), + false, psegment_group.segment_group_id(), psegment_group.num_segments()); + if (segment_group == nullptr) { + LOG(WARNING) << "fail to create olap segment_group. [version='" << version.first << "-" << version.second << "' table='" << full_name() << "']"; return OLAP_ERR_MALLOC_ERROR; } - if (prowset.has_empty()) { - rowset->set_empty(prowset.empty()); + if (psegment_group.has_empty()) { + segment_group->set_empty(psegment_group.empty()); } - // 在校验和加载索引前把rowset放到data-source,以防止加载索引失败造成内存泄露 - _data_sources[version].push_back(rowset); + // 在校验和加载索引前把segment_group放到data-source,以防止加载索引失败造成内存泄露 + _data_sources[version].push_back(segment_group); - // 判断rowset是否正常, 在所有版本的都检查完成之后才加载所有版本的rowset - if (rowset->validate() != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to validate rowset. [version='%d-%d' version_hash=%ld]", + // 判断segment_group是否正常, 在所有版本的都检查完成之后才加载所有版本的segment_group + if (segment_group->validate() != OLAP_SUCCESS) { + OLAP_LOG_WARNING("fail to validate segment_group. [version='%d-%d' version_hash=%ld]", version.first, version.second, header->delta(delta_id).version_hash()); - // 现在只要一个rowset没有被正确加载,整个table加载失败 + // 现在只要一个segment_group没有被正确加载,整个table加载失败 return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR; } - if (prowset.column_pruning_size() != 0) { - size_t column_pruning_size = prowset.column_pruning_size(); + if (psegment_group.column_pruning_size() != 0) { + size_t column_pruning_size = psegment_group.column_pruning_size(); if (_num_key_fields != column_pruning_size) { LOG(ERROR) << "column pruning size is error." << "column_pruning_size=" << column_pruning_size << ", " @@ -394,7 +393,7 @@ OLAPStatus OLAPTable::load_indices() { column_statistic_strings(_num_key_fields); std::vector null_vec(_num_key_fields); for (size_t j = 0; j < _num_key_fields; ++j) { - ColumnPruning column_pruning = prowset.column_pruning(j); + ColumnPruning column_pruning = psegment_group.column_pruning(j); column_statistic_strings[j].first = column_pruning.min(); column_statistic_strings[j].second = column_pruning.max(); if (column_pruning.has_null_flag()) { @@ -403,7 +402,7 @@ OLAPStatus OLAPTable::load_indices() { null_vec[j] = false; } } - RETURN_NOT_OK(rowset->add_column_statistics(column_statistic_strings, null_vec)); + RETURN_NOT_OK(segment_group->add_column_statistics(column_statistic_strings, null_vec)); } } } @@ -411,18 +410,18 @@ OLAPStatus OLAPTable::load_indices() { for (version_olap_index_map_t::const_iterator it = _data_sources.begin(); it != _data_sources.end(); ++it) { Version version = it->first; - for (Rowset* rowset : it->second) { - if ((res = rowset->load()) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load rowset. version=" << version.first << "-" << version.second << ", " - << "version_hash=" << rowset->version_hash(); - // 现在只要一个rowset没有被正确加载,整个table加载失败 + for (SegmentGroup* segment_group : it->second) { + if ((res = segment_group->load()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load segment_group. version=" << version.first << "-" << version.second << ", " + << "version_hash=" << segment_group->version_hash(); + // 现在只要一个segment_group没有被正确加载,整个table加载失败 return res; } - VLOG(3) << "load Rowset success. table=" << full_name() << ", " + VLOG(3) << "load SegmentGroup success. table=" << full_name() << ", " << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << rowset->version_hash() << ", " - << "num_segments=" << rowset->num_segments(); + << "version_hash=" << segment_group->version_hash() << ", " + << "num_segments=" << segment_group->num_segments(); } } @@ -476,14 +475,14 @@ void OLAPTable::acquire_data_sources_by_versions(const vector& version_ it1 != version_list.end(); ++it1) { version_olap_index_map_t::const_iterator it2 = _data_sources.find(*it1); if (it2 == _data_sources.end()) { - LOG(WARNING) << "fail to find Rowset for version. [version='" << it1->first + LOG(WARNING) << "fail to find SegmentGroup for version. [version='" << it1->first << "-" << it1->second << "' table='" << full_name() << "']"; release_data_sources(sources); return; } - for (Rowset* rowset : it2->second) { - ColumnData* olap_data = ColumnData::create(rowset); + for (SegmentGroup* segment_group : it2->second) { + ColumnData* olap_data = ColumnData::create(segment_group); if (olap_data == NULL) { LOG(WARNING) << "fail to malloc Data. [version='" << it1->first << "-" << it1->second << "' table='" << full_name() << "']"; @@ -518,48 +517,48 @@ OLAPStatus OLAPTable::release_data_sources(vector* data_sources) co return OLAP_SUCCESS; } -OLAPStatus OLAPTable::register_data_source(const std::vector& index_vec) { +OLAPStatus OLAPTable::register_data_source(const std::vector& index_vec) { OLAPStatus res = OLAP_SUCCESS; if (index_vec.empty()) { - LOG(WARNING) << "parameter rowset is null." + LOG(WARNING) << "parameter segment_group is null." << "table=" << full_name(); return OLAP_ERR_INPUT_PARAMETER_ERROR; } - for (Rowset* rowset : index_vec) { - Version version = rowset->version(); + for (SegmentGroup* segment_group : index_vec) { + Version version = segment_group->version(); const std::vector* column_statistics = nullptr; - if (rowset->has_column_statistics()) { - column_statistics = &rowset->get_column_statistics(); + if (segment_group->has_column_statistics()) { + column_statistics = &segment_group->get_column_statistics(); } - res = _header->add_version(version, rowset->version_hash(), rowset->rowset_id(), - rowset->num_segments(), rowset->index_size(), rowset->data_size(), - rowset->num_rows(), rowset->empty(), column_statistics); + res = _header->add_version(version, segment_group->version_hash(), segment_group->segment_group_id(), + segment_group->num_segments(), segment_group->index_size(), segment_group->data_size(), + segment_group->num_rows(), segment_group->empty(), column_statistics); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to add version to olap header. table=" << full_name() << ", " << "version=" << version.first << "-" << version.second; return res; } - // put the new rowset into _data_sources. + // put the new segment_group into _data_sources. // 由于对header的操作可能失败,因此对_data_sources要放在这里 - _data_sources[version].push_back(rowset); + _data_sources[version].push_back(segment_group); VLOG(3) << "succeed to register data source. table=" << full_name() << ", " << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << rowset->version_hash() << ", " - << "rowset_id=" << rowset->rowset_id() << ", " - << "num_segments=" << rowset->num_segments(); + << "version_hash=" << segment_group->version_hash() << ", " + << "segment_group_id=" << segment_group->segment_group_id() << ", " + << "num_segments=" << segment_group->num_segments(); } return OLAP_SUCCESS; } -OLAPStatus OLAPTable::unregister_data_source(const Version& version, std::vector* index_vec) { +OLAPStatus OLAPTable::unregister_data_source(const Version& version, std::vector* segment_group_vec) { OLAPStatus res = OLAP_SUCCESS; version_olap_index_map_t::iterator it = _data_sources.find(version); if (it == _data_sources.end()) { - LOG(WARNING) << "olap rowset for version does not exists. [version='" << version.first + LOG(WARNING) << "olap segment_group for version does not exists. [version='" << version.first << "-" << version.second << "' table='" << full_name() << "']"; return OLAP_ERR_VERSION_NOT_EXIST; } @@ -571,7 +570,7 @@ OLAPStatus OLAPTable::unregister_data_source(const Version& version, std::vector return res; } - *index_vec = it->second; + *segment_group_vec = it->second; _data_sources.erase(it); return OLAP_SUCCESS; } @@ -589,33 +588,33 @@ OLAPStatus OLAPTable::add_pending_version(int64_t partition_id, int64_t transact res = save_header(); if (res != OLAP_SUCCESS) { _header->delete_pending_delta(transaction_id); - LOG(FATAL) << "fail to save header when add pending rowset. [table=" << full_name() + LOG(FATAL) << "fail to save header when add pending segment_group. [table=" << full_name() << " transaction_id=" << transaction_id << "]"; return res; } return OLAP_SUCCESS; } -OLAPStatus OLAPTable::add_pending_rowset(Rowset* rowset) { - if (rowset == nullptr) { - LOG(WARNING) << "parameter rowset is null. [table=" << full_name() << "]"; +OLAPStatus OLAPTable::add_pending_segment_group(SegmentGroup* segment_group) { + if (segment_group == nullptr) { + LOG(WARNING) << "parameter segment_group is null. [table=" << full_name() << "]"; return OLAP_ERR_INPUT_PARAMETER_ERROR; } - int64_t transaction_id = rowset->transaction_id(); + int64_t transaction_id = segment_group->transaction_id(); obtain_header_wrlock(); OLAPStatus res = OLAP_SUCCESS; // add to header const std::vector* column_statistics = nullptr; - if (rowset->has_column_statistics()) { - column_statistics = &(rowset->get_column_statistics()); + if (segment_group->has_column_statistics()) { + column_statistics = &(segment_group->get_column_statistics()); } - res = _header->add_pending_rowset(transaction_id, rowset->num_segments(), - rowset->rowset_id(), rowset->load_id(), - rowset->empty(), column_statistics); + res = _header->add_pending_segment_group(transaction_id, segment_group->num_segments(), + segment_group->segment_group_id(), segment_group->load_id(), + segment_group->empty(), column_statistics); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add pending rowset to header. [table=" << full_name() + LOG(WARNING) << "fail to add pending segment_group to header. [table=" << full_name() << " transaction_id=" << transaction_id << "]"; release_header_lock(); return res; @@ -625,14 +624,14 @@ OLAPStatus OLAPTable::add_pending_rowset(Rowset* rowset) { res = save_header(); if (res != OLAP_SUCCESS) { _header->delete_pending_delta(transaction_id); - LOG(FATAL) << "fail to save header when add pending rowset. [table=" << full_name() + LOG(FATAL) << "fail to save header when add pending segment_group. [table=" << full_name() << " transaction_id=" << transaction_id << "]"; release_header_lock(); return res; } // add to data sources - _pending_data_sources[transaction_id].push_back(rowset); + _pending_data_sources[transaction_id].push_back(segment_group); release_header_lock(); VLOG(3) << "add pending data to tablet successfully." << "table=" << full_name() << ", transaction_id=" << transaction_id; @@ -640,27 +639,27 @@ OLAPStatus OLAPTable::add_pending_rowset(Rowset* rowset) { return res; } -int32_t OLAPTable::current_pending_rowset_id(int64_t transaction_id) { +int32_t OLAPTable::current_pending_segment_group_id(int64_t transaction_id) { ReadLock rdlock(&_header_lock); - int32_t rowset_id = -1; + int32_t segment_group_id = -1; if (_pending_data_sources.find(transaction_id) != _pending_data_sources.end()) { - for (Rowset* rowset : _pending_data_sources[transaction_id]) { - if (rowset->rowset_id() > rowset_id) { - rowset_id = rowset->rowset_id(); + for (SegmentGroup* segment_group : _pending_data_sources[transaction_id]) { + if (segment_group->segment_group_id() > segment_group_id) { + segment_group_id = segment_group->segment_group_id(); } } } - return rowset_id; + return segment_group_id; } -OLAPStatus OLAPTable::add_pending_data(Rowset* rowset, const std::vector* delete_conditions) { - if (rowset == nullptr) { - LOG(WARNING) << "parameter rowset is null. table=" << full_name(); +OLAPStatus OLAPTable::add_pending_data(SegmentGroup* segment_group, const std::vector* delete_conditions) { + if (segment_group == nullptr) { + LOG(WARNING) << "parameter segment_group is null. table=" << full_name(); return OLAP_ERR_INPUT_PARAMETER_ERROR; } obtain_header_wrlock(); - int64_t transaction_id = rowset->transaction_id(); + int64_t transaction_id = segment_group->transaction_id(); if (_pending_data_sources.find(transaction_id) != _pending_data_sources.end()) { LOG(WARNING) << "find pending data existed when add to tablet. [table=" << full_name() << " transaction_id=" << transaction_id << "]"; @@ -679,9 +678,9 @@ OLAPStatus OLAPTable::add_pending_data(Rowset* rowset, const std::vectoradd_pending_version(rowset->partition_id(), transaction_id, &condition_strs); + res = _header->add_pending_version(segment_group->partition_id(), transaction_id, &condition_strs); } else { - res = _header->add_pending_version(rowset->partition_id(), transaction_id, nullptr); + res = _header->add_pending_version(segment_group->partition_id(), transaction_id, nullptr); } if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to add pending delta to header." @@ -693,14 +692,14 @@ OLAPStatus OLAPTable::add_pending_data(Rowset* rowset, const std::vector* column_statistics = nullptr; - if (rowset->has_column_statistics()) { - column_statistics = &(rowset->get_column_statistics()); + if (segment_group->has_column_statistics()) { + column_statistics = &(segment_group->get_column_statistics()); } - res = _header->add_pending_rowset(transaction_id, rowset->num_segments(), - rowset->rowset_id(), rowset->load_id(), - rowset->empty(), column_statistics); + res = _header->add_pending_segment_group(transaction_id, segment_group->num_segments(), + segment_group->segment_group_id(), segment_group->load_id(), + segment_group->empty(), column_statistics); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add pending rowset to header. [table=" << full_name() + LOG(WARNING) << "fail to add pending segment_group to header. [table=" << full_name() << " transaction_id=" << transaction_id << "]"; release_header_lock(); return res; @@ -710,14 +709,14 @@ OLAPStatus OLAPTable::add_pending_data(Rowset* rowset, const std::vectordelete_pending_delta(transaction_id); - LOG(FATAL) << "fail to save header when add pending rowset. [table=" << full_name() + LOG(FATAL) << "fail to save header when add pending segment_group. [table=" << full_name() << " transaction_id=" << transaction_id << "]"; release_header_lock(); return res; } // add to data sources - _pending_data_sources[transaction_id].push_back(rowset); + _pending_data_sources[transaction_id].push_back(segment_group); release_header_lock(); VLOG(3) << "add pending data to tablet successfully." << "table=" << full_name() << ", transaction_id=" << transaction_id; @@ -740,9 +739,9 @@ void OLAPTable::delete_pending_data(int64_t transaction_id) { } // delete from data sources - for (Rowset* rowset : it->second) { - rowset->release(); - OLAPEngine::get_instance()->add_unused_index(rowset); + for (SegmentGroup* segment_group : it->second) { + segment_group->release(); + OLAPEngine::get_instance()->add_unused_index(segment_group); } _pending_data_sources.erase(it); @@ -780,41 +779,42 @@ void OLAPTable::load_pending_data() { << "pending_delta size=" << _header->pending_delta_size(); MutexLock load_lock(&_load_lock); - // if a olap rowset loads failed, delete it from header + // if a olap segment_group loads failed, delete it from header std::set error_pending_data; for (const PPendingDelta& pending_delta : _header->pending_delta()) { - for (const PPendingRowSet& pending_rowset : pending_delta.pending_rowset()) { - Rowset* rowset = new Rowset(this, false, pending_rowset.pending_rowset_id(), - pending_rowset.num_segments(), true, - pending_delta.partition_id(), pending_delta.transaction_id()); - DCHECK(rowset != nullptr); - rowset->set_load_id(pending_rowset.load_id()); - if (pending_rowset.has_empty()) { - rowset->set_empty(pending_rowset.empty()); + for (const PPendingSegmentGroup& pending_segment_group : pending_delta.pending_segment_group()) { + SegmentGroup* segment_group = new SegmentGroup(this, false, + pending_segment_group.pending_segment_group_id(), + pending_segment_group.num_segments(), true, + pending_delta.partition_id(), pending_delta.transaction_id()); + DCHECK(segment_group != nullptr); + segment_group->set_load_id(pending_segment_group.load_id()); + if (pending_segment_group.has_empty()) { + segment_group->set_empty(pending_segment_group.empty()); } - _pending_data_sources[rowset->transaction_id()].push_back(rowset); + _pending_data_sources[segment_group->transaction_id()].push_back(segment_group); - if (rowset->validate() != OLAP_SUCCESS) { - LOG(WARNING) << "fail to validate rowset when load pending data." + if (segment_group->validate() != OLAP_SUCCESS) { + LOG(WARNING) << "fail to validate segment_group when load pending data." << "table=" << full_name() << ", " - << "transaction_id=" << rowset->transaction_id(); - error_pending_data.insert(rowset->transaction_id()); + << "transaction_id=" << segment_group->transaction_id(); + error_pending_data.insert(segment_group->transaction_id()); break; } - if (pending_rowset.column_pruning_size() != 0) { - if (_num_key_fields != pending_rowset.column_pruning_size()) { + if (pending_segment_group.column_pruning_size() != 0) { + if (_num_key_fields != pending_segment_group.column_pruning_size()) { LOG(WARNING) << "column pruning size is error when load pending data." - << "column_pruning_size=" << pending_rowset.column_pruning_size() << ", " + << "column_pruning_size=" << pending_segment_group.column_pruning_size() << ", " << "num_key_fields=" << _num_key_fields; - error_pending_data.insert(rowset->transaction_id()); + error_pending_data.insert(segment_group->transaction_id()); break; } std::vector> column_statistics_string(_num_key_fields); std::vector null_vec(_num_key_fields); for (size_t j = 0; j < _num_key_fields; ++j) { - ColumnPruning column_pruning = pending_rowset.column_pruning(j); + ColumnPruning column_pruning = pending_segment_group.column_pruning(j); column_statistics_string[j].first = column_pruning.min(); column_statistics_string[j].second = column_pruning.max(); if (column_pruning.has_null_flag()) { @@ -824,15 +824,15 @@ void OLAPTable::load_pending_data() { } } - if (rowset->add_column_statistics(column_statistics_string, null_vec) != OLAP_SUCCESS) { + if (segment_group->add_column_statistics(column_statistics_string, null_vec) != OLAP_SUCCESS) { LOG(WARNING) << "fail to set column statistics when load pending data"; error_pending_data.insert(pending_delta.transaction_id()); break; } } - if (rowset->load() != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load rowset when load pending data." + if (segment_group->load() != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load segment_group when load pending data." << "table=" << full_name() << ", transaction_id=" << pending_delta.transaction_id(); error_pending_data.insert(pending_delta.transaction_id()); break; @@ -840,7 +840,7 @@ void OLAPTable::load_pending_data() { OLAPStatus add_status = OLAPEngine::get_instance()->add_transaction( pending_delta.partition_id(), pending_delta.transaction_id(), - _tablet_id, _schema_hash, pending_rowset.load_id()); + _tablet_id, _schema_hash, pending_segment_group.load_id()); if (add_status != OLAP_SUCCESS) { LOG(WARNING) << "find transaction exists in engine when load pending data. [table=" << full_name() @@ -880,25 +880,25 @@ OLAPStatus OLAPTable::publish_version(int64_t transaction_id, Version version, return OLAP_ERR_TRANSACTION_NOT_EXIST; } RETURN_NOT_OK(_handle_existed_version(transaction_id, version, version_hash)); - std::vector index_vec; + std::vector index_vec; vector linked_files; OLAPStatus res = OLAP_SUCCESS; - for (Rowset* rowset : _pending_data_sources[transaction_id]) { - int32_t rowset_id = rowset->rowset_id(); - for (int32_t seg_id = 0; seg_id < rowset->num_segments(); ++seg_id) { - std::string pending_index_path = rowset->construct_index_file_path(rowset_id, seg_id); - std::string index_path = construct_index_file_path(version, version_hash, rowset_id, seg_id); + for (SegmentGroup* segment_group : _pending_data_sources[transaction_id]) { + int32_t segment_group_id = segment_group->segment_group_id(); + for (int32_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { + std::string pending_index_path = segment_group->construct_index_file_path(segment_group_id, seg_id); + std::string index_path = construct_index_file_path(version, version_hash, segment_group_id, seg_id); res = _create_hard_link(pending_index_path, index_path, &linked_files); if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } - std::string pending_data_path = rowset->construct_data_file_path(rowset_id, seg_id); - std::string data_path = construct_data_file_path(version, version_hash, rowset_id, seg_id); + std::string pending_data_path = segment_group->construct_data_file_path(segment_group_id, seg_id); + std::string data_path = construct_data_file_path(version, version_hash, segment_group_id, seg_id); res = _create_hard_link(pending_data_path, data_path, &linked_files); if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } } - rowset->publish_version(version, version_hash); - index_vec.push_back(rowset); + segment_group->publish_version(version, version_hash); + index_vec.push_back(segment_group); } res = register_data_source(index_vec); @@ -923,7 +923,7 @@ OLAPStatus OLAPTable::publish_version(int64_t transaction_id, Version version, LOG(FATAL) << "fail to save header when publish version. res=" << res << ", " << "table=" << full_name() << ", " << "transaction_id=" << transaction_id; - std::vector delete_index_vec; + std::vector delete_index_vec; // if failed, clear new data unregister_data_source(version, &delete_index_vec); _delete_incremental_data(version, version_hash); @@ -940,9 +940,9 @@ OLAPStatus OLAPTable::publish_version(int64_t transaction_id, Version version, << "transaction_id=" << transaction_id; return res; } - for (Rowset* rowset : _pending_data_sources[transaction_id]) { - rowset->delete_all_files(); - rowset->set_pending_finished(); + for (SegmentGroup* segment_group : _pending_data_sources[transaction_id]) { + segment_group->delete_all_files(); + segment_group->set_pending_finished(); } _pending_data_sources.erase(transaction_id); @@ -989,8 +989,8 @@ OLAPStatus OLAPTable::_handle_existed_version(int64_t transaction_id, const Vers } } // delete local data - //Rowset *existed_index = NULL; - std::vector existed_index_vec; + //SegmentGroup *existed_index = NULL; + std::vector existed_index_vec; _delete_incremental_data(version, version_hash); res = unregister_data_source(version, &existed_index_vec); if (res != OLAP_SUCCESS) { @@ -1004,11 +1004,11 @@ OLAPStatus OLAPTable::_handle_existed_version(int64_t transaction_id, const Vers LOG(FATAL) << "fail to save header when unregister data. [tablet=" << full_name() << " transaction_id=" << transaction_id << "]"; } - // use OLAPEngine to delete this rowset + // use OLAPEngine to delete this segment_group if (!existed_index_vec.empty()) { OLAPEngine *unused_index = OLAPEngine::get_instance(); - for (Rowset* rowset : existed_index_vec) { - unused_index->add_unused_index(rowset); + for (SegmentGroup* segment_group : existed_index_vec) { + unused_index->add_unused_index(segment_group); } } // if version_hash is same or version is merged, publish success @@ -1020,50 +1020,50 @@ OLAPStatus OLAPTable::_handle_existed_version(int64_t transaction_id, const Vers return res; } -OLAPStatus OLAPTable::_add_incremental_data(std::vector& index_vec, int64_t transaction_id, +OLAPStatus OLAPTable::_add_incremental_data(std::vector& index_vec, int64_t transaction_id, const Version& version, const VersionHash& version_hash) { if (index_vec.empty()) { LOG(WARNING) << "no parameter when add incremental data. table=" << full_name(); return OLAP_ERR_INPUT_PARAMETER_ERROR; } - // create incremental rowset's dir + // create incremental segment_group's dir std::string dir_path = construct_incremental_delta_dir_path(); OLAPStatus res = OLAP_SUCCESS; if (!check_dir_existed(dir_path)) { res = create_dirs(dir_path); if (res != OLAP_SUCCESS && !check_dir_existed(dir_path)) { - LOG(WARNING) << "fail to create rowset dir. table=" << full_name() << ", " + LOG(WARNING) << "fail to create segment_group dir. table=" << full_name() << ", " << " transaction_id=" << transaction_id; return res; } } std::vector linked_files; - for (Rowset* rowset : index_vec) { - for (int32_t seg_id = 0; seg_id < rowset->num_segments(); ++seg_id) { - int32_t rowset_id = rowset->rowset_id(); - std::string index_path = rowset->construct_index_file_path(rowset_id, seg_id); + for (SegmentGroup* segment_group : index_vec) { + for (int32_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { + int32_t segment_group_id = segment_group->segment_group_id(); + std::string index_path = segment_group->construct_index_file_path(segment_group_id, seg_id); std::string incremental_index_path = - construct_incremental_index_file_path(version, version_hash, rowset_id, seg_id); + construct_incremental_index_file_path(version, version_hash, segment_group_id, seg_id); res = _create_hard_link(index_path, incremental_index_path, &linked_files); if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } - std::string data_path = rowset->construct_data_file_path(rowset_id, seg_id); + std::string data_path = segment_group->construct_data_file_path(segment_group_id, seg_id); std::string incremental_data_path = - construct_incremental_data_file_path(version, version_hash, rowset_id, seg_id); + construct_incremental_data_file_path(version, version_hash, segment_group_id, seg_id); res = _create_hard_link(data_path, incremental_data_path, &linked_files); if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } } const std::vector* column_statistics = nullptr; - if (rowset->has_column_statistics()) { - column_statistics = &(rowset->get_column_statistics()); + if (segment_group->has_column_statistics()) { + column_statistics = &(segment_group->get_column_statistics()); } res = _header->add_incremental_version( - rowset->version(), rowset->version_hash(), - rowset->rowset_id(), rowset->num_segments(), - rowset->index_size(), rowset->data_size(), - rowset->num_rows(), rowset->empty(), column_statistics); + segment_group->version(), segment_group->version_hash(), + segment_group->segment_group_id(), segment_group->num_segments(), + segment_group->index_size(), segment_group->data_size(), + segment_group->num_rows(), segment_group->empty(), column_statistics); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to add incremental data. res=" << res << ", " << "table=" << full_name() << ", " @@ -1086,7 +1086,7 @@ void OLAPTable::delete_expire_incremental_data() { if (diff >= config::incremental_delta_expire_time_sec) { Version version(it.start_version(), it.end_version()); expire_versions.push_back(std::make_pair(version, it.version_hash())); - VLOG(3) << "find expire incremental rowset. tablet=" << full_name() << ", " + VLOG(3) << "find expire incremental segment_group. tablet=" << full_name() << ", " << "version=" << it.start_version() << "-" << it.end_version() << ", " << "exist_sec=" << diff; } @@ -1108,15 +1108,15 @@ void OLAPTable::_delete_incremental_data(const Version& version, const VersionHa if (incremental_delta == nullptr) { return; } vector files_to_delete; - for (const PRowSet& prowset : incremental_delta->rowset()) { - int32_t rowset_id = prowset.rowset_id(); - for (int seg_id = 0; seg_id < prowset.num_segments(); seg_id++) { + for (const PSegmentGroup& psegment_group : incremental_delta->segment_group()) { + int32_t segment_group_id = psegment_group.segment_group_id(); + for (int seg_id = 0; seg_id < psegment_group.num_segments(); seg_id++) { std::string incremental_index_path = - construct_incremental_index_file_path(version, version_hash, rowset_id, seg_id); + construct_incremental_index_file_path(version, version_hash, segment_group_id, seg_id); files_to_delete.emplace_back(incremental_index_path); std::string incremental_data_path = - construct_incremental_data_file_path(version, version_hash, rowset_id, seg_id); + construct_incremental_data_file_path(version, version_hash, segment_group_id, seg_id); files_to_delete.emplace_back(incremental_data_path); } } @@ -1189,7 +1189,7 @@ OLAPStatus OLAPTable::is_push_for_delete( const PPendingDelta* pending_delta = _header->get_pending_delta(transaction_id); if (pending_delta == nullptr) { - LOG(WARNING) << "pending rowset not found when check push for delete. [table=" << full_name() + LOG(WARNING) << "pending segment_group not found when check push for delete. [table=" << full_name() << " transaction_id=" << transaction_id << "]"; return OLAP_ERR_TRANSACTION_NOT_EXIST; } @@ -1197,50 +1197,50 @@ OLAPStatus OLAPTable::is_push_for_delete( return OLAP_SUCCESS; } -Rowset* OLAPTable::_construct_index_from_version(const PDelta* delta, int32_t rowset_id) { - VLOG(3) << "begin to construct rowset from version." +SegmentGroup* OLAPTable::_construct_segment_group_from_version(const PDelta* delta, int32_t segment_group_id) { + VLOG(3) << "begin to construct segment_group from version." << "table=" << full_name() << ", " << "version=" << delta->start_version() << "-" << delta->end_version() << ", " << "version_hash=" << delta->version_hash(); Version version(delta->start_version(), delta->end_version()); - const PRowSet* prowset = nullptr; - if (rowset_id == -1) { - // Previous FileVersionMessage will be convert to PDelta and PRowset. - // In PRowset, this is rowset_id is set to minus one. - // When to get it, should used rowset + 1 as index. - prowset = &(delta->rowset().Get(rowset_id + 1)); + const PSegmentGroup* psegment_group = nullptr; + if (segment_group_id == -1) { + // Previous FileVersionMessage will be convert to PDelta and PSegmentGroup. + // In PSegmentGroup, this is segment_group_id is set to minus one. + // When to get it, should used segment_group + 1 as index. + psegment_group = &(delta->segment_group().Get(segment_group_id + 1)); } else { - prowset = &(delta->rowset().Get(rowset_id)); + psegment_group = &(delta->segment_group().Get(segment_group_id)); } - Rowset* rowset = new Rowset(this, version, delta->version_hash(), - false, rowset_id, prowset->num_segments()); - if (prowset->has_empty()) { - rowset->set_empty(prowset->empty()); + SegmentGroup* segment_group = new SegmentGroup(this, version, delta->version_hash(), + false, segment_group_id, psegment_group->num_segments()); + if (psegment_group->has_empty()) { + segment_group->set_empty(psegment_group->empty()); } - DCHECK(rowset != nullptr) << "malloc error when construct rowset." + DCHECK(segment_group != nullptr) << "malloc error when construct segment_group." << "table=" << full_name() << ", " << "version=" << version.first << "-" << version.second << ", " << "version_hash=" << delta->version_hash(); - OLAPStatus res = rowset->validate(); + OLAPStatus res = segment_group->validate(); if (res != OLAP_SUCCESS) { - SAFE_DELETE(rowset); + SAFE_DELETE(segment_group); return nullptr; } - if (prowset->column_pruning_size() != 0) { - if (_num_key_fields != prowset->column_pruning_size()) { + if (psegment_group->column_pruning_size() != 0) { + if (_num_key_fields != psegment_group->column_pruning_size()) { LOG(WARNING) << "column pruning size error, " << "table=" << full_name() << ", " << "version=" << version.first << "-" << version.second << ", " << "version_hash=" << delta->version_hash() << ", " - << "column_pruning_size=" << prowset->column_pruning_size() << ", " + << "column_pruning_size=" << psegment_group->column_pruning_size() << ", " << "num_key_fields=" << _num_key_fields; - SAFE_DELETE(rowset); + SAFE_DELETE(segment_group); return nullptr; } vector> column_statistic_strings(_num_key_fields); std::vector null_vec(_num_key_fields); for (size_t j = 0; j < _num_key_fields; ++j) { - ColumnPruning column_pruning = prowset->column_pruning(j); + ColumnPruning column_pruning = psegment_group->column_pruning(j); column_statistic_strings[j].first = column_pruning.min(); column_statistic_strings[j].second = column_pruning.max(); if (column_pruning.has_null_flag()) { @@ -1250,27 +1250,27 @@ Rowset* OLAPTable::_construct_index_from_version(const PDelta* delta, int32_t ro } } - res = rowset->add_column_statistics(column_statistic_strings, null_vec); + res = segment_group->add_column_statistics(column_statistic_strings, null_vec); if (res != OLAP_SUCCESS) { - SAFE_DELETE(rowset); + SAFE_DELETE(segment_group); return nullptr; } } - res = rowset->load(); + res = segment_group->load(); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load rowset. res=" << res << ", " + LOG(WARNING) << "fail to load segment_group. res=" << res << ", " << "table=" << full_name() << ", " << "version=" << version.first << "-" << version.second << ", " << "version_hash=" << delta->version_hash(); - SAFE_DELETE(rowset); + SAFE_DELETE(segment_group); return nullptr; } - VLOG(3) << "finish to construct rowset from version." + VLOG(3) << "finish to construct segment_group from version." << "table=" << full_name() << ", " << "version=" << version.first << "-" << version.second; - return rowset; + return segment_group; } OLAPStatus OLAPTable::_create_hard_link(const string& from, const string& to, @@ -1319,31 +1319,31 @@ OLAPStatus OLAPTable::clone_data(const OLAPHeader& clone_header, Version version(clone_delta->start_version(), clone_delta->end_version()); - // construct new rowset - for (const PRowSet& prowset : clone_delta->rowset()) { - Rowset* tmp_index = _construct_index_from_version(clone_delta, prowset.rowset_id()); - if (tmp_index == NULL) { - LOG(WARNING) << "fail to construct rowset when clone data. table=" << full_name() << ", " + // construct new segment_group + for (const PSegmentGroup& psegment_group : clone_delta->segment_group()) { + SegmentGroup* tmp_segment_group = _construct_segment_group_from_version(clone_delta, psegment_group.segment_group_id()); + if (tmp_segment_group == NULL) { + LOG(WARNING) << "fail to construct segment_group when clone data. table=" << full_name() << ", " << "version=" << version.first << "-" << version.second << ", " << "version_hash=" << clone_delta->version_hash(); res = OLAP_ERR_INDEX_LOAD_ERROR; break; } - tmp_data_sources[version].push_back(tmp_index); + tmp_data_sources[version].push_back(tmp_segment_group); // add version to new local header const std::vector* column_statistics = nullptr; - if (tmp_index->has_column_statistics()) { - column_statistics = &(tmp_index->get_column_statistics()); + if (tmp_segment_group->has_column_statistics()) { + column_statistics = &(tmp_segment_group->get_column_statistics()); } - res = new_local_header.add_version(version, tmp_index->version_hash(), - tmp_index->rowset_id(), - tmp_index->num_segments(), - tmp_index->index_size(), - tmp_index->data_size(), - tmp_index->num_rows(), - tmp_index->empty(), + res = new_local_header.add_version(version, tmp_segment_group->version_hash(), + tmp_segment_group->segment_group_id(), + tmp_segment_group->num_segments(), + tmp_segment_group->index_size(), + tmp_segment_group->data_size(), + tmp_segment_group->num_rows(), + tmp_segment_group->empty(), column_statistics); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to add version to new local header when clone." @@ -1401,27 +1401,27 @@ OLAPStatus OLAPTable::clone_data(const OLAPHeader& clone_header, for (const Version& version_to_delete : versions_to_delete) { version_olap_index_map_t::iterator it = _data_sources.find(version_to_delete); if (it != _data_sources.end()) { - std::vector index_to_delete_vec = it->second; + std::vector index_to_delete_vec = it->second; _data_sources.erase(it); OLAPEngine* unused_index = OLAPEngine::get_instance(); - for (Rowset* rowset : index_to_delete_vec) { - unused_index->add_unused_index(rowset); + for (SegmentGroup* segment_group : index_to_delete_vec) { + unused_index->add_unused_index(segment_group); } } } // add new data source for (auto& it : tmp_data_sources) { - for (Rowset* rowset : it.second) { - _data_sources[rowset->version()].push_back(rowset); + for (SegmentGroup* segment_group : it.second) { + _data_sources[segment_group->version()].push_back(segment_group); } } // clear tmp indices if failed } else { for (auto& it : tmp_data_sources) { - for (Rowset* rowset : it.second) { - SAFE_DELETE(rowset); + for (SegmentGroup* segment_group : it.second) { + SAFE_DELETE(segment_group); } } } @@ -1433,8 +1433,8 @@ OLAPStatus OLAPTable::clone_data(const OLAPHeader& clone_header, } OLAPStatus OLAPTable::replace_data_sources(const vector* old_versions, - const vector* new_data_sources, - vector* old_data_sources) { + const vector* new_data_sources, + vector* old_data_sources) { OLAPStatus res = OLAP_SUCCESS; if (old_versions == NULL || new_data_sources == NULL) { @@ -1449,14 +1449,14 @@ OLAPStatus OLAPTable::replace_data_sources(const vector* old_versions, it != old_versions->end(); ++it) { version_olap_index_map_t::iterator data_source_it = _data_sources.find(*it); if (data_source_it == _data_sources.end()) { - LOG(WARNING) << "olap rowset for version does not exists. [version='" << it->first + LOG(WARNING) << "olap segment_group for version does not exists. [version='" << it->first << "-" << it->second << "' table='" << full_name() << "']"; return OLAP_ERR_VERSION_NOT_EXIST; } } // check new versions not existed - for (vector::const_iterator it = new_data_sources->begin(); + for (vector::const_iterator it = new_data_sources->begin(); it != new_data_sources->end(); ++it) { if (_data_sources.find((*it)->version()) != _data_sources.end()) { bool to_be_deleted = false; @@ -1471,7 +1471,7 @@ OLAPStatus OLAPTable::replace_data_sources(const vector* old_versions, } if (!to_be_deleted) { - LOG(WARNING) << "olap rowset for version exists. [version='" << (*it)->version().first + LOG(WARNING) << "olap segment_group for version exists. [version='" << (*it)->version().first << "-" << (*it)->version().second << "' table='" << full_name() << "']"; return OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR; } @@ -1483,8 +1483,8 @@ OLAPStatus OLAPTable::replace_data_sources(const vector* old_versions, it != old_versions->end(); ++it) { version_olap_index_map_t::iterator data_source_it = _data_sources.find(*it); if (data_source_it != _data_sources.end()) { - for (Rowset* rowset : data_source_it->second) { - old_data_sources->push_back(rowset); + for (SegmentGroup* segment_group : data_source_it->second) { + old_data_sources->push_back(segment_group); } _data_sources.erase(data_source_it); } @@ -1500,7 +1500,7 @@ OLAPStatus OLAPTable::replace_data_sources(const vector* old_versions, << "version=" << it->first << "-" << it->second; } - for (vector::const_iterator it = new_data_sources->begin(); + for (vector::const_iterator it = new_data_sources->begin(); it != new_data_sources->end(); ++it) { _data_sources[(*it)->version()].push_back(*it); @@ -1510,7 +1510,7 @@ OLAPStatus OLAPTable::replace_data_sources(const vector* old_versions, column_statistics = &((*it)->get_column_statistics()); } res = _header->add_version((*it)->version(), (*it)->version_hash(), - (*it)->rowset_id(), (*it)->num_segments(), + (*it)->segment_group_id(), (*it)->num_segments(), (*it)->index_size(), (*it)->data_size(), (*it)->num_rows(), (*it)->empty(), column_statistics); @@ -1539,7 +1539,7 @@ OLAPStatus OLAPTable::compute_all_versions_hash(const vector& versions, version_index != versions.end(); ++version_index) { version_olap_index_map_t::const_iterator temp = _data_sources.find(*version_index); if (temp == _data_sources.end()) { - OLAP_LOG_WARNING("fail to find Rowset." + OLAP_LOG_WARNING("fail to find SegmentGroup." "[start_version=%d; end_version=%d]", version_index->first, version_index->second); @@ -1583,11 +1583,11 @@ OLAPStatus OLAPTable::merge_header(const OLAPHeader& hdr, int to_version) { } Version version = { delta->start_version(), delta->end_version() }; VersionHash v_hash = delta->version_hash(); - for (int j = 0; j < delta->rowset_size(); ++j) { - const PRowSet& rowset = delta->rowset(j); - st = _header->add_version(version, v_hash, rowset.rowset_id(), - rowset.num_segments(), rowset.index_size(), rowset.data_size(), - rowset.num_rows(), rowset.empty(), nullptr); + for (int j = 0; j < delta->segment_group_size(); ++j) { + const PSegmentGroup& psegment_group = delta->segment_group(j); + st = _header->add_version(version, v_hash, psegment_group.segment_group_id(), + psegment_group.num_segments(), psegment_group.index_size(), psegment_group.data_size(), + psegment_group.num_rows(), psegment_group.empty(), nullptr); if (st != OLAP_SUCCESS) { LOG(WARNING) << "failed to add version to header" << ", " << "version=" << version.first << "-" << version.second; @@ -1605,20 +1605,20 @@ OLAPStatus OLAPTable::merge_header(const OLAPHeader& hdr, int to_version) { return OLAP_SUCCESS; } -Rowset* OLAPTable::_get_largest_index() { - Rowset* largest_index = NULL; +SegmentGroup* OLAPTable::_get_largest_index() { + SegmentGroup* largest_index = NULL; size_t largest_index_sizes = 0; for (auto& it : _data_sources) { - // use rowset of base file as target rowset when base is not empty, - // or try to find the biggest rowset. - for (Rowset* rowset : it.second) { - if (rowset->empty() || rowset->zero_num_rows()) { + // use segment_group of base file as target segment_group when base is not empty, + // or try to find the biggest segment_group. + for (SegmentGroup* segment_group : it.second) { + if (segment_group->empty() || segment_group->zero_num_rows()) { continue; } - if (rowset->index_size() > largest_index_sizes) { - largest_index = rowset; - largest_index_sizes = rowset->index_size(); + if (segment_group->index_size() > largest_index_sizes) { + largest_index = segment_group; + largest_index_sizes = segment_group->index_size(); } } } @@ -1693,9 +1693,9 @@ OLAPStatus OLAPTable::split_range( } ReadLock rdlock(get_header_lock_ptr()); - Rowset* base_index = _get_largest_index(); + SegmentGroup* base_index = _get_largest_index(); - // 如果找不到合适的rowset,就直接返回startkey,endkey + // 如果找不到合适的segment_group,就直接返回startkey,endkey if (base_index == NULL) { OLAP_LOG_DEBUG("there is no base file now, may be tablet is empty."); // it may be right if the table is empty, so we return success. @@ -1803,12 +1803,12 @@ void OLAPTable::_list_files_with_suffix(const string& file_suffix, set* string tablet_path_prefix = prefix_stream.str(); for (auto& it : _data_sources) { // every data segment has its file name. - for (Rowset* rowset : it.second) { - for (int32_t seg_id = 0; seg_id < rowset->num_segments(); ++seg_id) { + for (SegmentGroup* segment_group : it.second) { + for (int32_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { file_names->insert(basename(construct_file_path(tablet_path_prefix, - rowset->version(), - rowset->version_hash(), - rowset->rowset_id(), + segment_group->version(), + segment_group->version_hash(), + segment_group->segment_group_id(), seg_id, file_suffix).c_str())); } @@ -1846,17 +1846,17 @@ void OLAPTable::list_version_entities(vector* version_entities) c // version_entities vector is not sorted. version_olap_index_map_t::const_iterator it; for (it = _data_sources.begin(); it != _data_sources.end(); ++it) { - const std::vector& index_vec = it->second; + const std::vector& index_vec = it->second; VersionEntity version_entity(it->first, index_vec[0]->version_hash()); - for (Rowset* rowset : index_vec) { + for (SegmentGroup* segment_group : index_vec) { const std::vector* column_statistics = nullptr; - if (rowset->has_column_statistics()) { - column_statistics = &(rowset->get_column_statistics()); + if (segment_group->has_column_statistics()) { + column_statistics = &(segment_group->get_column_statistics()); } - RowSetEntity rowset_entity(rowset->rowset_id(), rowset->num_segments(), - rowset->num_rows(), rowset->data_size(), - rowset->index_size(), rowset->empty(), column_statistics); - version_entity.add_rowset_entity(rowset_entity); + SegmentGroupEntity segment_group_entity(segment_group->segment_group_id(), segment_group->num_segments(), + segment_group->num_rows(), segment_group->data_size(), + segment_group->index_size(), segment_group->empty(), column_statistics); + version_entity.add_segment_group_entity(segment_group_entity); } version_entities->push_back(version_entity); } @@ -1871,16 +1871,16 @@ void OLAPTable::delete_all_files() { // remove indices and data files, release related resources. for (vector::const_iterator it = versions.begin(); it != versions.end(); ++it) { - std::vector index_vec; + std::vector index_vec; if (unregister_data_source(*it, &index_vec) != OLAP_SUCCESS) { LOG(WARNING) << "fail to unregister version." << "version=" << it->first << "-" << it->second; return; } - for (Rowset* rowset : index_vec) { - rowset->delete_all_files(); - delete rowset; + for (SegmentGroup* segment_group : index_vec) { + segment_group->delete_all_files(); + delete segment_group; } } @@ -1892,29 +1892,29 @@ void OLAPTable::delete_all_files() { string OLAPTable::construct_index_file_path(const Version& version, VersionHash version_hash, - int32_t rowset_id, + int32_t segment_group_id, int32_t segment) const { stringstream prefix_stream; prefix_stream << _tablet_path << "/" << _tablet_id; string tablet_path_prefix = prefix_stream.str(); - return construct_file_path(tablet_path_prefix, version, version_hash, rowset_id, segment, "idx"); + return construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "idx"); } string OLAPTable::construct_data_file_path(const Version& version, VersionHash version_hash, - int32_t rowset_id, + int32_t segment_group_id, int32_t segment) const { stringstream prefix_stream; prefix_stream << _tablet_path << "/" << _tablet_id; string tablet_path_prefix = prefix_stream.str(); - return construct_file_path(tablet_path_prefix, version, version_hash, rowset_id, segment, "dat"); + return construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "dat"); } string OLAPTable::construct_file_path(const string& tablet_path_prefix, const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment, + int32_t segment_group_id, int32_t segment, const string& suffix) { char file_path[OLAP_MAX_PATH_LEN]; - if (rowset_id == -1) { + if (segment_group_id == -1) { snprintf(file_path, sizeof(file_path), "%s_%ld_%ld_%ld_%d.%s", @@ -1932,7 +1932,7 @@ string OLAPTable::construct_file_path(const string& tablet_path_prefix, version.first, version.second, version_hash, - rowset_id, segment, + segment_group_id, segment, suffix.c_str()); } @@ -1940,54 +1940,54 @@ string OLAPTable::construct_file_path(const string& tablet_path_prefix, } string OLAPTable::construct_incremental_delta_dir_path() const { - stringstream rowset_dir_path; - rowset_dir_path << _tablet_path << INCREMENTAL_DELTA_PREFIX; + stringstream segment_group_dir_path; + segment_group_dir_path << _tablet_path << INCREMENTAL_DELTA_PREFIX; - return rowset_dir_path.str(); + return segment_group_dir_path.str(); } string OLAPTable::construct_incremental_index_file_path(Version version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const { - string rowset_dir_path = construct_incremental_delta_dir_path(); - stringstream rowset_file_path; - rowset_file_path << rowset_dir_path << "/" - << construct_file_name(version, version_hash, rowset_id, segment, "idx"); - return rowset_file_path.str(); + int32_t segment_group_id, int32_t segment) const { + string segment_group_dir_path = construct_incremental_delta_dir_path(); + stringstream segment_group_file_path; + segment_group_file_path << segment_group_dir_path << "/" + << construct_file_name(version, version_hash, segment_group_id, segment, "idx"); + return segment_group_file_path.str(); } string OLAPTable::construct_incremental_data_file_path(Version version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const { - string rowset_dir_path = construct_incremental_delta_dir_path(); - stringstream rowset_file_path; - rowset_file_path << rowset_dir_path << "/" - << construct_file_name(version, version_hash, rowset_id, segment, "dat"); - return rowset_file_path.str(); + int32_t segment_group_id, int32_t segment) const { + string segment_group_dir_path = construct_incremental_delta_dir_path(); + stringstream segment_group_file_path; + segment_group_file_path << segment_group_dir_path << "/" + << construct_file_name(version, version_hash, segment_group_id, segment, "dat"); + return segment_group_file_path.str(); } string OLAPTable::construct_pending_data_dir_path() const { return _tablet_path + PENDING_DELTA_PREFIX; } string OLAPTable::construct_pending_index_file_path(TTransactionId transaction_id, - int32_t rowset_id, int32_t segment) const { + int32_t segment_group_id, int32_t segment) const { string dir_path = construct_pending_data_dir_path(); stringstream file_path; file_path << dir_path << "/" << transaction_id << "_" - << rowset_id << "_" << segment << ".idx"; + << segment_group_id << "_" << segment << ".idx"; return file_path.str(); } string OLAPTable::construct_pending_data_file_path(TTransactionId transaction_id, - int32_t rowset_id, int32_t segment) const { + int32_t segment_group_id, int32_t segment) const { string dir_path = construct_pending_data_dir_path(); stringstream file_path; file_path << dir_path << "/" << transaction_id << "_" - << rowset_id << "_" << segment << ".dat"; + << segment_group_id << "_" << segment << ".dat"; return file_path.str(); } string OLAPTable::construct_file_name(const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment, + int32_t segment_group_id, int32_t segment, const string& suffix) const { char file_name[OLAP_MAX_PATH_LEN]; snprintf(file_name, sizeof(file_name), @@ -1996,7 +1996,7 @@ string OLAPTable::construct_file_name(const Version& version, version.first, version.second, version_hash, - rowset_id, + segment_group_id, segment, suffix.c_str()); @@ -2025,7 +2025,7 @@ size_t OLAPTable::get_field_size(const string& field_name) const { } if (static_cast(res_iterator->second) >= _field_sizes.size()) { - LOG(WARNING) << "invalid field rowset. [name='" << field_name << "']"; + LOG(WARNING) << "invalid field segment_group. [name='" << field_name << "']"; return 0; } @@ -2040,7 +2040,7 @@ size_t OLAPTable::get_return_column_size(const string& field_name) const { } if (static_cast(res_iterator->second) >= _field_sizes.size()) { - LOG(WARNING) << "invalid field rowset. [name='" << field_name << "']"; + LOG(WARNING) << "invalid field segment_group. [name='" << field_name << "']"; return 0; } @@ -2067,8 +2067,8 @@ size_t OLAPTable::get_row_size() const { int64_t OLAPTable::get_data_size() const { int64_t total_size = 0; for (const PDelta& delta : _header->delta()) { - for (const PRowSet& prowset : delta.rowset()) { - total_size += prowset.data_size(); + for (const PSegmentGroup& psegment_group : delta.segment_group()) { + total_size += psegment_group.data_size(); } } @@ -2078,8 +2078,8 @@ int64_t OLAPTable::get_data_size() const { int64_t OLAPTable::get_num_rows() const { int64_t num_rows = 0; for (const PDelta& delta : _header->delta()) { - for (const PRowSet& prowset : delta.rowset()) { - num_rows += prowset.num_rows(); + for (const PSegmentGroup& psegment_group : delta.segment_group()) { + num_rows += psegment_group.num_rows(); } } @@ -2190,35 +2190,35 @@ bool OLAPTable::is_used() { } VersionEntity OLAPTable::get_version_entity_by_version(const Version& version) { - std::vector& index_vec = _data_sources[version]; + std::vector& index_vec = _data_sources[version]; VersionEntity version_entity(version, index_vec[0]->version_hash()); - for (Rowset* rowset : index_vec) { + for (SegmentGroup* segment_group : index_vec) { const std::vector* column_statistics = nullptr; - if (rowset->has_column_statistics()) { - column_statistics = &(rowset->get_column_statistics()); + if (segment_group->has_column_statistics()) { + column_statistics = &(segment_group->get_column_statistics()); } - RowSetEntity rowset_entity(rowset->rowset_id(), rowset->num_segments(), - rowset->num_rows(), rowset->data_size(), - rowset->index_size(), rowset->empty(), column_statistics); - version_entity.add_rowset_entity(rowset_entity); + SegmentGroupEntity segment_group_entity(segment_group->segment_group_id(), segment_group->num_segments(), + segment_group->num_rows(), segment_group->data_size(), + segment_group->index_size(), segment_group->empty(), column_statistics); + version_entity.add_segment_group_entity(segment_group_entity); } return version_entity; } size_t OLAPTable::get_version_index_size(const Version& version) { - std::vector& index_vec = _data_sources[version]; + std::vector& index_vec = _data_sources[version]; size_t index_size = 0; - for (Rowset* rowset : index_vec) { - index_size += rowset->index_size(); + for (SegmentGroup* segment_group : index_vec) { + index_size += segment_group->index_size(); } return index_size; } size_t OLAPTable::get_version_data_size(const Version& version) { - std::vector& index_vec = _data_sources[version]; + std::vector& index_vec = _data_sources[version]; size_t data_size = 0; - for (Rowset* rowset : index_vec) { - data_size += rowset->data_size(); + for (SegmentGroup* segment_group : index_vec) { + data_size += segment_group->data_size(); } return data_size; } @@ -2231,33 +2231,33 @@ OLAPStatus OLAPTable::recover_tablet_until_specfic_version( get_missing_versions_with_header_locked(until_version, &missing_versions); } - std::vector rowset_vec; + std::vector segment_group_vec; OLAPStatus res = OLAP_SUCCESS; for (Version& missing_version : missing_versions) { - Rowset* rowset = new Rowset(this, missing_version, version_hash, false, 0, 0); - rowset->set_empty(true); - ColumnDataWriter* writer = ColumnDataWriter::create(std::shared_ptr(this), rowset, true); + SegmentGroup* segment_group = new SegmentGroup(this, missing_version, version_hash, false, 0, 0); + segment_group->set_empty(true); + ColumnDataWriter* writer = ColumnDataWriter::create(std::shared_ptr(this), segment_group, true); if (res != OLAP_SUCCESS) { break; } res = writer->finalize(); if (res != OLAP_SUCCESS) { break; } - rowset_vec.push_back(rowset); + segment_group_vec.push_back(segment_group); } if (res != OLAP_SUCCESS) { - for (Rowset* rowset : rowset_vec) { - rowset->delete_all_files(); - SAFE_DELETE(rowset); + for (SegmentGroup* segment_group : segment_group_vec) { + segment_group->delete_all_files(); + SAFE_DELETE(segment_group); } } else { - for (Rowset* rowset : rowset_vec) { - rowset->load(); + for (SegmentGroup* segment_group : segment_group_vec) { + segment_group->load(); } } { WriteLock wrlock(&_header_lock); - RETURN_NOT_OK(register_data_source(rowset_vec)); + RETURN_NOT_OK(register_data_source(segment_group_vec)); RETURN_NOT_OK(save_header()); } return OLAP_SUCCESS; diff --git a/be/src/olap/olap_table.h b/be/src/olap/olap_table.h index d823d0861daee3..726aadc732927d 100644 --- a/be/src/olap/olap_table.h +++ b/be/src/olap/olap_table.h @@ -38,7 +38,7 @@ namespace doris { class FieldInfo; class ColumnData; class OLAPHeader; -class Rowset; +class SegmentGroup; class OLAPTable; class RowBlockPosition; class OlapStore; @@ -159,20 +159,20 @@ class OLAPTable : public std::enable_shared_from_this { // Registers a newly created data source, making it available for // querying. Adds a reference to the data source in the header file. - OLAPStatus register_data_source(const std::vector& index_vec); + OLAPStatus register_data_source(const std::vector& segment_group_vec); // Unregisters the data source for given version, frees up resources. // resources include memory, files. - // After unregister, index will point to the associated Rowset. - OLAPStatus unregister_data_source(const Version& version, std::vector* index_vec); + // After unregister, segment_group will point to the associated SegmentGroup. + OLAPStatus unregister_data_source(const Version& version, std::vector* segment_group_vec); // if pending data is push_for_delete, delete conditions is not null OLAPStatus add_pending_version(int64_t partition_id, int64_t transaction_id, const std::vector* delete_conditions); - OLAPStatus add_pending_rowset(Rowset* index); - int32_t current_pending_rowset_id(int64_t transaction_id); + OLAPStatus add_pending_segment_group(SegmentGroup* segment_group); + int32_t current_pending_segment_group_id(int64_t transaction_id); - OLAPStatus add_pending_data(Rowset* index, const std::vector* delete_conditions); + OLAPStatus add_pending_data(SegmentGroup* segment_group, const std::vector* delete_conditions); bool has_pending_data(int64_t transaction_id); @@ -209,8 +209,8 @@ class OLAPTable : public std::enable_shared_from_this { // Atomically replaces one set of data sources with another. Returns // true on success. OLAPStatus replace_data_sources(const std::vector* old_versions, - const std::vector* new_data_sources, - std::vector* old_data_sources); + const std::vector* new_data_sources, + std::vector* old_data_sources); // Computes the cumulative hash for given versions. // Only use Base file and Delta files to compute for simplicity and @@ -310,7 +310,7 @@ class OLAPTable : public std::enable_shared_from_this { // DailyWinfoIdeaStats_PRIMARY_20120428_0_200_735382373247_1.idx std::string construct_index_file_path(const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const; + int32_t segment_group_id, int32_t segment) const; // Same as construct_index_file_path except that file suffix is .dat // The typical index file path is: @@ -318,29 +318,29 @@ class OLAPTable : public std::enable_shared_from_this { // DailyWinfoIdeaStats_PRIMARY_20120428_0_200_735382373247_1.dat std::string construct_data_file_path(const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment) const; + int32_t segment_group_id, int32_t segment) const; // For index file, suffix is "idx", for data file, suffix is "dat". static std::string construct_file_path(const std::string& tablet_path, const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment, + int32_t segment_group_id, int32_t segment, const std::string& suffix); std::string construct_pending_data_dir_path() const; std::string construct_pending_index_file_path( - TTransactionId transaction_id, int32_t rowset_id, int32_t segment) const; + TTransactionId transaction_id, int32_t segment_group_id, int32_t segment) const; std::string construct_pending_data_file_path( - TTransactionId transaction_id, int32_t rowset_id, int32_t segment) const; + TTransactionId transaction_id, int32_t segment_group_id, int32_t segment) const; std::string construct_incremental_delta_dir_path() const; std::string construct_incremental_index_file_path( - Version version, VersionHash version_hash, int32_t rowset_id, int32_t segment) const; + Version version, VersionHash version_hash, int32_t segment_group_id, int32_t segment) const; std::string construct_incremental_data_file_path( - Version version, VersionHash version_hash, int32_t rowset_id, int32_t segment) const; + Version version, VersionHash version_hash, int32_t segment_group_id, int32_t segment) const; std::string construct_file_name(const Version& version, VersionHash version_hash, - int32_t rowset_id, int32_t segment, + int32_t segment_group_id, int32_t segment, const std::string& suffix) const; std::string construct_dir_path() const; @@ -670,7 +670,7 @@ class OLAPTable : public std::enable_shared_from_this { OLAPStatus recover_tablet_until_specfic_version(const int64_t& until_version, const int64_t& version_hash); private: - // used for hash-struct of hash_map. + // used for hash-struct of hash_map. struct HashOfVersion { uint64_t operator()(const Version& version) const { uint64_t hash_value = version.first; @@ -690,16 +690,16 @@ class OLAPTable : public std::enable_shared_from_this { std::set* file_names) const; // 获取最大的index(只看大小) - Rowset* _get_largest_index(); + SegmentGroup* _get_largest_index(); - Rowset* _construct_index_from_version(const PDelta* delta, int32_t rowset_id); + SegmentGroup* _construct_segment_group_from_version(const PDelta* delta, int32_t segment_group_id); // check if version is same, may delete local data OLAPStatus _handle_existed_version(int64_t transaction_id, const Version& version, const VersionHash& version_hash); // like "9-9" "10-10", for incremental cloning - OLAPStatus _add_incremental_data(std::vector& index_vec, int64_t transaction_id, + OLAPStatus _add_incremental_data(std::vector& index_vec, int64_t transaction_id, const Version& version, const VersionHash& version_hash); void _delete_incremental_data(const Version& version, const VersionHash& version_hash); @@ -718,11 +718,11 @@ class OLAPTable : public std::enable_shared_from_this { std::string _full_name; std::vector _tablet_schema; // field info vector is table schema. - // Version mapping to Rowset. + // Version mapping to SegmentGroup. // data source can be base delta, cumulative delta, singleton delta. - using version_olap_index_map_t = std::unordered_map, HashOfVersion>; + using version_olap_index_map_t = std::unordered_map, HashOfVersion>; version_olap_index_map_t _data_sources; - using transaction_olap_index_map_t = std::unordered_map>; + using transaction_olap_index_map_t = std::unordered_map>; transaction_olap_index_map_t _pending_data_sources; size_t _num_fields; diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 89cb0322064d6d..d6f54cf782f08e 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -280,9 +280,9 @@ OLAPStatus PushHandler::process( continue; } - for (Rowset* rowset : table_var.added_indices) { - rowset->delete_all_files(); - SAFE_DELETE(rowset); + for (SegmentGroup* segment_group : table_var.added_indices) { + segment_group->delete_all_files(); + SAFE_DELETE(segment_group); } } @@ -448,14 +448,14 @@ OLAPStatus PushHandler::process_realtime_push( continue; } - for (Rowset* olap_index : table_var.added_indices) { + for (SegmentGroup* segment_group : table_var.added_indices) { res = table_var.olap_table->add_pending_data( - olap_index, push_type == PUSH_FOR_DELETE ? &request.delete_conditions : NULL); + segment_group, push_type == PUSH_FOR_DELETE ? &request.delete_conditions : NULL); // if pending data exists in tablet, which means push finished if (res == OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { - SAFE_DELETE(olap_index); + SAFE_DELETE(segment_group); res = OLAP_SUCCESS; } else if (res != OLAP_SUCCESS) { @@ -498,9 +498,9 @@ OLAPStatus PushHandler::process_realtime_push( table_var.olap_table->tablet_id(), table_var.olap_table->schema_hash()); // actually, olap_index may has been deleted in delete_transaction() - for (Rowset* rowset : table_var.added_indices) { - rowset->release(); - OLAPEngine::get_instance()->add_unused_index(rowset); + for (SegmentGroup* segment_group : table_var.added_indices) { + segment_group->release(); + OLAPEngine::get_instance()->add_unused_index(segment_group); } } } @@ -535,7 +535,7 @@ OLAPStatus PushHandler::_convert( BinaryFile raw_file; IBinaryReader* reader = NULL; ColumnDataWriter* writer = NULL; - Rowset* delta_rowset = NULL; + SegmentGroup* delta_segment_group = NULL; uint32_t num_rows = 0; do { @@ -581,8 +581,8 @@ OLAPStatus PushHandler::_convert( } } - // 2. New Rowset of curr_olap_table for current push - OLAP_LOG_DEBUG("init Rowset."); + // 2. New SegmentGroup of curr_olap_table for current push + OLAP_LOG_DEBUG("init SegmentGroup."); if (_request.__isset.transaction_id) { // create pending data dir @@ -595,11 +595,11 @@ OLAPStatus PushHandler::_convert( } } - delta_rowset = new(std::nothrow) Rowset( + delta_segment_group = new(std::nothrow) SegmentGroup( curr_olap_table.get(), (_request.push_type == TPushType::LOAD_DELETE), 0, 0, true, _request.partition_id, _request.transaction_id); } else { - delta_rowset = new(std::nothrow) Rowset( + delta_segment_group = new(std::nothrow) SegmentGroup( curr_olap_table.get(), Version(_request.version, _request.version), _request.version_hash, @@ -607,20 +607,20 @@ OLAPStatus PushHandler::_convert( 0, 0); } - if (NULL == delta_rowset) { - OLAP_LOG_WARNING("fail to malloc Rowset. [table='%s' size=%ld]", - curr_olap_table->full_name().c_str(), sizeof(Rowset)); + if (NULL == delta_segment_group) { + OLAP_LOG_WARNING("fail to malloc SegmentGroup. [table='%s' size=%ld]", + curr_olap_table->full_name().c_str(), sizeof(SegmentGroup)); res = OLAP_ERR_MALLOC_ERROR; break; } - curr_olap_indices->push_back(delta_rowset); + curr_olap_indices->push_back(delta_segment_group); - // 3. New Writer to write data into Rowset + // 3. New Writer to write data into SegmentGroup OLAP_LOG_DEBUG("init writer. [table='%s' block_row_size=%lu]", curr_olap_table->full_name().c_str(), curr_olap_table->num_rows_per_row_block()); - if (NULL == (writer = ColumnDataWriter::create(curr_olap_table, delta_rowset, true))) { + if (NULL == (writer = ColumnDataWriter::create(curr_olap_table, delta_segment_group, true))) { OLAP_LOG_WARNING("fail to create writer. [table='%s']", curr_olap_table->full_name().c_str()); res = OLAP_ERR_MALLOC_ERROR; @@ -633,7 +633,7 @@ OLAPStatus PushHandler::_convert( break; } - // 5. Read data from raw file and write into Rowset of curr_olap_table + // 5. Read data from raw file and write into SegmentGroup of curr_olap_table if (_request.__isset.http_file_path) { // Convert from raw to delta OLAP_LOG_DEBUG("start to convert row file to delta."); @@ -673,13 +673,13 @@ OLAPStatus PushHandler::_convert( OLAP_LOG_DEBUG("load the index."); - if (OLAP_SUCCESS != (res = delta_rowset->load())) { + if (OLAP_SUCCESS != (res = delta_segment_group->load())) { OLAP_LOG_WARNING("fail to load index. [res=%d table='%s' version=%ld]", res, curr_olap_table->full_name().c_str(), _request.version); break; } - _write_bytes += delta_rowset->data_size(); - _write_rows += delta_rowset->num_rows(); + _write_bytes += delta_segment_group->data_size(); + _write_rows += delta_segment_group->num_rows(); // 7. Convert data for schema change tables OLAP_LOG_TRACE("load to related tables of schema_change if possible. "); diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index 920a7d4f8569d2..a8f1769a2b62d2 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -27,14 +27,14 @@ #include "olap/file_helper.h" #include "olap/merger.h" #include "olap/olap_common.h" -#include "olap/rowset.h" +#include "olap/segment_group.h" #include "olap/row_cursor.h" #include "olap/data_writer.h" namespace doris { typedef std::vector DataSources; -typedef std::vector Indices; +typedef std::vector Indices; class BinaryFile; class BinaryReader; @@ -88,7 +88,7 @@ class PushHandler { Versions* unused_versions); // Convert local data file to internal formatted delta, - // return new delta's Rowset + // return new delta's SegmentGroup OLAPStatus _convert( OLAPTablePtr curr_olap_table, OLAPTablePtr new_olap_table_vec, diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index 23b4f9061fb80f..5df393fb6c9ef8 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -73,7 +73,7 @@ class CollectIterator { } OLAPStatus init() { - auto res = _row_cursor.init(_data->olap_index()->table()->tablet_schema()); + auto res = _row_cursor.init(_data->segment_group()->table()->tablet_schema()); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to init row cursor, res=" << res; return res; diff --git a/be/src/olap/rowset_builder.h b/be/src/olap/rowset_builder.h deleted file mode 100644 index 0b49b5949eb7de..00000000000000 --- a/be/src/olap/rowset_builder.h +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "olap/ata_writer.h" - -namespace doris { - -class Rowset; - -class RowsetBuilder : public ColumnDataWriter { -public: - RowsetBuil(OLAPTablePtr table, Rowset* rowset, ColumnDataWriter* writer, bool is_push_write) - : ColumnDataWriter(is_push_write, table), - _rowset(rowset), - _writer(write) { - } - - virtual ~RowSetBuilder() { - } - - OLAPStatus init() override { - return _writer->init(); - } - - OLAPStatus attached_by(RowCursor* row_cursor) override { - return _writer->attached_by(row_cursor); - } - OLAPStatus write(const char* row) override { - return _writer->write(row); - } - OLAPStatus finalize() override { - return _writer->finalize(); - } - uint64_t written_bytes() override { - return _writer->written_bytes(); - } - MemPool* mem_pool() override { - return _writer->mem_pool(); - } - - Rowset* rowset() { return _rowset; } - ColumnDataWriter* writer() { return _writer; } - -private: - Rowset* _rowset; - ColumnDataWriter* _writer; -}; - -} diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 54d35170069699..42391bbe62df7c 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -686,10 +686,10 @@ bool SchemaChangeDirectly::_write_row_block(ColumnDataWriter* writer, RowBlock* return true; } -bool LinkedSchemaChange::process(ColumnData* olap_data, Rowset* new_rowset) { - for (size_t i = 0; i < olap_data->olap_index()->num_segments(); ++i) { - string index_path = new_rowset->construct_index_file_path(new_rowset->rowset_id(), i); - string base_table_index_path = olap_data->olap_index()->construct_index_file_path(olap_data->olap_index()->rowset_id(), i); +bool LinkedSchemaChange::process(ColumnData* olap_data, SegmentGroup* new_segment_group) { + for (size_t i = 0; i < olap_data->segment_group()->num_segments(); ++i) { + string index_path = new_segment_group->construct_index_file_path(new_segment_group->segment_group_id(), i); + string base_table_index_path = olap_data->segment_group()->construct_index_file_path(olap_data->segment_group()->segment_group_id(), i); if (link(base_table_index_path.c_str(), index_path.c_str()) == 0) { OLAP_LOG_DEBUG("success to create hard link. [from_path=%s to_path=%s]", base_table_index_path.c_str(), index_path.c_str()); @@ -700,8 +700,8 @@ bool LinkedSchemaChange::process(ColumnData* olap_data, Rowset* new_rowset) { return false; } - string data_path = new_rowset->construct_data_file_path(new_rowset->rowset_id(), i); - string base_table_data_path = olap_data->olap_index()->construct_data_file_path(olap_data->olap_index()->rowset_id(), i); + string data_path = new_segment_group->construct_data_file_path(new_segment_group->segment_group_id(), i); + string base_table_data_path = olap_data->segment_group()->construct_data_file_path(olap_data->segment_group()->segment_group_id(), i); if (link(base_table_data_path.c_str(), data_path.c_str()) == 0) { OLAP_LOG_DEBUG("success to create hard link. [from_path=%s to_path=%s]", base_table_data_path.c_str(), data_path.c_str()); @@ -713,23 +713,23 @@ bool LinkedSchemaChange::process(ColumnData* olap_data, Rowset* new_rowset) { } } - new_rowset->set_empty(olap_data->empty()); - new_rowset->set_num_segments(olap_data->olap_index()->num_segments()); - new_rowset->add_column_statistics_for_linked_schema_change(olap_data->olap_index()->get_column_statistics()); + new_segment_group->set_empty(olap_data->empty()); + new_segment_group->set_num_segments(olap_data->segment_group()->num_segments()); + new_segment_group->add_column_statistics_for_linked_schema_change(olap_data->segment_group()->get_column_statistics()); - if (OLAP_SUCCESS != new_rowset->load()) { + if (OLAP_SUCCESS != new_segment_group->load()) { OLAP_LOG_WARNING("fail to reload index. [table='%s' version='%d-%d']", _new_olap_table->full_name().c_str(), - new_rowset->version().first, - new_rowset->version().second); + new_segment_group->version().first, + new_segment_group->version().second); return false; } return true; } -bool SchemaChangeDirectly::process(ColumnData* olap_data, Rowset* new_rowset) { - DataFileType data_file_type = new_rowset->table()->data_file_type(); +bool SchemaChangeDirectly::process(ColumnData* olap_data, SegmentGroup* new_segment_group) { + DataFileType data_file_type = new_segment_group->table()->data_file_type(); bool null_supported = true; if (NULL == _row_block_allocator) { @@ -785,14 +785,14 @@ bool SchemaChangeDirectly::process(ColumnData* olap_data, Rowset* new_rowset) { } if (need_create_empty_version) { - res = create_init_version(new_rowset->table()->tablet_id(), - new_rowset->table()->schema_hash(), - new_rowset->version(), - new_rowset->version_hash(), - new_rowset); + res = create_init_version(new_segment_group->table()->tablet_id(), + new_segment_group->table()->schema_hash(), + new_segment_group->version(), + new_segment_group->version_hash(), + new_segment_group); if (res != OLAP_SUCCESS) { LOG(WARNING) << "create empty version for schema change failed." - << "version=" << new_rowset->version().first << "-" << new_rowset->version().second; + << "version=" << new_segment_group->version().first << "-" << new_segment_group->version().second; return false; } return true; @@ -802,7 +802,7 @@ bool SchemaChangeDirectly::process(ColumnData* olap_data, Rowset* new_rowset) { << "block_row_size=" << _olap_table->num_rows_per_row_block(); bool result = true; RowBlock* new_row_block = NULL; - ColumnDataWriter* writer = ColumnDataWriter::create(_olap_table, new_rowset, false); + ColumnDataWriter* writer = ColumnDataWriter::create(_olap_table, new_segment_group, false); if (NULL == writer) { OLAP_LOG_WARNING("failed to create writer."); result = false; @@ -862,11 +862,11 @@ bool SchemaChangeDirectly::process(ColumnData* olap_data, Rowset* new_rowset) { goto DIRECTLY_PROCESS_ERR; } - if (OLAP_SUCCESS != new_rowset->load()) { + if (OLAP_SUCCESS != new_segment_group->load()) { OLAP_LOG_WARNING("fail to reload index. [table='%s' version='%d-%d']", _olap_table->full_name().c_str(), - new_rowset->version().first, - new_rowset->version().second); + new_segment_group->version().first, + new_segment_group->version().second); result = false; goto DIRECTLY_PROCESS_ERR; } @@ -875,19 +875,19 @@ bool SchemaChangeDirectly::process(ColumnData* olap_data, Rowset* new_rowset) { // Check row num changes if (config::row_nums_check) { - if (olap_data->olap_index()->num_rows() - != new_rowset->num_rows() + merged_rows() + filted_rows()) { + if (olap_data->segment_group()->num_rows() + != new_segment_group->num_rows() + merged_rows() + filted_rows()) { OLAP_LOG_FATAL("fail to check row num! " "[source_rows=%lu merged_rows=%lu filted_rows=%lu new_index_rows=%lu]", - olap_data->olap_index()->num_rows(), - merged_rows(), filted_rows(), new_rowset->num_rows()); + olap_data->segment_group()->num_rows(), + merged_rows(), filted_rows(), new_segment_group->num_rows()); result = false; } } else { OLAP_LOG_INFO("all row nums. " "[source_rows=%lu merged_rows=%lu filted_rows=%lu new_index_rows=%lu]", - olap_data->olap_index()->num_rows(), - merged_rows(), filted_rows(), new_rowset->num_rows()); + olap_data->segment_group()->num_rows(), + merged_rows(), filted_rows(), new_segment_group->num_rows()); } DIRECTLY_PROCESS_ERR: @@ -917,7 +917,7 @@ SchemaChangeWithSorting::~SchemaChangeWithSorting() { SAFE_DELETE(_row_block_allocator); } -bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) { +bool SchemaChangeWithSorting::process(ColumnData* olap_data, SegmentGroup* new_segment_group) { if (NULL == _row_block_allocator) { if (NULL == (_row_block_allocator = new(nothrow) RowBlockAllocator( _olap_table->tablet_schema(), _memory_limitation))) { @@ -927,7 +927,7 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) } } - DataFileType data_file_type = new_rowset->table()->data_file_type(); + DataFileType data_file_type = new_segment_group->table()->data_file_type(); bool null_supported = true; RowBlock* ref_row_block = NULL; @@ -948,14 +948,14 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) } if (need_create_empty_version) { - res = create_init_version(new_rowset->table()->tablet_id(), - new_rowset->table()->schema_hash(), - new_rowset->version(), - new_rowset->version_hash(), - new_rowset); + res = create_init_version(new_segment_group->table()->tablet_id(), + new_segment_group->table()->schema_hash(), + new_segment_group->version(), + new_segment_group->version_hash(), + new_segment_group); if (res != OLAP_SUCCESS) { LOG(WARNING) << "create empty version for schema change failed." - << "version=" << new_rowset->version().first << "-" << new_rowset->version().second; + << "version=" << new_segment_group->version().first << "-" << new_segment_group->version().second; return false; } return true; @@ -970,7 +970,7 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) vector row_block_arr; // for external sorting - vector olap_rowsets; + vector olap_segment_groups; _temp_delta_versions.first = _temp_delta_versions.second; @@ -996,18 +996,18 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) } // enter here while memory limitation is reached. - Rowset* rowset = NULL; + SegmentGroup* segment_group = NULL; if (!_internal_sorting(row_block_arr, Version(_temp_delta_versions.second, _temp_delta_versions.second), - &rowset)) { + &segment_group)) { OLAP_LOG_WARNING("failed to sorting internally."); result = false; goto SORTING_PROCESS_ERR; } - olap_rowsets.push_back(rowset); + olap_segment_groups.push_back(segment_group); for (vector::iterator it = row_block_arr.begin(); it != row_block_arr.end(); ++it) { @@ -1052,17 +1052,17 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) if (!row_block_arr.empty()) { // enter here while memory limitation is reached. - Rowset* rowset = NULL; + SegmentGroup* segment_group = NULL; if (!_internal_sorting(row_block_arr, Version(_temp_delta_versions.second, _temp_delta_versions.second), - &rowset)) { + &segment_group)) { OLAP_LOG_WARNING("failed to sorting internally."); result = false; goto SORTING_PROCESS_ERR; } - olap_rowsets.push_back(rowset); + olap_segment_groups.push_back(segment_group); for (vector::iterator it = row_block_arr.begin(); it != row_block_arr.end(); ++it) { @@ -1076,7 +1076,7 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) } // TODO(zyh): 如果_temp_delta_versions只有一个,不需要再外排 - if (!_external_sorting(olap_rowsets, new_rowset)) { + if (!_external_sorting(olap_segment_groups, new_segment_group)) { OLAP_LOG_WARNING("failed to sorting externally."); result = false; goto SORTING_PROCESS_ERR; @@ -1086,24 +1086,24 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) // Check row num changes if (config::row_nums_check) { - if (olap_data->olap_index()->num_rows() - != new_rowset->num_rows() + merged_rows() + filted_rows()) { + if (olap_data->segment_group()->num_rows() + != new_segment_group->num_rows() + merged_rows() + filted_rows()) { OLAP_LOG_WARNING("fail to check row num! " "[source_rows=%lu merged_rows=%lu filted_rows=%lu new_index_rows=%lu]", - olap_data->olap_index()->num_rows(), - merged_rows(), filted_rows(), new_rowset->num_rows()); + olap_data->segment_group()->num_rows(), + merged_rows(), filted_rows(), new_segment_group->num_rows()); result = false; } } else { OLAP_LOG_INFO("all row nums. " "[source_rows=%lu merged_rows=%lu filted_rows=%lu new_index_rows=%lu]", - olap_data->olap_index()->num_rows(), - merged_rows(), filted_rows(), new_rowset->num_rows()); + olap_data->segment_group()->num_rows(), + merged_rows(), filted_rows(), new_segment_group->num_rows()); } SORTING_PROCESS_ERR: - for (vector::iterator it = olap_rowsets.begin(); - it != olap_rowsets.end(); ++it) { + for (vector::iterator it = olap_segment_groups.begin(); + it != olap_segment_groups.end(); ++it) { (*it)->delete_all_files(); SAFE_DELETE(*it); } @@ -1119,18 +1119,18 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, Rowset* new_rowset) bool SchemaChangeWithSorting::_internal_sorting(const vector& row_block_arr, const Version& temp_delta_versions, - Rowset** temp_rowset) { + SegmentGroup** temp_segment_group) { ColumnDataWriter* writer = NULL; uint64_t merged_rows = 0; RowBlockMerger merger(_olap_table); - (*temp_rowset) = new(nothrow) Rowset(_olap_table.get(), + (*temp_segment_group) = new(nothrow) SegmentGroup(_olap_table.get(), temp_delta_versions, rand(), false, 0, 0); - if (NULL == (*temp_rowset)) { - OLAP_LOG_WARNING("failed to malloc Rowset. [size=%ld]", sizeof(Rowset)); + if (NULL == (*temp_segment_group)) { + OLAP_LOG_WARNING("failed to malloc SegmentGroup. [size=%ld]", sizeof(SegmentGroup)); goto INTERNAL_SORTING_ERR; } @@ -1138,7 +1138,7 @@ bool SchemaChangeWithSorting::_internal_sorting(const vector& row_blo _olap_table->full_name().c_str(), _olap_table->num_rows_per_row_block()); - writer = ColumnDataWriter::create(_olap_table, *temp_rowset, false); + writer = ColumnDataWriter::create(_olap_table, *temp_segment_group, false); if (NULL == writer) { OLAP_LOG_WARNING("failed to create writer."); goto INTERNAL_SORTING_ERR; @@ -1150,7 +1150,7 @@ bool SchemaChangeWithSorting::_internal_sorting(const vector& row_blo } add_merged_rows(merged_rows); - if (OLAP_SUCCESS != (*temp_rowset)->load()) { + if (OLAP_SUCCESS != (*temp_segment_group)->load()) { OLAP_LOG_WARNING("failed to reload olap index."); goto INTERNAL_SORTING_ERR; } @@ -1161,22 +1161,22 @@ bool SchemaChangeWithSorting::_internal_sorting(const vector& row_blo INTERNAL_SORTING_ERR: SAFE_DELETE(writer); - (*temp_rowset)->delete_all_files(); - SAFE_DELETE(*temp_rowset); + (*temp_segment_group)->delete_all_files(); + SAFE_DELETE(*temp_segment_group); return false; } bool SchemaChangeWithSorting::_external_sorting( - vector& src_rowsets, - Rowset* dest_rowset) { - Merger merger(_olap_table, dest_rowset, READER_ALTER_TABLE); + vector& src_segment_groups, + SegmentGroup* dest_segment_group) { + Merger merger(_olap_table, dest_segment_group, READER_ALTER_TABLE); uint64_t merged_rows = 0; uint64_t filted_rows = 0; vector olap_data_arr; - for (vector::iterator it = src_rowsets.begin(); - it != src_rowsets.end(); ++it) { + for (vector::iterator it = src_segment_groups.begin(); + it != src_segment_groups.end(); ++it) { ColumnData* olap_data = ColumnData::create(*it); if (NULL == olap_data) { OLAP_LOG_WARNING("fail to create ColumnData."); @@ -1197,18 +1197,18 @@ bool SchemaChangeWithSorting::_external_sorting( if (OLAP_SUCCESS != merger.merge(olap_data_arr, &merged_rows, &filted_rows)) { OLAP_LOG_WARNING("fail to merge deltas. [table='%s' version='%d-%d']", _olap_table->full_name().c_str(), - dest_rowset->version().first, - dest_rowset->version().second); + dest_segment_group->version().first, + dest_segment_group->version().second); goto EXTERNAL_SORTING_ERR; } add_merged_rows(merged_rows); add_filted_rows(filted_rows); - if (OLAP_SUCCESS != dest_rowset->load()) { + if (OLAP_SUCCESS != dest_segment_group->load()) { OLAP_LOG_WARNING("fail to reload index. [table='%s' version='%d-%d']", _olap_table->full_name().c_str(), - dest_rowset->version().first, - dest_rowset->version().second); + dest_segment_group->version().first, + dest_segment_group->version().second); goto EXTERNAL_SORTING_ERR; } @@ -1225,7 +1225,7 @@ bool SchemaChangeWithSorting::_external_sorting( SAFE_DELETE(*it); } - dest_rowset->delete_all_files(); + dest_segment_group->delete_all_files(); return false; } @@ -1499,14 +1499,14 @@ OLAPStatus SchemaChangeHandler::_do_alter_table( for (vector::const_iterator it = new_tablet_versions.begin(); it != new_tablet_versions.end(); ++it) { if (it->second <= lastest_file_version->end_version()) { - std::vector rowsets; - res = new_olap_table->unregister_data_source(*it, &rowsets); + std::vector segment_groups; + res = new_olap_table->unregister_data_source(*it, &segment_groups); if (res != OLAP_SUCCESS) { break; } - for (Rowset* rowset : rowsets) { - rowset->delete_all_files(); - delete rowset; + for (SegmentGroup* segment_group : segment_groups) { + segment_group->delete_all_files(); + delete segment_group; } OLAP_LOG_DEBUG("unregister data source from new tablet when schema change. " "[new_tablet=%s version=%d-%d res=%d]", @@ -1741,9 +1741,9 @@ OLAPStatus SchemaChangeHandler::_create_new_olap_table( OLAPStatus SchemaChangeHandler::schema_version_convert( OLAPTablePtr src_olap_table, OLAPTablePtr dest_olap_table, - vector* ref_rowsets, - vector* new_rowsets) { - if (NULL == new_rowsets) { + vector* ref_segment_groups, + vector* new_segment_groups) { + if (NULL == new_segment_groups) { OLAP_LOG_WARNING("new_olap_index is NULL."); return OLAP_ERR_INPUT_PARAMETER_ERROR; } @@ -1799,8 +1799,8 @@ OLAPStatus SchemaChangeHandler::schema_version_convert( // c. 转换数据 ColumnData* olap_data = NULL; - for (vector::iterator it = ref_rowsets->begin(); - it != ref_rowsets->end(); ++it) { + for (vector::iterator it = ref_segment_groups->begin(); + it != ref_segment_groups->end(); ++it) { ColumnData* olap_data = ColumnData::create(*it); if (NULL == olap_data) { OLAP_LOG_WARNING("fail to create ColumnData."); @@ -1810,31 +1810,31 @@ OLAPStatus SchemaChangeHandler::schema_version_convert( olap_data->init(); - Rowset* new_rowset = nullptr; + SegmentGroup* new_segment_group = nullptr; if ((*it)->transaction_id() == 0) { - new_rowset = new Rowset(dest_olap_table.get(), + new_segment_group = new SegmentGroup(dest_olap_table.get(), olap_data->version(), olap_data->version_hash(), olap_data->delete_flag(), - (*it)->rowset_id(), 0); + (*it)->segment_group_id(), 0); } else { - new_rowset = new Rowset(dest_olap_table.get(), + new_segment_group = new SegmentGroup(dest_olap_table.get(), olap_data->delete_flag(), - (*it)->rowset_id(), 0, + (*it)->segment_group_id(), 0, (*it)->is_pending(), (*it)->partition_id(), (*it)->transaction_id()); } - if (NULL == new_rowset) { - OLAP_LOG_FATAL("failed to malloc Rowset. [size=%ld]", sizeof(Rowset)); + if (NULL == new_segment_group) { + OLAP_LOG_FATAL("failed to malloc SegmentGroup. [size=%ld]", sizeof(SegmentGroup)); res = OLAP_ERR_MALLOC_ERROR; goto SCHEMA_VERSION_CONVERT_ERR; } - new_rowsets->push_back(new_rowset); + new_segment_groups->push_back(new_segment_group); - if (!sc_procedure->process(olap_data, new_rowset)) { + if (!sc_procedure->process(olap_data, new_segment_group)) { if ((*it)->is_pending()) { OLAP_LOG_WARNING("failed to process the transaction when schema change. " "[table='%s' transaction=%ld]", @@ -1858,11 +1858,11 @@ OLAPStatus SchemaChangeHandler::schema_version_convert( return res; SCHEMA_VERSION_CONVERT_ERR: - while (!new_rowsets->empty()) { - Rowset* rowset = new_rowsets->back(); - rowset->delete_all_files(); - SAFE_DELETE(rowset); - new_rowsets->pop_back(); + while (!new_segment_groups->empty()) { + SegmentGroup* segment_group = new_segment_groups->back(); + segment_group->delete_all_files(); + SAFE_DELETE(segment_group); + new_segment_groups->pop_back(); } SAFE_DELETE(sc_procedure); @@ -2038,15 +2038,15 @@ OLAPStatus SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) { (*it)->version().second); // we create a new delta with the same version as the ColumnData processing currently. - Rowset* new_rowset = new(nothrow) Rowset( + SegmentGroup* new_segment_group = new(nothrow) SegmentGroup( sc_params->new_olap_table.get(), (*it)->version(), (*it)->version_hash(), (*it)->delete_flag(), - (*it)->olap_index()->rowset_id(), 0); + (*it)->segment_group()->segment_group_id(), 0); - if (new_rowset == NULL) { - OLAP_LOG_WARNING("failed to malloc Rowset. [size=%ld]", sizeof(Rowset)); + if (new_segment_group == NULL) { + OLAP_LOG_WARNING("failed to malloc SegmentGroup. [size=%ld]", sizeof(SegmentGroup)); res = OLAP_ERR_MALLOC_ERROR; goto PROCESS_ALTER_EXIT; } @@ -2056,11 +2056,11 @@ OLAPStatus SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) { if (DEL_SATISFIED == del_ret) { OLAP_LOG_DEBUG("filter delta in schema change: %d, %d", (*it)->version().first, (*it)->version().second); - res = sc_procedure->create_init_version(new_rowset->table()->tablet_id(), - new_rowset->table()->schema_hash(), - new_rowset->version(), - new_rowset->version_hash(), - new_rowset); + res = sc_procedure->create_init_version(new_segment_group->table()->tablet_id(), + new_segment_group->table()->schema_hash(), + new_segment_group->version(), + new_segment_group->version_hash(), + new_segment_group); sc_procedure->add_filted_rows((*it)->num_rows()); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to create init version. [res=%d]", res); @@ -2077,12 +2077,12 @@ OLAPStatus SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) { (*it)->set_delete_status(DEL_NOT_SATISFIED); } - if (DEL_SATISFIED != del_ret && !sc_procedure->process(*it, new_rowset)) { + if (DEL_SATISFIED != del_ret && !sc_procedure->process(*it, new_segment_group)) { //if del_ret is DEL_SATISFIED, the new delta version has already been created in new_olap_table OLAP_LOG_WARNING("failed to process the version. [version='%d-%d']", (*it)->version().first, (*it)->version().second); - new_rowset->delete_all_files(); - SAFE_DELETE(new_rowset); + new_segment_group->delete_all_files(); + SAFE_DELETE(new_segment_group); res = OLAP_ERR_INPUT_PARAMETER_ERROR; goto PROCESS_ALTER_EXIT; @@ -2096,16 +2096,16 @@ OLAPStatus SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) { if (!sc_params->new_olap_table->has_version((*it)->version())) { // register version - std::vector rowset_vec; - rowset_vec.push_back(new_rowset); - res = sc_params->new_olap_table->register_data_source(rowset_vec); + std::vector segment_group_vec; + segment_group_vec.push_back(new_segment_group); + res = sc_params->new_olap_table->register_data_source(segment_group_vec); if (OLAP_SUCCESS != res) { OLAP_LOG_WARNING("failed to register new version. [table='%s' version='%d-%d']", sc_params->new_olap_table->full_name().c_str(), (*it)->version().first, (*it)->version().second); - new_rowset->delete_all_files(); - SAFE_DELETE(new_rowset); + new_segment_group->delete_all_files(); + SAFE_DELETE(new_segment_group); sc_params->new_olap_table->release_header_lock(); sc_params->ref_olap_table->release_header_lock(); @@ -2122,8 +2122,8 @@ OLAPStatus SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) { "[table='%s' version='%d-%d']", sc_params->new_olap_table->full_name().c_str(), (*it)->version().first, (*it)->version().second); - new_rowset->delete_all_files(); - SAFE_DELETE(new_rowset); + new_segment_group->delete_all_files(); + SAFE_DELETE(new_segment_group); } // 保存header @@ -2384,7 +2384,7 @@ OLAPStatus SchemaChange::create_init_version( SchemaHash schema_hash, Version version, VersionHash version_hash, - Rowset* rowset) { + SegmentGroup* segment_group) { OLAP_LOG_DEBUG("begin to create init version. [begin=%d end=%d]", version.first, version.second); @@ -2409,7 +2409,7 @@ OLAPStatus SchemaChange::create_init_version( } // Create writer, which write nothing to table, to generate empty data file - writer = ColumnDataWriter::create(table, rowset, false); + writer = ColumnDataWriter::create(table, segment_group, false); if (writer == NULL) { LOG(WARNING) << "fail to create writer. [table=" << table->full_name() << "]"; res = OLAP_ERR_MALLOC_ERROR; @@ -2423,7 +2423,7 @@ OLAPStatus SchemaChange::create_init_version( } // Load new index and add to table - res = rowset->load(); + res = segment_group->load(); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to load new index. [table=" << table->full_name() << "]"; break; diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h index 7fded321d3862f..84cafa6f79a2a0 100644 --- a/be/src/olap/schema_change.h +++ b/be/src/olap/schema_change.h @@ -150,7 +150,7 @@ class SchemaChange { SchemaChange() : _filted_rows(0), _merged_rows(0) {} virtual ~SchemaChange() {} - virtual bool process(ColumnData* olap_data, Rowset* new_olap_index) = 0; + virtual bool process(ColumnData* olap_data, SegmentGroup* new_segment_group) = 0; void add_filted_rows(uint64_t filted_rows) { _filted_rows += filted_rows; @@ -181,7 +181,7 @@ class SchemaChange { TSchemaHash schema_hash, Version version, VersionHash version_hash, - Rowset* olap_index); + SegmentGroup* segment_group); private: uint64_t _filted_rows; @@ -195,7 +195,7 @@ class LinkedSchemaChange : public SchemaChange { OLAPTablePtr new_olap_table); ~LinkedSchemaChange() {} - bool process(ColumnData* olap_data, Rowset* new_olap_index); + bool process(ColumnData* olap_data, SegmentGroup* new_segment_group); private: OLAPTablePtr _base_olap_table; OLAPTablePtr _new_olap_table; @@ -212,7 +212,7 @@ class SchemaChangeDirectly : public SchemaChange { const RowBlockChanger& row_block_changer); virtual ~SchemaChangeDirectly(); - virtual bool process(ColumnData* olap_data, Rowset* new_olap_index); + virtual bool process(ColumnData* olap_data, SegmentGroup* new_segment_group); private: OLAPTablePtr _olap_table; @@ -235,17 +235,17 @@ class SchemaChangeWithSorting : public SchemaChange { size_t memory_limitation); virtual ~SchemaChangeWithSorting(); - virtual bool process(ColumnData* olap_data, Rowset* new_olap_index); + virtual bool process(ColumnData* olap_data, SegmentGroup* new_segment_group); private: bool _internal_sorting( const std::vector& row_block_arr, const Version& temp_delta_versions, - Rowset** temp_olap_index); + SegmentGroup** temp_segment_group); bool _external_sorting( - std::vector& src_olap_index_arr, - Rowset* olap_index); + std::vector& src_segment_group_arr, + SegmentGroup* segment_group); OLAPTablePtr _olap_table; const RowBlockChanger& _row_block_changer; @@ -266,8 +266,8 @@ class SchemaChangeHandler { OLAPStatus schema_version_convert(OLAPTablePtr ref_olap_table, OLAPTablePtr new_olap_table, - std::vector* ref_olap_indices, - std::vector* new_olap_indices); + std::vector* ref_segment_groups, + std::vector* new_segment_groups); // 清空一个table下的schema_change信息:包括split_talbe以及其他schema_change信息 // 这里只清理自身的out链,不考虑related的table diff --git a/be/src/olap/rowset.cpp b/be/src/olap/segment_group.cpp similarity index 85% rename from be/src/olap/rowset.cpp rename to be/src/olap/segment_group.cpp index 240afd6681feed..e050d289fc8e44 100644 --- a/be/src/olap/rowset.cpp +++ b/be/src/olap/segment_group.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "olap/rowset.h" +#include "olap/segment_group.h" #include #include @@ -61,13 +61,13 @@ namespace doris { } \ } while (0); -Rowset::Rowset(OLAPTable* table, Version version, VersionHash version_hash, - bool delete_flag, int32_t rowset_id, int32_t num_segments) +SegmentGroup::SegmentGroup(OLAPTable* table, Version version, VersionHash version_hash, + bool delete_flag, int32_t segment_group_id, int32_t num_segments) : _table(table), _version(version), _version_hash(version_hash), _delete_flag(delete_flag), - _rowset_id(rowset_id), + _segment_group_id(segment_group_id), _num_segments(num_segments) { _index_loaded = false; _ref_count = 0; @@ -94,11 +94,11 @@ Rowset::Rowset(OLAPTable* table, Version version, VersionHash version_hash, } } -Rowset::Rowset(OLAPTable* table, bool delete_flag, - int32_t rowset_id, int32_t num_segments, bool is_pending, +SegmentGroup::SegmentGroup(OLAPTable* table, bool delete_flag, + int32_t segment_group_id, int32_t num_segments, bool is_pending, TPartitionId partition_id, TTransactionId transaction_id) : _table(table), _delete_flag(delete_flag), - _rowset_id(rowset_id), _num_segments(num_segments), + _segment_group_id(segment_group_id), _num_segments(num_segments), _is_pending(is_pending), _partition_id(partition_id), _transaction_id(transaction_id) { @@ -128,7 +128,7 @@ Rowset::Rowset(OLAPTable* table, bool delete_flag, } } -Rowset::~Rowset() { +SegmentGroup::~SegmentGroup() { delete [] _short_key_buf; _current_file_handler.close(); @@ -139,50 +139,50 @@ Rowset::~Rowset() { _seg_pb_map.clear(); } -string Rowset::construct_index_file_path(int32_t rowset_id, int32_t segment) const { +string SegmentGroup::construct_index_file_path(int32_t segment_group_id, int32_t segment) const { if (_is_pending) { - return _table->construct_pending_index_file_path(_transaction_id, _rowset_id, segment); + return _table->construct_pending_index_file_path(_transaction_id, _segment_group_id, segment); } else { - return _table->construct_index_file_path(_version, _version_hash, _rowset_id, segment); + return _table->construct_index_file_path(_version, _version_hash, _segment_group_id, segment); } } -string Rowset::construct_data_file_path(int32_t rowset_id, int32_t segment) const { +string SegmentGroup::construct_data_file_path(int32_t segment_group_id, int32_t segment) const { if (_is_pending) { - return _table->construct_pending_data_file_path(_transaction_id, rowset_id, segment); + return _table->construct_pending_data_file_path(_transaction_id, segment_group_id, segment); } else { - return _table->construct_data_file_path(_version, _version_hash, rowset_id, segment); + return _table->construct_data_file_path(_version, _version_hash, segment_group_id, segment); } } -void Rowset::publish_version(Version version, VersionHash version_hash) { +void SegmentGroup::publish_version(Version version, VersionHash version_hash) { _version = version; _version_hash = version_hash; } -void Rowset::acquire() { +void SegmentGroup::acquire() { atomic_inc(&_ref_count); } -int64_t Rowset::ref_count() { +int64_t SegmentGroup::ref_count() { return _ref_count; } -void Rowset::release() { +void SegmentGroup::release() { atomic_dec(&_ref_count); } -bool Rowset::is_in_use() { +bool SegmentGroup::is_in_use() { return _ref_count > 0; } -// you can not use Rowset after delete_all_files(), or else unknown behavior occurs. -void Rowset::delete_all_files() { +// you can not use SegmentGroup after delete_all_files(), or else unknown behavior occurs. +void SegmentGroup::delete_all_files() { if (!_file_created) { return; } for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { // get full path for one segment - string index_path = construct_index_file_path(_rowset_id, seg_id); - string data_path = construct_data_file_path(_rowset_id, seg_id); + string index_path = construct_index_file_path(_segment_group_id, seg_id); + string data_path = construct_data_file_path(_segment_group_id, seg_id); if (remove(index_path.c_str()) != 0) { char errmsg[64]; @@ -198,7 +198,7 @@ void Rowset::delete_all_files() { } } -OLAPStatus Rowset::add_column_statistics_for_linked_schema_change( +OLAPStatus SegmentGroup::add_column_statistics_for_linked_schema_change( const std::vector>& column_statistic_fields) { //When add rollup table, the base table index maybe empty if (column_statistic_fields.size() == 0) { @@ -222,7 +222,7 @@ OLAPStatus Rowset::add_column_statistics_for_linked_schema_change( return OLAP_SUCCESS; } -OLAPStatus Rowset::add_column_statistics( +OLAPStatus SegmentGroup::add_column_statistics( const std::vector>& column_statistic_fields) { DCHECK(column_statistic_fields.size() == _table->num_key_fields()); for (size_t i = 0; i < column_statistic_fields.size(); ++i) { @@ -239,7 +239,7 @@ OLAPStatus Rowset::add_column_statistics( return OLAP_SUCCESS; } -OLAPStatus Rowset::add_column_statistics( +OLAPStatus SegmentGroup::add_column_statistics( std::vector > &column_statistic_strings, std::vector &null_vec) { DCHECK(column_statistic_strings.size() == _table->num_key_fields()); @@ -259,7 +259,7 @@ OLAPStatus Rowset::add_column_statistics( return OLAP_SUCCESS; } -OLAPStatus Rowset::load() { +OLAPStatus SegmentGroup::load() { if (_empty) { return OLAP_SUCCESS; } @@ -284,7 +284,7 @@ OLAPStatus Rowset::load() { // for each segment for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { if (COLUMN_ORIENTED_FILE == _table->data_file_type()) { - string seg_path = construct_data_file_path(_rowset_id, seg_id); + string seg_path = construct_data_file_path(_segment_group_id, seg_id); if (OLAP_SUCCESS != (res = load_pb(seg_path.c_str(), seg_id))) { LOG(WARNING) << "failed to load pb structures. [seg_path='" << seg_path << "']"; _check_io_error(res); @@ -293,7 +293,7 @@ OLAPStatus Rowset::load() { } // get full path for one segment - string path = construct_index_file_path(_rowset_id, seg_id); + string path = construct_index_file_path(_segment_group_id, seg_id); if ((res = _index.load_segment(path.c_str(), &_current_num_rows_per_row_block)) != OLAP_SUCCESS) { LOG(WARNING) << "fail to load segment. [path='" << path << "']"; @@ -309,7 +309,7 @@ OLAPStatus Rowset::load() { return OLAP_SUCCESS; } -OLAPStatus Rowset::load_pb(const char* file, uint32_t seg_id) { +OLAPStatus SegmentGroup::load_pb(const char* file, uint32_t seg_id) { OLAPStatus res = OLAP_SUCCESS; FileHeader seg_file_header; @@ -332,11 +332,11 @@ OLAPStatus Rowset::load_pb(const char* file, uint32_t seg_id) { return OLAP_SUCCESS; } -bool Rowset::index_loaded() { +bool SegmentGroup::index_loaded() { return _index_loaded; } -OLAPStatus Rowset::validate() { +OLAPStatus SegmentGroup::validate() { if (_empty) { return OLAP_SUCCESS; } @@ -347,8 +347,8 @@ OLAPStatus Rowset::validate() { FileHeader data_file_header; // get full path for one segment - string index_path = construct_index_file_path(_rowset_id, seg_id); - string data_path = construct_data_file_path(_rowset_id, seg_id); + string index_path = construct_index_file_path(_segment_group_id, seg_id); + string data_path = construct_data_file_path(_segment_group_id, seg_id); // 检查index文件头 if ((res = index_file_header.validate(index_path)) != OLAP_SUCCESS) { @@ -368,7 +368,7 @@ OLAPStatus Rowset::validate() { return OLAP_SUCCESS; } -OLAPStatus Rowset::find_row_block(const RowCursor& key, +OLAPStatus SegmentGroup::find_row_block(const RowCursor& key, RowCursor* helper_cursor, bool find_last, RowBlockPosition* pos) const { @@ -393,7 +393,7 @@ OLAPStatus Rowset::find_row_block(const RowCursor& key, return _index.get_row_block_position(offset, pos); } -OLAPStatus Rowset::find_short_key(const RowCursor& key, +OLAPStatus SegmentGroup::find_short_key(const RowCursor& key, RowCursor* helper_cursor, bool find_last, RowBlockPosition* pos) const { @@ -416,28 +416,28 @@ OLAPStatus Rowset::find_short_key(const RowCursor& key, return _index.get_row_block_position(offset, pos); } -OLAPStatus Rowset::get_row_block_entry(const RowBlockPosition& pos, EntrySlice* entry) const { +OLAPStatus SegmentGroup::get_row_block_entry(const RowBlockPosition& pos, EntrySlice* entry) const { TABLE_PARAM_VALIDATE(); SLICE_PARAM_VALIDATE(entry); return _index.get_entry(_index.get_offset(pos), entry); } -OLAPStatus Rowset::find_first_row_block(RowBlockPosition* position) const { +OLAPStatus SegmentGroup::find_first_row_block(RowBlockPosition* position) const { TABLE_PARAM_VALIDATE(); POS_PARAM_VALIDATE(position); return _index.get_row_block_position(_index.find_first(), position); } -OLAPStatus Rowset::find_last_row_block(RowBlockPosition* position) const { +OLAPStatus SegmentGroup::find_last_row_block(RowBlockPosition* position) const { TABLE_PARAM_VALIDATE(); POS_PARAM_VALIDATE(position); return _index.get_row_block_position(_index.find_last(), position); } -OLAPStatus Rowset::find_next_row_block(RowBlockPosition* pos, bool* eof) const { +OLAPStatus SegmentGroup::find_next_row_block(RowBlockPosition* pos, bool* eof) const { TABLE_PARAM_VALIDATE(); POS_PARAM_VALIDATE(pos); POS_PARAM_VALIDATE(eof); @@ -454,7 +454,7 @@ OLAPStatus Rowset::find_next_row_block(RowBlockPosition* pos, bool* eof) const { return _index.get_row_block_position(next, pos); } -OLAPStatus Rowset::find_mid_point(const RowBlockPosition& low, +OLAPStatus SegmentGroup::find_mid_point(const RowBlockPosition& low, const RowBlockPosition& high, RowBlockPosition* output, uint32_t* dis) const { @@ -471,7 +471,7 @@ OLAPStatus Rowset::find_mid_point(const RowBlockPosition& low, } } -OLAPStatus Rowset::find_prev_point( +OLAPStatus SegmentGroup::find_prev_point( const RowBlockPosition& current, RowBlockPosition* prev) const { OLAPIndexOffset current_offset = _index.get_offset(current); OLAPIndexOffset prev_offset = _index.prev(current_offset); @@ -479,7 +479,7 @@ OLAPStatus Rowset::find_prev_point( return _index.get_row_block_position(prev_offset, prev); } -OLAPStatus Rowset::advance_row_block(int64_t num_row_blocks, RowBlockPosition* position) const { +OLAPStatus SegmentGroup::advance_row_block(int64_t num_row_blocks, RowBlockPosition* position) const { TABLE_PARAM_VALIDATE(); POS_PARAM_VALIDATE(position); @@ -493,7 +493,7 @@ OLAPStatus Rowset::advance_row_block(int64_t num_row_blocks, RowBlockPosition* p } // PRECONDITION position1 < position2 -uint32_t Rowset::compute_distance(const RowBlockPosition& position1, +uint32_t SegmentGroup::compute_distance(const RowBlockPosition& position1, const RowBlockPosition& position2) const { iterator_offset_t offset1 = _index.get_absolute_offset(_index.get_offset(position1)); iterator_offset_t offset2 = _index.get_absolute_offset(_index.get_offset(position2)); @@ -501,7 +501,7 @@ uint32_t Rowset::compute_distance(const RowBlockPosition& position1, return offset2 > offset1 ? offset2 - offset1 : 0; } -OLAPStatus Rowset::add_segment() { +OLAPStatus SegmentGroup::add_segment() { // 打开文件 ++_num_segments; @@ -535,16 +535,16 @@ OLAPStatus Rowset::add_segment() { return OLAP_SUCCESS; } -OLAPStatus Rowset::add_row_block(const RowBlock& row_block, const uint32_t data_offset) { +OLAPStatus SegmentGroup::add_row_block(const RowBlock& row_block, const uint32_t data_offset) { // get first row of the row_block to distill index item. row_block.get_row(0, &_current_index_row); return add_short_key(_current_index_row, data_offset); } -OLAPStatus Rowset::add_short_key(const RowCursor& short_key, const uint32_t data_offset) { +OLAPStatus SegmentGroup::add_short_key(const RowCursor& short_key, const uint32_t data_offset) { OLAPStatus res = OLAP_SUCCESS; if (!_new_segment_created) { - string file_path = construct_index_file_path(_rowset_id, _num_segments - 1); + string file_path = construct_index_file_path(_segment_group_id, _num_segments - 1); res = _current_file_handler.open_with_mode( file_path.c_str(), O_CREAT | O_EXCL | O_WRONLY, S_IRUSR | S_IWUSR); if (res != OLAP_SUCCESS) { @@ -603,7 +603,7 @@ OLAPStatus Rowset::add_short_key(const RowCursor& short_key, const uint32_t data return OLAP_SUCCESS; } -OLAPStatus Rowset::finalize_segment(uint32_t data_segment_size, int64_t num_rows) { +OLAPStatus SegmentGroup::finalize_segment(uint32_t data_segment_size, int64_t num_rows) { // 准备FileHeader OLAPStatus res = OLAP_SUCCESS; @@ -640,20 +640,20 @@ OLAPStatus Rowset::finalize_segment(uint32_t data_segment_size, int64_t num_rows return OLAP_SUCCESS; } -void Rowset::sync() { +void SegmentGroup::sync() { if (_current_file_handler.sync() == -1) { OLAP_LOG_WARNING("fail to sync file.[err=%m]"); _table->set_io_error(); } } -void Rowset::_check_io_error(OLAPStatus res) { +void SegmentGroup::_check_io_error(OLAPStatus res) { if (is_io_error(res)) { _table->set_io_error(); } } -uint64_t Rowset::num_index_entries() const { +uint64_t SegmentGroup::num_index_entries() const { return _index.count(); } diff --git a/be/src/olap/rowset.h b/be/src/olap/segment_group.h similarity index 90% rename from be/src/olap/rowset.h rename to be/src/olap/segment_group.h index 59d5450a492705..19d17af4f10f16 100644 --- a/be/src/olap/rowset.h +++ b/be/src/olap/segment_group.h @@ -39,22 +39,22 @@ namespace doris { -// Class for managing OLAP table indices +// Class for segments management // For fast key lookup, we maintain a sparse index for every data file. The // index is sparse because we only have one pointer per row block. Each // index entry contains the short key for the first row of the // corresponding row block -class Rowset { +class SegmentGroup { friend class MemIndex; public: - Rowset(OLAPTable* table, Version version, VersionHash version_hash, - bool delete_flag, int rowset_id, int32_t num_segments); + SegmentGroup(OLAPTable* table, Version version, VersionHash version_hash, + bool delete_flag, int segment_group_id, int32_t num_segments); - Rowset(OLAPTable* table, bool delete_flag, int32_t rowset_id, + SegmentGroup(OLAPTable* table, bool delete_flag, int32_t segment_group_id, int32_t num_segments, bool is_pending, TPartitionId partition_id, TTransactionId transaction_id); - virtual ~Rowset(); + virtual ~SegmentGroup(); // Load the index into memory. OLAPStatus load(); @@ -167,8 +167,8 @@ class Rowset { inline bool delete_flag() const { return _delete_flag; } - inline int32_t rowset_id() const { return _rowset_id; } - inline void set_rowset_id(int32_t rowset_id) { _rowset_id = rowset_id; } + inline int32_t segment_group_id() const { return _segment_group_id; } + inline void set_segment_group_id(int32_t segment_group_id) { _segment_group_id = segment_group_id; } inline PUniqueId load_id() const { return _load_id; } inline void set_load_id(const PUniqueId& load_id) { _load_id = load_id; } @@ -233,21 +233,21 @@ class Rowset { return _index.get_null_supported(seg_id); } - std::string construct_index_file_path(int32_t rowset_id, int32_t segment) const; - std::string construct_data_file_path(int32_t rowset_id, int32_t segment) const; + std::string construct_index_file_path(int32_t segment_group_id, int32_t segment) const; + std::string construct_data_file_path(int32_t segment_group_id, int32_t segment) const; void publish_version(Version version, VersionHash version_hash); private: void _check_io_error(OLAPStatus res); - OLAPTable* _table; // table definition for this index + OLAPTable* _table; // table definition for this segmentgroup Version _version; // version of associated data file - VersionHash _version_hash; // version hash for this index + VersionHash _version_hash; // version hash for this segmentgroup bool _delete_flag; - int32_t _rowset_id; // rowset id of olapindex - PUniqueId _load_id; // load id for rowset - int32_t _num_segments; // number of segments in this index - bool _index_loaded; // whether the index has been read + int32_t _segment_group_id; // segmentgroup id of segmentgroup + PUniqueId _load_id; // load id for segmentgroup + int32_t _num_segments; // number of segments in this segmentgroup + bool _index_loaded; // whether the segmentgroup has been read atomic_t _ref_count; // reference count MemIndex _index; bool _is_pending; @@ -284,7 +284,7 @@ class Rowset { std::vector> _column_statistics; std::unordered_map > _seg_pb_map; - DISALLOW_COPY_AND_ASSIGN(Rowset); + DISALLOW_COPY_AND_ASSIGN(SegmentGroup); }; } diff --git a/be/src/olap/segment_reader.cpp b/be/src/olap/segment_reader.cpp index 89b9a8a43a5f64..2a50306feb48e7 100644 --- a/be/src/olap/segment_reader.cpp +++ b/be/src/olap/segment_reader.cpp @@ -26,7 +26,7 @@ #include "olap/out_stream.h" #include "olap/olap_cond.h" #include "olap/row_block.h" -#include "olap/rowset.h" +#include "olap/segment_group.h" namespace doris { @@ -35,7 +35,7 @@ static const uint32_t MIN_FILTER_BLOCK_NUM = 10; SegmentReader::SegmentReader( const std::string file, OLAPTable* table, - Rowset* index, + SegmentGroup* segment_group, uint32_t segment_id, const std::vector& used_columns, const std::set& load_bf_columns, @@ -47,7 +47,7 @@ SegmentReader::SegmentReader( OlapReaderStatistics* stats) : _file_name(file), _table(table), - _olap_index(index), + _segment_group(segment_group), _segment_id(segment_id), _conditions(conditions), _delete_handler(delete_handler), @@ -139,8 +139,8 @@ OLAPStatus SegmentReader::_load_segment_file() { //VLOG(3) << "seg file : " << _file_name; // In file_header.unserialize(), it validates file length, signature, checksum of protobuf. - _file_header = _olap_index->get_seg_pb(_segment_id); - _null_supported = _olap_index->get_null_supported(_segment_id); + _file_header = _segment_group->get_seg_pb(_segment_id); + _null_supported = _segment_group->get_null_supported(_segment_id); _header_length = _file_header->size(); res = _check_file_version(); @@ -389,7 +389,7 @@ OLAPStatus SegmentReader::_pick_delete_row_groups(uint32_t first_block, uint32_t } for (auto& delete_condition : _delete_handler.get_delete_conditions()) { - if (delete_condition.filter_version <= _olap_index->version().first) { + if (delete_condition.filter_version <= _segment_group->version().first) { continue; } @@ -492,7 +492,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b FieldAggregationMethod aggregation = _table->get_aggregation_by_index(i.first); bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE || (aggregation == OLAP_FIELD_AGGREGATION_REPLACE - && _olap_index->version().first == 0)); + && _segment_group->version().first == 0)); if (!is_continue) { continue; } @@ -533,7 +533,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b FieldAggregationMethod aggregation = _table->get_aggregation_by_index(i); bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE || (aggregation == OLAP_FIELD_AGGREGATION_REPLACE - && _olap_index->version().first == 0)); + && _segment_group->version().first == 0)); if (!is_continue) { continue; } @@ -722,9 +722,9 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) { OLAP_LOG_WARNING("_header_message().number_of_rows()=%d," "_header_message().num_rows_per_block()=%d, table='%s', version='%d-%d'", _header_message().number_of_rows(), _header_message().num_rows_per_block(), - _olap_index->table()->full_name().c_str(), - _olap_index->version().first, _olap_index->version().second); - LOG(WARNING) << "version:" << _olap_index->version().first << "-" << _olap_index->version().second; + _segment_group->table()->full_name().c_str(), + _segment_group->version().first, _segment_group->version().second); + LOG(WARNING) << "version:" << _segment_group->version().first << "-" << _segment_group->version().second; return OLAP_ERR_FILE_FORMAT_ERROR; } } diff --git a/be/src/olap/segment_reader.h b/be/src/olap/segment_reader.h index f51a5e48208e34..5f292669f8da96 100644 --- a/be/src/olap/segment_reader.h +++ b/be/src/olap/segment_reader.h @@ -46,7 +46,7 @@ namespace doris { -class Rowset; +class SegmentGroup; class ColumnReader; @@ -56,7 +56,7 @@ class SegmentReader { public: explicit SegmentReader(const std::string file, OLAPTable* table, - Rowset* index, + SegmentGroup* segment_group, uint32_t segment_id, const std::vector& return_columns, const std::set& load_bf_columns, @@ -86,7 +86,7 @@ class SegmentReader { // first_block: 需要读取的第一个block // last_block: 需要读取的最后一个block,如果last_block大于最大的block, // 则读取所有的block - // 1. 按conditions过滤index中的统计信息, 确定需要读取的block列表 + // 1. 按conditions过滤segment_group中的统计信息, 确定需要读取的block列表 // 2. 读取blocks, 构造InStream // 3. 创建并初始化Readers // Outputs: @@ -278,7 +278,7 @@ class SegmentReader { doris::FileHandler _file_handler; // 文件handler OLAPTable* _table; - Rowset* _olap_index; + SegmentGroup* _segment_group; uint32_t _segment_id; const Conditions* _conditions; // 列过滤条件 diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index e8674f80794c7b..fc58a40c617ef2 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -54,12 +54,12 @@ message PDelta { required int64 end_version = 2; required int64 version_hash = 3; required int64 creation_time = 4; - repeated PRowSet rowset = 5; + repeated PSegmentGroup segment_group = 5; optional DeleteConditionMessage delete_condition = 6; } -message PRowSet { - required int32 rowset_id = 1; +message PSegmentGroup { + required int32 segment_group_id = 1; required int32 num_segments = 2; required int64 index_size = 3; required int64 data_size = 4; @@ -72,12 +72,12 @@ message PPendingDelta { required int64 partition_id = 1; required int64 transaction_id = 2; required int64 creation_time = 3; - repeated PPendingRowSet pending_rowset = 4; + repeated PPendingSegmentGroup pending_segment_group = 4; optional DeleteConditionMessage delete_condition = 5; } -message PPendingRowSet { - required int32 pending_rowset_id = 1; +message PPendingSegmentGroup { + required int32 pending_segment_group_id = 1; required int32 num_segments = 2; required PUniqueId load_id = 3; repeated ColumnPruning column_pruning = 4;