[feature](vertical_compaction) support vertical compaction #13359
olap/compaction.cpp

@@ -22,6 +22,7 @@

```cpp
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_meta.h"
#include "olap/tablet.h"
#include "olap/task/engine_checksum_task.h"
#include "util/time.h"
#include "util/trace.h"
```
@@ -112,14 +113,50 @@ Status Compaction::quick_rowsets_compact() {

```cpp
Status Compaction::do_compaction(int64_t permits) {
    TRACE("start to do compaction");
    uint32_t checksum_before;
    uint32_t checksum_after;
    if (config::enable_compaction_checksum) {
        EngineChecksumTask checksum_task(_tablet->tablet_id(), _tablet->schema_hash(),
                                         _input_rowsets.back()->end_version(), &checksum_before);
        checksum_task.execute();
    }

    _tablet->data_dir()->disks_compaction_score_increment(permits);
    _tablet->data_dir()->disks_compaction_num_increment(1);
    Status st = do_compaction_impl(permits);
    _tablet->data_dir()->disks_compaction_score_increment(-permits);
    _tablet->data_dir()->disks_compaction_num_increment(-1);

    if (config::enable_compaction_checksum) {
        EngineChecksumTask checksum_task(_tablet->tablet_id(), _tablet->schema_hash(),
                                         _input_rowsets.back()->end_version(), &checksum_after);
        checksum_task.execute();
        if (checksum_before != checksum_after) {
            LOG(WARNING) << "Compaction tablet=" << _tablet->tablet_id()
                         << " checksum not consistent"
                         << ", before=" << checksum_before << ", checksum_after=" << checksum_after;
        }
    }
    return st;
}
```

Contributor comment (on the second `EngineChecksumTask` construction): EngineChecksumTask does not do merge for unique key, so it does not work for now?

Contributor comment (on `if (checksum_before != checksum_after)`): Should use DCHECK here.
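The checksum guard above only logs a warning on mismatch; it does not fail the compaction (hence the suggestion to use DCHECK). A minimal standalone sketch of why the two checksums are expected to match, assuming the checksum is computed over the logically visible data at the chosen version (toy types and names below, not Doris code): a correct compaction changes the physical layout but not the visible rows, so the checksum stays the same.

```cpp
// Toy model: a "rowset" is a vector of (key, version, value) rows, the visible
// data is the newest row per key, and compaction rewrites the rows without
// changing what is visible. A correct compaction keeps the checksum stable.
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

struct Row {
    int64_t key;
    int64_t version;
    int64_t value;
};

// Newest row per key, i.e. what a reader would see.
std::map<int64_t, Row> visible(const std::vector<Row>& rows) {
    std::map<int64_t, Row> latest;
    for (const auto& r : rows) {
        auto it = latest.find(r.key);
        if (it == latest.end() || it->second.version < r.version) {
            latest[r.key] = r;
        }
    }
    return latest;
}

// Order-independent checksum over the visible rows.
uint32_t visible_checksum(const std::vector<Row>& rows) {
    uint32_t crc = 0;
    for (const auto& [key, r] : visible(rows)) {
        crc ^= static_cast<uint32_t>(key * 2654435761ULL) ^ static_cast<uint32_t>(r.value);
    }
    return crc;
}

// "Compaction": rewrite the rows, keeping only the newest version of each key.
std::vector<Row> compact(const std::vector<Row>& rows) {
    std::vector<Row> out;
    for (const auto& kv : visible(rows)) {
        out.push_back(kv.second);
    }
    return out;
}

int main() {
    std::vector<Row> input = {{1, 1, 10}, {1, 2, 11}, {2, 1, 20}, {3, 1, 30}, {3, 3, 33}};
    uint32_t checksum_before = visible_checksum(input);
    std::vector<Row> output = compact(input);
    uint32_t checksum_after = visible_checksum(output);
    std::cout << (checksum_before == checksum_after ? "consistent" : "not consistent") << '\n';
}
```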
```cpp
bool Compaction::should_vertical_compaction() {
    // some conditions that not use vertical compaction
    if (!config::enable_vertical_compaction) {
        return false;
    }
    if (_tablet->enable_unique_key_merge_on_write()) {
        return false;
    }
    return true;
}
```
```cpp
int64_t Compaction::get_avg_segment_rows() {
    // take care of empty rowset
    // todo(yixiu): add a new conf of segment size in compaction
    return config::write_buffer_size / (_input_rowsets_size / (_input_row_num + 1) + 1);
}

Status Compaction::do_compaction_impl(int64_t permits) {
    OlapStopWatch watch;
```
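The `get_avg_segment_rows()` estimate above targets output segments of roughly `write_buffer_size` bytes: it divides that budget by the average input row size in bytes, with the `+ 1` terms guarding against division by zero for empty inputs. A quick sketch with assumed numbers (all values hypothetical, not Doris defaults):

```cpp
#include <cstdint>
#include <iostream>

int main() {
    // Assumed inputs, for illustration only.
    int64_t write_buffer_size = 209715200;   // pretend config::write_buffer_size is 200 MiB
    int64_t input_rowsets_size = 1073741824; // 1 GiB of input rowset data
    int64_t input_row_num = 10000000;        // 10 million input rows

    // Same formula as Compaction::get_avg_segment_rows():
    // average row size is ~107 bytes, so each output segment targets ~1.9M rows.
    int64_t max_rows_per_segment =
            write_buffer_size / (input_rowsets_size / (input_row_num + 1) + 1);
    std::cout << "max_rows_per_segment = " << max_rows_per_segment << '\n'; // 1941807
}
```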
@@ -142,17 +179,19 @@ Status Compaction::do_compaction_impl(int64_t permits) {

```cpp
    auto use_vectorized_compaction = config::enable_vectorized_compaction;
    string merge_type = use_vectorized_compaction ? "v" : "";
    bool vertical_compaction = should_vertical_compaction();

    LOG(INFO) << "start " << merge_type << compaction_name() << ". tablet=" << _tablet->full_name()
              << ", output_version=" << _output_version << ", permits: " << permits
              << ", is_vertical_compaction=" << vertical_compaction;
    // get cur schema if rowset schema exist, rowset schema must be newer than tablet schema
    std::vector<RowsetMetaSharedPtr> rowset_metas(_input_rowsets.size());
    std::transform(_input_rowsets.begin(), _input_rowsets.end(), rowset_metas.begin(),
                   [](const RowsetSharedPtr& rowset) { return rowset->rowset_meta(); });
    TabletSchemaSPtr cur_tablet_schema =
            _tablet->rowset_meta_with_max_schema_version(rowset_metas)->tablet_schema();

    RETURN_NOT_OK(construct_output_rowset_writer(cur_tablet_schema, vertical_compaction));
    RETURN_NOT_OK(construct_input_rowset_readers());
    TRACE("prepare finished");
```
@@ -166,8 +205,14 @@ Status Compaction::do_compaction_impl(int64_t permits) {

```cpp
    }

    if (use_vectorized_compaction) {
        if (vertical_compaction) {
            res = Merger::vertical_merge_rowsets(_tablet, compaction_type(), cur_tablet_schema,
                                                 _input_rs_readers, _output_rs_writer.get(),
                                                 get_avg_segment_rows(), &stats);
        } else {
            res = Merger::vmerge_rowsets(_tablet, compaction_type(), cur_tablet_schema,
                                         _input_rs_readers, _output_rs_writer.get(), &stats);
        }
    } else {
        res = Merger::merge_rowsets(_tablet, compaction_type(), cur_tablet_schema,
                                    _input_rs_readers, _output_rs_writer.get(), &stats);
```
@@ -233,11 +278,15 @@ Status Compaction::do_compaction_impl(int64_t permits) {

```cpp
              << "s. cumulative_compaction_policy="
              << (cumu_policy == nullptr ? "quick" : cumu_policy->name())
              << ", compact_row_per_second=" << int(_input_row_num / watch.get_elapse_second());

    return Status::OK();
}

Status Compaction::construct_output_rowset_writer(TabletSchemaSPtr schema, bool is_vertical) {
    if (is_vertical) {
        return _tablet->create_vertical_rowset_writer(_output_version, VISIBLE, NONOVERLAPPING,
                                                      schema, _oldest_write_timestamp,
                                                      _newest_write_timestamp, &_output_rs_writer);
    }
    return _tablet->create_rowset_writer(_output_version, VISIBLE, NONOVERLAPPING, schema,
                                         _oldest_write_timestamp, _newest_write_timestamp,
                                         &_output_rs_writer);
```
olap/merger.cpp

@@ -26,6 +26,8 @@

```cpp
#include "olap/tuple_reader.h"
#include "util/trace.h"
#include "vec/olap/block_reader.h"
#include "vec/olap/vertical_block_reader.h"
#include "vec/olap/vertical_merge_iterator.h"

namespace doris {
```
@@ -188,4 +190,137 @@ Status Merger::vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,

```cpp
    return Status::OK();
}

// split columns into several groups, make sure all keys in one group
// unique_key should consider sequence&delete column
void Merger::vertical_split_columns(TabletSchemaSPtr tablet_schema,
                                    std::vector<std::vector<uint32_t>>* column_groups) {
    uint32_t num_columns_per_group = config::vertical_compaction_num_columns_per_group;
    uint32_t num_key_cols = tablet_schema->num_key_columns();
    uint32_t total_cols = tablet_schema->num_columns();
    std::vector<uint32_t> key_columns;
    for (auto i = 0; i < num_key_cols; ++i) {
        key_columns.emplace_back(i);
    }
    // in unique key, sequence & delete sign column should merge with key columns
    int32_t sequence_col_idx = -1;
    int32_t delete_sign_idx = -1;
    // in key column compaction, seq_col real index is _block->columns() - 2
    // and delete_sign column is _block->columns() - 1
    if (tablet_schema->keys_type() == KeysType::UNIQUE_KEYS) {
        if (tablet_schema->has_sequence_col()) {
            sequence_col_idx = tablet_schema->sequence_col_idx();
            key_columns.emplace_back(sequence_col_idx);
        }
        delete_sign_idx = tablet_schema->field_index(DELETE_SIGN);
        key_columns.emplace_back(delete_sign_idx);
    }
    VLOG_NOTICE << "sequence_col_idx=" << sequence_col_idx
                << ", delete_sign_idx=" << delete_sign_idx;
    column_groups->emplace_back(std::move(key_columns));
    std::vector<uint32_t> value_columns;
    for (auto i = num_key_cols; i < total_cols; ++i) {
        if (i == sequence_col_idx || i == delete_sign_idx) {
            continue;
        }
        if ((i - num_key_cols) % num_columns_per_group == 0) {
            column_groups->emplace_back();
        }
        column_groups->back().emplace_back(i);
    }
}
```

Contributor comment (on `key_columns.emplace_back(delete_sign_idx);`): There is no delete sign idx in some tables, e.g. tables created in an older Doris.
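As a concrete illustration of the grouping loop above, here is a standalone sketch with hypothetical column counts (not the Doris API): a duplicate-key schema with 2 key columns, 7 value columns, and `vertical_compaction_num_columns_per_group = 5` splits into a key group `[0, 1]` and value groups `[2, 3, 4, 5, 6]` and `[7, 8]`.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // Hypothetical schema: 2 key columns + 7 value columns, group size 5.
    uint32_t num_key_cols = 2;
    uint32_t total_cols = 9;
    uint32_t num_columns_per_group = 5; // config::vertical_compaction_num_columns_per_group

    std::vector<std::vector<uint32_t>> column_groups;

    // Group 0: all key columns (sequence/delete-sign columns would join it for unique keys).
    std::vector<uint32_t> key_columns;
    for (uint32_t i = 0; i < num_key_cols; ++i) {
        key_columns.push_back(i);
    }
    column_groups.push_back(std::move(key_columns));

    // Remaining columns are chunked into fixed-size value groups.
    for (uint32_t i = num_key_cols; i < total_cols; ++i) {
        if ((i - num_key_cols) % num_columns_per_group == 0) {
            column_groups.emplace_back();
        }
        column_groups.back().push_back(i);
    }

    for (const auto& group : column_groups) { // prints: "0 1", "2 3 4 5 6", "7 8"
        for (uint32_t col : group) {
            std::cout << col << ' ';
        }
        std::cout << '\n';
    }
}
```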
```cpp
Status Merger::vertical_compact_one_group(
        TabletSharedPtr tablet, ReaderType reader_type, TabletSchemaSPtr tablet_schema, bool is_key,
        const std::vector<uint32_t>& column_group, vectorized::RowSourcesBuffer* row_source_buf,
        const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
        RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment, Statistics* stats_output) {
    // build tablet reader
    VLOG_NOTICE << "vertical compact one group, max_rows_per_segment=" << max_rows_per_segment;
    vectorized::VerticalBlockReader reader(row_source_buf);
    TabletReader::ReaderParams reader_params;
    reader_params.is_key_column_group = is_key;
    reader_params.tablet = tablet;
    reader_params.reader_type = reader_type;
    reader_params.rs_readers = src_rowset_readers;
    reader_params.version = dst_rowset_writer->version();
    {
        std::shared_lock rdlock(tablet->get_header_lock());
        auto delete_preds = tablet->delete_predicates();
        std::copy(delete_preds.cbegin(), delete_preds.cend(),
                  std::inserter(reader_params.delete_predicates,
                                reader_params.delete_predicates.begin()));
    }
    TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>();
    merge_tablet_schema->copy_from(*tablet_schema);
    // Merge the columns in delete predicate that not in latest schema in to current tablet schema
    for (auto& del_pred_rs : reader_params.delete_predicates) {
        merge_tablet_schema->merge_dropped_columns(tablet->tablet_schema(del_pred_rs->version()));
    }
    reader_params.tablet_schema = merge_tablet_schema;

    reader_params.return_columns = column_group;
    reader_params.origin_return_columns = &reader_params.return_columns;
    RETURN_NOT_OK(reader.init(reader_params));

    vectorized::Block block = tablet_schema->create_block(reader_params.return_columns);
    size_t output_rows = 0;
    bool eof = false;
    while (!eof) {
        // Read one block from block reader
        RETURN_NOT_OK_LOG(
                reader.next_block_with_aggregation(&block, nullptr, nullptr, &eof),
                "failed to read next block when merging rowsets of tablet " + tablet->full_name());
        RETURN_NOT_OK_LOG(
                dst_rowset_writer->add_columns(&block, column_group, is_key, max_rows_per_segment),
                "failed to write block when merging rowsets of tablet " + tablet->full_name());

        output_rows += block.rows();
        block.clear_column_data();
    }

    if (is_key && stats_output != nullptr) {
        stats_output->output_rows = output_rows;
        stats_output->merged_rows = reader.merged_rows();
        stats_output->filtered_rows = reader.filtered_rows();
    }
    RETURN_IF_ERROR(dst_rowset_writer->flush_columns());

    return Status::OK();
}
```
```cpp
// steps to do vertical merge:
// 1. split columns into column groups
// 2. compact groups one by one, generate a row_source_buf when compact key group
//    and use this row_source_buf to compact value column groups
// 3. build output rowset
Status Merger::vertical_merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
                                      TabletSchemaSPtr tablet_schema,
                                      const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
                                      RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment,
                                      Statistics* stats_output) {
    LOG(INFO) << "Start to do vertical compaction, tablet_id: " << tablet->tablet_id();
    std::vector<std::vector<uint32_t>> column_groups;
    vertical_split_columns(tablet_schema, &column_groups);

    vectorized::RowSourcesBuffer row_sources_buf(tablet->tablet_id(), tablet->tablet_path(),
                                                 reader_type);
    // compact group one by one
    for (auto i = 0; i < column_groups.size(); ++i) {
        VLOG_NOTICE << "row source size: " << row_sources_buf.total_size();
        bool is_key = (i == 0);
        RETURN_IF_ERROR(vertical_compact_one_group(
                tablet, reader_type, tablet_schema, is_key, column_groups[i], &row_sources_buf,
                src_rowset_readers, dst_rowset_writer, max_rows_per_segment, stats_output));
        if (is_key) {
            row_sources_buf.flush();
        }
        row_sources_buf.seek_to_begin();
    }
    // finish compact, build output rowset
    VLOG_NOTICE << "finish compact groups";
    RETURN_IF_ERROR(dst_rowset_writer->final_flush());

    return Status::OK();
}

} // namespace doris
```
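What makes the value-group passes cheap is the `RowSourcesBuffer`: while the key group is merged, the reader records which input produced each output row, and the value groups are then stitched together by replaying that record instead of comparing keys again. A minimal sketch of the idea with two pre-sorted inputs (toy types and names, not the Doris API):

```cpp
// Toy sketch of vertical merge with a row-source buffer: pass 1 merges the key
// columns of two sorted inputs and records the source of each output row;
// pass 2 assembles the value columns by replaying that record, without
// looking at keys at all.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
    // Two input "rowsets", already sorted by key; columns stored separately.
    std::vector<int64_t> keys0 = {1, 4, 7};
    std::vector<std::string> vals0 = {"a", "d", "g"};
    std::vector<int64_t> keys1 = {2, 3, 9};
    std::vector<std::string> vals1 = {"b", "c", "i"};

    // Pass 1: merge key columns, recording which source produced each row.
    std::vector<int64_t> out_keys;
    std::vector<uint16_t> row_sources; // 0 or 1 per output row
    size_t i = 0, j = 0;
    while (i < keys0.size() || j < keys1.size()) {
        bool take0 = j >= keys1.size() || (i < keys0.size() && keys0[i] <= keys1[j]);
        out_keys.push_back(take0 ? keys0[i] : keys1[j]);
        row_sources.push_back(take0 ? 0 : 1);
        if (take0) {
            ++i;
        } else {
            ++j;
        }
    }

    // Pass 2: value columns follow the recorded sources; no key comparisons.
    std::vector<std::string> out_vals;
    size_t p0 = 0, p1 = 0;
    for (uint16_t src : row_sources) {
        out_vals.push_back(src == 0 ? vals0[p0++] : vals1[p1++]);
    }

    for (size_t r = 0; r < out_keys.size(); ++r) {
        std::cout << out_keys[r] << " -> " << out_vals[r] << '\n';
    }
}
```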
Contributor comment: This config can be changed online, and it works according to the code in merger.cpp.