-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[Feature](Row store) support column group with store row format for partial columns of table #34089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| #include <ostream> | ||
| #include <string> | ||
| #include <unordered_map> | ||
| #include <unordered_set> | ||
| #include <utility> | ||
|
|
||
| #include "cloud/config.h" | ||
|
|
@@ -265,7 +266,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) con | |
| // Just invalidate the row cache for simplicity, since the rowset is not visible at present. | ||
| // If we updated/inserted the cache and the load then failed, the rowset would not be visible | ||
| // but the cached data would be, leading to inconsistency. | ||
| if (!config::disable_storage_row_cache && _tablet_schema->store_row_column() && | ||
| if (!config::disable_storage_row_cache && _tablet_schema->has_row_store_for_all_columns() && | ||
| _opts.write_type == DataWriteType::TYPE_DIRECT) { | ||
| // invalidate cache | ||
| RowCache::instance()->erase({_opts.rowset_ctx->tablet_id, key}); | ||
|
|
@@ -278,27 +279,23 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& bl | |
| } | ||
| MonotonicStopWatch watch; | ||
| watch.start(); | ||
| // find row column id | ||
| int row_column_id = 0; | ||
| for (int i = 0; i < _tablet_schema->num_columns(); ++i) { | ||
| if (_tablet_schema->column(i).is_row_store_column()) { | ||
| row_column_id = i; | ||
| auto* row_store_column = static_cast<vectorized::ColumnString*>( | ||
| block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); | ||
| row_store_column->clear(); | ||
| vectorized::DataTypeSerDeSPtrs serdes = | ||
| vectorized::create_data_type_serdes(block.get_data_types()); | ||
| std::unordered_set<int> row_store_cids_set(_tablet_schema->row_columns_uids().begin(), | ||
| _tablet_schema->row_columns_uids().end()); | ||
| vectorized::JsonbSerializeUtil::block_to_jsonb( | ||
| *_tablet_schema, block, *row_store_column, _tablet_schema->num_columns(), | ||
| serdes, row_store_cids_set); | ||
| break; | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add break |
||
| } | ||
| if (row_column_id == 0) { | ||
| return; | ||
| } | ||
| auto* row_store_column = | ||
| static_cast<vectorized::ColumnString*>(block.get_by_position(row_column_id) | ||
| .column->assume_mutable_ref() | ||
| .assume_mutable() | ||
| .get()); | ||
| row_store_column->clear(); | ||
| vectorized::DataTypeSerDeSPtrs serdes = | ||
| vectorized::create_data_type_serdes(block.get_data_types()); | ||
| vectorized::JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, | ||
| _tablet_schema->num_columns(), serdes); | ||
|
|
||
| VLOG_DEBUG << "serialize , num_rows:" << block.rows() << ", row_column_id:" << row_column_id | ||
| << ", total_byte_size:" << block.allocated_bytes() << ", serialize_cost(us)" | ||
| << watch.elapsed_time() / 1000; | ||
|
|
@@ -500,10 +497,8 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da | |
| has_default_or_nullable, segment_start_pos, data.block)); | ||
|
|
||
| // row column should be filled here | ||
| if (_tablet_schema->store_row_column()) { | ||
| // convert block to row store format | ||
| _serialize_block_to_row_column(full_block); | ||
| } | ||
| // convert block to row store format | ||
| _serialize_block_to_row_column(full_block); | ||
|
|
||
| // convert missing columns and send to column writer | ||
| const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids; | ||
|
|
@@ -567,7 +562,7 @@ Status VerticalSegmentWriter::_fill_missing_columns( | |
| auto old_value_block = _tablet_schema->create_block_by_cids(missing_cids); | ||
| CHECK_EQ(missing_cids.size(), old_value_block.columns()); | ||
| auto mutable_old_columns = old_value_block.mutate_columns(); | ||
| bool has_row_column = _tablet_schema->store_row_column(); | ||
| bool has_row_column = _tablet_schema->has_row_store_for_all_columns(); | ||
| // record real pos, key is input line num, value is old_block line num | ||
| std::map<uint32_t, uint32_t> read_index; | ||
| size_t read_idx = 0; | ||
|
|
@@ -833,9 +828,8 @@ Status VerticalSegmentWriter::write_batch() { | |
| } | ||
| // Row column should be filled here when it's a direct write from memtable | ||
| // or a schema change write (since the column data type may have changed, we should rebuild it) | ||
| if (_tablet_schema->store_row_column() && | ||
| (_opts.write_type == DataWriteType::TYPE_DIRECT || | ||
| _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE)) { | ||
| if (_opts.write_type == DataWriteType::TYPE_DIRECT || | ||
| _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { | ||
| for (auto& data : _batched_blocks) { | ||
| // TODO: maybe we should pass range to this method | ||
| _serialize_block_to_row_column(*const_cast<vectorized::Block*>(data.block)); | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |||||
| #include <gen_cpp/segment_v2.pb.h> | ||||||
| #include <parallel_hashmap/phmap.h> | ||||||
|
|
||||||
| #include <algorithm> | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. warning: inclusion of deprecated C++ header 'stdint.h'; consider using 'cstdint' instead [modernize-deprecated-headers]
Suggested change
|
||||||
| #include <map> | ||||||
| #include <memory> | ||||||
| #include <string> | ||||||
|
|
@@ -31,6 +32,7 @@ | |||||
| #include <utility> | ||||||
| #include <vector> | ||||||
|
|
||||||
| #include "common/consts.h" | ||||||
| #include "common/status.h" | ||||||
| #include "gutil/stringprintf.h" | ||||||
| #include "olap/olap_common.h" | ||||||
|
|
@@ -342,8 +344,10 @@ class TabletSchema { | |||||
| _enable_single_replica_compaction = enable_single_replica_compaction; | ||||||
| } | ||||||
| bool enable_single_replica_compaction() const { return _enable_single_replica_compaction; } | ||||||
| void set_store_row_column(bool store_row_column) { _store_row_column = store_row_column; } | ||||||
| bool store_row_column() const { return _store_row_column; } | ||||||
| // Indicates whether a full row store column (all columns encoded as one row) exists | ||||||
| bool has_row_store_for_all_columns() const { | ||||||
| return _store_row_column && row_columns_uids().empty(); | ||||||
| } | ||||||
| void set_skip_write_index_on_load(bool skip) { _skip_write_index_on_load = skip; } | ||||||
| bool skip_write_index_on_load() const { return _skip_write_index_on_load; } | ||||||
| int32_t delete_sign_idx() const { return _delete_sign_idx; } | ||||||
|
|
@@ -474,6 +478,8 @@ class TabletSchema { | |||||
| void update_tablet_columns(const TabletSchema& tablet_schema, | ||||||
| const std::vector<TColumn>& t_columns); | ||||||
|
|
||||||
| const std::vector<int32_t>& row_columns_uids() const { return _row_store_column_unique_ids; } | ||||||
|
|
||||||
| private: | ||||||
| friend bool operator==(const TabletSchema& a, const TabletSchema& b); | ||||||
| friend bool operator!=(const TabletSchema& a, const TabletSchema& b); | ||||||
|
|
@@ -515,6 +521,10 @@ class TabletSchema { | |||||
| bool _store_row_column = false; | ||||||
| bool _skip_write_index_on_load = false; | ||||||
| InvertedIndexStorageFormatPB _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; | ||||||
|
|
||||||
| // Contains column ids of which columns should be encoded into row store. | ||||||
| // ATTN: For compatibility reasons, empty cids mean all columns of the tablet schema are encoded into the row column | ||||||
| std::vector<int32_t> _row_store_column_unique_ids; | ||||||
| }; | ||||||
|
|
||||||
| bool operator==(const TabletSchema& a, const TabletSchema& b); | ||||||
|
|
||||||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.