From a83ddd3793a18d97a386c391300e439b81473542 Mon Sep 17 00:00:00 2001
From: eldenmoon <15605149486@163.com>
Date: Mon, 22 Apr 2024 17:39:35 +0800
Subject: [PATCH 1/5] [Feature](Row store) support column group which stores
 row format for partial columns of a table

- BE-side test cases
- support schema change
- fix proto
- modify FE code
- modify BE code
- rebase
---
 be/src/exec/rowid_fetcher.cpp                 |   4 +-
 be/src/olap/base_tablet.cpp                   |   7 +-
 be/src/olap/base_tablet.h                     |   1 +
 be/src/olap/rowset/segment_creator.cpp        |   2 +-
 .../olap/rowset/segment_v2/segment_writer.cpp |  42 ++-
 .../segment_v2/vertical_segment_writer.cpp    |  42 ++-
 be/src/olap/schema_change.cpp                 |  13 +-
 be/src/olap/tablet_meta.cpp                   |   4 +
 be/src/olap/tablet_schema.cpp                 |   7 +-
 be/src/olap/tablet_schema.h                   |  14 +-
 be/src/runtime/runtime_state.h                |   5 +
 be/src/service/point_query_executor.cpp       | 145 +++++++--
 be/src/service/point_query_executor.h         |  19 +-
 be/src/vec/common/schema_util.cpp             |   2 +-
 be/src/vec/jsonb/serialize.cpp                |  74 +++--
 be/src/vec/jsonb/serialize.h                  |  13 +-
 be/test/vec/jsonb/serialize_test.cpp          |  16 +-
 .../org/apache/doris/alter/RollupJobV2.java   |   4 +-
 .../doris/alter/SchemaChangeHandler.java      |  42 ++-
 .../apache/doris/alter/SchemaChangeJobV2.java |  23 +-
 .../doris/analysis/CreateTableStmt.java       |   3 +-
 .../analysis/ModifyTablePropertiesClause.java |   2 +
 .../org/apache/doris/analysis/SelectStmt.java |   6 +-
 .../org/apache/doris/backup/RestoreJob.java   |   8 +-
 .../java/org/apache/doris/catalog/Env.java    |  10 +-
 .../org/apache/doris/catalog/OlapTable.java   |  25 ++
 .../java/org/apache/doris/catalog/Table.java  |   2 +-
 .../apache/doris/catalog/TableProperty.java   |  42 ++-
 .../doris/common/util/PropertyAnalyzer.java   |  55 +++-
 .../doris/datasource/InternalCatalog.java     |  23 +-
 .../apache/doris/master/ReportHandler.java    |   6 +-
 .../plans/commands/info/CreateMTMVInfo.java   |   2 +-
 .../plans/commands/info/CreateTableInfo.java  |  10 +-
 .../org/apache/doris/qe/SessionVariable.java  |  16 +
 .../apache/doris/task/AlterReplicaTask.java   |   7 +-
 .../apache/doris/task/CreateReplicaTask.java  |   7 +
 .../org/apache/doris/task/AgentTaskTest.java  |   2 +-
 gensrc/proto/olap_file.proto                  |   4 +
 gensrc/thrift/AgentService.thrift             |   2 +
 gensrc/thrift/PaloInternalService.thrift      |   2 +
 .../test_compaction_uniq_keys_row_store.out   |  54 ++++
 regression-test/data/point_query_p0/load.out  |  15 +
 .../data/point_query_p0/test_rowstore.out     |  89 ++++++
 .../test_partial_update_with_row_column.out   |   3 +
 ...test_compaction_uniq_keys_row_store.groovy |  28 +-
 .../suites/point_query_p0/load.groovy         | 105 +++++--
 .../point_query_p0/test_rowstore.groovy       | 284 +++++++++++++++++-
 ...test_partial_update_with_row_column.groovy |   3 +-
 48 files changed, 1118 insertions(+), 176 deletions(-)

diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp
index c921be9509f56a..96ca8ddb786d6d 100644
--- a/be/src/exec/rowid_fetcher.cpp
+++ b/be/src/exec/rowid_fetcher.cpp
@@ -168,7 +168,7 @@ Status RowIDFetcher::_merge_rpc_results(const PMultiGetRequest& request,
     for (int i = 0; i < resp.binary_row_data_size(); ++i) {
         vectorized::JsonbSerializeUtil::jsonb_to_block(
                 serdes, resp.binary_row_data(i).data(), resp.binary_row_data(i).size(),
-                col_uid_to_idx, *output_block, default_values);
+                col_uid_to_idx, *output_block, default_values, {});
     }
     return Status::OK();
 }
@@ -405,7 +405,7 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequest& request,
                     row_loc.segment_id(), row_loc.ordinal_id());
             // fetch by row store, more effcient way
             if (request.fetch_row_store()) {
-
CHECK(tablet->tablet_schema()->store_row_column()); + CHECK(tablet->tablet_schema()->has_full_row_store_column()); RowLocation loc(rowset_id, segment->id(), row_loc.ordinal_id()); string* value = response->add_binary_row_data(); RETURN_IF_ERROR(scope_timer_run( diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 2fa90051165196..ee7b3ccecf3e5c 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -61,7 +61,7 @@ Status read_columns_by_plan(TabletSchemaSPtr tablet_schema, const PartialUpdateReadPlan& read_plan, const std::map& rsid_to_rowset, vectorized::Block& block, std::map* read_index) { - bool has_row_column = tablet_schema->store_row_column(); + bool has_row_column = tablet_schema->has_full_row_store_column(); auto mutable_columns = block.mutate_columns(); size_t read_idx = 0; for (auto rs_it : read_plan) { @@ -449,7 +449,6 @@ Status BaseTablet::lookup_row_data(const Slice& encoded_key, const RowLocation& BetaRowsetSharedPtr rowset = std::static_pointer_cast(input_rowset); CHECK(rowset); const TabletSchemaSPtr tablet_schema = rowset->tablet_schema(); - CHECK(tablet_schema->store_row_column()); SegmentCacheHandle segment_cache_handle; std::unique_ptr column_iterator; const auto& column = *DORIS_TRY(tablet_schema->column(BeConsts::ROW_STORE_COL)); @@ -874,7 +873,7 @@ Status BaseTablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, BetaRowsetSharedPtr rowset = std::static_pointer_cast(input_rowset); CHECK(rowset); - CHECK(tablet_schema.store_row_column()); + CHECK(tablet_schema.has_full_row_store_column()); SegmentCacheHandle segment_cache_handle; std::unique_ptr column_iterator; OlapReaderStatistics stats; @@ -900,7 +899,7 @@ Status BaseTablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, serdes[i] = type->get_serde(); } vectorized::JsonbSerializeUtil::jsonb_to_block(serdes, *string_column, col_uid_to_idx, block, - default_values); + default_values, {}); return Status::OK(); } diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index dc5f488e04492c..695dbc2487eee9 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -26,6 +26,7 @@ #include "olap/rowset/segment_v2/segment.h" #include "olap/tablet_fwd.h" #include "olap/tablet_meta.h" +#include "olap/tablet_schema.h" #include "olap/version_graph.h" #include "util/metrics.h" diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index df7f09c351ac9c..e78864fbbca813 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -100,7 +100,7 @@ Status SegmentFlusher::_parse_variant_columns(vectorized::Block& block) { } vectorized::schema_util::ParseContext ctx; - ctx.record_raw_json_column = _context.tablet_schema->store_row_column(); + ctx.record_raw_json_column = _context.tablet_schema->has_full_row_store_column(); RETURN_IF_ERROR(vectorized::schema_util::parse_variant_columns(block, variant_column_pos, ctx)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 78fd69150c21c6..adc8994b53ae10 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -318,7 +318,7 @@ void SegmentWriter::_maybe_invalid_row_cache(const std::string& key) { // Just invalid row cache for simplicity, since the rowset is not visible at present. 
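The invalidation rule this comment describes can be summarized in a small standalone sketch. This is illustrative only: `RowCacheStub` and the free function are hypothetical stand-ins, while the `DataWriteType` values and the `disable_storage_row_cache` flag come from the patch.

#include <string>
#include <unordered_map>

// Hypothetical stand-ins for the patch's types, for illustration only.
enum class DataWriteType { TYPE_DIRECT, TYPE_SCHEMA_CHANGE, TYPE_COMPACTION };

struct RowCacheStub {
    std::unordered_map<std::string, std::string> cache;
    void erase(const std::string& key) { cache.erase(key); }
};

// Mirrors the guard in _maybe_invalid_row_cache: the row cache is only
// invalidated when (1) the row cache is enabled, (2) the tablet keeps a
// full-row store column (partial column groups are never row-cached), and
// (3) the write comes directly from a load, not from compaction.
void maybe_invalidate_row_cache(RowCacheStub& cache, const std::string& key,
                                bool disable_storage_row_cache,
                                bool has_full_row_store_column,
                                DataWriteType write_type) {
    if (!disable_storage_row_cache && has_full_row_store_column &&
        write_type == DataWriteType::TYPE_DIRECT) {
        cache.erase(key);
    }
}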
// If we update/insert cache, if load failed rowset will not be visible but cached data // will be visible, and lead to inconsistency. - if (!config::disable_storage_row_cache && _tablet_schema->store_row_column() && + if (!config::disable_storage_row_cache && _tablet_schema->has_full_row_store_column() && _opts.write_type == DataWriteType::TYPE_DIRECT) { // invalidate cache RowCache::instance()->erase({_opts.rowset_ctx->tablet_id, key}); @@ -437,27 +437,23 @@ void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) { } MonotonicStopWatch watch; watch.start(); - // find row column id int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - row_column_id = i; + auto* row_store_column = static_cast( + block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); + row_store_column->clear(); + vectorized::DataTypeSerDeSPtrs serdes = + vectorized::create_data_type_serdes(block.get_data_types()); + vectorized::JsonbSerializeUtil::block_to_jsonb( + *_tablet_schema, block, *row_store_column, _tablet_schema->num_columns(), + serdes, + {_tablet_schema->row_columns_cids().begin(), + _tablet_schema->row_columns_cids().end()}); break; } } - if (row_column_id == 0) { - return; - } - vectorized::ColumnString* row_store_column = - static_cast(block.get_by_position(row_column_id) - .column->assume_mutable_ref() - .assume_mutable() - .get()); - row_store_column->clear(); - vectorized::DataTypeSerDeSPtrs serdes = - vectorized::create_data_type_serdes(block.get_data_types()); - vectorized::JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, - _tablet_schema->num_columns(), serdes); + VLOG_DEBUG << "serialize , num_rows:" << block.rows() << ", row_column_id:" << row_column_id << ", total_byte_size:" << block.allocated_bytes() << ", serialize_cost(us)" << watch.elapsed_time() / 1000; @@ -669,11 +665,8 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag, has_default_or_nullable, segment_start_pos, block)); full_block.set_columns(std::move(mutable_full_columns)); - // row column should be filled here - if (_tablet_schema->store_row_column()) { - // convert block to row store format - _serialize_block_to_row_column(full_block); - } + // convert block to row store format + _serialize_block_to_row_column(full_block); // convert missing columns and send to column writer RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns( @@ -741,7 +734,7 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f const auto& cids_missing = _opts.rowset_ctx->partial_update_info->missing_cids; auto old_value_block = _tablet_schema->create_block_by_cids(cids_missing); CHECK_EQ(cids_missing.size(), old_value_block.columns()); - bool has_row_column = _tablet_schema->store_row_column(); + bool has_row_column = _tablet_schema->has_full_row_store_column(); // record real pos, key is input line num, value is old_block line num std::map read_index; size_t read_idx = 0; @@ -870,9 +863,8 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po << ", _column_writers.size()=" << _column_writers.size(); // Row column should be filled here when it's a directly write from memtable // or it's schema change write(since column data type maybe changed, so we should reubild) - if (_tablet_schema->store_row_column() && - 
(_opts.write_type == DataWriteType::TYPE_DIRECT || - _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE)) { + if (_opts.write_type == DataWriteType::TYPE_DIRECT || + _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { _serialize_block_to_row_column(*const_cast(block)); } diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 44c1997529eecb..ada9d8ffad1565 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "cloud/config.h" @@ -265,7 +266,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) con // Just invalid row cache for simplicity, since the rowset is not visible at present. // If we update/insert cache, if load failed rowset will not be visible but cached data // will be visible, and lead to inconsistency. - if (!config::disable_storage_row_cache && _tablet_schema->store_row_column() && + if (!config::disable_storage_row_cache && _tablet_schema->has_full_row_store_column() && _opts.write_type == DataWriteType::TYPE_DIRECT) { // invalidate cache RowCache::instance()->erase({_opts.rowset_ctx->tablet_id, key}); @@ -278,27 +279,23 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& bl } MonotonicStopWatch watch; watch.start(); - // find row column id int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - row_column_id = i; + auto* row_store_column = static_cast( + block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); + row_store_column->clear(); + vectorized::DataTypeSerDeSPtrs serdes = + vectorized::create_data_type_serdes(block.get_data_types()); + std::unordered_set row_store_cids_set(_tablet_schema->row_columns_cids().begin(), + _tablet_schema->row_columns_cids().end()); + vectorized::JsonbSerializeUtil::block_to_jsonb( + *_tablet_schema, block, *row_store_column, _tablet_schema->num_columns(), + serdes, row_store_cids_set); break; } } - if (row_column_id == 0) { - return; - } - auto* row_store_column = - static_cast(block.get_by_position(row_column_id) - .column->assume_mutable_ref() - .assume_mutable() - .get()); - row_store_column->clear(); - vectorized::DataTypeSerDeSPtrs serdes = - vectorized::create_data_type_serdes(block.get_data_types()); - vectorized::JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, - _tablet_schema->num_columns(), serdes); + VLOG_DEBUG << "serialize , num_rows:" << block.rows() << ", row_column_id:" << row_column_id << ", total_byte_size:" << block.allocated_bytes() << ", serialize_cost(us)" << watch.elapsed_time() / 1000; @@ -500,10 +497,8 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da has_default_or_nullable, segment_start_pos, data.block)); // row column should be filled here - if (_tablet_schema->store_row_column()) { - // convert block to row store format - _serialize_block_to_row_column(full_block); - } + // convert block to row store format + _serialize_block_to_row_column(full_block); // convert missing columns and send to column writer const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids; @@ -567,7 +562,7 @@ Status VerticalSegmentWriter::_fill_missing_columns( auto old_value_block = _tablet_schema->create_block_by_cids(missing_cids); 
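For the fill-missing-columns path that follows, here is a minimal sketch of the decision `_fill_missing_columns` encodes via `has_full_row_store_column()`. The names below are illustrative, not from the patch; only the branching rule is taken from it.

#include <cstddef>

// One row-store seek recovers all old values of a row; the pure column-store
// path costs one seek per missing column instead.
enum class OldValuePath { kRowStore, kColumnStore };

OldValuePath choose_old_value_path(bool has_full_row_store_column) {
    // Mirrors the has_row_column branch in read_columns_by_plan /
    // _fill_missing_columns: the row-store path is taken whenever the tablet
    // stores the whole row as a hidden column.
    return has_full_row_store_column ? OldValuePath::kRowStore
                                     : OldValuePath::kColumnStore;
}

// Rough seek count for reading `rows` rows with `missing_cols` missing columns.
std::size_t seeks_needed(OldValuePath path, std::size_t rows, std::size_t missing_cols) {
    return path == OldValuePath::kRowStore ? rows : rows * missing_cols;
}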
CHECK_EQ(missing_cids.size(), old_value_block.columns()); auto mutable_old_columns = old_value_block.mutate_columns(); - bool has_row_column = _tablet_schema->store_row_column(); + bool has_row_column = _tablet_schema->has_full_row_store_column(); // record real pos, key is input line num, value is old_block line num std::map read_index; size_t read_idx = 0; @@ -833,9 +828,8 @@ Status VerticalSegmentWriter::write_batch() { } // Row column should be filled here when it's a directly write from memtable // or it's schema change write(since column data type maybe changed, so we should reubild) - if (_tablet_schema->store_row_column() && - (_opts.write_type == DataWriteType::TYPE_DIRECT || - _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE)) { + if (_opts.write_type == DataWriteType::TYPE_DIRECT || + _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { for (auto& data : _batched_blocks) { // TODO: maybe we should pass range to this method _serialize_block_to_row_column(*const_cast(data.block)); diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 66930c77408ddf..ba7ec1eaa24069 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -28,6 +28,7 @@ #include "cloud/cloud_schema_change_job.h" #include "cloud/config.h" +#include "common/consts.h" #include "common/logging.h" #include "common/signal_handler.h" #include "common/status.h" @@ -1315,6 +1316,16 @@ Status SchemaChangeJob::parse_request(const SchemaChangeParams& sc_params, return Status::OK(); } + // if new tablet enable row store, or new tablet has different row store columns + if ((!base_tablet_schema->have_column(BeConsts::ROW_STORE_COL) && + new_tablet_schema->have_column(BeConsts::ROW_STORE_COL)) || + !std::equal(new_tablet_schema->row_columns_cids().begin(), + new_tablet_schema->row_columns_cids().end(), + base_tablet_schema->row_columns_cids().begin(), + base_tablet_schema->row_columns_cids().end())) { + *sc_directly = true; + } + for (size_t i = 0; i < new_tablet_schema->num_columns(); ++i) { ColumnMapping* column_mapping = changer->get_mutable_column_mapping(i); if (column_mapping->expr != nullptr) { @@ -1323,7 +1334,7 @@ Status SchemaChangeJob::parse_request(const SchemaChangeParams& sc_params, } else if (column_mapping->ref_column >= 0) { const auto& column_new = new_tablet_schema->column(i); const auto& column_old = base_tablet_schema->column(column_mapping->ref_column); - // index changed + // check index changed or row store columns changed if (column_new.is_bf_column() != column_old.is_bf_column() || column_new.has_bitmap_index() != column_old.has_bitmap_index() || new_tablet_schema->has_inverted_index(column_new) != diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 46acd61c81309a..6c787b597a1d42 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -304,6 +304,10 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id if (tablet_schema.__isset.skip_write_index_on_load) { schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load); } + if (tablet_schema.__isset.row_store_col_cids) { + schema->mutable_row_store_column_cids()->Add(tablet_schema.row_store_col_cids.begin(), + tablet_schema.row_store_col_cids.end()); + } if (binlog_config.has_value()) { BinlogConfig tmp_binlog_config; tmp_binlog_config = binlog_config.value(); diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index be96f395724c6c..07da82da49da64 100644 --- 
a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -987,6 +987,9 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac } else { _inverted_index_storage_format = schema.inverted_index_storage_format(); } + + _rowstore_column_cids.assign(schema.row_store_column_cids().begin(), + schema.row_store_column_cids().end()); } void TabletSchema::copy_from(const TabletSchema& tablet_schema) { @@ -1034,7 +1037,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _is_in_memory = ori_tablet_schema.is_in_memory(); _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction(); _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction(); - _store_row_column = ori_tablet_schema.store_row_column(); + _store_row_column = ori_tablet_schema.has_full_row_store_column(); _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load(); _sort_type = ori_tablet_schema.sort_type(); _sort_col_num = ori_tablet_schema.sort_col_num(); @@ -1193,6 +1196,8 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { tablet_schema_pb->set_compression_type(_compression_type); tablet_schema_pb->set_version_col_idx(_version_col_idx); tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format); + tablet_schema_pb->mutable_row_store_column_cids()->Assign(_rowstore_column_cids.begin(), + _rowstore_column_cids.end()); } size_t TabletSchema::row_size() const { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 240618229b16e1..366061c0275814 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include +#include "common/consts.h" #include "common/status.h" #include "gutil/stringprintf.h" #include "olap/olap_common.h" @@ -342,8 +344,10 @@ class TabletSchema { _enable_single_replica_compaction = enable_single_replica_compaction; } bool enable_single_replica_compaction() const { return _enable_single_replica_compaction; } - void set_store_row_column(bool store_row_column) { _store_row_column = store_row_column; } - bool store_row_column() const { return _store_row_column; } + // indicate if full row store column(all the columns encodes as row) exists + bool has_full_row_store_column() const { + return _store_row_column && row_columns_cids().empty(); + } void set_skip_write_index_on_load(bool skip) { _skip_write_index_on_load = skip; } bool skip_write_index_on_load() const { return _skip_write_index_on_load; } int32_t delete_sign_idx() const { return _delete_sign_idx; } @@ -474,6 +478,8 @@ class TabletSchema { void update_tablet_columns(const TabletSchema& tablet_schema, const std::vector& t_columns); + const std::vector& row_columns_cids() const { return _rowstore_column_cids; } + private: friend bool operator==(const TabletSchema& a, const TabletSchema& b); friend bool operator!=(const TabletSchema& a, const TabletSchema& b); @@ -515,6 +521,10 @@ class TabletSchema { bool _store_row_column = false; bool _skip_write_index_on_load = false; InvertedIndexStorageFormatPB _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // Contains column ids of which columns should be encoded into row store. 
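A sketch of the membership rule the `_rowstore_column_cids` comments below describe, assuming (as elsewhere in the patch) that the cid list holds column unique ids; the helper name is hypothetical.

#include <cstdint>
#include <unordered_set>
#include <vector>

// Assumed semantics, matching has_full_row_store_column in this patch: an
// empty cid list means "every column is in the row store", kept for
// compatibility with tables created before partial column groups existed.
bool column_in_row_store(int32_t unique_id,
                         const std::vector<int32_t>& row_store_cids) {
    if (row_store_cids.empty()) {
        return true; // legacy full-row store: all columns are encoded
    }
    const std::unordered_set<int32_t> cids(row_store_cids.begin(),
                                           row_store_cids.end());
    return cids.contains(unique_id);
}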
+ // ATTN: For compability reason empty cids means all columns of tablet schema are encoded to row column + std::vector _rowstore_column_cids; }; bool operator==(const TabletSchema& a, const TabletSchema& b); diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 760398fcb9241c..dbd6e7cca5ed12 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -192,6 +192,11 @@ class RuntimeState { _query_options.mysql_row_binary_format; } + bool enable_short_circuit_query_access_column_store() const { + return _query_options.__isset.enable_short_circuit_query_access_column_store && + _query_options.enable_short_circuit_query_access_column_store; + } + // Appends error to the _error_log if there is space bool log_error(const std::string& error); diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index c0be69d4ce24ff..2eee1f385e5b29 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -22,22 +22,28 @@ #include #include #include +#include #include +#include #include #include #include #include "cloud/cloud_tablet.h" #include "cloud/config.h" +#include "common/consts.h" #include "common/status.h" #include "gutil/integral_types.h" #include "olap/lru_cache.h" #include "olap/olap_tuple.h" #include "olap/row_cursor.h" +#include "olap/rowset/beta_rowset.h" #include "olap/storage_engine.h" #include "olap/tablet_manager.h" #include "olap/tablet_schema.h" +#include "olap/utils.h" +#include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" @@ -47,15 +53,62 @@ #include "vec/data_types/serde/data_type_serde.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vexpr_fwd.h" +#include "vec/exprs/vslot_ref.h" #include "vec/jsonb/serialize.h" #include "vec/sink/vmysql_result_writer.h" namespace doris { -Reusable::~Reusable() {} +Reusable::~Reusable() = default; + +static void get_missing_and_include_cids(const TabletSchema& schema, + const std::vector& slots, + int target_rs_column_id, + std::unordered_set& missing_cids, + std::unordered_set& include_cids) { + missing_cids.clear(); + include_cids.clear(); + for (auto* slot : slots) { + missing_cids.insert(slot->col_unique_id()); + } + if (target_rs_column_id == -1) { + // no row store columns + return; + } + const TabletColumn& target_rs_column = schema.column_by_uid(target_rs_column_id); + DCHECK(target_rs_column.is_row_store_column()); + // The full column group is considered a full match, thus no missing cids + if (schema.row_columns_cids().empty()) { + missing_cids.clear(); + return; + } + for (int cid : schema.row_columns_cids()) { + missing_cids.erase(cid); + include_cids.insert(cid); + } +} + constexpr static int s_preallocted_blocks_num = 32; + +static void extract_slot_ref(const vectorized::VExprSPtr& expr, TupleDescriptor* tuple_desc, + std::vector& slots) { + const auto& children = expr->children(); + for (const auto& i : children) { + extract_slot_ref(i, tuple_desc, slots); + } + + auto node_type = expr->node_type(); + if (node_type == TExprNodeType::SLOT_REF) { + int column_id = static_cast(expr.get())->column_id(); + auto* slot_desc = tuple_desc->slots()[column_id]; + slots.push_back(slot_desc); + } +} + Status Reusable::init(const TDescriptorTable& t_desc_tbl, const std::vector& output_exprs, - const TQueryOptions& query_options, size_t block_size) { + const TQueryOptions& query_options, const 
TabletSchema& schema, + size_t block_size) { _runtime_state = RuntimeState::create_unique(); _runtime_state->set_query_options(query_options); RETURN_IF_ERROR(DescriptorTbl::create(_runtime_state->obj_pool(), t_desc_tbl, &_desc_tbl)); @@ -80,6 +133,20 @@ Status Reusable::init(const TDescriptorTable& t_desc_tbl, const std::vectorcol_unique_id()] = i; _col_default_values[i] = slot->col_default_value(); } + + // Get the output slot descriptors + std::vector output_slot_descs; + for (const auto& expr : _output_exprs_ctxs) { + extract_slot_ref(expr->root(), tuple_desc(), output_slot_descs); + } + + if (schema.have_column(BeConsts::ROW_STORE_COL)) { + const auto& column = *DORIS_TRY(schema.column(BeConsts::ROW_STORE_COL)); + _row_store_column_ids = column.unique_id(); + } + get_missing_and_include_cids(schema, output_slot_descs, _row_store_column_ids, + _missing_col_uids, _include_col_uids); + return Status::OK(); } @@ -178,6 +245,7 @@ Status PointQueryExecutor::init(const PTabletKeyLookupRequest* request, SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->point_query_executor_mem_tracker()); auto cache_handle = LookupConnectionCache::instance()->get(uuid); _binary_row_format = request->is_binary_row(); + _tablet = DORIS_TRY(ExecEnv::get_tablet(request->tablet_id())); if (cache_handle != nullptr) { _reusable = cache_handle; _profile_metrics.hit_lookup_cache = true; @@ -205,20 +273,21 @@ Status PointQueryExecutor::init(const PTabletKeyLookupRequest* request, if (uuid != 0) { // could be reused by requests after, pre allocte more blocks RETURN_IF_ERROR(reusable_ptr->init(t_desc_tbl, t_output_exprs.exprs, t_query_options, + *_tablet->tablet_schema(), s_preallocted_blocks_num)); LookupConnectionCache::instance()->add(uuid, reusable_ptr); } else { - RETURN_IF_ERROR( - reusable_ptr->init(t_desc_tbl, t_output_exprs.exprs, t_query_options, 1)); + RETURN_IF_ERROR(reusable_ptr->init(t_desc_tbl, t_output_exprs.exprs, t_query_options, + *_tablet->tablet_schema(), 1)); } } - _tablet = DORIS_TRY(ExecEnv::get_tablet(request->tablet_id())); if (request->has_version() && request->version() >= 0) { _version = request->version(); } RETURN_IF_ERROR(_init_keys(request)); _result_block = _reusable->get_block(); CHECK(_result_block != nullptr); + return Status::OK(); } @@ -250,12 +319,14 @@ std::string PointQueryExecutor::print_profile() { ", hit_cached_pages:{}, total_pages_read:{}, compressed_bytes_read:{}, " "io_latency:{}ns, " "uncompressed_bytes_read:{}, result_data_bytes:{}" + ", rs_column_uid:{}" "", total_us, init_us, init_key_us, lookup_key_us, lookup_data_us, output_data_us, _profile_metrics.hit_lookup_cache, _binary_row_format, _reusable->output_exprs().size(), _row_read_ctxs.size(), _profile_metrics.row_cache_hits, read_stats.cached_pages_num, read_stats.total_pages_num, read_stats.compressed_bytes_read, read_stats.io_ns, - read_stats.uncompressed_bytes_read, _profile_metrics.result_data_bytes); + read_stats.uncompressed_bytes_read, _profile_metrics.result_data_bytes, + _reusable->rs_column_uid()); } Status PointQueryExecutor::_init_keys(const PTabletKeyLookupRequest* request) { @@ -335,23 +406,63 @@ Status PointQueryExecutor::_lookup_row_data() { _reusable->get_data_type_serdes(), _row_read_ctxs[i]._cached_row_data.data().data, _row_read_ctxs[i]._cached_row_data.data().size, _reusable->get_col_uid_to_idx(), - *_result_block, _reusable->get_col_default_values()); + *_result_block, _reusable->get_col_default_values(), + _reusable->include_col_uids()); continue; } if (!_row_read_ctxs[i]._row_location.has_value()) { 
continue;
         }
         std::string value;
-        RETURN_IF_ERROR(_tablet->lookup_row_data(
-                _row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(),
-                *(_row_read_ctxs[i]._rowset_ptr), _reusable->tuple_desc(),
-                _profile_metrics.read_stats, value,
-                !config::disable_storage_row_cache /*whether write row cache*/));
-        // serilize value to block, currently only jsonb row formt
-        vectorized::JsonbSerializeUtil::jsonb_to_block(
-                _reusable->get_data_type_serdes(), value.data(), value.size(),
-                _reusable->get_col_uid_to_idx(), *_result_block,
-                _reusable->get_col_default_values());
+        // fill block by row store
+        if (_reusable->rs_column_uid() != -1) {
+            bool use_row_cache = !config::disable_storage_row_cache &&
+                                 _tablet->tablet_schema()->row_columns_cids().empty();
+            RETURN_IF_ERROR(_tablet->lookup_row_data(
+                    _row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(),
+                    *(_row_read_ctxs[i]._rowset_ptr), _reusable->tuple_desc(),
+                    _profile_metrics.read_stats, value, use_row_cache));
+            // serialize value to block, currently only the jsonb row format
+            vectorized::JsonbSerializeUtil::jsonb_to_block(
+                    _reusable->get_data_type_serdes(), value.data(), value.size(),
+                    _reusable->get_col_uid_to_idx(), *_result_block,
+                    _reusable->get_col_default_values(), _reusable->include_col_uids());
+        }
+        if (!_reusable->missing_col_uids().empty()) {
+            if (!_reusable->runtime_state().enable_short_circuit_query_access_column_store()) {
+                std::string missing_columns;
+                for (int cid : _reusable->missing_col_uids()) {
+                    missing_columns += _tablet->tablet_schema()->column_by_uid(cid).name() + ",";
+                }
+                return Status::InternalError(
+                        "Column store is not supported, set store_row_column or "
+                        "row_store_columns in table properties, missing columns: " +
+                        missing_columns + " should be added to row store");
+            }
+            // fill missing columns by column store
+            RowLocation row_loc = _row_read_ctxs[i]._row_location.value();
+            BetaRowsetSharedPtr rowset =
+                    std::static_pointer_cast(_tablet->get_rowset(row_loc.rowset_id));
+            SegmentCacheHandle segment_cache;
+            RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true));
+            // find segment
+            auto it = std::find_if(segment_cache.get_segments().cbegin(),
+                                   segment_cache.get_segments().cend(),
+                                   [&](const segment_v2::SegmentSharedPtr& seg) {
+                                       return seg->id() == row_loc.segment_id;
+                                   });
+            const auto& segment = *it;
+            for (int cid : _reusable->missing_col_uids()) {
+                int pos = _reusable->get_col_uid_to_idx().at(cid);
+                auto row_id = static_cast(row_loc.row_id);
+                vectorized::MutableColumnPtr column =
+                        _result_block->get_by_position(pos).column->assume_mutable();
+                std::unique_ptr iter;
+                RETURN_IF_ERROR(segment->seek_and_read_by_rowid(
+                        *_tablet->tablet_schema(), _reusable->tuple_desc()->slots()[pos], row_id,
+                        column, _read_stats, iter));
+            }
+        }
     }
     return Status::OK();
 }
diff --git a/be/src/service/point_query_executor.h b/be/src/service/point_query_executor.h
index 61b597c5da8159..2b5f882717e2e9 100644
--- a/be/src/service/point_query_executor.h
+++ b/be/src/service/point_query_executor.h
@@ -39,6 +39,7 @@
 #include "common/config.h"
 #include "common/logging.h"
 #include "common/status.h"
+#include "gutil/integral_types.h"
 #include "olap/lru_cache.h"
 #include "olap/olap_common.h"
 #include "olap/rowset/rowset.h"
@@ -71,7 +72,8 @@ class Reusable {
     }

     Status init(const TDescriptorTable& t_desc_tbl, const std::vector& output_exprs,
-                const TQueryOptions& query_options, size_t block_size = 1);
+                const TQueryOptions& query_options, const TabletSchema& schema,
+
size_t block_size = 1); std::unique_ptr get_block(); @@ -90,6 +92,14 @@ class Reusable { const vectorized::VExprContextSPtrs& output_exprs() { return _output_exprs_ctxs; } + int32_t rs_column_uid() const { return _row_store_column_ids; } + + const std::unordered_set missing_col_uids() const { return _missing_col_uids; } + + const std::unordered_set include_col_uids() const { return _include_col_uids; } + + const RuntimeState& runtime_state() const { return *_runtime_state; } + private: // caching TupleDescriptor, output_expr, etc... std::unique_ptr _runtime_state; @@ -102,6 +112,12 @@ class Reusable { vectorized::DataTypeSerDeSPtrs _data_type_serdes; std::unordered_map _col_uid_to_idx; std::vector _col_default_values; + // picked rowstore(column group) column unique id + int32_t _row_store_column_ids = -1; + // some column is missing in rowstore(column group), we need to fill them with column store values + std::unordered_set _missing_col_uids; + // included cids in rowstore(column group) + std::unordered_set _include_col_uids; }; // RowCache is a LRU cache for row store @@ -313,6 +329,7 @@ class PointQueryExecutor { std::unique_ptr _result_block; Metrics _profile_metrics; bool _binary_row_format = false; + OlapReaderStatistics _read_stats; // snapshot read version int64_t _version = -1; }; diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index eb33dcd1654d0f..55339e5c6c1d64 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -682,7 +682,7 @@ void rebuild_schema_and_block(const TabletSchemaSPtr& original, {}, root->data.get_finalized_column_ptr()->assume_mutable(), root->data.get_least_common_type()); // // set for rowstore - if (original->store_row_column()) { + if (original->has_full_row_store_column()) { static_cast(obj.get())->set_rowstore_column( object_column.get_rowstore_column()); } diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp index 0dd0d342918daa..a35d722e01565c 100644 --- a/be/src/vec/jsonb/serialize.cpp +++ b/be/src/vec/jsonb/serialize.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include "olap/tablet_schema.h" @@ -45,7 +46,8 @@ namespace doris::vectorized { void JsonbSerializeUtil::block_to_jsonb(const TabletSchema& schema, const Block& block, ColumnString& dst, int num_cols, - const DataTypeSerDeSPtrs& serdes) { + const DataTypeSerDeSPtrs& serdes, + const std::unordered_set& row_store_cids) { auto num_rows = block.rows(); Arena pool; assert(num_cols <= block.columns()); @@ -55,12 +57,15 @@ void JsonbSerializeUtil::block_to_jsonb(const TabletSchema& schema, const Block& for (int j = 0; j < num_cols; ++j) { const auto& column = block.get_by_position(j).column; const auto& tablet_column = *schema.columns()[j]; + // ignore row store columns if (tablet_column.is_row_store_column()) { - // ignore dst row store column continue; } - serdes[j]->write_one_cell_to_jsonb(*column, jsonb_writer, &pool, - tablet_column.unique_id(), i); + // TODO improve performance for checking column in group + if (row_store_cids.empty() || row_store_cids.contains(tablet_column.unique_id())) { + serdes[j]->write_one_cell_to_jsonb(*column, jsonb_writer, &pool, + tablet_column.unique_id(), i); + } } jsonb_writer.writeEndObject(); dst.insert_data(jsonb_writer.getOutput()->getBuffer(), jsonb_writer.getOutput()->getSize()); @@ -71,12 +76,12 @@ void JsonbSerializeUtil::block_to_jsonb(const TabletSchema& schema, const Block& void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, 
const ColumnString& jsonb_column, const std::unordered_map& col_id_to_idx, - Block& dst, - const std::vector& default_values) { + Block& dst, const std::vector& default_values, + const std::unordered_set& include_cids) { for (int i = 0; i < jsonb_column.size(); ++i) { StringRef jsonb_data = jsonb_column.get_data_at(i); - jsonb_to_block(serdes, jsonb_data.data, jsonb_data.size, col_id_to_idx, dst, - default_values); + jsonb_to_block(serdes, jsonb_data.data, jsonb_data.size, col_id_to_idx, dst, default_values, + include_cids); } } @@ -84,38 +89,53 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const char* data, size_t size, const std::unordered_map& col_id_to_idx, - Block& dst, - const std::vector& default_values) { + Block& dst, const std::vector& default_values, + const std::unordered_set& include_cids) { auto pdoc = JsonbDocument::createDocument(data, size); JsonbDocument& doc = *pdoc; size_t num_rows = dst.rows(); size_t filled_columns = 0; for (auto it = doc->begin(); it != doc->end(); ++it) { auto col_it = col_id_to_idx.find(it->getKeyId()); - if (col_it != col_id_to_idx.end()) { + if (col_it != col_id_to_idx.end() && + (include_cids.empty() || include_cids.contains(it->getKeyId()))) { MutableColumnPtr dst_column = dst.get_by_position(col_it->second).column->assume_mutable(); serdes[col_it->second]->read_one_cell_from_jsonb(*dst_column, it->value()); ++filled_columns; } } - if (filled_columns < dst.columns()) { - // fill missing slot - for (int i = 0; i < dst.columns(); ++i) { - const auto& column_type_name = dst.get_by_position(i); - MutableColumnPtr col = column_type_name.column->assume_mutable(); - if (col->size() < num_rows + 1) { - DCHECK(col->size() == num_rows); - if (default_values[i].empty()) { - col->insert_default(); - } else { - Slice value(default_values[i].data(), default_values[i].size()); - DataTypeSerDe::FormatOptions opt; - opt.converted_from_string = true; - static_cast(serdes[i]->deserialize_one_cell_from_json(*col, value, opt)); - } + if (filled_columns >= dst.columns()) { + return; + } + auto fill_column = [&](Block& dst, int pos, size_t old_num_rows) { + MutableColumnPtr dst_column = dst.get_by_position(pos).column->assume_mutable(); + if (dst_column->size() < old_num_rows + 1) { + DCHECK(dst_column->size() == old_num_rows); + if (default_values[pos].empty()) { + dst_column->insert_default(); + } else { + Slice value(default_values[pos].data(), default_values[pos].size()); + DataTypeSerDe::FormatOptions opt; + opt.converted_from_string = true; + static_cast( + serdes[pos]->deserialize_one_cell_from_json(*dst_column, value, opt)); + } + } + DCHECK(dst_column->size() == num_rows + 1); + }; + // fill missing column + if (!include_cids.empty()) { + for (auto cid : include_cids) { + auto col_it = col_id_to_idx.find(cid); + if (col_it == col_id_to_idx.end()) { + continue; } - DCHECK(col->size() == num_rows + 1); + fill_column(dst, col_it->second, num_rows); + } + } else { + for (int i = 0; i < dst.columns(); ++i) { + fill_column(dst, i, num_rows); } } } diff --git a/be/src/vec/jsonb/serialize.h b/be/src/vec/jsonb/serialize.h index 9025642cae33c9..47c76a6a5dd63e 100644 --- a/be/src/vec/jsonb/serialize.h +++ b/be/src/vec/jsonb/serialize.h @@ -19,6 +19,8 @@ #include #include +#include +#include #include "olap/tablet_schema.h" #include "runtime/descriptors.h" @@ -34,15 +36,20 @@ namespace doris::vectorized { // use jsonb codec to store row format class JsonbSerializeUtil 
{ public: + // encode partial columns into jsonb + // empty row_store_cids means encode full schema columns for compability static void block_to_jsonb(const TabletSchema& schema, const Block& block, ColumnString& dst, - int num_cols, const DataTypeSerDeSPtrs& serdes); + int num_cols, const DataTypeSerDeSPtrs& serdes, + const std::unordered_set& row_store_cids); // batch rows static void jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const ColumnString& jsonb_column, const std::unordered_map& col_id_to_idx, - Block& dst, const std::vector& default_values); + Block& dst, const std::vector& default_values, + const std::unordered_set& include_cids); // single row static void jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const char* data, size_t size, const std::unordered_map& col_id_to_idx, - Block& dst, const std::vector& default_values); + Block& dst, const std::vector& default_values, + const std::unordered_set& include_cids); }; } // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/jsonb/serialize_test.cpp b/be/test/vec/jsonb/serialize_test.cpp index 04eb96dce62256..3845c689e1e381 100644 --- a/be/test/vec/jsonb/serialize_test.cpp +++ b/be/test/vec/jsonb/serialize_test.cpp @@ -133,7 +133,7 @@ TEST(BlockSerializeTest, Array) { // serialize JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast(*col.get()), block.columns(), - create_data_type_serdes(block.get_data_types())); + create_data_type_serdes(block.get_data_types()), {}); // deserialize TupleDescriptor read_desc(PTupleDescriptor(), true); // slot1 @@ -175,7 +175,7 @@ TEST(BlockSerializeTest, Array) { std::cout << new_block.dump_data() << std::endl; JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()), static_cast(*col.get()), col_uid_to_idx, - new_block, default_values); + new_block, default_values, {}); std::cout << block.dump_data() << std::endl; std::cout << new_block.dump_data() << std::endl; EXPECT_EQ(block.dump_data(), new_block.dump_data()); @@ -225,7 +225,7 @@ TEST(BlockSerializeTest, Map) { std::cout << "serialize to jsonb" << std::endl; JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast(*col.get()), block.columns(), - create_data_type_serdes(block.get_data_types())); + create_data_type_serdes(block.get_data_types()), {}); // deserialize TupleDescriptor read_desc(PTupleDescriptor(), true); // slot @@ -257,7 +257,7 @@ TEST(BlockSerializeTest, Map) { std::cout << "deserialize from jsonb" << std::endl; JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()), static_cast(*col.get()), col_uid_to_idx, - new_block, default_values); + new_block, default_values, {}); std::cout << block.dump_data() << std::endl; std::cout << new_block.dump_data() << std::endl; EXPECT_EQ(block.dump_data(), new_block.dump_data()); @@ -297,7 +297,7 @@ TEST(BlockSerializeTest, Struct) { std::cout << "serialize to jsonb" << std::endl; JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast(*col.get()), block.columns(), - create_data_type_serdes(block.get_data_types())); + create_data_type_serdes(block.get_data_types()), {}); // deserialize TupleDescriptor read_desc(PTupleDescriptor(), true); // slot @@ -328,7 +328,7 @@ TEST(BlockSerializeTest, Struct) { std::cout << "deserialize from jsonb" << std::endl; JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()), static_cast(*col.get()), col_uid_to_idx, - new_block, default_values); + new_block, default_values, {}); std::cout << block.dump_data() << std::endl; std::cout << 
new_block.dump_data() << std::endl; EXPECT_EQ(block.dump_data(), new_block.dump_data()); @@ -478,7 +478,7 @@ TEST(BlockSerializeTest, JsonbBlock) { // serialize JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast(*col.get()), block.columns(), - create_data_type_serdes(block.get_data_types())); + create_data_type_serdes(block.get_data_types()), {}); // deserialize TupleDescriptor read_desc(PTupleDescriptor(), true); for (auto t : cols) { @@ -506,7 +506,7 @@ TEST(BlockSerializeTest, JsonbBlock) { } JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(block.get_data_types()), static_cast(*col.get()), col_uid_to_idx, - new_block, default_values); + new_block, default_values, {}); std::cout << block.dump_data() << std::endl; std::cout << new_block.dump_data() << std::endl; EXPECT_EQ(block.dump_data(), new_block.dump_data()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index 66912307ed169f..ef7d7de63070c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -263,7 +263,9 @@ protected void createRollupReplica() throws AlterCancelException { tbl.getTimeSeriesCompactionEmptyRowsetsThreshold(), tbl.getTimeSeriesCompactionLevelThreshold(), tbl.storeRowColumn(), - binlogConfig, objectPool); + binlogConfig, + tbl.getRowStoreColumnsUniqueIds(tbl.getTableProperty().getCopiedRowStoreColumns()), + objectPool); createReplicaTask.setBaseTablet(tabletIdMap.get(rollupTabletId), baseSchemaHash); if (this.storageFormat != null) { createReplicaTask.setStorageFormat(this.storageFormat); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index ab77e242a8a233..81beb1370ab613 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -83,6 +83,7 @@ import org.apache.doris.common.util.Util; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition; import org.apache.doris.persist.AlterLightSchemaChangeInfo; import org.apache.doris.persist.RemoveAlterJobV2OperationLog; import org.apache.doris.persist.TableAddOrDropColumnsInfo; @@ -1317,6 +1318,28 @@ private void createJob(String rawSql, long dbId, OlapTable olapTable, Map rsColumns = Lists.newArrayList(); + boolean storeRowColumn = false; + try { + storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(propertyMap, true); + rsColumns = PropertyAnalyzer.analyzeRowStoreColumns(propertyMap, + olapTable.getColumns().stream().map(Column::getName).collect(Collectors.toList()), true); + } catch (AnalysisException e) { + throw new DdlException(e.getMessage()); + } + // check row store column has change + boolean hasRowStoreChanged = false; + if (storeRowColumn || (rsColumns != null && !rsColumns.isEmpty())) { + List oriRowStoreColumns = olapTable.getTableProperty().getCopiedRowStoreColumns(); + if ((oriRowStoreColumns != null && !oriRowStoreColumns.equals(rsColumns)) + || storeRowColumn != olapTable.storeRowColumn()) { + hasRowStoreChanged = true; + } + } + // begin checking each table // ATTN: DO NOT change any meta in this loop long tableId = olapTable.getId(); @@ -1378,6 +1401,8 @@ private void createJob(String rawSql, long 
dbId, OlapTable olapTable, Map rowStoreColumns = null; + @SerializedName(value = "storeRowColumn") + protected boolean storeRowColumn = false; + @SerializedName(value = "hasRowStoreChange") + protected boolean hasRowStoreChange = false; + // save all schema change tasks private AgentBatchTask schemaChangeBatchTask = new AgentBatchTask(); @@ -176,6 +183,13 @@ public void setBloomFilterInfo(boolean hasBfChange, Set bfColumns, doubl this.bfFpp = bfFpp; } + public void setStoreRowColumnInfo(boolean hasRowStoreChange, + boolean storeRowColumn, List rowStoreColumns) { + this.hasRowStoreChange = hasRowStoreChange; + this.storeRowColumn = storeRowColumn; + this.rowStoreColumns = rowStoreColumns; + } + public void setAlterIndexInfo(boolean indexChange, List indexes) { this.indexChange = indexChange; this.indexes = indexes; @@ -277,7 +291,9 @@ protected void createShadowIndexReplica() throws AlterCancelException { tbl.getTimeSeriesCompactionEmptyRowsetsThreshold(), tbl.getTimeSeriesCompactionLevelThreshold(), tbl.storeRowColumn(), - binlogConfig, objectPool); + binlogConfig, + tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), + objectPool); createReplicaTask.setBaseTablet(partitionIndexTabletMap.get(partitionId, shadowIdxId) .get(shadowTabletId), originSchemaHash); @@ -701,6 +717,11 @@ private void onFinished(OlapTable tbl) { if (indexChange) { tbl.setIndexes(indexes); } + // update row store + if (hasRowStoreChange) { + tbl.setStoreRowColumn(storeRowColumn); + tbl.setRowStoreColumns(rowStoreColumns); + } // set storage format of table, only set if format is v2 if (storageFormat == TStorageFormat.V2) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index 320974ee014407..d294b40eff2ef0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -448,7 +448,7 @@ public void analyze(Analyzer analyzer) throws UserException { } } // add a hidden column as row store - if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties))) { + if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties), true)) { if (keysDesc != null && keysDesc.getKeysType() == KeysType.AGG_KEYS) { throw new AnalysisException("Aggregate table can't support row column now"); } @@ -470,6 +470,7 @@ public void analyze(Analyzer analyzer) throws UserException { columnDefs.add(ColumnDef.newVersionColumnDef(AggregateType.REPLACE)); } } + Set columnSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); for (ColumnDef columnDef : columnDefs) { columnDef.analyze(engineName.equalsIgnoreCase(DEFAULT_ENGINE_NAME)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java index 0895890533aa4e..27a46f34af68b4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java @@ -318,6 +318,8 @@ public void analyze(Analyzer analyzer) throws AnalysisException { throw new AnalysisException("You can not modify storage vault id"); } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_ESTIMATE_PARTITION_SIZE)) { throw new AnalysisException("You can not modify estimate partition size"); + } else if 
(properties.containsKey(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN)) {
+            // do nothing, will be analyzed when creating alter job
         } else {
             throw new AnalysisException("Unknown table property: " + properties.keySet());
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index 36b68b0d3aa7ff..2186c3138963dc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -2780,6 +2780,10 @@ public boolean checkAndSetPointQuery() {
         if (isPointQuery) {
             return true;
         }
+        if (ConnectContext.get() == null
+                || !ConnectContext.get().getSessionVariable().isEnableShortCircuitQuery()) {
+            return false;
+        }
         eqPredicates = new TreeMap(
                 new Comparator() {
                     @Override
@@ -2820,7 +2824,7 @@ public int compare(SlotRef o1, SlotRef o2) {
         if (eqPredicates == null) {
             return false;
         }
-        if (!olapTable.getEnableUniqueKeyMergeOnWrite() || !olapTable.storeRowColumn()) {
+        if (!olapTable.getEnableUniqueKeyMergeOnWrite()) {
             return false;
         }
         // check if PK columns are fully matched with predicate
diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
index d708c5907de210..6cfafd19fb49b8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
@@ -1071,7 +1071,9 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
         } finally {
             localTbl.readUnlock();
         }
+        Map objectPool = new HashMap();
+        List rowStoreColumns = localTbl.getTableProperty().getCopiedRowStoreColumns();
         for (MaterializedIndex restoredIdx : restorePart.getMaterializedIndices(IndexExtState.VISIBLE)) {
             MaterializedIndexMeta indexMeta = localTbl.getIndexMetaByIndexId(restoredIdx.getId());
             List indexes = restoredIdx.getId() == localTbl.getBaseIndexId()
@@ -1105,7 +1107,9 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
                         localTbl.getTimeSeriesCompactionEmptyRowsetsThreshold(),
                         localTbl.getTimeSeriesCompactionLevelThreshold(),
                         localTbl.storeRowColumn(),
-                        binlogConfig, objectPool);
+                        binlogConfig,
+                        localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
+ objectPool); task.setInvertedIndexStorageFormat(localTbl.getInvertedIndexStorageFormat()); task.setInRestoreMode(true); batchTask.addTask(task); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index ea6b20d7b98326..69d0f05892e84f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -3586,8 +3586,14 @@ public static void getDdlStmt(DdlStmt ddlStmt, String dbName, TableIf table, Lis // store row column if (olapTable.storeRowColumn()) { - sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN).append("\" = \""); - sb.append(olapTable.storeRowColumn()).append("\""); + List rsColumnNames = olapTable.getTableProperty().getCopiedRowStoreColumns(); + if (rsColumnNames != null && !rsColumnNames.isEmpty()) { + sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_ROW_STORE_COLUMNS).append("\" = \""); + sb.append(Joiner.on(",").join(rsColumnNames)).append("\""); + } else { + sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN).append("\" = \""); + sb.append(olapTable.storeRowColumn()).append("\""); + } } // skip inverted index on load diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 479d214b7a07cf..161fd3f8dad88f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1228,6 +1228,22 @@ public void setBloomFilterInfo(Set bfColumns, double bfFpp) { this.bfFpp = bfFpp; } + public void setRowStoreColumns(List rowStoreColumns) { + getOrCreatTableProperty().setRowStoreColumns(rowStoreColumns); + } + + public List getRowStoreColumnsUniqueIds(List rsColumnNames) { + List columnIds = Lists.newArrayList(); + if (rsColumnNames != null) { + for (String colName : rsColumnNames) { + Column col = nameToColumn.get(colName); + Preconditions.checkNotNull(col); + columnIds.add(col.getUniqueId()); + } + } + return columnIds; + } + public String getSequenceMapCol() { if (tableProperty == null) { return null; @@ -1969,6 +1985,15 @@ public Column getBaseColumn(String columnName) { return null; } + public Column getBaseColumn(int colUniqueId) { + for (Column column : getBaseSchema()) { + if (column.getUniqueId() == colUniqueId) { + return column; + } + } + return null; + } + public int getKeysNum() { int keysNum = 0; for (Column column : getBaseSchema()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index dfac5567c64f3c..a7667ec2e31426 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -408,7 +408,7 @@ public void setNewFullSchema(List newSchema) { } public Column getColumn(String name) { - return nameToColumn.get(name); + return nameToColumn.getOrDefault(name, null); } public List getColumns() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java index f9625a7506e57a..479251ab9d9981 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java @@ -32,7 +32,9 @@ import com.fasterxml.jackson.core.JsonProcessingException; import 
com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Joiner; import com.google.common.base.Strings; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; import org.apache.logging.log4j.LogManager; @@ -41,7 +43,9 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -70,6 +74,9 @@ public class TableProperty implements Writable { private TStorageMedium storageMedium = null; + // which columns stored in RowStore column + private List rowStoreColumns; + /* * the default storage format of this table. * DEFAULT: depends on BE's config 'default_rowset_type' @@ -240,13 +247,18 @@ public boolean enableSingleReplicaCompaction() { public TableProperty buildStoreRowColumn() { storeRowColumn = Boolean.parseBoolean( properties.getOrDefault(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN, "false")); - // Remove deprecated prefix and try again - String deprecatedPrefix = "deprecated_"; - if (!storeRowColumn && PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN.startsWith(deprecatedPrefix)) { - storeRowColumn = Boolean.parseBoolean( - properties.getOrDefault( - PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN.substring(deprecatedPrefix.length()), "false")); + return this; + } + + public TableProperty buildRowStoreColumns() { + String value = properties.get(PropertyAnalyzer.PROPERTIES_ROW_STORE_COLUMNS); + // set empty row store columns by default + if (null == value) { + return this; } + String[] rsColumnArr = value.split(PropertyAnalyzer.COMMA_SEPARATOR); + rowStoreColumns = Lists.newArrayList(); + rowStoreColumns.addAll(Arrays.asList(rsColumnArr)); return this; } @@ -387,6 +399,13 @@ public void removeInvalidProperties() { properties.remove(PropertyAnalyzer.PROPERTIES_COLOCATE_WITH); } + public List getCopiedRowStoreColumns() { + if (rowStoreColumns == null) { + return null; + } + return Lists.newArrayList(rowStoreColumns); + } + public TableProperty buildBinlogConfig() { BinlogConfig binlogConfig = new BinlogConfig(); if (properties.containsKey(PropertyAnalyzer.PROPERTIES_BINLOG_ENABLE)) { @@ -578,6 +597,16 @@ public int getGroupCommitDataBytes() { Integer.toString(PropertyAnalyzer.PROPERTIES_GROUP_COMMIT_DATA_BYTES_DEFAULT_VALUE))); } + public void setRowStoreColumns(List rowStoreColumns) { + if (rowStoreColumns != null && !rowStoreColumns.isEmpty()) { + modifyTableProperties(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN, "true"); + buildStoreRowColumn(); + modifyTableProperties(PropertyAnalyzer.PROPERTIES_ROW_STORE_COLUMNS, + Joiner.on(",").join(rowStoreColumns)); + buildRowStoreColumns(); + } + } + public void buildReplicaAllocation() { try { // Must copy the properties because "analyzeReplicaAllocation" will remove the property @@ -612,6 +641,7 @@ public static TableProperty read(DataInput in) throws IOException { .buildBinlogConfig() .buildEnableLightSchemaChange() .buildStoreRowColumn() + .buildRowStoreColumns() .buildSkipWriteIndexOnLoad() .buildCompactionPolicy() .buildTimeSeriesCompactionGoalSizeMbytes() diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 19aa966353b6b6..3eccf226006171 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ 
-52,16 +52,19 @@ import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; public class PropertyAnalyzer { @@ -138,7 +141,9 @@ public class PropertyAnalyzer { public static final String PROPERTIES_ENABLE_SINGLE_REPLICA_COMPACTION = "enable_single_replica_compaction"; - public static final String PROPERTIES_STORE_ROW_COLUMN = "store_row_column"; + public static final String PROPERTIES_STORE_ROW_COLUMN = "store_row_column"; // deprecated, superseded by row_store_columns + + public static final String PROPERTIES_ROW_STORE_COLUMNS = "row_store_columns"; public static final String PROPERTIES_SKIP_WRITE_INDEX_ON_LOAD = "skip_write_index_on_load"; @@ -189,7 +194,7 @@ public class PropertyAnalyzer { // display/DORIS/DSIP-018%3A+Support+Merge-On-Write+implementation+for+UNIQUE+KEY+data+model) public static final String ENABLE_UNIQUE_KEY_MERGE_ON_WRITE = "enable_unique_key_merge_on_write"; private static final Logger LOG = LogManager.getLogger(PropertyAnalyzer.class); - private static final String COMMA_SEPARATOR = ","; + public static final String COMMA_SEPARATOR = ","; private static final double MAX_FPP = 0.05; private static final double MIN_FPP = 0.0001; @@ -766,7 +771,43 @@ public static Boolean analyzeEnableDuplicateWithoutKeysByDefault(Map - public static Boolean analyzeStoreRowColumn(Map properties) throws AnalysisException { + public static List<String> analyzeRowStoreColumns(Map<String, String> properties, + List<String> columns, + boolean stripProperty) throws AnalysisException { + List<String> rowStoreColumns = Lists.newArrayList(); + String value = properties.get(PROPERTIES_ROW_STORE_COLUMNS); + // property not set: no row store columns + if (null == value) { + return null; + } + if (stripProperty) { + properties.remove(PROPERTIES_ROW_STORE_COLUMNS); + } + String[] rsColumnArr = value.split(COMMA_SEPARATOR); + rowStoreColumns.addAll(Arrays.asList(rsColumnArr)); + if (rowStoreColumns.isEmpty()) { + throw new AnalysisException(PROPERTIES_ROW_STORE_COLUMNS + " must not be empty"); + } + // every row store column must exist in the table's column definitions + List<String> invalidColumns = rowStoreColumns.stream() + .filter(expectedColName -> columns.stream().noneMatch( + column -> column.equalsIgnoreCase(expectedColName))) + .collect(Collectors.toList()); + if (!invalidColumns.isEmpty()) { + throw new AnalysisException( + "Column does not exist in table. Invalid columns: " + + String.join(", ", invalidColumns)); + } + return rowStoreColumns; + } + + public static Boolean analyzeStoreRowColumn(Map<String, String> properties, + boolean stripProperty) throws AnalysisException { if (properties == null || properties.isEmpty()) { return false; } @@ -775,14 +816,16 @@ public static Boolean analyzeStoreRowColumn(Map properties) thro if (null == value) { return false; } - properties.remove(PROPERTIES_STORE_ROW_COLUMN); + if (stripProperty) { + properties.remove(PROPERTIES_STORE_ROW_COLUMN); + } if (value.equalsIgnoreCase("true")) { return true; } else if (value.equalsIgnoreCase("false")) { return false; + } else { + throw new AnalysisException(PROPERTIES_STORE_ROW_COLUMN + " must be `true` or `false`"); } - throw new AnalysisException(PROPERTIES_STORE_ROW_COLUMN - + " must be `true` or `false`"); } public static Boolean analyzeSkipWriteIndexOnLoad(Map properties) throws AnalysisException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index ad1bbe5f72db42..a3fa3a71097cf4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -2029,6 +2029,7 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa int totalTaskNum = index.getTablets().size() * totalReplicaNum; MarkedCountDownLatch countDownLatch = new MarkedCountDownLatch(totalTaskNum); AgentBatchTask batchTask = new AgentBatchTask(); + List<String> rowStoreColumns = tbl.getTableProperty().getCopiedRowStoreColumns(); for (Tablet tablet : index.getTablets()) { long tabletId = tablet.getId(); for (Replica replica : tablet.getReplicas()) { @@ -2047,7 +2048,9 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa tbl.getTimeSeriesCompactionTimeThresholdSeconds(), tbl.getTimeSeriesCompactionEmptyRowsetsThreshold(), tbl.getTimeSeriesCompactionLevelThreshold(), - tbl.storeRowColumn(), binlogConfig, objectPool); + tbl.storeRowColumn(), binlogConfig, + tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), + objectPool); task.setStorageFormat(tbl.getStorageFormat()); task.setInvertedIndexStorageFormat(tbl.getInvertedIndexStorageFormat()); @@ -2547,17 +2550,29 @@ private boolean createOlapTable(Database db, CreateTableStmt stmt) throws UserEx } } - boolean storeRowColumn = false; + // analyze row store columns try { - storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(properties); + boolean storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(properties, true); if (storeRowColumn && !enableLightSchemaChange) { throw new DdlException( "Row store column rely on light schema change, enable light schema change first"); } + olapTable.setStoreRowColumn(storeRowColumn); + List<String> rowStoreColumns = PropertyAnalyzer.analyzeRowStoreColumns(properties, + baseSchema.stream().map(Column::getName).collect(Collectors.toList()), true); + // treat an empty list as unset + if (rowStoreColumns != null && rowStoreColumns.isEmpty()) { + rowStoreColumns = null; + } + olapTable.setRowStoreColumns(rowStoreColumns); } catch (AnalysisException e) { throw new DdlException(e.getMessage()); } - olapTable.setStoreRowColumn(storeRowColumn); // set skip inverted index on load boolean skipWriteIndexOnLoad = 
PropertyAnalyzer.analyzeSkipWriteIndexOnLoad(properties); diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index e74467ea6db615..fa8727a63b902a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -860,6 +860,8 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta double bfFpp = olapTable.getBfFpp(); List indexes = indexId == olapTable.getBaseIndexId() ? olapTable.getCopiedIndexes() : null; + List rowStoreColumns = + olapTable.getTableProperty().getCopiedRowStoreColumns(); CreateReplicaTask createReplicaTask = new CreateReplicaTask(backendId, dbId, tableId, partitionId, indexId, tabletId, replica.getId(), indexMeta.getShortKeyColumnCount(), @@ -882,7 +884,9 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta olapTable.getTimeSeriesCompactionEmptyRowsetsThreshold(), olapTable.getTimeSeriesCompactionLevelThreshold(), olapTable.storeRowColumn(), - binlogConfig, objectPool); + binlogConfig, + olapTable.getRowStoreColumnsUniqueIds(rowStoreColumns), + objectPool); createReplicaTask.setIsRecoverTask(true); createReplicaTask.setInvertedIndexStorageFormat(olapTable diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java index c4de4dca35df4d..e53d54fda37bd0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java @@ -385,7 +385,7 @@ private void getColumns(Plan plan) { if (properties != null) { try { boolean storeRowColumn = - PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties)); + PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties), true); if (storeRowColumn) { columns.add(ColumnDefinition.newRowStoreColumnDefinition(null)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java index 07984939320dc3..15da1a9c4d1f10 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java @@ -413,15 +413,20 @@ public void validate(ConnectContext ctx) { // add a hidden column as row store boolean storeRowColumn = false; + List rowStoreColumns = null; if (properties != null) { try { storeRowColumn = - PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties)); + PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties), true); + rowStoreColumns = PropertyAnalyzer.analyzeRowStoreColumns(Maps.newHashMap(properties), + columns.stream() + .map(ColumnDefinition::getName) + .collect(Collectors.toList()), true); } catch (Exception e) { throw new AnalysisException(e.getMessage(), e.getCause()); } } - if (storeRowColumn) { + if (storeRowColumn || (rowStoreColumns != null && !rowStoreColumns.isEmpty())) { if (keysType.equals(KeysType.AGG_KEYS)) { throw new AnalysisException("Aggregate table can't support row column now"); } @@ -437,6 +442,7 @@ public void validate(ConnectContext ctx) { 
columns.add(ColumnDefinition.newRowStoreColumnDefinition(null)); } } + if (Config.enable_hidden_version_column_by_default && keysType.equals(KeysType.UNIQUE_KEYS)) { if (isEnableMergeOnWrite) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index f25ac66befc63b..b730a67c23ea4c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -284,6 +284,11 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_PROJECTION = "enable_projection"; + public static final String ENABLE_SHORT_CIRCUIT_QUERY = "enable_short_circuit_query"; + + public static final String ENABLE_SHORT_CIRCUIT_QUERY_ACCESS_COLUMN_STORE + = "enable_short_circuit_query_access_column_store"; + public static final String CHECK_OVERFLOW_FOR_DECIMAL = "check_overflow_for_decimal"; public static final String DECIMAL_OVERFLOW_SCALE = "decimal_overflow_scale"; @@ -1161,6 +1166,12 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) { @VariableMgr.VarAttr(name = ENABLE_PROJECTION) private boolean enableProjection = true; + @VariableMgr.VarAttr(name = ENABLE_SHORT_CIRCUIT_QUERY) + private boolean enableShortCircuitQuery = true; + + @VariableMgr.VarAttr(name = ENABLE_SHORT_CIRCUIT_QUERY_ACCESS_COLUMN_STORE) + private boolean enableShortCircuitQueryAccessColumnStore = false; + @VariableMgr.VarAttr(name = CHECK_OVERFLOW_FOR_DECIMAL) private boolean checkOverflowForDecimal = true; @@ -3004,6 +3015,10 @@ public boolean isEnableProjection() { return enableProjection; } + public boolean isEnableShortCircuitQuery() { + return enableShortCircuitQuery; + } + public boolean checkOverflowForDecimal() { return checkOverflowForDecimal; } @@ -3430,6 +3445,7 @@ public TQueryOptions toThrift() { tResult.setEnableLocalMergeSort(enableLocalMergeSort); tResult.setEnableParallelResultSink(enableParallelResultSink); + tResult.setEnableShortCircuitQueryAccessColumnStore(enableShortCircuitQueryAccessColumnStore); return tResult; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java index c95cc2670768c2..cafff6bdda87ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java @@ -60,16 +60,16 @@ public class AlterReplicaTask extends AgentTask { private long expiration; private String vaultId; - /** * AlterReplicaTask constructor. 
* */ + public AlterReplicaTask(long backendId, long dbId, long tableId, long partitionId, long rollupIndexId, long baseIndexId, long rollupTabletId, long baseTabletId, long newReplicaId, int newSchemaHash, int baseSchemaHash, long version, long jobId, AlterJobV2.JobType jobType, Map defineExprs, DescriptorTable descTable, List baseSchemaColumns, Map objectPool, Expr whereClause, long expiration, String vaultId) { super(null, backendId, TTaskType.ALTER, dbId, tableId, partitionId, rollupIndexId, rollupTabletId); this.baseTabletId = baseTabletId; @@ -172,7 +172,8 @@ public TAlterTabletReqV2 toThrift() { if (value == null) { List columns = new ArrayList(); for (Column column : baseSchemaColumns) { - columns.add(column.toThrift()); + TColumn tColumn = column.toThrift(); + columns.add(tColumn); } objectPool.put(baseSchemaColumns, columns); req.setColumns(columns); diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java index 1de5d4e8d7d86a..435e50807dfdc3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -124,7 +124,8 @@ public class CreateReplicaTask extends AgentTask { private BinlogConfig binlogConfig; private List clusterKeyIndexes; private Map objectPool; + private List<Integer> rowStoreColumnUniqueIds; public CreateReplicaTask(long backendId, long dbId, long tableId, long partitionId, long indexId, long tabletId, long replicaId, short shortKeyColumnCount, int schemaHash, long version, @@ -148,6 +152,7 @@ public CreateReplicaTask(long backendId, long dbId, long tableId, long partition long timeSeriesCompactionLevelThreshold, boolean storeRowColumn, BinlogConfig binlogConfig, + List<Integer> rowStoreColumnUniqueIds, Map objectPool) { super(null, backendId, TTaskType.CREATE, dbId, tableId, partitionId, indexId, tabletId); @@ -174,6 +179,7 @@ public CreateReplicaTask(long backendId, long dbId, long tableId, long partition this.tabletType = tabletType; this.dataSortInfo = dataSortInfo; this.enableUniqueKeyMergeOnWrite = (keysType == KeysType.UNIQUE_KEYS && enableUniqueKeyMergeOnWrite); + this.rowStoreColumnUniqueIds = rowStoreColumnUniqueIds; if (storagePolicy != null && !storagePolicy.isEmpty()) { Optional policy = Env.getCurrentEnv().getPolicyMgr() .findPolicy(storagePolicy, PolicyTypeEnum.STORAGE); @@ -305,6 +311,7 @@ public TCreateTabletReq toThrift() { tSchema.setDeleteSignIdx(deleteSign); tSchema.setSequenceColIdx(sequenceCol); tSchema.setVersionColIdx(versionCol); + tSchema.setRowStoreColCids(rowStoreColumnUniqueIds); if (!CollectionUtils.isEmpty(clusterKeyIndexes)) { tSchema.setClusterKeyIdxes(clusterKeyIndexes); if (LOG.isDebugEnabled()) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java index b604076ddba6ab..f2295dcd5bc8ee 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java @@ -107,7 +107,7 @@ public void setUp() throws AnalysisException { createReplicaTask = new CreateReplicaTask(backendId1, dbId, tableId, partitionId, indexId1, tabletId1, replicaId1, shortKeyNum, schemaHash1, version, KeysType.AGG_KEYS, 
storageType, TStorageMedium.SSD, columns, null, 0, latch, null, false, TTabletType.TABLET_TYPE_DISK, null, - TCompressionType.LZ4F, false, "", false, false, false, "", 0, 0, 0, 0, 0, false, null, objectPool); + TCompressionType.LZ4F, false, "", false, false, false, "", 0, 0, 0, 0, 0, false, null, null, objectPool); // drop dropTask = new DropReplicaTask(backendId1, tabletId1, replicaId1, schemaHash1, false); diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 82a9011dc1c78e..2d54231a02ca6e 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -380,6 +380,8 @@ message TabletSchemaPB { optional bool skip_write_index_on_load = 23 [default=false]; repeated int32 cluster_key_idxes = 24; optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; + // column unique ids for row store columns + repeated int32 row_store_column_cids = 26; } message TabletSchemaCloudPB { @@ -406,6 +408,8 @@ message TabletSchemaCloudPB { optional bool skip_write_index_on_load = 23 [default=false]; repeated int32 cluster_key_idxes = 24; optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; + // column unique ids for row store columns + repeated int32 row_store_column_cids = 26; optional bool is_dynamic_schema = 100 [default=false]; } diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index cc5dc367915fa8..8b4dcf6f2d3cb5 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -45,6 +45,8 @@ struct TTabletSchema { 17: optional bool enable_single_replica_compaction = false 18: optional bool skip_write_index_on_load = false 19: optional list cluster_key_idxes + // col unique id for row store column + 20: optional list row_store_col_cids } // this enum stands for different storage format in src_backends diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 439c666349d9f4..1c84a6b8f24f3e 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -304,6 +304,8 @@ struct TQueryOptions { 114: optional bool enable_parallel_result_sink = false; + 115: optional bool enable_short_circuit_query_access_column_store = false; + // For cloud, to control if the content would be written into file cache 1000: optional bool disable_file_cache = false } diff --git a/regression-test/data/compaction/test_compaction_uniq_keys_row_store.out b/regression-test/data/compaction/test_compaction_uniq_keys_row_store.out index 7c163c62d335f1..19474dc72c2aee 100644 --- a/regression-test/data/compaction/test_compaction_uniq_keys_row_store.out +++ b/regression-test/data/compaction/test_compaction_uniq_keys_row_store.out @@ -1,4 +1,7 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql_row_size -- +1416 + -- !point_select -- 1 2017-10-01 2017-10-01 2017-10-01T11:11:11.021 2017-10-01T11:11:11.011 Beijing 10 1 2020-01-01T00:00 2020-01-01T00:00 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 2020-01-01T00:00 1 30 20 @@ -23,6 +26,30 @@ -- !point_select -- 4 2017-10-01 2017-10-01 2017-10-01T11:11:11.028 2017-10-01T11:11:11.018 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20 +-- !point_select -- +2017-10-01T11:11:11.021 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 30 + +-- !point_select -- +2017-10-01T11:11:11.022 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 31 + +-- !point_select -- +2017-10-01T11:11:11.023 2017-10-01T11:11:11.150 2017-10-01T11:11:11.130111 31 + +-- !point_select -- +2017-10-01T11:11:11.024 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 32 + +-- !point_select -- +2017-10-01T11:11:11.025 2017-10-01T11:11:11.100 2017-10-01T11:11:11.140111 32 + +-- !point_select -- +2017-10-01T11:11:11.026 2017-10-01T11:11:11.110 2017-10-01T11:11:11.150111 33 + +-- !point_select -- +2017-10-01T11:11:11.027 \N \N 34 + +-- !point_select -- +2017-10-01T11:11:11.028 \N \N 34 + -- !point_select -- 1 2017-10-01 2017-10-01 2017-10-01T11:11:11.021 2017-10-01T11:11:11.011 Beijing 10 1 2020-01-01T00:00 2020-01-01T00:00 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 2020-01-01T00:00 1 30 20 @@ -47,3 +74,30 @@ -- !point_select -- 4 2017-10-01 2017-10-01 2017-10-01T11:11:11.028 2017-10-01T11:11:11.018 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20 +-- !point_select -- +2017-10-01T11:11:11.021 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 30 + +-- !point_select -- +2017-10-01T11:11:11.022 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 31 + +-- !point_select -- +2017-10-01T11:11:11.023 2017-10-01T11:11:11.150 2017-10-01T11:11:11.130111 31 + +-- !point_select -- +2017-10-01T11:11:11.024 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 32 + +-- !point_select -- +2017-10-01T11:11:11.025 2017-10-01T11:11:11.100 2017-10-01T11:11:11.140111 32 + +-- !point_select -- +2017-10-01T11:11:11.026 2017-10-01T11:11:11.110 2017-10-01T11:11:11.150111 33 + +-- !point_select -- +2017-10-01T11:11:11.027 \N \N 34 + +-- !point_select -- +2017-10-01T11:11:11.028 \N \N 34 + +-- !sql_row_size -- +1416 + diff --git a/regression-test/data/point_query_p0/load.out b/regression-test/data/point_query_p0/load.out index ef2d9c27021949..e2a2ef88733269 100644 --- a/regression-test/data/point_query_p0/load.out +++ b/regression-test/data/point_query_p0/load.out @@ -23,3 +23,18 @@ -2106969609 true 10 29572 16738 1736115820 -957295886 -13319.206 -1.333603562816737E9 91224478600376111.942 69457425159617037.453 2022-09-06 2022-05-08T19:52:36 2022-04-05 2022-08-17T19:23:31 222.79.139.99 WalterFox@Voomm.net Oxford Alley 77 -2102307005 true 10 -23674 24613 -1810828490 -47095409 -14686.167 2.072108685694799E9 39847820962230526.125 584354832299375.156 2022-03-27 2022-02-11T13:46:06 2022-12-25 2022-11-28T09:37:49 213.146.33.250 JuliaSimmons@Zazio.info Eagle Crest Terrace 84 +-- !sql -- +26743529 + +-- !sql -- +103 + +-- !sql -- +2999834 + +-- !sql -- +-2147303679 2022-12-02T04:39:45 + +-- !sql -- +-2147303679 84525658185172942.967 + diff --git a/regression-test/data/point_query_p0/test_rowstore.out b/regression-test/data/point_query_p0/test_rowstore.out index 8675c7e9d8fdcf..34e40867d6a831 100644 --- a/regression-test/data/point_query_p0/test_rowstore.out +++ b/regression-test/data/point_query_p0/test_rowstore.out @@ -1,4 +1,93 @@ -- This 
file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +64 +59 +58 + +-- !sql -- +30 +30 +30 + +-- !sql -- +18 +18 +18 + +-- !sql -- +30 +30 +30 + +-- !point_select -- +11111111111111111111111111111111111111 3 + +-- !point_select -- +222222222222222222222222222222222 3 + +-- !point_select -- +33333333333333333333333333333333 3 + +-- !point_select -- +3 + +-- !point_select -- +3 + +-- !point_select -- +3 + +-- !point_select -- +33333333333333333333333333333333 + +-- !point_select -- +3 33333333333333333333333333333333 + +-- !point_select -- +3 + +-- !point_select -- +3 + +-- !point_select -- +3 + +-- !point_select -- +3 + +-- !point_select -- +2021-02-01T11:11:11 + +-- !point_select -- +2022-02-01T11:11:11 + +-- !point_select -- +2023-02-01T11:11:11 + +-- !point_select -- +2017-10-01T11:11:11.021 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 30 + +-- !point_select -- +2017-10-01T11:11:11.022 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 31 + +-- !point_select -- +2017-10-01T11:11:11.023 2017-10-01T11:11:11.150 2017-10-01T11:11:11.130111 31 + +-- !point_select -- +2017-10-01T11:11:11.024 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 32 + +-- !point_select -- +2017-10-01T11:11:11.025 2017-10-01T11:11:11.100 2017-10-01T11:11:11.140111 32 + +-- !point_select -- +2017-10-01T11:11:11.026 2017-10-01T11:11:11.110 2017-10-01T11:11:11.150111 33 + +-- !point_select -- +2017-10-01T11:11:11.027 \N \N 34 + +-- !point_select -- +2017-10-01T11:11:11.028 \N \N 34 + -- !sql -- 1 abc 1111919.123456789190000000 diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.out index bb14012688c756..b648ecaee9a492 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.out @@ -3,3 +3,6 @@ 1 doris 200 123 1 2 doris2 400 223 1 +-- !sql -- +137 71 + diff --git a/regression-test/suites/compaction/test_compaction_uniq_keys_row_store.groovy b/regression-test/suites/compaction/test_compaction_uniq_keys_row_store.groovy index 5c3011f3882cd1..fca22dc4022697 100644 --- a/regression-test/suites/compaction/test_compaction_uniq_keys_row_store.groovy +++ b/regression-test/suites/compaction/test_compaction_uniq_keys_row_store.groovy @@ -96,6 +96,26 @@ suite("test_compaction_uniq_keys_row_store", "nonConcurrent") { setPrepareStmtArgs stmt, 4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.28', '2017-10-01 11:11:11.18', 'Beijing', 10, 1 qe_point_select stmt } + + def result2 = connect(user=user, password=password, url=url) { + def stmt = prepareStatement """ SELECT datetimev2_1,datetime_val1,datetime_val2,max_dwell_time FROM ${tableName} t where user_id = ? and date = ? and datev2 = ? and datetimev2_1 = ? and datetimev2_2 = ? and city = ? and age = ? 
and sex = ?; """ + setPrepareStmtArgs stmt, 1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.21', '2017-10-01 11:11:11.11', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.22', '2017-10-01 11:11:11.12', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.23', '2017-10-01 11:11:11.13', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.24', '2017-10-01 11:11:11.14', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.25', '2017-10-01 11:11:11.15', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.26', '2017-10-01 11:11:11.16', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.27', '2017-10-01 11:11:11.17', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.28', '2017-10-01 11:11:11.18', 'Beijing', 10, 1 + qe_point_select stmt + } } def user = context.config.jdbcUser @@ -121,7 +141,11 @@ suite("test_compaction_uniq_keys_row_store", "nonConcurrent") { `max_dwell_time` INT DEFAULT "0" COMMENT "用户最大停留时间", `min_dwell_time` INT DEFAULT "99999" COMMENT "用户最小停留时间") UNIQUE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`) - PROPERTIES ( "replication_num" = "1", "enable_unique_key_merge_on_write" = "true", "light_schema_change" = "true", "store_row_column" = "true" ); + PROPERTIES ( "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "store_row_column" = "true" + ); """ sql """ INSERT INTO ${tableName} VALUES @@ -155,6 +179,7 @@ suite("test_compaction_uniq_keys_row_store", "nonConcurrent") { sql """ INSERT INTO ${tableName} VALUES (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.028', '2017-10-01 11:11:11.018', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20) """ + qt_sql_row_size "select sum(length(__DORIS_ROW_STORE_COL__)) from regression_test_serving_p0.compaction_uniq_keys_row_store_regression_test" //TabletId,ReplicaIdBackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,QueryHits,PathHash,MetaUrl,CompactionStatus tablets = sql_return_maparray """ show tablets from ${tableName}; """ @@ -210,6 +235,7 @@ suite("test_compaction_uniq_keys_row_store", "nonConcurrent") { } assert (rowCount < 8 * replicaNum) checkValue() + qt_sql_row_size "select sum(length(__DORIS_ROW_STORE_COL__)) from regression_test_serving_p0.compaction_uniq_keys_row_store_regression_test" } finally { // try_sql("DROP TABLE IF EXISTS ${tableName}") } diff --git a/regression-test/suites/point_query_p0/load.groovy b/regression-test/suites/point_query_p0/load.groovy index 2e194b83efc4bb..de425bfcdb0541 100644 --- a/regression-test/suites/point_query_p0/load.groovy +++ b/regression-test/suites/point_query_p0/load.groovy @@ -17,8 +17,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods -suite("test_point_query_load", "p0") { - +suite("test_load_and_schema_change_row_store", "p0,nonConcurrent") { def dataFile = """${getS3Url()}/regression/datatypes/test_scalar_types_10w.csv""" // define dup key table1 
@@ -48,7 +47,7 @@ suite("test_point_query_load", "p0") { DUPLICATE KEY(`k1`) COMMENT 'OLAP' DISTRIBUTED BY HASH(`k1`) BUCKETS 10 - PROPERTIES("replication_num" = "1", "store_row_column" = "true"); + PROPERTIES("replication_num" = "1", "row_store_columns" = "k1,c_bool,c_tinyint,c_bigint,c_decimal,c_decimalv3,c_datev2,c_string"); """ // load data @@ -76,20 +75,88 @@ suite("test_point_query_load", "p0") { ALTER table ${testTable} MODIFY COLUMN c_int BIGINT; """ def getJobState = { tableName -> - def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ - return jobStateResult[0][9] - } - int max_try_time = 100 - while (max_try_time--){ - String result = getJobState(testTable) - if (result == "FINISHED") { - break - } else { - sleep(2000) - if (max_try_time < 1){ - assertEquals(1,2) - } - } - } - sql "INSERT INTO ${testTable} SELECT * from ${testTable}" + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + def wait_job_done = { tableName -> + def max_try_time = 100 + while (max_try_time--){ + String result = getJobState("${tableName}") + if (result == "FINISHED") { + break + } else { + sleep(2000) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + } + + sql "DROP TABLE IF EXISTS tbl_scalar_types_dup_1 FORCE" + sql """ + CREATE TABLE IF NOT EXISTS tbl_scalar_types_dup_1 ( + `k1` bigint(11) NULL, + `c_bool` boolean NULL, + `c_tinyint` tinyint(4) NULL, + `c_smallint` smallint(6) NULL, + `c_int` int(11) NULL, + `c_bigint` bigint(20) NULL, + `c_largeint` largeint(40) NULL, + `c_float` float NULL, + `c_double` double NULL, + `c_decimal` decimal(20, 3) NULL, + `c_decimalv3` decimalv3(20, 3) NULL, + `c_date` date NULL, + `c_datetime` datetime NULL, + `c_datev2` datev2 NULL, + `c_datetimev2` datetimev2(0) NULL, + `c_char` char(15) NULL, + `c_varchar` varchar(100) NULL, + `c_string` text NULL + ) ENGINE=OLAP + UNIQUE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + wait_job_done.call("tbl_scalar_types_dup") + sql "INSERT INTO tbl_scalar_types_dup_1 SELECT * from tbl_scalar_types_dup" + sql """alter table tbl_scalar_types_dup_1 set ("bloom_filter_columns" = "c_largeint")""" + wait_job_done.call("tbl_scalar_types_dup_1") + sql """alter table tbl_scalar_types_dup_1 set ("store_row_column" = "true")""" + wait_job_done.call("tbl_scalar_types_dup_1") + qt_sql "select sum(length(__DORIS_ROW_STORE_COL__)) from tbl_scalar_types_dup_1" + sql """ + ALTER table tbl_scalar_types_dup_1 ADD COLUMN new_column1 INT default "123"; + """ + sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from tbl_scalar_types_dup_1 where k1 = -2147303679" + sql """insert into tbl_scalar_types_dup_1(new_column1) values (9999999)""" + qt_sql """select length(__DORIS_ROW_STORE_COL__) from tbl_scalar_types_dup_1 where new_column1 = 9999999""" + + explain { + sql("select /*+ SET_VAR(enable_nereids_planner=false)*/ * from tbl_scalar_types_dup_1 where k1 = -2147303679") + contains "SHORT-CIRCUIT" + } + sql """alter table tbl_scalar_types_dup_1 set ("row_store_columns" = "k1,c_datetimev2")""" + wait_job_done.call("tbl_scalar_types_dup_1") + qt_sql "select sum(length(__DORIS_ROW_STORE_COL__)) from tbl_scalar_types_dup_1" + test { + sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from tbl_scalar_types_dup_1 where k1 = -2147303679" + exception("Not support column store") + } + explain { + 
sql("select /*+ SET_VAR(enable_nereids_planner=false)*/ k1, c_datetimev2 from tbl_scalar_types_dup_1 where k1 = -2147303679") + contains "SHORT-CIRCUIT" + } + qt_sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ k1, c_datetimev2 from tbl_scalar_types_dup_1 where k1 = -2147303679" + + sql """alter table tbl_scalar_types_dup_1 set ("row_store_columns" = "k1,c_decimalv3")""" + wait_job_done.call("tbl_scalar_types_dup_1") + test { + sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ k1,c_datetimev2 from tbl_scalar_types_dup_1 where k1 = -2147303679" + exception("Not support column store") + } + qt_sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ k1, c_decimalv3 from tbl_scalar_types_dup_1 where k1 = -2147303679" } diff --git a/regression-test/suites/point_query_p0/test_rowstore.groovy b/regression-test/suites/point_query_p0/test_rowstore.groovy index 61b8d378ce978a..47a2f9fe000c1b 100644 --- a/regression-test/suites/point_query_p0/test_rowstore.groovy +++ b/regression-test/suites/point_query_p0/test_rowstore.groovy @@ -15,7 +15,280 @@ // specific language governing permissions and limitations // under the License. -suite("test_rowstore", "p0") { +suite("test_rowstore", "p0,nonConcurrent") { + // Parse url + String jdbcUrl = context.config.jdbcUrl + def user = context.config.jdbcUser + def password = context.config.jdbcPassword + String urlWithoutSchema = jdbcUrl.substring(jdbcUrl.indexOf("://") + 3) + def sql_ip = urlWithoutSchema.substring(0, urlWithoutSchema.indexOf(":")) + def realDb = "regression_test_point_query_p0" + def sql_port + if (urlWithoutSchema.indexOf("/") >= 0) { + // e.g: jdbc:mysql://locahost:8080/?a=b + sql_port = urlWithoutSchema.substring(urlWithoutSchema.indexOf(":") + 1, urlWithoutSchema.indexOf("/")) + } else { + // e.g: jdbc:mysql://locahost:8080 + sql_port = urlWithoutSchema.substring(urlWithoutSchema.indexOf(":") + 1) + } + def prepare_url = "jdbc:mysql://" + sql_ip + ":" + sql_port + "/" + realDb + "?&useServerPrepStmts=true" + + sql "DROP TABLE IF EXISTS table_with_column_group" + sql """ + CREATE TABLE IF NOT EXISTS table_with_column_group ( + `k1` int(11) NULL COMMENT "", + `v1` text NULL COMMENT "", + `v2` bigint NULL COMMENT "", + `v3` double NULL COMMENT "", + `v4` datetime NULL COMMENT "" + ) ENGINE=OLAP + UNIQUE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "row_store_columns" = "v1,v2", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "storage_format" = "V2" + ) + """ + sql """ + insert into table_with_column_group values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11'); + """ + sql "set show_hidden_columns = true" + qt_sql """ + select length(__DORIS_ROW_STORE_COL__) from table_with_column_group order by k1; + """ + + sql "DROP TABLE IF EXISTS table_with_column_group1" + sql """ + CREATE TABLE IF NOT EXISTS table_with_column_group1 ( + `k1` int(11) NULL COMMENT "", + `v1` text NULL COMMENT "", + `v2` bigint NULL COMMENT "", + `v3` double NULL COMMENT "", + `v4` datetime NULL COMMENT "" + ) ENGINE=OLAP + UNIQUE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "row_store_columns" = "v2,v4", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "storage_format" = "V2" + 
) + """ + sql """ + insert into table_with_column_group1 values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11'); + """ + sql "set show_hidden_columns = true" + qt_sql """ + select length(__DORIS_ROW_STORE_COL__) from table_with_column_group1 order by k1; + """ + + sql "DROP TABLE IF EXISTS table_with_column_group2" + sql """ + CREATE TABLE IF NOT EXISTS table_with_column_group2 ( + `k1` int(11) NULL COMMENT "", + `v1` text NULL COMMENT "", + `v2` bigint NULL COMMENT "", + `v3` double NULL COMMENT "", + `v4` datetime NULL COMMENT "" + ) ENGINE=OLAP + UNIQUE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "row_store_columns" = "v2", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "storage_format" = "V2" + ) + """ + sql """ + insert into table_with_column_group2 values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11'); + """ + qt_sql """ + select length(__DORIS_ROW_STORE_COL__) from table_with_column_group2 order by k1; + """ + + sql "DROP TABLE IF EXISTS table_with_column_group3" + sql """ + CREATE TABLE IF NOT EXISTS table_with_column_group3 ( + `k1` int(11) NULL COMMENT "", + `v1` text NULL COMMENT "", + `v2` bigint NULL COMMENT "", + `v3` double NULL COMMENT "", + `v4` datetime NULL COMMENT "" + ) ENGINE=OLAP + UNIQUE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "row_store_columns" = "v2,v4", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "storage_format" = "V2" + ) + """ + sql """ + insert into table_with_column_group3 values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11'); + """ + qt_sql """ + select length(__DORIS_ROW_STORE_COL__) from table_with_column_group3 order by k1; + """ + sql "set show_hidden_columns = false" + + sql """DROP TABLE IF EXISTS table_with_column_group_xxx""" + sql """ + CREATE TABLE IF NOT EXISTS table_with_column_group_xxx ( + `user_id` int NOT NULL COMMENT "用户id", + `date` DATE NOT NULL COMMENT "数据灌入日期时间", + `datev2` DATEV2 NOT NULL COMMENT "数据灌入日期时间", + `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "数据灌入日期时间", + `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "数据灌入日期时间", + `city` VARCHAR(20) COMMENT "用户所在城市", + `age` SMALLINT COMMENT "用户年龄", + `sex` TINYINT COMMENT "用户性别", + `last_visit_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间", + `last_update_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次更新时间", + `datetime_val1` DATETIMEV2(3) DEFAULT "1970-01-01 00:00:00.111" COMMENT "用户最后一次访问时间", + `datetime_val2` DATETIME(6) DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次更新时间", + `last_visit_date_not_null` DATETIME NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间", + `cost` BIGINT DEFAULT "0" COMMENT "用户总消费", + `max_dwell_time` INT DEFAULT "0" COMMENT "用户最大停留时间", + `min_dwell_time` INT DEFAULT "99999" COMMENT "用户最小停留时间") + UNIQUE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) 
DISTRIBUTED BY HASH(`user_id`) + PROPERTIES ( "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "row_store_columns" = "datetimev2_1,datetime_val1,datetime_val2,max_dwell_time" + ); + """ + sql """ INSERT INTO table_with_column_group_xxx values + (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.021', '2017-10-01 11:11:11.011', 'Beijing', 10, 1, '2020-01-01', '2020-01-01', '2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1, 30, 20) + """ + sql """ INSERT INTO table_with_column_group_xxx VALUES + (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.022', '2017-10-01 11:11:11.012', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1, 31, 19) + """ + + sql """ INSERT INTO table_with_column_group_xxx VALUES + (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.023', '2017-10-01 11:11:11.013', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1, 31, 21) + """ + + sql """ INSERT INTO table_with_column_group_xxx VALUES + (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.024', '2017-10-01 11:11:11.014', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1, 32, 20) + """ + + sql """ INSERT INTO table_with_column_group_xxx VALUES + (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.025', '2017-10-01 11:11:11.015', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1, 32, 22) + """ + + sql """ INSERT INTO table_with_column_group_xxx VALUES + (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.026', '2017-10-01 11:11:11.016', 'Beijing', 10, 1, '2020-01-04', '2020-01-04', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1, 33, 21) + """ + + sql """ INSERT INTO table_with_column_group_xxx VALUES + (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.027', '2017-10-01 11:11:11.017', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20) + """ + + sql """ INSERT INTO table_with_column_group_xxx VALUES + (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.028', '2017-10-01 11:11:11.018', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20) + """ + + // set server side prepared statement url + connect(user = user, password = password, url = prepare_url) { + def prep_sql = { sql_str, k -> + def stmt = prepareStatement sql_str + stmt.setInt(1, k) + assertEquals(stmt.class, com.mysql.cj.jdbc.ServerPreparedStatement); + qe_point_select stmt + } + def sql_str = "select v1, v2 from table_with_column_group where k1 = ?" + prep_sql sql_str, 1 + prep_sql sql_str, 2 + prep_sql sql_str, 3 + sql_str = "select v2 from table_with_column_group where k1 = ?" + prep_sql sql_str, 1 + prep_sql sql_str, 2 + prep_sql sql_str, 3 + sql_str = "select v1 from table_with_column_group where k1 = ?" + prep_sql sql_str, 3 + sql_str = "select v2, v1 from table_with_column_group where k1 = ?" + prep_sql sql_str, 3 + + + sql_str = "select v2 from table_with_column_group where k1 = ?" + prep_sql sql_str, 1 + + sql_str = "select v2 from table_with_column_group2 where k1 = ?" + prep_sql sql_str, 1 + prep_sql sql_str, 2 + prep_sql sql_str, 3 + + sql_str = "select v4 from table_with_column_group3 where k1 = ?" 
+ prep_sql sql_str, 1 + prep_sql sql_str, 2 + prep_sql sql_str, 3 + + def setPrepareStmtArgs = {stmt, user_id, date, datev2, datetimev2_1, datetimev2_2, city, age, sex -> + java.text.SimpleDateFormat formater = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SS"); + stmt.setInt(1, user_id) + stmt.setDate(2, java.sql.Date.valueOf(date)) + stmt.setDate(3, java.sql.Date.valueOf(datev2)) + stmt.setTimestamp(4, new java.sql.Timestamp(formater.parse(datetimev2_1).getTime())) + stmt.setTimestamp(5, new java.sql.Timestamp(formater.parse(datetimev2_2).getTime())) + stmt.setString(6, city) + stmt.setInt(7, age) + stmt.setInt(8, sex) + } + + def stmt = prepareStatement """ SELECT datetimev2_1,datetime_val1,datetime_val2,max_dwell_time FROM table_with_column_group_xxx t where user_id = ? and date = ? and datev2 = ? and datetimev2_1 = ? and datetimev2_2 = ? and city = ? and age = ? and sex = ?; """ + setPrepareStmtArgs stmt, 1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.21', '2017-10-01 11:11:11.11', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.22', '2017-10-01 11:11:11.12', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.23', '2017-10-01 11:11:11.13', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.24', '2017-10-01 11:11:11.14', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.25', '2017-10-01 11:11:11.15', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.26', '2017-10-01 11:11:11.16', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.27', '2017-10-01 11:11:11.17', 'Beijing', 10, 1 + qe_point_select stmt + setPrepareStmtArgs stmt, 4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.28', '2017-10-01 11:11:11.18', 'Beijing', 10, 1 + qe_point_select stmt + } + + test { + sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from table_with_column_group where k1 = 1" + exception("Not support column store") + } + + sql "DROP TABLE IF EXISTS table_with_column_group4" + sql """ + CREATE TABLE IF NOT EXISTS table_with_column_group4 ( + `k1` int(11) NULL COMMENT "", + `v1` text NULL COMMENT "", + `v2` bigint NULL COMMENT "", + `v3` double NULL COMMENT "", + `v4` datetime NULL COMMENT "" + ) ENGINE=OLAP + UNIQUE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "row_store_columns" = "v4", + "storage_format" = "V2" + ) + """ + sql "set global enable_short_circuit_query_access_column_store = true" + sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from table_with_column_group where k1 = 1" + def tableName = "rs_query" sql """DROP TABLE IF EXISTS ${tableName}""" sql "set enable_decimal256 = true" @@ -36,13 +309,12 @@ suite("test_rowstore", "p0") { ) """ - sql "set experimental_enable_nereids_planner = false" sql """insert into ${tableName} values (1, 'abc', 1111919.12345678919)""" explain { - sql("select * from ${tableName}") - contains "OPT TWO PHASE" + sql("select * from ${tableName} order by k1 limit 1") + contains "TOPN OPT" } - qt_sql """select * from ${tableName}""" + qt_sql """select * from ${tableName} order by k1 
limit 1""" sql """ ALTER table ${tableName} ADD COLUMN new_column1 INT default "123"; @@ -57,4 +329,6 @@ suite("test_rowstore", "p0") { sql """insert into ${tableName} values (2, 'def', 1111919.12345678919, 456, NULL)""" qt_sql """select * from ${tableName} where k1 = 2""" + + sql "set global enable_short_circuit_query_access_column_store = false" } \ No newline at end of file diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy index 1ba8f9a368f573..23aa2b0d5918d4 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy @@ -30,7 +30,7 @@ suite("test_primary_key_partial_update_with_row_column", "p0") { `dft` int(11) DEFAULT "4321") UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", - "store_row_column"="true") + "store_row_column"="true", "column_groups"= "group1:id,name,score") """ // insert 2 lines sql """ @@ -59,6 +59,7 @@ suite("test_primary_key_partial_update_with_row_column", "p0") { qt_select_default """ select * from ${tableName} order by id """ + qt_sql "select sum(length(__DORIS_ROW_STORE_COL__)), sum(length(__DORIS_ROW_STORE_COL__group1)) from ${tableName}" // drop drop sql """ DROP TABLE IF EXISTS ${tableName} """ From 5d4880ae8fe9b7f1ad4c8ec4c6279d786d573658 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Wed, 12 Jun 2024 15:17:46 +0800 Subject: [PATCH 2/5] fix --- be/src/olap/schema_change.cpp | 2 +- be/src/olap/tablet_schema.cpp | 2 +- .../apache/doris/analysis/ModifyTablePropertiesClause.java | 2 ++ .../partial_update/test_partial_update_with_row_column.groovy | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index ba7ec1eaa24069..f8e62115f12174 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1334,7 +1334,7 @@ Status SchemaChangeJob::parse_request(const SchemaChangeParams& sc_params, } else if (column_mapping->ref_column >= 0) { const auto& column_new = new_tablet_schema->column(i); const auto& column_old = base_tablet_schema->column(column_mapping->ref_column); - // check index changed or row store columns changed + // index changed if (column_new.is_bf_column() != column_old.is_bf_column() || column_new.has_bitmap_index() != column_old.has_bitmap_index() || new_tablet_schema->has_inverted_index(column_new) != diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 07da82da49da64..07730ae6ab45a4 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1037,7 +1037,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _is_in_memory = ori_tablet_schema.is_in_memory(); _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction(); _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction(); - _store_row_column = ori_tablet_schema.has_full_row_store_column(); + _store_row_column = ori_tablet_schema._store_row_column; _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load(); _sort_type = ori_tablet_schema.sort_type(); _sort_col_num = ori_tablet_schema.sort_col_num(); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java index 27a46f34af68b4..47476740b0514e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java @@ -320,6 +320,8 @@ public void analyze(Analyzer analyzer) throws AnalysisException { throw new AnalysisException("You can not modify estimate partition size"); } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN)) { // do nothing, will be analyzed when creating alter job + } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_ROW_STORE_COLUMNS)) { + // do nothing, will be analyzed when creating alter job } else { throw new AnalysisException("Unknown table property: " + properties.keySet()); } diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy index 23aa2b0d5918d4..4b4de014b6772d 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_with_row_column.groovy @@ -30,7 +30,7 @@ suite("test_primary_key_partial_update_with_row_column", "p0") { `dft` int(11) DEFAULT "4321") UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", - "store_row_column"="true", "column_groups"= "group1:id,name,score") + "store_row_column"="true") """ // insert 2 lines sql """ @@ -59,7 +59,7 @@ suite("test_primary_key_partial_update_with_row_column", "p0") { qt_select_default """ select * from ${tableName} order by id """ - qt_sql "select sum(length(__DORIS_ROW_STORE_COL__)), sum(length(__DORIS_ROW_STORE_COL__group1)) from ${tableName}" + qt_sql "select sum(length(__DORIS_ROW_STORE_COL__)) from ${tableName}" // drop drop sql """ DROP TABLE IF EXISTS ${tableName} """ From 875c4974361935564fe8299c0cc4c06e0e161ab5 Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Mon, 17 Jun 2024 21:30:43 +0800 Subject: [PATCH 3/5] fix2 --- be/src/exec/rowid_fetcher.cpp | 2 +- be/src/olap/base_tablet.cpp | 4 ++-- be/src/olap/base_tablet.h | 1 - be/src/olap/rowset/segment_creator.cpp | 2 +- .../olap/rowset/segment_v2/segment_writer.cpp | 8 ++++---- .../segment_v2/vertical_segment_writer.cpp | 8 ++++---- be/src/olap/schema_change.cpp | 8 ++++---- be/src/olap/tablet_meta.cpp | 4 ++-- be/src/olap/tablet_schema.cpp | 9 ++++----- be/src/olap/tablet_schema.h | 8 ++++---- be/src/service/point_query_executor.cpp | 7 +++---- be/src/vec/common/schema_util.cpp | 2 +- .../apache/doris/alter/SchemaChangeHandler.java | 4 ++-- .../apache/doris/analysis/CreateTableStmt.java | 2 +- .../org/apache/doris/catalog/OlapTable.java | 6 +++--- .../doris/common/util/PropertyAnalyzer.java | 17 ++++------------- .../doris/datasource/InternalCatalog.java | 4 ++-- .../plans/commands/info/CreateMTMVInfo.java | 2 +- .../plans/commands/info/CreateTableInfo.java | 4 ++-- .../org/apache/doris/qe/SessionVariable.java | 2 +- .../org/apache/doris/task/AlterReplicaTask.java | 5 ++--- gensrc/proto/olap_file.proto | 4 ++-- .../suites/point_query_p0/load.groovy | 7 ++++--- .../suites/point_query_p0/test_rowstore.groovy | 7 ++----- 24 files changed, 56 
insertions(+), 71 deletions(-) diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index 96ca8ddb786d6d..82bd0f6baa756e 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -405,7 +405,7 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequest& request, row_loc.segment_id(), row_loc.ordinal_id()); // fetch by row store, more effcient way if (request.fetch_row_store()) { - CHECK(tablet->tablet_schema()->has_full_row_store_column()); + CHECK(tablet->tablet_schema()->has_row_store_for_all_columns()); RowLocation loc(rowset_id, segment->id(), row_loc.ordinal_id()); string* value = response->add_binary_row_data(); RETURN_IF_ERROR(scope_timer_run( diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index ee7b3ccecf3e5c..611cce2c869e34 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -61,7 +61,7 @@ Status read_columns_by_plan(TabletSchemaSPtr tablet_schema, const PartialUpdateReadPlan& read_plan, const std::map& rsid_to_rowset, vectorized::Block& block, std::map* read_index) { - bool has_row_column = tablet_schema->has_full_row_store_column(); + bool has_row_column = tablet_schema->has_row_store_for_all_columns(); auto mutable_columns = block.mutate_columns(); size_t read_idx = 0; for (auto rs_it : read_plan) { @@ -873,7 +873,7 @@ Status BaseTablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, BetaRowsetSharedPtr rowset = std::static_pointer_cast(input_rowset); CHECK(rowset); - CHECK(tablet_schema.has_full_row_store_column()); + CHECK(tablet_schema.has_row_store_for_all_columns()); SegmentCacheHandle segment_cache_handle; std::unique_ptr column_iterator; OlapReaderStatistics stats; diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 695dbc2487eee9..dc5f488e04492c 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -26,7 +26,6 @@ #include "olap/rowset/segment_v2/segment.h" #include "olap/tablet_fwd.h" #include "olap/tablet_meta.h" -#include "olap/tablet_schema.h" #include "olap/version_graph.h" #include "util/metrics.h" diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index e78864fbbca813..07b0e9e5525900 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -100,7 +100,7 @@ Status SegmentFlusher::_parse_variant_columns(vectorized::Block& block) { } vectorized::schema_util::ParseContext ctx; - ctx.record_raw_json_column = _context.tablet_schema->has_full_row_store_column(); + ctx.record_raw_json_column = _context.tablet_schema->has_row_store_for_all_columns(); RETURN_IF_ERROR(vectorized::schema_util::parse_variant_columns(block, variant_column_pos, ctx)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index adc8994b53ae10..820afaa2103ff3 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -318,7 +318,7 @@ void SegmentWriter::_maybe_invalid_row_cache(const std::string& key) { // Just invalid row cache for simplicity, since the rowset is not visible at present. // If we update/insert cache, if load failed rowset will not be visible but cached data // will be visible, and lead to inconsistency. 
- if (!config::disable_storage_row_cache && _tablet_schema->has_full_row_store_column() && + if (!config::disable_storage_row_cache && _tablet_schema->has_row_store_for_all_columns() && _opts.write_type == DataWriteType::TYPE_DIRECT) { // invalidate cache RowCache::instance()->erase({_opts.rowset_ctx->tablet_id, key}); @@ -448,8 +448,8 @@ void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) { vectorized::JsonbSerializeUtil::block_to_jsonb( *_tablet_schema, block, *row_store_column, _tablet_schema->num_columns(), serdes, - {_tablet_schema->row_columns_cids().begin(), - _tablet_schema->row_columns_cids().end()}); + {_tablet_schema->row_columns_uids().begin(), + _tablet_schema->row_columns_uids().end()}); break; } } @@ -734,7 +734,7 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f const auto& cids_missing = _opts.rowset_ctx->partial_update_info->missing_cids; auto old_value_block = _tablet_schema->create_block_by_cids(cids_missing); CHECK_EQ(cids_missing.size(), old_value_block.columns()); - bool has_row_column = _tablet_schema->has_full_row_store_column(); + bool has_row_column = _tablet_schema->has_row_store_for_all_columns(); // record real pos, key is input line num, value is old_block line num std::map read_index; size_t read_idx = 0; diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index ada9d8ffad1565..d0cdea71d1c928 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -266,7 +266,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) con // Just invalid row cache for simplicity, since the rowset is not visible at present. // If we update/insert cache, if load failed rowset will not be visible but cached data // will be visible, and lead to inconsistency. 
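Both segment writers now hand the configured row-store column uids to JsonbSerializeUtil::block_to_jsonb, as the hunks above show. A small sketch of the membership test that filter implies, assuming the documented convention that an empty uid set means every column is encoded:

    #include <cstdint>
    #include <unordered_set>

    // True if the column with this unique id belongs in the row store column.
    // An empty set keeps the legacy behavior: encode all columns of the row.
    bool in_row_store(const std::unordered_set<int32_t>& row_store_uids, int32_t col_uid) {
        return row_store_uids.empty() || row_store_uids.count(col_uid) > 0;
    }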
- if (!config::disable_storage_row_cache && _tablet_schema->has_full_row_store_column() && + if (!config::disable_storage_row_cache && _tablet_schema->has_row_store_for_all_columns() && _opts.write_type == DataWriteType::TYPE_DIRECT) { // invalidate cache RowCache::instance()->erase({_opts.rowset_ctx->tablet_id, key}); @@ -287,8 +287,8 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& bl row_store_column->clear(); vectorized::DataTypeSerDeSPtrs serdes = vectorized::create_data_type_serdes(block.get_data_types()); - std::unordered_set row_store_cids_set(_tablet_schema->row_columns_cids().begin(), - _tablet_schema->row_columns_cids().end()); + std::unordered_set row_store_cids_set(_tablet_schema->row_columns_uids().begin(), + _tablet_schema->row_columns_uids().end()); vectorized::JsonbSerializeUtil::block_to_jsonb( *_tablet_schema, block, *row_store_column, _tablet_schema->num_columns(), serdes, row_store_cids_set); @@ -562,7 +562,7 @@ Status VerticalSegmentWriter::_fill_missing_columns( auto old_value_block = _tablet_schema->create_block_by_cids(missing_cids); CHECK_EQ(missing_cids.size(), old_value_block.columns()); auto mutable_old_columns = old_value_block.mutate_columns(); - bool has_row_column = _tablet_schema->has_full_row_store_column(); + bool has_row_column = _tablet_schema->has_row_store_for_all_columns(); // record real pos, key is input line num, value is old_block line num std::map read_index; size_t read_idx = 0; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index f8e62115f12174..f5d9137f663bb1 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1319,10 +1319,10 @@ Status SchemaChangeJob::parse_request(const SchemaChangeParams& sc_params, // if new tablet enable row store, or new tablet has different row store columns if ((!base_tablet_schema->have_column(BeConsts::ROW_STORE_COL) && new_tablet_schema->have_column(BeConsts::ROW_STORE_COL)) || - !std::equal(new_tablet_schema->row_columns_cids().begin(), - new_tablet_schema->row_columns_cids().end(), - base_tablet_schema->row_columns_cids().begin(), - base_tablet_schema->row_columns_cids().end())) { + !std::equal(new_tablet_schema->row_columns_uids().begin(), + new_tablet_schema->row_columns_uids().end(), + base_tablet_schema->row_columns_uids().begin(), + base_tablet_schema->row_columns_uids().end())) { *sc_directly = true; } diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 6c787b597a1d42..84c09fa942e2e4 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -305,8 +305,8 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load); } if (tablet_schema.__isset.row_store_col_cids) { - schema->mutable_row_store_column_cids()->Add(tablet_schema.row_store_col_cids.begin(), - tablet_schema.row_store_col_cids.end()); + schema->mutable_row_store_column_unique_ids()->Add(tablet_schema.row_store_col_cids.begin(), + tablet_schema.row_store_col_cids.end()); } if (binlog_config.has_value()) { BinlogConfig tmp_binlog_config; diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 07730ae6ab45a4..ed3016313eb7c0 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -988,8 +988,8 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _inverted_index_storage_format = schema.inverted_index_storage_format(); } - 
_rowstore_column_cids.assign(schema.row_store_column_cids().begin(), - schema.row_store_column_cids().end()); + _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(), + schema.row_store_column_unique_ids().end()); } void TabletSchema::copy_from(const TabletSchema& tablet_schema) { @@ -1037,7 +1037,6 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _is_in_memory = ori_tablet_schema.is_in_memory(); _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction(); _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction(); - _store_row_column = ori_tablet_schema._store_row_column; _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load(); _sort_type = ori_tablet_schema.sort_type(); _sort_col_num = ori_tablet_schema.sort_col_num(); @@ -1196,8 +1195,8 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { tablet_schema_pb->set_compression_type(_compression_type); tablet_schema_pb->set_version_col_idx(_version_col_idx); tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format); - tablet_schema_pb->mutable_row_store_column_cids()->Assign(_rowstore_column_cids.begin(), - _rowstore_column_cids.end()); + tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign( + _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end()); } size_t TabletSchema::row_size() const { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 366061c0275814..262f66ba561ffe 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -345,8 +345,8 @@ class TabletSchema { } bool enable_single_replica_compaction() const { return _enable_single_replica_compaction; } // indicate if full row store column(all the columns encodes as row) exists - bool has_full_row_store_column() const { - return _store_row_column && row_columns_cids().empty(); + bool has_row_store_for_all_columns() const { + return _store_row_column && row_columns_uids().empty(); } void set_skip_write_index_on_load(bool skip) { _skip_write_index_on_load = skip; } bool skip_write_index_on_load() const { return _skip_write_index_on_load; } @@ -478,7 +478,7 @@ class TabletSchema { void update_tablet_columns(const TabletSchema& tablet_schema, const std::vector& t_columns); - const std::vector& row_columns_cids() const { return _rowstore_column_cids; } + const std::vector& row_columns_uids() const { return _row_store_column_unique_ids; } private: friend bool operator==(const TabletSchema& a, const TabletSchema& b); @@ -524,7 +524,7 @@ class TabletSchema { // Contains column ids of which columns should be encoded into row store. 
// ATTN: For compability reason empty cids means all columns of tablet schema are encoded to row column - std::vector _rowstore_column_cids; + std::vector _row_store_column_unique_ids; }; bool operator==(const TabletSchema& a, const TabletSchema& b); diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 2eee1f385e5b29..a35fdc1a1176be 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -79,11 +79,11 @@ static void get_missing_and_include_cids(const TabletSchema& schema, const TabletColumn& target_rs_column = schema.column_by_uid(target_rs_column_id); DCHECK(target_rs_column.is_row_store_column()); // The full column group is considered a full match, thus no missing cids - if (schema.row_columns_cids().empty()) { + if (schema.row_columns_uids().empty()) { missing_cids.clear(); return; } - for (int cid : schema.row_columns_cids()) { + for (int cid : schema.row_columns_uids()) { missing_cids.erase(cid); include_cids.insert(cid); } @@ -416,8 +416,7 @@ Status PointQueryExecutor::_lookup_row_data() { std::string value; // fill block by row store if (_reusable->rs_column_uid() != -1) { - bool use_row_cache = !config::disable_storage_row_cache && - _tablet->tablet_schema()->row_columns_cids().empty(); + bool use_row_cache = !config::disable_storage_row_cache; RETURN_IF_ERROR(_tablet->lookup_row_data( _row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(), *(_row_read_ctxs[i]._rowset_ptr), _reusable->tuple_desc(), diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 55339e5c6c1d64..64f93f98c56680 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -682,7 +682,7 @@ void rebuild_schema_and_block(const TabletSchemaSPtr& original, {}, root->data.get_finalized_column_ptr()->assume_mutable(), root->data.get_least_common_type()); // // set for rowstore - if (original->has_full_row_store_column()) { + if (original->has_row_store_for_all_columns()) { static_cast(obj.get())->set_rowstore_column( object_column.get_rowstore_column()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 81beb1370ab613..24d373a0d5f34a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -1324,9 +1324,9 @@ private void createJob(String rawSql, long dbId, OlapTable olapTable, Map rsColumns = Lists.newArrayList(); boolean storeRowColumn = false; try { - storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(propertyMap, true); + storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(propertyMap); rsColumns = PropertyAnalyzer.analyzeRowStoreColumns(propertyMap, - olapTable.getColumns().stream().map(Column::getName).collect(Collectors.toList()), true); + olapTable.getColumns().stream().map(Column::getName).collect(Collectors.toList())); } catch (AnalysisException e) { throw new DdlException(e.getMessage()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index d294b40eff2ef0..1e3fb9054638f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -448,7 +448,7 @@ public void analyze(Analyzer analyzer) 
throws UserException { } } // add a hidden column as row store - if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties), true)) { + if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties))) { if (keysDesc != null && keysDesc.getKeysType() == KeysType.AGG_KEYS) { throw new AnalysisException("Aggregate table can't support row column now"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 161fd3f8dad88f..0b3cd21f9e6d03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1232,10 +1232,10 @@ public void setRowStoreColumns(List rowStoreColumns) { getOrCreatTableProperty().setRowStoreColumns(rowStoreColumns); } - public List getRowStoreColumnsUniqueIds(List rsColumnNames) { + public List getRowStoreColumnsUniqueIds(List rowStoreColumns) { List columnIds = Lists.newArrayList(); - if (rsColumnNames != null) { - for (String colName : rsColumnNames) { + if (rowStoreColumns != null) { + for (String colName : rowStoreColumns) { Column col = nameToColumn.get(colName); Preconditions.checkNotNull(col); columnIds.add(col.getUniqueId()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 3eccf226006171..e7067894600049 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -780,9 +780,7 @@ public static List analyzeRowStoreColumns(Map properties if (null == value) { return null; } - if (stripProperty) { - properties.remove(PROPERTIES_ROW_STORE_COLUMNS); - } + properties.remove(PROPERTIES_ROW_STORE_COLUMNS); String[] rsColumnArr = value.split(COMMA_SEPARATOR); rowStoreColumns.addAll(Arrays.asList(rsColumnArr)); if (rowStoreColumns.isEmpty()) { @@ -793,11 +791,6 @@ public static List analyzeRowStoreColumns(Map properties .filter(expectedColName -> columns.stream().noneMatch( column -> column.equalsIgnoreCase(expectedColName))) .collect(Collectors.toList()); - // if (invalidColumns.size() == 1 && invalidColumns.get(0).equalsIgnoreCase("__all__")) { - // // __all__ represents all the columns are encoded to row store - // rowStoreColumns.clear(); - // return rowStoreColumns; - // if (!invalidColumns.isEmpty()) { throw new AnalysisException( "Column does not exist in table. 
Invalid columns: " @@ -806,8 +799,8 @@ public static List analyzeRowStoreColumns(Map properties return rowStoreColumns; } - public static Boolean analyzeStoreRowColumn(Map properties, - boolean stripProperty) throws AnalysisException { + public static Boolean analyzeStoreRowColumn(Map properties) + throws AnalysisException { if (properties == null || properties.isEmpty()) { return false; } @@ -816,9 +809,7 @@ public static Boolean analyzeStoreRowColumn(Map properties, if (null == value) { return false; } - if (stripProperty) { - properties.remove(PROPERTIES_STORE_ROW_COLUMN); - } + properties.remove(PROPERTIES_STORE_ROW_COLUMN); if (value.equalsIgnoreCase("true")) { return true; } else if (value.equalsIgnoreCase("false")) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index a3fa3a71097cf4..57ea0e7c8b6f83 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -2553,7 +2553,7 @@ private boolean createOlapTable(Database db, CreateTableStmt stmt) throws UserEx // analyze row store columns try { boolean storeRowColumn = false; - storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(properties, true); + storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(properties); if (storeRowColumn && !enableLightSchemaChange) { throw new DdlException( "Row store column rely on light schema change, enable light schema change first"); @@ -2562,7 +2562,7 @@ private boolean createOlapTable(Database db, CreateTableStmt stmt) throws UserEx List rowStoreColumns; try { rowStoreColumns = PropertyAnalyzer.analyzeRowStoreColumns(properties, - baseSchema.stream().map(Column::getName).collect(Collectors.toList()), true); + baseSchema.stream().map(Column::getName).collect(Collectors.toList())); if (rowStoreColumns != null && rowStoreColumns.isEmpty()) { rowStoreColumns = null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java index e53d54fda37bd0..c4de4dca35df4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java @@ -385,7 +385,7 @@ private void getColumns(Plan plan) { if (properties != null) { try { boolean storeRowColumn = - PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties), true); + PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties)); if (storeRowColumn) { columns.add(ColumnDefinition.newRowStoreColumnDefinition(null)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java index 15da1a9c4d1f10..e09e8452e8d06c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java @@ -417,11 +417,11 @@ public void validate(ConnectContext ctx) { if (properties != null) { try { storeRowColumn = - PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties), true); + PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties)); 
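analyzeRowStoreColumns above is the FE-side gatekeeper: it splits the comma-separated row_store_columns value and rejects any name that matches no column of the table. A condensed C++ sketch of the same validation (the helper name is hypothetical, and the real Java code matches case-insensitively while this is simplified to case-sensitive):

    #include <algorithm>
    #include <sstream>
    #include <stdexcept>
    #include <string>
    #include <vector>

    std::vector<std::string> parse_row_store_columns(const std::string& value,
                                                     const std::vector<std::string>& table_columns) {
        std::vector<std::string> row_store_columns;
        std::istringstream ss(value);
        for (std::string name; std::getline(ss, name, ',');) {
            // Reject names that do not exist in the table schema.
            if (std::find(table_columns.begin(), table_columns.end(), name) == table_columns.end()) {
                throw std::invalid_argument("Column does not exist in table. Invalid column: " + name);
            }
            row_store_columns.push_back(name);
        }
        return row_store_columns;
    }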
rowStoreColumns = PropertyAnalyzer.analyzeRowStoreColumns(Maps.newHashMap(properties), columns.stream() .map(ColumnDefinition::getName) - .collect(Collectors.toList()), true); + .collect(Collectors.toList())); } catch (Exception e) { throw new AnalysisException(e.getMessage(), e.getCause()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index b730a67c23ea4c..48f3e6db5c9be3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1170,7 +1170,7 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) { private boolean enableShortCircuitQuery = true; @VariableMgr.VarAttr(name = ENABLE_SHORT_CIRCUIT_QUERY_ACCESS_COLUMN_STORE) - private boolean enableShortCircuitQueryAcessColumnStore = false; + private boolean enableShortCircuitQueryAcessColumnStore = true; @VariableMgr.VarAttr(name = CHECK_OVERFLOW_FOR_DECIMAL) private boolean checkOverflowForDecimal = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java index cafff6bdda87ed..c795b1f34f0cb1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java @@ -69,7 +69,7 @@ public AlterReplicaTask(long backendId, long dbId, long tableId, long partitionI long baseIndexId, long rollupTabletId, long baseTabletId, long newReplicaId, int newSchemaHash, int baseSchemaHash, long version, long jobId, AlterJobV2.JobType jobType, Map defineExprs, DescriptorTable descTable, List baseSchemaColumns, Map objectPool, - Expr whereClause, long expiration, String vaultIds) { + Expr whereClause, long expiration, String vaultId) { super(null, backendId, TTaskType.ALTER, dbId, tableId, partitionId, rollupIndexId, rollupTabletId); this.baseTabletId = baseTabletId; @@ -172,8 +172,7 @@ public TAlterTabletReqV2 toThrift() { if (value == null) { List columns = new ArrayList(); for (Column column : baseSchemaColumns) { - TColumn tColumn = column.toThrift(); - columns.add(tColumn); + columns.add(column.toThrift()); } objectPool.put(baseSchemaColumns, columns); req.setColumns(columns); diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 2d54231a02ca6e..c4aa7654366f30 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -381,7 +381,7 @@ message TabletSchemaPB { repeated int32 cluster_key_idxes = 24; optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; // column unique ids for row store columns - repeated int32 row_store_column_cids = 26; + repeated int32 row_store_column_unique_ids = 26; } message TabletSchemaCloudPB { @@ -409,7 +409,7 @@ message TabletSchemaCloudPB { repeated int32 cluster_key_idxes = 24; optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; // column unique ids for row store columns - repeated int32 row_store_column_cids = 26; + repeated int32 row_store_column_unique_ids = 26; optional bool is_dynamic_schema = 100 [default=false]; } diff --git a/regression-test/suites/point_query_p0/load.groovy b/regression-test/suites/point_query_p0/load.groovy index de425bfcdb0541..1c6bb9b52c1a8d 100644 --- a/regression-test/suites/point_query_p0/load.groovy +++ b/regression-test/suites/point_query_p0/load.groovy @@ -17,7 +17,7 @@ import 
org.codehaus.groovy.runtime.IOGroovyMethods -suite("test_load_and_schema_change_row_store", "p0,nonConcurrent") { +suite("test_load_and_schema_change_row_store", "p0") { def dataFile = """${getS3Url()}/regression/datatypes/test_scalar_types_10w.csv""" // define dup key table1 @@ -50,6 +50,7 @@ suite("test_load_and_schema_change_row_store", "p0,nonConcurrent") { PROPERTIES("replication_num" = "1", "row_store_columns" = "k1,c_bool,c_tinyint,c_bigint,c_decimal,c_decimalv3,c_datev2,c_string"); """ + // load data streamLoad { table testTable @@ -155,8 +156,8 @@ suite("test_load_and_schema_change_row_store", "p0,nonConcurrent") { sql """alter table tbl_scalar_types_dup_1 set ("row_store_columns" = "k1,c_decimalv3")""" wait_job_done.call("tbl_scalar_types_dup_1") test { - sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ k1,c_datetimev2 from tbl_scalar_types_dup_1 where k1 = -2147303679" + sql "select /*+ SET_VAR(enable_nereids_planner=false,enable_short_circuit_query_access_column_store=false)*/ k1,c_datetimev2 from tbl_scalar_types_dup_1 where k1 = -2147303679" exception("Not support column store") } - qt_sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ k1, c_decimalv3 from tbl_scalar_types_dup_1 where k1 = -2147303679" + qt_sql "select /*+ SET_VAR(enable_nereids_planner=false,enable_short_circuit_query_access_column_store=false)*/ k1, c_decimalv3 from tbl_scalar_types_dup_1 where k1 = -2147303679" } diff --git a/regression-test/suites/point_query_p0/test_rowstore.groovy b/regression-test/suites/point_query_p0/test_rowstore.groovy index 47a2f9fe000c1b..78e1d6c3f1c7c3 100644 --- a/regression-test/suites/point_query_p0/test_rowstore.groovy +++ b/regression-test/suites/point_query_p0/test_rowstore.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
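The load.groovy changes above pin down the failure mode: once row_store_columns is narrowed, a point query that needs a column outside the group (with column-store access disabled) must raise "Not support column store". On the BE side this hinges on the missing/include uid split computed in point_query_executor.cpp; a condensed sketch of that computation, with a hypothetical signature:

    #include <cstdint>
    #include <set>
    #include <vector>

    // missing_cids starts as the unique ids of all output columns; ids covered by
    // the row store group move into include_cids. An empty row_store_uids list
    // means the full row is encoded, so nothing stays missing.
    void split_cids(const std::vector<int32_t>& row_store_uids,
                    std::set<int32_t>& missing_cids, std::set<int32_t>& include_cids) {
        if (row_store_uids.empty()) {
            include_cids.insert(missing_cids.begin(), missing_cids.end());
            missing_cids.clear();
            return;
        }
        for (int32_t uid : row_store_uids) {
            missing_cids.erase(uid);
            include_cids.insert(uid);
        }
    }

Any uid left in missing_cids after this pass is exactly what ends up in the "missing columns" list of the error message.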
-suite("test_rowstore", "p0,nonConcurrent") { +suite("test_rowstore", "p0") { // Parse url String jdbcUrl = context.config.jdbcUrl def user = context.config.jdbcUser @@ -263,7 +263,7 @@ suite("test_rowstore", "p0,nonConcurrent") { } test { - sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from table_with_column_group where k1 = 1" + sql "select /*+ SET_VAR(enable_nereids_planner=false, enable_short_circuit_query_access_column_store=false)*/ * from table_with_column_group where k1 = 1" exception("Not support column store") } @@ -286,7 +286,6 @@ suite("test_rowstore", "p0,nonConcurrent") { "storage_format" = "V2" ) """ - sql "set global enable_short_circuit_query_access_column_store = true" sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from table_with_column_group where k1 = 1" def tableName = "rs_query" @@ -329,6 +328,4 @@ suite("test_rowstore", "p0,nonConcurrent") { sql """insert into ${tableName} values (2, 'def', 1111919.12345678919, 456, NULL)""" qt_sql """select * from ${tableName} where k1 = 2""" - - sql "set global enable_short_circuit_query_access_column_store = false" } \ No newline at end of file From d6b8d3ac6c53e0159f7d775b1b7a5b9b9d4f2098 Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Mon, 17 Jun 2024 22:23:29 +0800 Subject: [PATCH 4/5] fix3 --- .../src/main/java/org/apache/doris/backup/RestoreJob.java | 5 +---- .../java/org/apache/doris/common/util/PropertyAnalyzer.java | 3 +-- .../analysis/LogicalResultSinkToShortCircuitPointQuery.java | 2 +- .../src/main/java/org/apache/doris/qe/SessionVariable.java | 4 ---- .../main/java/org/apache/doris/task/CreateReplicaTask.java | 3 --- regression-test/suites/point_query_p0/load.groovy | 2 +- 6 files changed, 4 insertions(+), 15 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 6cfafd19fb49b8..99f5f028e6e057 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -1071,11 +1071,8 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc } finally { localTbl.readUnlock(); } -<<<<<<< HEAD Map objectPool = new HashMap(); -======= List rowStoreColumns = localTbl.getTableProperty().getCopiedRowStoreColumns(); ->>>>>>> dd6a5e0b00 ([Feature](Row store) support column group which store row format for partial columns of table) for (MaterializedIndex restoredIdx : restorePart.getMaterializedIndices(IndexExtState.VISIBLE)) { MaterializedIndexMeta indexMeta = localTbl.getIndexMetaByIndexId(restoredIdx.getId()); List indexes = restoredIdx.getId() == localTbl.getBaseIndexId() @@ -1110,7 +1107,7 @@ >>>>>>> dd6a5e0b00 ([Feature](Row store) support column group which store row fo localTbl.getTimeSeriesCompactionLevelThreshold(), localTbl.storeRowColumn(), binlogConfig, - localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns). 
+ localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns), objectPool); task.setInvertedIndexStorageFormat(localTbl.getInvertedIndexStorageFormat()); task.setInRestoreMode(true); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index e7067894600049..69869188c7779c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -772,8 +772,7 @@ public static Boolean analyzeEnableDuplicateWithoutKeysByDefault(Map analyzeRowStoreColumns(Map properties, - List columns, - boolean stripProperty) throws AnalysisException { + List columns) throws AnalysisException { List rowStoreColumns = Lists.newArrayList(); String value = properties.get(PROPERTIES_ROW_STORE_COLUMNS); // set empty row store columns by default diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/LogicalResultSinkToShortCircuitPointQuery.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/LogicalResultSinkToShortCircuitPointQuery.java index 1438edb9bdd00d..0b620ae6b464a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/LogicalResultSinkToShortCircuitPointQuery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/LogicalResultSinkToShortCircuitPointQuery.java @@ -60,7 +60,7 @@ private boolean filterMatchShortCircuitCondition(LogicalFilter } private boolean scanMatchShortCircuitCondition(LogicalOlapScan olapScan) { - if (!ConnectContext.get().getSessionVariable().enableShortCircuitQuery) { + if (!ConnectContext.get().getSessionVariable().isEnableShortCircuitQuery()) { return false; } OlapTable olapTable = olapScan.getTable(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 48f3e6db5c9be3..309611c6ab4ec5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -133,7 +133,6 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_INSERT_STRICT = "enable_insert_strict"; public static final String INSERT_MAX_FILTER_RATIO = "insert_max_filter_ratio"; public static final String ENABLE_SPILLING = "enable_spilling"; - public static final String ENABLE_SHORT_CIRCUIT_QUERY = "enable_short_circuit_point_query"; public static final String ENABLE_SERVER_SIDE_PREPARED_STATEMENT = "enable_server_side_prepared_statement"; public static final String PREFER_JOIN_METHOD = "prefer_join_method"; @@ -660,9 +659,6 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_SPILLING) public boolean enableSpilling = false; - @VariableMgr.VarAttr(name = ENABLE_SHORT_CIRCUIT_QUERY) - public boolean enableShortCircuitQuery = true; - // By default, the number of Limit items after OrderBy is changed from 65535 items // before v1.2.0 (not included), to return all items by default @VariableMgr.VarAttr(name = DEFAULT_ORDER_BY_LIMIT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java index 435e50807dfdc3..8a658de62bbcca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -124,11 +124,8 @@ public class CreateReplicaTask extends AgentTask { private BinlogConfig binlogConfig; private List clusterKeyIndexes; -<<<<<<< HEAD private Map objectPool; -======= private List rowStoreColumnUniqueIds; ->>>>>>> dd6a5e0b00 ([Feature](Row store) support column group which store row format for partial columns of table) public CreateReplicaTask(long backendId, long dbId, long tableId, long partitionId, long indexId, long tabletId, long replicaId, short shortKeyColumnCount, int schemaHash, long version, diff --git a/regression-test/suites/point_query_p0/load.groovy b/regression-test/suites/point_query_p0/load.groovy index 1c6bb9b52c1a8d..11c40115b09b2a 100644 --- a/regression-test/suites/point_query_p0/load.groovy +++ b/regression-test/suites/point_query_p0/load.groovy @@ -144,7 +144,7 @@ suite("test_load_and_schema_change_row_store", "p0") { wait_job_done.call("tbl_scalar_types_dup_1") qt_sql "select sum(length(__DORIS_ROW_STORE_COL__)) from tbl_scalar_types_dup_1" test { - sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from tbl_scalar_types_dup_1 where k1 = -2147303679" + sql "select /*+ SET_VAR(enable_nereids_planner=false,enable_short_circuit_query_access_column_store=false)*/ * from tbl_scalar_types_dup_1 where k1 = -2147303679" exception("Not support column store") } explain { From 1921a29b81f6fce923a5cd6bee61d93f1e45d2de Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Tue, 18 Jun 2024 11:38:33 +0800 Subject: [PATCH 5/5] fix4 --- be/src/service/point_query_executor.cpp | 26 ++++++++++++++++--- be/src/service/point_query_executor.h | 1 + .../apache/doris/nereids/NereidsPlanner.java | 1 + .../point_query_p0/test_point_query.groovy | 2 +- .../point_query_p0/test_rowstore.groovy | 16 +++++++----- 5 files changed, 35 insertions(+), 11 deletions(-) diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index a35fdc1a1176be..d4d20ea5a489c9 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -62,6 +62,9 @@ namespace doris { Reusable::~Reusable() = default; +// Compute the missing and included column unique ids for the output exprs. +// include_cids: unique ids of the output expr slots that are covered by the row store column group. +// missing_cids: unique ids of the output expr columns that are not in the row store column group. static void get_missing_and_include_cids(const TabletSchema& schema, const std::vector& slots, int target_rs_column_id, @@ -318,14 +321,14 @@ std::string PointQueryExecutor::print_profile() { ", is_binary_row:{}, output_columns:{}, total_keys:{}, row_cache_hits:{}" ", hit_cached_pages:{}, total_pages_read:{}, compressed_bytes_read:{}, " "io_latency:{}ns, " - "uncompressed_bytes_read:{}, result_data_bytes:{}" + "uncompressed_bytes_read:{}, result_data_bytes:{}, row_hits:{}" ", rs_column_uid:{}" "", total_us, init_us, init_key_us, lookup_key_us, lookup_data_us, output_data_us, _profile_metrics.hit_lookup_cache, _binary_row_format, _reusable->output_exprs().size(), _row_read_ctxs.size(), _profile_metrics.row_cache_hits, read_stats.cached_pages_num, read_stats.total_pages_num, read_stats.compressed_bytes_read, read_stats.io_ns, - read_stats.uncompressed_bytes_read, _profile_metrics.result_data_bytes, + read_stats.uncompressed_bytes_read, _profile_metrics.result_data_bytes, _row_hits, _reusable->rs_column_uid()); } @@ -393,6 +396,7 @@ Status PointQueryExecutor::_lookup_row_key() { VLOG_DEBUG << "aquire rowset " << (*rowset_ptr)->rowset_id();
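The hunk continuing below bumps _row_hits once per key that actually locates a row, and the padding block later in this patch uses that count to extend columns whose slots are not part of the output. A toy sketch of the padding rule (plain ints standing in for real columns and their defaults):

    #include <cstddef>
    #include <vector>

    // Extend every short column with default values up to the number of rows
    // actually found, mirroring insert_many_defaults in the patch below.
    void pad_to_row_hits(std::vector<std::vector<int>>& columns, size_t row_hits) {
        for (auto& col : columns) {
            if (col.size() < row_hits) {
                col.resize(row_hits);  // value-initialization stands in for column defaults
            }
        }
    }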
_row_read_ctxs[i]._rowset_ptr = std::unique_ptr( rowset_ptr.release(), &release_rowset); + _row_hits++; } return Status::OK(); } @@ -434,7 +438,8 @@ Status PointQueryExecutor::_lookup_row_data() { missing_columns += _tablet->tablet_schema()->column_by_uid(cid).name() + ","; } return Status::InternalError( - "Not support column store, set store_row_column or column_groups in table " + "Not support column store, set store_row_column=true or row_store_columns " + "in table " "properties, missing columns: " + missing_columns + " should be added to row store"); } @@ -463,6 +468,21 @@ } } } + if (_result_block->columns() > _reusable->include_col_uids().size()) { + // Pad rows for columns that need not be output to the mysql client + // e.g. SELECT k1,v1,v2 FROM TABLE WHERE k1 = 1; k1 is not in output slots, tuple as below + // TupleDescriptor{id=1, tbl=table_with_column_group} + // SlotDescriptor{id=8, col=v1, colUniqueId=1 ...} + // SlotDescriptor{id=9, col=v2, colUniqueId=2 ...} + // thus missing in include_col_uids and missing_col_uids + for (size_t i = 0; i < _result_block->columns(); ++i) { + auto column = _result_block->get_by_position(i).column; + int padding_rows = _row_hits - column->size(); + if (padding_rows > 0) { + column->assume_mutable()->insert_many_defaults(padding_rows); + } + } + } return Status::OK(); } diff --git a/be/src/service/point_query_executor.h b/be/src/service/point_query_executor.h index 2b5f882717e2e9..1bed53891c3973 100644 --- a/be/src/service/point_query_executor.h +++ b/be/src/service/point_query_executor.h @@ -330,6 +330,7 @@ class PointQueryExecutor { Metrics _profile_metrics; bool _binary_row_format = false; OlapReaderStatistics _read_stats; + int32_t _row_hits = 0; // snapshot read version int64_t _version = -1; }; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index a49c0251aeef39..829cf6512d6d67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -113,6 +113,7 @@ public NereidsPlanner(StatementContext statementContext) { @Override public void plan(StatementBase queryStmt, org.apache.doris.thrift.TQueryOptions queryOptions) throws UserException { + this.queryOptions = queryOptions; if (statementContext.getConnectContext().getSessionVariable().isEnableNereidsTrace()) { NereidsTracer.init(); } else { diff --git a/regression-test/suites/point_query_p0/test_point_query.groovy b/regression-test/suites/point_query_p0/test_point_query.groovy index bc674d17e627c8..02c4d1583e44cc 100644 --- a/regression-test/suites/point_query_p0/test_point_query.groovy +++ b/regression-test/suites/point_query_p0/test_point_query.groovy @@ -279,6 +279,6 @@ suite("test_point_query", "nonConcurrent") { set_be_config.call("disable_storage_row_cache", "true") sql """set global enable_nereids_planner=true""" sql "set global enable_fallback_to_original_planner = true" - // sql "set global enable_server_side_prepared_statement = false" + sql "set global enable_server_side_prepared_statement = false" } } \ No newline at end of file diff --git a/regression-test/suites/point_query_p0/test_rowstore.groovy b/regression-test/suites/point_query_p0/test_rowstore.groovy index 78e1d6c3f1c7c3..5ab2f3e47e270e 100644 --- a/regression-test/suites/point_query_p0/test_rowstore.groovy +++
b/regression-test/suites/point_query_p0/test_rowstore.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_rowstore", "p0") { +suite("test_rowstore", "p0,nonConcurrent") { // Parse url String jdbcUrl = context.config.jdbcUrl def user = context.config.jdbcUser @@ -262,11 +262,6 @@ suite("test_rowstore", "p0") { qe_point_select stmt } - test { - sql "select /*+ SET_VAR(enable_nereids_planner=false, enable_short_circuit_query_access_column_store=false)*/ * from table_with_column_group where k1 = 1" - exception("Not support column store") - } - sql "DROP TABLE IF EXISTS table_with_column_group4" sql """ CREATE TABLE IF NOT EXISTS table_with_column_group4 ( @@ -286,7 +281,7 @@ suite("test_rowstore", "p0") { "storage_format" = "V2" ) """ - sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from table_with_column_group where k1 = 1" + sql "select /*+ SET_VAR(enable_nereids_planner=true)*/ * from table_with_column_group where k1 = 1" def tableName = "rs_query" sql """DROP TABLE IF EXISTS ${tableName}""" @@ -328,4 +323,11 @@ suite("test_rowstore", "p0") { sql """insert into ${tableName} values (2, 'def', 1111919.12345678919, 456, NULL)""" qt_sql """select * from ${tableName} where k1 = 2""" + + sql "set global enable_short_circuit_query_access_column_store = false" + test { + sql "select * from table_with_column_group where k1 = 1" + exception("Not support column store") + } + sql "set global enable_short_circuit_query_access_column_store = true" } \ No newline at end of file
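Taken together, the tests in this series fix the read-path choice for a point query against a table with a partial row store group. A compact restatement of that decision table (illustrative only, not actual planner code):

    // covered: every output column is inside the row store column group
    // allow_column_store: session variable enable_short_circuit_query_access_column_store
    enum class PointReadPath { kRowStore, kColumnStore, kError };

    PointReadPath choose_path(bool covered, bool allow_column_store) {
        if (covered) return PointReadPath::kRowStore;
        return allow_column_store ? PointReadPath::kColumnStore
                                  : PointReadPath::kError;  // "Not support column store"
    }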