From 8e5a4c2c7ace4be77e34b87463f1f14cc6b3e684 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 2 Sep 2024 14:44:08 +0800 Subject: [PATCH 1/8] fix hidden sequenc_col not set when insert new rows in partial update --- be/src/exec/tablet_info.cpp | 9 ++++++++ be/src/exec/tablet_info.h | 2 ++ be/src/olap/base_tablet.cpp | 8 +++++-- be/src/olap/delta_writer_v2.cpp | 1 + be/src/olap/partial_update_info.cpp | 23 +++++++++++++++++-- be/src/olap/rowset_builder.cpp | 1 + be/src/olap/tablet_schema.h | 11 ++++++++- .../apache/doris/planner/OlapTableSink.java | 6 +++++ gensrc/proto/descriptors.proto | 1 + gensrc/thrift/Descriptors.thrift | 1 + 10 files changed, 58 insertions(+), 5 deletions(-) diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 3d73bf1bd886de..7f84f28bc6b401 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -127,6 +127,10 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { "different from BE."); } _auto_increment_column_unique_id = pschema.auto_increment_column_unique_id(); + + if (pschema.has_sequence_map_col_name()) { + _sequence_map_column = pschema.sequence_map_col_name(); + } } _timestamp_ms = pschema.timestamp_ms(); _timezone = pschema.timezone(); @@ -198,6 +202,10 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { "different from BE."); } _auto_increment_column_unique_id = tschema.auto_increment_column_unique_id; + + if (tschema.__isset.sequence_map_col_name) { + _sequence_map_column = tschema.sequence_map_col_name; + } } for (const auto& tcolumn : tschema.partial_update_input_columns) { @@ -273,6 +281,7 @@ void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const { pschema->set_auto_increment_column_unique_id(_auto_increment_column_unique_id); pschema->set_timestamp_ms(_timestamp_ms); pschema->set_timezone(_timezone); + pschema->set_sequence_map_col_name(_sequence_map_column); for (auto col : _partial_update_input_columns) { *pschema->add_partial_update_input_columns() = col; } diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index fcba8fd82623bb..24d125949f788d 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -100,6 +100,7 @@ class OlapTableSchemaParam { std::string timezone() const { return _timezone; } bool is_strict_mode() const { return _is_strict_mode; } std::string debug_string() const; + std::string sequence_map_column() const { return _sequence_map_column; } private: int64_t _db_id; @@ -114,6 +115,7 @@ class OlapTableSchemaParam { std::set _partial_update_input_columns; bool _is_strict_mode = false; std::string _auto_increment_column; + std::string _sequence_map_column {}; int32_t _auto_increment_column_unique_id; int64_t _timestamp_ms = 0; std::string _timezone; diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 1fd3b785b9072f..5105d5373d5fc4 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -894,7 +894,8 @@ Status BaseTablet::generate_default_value_block(const TabletSchema& schema, auto mutable_default_value_columns = default_value_block.mutate_columns(); for (auto i = 0; i < cids.size(); ++i) { const auto& column = schema.column(cids[i]); - if (column.has_default_value()) { + if (column.has_default_value() || + (column.name() == SEQUENCE_COL && schema.sequence_col_use_default_value())) { const auto& default_value = default_values[i]; vectorized::ReadBuffer rb(const_cast(default_value.c_str()), default_value.size()); @@ -966,6 +967,8 @@ Status BaseTablet::generate_new_block_for_partial_update( CHECK(update_rows >= old_rows); + bool sequence_col_use_default_value = rowset_schema->sequence_col_use_default_value(); + // build full block for (auto i = 0; i < missing_cids.size(); ++i) { const auto& rs_column = rowset_schema->column(missing_cids[i]); @@ -984,7 +987,8 @@ Status BaseTablet::generate_new_block_for_partial_update( mutable_column->insert_default(); } else if (old_block_delete_signs != nullptr && old_block_delete_signs[read_index_old[idx]] != 0) { - if (rs_column.has_default_value()) { + if (rs_column.has_default_value() || + (rs_column.name() == SEQUENCE_COL && sequence_col_use_default_value)) { mutable_column->insert_from(*mutable_default_value_columns[i].get(), 0); } else if (rs_column.is_nullable()) { assert_cast( diff --git a/be/src/olap/delta_writer_v2.cpp b/be/src/olap/delta_writer_v2.cpp index 2bd1fa6a111913..2cb470c44f3b7c 100644 --- a/be/src/olap/delta_writer_v2.cpp +++ b/be/src/olap/delta_writer_v2.cpp @@ -234,6 +234,7 @@ void DeltaWriterV2::_build_current_tablet_schema(int64_t index_id, _tablet_schema->set_db_id(table_schema_param->db_id()); if (table_schema_param->is_partial_update()) { _tablet_schema->set_auto_increment_column(table_schema_param->auto_increment_coulumn()); + _tablet_schema->set_sequence_map_column(table_schema_param->sequence_map_column()); } // set partial update columns info _partial_update_info = std::make_shared(); diff --git a/be/src/olap/partial_update_info.cpp b/be/src/olap/partial_update_info.cpp index 6a6ec9deabf5b5..f4d395f1724f45 100644 --- a/be/src/olap/partial_update_info.cpp +++ b/be/src/olap/partial_update_info.cpp @@ -23,6 +23,7 @@ #include "olap/olap_common.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_writer_context.h" +#include "olap/tablet_meta.h" #include "olap/tablet_schema.h" #include "olap/utils.h" #include "vec/common/assert_cast.h" @@ -156,12 +157,16 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids( if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 && to_lower(column.default_value()).find(to_lower("CURRENT_TIMESTAMP")) != std::string::npos)) { + LOG_INFO("_generate_default_values_for_missing_cids: column.default_value()={}", + column.default_value()); DateV2Value dtv; dtv.from_unixtime(timestamp_ms / 1000, timezone); default_value = dtv.debug_string(); } else if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATEV2 && to_lower(column.default_value()).find(to_lower("CURRENT_DATE")) != std::string::npos)) { + LOG_INFO("_generate_default_values_for_missing_cids: column.default_value()={}", + column.default_value()); DateV2Value dv; dv.from_unixtime(timestamp_ms / 1000, timezone); default_value = dv.debug_string(); @@ -174,6 +179,20 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids( default_values.emplace_back(); } } + if (!tablet_schema.sequence_map_column().empty() && + !partial_update_input_columns.contains(tablet_schema.sequence_map_column())) { + auto it = std::find(missing_cids.cbegin(), missing_cids.cend(), + tablet_schema.sequence_col_idx()); + DCHECK(it != missing_cids.cend()); + std::size_t seq_col_idx_in_missing_cids = std::distance(missing_cids.cbegin(), it); + it = std::find(missing_cids.cbegin(), missing_cids.cend(), + tablet_schema.field_index(tablet_schema.sequence_map_column())); + DCHECK(it != missing_cids.cend()); + std::size_t seq_map_col_idx_in_missing_cids = std::distance(missing_cids.cbegin(), it); + default_values[seq_col_idx_in_missing_cids] = + default_values[seq_map_col_idx_in_missing_cids]; + } + CHECK_EQ(missing_cids.size(), default_values.size()); } @@ -253,7 +272,7 @@ Status PartialUpdateReadPlan::fill_missing_columns( old_value_block, default_value_block)); } auto mutable_default_value_columns = default_value_block.mutate_columns(); - + bool sequence_col_use_default_value = tablet_schema.sequence_col_use_default_value(); // fill all missing value from mutable_old_columns, need to consider default value and null value for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { // `use_default_or_null_flag[idx] == false` doesn't mean that we should read values from the old row @@ -271,7 +290,7 @@ Status PartialUpdateReadPlan::fill_missing_columns( const auto& tablet_column = tablet_schema.column(missing_cids[i]); auto& missing_col = mutable_full_columns[missing_cids[i]]; // clang-format off - if (tablet_column.has_default_value()) { + if (tablet_column.has_default_value() || (tablet_column.name() == SEQUENCE_COL && sequence_col_use_default_value)) { missing_col->insert_from(*mutable_default_value_columns[i].get(), 0); } else if (tablet_column.is_nullable()) { auto* nullable_column = diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp index 39fcc3f6c231ab..a24a54e8b0171f 100644 --- a/be/src/olap/rowset_builder.cpp +++ b/be/src/olap/rowset_builder.cpp @@ -415,6 +415,7 @@ void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id, _tablet_schema->set_db_id(table_schema_param->db_id()); if (table_schema_param->is_partial_update()) { _tablet_schema->set_auto_increment_column(table_schema_param->auto_increment_coulumn()); + _tablet_schema->set_sequence_map_column(table_schema_param->sequence_map_column()); } // set partial update columns info _partial_update_info = std::make_shared(); diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 1d1d6c9de79d24..3ffd0968f2eb0c 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -407,7 +407,15 @@ class TabletSchema { _auto_increment_column = auto_increment_column; } std::string auto_increment_column() const { return _auto_increment_column; } - + void set_sequence_map_column(std::string name) { _sequence_map_column = name; } + std::string sequence_map_column() const { return _sequence_map_column; } + bool sequence_col_use_default_value() const { + if (!_sequence_map_column.empty()) { + auto seq_map_column = *DORIS_TRY(column(_sequence_map_column)); + return seq_map_column.has_default_value(); + } + return false; + } void set_table_id(int64_t table_id) { _table_id = table_id; } int64_t table_id() const { return _table_id; } void set_db_id(int64_t db_id) { _db_id = db_id; } @@ -523,6 +531,7 @@ class TabletSchema { long _row_store_page_size = segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE; size_t _next_column_unique_id = 0; std::string _auto_increment_column; + std::string _sequence_map_column; bool _has_bf_fpp = false; double _bf_fpp = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index c07f16b82b94e9..4ab82ebe93dba7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -323,6 +323,12 @@ public TOlapTableSchemaParam createSchema(long dbId, OlapTable table, Analyzer a schemaParam.setAutoIncrementColumnUniqueId(col.getUniqueId()); } } + if (table.getSequenceMapCol() != null) { + Column seqMapCol = table.getFullSchema().stream() + .filter(col -> col.getName().equalsIgnoreCase(table.getSequenceMapCol())) + .findFirst().get(); + schemaParam.setSequenceMapColName(seqMapCol.getName()); + } } schemaParam.setInvertedIndexFileStorageFormat(table.getInvertedIndexFileStorageFormat()); return schemaParam; diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto index 21fc646c92d12d..60f85fae4d395c 100644 --- a/gensrc/proto/descriptors.proto +++ b/gensrc/proto/descriptors.proto @@ -73,5 +73,6 @@ message POlapTableSchemaParam { optional int64 timestamp_ms = 11 [default = 0]; optional string timezone = 12; optional int32 auto_increment_column_unique_id = 13 [default = -1]; + optional string sequence_map_col_name = 14; }; diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift index 10ad6de3f6bb22..b4360081bb204b 100644 --- a/gensrc/thrift/Descriptors.thrift +++ b/gensrc/thrift/Descriptors.thrift @@ -255,6 +255,7 @@ struct TOlapTableSchemaParam { 11: optional string auto_increment_column 12: optional i32 auto_increment_column_unique_id = -1 13: optional Types.TInvertedIndexFileStorageFormat inverted_index_file_storage_format = Types.TInvertedIndexFileStorageFormat.V1 + 14: optional string sequence_map_col_name; } struct TTabletLocation { From 3a4dc8226d638c6271125d2e1108ce0d88f93e58 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 2 Sep 2024 17:08:39 +0800 Subject: [PATCH 2/8] fix precision loss for newly inserted rows in partial update --- be/src/exec/tablet_info.cpp | 4 + be/src/exec/tablet_info.h | 3 + be/src/olap/delta_writer_v2.cpp | 3 +- be/src/olap/partial_update_info.cpp | 28 ++-- be/src/olap/partial_update_info.h | 3 +- be/src/olap/rowset_builder.cpp | 12 +- be/src/vec/sink/writer/vtablet_writer.cpp | 1 + be/src/vec/sink/writer/vtablet_writer_v2.cpp | 1 + gensrc/proto/descriptors.proto | 1 + gensrc/proto/olap_file.proto | 1 + .../test_partial_update_seq_map_col.out | 97 +++++++++++++ .../test_partial_update_seq_map_col.groovy | 132 ++++++++++++++++++ 12 files changed, 269 insertions(+), 17 deletions(-) create mode 100644 regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out create mode 100644 regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 7f84f28bc6b401..24e55f2d896a89 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -133,6 +133,9 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { } } _timestamp_ms = pschema.timestamp_ms(); + if (pschema.has_nano_seconds()) { + _nano_seconds = pschema.nano_seconds(); + } _timezone = pschema.timezone(); for (const auto& col : pschema.partial_update_input_columns()) { @@ -281,6 +284,7 @@ void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const { pschema->set_auto_increment_column_unique_id(_auto_increment_column_unique_id); pschema->set_timestamp_ms(_timestamp_ms); pschema->set_timezone(_timezone); + pschema->set_nano_seconds(_nano_seconds); pschema->set_sequence_map_col_name(_sequence_map_column); for (auto col : _partial_update_input_columns) { *pschema->add_partial_update_input_columns() = col; diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index 24d125949f788d..1e500f3b1fc862 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -96,6 +96,8 @@ class OlapTableSchemaParam { int32_t auto_increment_column_unique_id() const { return _auto_increment_column_unique_id; } void set_timestamp_ms(int64_t timestamp_ms) { _timestamp_ms = timestamp_ms; } int64_t timestamp_ms() const { return _timestamp_ms; } + void set_nano_seconds(int32_t nano_seconds) { _nano_seconds = nano_seconds; } + int32_t nano_seconds() const { return _nano_seconds; } void set_timezone(std::string timezone) { _timezone = timezone; } std::string timezone() const { return _timezone; } bool is_strict_mode() const { return _is_strict_mode; } @@ -118,6 +120,7 @@ class OlapTableSchemaParam { std::string _sequence_map_column {}; int32_t _auto_increment_column_unique_id; int64_t _timestamp_ms = 0; + int32_t _nano_seconds {0}; std::string _timezone; }; diff --git a/be/src/olap/delta_writer_v2.cpp b/be/src/olap/delta_writer_v2.cpp index 2cb470c44f3b7c..b9e95e5775bc3e 100644 --- a/be/src/olap/delta_writer_v2.cpp +++ b/be/src/olap/delta_writer_v2.cpp @@ -241,7 +241,8 @@ void DeltaWriterV2::_build_current_tablet_schema(int64_t index_id, _partial_update_info->init(*_tablet_schema, table_schema_param->is_partial_update(), table_schema_param->partial_update_input_columns(), table_schema_param->is_strict_mode(), - table_schema_param->timestamp_ms(), table_schema_param->timezone(), + table_schema_param->timestamp_ms(), + table_schema_param->nano_seconds(), table_schema_param->timezone(), table_schema_param->auto_increment_coulumn()); } diff --git a/be/src/olap/partial_update_info.cpp b/be/src/olap/partial_update_info.cpp index f4d395f1724f45..0eb0a045470041 100644 --- a/be/src/olap/partial_update_info.cpp +++ b/be/src/olap/partial_update_info.cpp @@ -33,12 +33,14 @@ namespace doris { void PartialUpdateInfo::init(const TabletSchema& tablet_schema, bool partial_update, const std::set& partial_update_cols, bool is_strict_mode, - int64_t timestamp_ms, const std::string& timezone, - const std::string& auto_increment_column, int64_t cur_max_version) { + int64_t timestamp_ms, int32_t nano_seconds, + const std::string& timezone, const std::string& auto_increment_column, + int64_t cur_max_version) { is_partial_update = partial_update; partial_update_input_columns = partial_update_cols; max_version_in_flush_phase = cur_max_version; this->timestamp_ms = timestamp_ms; + this->nano_seconds = nano_seconds; this->timezone = timezone; missing_cids.clear(); update_cids.clear(); @@ -79,6 +81,7 @@ void PartialUpdateInfo::to_pb(PartialUpdateInfoPB* partial_update_info_pb) const can_insert_new_rows_in_partial_update); partial_update_info_pb->set_is_strict_mode(is_strict_mode); partial_update_info_pb->set_timestamp_ms(timestamp_ms); + partial_update_info_pb->set_nano_seconds(nano_seconds); partial_update_info_pb->set_timezone(timezone); partial_update_info_pb->set_is_input_columns_contains_auto_inc_column( is_input_columns_contains_auto_inc_column); @@ -115,6 +118,9 @@ void PartialUpdateInfo::from_pb(PartialUpdateInfoPB* partial_update_info_pb) { partial_update_info_pb->is_input_columns_contains_auto_inc_column(); is_schema_contains_auto_inc_column = partial_update_info_pb->is_schema_contains_auto_inc_column(); + if (partial_update_info_pb->has_nano_seconds()) { + nano_seconds = partial_update_info_pb->nano_seconds(); + } default_values.clear(); for (const auto& value : partial_update_info_pb->default_values()) { default_values.push_back(value); @@ -157,16 +163,20 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids( if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 && to_lower(column.default_value()).find(to_lower("CURRENT_TIMESTAMP")) != std::string::npos)) { - LOG_INFO("_generate_default_values_for_missing_cids: column.default_value()={}", - column.default_value()); - DateV2Value dtv; - dtv.from_unixtime(timestamp_ms / 1000, timezone); - default_value = dtv.debug_string(); + auto pos = to_lower(column.default_value()).find('('); + if (pos == std::string::npos) { + DateV2Value dtv; + dtv.from_unixtime(timestamp_ms / 1000, timezone); + default_value = dtv.debug_string(); + } else { + int precision = std::stoi(column.default_value().substr(pos + 1)); + DateV2Value dtv; + dtv.from_unixtime(timestamp_ms / 1000, nano_seconds, timezone, precision); + default_value = dtv.debug_string(); + } } else if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATEV2 && to_lower(column.default_value()).find(to_lower("CURRENT_DATE")) != std::string::npos)) { - LOG_INFO("_generate_default_values_for_missing_cids: column.default_value()={}", - column.default_value()); DateV2Value dv; dv.from_unixtime(timestamp_ms / 1000, timezone); default_value = dv.debug_string(); diff --git a/be/src/olap/partial_update_info.h b/be/src/olap/partial_update_info.h index 3366c414cf03ff..278b027942eb20 100644 --- a/be/src/olap/partial_update_info.h +++ b/be/src/olap/partial_update_info.h @@ -39,7 +39,7 @@ struct RowsetId; struct PartialUpdateInfo { void init(const TabletSchema& tablet_schema, bool partial_update, const std::set& partial_update_cols, bool is_strict_mode, - int64_t timestamp_ms, const std::string& timezone, + int64_t timestamp_ms, int32_t nano_seconds, const std::string& timezone, const std::string& auto_increment_column, int64_t cur_max_version = -1); void to_pb(PartialUpdateInfoPB* partial_update_info) const; void from_pb(PartialUpdateInfoPB* partial_update_info); @@ -60,6 +60,7 @@ struct PartialUpdateInfo { bool can_insert_new_rows_in_partial_update {true}; bool is_strict_mode {false}; int64_t timestamp_ms {0}; + int32_t nano_seconds {0}; std::string timezone; bool is_input_columns_contains_auto_inc_column = false; bool is_schema_contains_auto_inc_column = false; diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp index a24a54e8b0171f..1a4e3718ceeaf6 100644 --- a/be/src/olap/rowset_builder.cpp +++ b/be/src/olap/rowset_builder.cpp @@ -419,12 +419,12 @@ void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id, } // set partial update columns info _partial_update_info = std::make_shared(); - _partial_update_info->init(*_tablet_schema, table_schema_param->is_partial_update(), - table_schema_param->partial_update_input_columns(), - table_schema_param->is_strict_mode(), - table_schema_param->timestamp_ms(), table_schema_param->timezone(), - table_schema_param->auto_increment_coulumn(), - _max_version_in_flush_phase); + _partial_update_info->init( + *_tablet_schema, table_schema_param->is_partial_update(), + table_schema_param->partial_update_input_columns(), + table_schema_param->is_strict_mode(), table_schema_param->timestamp_ms(), + table_schema_param->nano_seconds(), table_schema_param->timezone(), + table_schema_param->auto_increment_coulumn(), _max_version_in_flush_phase); } } // namespace doris diff --git a/be/src/vec/sink/writer/vtablet_writer.cpp b/be/src/vec/sink/writer/vtablet_writer.cpp index 2aa16ae498fb1f..22be788a18b97a 100644 --- a/be/src/vec/sink/writer/vtablet_writer.cpp +++ b/be/src/vec/sink/writer/vtablet_writer.cpp @@ -1180,6 +1180,7 @@ Status VTabletWriter::_init(RuntimeState* state, RuntimeProfile* profile) { _schema.reset(new OlapTableSchemaParam()); RETURN_IF_ERROR(_schema->init(table_sink.schema)); _schema->set_timestamp_ms(state->timestamp_ms()); + _schema->set_nano_seconds(state->nano_seconds()); _schema->set_timezone(state->timezone()); _location = _pool->add(new OlapTableLocationParam(table_sink.location)); _nodes_info = _pool->add(new DorisNodesInfo(table_sink.nodes_info)); diff --git a/be/src/vec/sink/writer/vtablet_writer_v2.cpp b/be/src/vec/sink/writer/vtablet_writer_v2.cpp index 7e14f8a8327ace..aaffefbd894e4d 100644 --- a/be/src/vec/sink/writer/vtablet_writer_v2.cpp +++ b/be/src/vec/sink/writer/vtablet_writer_v2.cpp @@ -151,6 +151,7 @@ Status VTabletWriterV2::_init(RuntimeState* state, RuntimeProfile* profile) { _schema.reset(new OlapTableSchemaParam()); RETURN_IF_ERROR(_schema->init(table_sink.schema)); _schema->set_timestamp_ms(state->timestamp_ms()); + _schema->set_nano_seconds(state->nano_seconds()); _schema->set_timezone(state->timezone()); _location = _pool->add(new OlapTableLocationParam(table_sink.location)); _nodes_info = _pool->add(new DorisNodesInfo(table_sink.nodes_info)); diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto index 60f85fae4d395c..9e46ba9ab823dc 100644 --- a/gensrc/proto/descriptors.proto +++ b/gensrc/proto/descriptors.proto @@ -74,5 +74,6 @@ message POlapTableSchemaParam { optional string timezone = 12; optional int32 auto_increment_column_unique_id = 13 [default = -1]; optional string sequence_map_col_name = 14; + optional int32 nano_seconds = 15 [default = 0]; }; diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 2e9fa94a343f35..9032b5ba4abe0f 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -624,4 +624,5 @@ message PartialUpdateInfoPB { optional bool is_schema_contains_auto_inc_column = 10 [default = false]; repeated string default_values = 11; optional int64 max_version_in_flush_phase = 12 [default = -1]; + optional int32 nano_seconds = 13 [default = 0]; } diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out new file mode 100644 index 00000000000000..ef8a5b860f636a --- /dev/null +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out @@ -0,0 +1,97 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql2 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql3 -- +1 1 999 999 +2 2 999 999 +3 3 999 999 +4 4 999 999 + +-- !sql4 -- +1 1 \N \N +2 2 \N \N +3 3 \N \N +4 4 \N \N + +-- !sql1 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql2 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql3 -- +1 1 999 999 +2 2 999 999 +3 3 999 999 +4 4 999 999 + +-- !sql4 -- +1 1 \N \N +2 2 \N \N +3 3 \N \N +4 4 \N \N + +-- !sql1 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql2 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql3 -- +1 1 999 999 +2 2 999 999 +3 3 999 999 +4 4 999 999 + +-- !sql4 -- +1 1 \N \N +2 2 \N \N +3 3 \N \N +4 4 \N \N + +-- !sql1 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql2 -- +1 1 +2 2 +3 3 +4 4 + +-- !sql3 -- +1 1 999 999 +2 2 999 999 +3 3 999 999 +4 4 999 999 + +-- !sql4 -- +1 1 \N \N +2 2 \N \N +3 3 \N \N +4 4 \N \N + diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy new file mode 100644 index 00000000000000..3a9b09fe9f66d3 --- /dev/null +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_partial_update_seq_map_col", "p0") { + for (def use_nereids : [true, false]) { + for (def use_row_store : [false, true]) { + logger.info("current params: use_nereids: ${use_nereids}, use_row_store: ${use_row_store}") + if (use_nereids) { + sql """ set enable_nereids_planner=true; """ + sql """ set enable_fallback_to_original_planner=false; """ + } else { + sql """ set enable_nereids_planner = false; """ + } + sql "set enable_insert_strict=false;" + sql "set enable_unique_key_partial_update=true;" + sql "set show_hidden_columns=true;" + sql "sync;" + + def tableName = "test_partial_update_seq_map_col1" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( + `k` BIGINT NOT NULL, + `c1` int, + `c2` datetime(6) null default current_timestamp(6), + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "function_column.sequence_col" = "c2", + "store_row_column" = "${use_row_store}"); """ + sql "insert into ${tableName}(k,c1) values(1,1);" + sql "insert into ${tableName}(k,c1) values(2,2);" + sql "insert into ${tableName}(k,c1) values(3,3);" + sql "insert into ${tableName}(k,c1) values(4,4);" + qt_sql1 "select k,c1 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + + + tableName = "test_partial_update_seq_map_col2" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( + `k` BIGINT NOT NULL, + `c1` int, + `c2` datetime not null default current_timestamp, + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "function_column.sequence_col" = "c2", + "store_row_column" = "${use_row_store}"); """ + sql "insert into ${tableName}(k,c1) values(1,1);" + sql "insert into ${tableName}(k,c1) values(2,2);" + sql "insert into ${tableName}(k,c1) values(3,3);" + sql "insert into ${tableName}(k,c1) values(4,4);" + qt_sql2 "select k,c1 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + + + tableName = "test_partial_update_seq_map_col3" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( + `k` BIGINT NOT NULL, + `c1` int, + `c2` int not null default "999", + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "function_column.sequence_col" = "c2", + "store_row_column" = "${use_row_store}"); """ + sql "insert into ${tableName}(k,c1) values(1,1);" + sql "insert into ${tableName}(k,c1) values(2,2);" + sql "insert into ${tableName}(k,c1) values(3,3);" + sql "insert into ${tableName}(k,c1) values(4,4);" + qt_sql3 "select k,c1,c2,__DORIS_SEQUENCE_COL__ from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + + + tableName = "test_partial_update_seq_map_col4" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( + `k` BIGINT NOT NULL, + `c1` int, + `c2` int null, + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "function_column.sequence_col" = "c2", + "store_row_column" = "${use_row_store}"); """ + sql "insert into ${tableName}(k,c1) values(1,1);" + sql "insert into ${tableName}(k,c1) values(2,2);" + sql "insert into ${tableName}(k,c1) values(3,3);" + sql "insert into ${tableName}(k,c1) values(4,4);" + qt_sql4 "select k,c1,c2,__DORIS_SEQUENCE_COL__ from ${tableName};" + + + tableName = "test_partial_update_seq_map_col5" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( + `k` BIGINT NOT NULL, + `c1` int, + `c2` int not null + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "function_column.sequence_col" = "c2", + "store_row_column" = "${use_row_store}"); """ + test { + sql "insert into ${tableName}(k,c1) values(1,1);" + exception "the unmentioned column `c2` should have default value or be nullable for newly inserted rows in non-strict mode partial update" + } + } + } +} From 1cdb530782b1b944ee774bf9242c9895f3d77ec8 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Tue, 3 Sep 2024 09:56:53 +0800 Subject: [PATCH 3/8] fix --- .../src/main/java/org/apache/doris/planner/OlapTableSink.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index 4ab82ebe93dba7..2ea459b1f1e30b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -323,7 +323,7 @@ public TOlapTableSchemaParam createSchema(long dbId, OlapTable table, Analyzer a schemaParam.setAutoIncrementColumnUniqueId(col.getUniqueId()); } } - if (table.getSequenceMapCol() != null) { + if (table.getSequenceMapCol() != null && !partialUpdateInputColumns.contains(table.getSequenceMapCol())) { Column seqMapCol = table.getFullSchema().stream() .filter(col -> col.getName().equalsIgnoreCase(table.getSequenceMapCol())) .findFirst().get(); From 3af247d81ccb7b4237d7d8d9bf31939fa2640261 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Tue, 3 Sep 2024 14:10:35 +0800 Subject: [PATCH 4/8] tmp --- be/src/olap/partial_update_info.cpp | 1 + .../apache/doris/planner/OlapTableSink.java | 2 +- .../test_partial_update_seq_map_col.out | 28 +++++++++++++++++++ .../test_partial_update_seq_map_col.groovy | 13 ++++++--- 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/be/src/olap/partial_update_info.cpp b/be/src/olap/partial_update_info.cpp index 0eb0a045470041..1c7518f1d5ce9a 100644 --- a/be/src/olap/partial_update_info.cpp +++ b/be/src/olap/partial_update_info.cpp @@ -190,6 +190,7 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids( } } if (!tablet_schema.sequence_map_column().empty() && + !partial_update_input_columns.contains(SEQUENCE_COL) && !partial_update_input_columns.contains(tablet_schema.sequence_map_column())) { auto it = std::find(missing_cids.cbegin(), missing_cids.cend(), tablet_schema.sequence_col_idx()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index 2ea459b1f1e30b..4ab82ebe93dba7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -323,7 +323,7 @@ public TOlapTableSchemaParam createSchema(long dbId, OlapTable table, Analyzer a schemaParam.setAutoIncrementColumnUniqueId(col.getUniqueId()); } } - if (table.getSequenceMapCol() != null && !partialUpdateInputColumns.contains(table.getSequenceMapCol())) { + if (table.getSequenceMapCol() != null) { Column seqMapCol = table.getFullSchema().stream() .filter(col -> col.getName().equalsIgnoreCase(table.getSequenceMapCol())) .findFirst().get(); diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out index ef8a5b860f636a..b122549be24986 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out @@ -17,6 +17,13 @@ 3 3 999 999 4 4 999 999 +-- !sql3 -- +1 99 8888 8888 +2 99 8888 8888 +3 3 999 999 +4 4 999 999 +5 99 8888 8888 + -- !sql4 -- 1 1 \N \N 2 2 \N \N @@ -41,6 +48,13 @@ 3 3 999 999 4 4 999 999 +-- !sql3 -- +1 99 8888 8888 +2 99 8888 8888 +3 3 999 999 +4 4 999 999 +5 99 8888 8888 + -- !sql4 -- 1 1 \N \N 2 2 \N \N @@ -65,6 +79,13 @@ 3 3 999 999 4 4 999 999 +-- !sql3 -- +1 99 8888 8888 +2 99 8888 8888 +3 3 999 999 +4 4 999 999 +5 99 8888 8888 + -- !sql4 -- 1 1 \N \N 2 2 \N \N @@ -89,6 +110,13 @@ 3 3 999 999 4 4 999 999 +-- !sql3 -- +1 99 8888 8888 +2 99 8888 8888 +3 3 999 999 +4 4 999 999 +5 99 8888 8888 + -- !sql4 -- 1 1 \N \N 2 2 \N \N diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy index 3a9b09fe9f66d3..01d3dee976b945 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy @@ -47,7 +47,7 @@ suite("test_partial_update_seq_map_col", "p0") { sql "insert into ${tableName}(k,c1) values(2,2);" sql "insert into ${tableName}(k,c1) values(3,3);" sql "insert into ${tableName}(k,c1) values(4,4);" - qt_sql1 "select k,c1 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + order_qt_sql1 "select k,c1 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" tableName = "test_partial_update_seq_map_col2" @@ -67,7 +67,7 @@ suite("test_partial_update_seq_map_col", "p0") { sql "insert into ${tableName}(k,c1) values(2,2);" sql "insert into ${tableName}(k,c1) values(3,3);" sql "insert into ${tableName}(k,c1) values(4,4);" - qt_sql2 "select k,c1 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + order_qt_sql2 "select k,c1 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" tableName = "test_partial_update_seq_map_col3" @@ -87,7 +87,12 @@ suite("test_partial_update_seq_map_col", "p0") { sql "insert into ${tableName}(k,c1) values(2,2);" sql "insert into ${tableName}(k,c1) values(3,3);" sql "insert into ${tableName}(k,c1) values(4,4);" - qt_sql3 "select k,c1,c2,__DORIS_SEQUENCE_COL__ from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + order_qt_sql3 "select k,c1,c2,__DORIS_SEQUENCE_COL__ from ${tableName};" + sql "insert into ${tableName}(k,c1,c2) values(1,99,8888);" + sql "insert into ${tableName}(k,c1,c2) values(2,99,8888);" + sql "insert into ${tableName}(k,c1,c2) values(4,99,77);" + sql "insert into ${tableName}(k,c1,c2) values(5,99,8888);" + order_qt_sql3 "select k,c1,c2,__DORIS_SEQUENCE_COL__ from ${tableName}" tableName = "test_partial_update_seq_map_col4" @@ -107,7 +112,7 @@ suite("test_partial_update_seq_map_col", "p0") { sql "insert into ${tableName}(k,c1) values(2,2);" sql "insert into ${tableName}(k,c1) values(3,3);" sql "insert into ${tableName}(k,c1) values(4,4);" - qt_sql4 "select k,c1,c2,__DORIS_SEQUENCE_COL__ from ${tableName};" + order_qt_sql4 "select k,c1,c2,__DORIS_SEQUENCE_COL__ from ${tableName};" tableName = "test_partial_update_seq_map_col5" From 715c5477b95ef7b3c5e25aebf13dbd152d9e7fda Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Fri, 13 Sep 2024 16:24:51 +0800 Subject: [PATCH 5/8] remove --- be/src/exec/tablet_info.cpp | 9 --------- be/src/exec/tablet_info.h | 2 -- be/src/olap/base_tablet.cpp | 2 -- be/src/olap/delta_writer_v2.cpp | 1 - be/src/olap/partial_update_info.cpp | 18 +----------------- be/src/olap/rowset_builder.cpp | 1 - be/src/olap/tablet_schema.h | 10 ---------- .../apache/doris/planner/OlapTableSink.java | 6 ------ gensrc/proto/descriptors.proto | 3 +-- gensrc/thrift/Descriptors.thrift | 1 - 10 files changed, 2 insertions(+), 51 deletions(-) diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 24e55f2d896a89..44846ded868e8f 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -127,10 +127,6 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { "different from BE."); } _auto_increment_column_unique_id = pschema.auto_increment_column_unique_id(); - - if (pschema.has_sequence_map_col_name()) { - _sequence_map_column = pschema.sequence_map_col_name(); - } } _timestamp_ms = pschema.timestamp_ms(); if (pschema.has_nano_seconds()) { @@ -205,10 +201,6 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { "different from BE."); } _auto_increment_column_unique_id = tschema.auto_increment_column_unique_id; - - if (tschema.__isset.sequence_map_col_name) { - _sequence_map_column = tschema.sequence_map_col_name; - } } for (const auto& tcolumn : tschema.partial_update_input_columns) { @@ -285,7 +277,6 @@ void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const { pschema->set_timestamp_ms(_timestamp_ms); pschema->set_timezone(_timezone); pschema->set_nano_seconds(_nano_seconds); - pschema->set_sequence_map_col_name(_sequence_map_column); for (auto col : _partial_update_input_columns) { *pschema->add_partial_update_input_columns() = col; } diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index 1e500f3b1fc862..ff1c2e8e6b072e 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -102,7 +102,6 @@ class OlapTableSchemaParam { std::string timezone() const { return _timezone; } bool is_strict_mode() const { return _is_strict_mode; } std::string debug_string() const; - std::string sequence_map_column() const { return _sequence_map_column; } private: int64_t _db_id; @@ -117,7 +116,6 @@ class OlapTableSchemaParam { std::set _partial_update_input_columns; bool _is_strict_mode = false; std::string _auto_increment_column; - std::string _sequence_map_column {}; int32_t _auto_increment_column_unique_id; int64_t _timestamp_ms = 0; int32_t _nano_seconds {0}; diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 5105d5373d5fc4..a77c49e0ac5046 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -967,8 +967,6 @@ Status BaseTablet::generate_new_block_for_partial_update( CHECK(update_rows >= old_rows); - bool sequence_col_use_default_value = rowset_schema->sequence_col_use_default_value(); - // build full block for (auto i = 0; i < missing_cids.size(); ++i) { const auto& rs_column = rowset_schema->column(missing_cids[i]); diff --git a/be/src/olap/delta_writer_v2.cpp b/be/src/olap/delta_writer_v2.cpp index b9e95e5775bc3e..73d2fb1d9746a9 100644 --- a/be/src/olap/delta_writer_v2.cpp +++ b/be/src/olap/delta_writer_v2.cpp @@ -234,7 +234,6 @@ void DeltaWriterV2::_build_current_tablet_schema(int64_t index_id, _tablet_schema->set_db_id(table_schema_param->db_id()); if (table_schema_param->is_partial_update()) { _tablet_schema->set_auto_increment_column(table_schema_param->auto_increment_coulumn()); - _tablet_schema->set_sequence_map_column(table_schema_param->sequence_map_column()); } // set partial update columns info _partial_update_info = std::make_shared(); diff --git a/be/src/olap/partial_update_info.cpp b/be/src/olap/partial_update_info.cpp index 1c7518f1d5ce9a..e2f854b373e5eb 100644 --- a/be/src/olap/partial_update_info.cpp +++ b/be/src/olap/partial_update_info.cpp @@ -189,21 +189,6 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids( default_values.emplace_back(); } } - if (!tablet_schema.sequence_map_column().empty() && - !partial_update_input_columns.contains(SEQUENCE_COL) && - !partial_update_input_columns.contains(tablet_schema.sequence_map_column())) { - auto it = std::find(missing_cids.cbegin(), missing_cids.cend(), - tablet_schema.sequence_col_idx()); - DCHECK(it != missing_cids.cend()); - std::size_t seq_col_idx_in_missing_cids = std::distance(missing_cids.cbegin(), it); - it = std::find(missing_cids.cbegin(), missing_cids.cend(), - tablet_schema.field_index(tablet_schema.sequence_map_column())); - DCHECK(it != missing_cids.cend()); - std::size_t seq_map_col_idx_in_missing_cids = std::distance(missing_cids.cbegin(), it); - default_values[seq_col_idx_in_missing_cids] = - default_values[seq_map_col_idx_in_missing_cids]; - } - CHECK_EQ(missing_cids.size(), default_values.size()); } @@ -283,7 +268,6 @@ Status PartialUpdateReadPlan::fill_missing_columns( old_value_block, default_value_block)); } auto mutable_default_value_columns = default_value_block.mutate_columns(); - bool sequence_col_use_default_value = tablet_schema.sequence_col_use_default_value(); // fill all missing value from mutable_old_columns, need to consider default value and null value for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { // `use_default_or_null_flag[idx] == false` doesn't mean that we should read values from the old row @@ -301,7 +285,7 @@ Status PartialUpdateReadPlan::fill_missing_columns( const auto& tablet_column = tablet_schema.column(missing_cids[i]); auto& missing_col = mutable_full_columns[missing_cids[i]]; // clang-format off - if (tablet_column.has_default_value() || (tablet_column.name() == SEQUENCE_COL && sequence_col_use_default_value)) { + if (tablet_column.has_default_value()) { missing_col->insert_from(*mutable_default_value_columns[i].get(), 0); } else if (tablet_column.is_nullable()) { auto* nullable_column = diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp index 1a4e3718ceeaf6..9dac1f8842d345 100644 --- a/be/src/olap/rowset_builder.cpp +++ b/be/src/olap/rowset_builder.cpp @@ -415,7 +415,6 @@ void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id, _tablet_schema->set_db_id(table_schema_param->db_id()); if (table_schema_param->is_partial_update()) { _tablet_schema->set_auto_increment_column(table_schema_param->auto_increment_coulumn()); - _tablet_schema->set_sequence_map_column(table_schema_param->sequence_map_column()); } // set partial update columns info _partial_update_info = std::make_shared(); diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 3ffd0968f2eb0c..c9bcbf8945adc2 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -407,15 +407,6 @@ class TabletSchema { _auto_increment_column = auto_increment_column; } std::string auto_increment_column() const { return _auto_increment_column; } - void set_sequence_map_column(std::string name) { _sequence_map_column = name; } - std::string sequence_map_column() const { return _sequence_map_column; } - bool sequence_col_use_default_value() const { - if (!_sequence_map_column.empty()) { - auto seq_map_column = *DORIS_TRY(column(_sequence_map_column)); - return seq_map_column.has_default_value(); - } - return false; - } void set_table_id(int64_t table_id) { _table_id = table_id; } int64_t table_id() const { return _table_id; } void set_db_id(int64_t db_id) { _db_id = db_id; } @@ -531,7 +522,6 @@ class TabletSchema { long _row_store_page_size = segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE; size_t _next_column_unique_id = 0; std::string _auto_increment_column; - std::string _sequence_map_column; bool _has_bf_fpp = false; double _bf_fpp = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index 4ab82ebe93dba7..c07f16b82b94e9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -323,12 +323,6 @@ public TOlapTableSchemaParam createSchema(long dbId, OlapTable table, Analyzer a schemaParam.setAutoIncrementColumnUniqueId(col.getUniqueId()); } } - if (table.getSequenceMapCol() != null) { - Column seqMapCol = table.getFullSchema().stream() - .filter(col -> col.getName().equalsIgnoreCase(table.getSequenceMapCol())) - .findFirst().get(); - schemaParam.setSequenceMapColName(seqMapCol.getName()); - } } schemaParam.setInvertedIndexFileStorageFormat(table.getInvertedIndexFileStorageFormat()); return schemaParam; diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto index 9e46ba9ab823dc..99cd99410ed7de 100644 --- a/gensrc/proto/descriptors.proto +++ b/gensrc/proto/descriptors.proto @@ -73,7 +73,6 @@ message POlapTableSchemaParam { optional int64 timestamp_ms = 11 [default = 0]; optional string timezone = 12; optional int32 auto_increment_column_unique_id = 13 [default = -1]; - optional string sequence_map_col_name = 14; - optional int32 nano_seconds = 15 [default = 0]; + optional int32 nano_seconds = 14 [default = 0]; }; diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift index b4360081bb204b..10ad6de3f6bb22 100644 --- a/gensrc/thrift/Descriptors.thrift +++ b/gensrc/thrift/Descriptors.thrift @@ -255,7 +255,6 @@ struct TOlapTableSchemaParam { 11: optional string auto_increment_column 12: optional i32 auto_increment_column_unique_id = -1 13: optional Types.TInvertedIndexFileStorageFormat inverted_index_file_storage_format = Types.TInvertedIndexFileStorageFormat.V1 - 14: optional string sequence_map_col_name; } struct TTabletLocation { From 7051624a6d29313cea6c00fed2e1638fb8288f20 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Fri, 13 Sep 2024 18:58:58 +0800 Subject: [PATCH 6/8] fix --- be/src/olap/base_tablet.cpp | 6 ++---- be/src/olap/partial_update_info.cpp | 1 + be/src/olap/tablet_schema.h | 1 + .../src/main/java/org/apache/doris/catalog/Column.java | 6 ++++++ .../src/main/java/org/apache/doris/catalog/OlapTable.java | 5 ++++- .../java/org/apache/doris/datasource/InternalCatalog.java | 4 ++-- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index a77c49e0ac5046..1fd3b785b9072f 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -894,8 +894,7 @@ Status BaseTablet::generate_default_value_block(const TabletSchema& schema, auto mutable_default_value_columns = default_value_block.mutate_columns(); for (auto i = 0; i < cids.size(); ++i) { const auto& column = schema.column(cids[i]); - if (column.has_default_value() || - (column.name() == SEQUENCE_COL && schema.sequence_col_use_default_value())) { + if (column.has_default_value()) { const auto& default_value = default_values[i]; vectorized::ReadBuffer rb(const_cast(default_value.c_str()), default_value.size()); @@ -985,8 +984,7 @@ Status BaseTablet::generate_new_block_for_partial_update( mutable_column->insert_default(); } else if (old_block_delete_signs != nullptr && old_block_delete_signs[read_index_old[idx]] != 0) { - if (rs_column.has_default_value() || - (rs_column.name() == SEQUENCE_COL && sequence_col_use_default_value)) { + if (rs_column.has_default_value()) { mutable_column->insert_from(*mutable_default_value_columns[i].get(), 0); } else if (rs_column.is_nullable()) { assert_cast( diff --git a/be/src/olap/partial_update_info.cpp b/be/src/olap/partial_update_info.cpp index e2f854b373e5eb..a576096fcd4394 100644 --- a/be/src/olap/partial_update_info.cpp +++ b/be/src/olap/partial_update_info.cpp @@ -268,6 +268,7 @@ Status PartialUpdateReadPlan::fill_missing_columns( old_value_block, default_value_block)); } auto mutable_default_value_columns = default_value_block.mutate_columns(); + // fill all missing value from mutable_old_columns, need to consider default value and null value for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { // `use_default_or_null_flag[idx] == false` doesn't mean that we should read values from the old row diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index c9bcbf8945adc2..1d1d6c9de79d24 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -407,6 +407,7 @@ class TabletSchema { _auto_increment_column = auto_increment_column; } std::string auto_increment_column() const { return _auto_increment_column; } + void set_table_id(int64_t table_id) { _table_id = table_id; } int64_t table_id() const { return _table_id; } void set_db_id(int64_t db_id) { _db_id = db_id; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index fc69c31f0e98f3..7e0ab33aa7c14d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -1181,4 +1181,10 @@ public GeneratedColumnInfo getGeneratedColumnInfo() { public Set getGeneratedColumnsThatReferToThis() { return generatedColumnsThatReferToThis; } + + public void setDefaultValueInfo(Column refColumn) { + this.defaultValue = refColumn.defaultValue; + this.defaultValueExprDef = refColumn.defaultValueExprDef; + this.realDefaultValue = refColumn.realDefaultValue; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 9728a9e41547bc..a104b65a178e6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1421,7 +1421,7 @@ public void setSequenceMapCol(String colName) { getOrCreatTableProperty().setSequenceMapCol(colName); } - public void setSequenceInfo(Type type) { + public void setSequenceInfo(Type type, Column refColumn) { this.hasSequenceCol = true; this.sequenceType = type; @@ -1435,6 +1435,9 @@ public void setSequenceInfo(Type type) { // unique key table sequenceCol = ColumnDef.newSequenceColumnDef(type, AggregateType.REPLACE).toColumn(); } + if (refColumn != null) { + sequenceCol.setDefaultValueInfo(refColumn); + } // add sequence column at last fullSchema.add(sequenceCol); nameToColumn.put(Column.SEQUENCE_COL, sequenceCol); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 03c33a21e94434..6569477ab9e7d1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -2981,7 +2981,7 @@ private boolean createOlapTable(Database db, CreateTableStmt stmt) throws UserEx throw new DdlException("Sequence type only support integer types and date types"); } olapTable.setSequenceMapCol(col.getName()); - olapTable.setSequenceInfo(col.getType()); + olapTable.setSequenceInfo(col.getType(), col); } } catch (Exception e) { throw new DdlException(e.getMessage()); @@ -2995,7 +2995,7 @@ private boolean createOlapTable(Database db, CreateTableStmt stmt) throws UserEx throw new DdlException("The sequence_col and sequence_type cannot be set at the same time"); } if (sequenceColType != null) { - olapTable.setSequenceInfo(sequenceColType); + olapTable.setSequenceInfo(sequenceColType, null); } } catch (Exception e) { throw new DdlException(e.getMessage()); From 9e6d4f60653441b46df067e3f9659d5bb877bf5f Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Sat, 14 Sep 2024 14:09:30 +0800 Subject: [PATCH 7/8] add update case --- .../test_partial_update_seq_map_col.out | 96 +++++++++++++++---- .../test_partial_update_seq_map_col.groovy | 27 +++++- 2 files changed, 104 insertions(+), 19 deletions(-) diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out index b122549be24986..5bdfef021827ac 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.out @@ -1,9 +1,25 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql1 -- -1 1 -2 2 -3 3 -4 4 +1 1 \N +2 2 \N +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +3 3 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 +4 4 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 -- !sql2 -- 1 1 @@ -31,10 +47,26 @@ 4 4 \N \N -- !sql1 -- -1 1 -2 2 -3 3 -4 4 +1 1 \N +2 2 \N +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +3 3 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 +4 4 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 -- !sql2 -- 1 1 @@ -62,10 +94,26 @@ 4 4 \N \N -- !sql1 -- -1 1 -2 2 -3 3 -4 4 +1 1 \N +2 2 \N +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +3 3 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 +4 4 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 -- !sql2 -- 1 1 @@ -93,10 +141,26 @@ 4 4 \N \N -- !sql1 -- -1 1 -2 2 -3 3 -4 4 +1 1 \N +2 2 \N +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +1 1 20 +2 2 20 +3 3 \N +4 4 \N + +-- !sql1 -- +3 3 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 +4 4 2099-09-10T12:00:00.977174 \N 2099-09-10T12:00:00.977174 -- !sql2 -- 1 1 diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy index 01d3dee976b945..ced58f19c99f42 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_map_col.groovy @@ -27,7 +27,6 @@ suite("test_partial_update_seq_map_col", "p0") { } sql "set enable_insert_strict=false;" sql "set enable_unique_key_partial_update=true;" - sql "set show_hidden_columns=true;" sql "sync;" def tableName = "test_partial_update_seq_map_col1" @@ -36,6 +35,10 @@ suite("test_partial_update_seq_map_col", "p0") { `k` BIGINT NOT NULL, `c1` int, `c2` datetime(6) null default current_timestamp(6), + c3 int, + c4 int, + c5 int, + c6 int ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 PROPERTIES ( @@ -47,8 +50,26 @@ suite("test_partial_update_seq_map_col", "p0") { sql "insert into ${tableName}(k,c1) values(2,2);" sql "insert into ${tableName}(k,c1) values(3,3);" sql "insert into ${tableName}(k,c1) values(4,4);" - order_qt_sql1 "select k,c1 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" - + order_qt_sql1 "select k,c1,c3 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + // update column which is not sequence map col + if (use_nereids) { + explain { + sql "update ${tableName} set c3=20 where c1<=2;" + contains "IS_PARTIAL_UPDATE: false" + } + } + sql "update ${tableName} set c3=20 where c1<=2;" + order_qt_sql1 "select k,c1,c3 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + // update sequence map col + if (use_nereids) { + explain { + sql "update ${tableName} set c2='2099-09-10 12:00:00.977174' where k>2;" + contains "IS_PARTIAL_UPDATE: false" + } + } + sql "update ${tableName} set c2='2099-09-10 12:00:00.977174' where k>2;" + order_qt_sql1 "select k,c1,c3 from ${tableName} where c2=__DORIS_SEQUENCE_COL__;" + order_qt_sql1 "select k,c1,c2,c3,__DORIS_SEQUENCE_COL__ from ${tableName} where c1>2;" tableName = "test_partial_update_seq_map_col2" sql """ DROP TABLE IF EXISTS ${tableName} """ From a0bada52fd9e9468b93b697084707f689a561d5a Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Wed, 18 Sep 2024 14:36:04 +0800 Subject: [PATCH 8/8] postprocess --- .../main/java/org/apache/doris/catalog/OlapTable.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index a104b65a178e6d..0b10fa1bdd21c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1865,6 +1865,17 @@ public void gsonPostProcess() throws IOException { if (isAutoBucket()) { defaultDistributionInfo.markAutoBucket(); } + if (isUniqKeyMergeOnWrite() && getSequenceMapCol() != null) { + // set the hidden sequence column's default value the same with + // the sequence map column's for partial update + String seqMapColName = getSequenceMapCol(); + Column seqMapCol = getBaseSchema().stream().filter(col -> col.getName().equalsIgnoreCase(seqMapColName)) + .findFirst().orElse(null); + Column hiddenSeqCol = getSequenceCol(); + if (seqMapCol != null && hiddenSeqCol != null) { + hiddenSeqCol.setDefaultValueInfo(seqMapCol); + } + } RangePartitionInfo tempRangeInfo = tempPartitions.getPartitionInfo(); if (tempRangeInfo != null) { for (long partitionId : tempRangeInfo.getIdToItem(false).keySet()) {