From d6590664b5b0835cd832d0acaa4ebbdf77bf5834 Mon Sep 17 00:00:00 2001 From: kangkaisen Date: Thu, 13 Dec 2018 17:21:35 +0800 Subject: [PATCH 1/2] Support add key column for LinkedSchemaChange --- be/src/olap/schema_change.cpp | 15 ++++++++++----- be/src/olap/schema_change.h | 8 +++++++- be/src/olap/segment_group.cpp | 36 +++++++++++++++++++++++++++-------- be/src/olap/segment_group.h | 6 +++++- 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index ad36cdba2c1e40..a5c2a2c120c52b 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -644,9 +644,11 @@ bool RowBlockMerger::_pop_heap() { } LinkedSchemaChange::LinkedSchemaChange( - OLAPTablePtr base_olap_table, OLAPTablePtr new_olap_table) : + OLAPTablePtr base_olap_table, OLAPTablePtr new_olap_table, + const RowBlockChanger& row_block_changer) : _base_olap_table(base_olap_table), - _new_olap_table(new_olap_table) {} + _new_olap_table(new_olap_table), + _row_block_changer(row_block_changer) {} SchemaChangeDirectly::SchemaChangeDirectly( OLAPTablePtr olap_table, @@ -709,7 +711,8 @@ bool LinkedSchemaChange::process(ColumnData* olap_data, SegmentGroup* new_segmen new_segment_group->set_empty(olap_data->empty()); new_segment_group->set_num_segments(olap_data->segment_group()->num_segments()); - new_segment_group->add_column_statistics_for_linked_schema_change(olap_data->segment_group()->get_column_statistics()); + new_segment_group->add_column_statistics_for_linked_schema_change(olap_data->segment_group()->get_column_statistics(), + _row_block_changer.get__schema_mapping() ); if (OLAP_SUCCESS != new_segment_group->load()) { OLAP_LOG_WARNING("fail to reload index. 
[table='%s' version='%d-%d']", @@ -1780,7 +1783,8 @@ OLAPStatus SchemaChangeHandler::schema_version_convert( LOG(INFO) << "doing linked schema change."; sc_procedure = new(nothrow) LinkedSchemaChange( src_olap_table, - dest_olap_table); + dest_olap_table, + rb_changer); } if (NULL == sc_procedure) { @@ -1998,7 +2002,8 @@ OLAPStatus SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) { LOG(INFO) << "doing linked schema change."; sc_procedure = new(nothrow) LinkedSchemaChange( sc_params->ref_olap_table, - sc_params->new_olap_table); + sc_params->new_olap_table, + rb_changer); } if (NULL == sc_procedure) { diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h index 84cafa6f79a2a0..1f718a2cad7db6 100644 --- a/be/src/olap/schema_change.h +++ b/be/src/olap/schema_change.h @@ -66,6 +66,10 @@ class RowBlockChanger { virtual ~RowBlockChanger(); ColumnMapping* get_mutable_column_mapping(size_t column_index); + + SchemaMapping get__schema_mapping() const { + return _schema_mapping; + } bool change_row_block( const DataFileType df_type, @@ -192,13 +196,15 @@ class LinkedSchemaChange : public SchemaChange { public: explicit LinkedSchemaChange( OLAPTablePtr base_olap_table, - OLAPTablePtr new_olap_table); + OLAPTablePtr new_olap_table, + const RowBlockChanger& row_block_changer); ~LinkedSchemaChange() {} bool process(ColumnData* olap_data, SegmentGroup* new_segment_group); private: OLAPTablePtr _base_olap_table; OLAPTablePtr _new_olap_table; + const RowBlockChanger& _row_block_changer; DISALLOW_COPY_AND_ASSIGN(LinkedSchemaChange); }; diff --git a/be/src/olap/segment_group.cpp b/be/src/olap/segment_group.cpp index a5c24c0fd7fca7..86bbdf9ade2c16 100644 --- a/be/src/olap/segment_group.cpp +++ b/be/src/olap/segment_group.cpp @@ -28,6 +28,7 @@ #include "olap/row_cursor.h" #include "olap/utils.h" #include "olap/wrapper_field.h" +#include "olap/schema_change.h" using std::ifstream; using std::string; @@ -198,24 +199,43 @@ void 
SegmentGroup::delete_all_files() { } } + OLAPStatus SegmentGroup::add_column_statistics_for_linked_schema_change( - const std::vector>& column_statistic_fields) { + const std::vector>& column_statistic_fields, + const SchemaMapping& schema_mapping) { //When add rollup table, the base table index maybe empty if (column_statistic_fields.size() == 0) { return OLAP_SUCCESS; } - //Should use _table->num_key_fields(), not column_statistic_fields.size() - //as rollup table num_key_fields will less than base table column_statistic_fields.size(). - //For LinkedSchemaChange, the rollup table keys order is the same as base table + //1 for LinkedSchemaChange, the rollup table keys order is the same as base table + //2 when user add a new key column to base table, _table->num_key_fields() size will + // greater than _column_statistics size + int num_new_keys = 0; for (size_t i = 0; i < _table->num_key_fields(); ++i) { - WrapperField* first = WrapperField::create(_table->tablet_schema()[i]); + const FieldInfo& column_schema = _table->tablet_schema()[i]; + + WrapperField* first = WrapperField::create(column_schema); DCHECK(first != NULL) << "failed to allocate memory for field: " << i; - first->copy(column_statistic_fields[i].first); - WrapperField* second = WrapperField::create(_table->tablet_schema()[i]); + WrapperField* second = WrapperField::create(column_schema); DCHECK(second != NULL) << "failed to allocate memory for field: " << i; - second->copy(column_statistic_fields[i].second); + + //for new key column, use default value to fill into column_statistics + if (schema_mapping[i].ref_column == -1) { + num_new_keys++; + + if (true == column_schema.is_allow_null && column_schema.default_value.length() == 0) { + first->set_null(); + second->set_null(); + } else { + first->from_string(column_schema.default_value); + second->from_string(column_schema.default_value); + } + } else { + first->copy(column_statistic_fields[i - num_new_keys].first); + 
second->copy(column_statistic_fields[i - num_new_keys].second); + } _column_statistics.push_back(std::make_pair(first, second)); } diff --git a/be/src/olap/segment_group.h b/be/src/olap/segment_group.h index 19d17af4f10f16..d24316d5f8a995 100644 --- a/be/src/olap/segment_group.h +++ b/be/src/olap/segment_group.h @@ -38,6 +38,7 @@ #include "olap/utils.h" namespace doris { +class ColumnMapping; // Class for segments management // For fast key lookup, we maintain a sparse index for every data file. The @@ -47,6 +48,8 @@ namespace doris { class SegmentGroup { friend class MemIndex; public: + typedef std::vector SchemaMapping; + SegmentGroup(OLAPTable* table, Version version, VersionHash version_hash, bool delete_flag, int segment_group_id, int32_t num_segments); @@ -66,7 +69,8 @@ class SegmentGroup { } OLAPStatus add_column_statistics_for_linked_schema_change( - const std::vector>& column_statistic_fields); + const std::vector>& column_statistic_fields, + const SchemaMapping& schema_mapping); OLAPStatus add_column_statistics( const std::vector>& column_statistic_fields); From f10a77fb62619b10f03f587fd96b1411b32266fc Mon Sep 17 00:00:00 2001 From: kangkaisen Date: Fri, 14 Dec 2018 16:56:02 +0800 Subject: [PATCH 2/2] Move ColumnMapping to single file --- be/src/olap/column_mapping.h | 37 +++++++++++++++++++++++++++++++++++ be/src/olap/schema_change.h | 11 ----------- be/src/olap/segment_group.cpp | 12 +++--------- be/src/olap/segment_group.h | 2 +- 4 files changed, 41 insertions(+), 21 deletions(-) create mode 100644 be/src/olap/column_mapping.h diff --git a/be/src/olap/column_mapping.h b/be/src/olap/column_mapping.h new file mode 100644 index 00000000000000..b5ec1876f57ec4 --- /dev/null +++ b/be/src/olap/column_mapping.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H +#define DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H + +#include "olap/wrapper_field.h" + +namespace doris { + +struct ColumnMapping { + ColumnMapping() : ref_column(-1), default_value(NULL) {} + virtual ~ColumnMapping() {} + + // <0: use default value + // >=0: use origin column + int32_t ref_column; + // normally for default value. stores values for filters + WrapperField* default_value; +}; + +} // namespace doris +#endif // DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H \ No newline at end of file diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h index 1f718a2cad7db6..87b15fb70fa313 100644 --- a/be/src/olap/schema_change.h +++ b/be/src/olap/schema_change.h @@ -41,17 +41,6 @@ class RowCursor; // defined in 'writer.h' class ColumnDataWriter; -struct ColumnMapping { - ColumnMapping() : ref_column(-1), default_value(NULL) {} - virtual ~ColumnMapping() {} - - // <0: use default value - // >=0: use origin column - int32_t ref_column; - // normally for default value. 
stores values for filters - WrapperField* default_value; -}; - class RowBlockChanger { public: typedef std::vector SchemaMapping; diff --git a/be/src/olap/segment_group.cpp b/be/src/olap/segment_group.cpp index 86bbdf9ade2c16..fdabc9bc2fd705 100644 --- a/be/src/olap/segment_group.cpp +++ b/be/src/olap/segment_group.cpp @@ -27,8 +27,7 @@ #include "olap/row_block.h" #include "olap/row_cursor.h" #include "olap/utils.h" -#include "olap/wrapper_field.h" -#include "olap/schema_change.h" +#include "olap/column_mapping.h" using std::ifstream; using std::string; @@ -225,13 +224,8 @@ OLAPStatus SegmentGroup::add_column_statistics_for_linked_schema_change( if (schema_mapping[i].ref_column == -1) { num_new_keys++; - if (true == column_schema.is_allow_null && column_schema.default_value.length() == 0) { - first->set_null(); - second->set_null(); - } else { - first->from_string(column_schema.default_value); - second->from_string(column_schema.default_value); - } + first->copy(schema_mapping[i].default_value); + second->copy(schema_mapping[i].default_value); } else { first->copy(column_statistic_fields[i - num_new_keys].first); second->copy(column_statistic_fields[i - num_new_keys].second); diff --git a/be/src/olap/segment_group.h b/be/src/olap/segment_group.h index d24316d5f8a995..ef9f641418cad7 100644 --- a/be/src/olap/segment_group.h +++ b/be/src/olap/segment_group.h @@ -36,9 +36,9 @@ #include "olap/row_cursor.h" #include "olap/olap_index.h" #include "olap/utils.h" +#include "olap/column_mapping.h" namespace doris { -class ColumnMapping; // Class for segments management // For fast key lookup, we maintain a sparse index for every data file. The