From edfecf81d0b28e48610f895e2c46e861cbe22b2d Mon Sep 17 00:00:00 2001 From: chenmingyu Date: Fri, 22 May 2020 00:05:55 +0800 Subject: [PATCH] [Optimize] Using sorted schema change processing to merge the data When reducing the key columns of an aggregate table by doing schema change or rollup, we should trigger a re-sort schema change process to re-sort and merge the data into the new schema, so that the data in the new schema will be more aggregated. --- be/src/olap/schema_change.cpp | 37 +++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 690fe89c29ee92..77916843a2c5f7 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1859,6 +1859,29 @@ OLAPStatus SchemaChangeHandler::_parse_request(TabletSharedPtr base_tablet, // 若Key列的引用序列出现乱序,则需要重排序 int num_default_value = 0; + // A, B, C are keys(sort keys), D is value + // The following cases are not changing the order, no need to resort: + // (sort keys keep in same order) + // old keys: A B C D + // new keys: A X B C D + // + // old keys: A B C D + // new keys: X A B C D + // + // old keys: A B C D + // new keys: A B C + // + // old keys: A B C D + // new keys: A B + // + // The following cases need resort: + // (sort keys' order is changed) + // old keys: A B C D + // new keys: B C A + // + // old keys: A B C D + // new keys: A C D + // for (int i = 0, new_schema_size = new_tablet->num_key_columns(); i < new_schema_size; ++i) { ColumnMapping* column_mapping = rb_changer->get_mutable_column_mapping(i); @@ -1874,14 +1897,24 @@ OLAPStatus SchemaChangeHandler::_parse_request(TabletSharedPtr base_tablet, } } + // If we get here, the sort keys are kept in their original order. 
+ const TabletSchema& ref_tablet_schema = base_tablet->tablet_schema(); + const TabletSchema& new_tablet_schema = new_tablet->tablet_schema(); + if (new_tablet_schema.keys_type() != KeysType::DUP_KEYS + && new_tablet->num_key_columns() < base_tablet->num_key_columns()) { + // this is a table with aggregate key type, and num of key columns in new schema + // is less, which means the data in new tablet should be more aggregated. + // so we use sorting schema change to sort and merge the data. + *sc_sorting = true; + return OLAP_SUCCESS; + } + if (base_tablet->num_short_key_columns() != new_tablet->num_short_key_columns()) { // the number of short_keys changed, can't do linked schema change *sc_directly = true; return OLAP_SUCCESS; } - const TabletSchema& ref_tablet_schema = base_tablet->tablet_schema(); - const TabletSchema& new_tablet_schema = new_tablet->tablet_schema(); for (size_t i = 0; i < new_tablet->num_columns(); ++i) { ColumnMapping* column_mapping = rb_changer->get_mutable_column_mapping(i); if (column_mapping->ref_column < 0) {