From edfecf81d0b28e48610f895e2c46e861cbe22b2d Mon Sep 17 00:00:00 2001
From: chenmingyu <chenmingyu@baidu.com>
Date: Fri, 22 May 2020 00:05:55 +0800
Subject: [PATCH] [Optimize] Using sorted schema change processing to merge the
 data

When reducing the key columns of an aggregate table by doing schema change
or rollup, we should trigger a re-sort schema change process to re-sort
and merge the data into new schema. So that the data in new schema will
be more aggregated.
---
 be/src/olap/schema_change.cpp | 37 +++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 690fe89c29ee92..77916843a2c5f7 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -1859,6 +1859,29 @@ OLAPStatus SchemaChangeHandler::_parse_request(TabletSharedPtr base_tablet,
     // 若Key列的引用序列出现乱序，则需要重排序
     int num_default_value = 0;
 
+    // A, B, C are keys(sort keys), D is value
+    // The following cases are not changing the order, no need to resort:
+    // (sort keys keep in same order)
+    //      old keys:    A   B   C   D
+    //      new keys:    A   X   B   C   D
+    //
+    //      old keys:    A   B   C   D
+    //      new keys:    X   A   B   C   D
+    //
+    //      old keys:    A   B   C   D
+    //      new keys:    A   B   C
+    //
+    //      old keys:    A   B   C   D
+    //      new keys:    A   B
+    //
+    // followings need resort:
+    // (sort keys' order is changed)
+    //      old keys:    A   B   C   D
+    //      new keys:    B   C   A
+    //
+    //      old keys:    A   B   C   D
+    //      new keys:    A   C   D
+    //
     for (int i = 0, new_schema_size = new_tablet->num_key_columns();
             i < new_schema_size; ++i) {
         ColumnMapping* column_mapping = rb_changer->get_mutable_column_mapping(i);
@@ -1874,14 +1897,24 @@ OLAPStatus SchemaChangeHandler::_parse_request(TabletSharedPtr base_tablet,
         }
     }
 
+    // goes here, the sort keys are keep in origin order.
+    const TabletSchema& ref_tablet_schema = base_tablet->tablet_schema();
+    const TabletSchema& new_tablet_schema = new_tablet->tablet_schema();
+    if (new_tablet_schema.keys_type() != KeysType::DUP_KEYS
+            && new_tablet->num_key_columns() < base_tablet->num_key_columns()) {
+        // this is a table with aggregate key type, and num of key columns in new schema
+        // is less, which means the data in new tablet should be more aggregated.
+        // so we use sorting schema change to sort and merge the data.
+        *sc_sorting = true;
+        return OLAP_SUCCESS;
+    }
+
     if (base_tablet->num_short_key_columns() != new_tablet->num_short_key_columns()) {
         // the number of short_keys changed, can't do linked schema change
         *sc_directly = true;
         return OLAP_SUCCESS;
     }
 
-    const TabletSchema& ref_tablet_schema = base_tablet->tablet_schema();
-    const TabletSchema& new_tablet_schema = new_tablet->tablet_schema();
     for (size_t i = 0; i < new_tablet->num_columns(); ++i) {
         ColumnMapping* column_mapping = rb_changer->get_mutable_column_mapping(i);
         if (column_mapping->ref_column < 0) {