From 7e55acbff77bf309298faecac51826c8d9f7cea1 Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Sun, 26 Feb 2023 19:23:32 +0800
Subject: [PATCH 1/8] save

---
 be/src/vec/data_types/data_type_map.cpp       |   8 +-
 be/src/vec/data_types/data_type_struct.cpp    | 209 ++++++++++++++----
 .../load_p0/stream_load/struct_malformat.csv  |   5 +
 .../load_p0/stream_load/struct_normal.csv     |  13 ++
 .../load_p0/stream_load/test_stream_load.out  |  22 ++
 .../stream_load/test_stream_load.groovy       | 104 +++++++++
 6 files changed, 316 insertions(+), 45 deletions(-)
 create mode 100644 regression-test/data/load_p0/stream_load/struct_malformat.csv
 create mode 100644 regression-test/data/load_p0/stream_load/struct_normal.csv

diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp
index 4895a48c4c4de4..edab0867c15fcd 100644
--- a/be/src/vec/data_types/data_type_map.cpp
+++ b/be/src/vec/data_types/data_type_map.cpp
@@ -156,12 +156,12 @@ Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const {
     auto* map_column = assert_cast<ColumnMap*>(column);
 
     if (*rb.position() != '{') {
-        return Status::InvalidArgument("map does not start with '{' character, found '{}'",
-                                       *rb.position());
+        return Status::InvalidArgument("map does not start with '{}' character, found '{}'",
+                                       "{", *rb.position());
     }
     if (*(rb.end() - 1) != '}') {
-        return Status::InvalidArgument("map does not end with '}' character, found '{}'",
-                                       *(rb.end() - 1));
+        return Status::InvalidArgument("map does not end with '{}' character, found '{}'",
+                                       "}", *(rb.end() - 1));
     }
 
     if (rb.count() == 2) {
diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 886ac5342ce150..0d4a8f2de490c8 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -55,8 +55,6 @@ DataTypeStruct::DataTypeStruct(const DataTypes& elems_, const Strings& names_)
     }
 
     Status st = check_tuple_names(names);
-    //if (!st.ok()) {
-    //}
 }
 
 std::string DataTypeStruct::do_get_name() const {
@@ -68,11 +66,6 @@ std::string DataTypeStruct::do_get_name() const {
         if (i != 0) {
             s << ", ";
         }
-
-        // if (have_explicit_names) {
-        //     s << back_quote_if_need(names[i]) << ' ';
-        // }
-
         s << elems[i]->get_name();
     }
     s << ")";
@@ -80,17 +73,85 @@ std::string DataTypeStruct::do_get_name() const {
     return s.str();
 }
 
-Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
+bool next_slot_from_string(ReadBuffer &rb, StringRef &output, bool &is_name, bool &has_quota) {
+    StringRef element(rb.position(), 0);
+    has_quota = false;
+    is_name = false;
+    if (rb.eof()) {
+        return false;
+    }
+
+    // ltrim
+    while (!rb.eof() && isspace(*rb.position())) {
+        ++rb.position();
+        element.data = rb.position();
+    }
+
+    // parse string
+    if (*rb.position() == '"' || *rb.position() == '\'') {
+        const char str_sep = *rb.position();
+        size_t str_len = 1;
+        // search until next '"' or '\''
+        while (str_len < rb.count() && *(rb.position() + str_len) != str_sep) {
+            ++str_len;
+        }
+        // invalid string
+        if (str_len >= rb.count()) {
+            rb.position() = rb.end();
+            return false;
+        }
+        has_quota = true;
+        rb.position() += str_len + 1;
+        element.size += str_len + 1;
+    }
+
+    // parse element until separator ':' or ',' or end '}'
+    while (!rb.eof() && (*rb.position() != ':') && (*rb.position() != ',') &&
+           (rb.count() != 1 || *rb.position() != '}')) {
+        if (has_quota && !isspace(*rb.position())) {
+            return false;
+        }
+        ++rb.position();
+        ++element.size;
+    }
+    // invalid element
+    if (rb.eof()) {
+        return false;
+    }
+
+    if (*rb.position() == ':') {
+        is_name = true;
+    }
+
+    // adjust read buffer position to first char of next element
+    ++rb.position();
+
+    // rtrim
+    while (element.size > 0 && isspace(element.data[element.size - 1])) {
+        --element.size;
+    }
+
+    // trim '"' and '\'' for string
+    if (element.size >= 2 && (element.data[0] == '"' || element.data[0] == '\'') &&
+        element.data[0] == element.data[element.size - 1]) {
+        ++element.data;
+        element.size -= 2;
+    }
+    output = element;
+    return true;
+}
+
+Status DataTypeStruct::from_string(ReadBuffer &rb, IColumn *column) const {
     DCHECK(!rb.eof());
-    auto* struct_column = assert_cast<ColumnStruct*>(column);
+    auto *struct_column = assert_cast<ColumnStruct *>(column);
 
     if (*rb.position() != '{') {
-        return Status::InvalidArgument("Struct does not start with '{' character, found '{}'",
-                                       *rb.position());
+        return Status::InvalidArgument("Struct does not start with '{}' character, found '{}'",
+                                       "{", *rb.position());
     }
-    if (rb.count() < 2 || *(rb.end() - 1) != '}') {
-        return Status::InvalidArgument("Struct does not end with '}' character, found '{}'",
-                                       *(rb.end() - 1));
+    if (*(rb.end() - 1) != '}') {
+        return Status::InvalidArgument("Struct does not end with '{}' character, found '{}'",
+                                       "}", *(rb.end() - 1));
     }
 
     // here need handle the empty struct '{}'
@@ -99,43 +160,109 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
     }
 
     ++rb.position();
-    std::vector<ReadBuffer> field_rbs;
-    field_rbs.reserve(elems.size());
 
-    // here get the value "jack" and 20 from {"name":"jack","age":20}
+    bool is_explicit_names = false;
+    std::vector <std::string> field_names;
+    std::vector <ReadBuffer> field_rbs;
+    std::vector <size_t> field_pos;
+
     while (!rb.eof()) {
-        size_t field_len = 0;
-        auto start = rb.position();
-        while (!rb.eof() && *start != ',' && *start != '}') {
-            field_len++;
-            start++;
+        StringRef slot(rb.position(), rb.count());
+        bool has_quota = false;
+        bool is_name = false;
+        if (!next_slot_from_string(rb, slot, is_name, has_quota)) {
+            LOG(INFO) << "Cannot read struct field from text";
+            return Status::InvalidArgument("Cannot read struct field from text '{}'",
+                                           slot.to_string());
         }
-        if (field_len >= rb.count()) {
-            return Status::InvalidArgument("Invalid Length");
+        LOG(INFO) << "DataTypeStruct::from_string:slot.to_string():" << slot.to_string();
+        LOG(INFO) << "DataTypeStruct::from_string:is_name:" << is_name;
+        if (is_name) {
+            std::string name = slot.to_string();
+            if (!next_slot_from_string(rb, slot, is_name, has_quota)) {
+                LOG(INFO) << "Cannot read struct field from text";
+                return Status::InvalidArgument("Cannot read struct field from text '{}'",
+                                               slot.to_string());
+            }
+            LOG(INFO) << "DataTypeStruct::from_string:slot.to_string():" << slot.to_string();
+            LOG(INFO) << "DataTypeStruct::from_string:is_name:" << is_name;
+            ReadBuffer field_rb(const_cast<char *>(slot.data), slot.size);
+            field_names.push_back(name);
+            field_rbs.push_back(field_rb);
+
+            if (!is_explicit_names) {
+                is_explicit_names = true;
+            }
+        } else {
+            ReadBuffer field_rb(const_cast<char *>(slot.data), slot.size);
+            field_rbs.push_back(field_rb);
         }
-        ReadBuffer field_rb(rb.position(), field_len);
+    }
 
-        size_t len = 0;
-        auto start_rb = field_rb.position();
-        while (!field_rb.eof() && *start_rb != ':') {
-            len++;
-            start_rb++;
-        }
-        ReadBuffer field(field_rb.position() + len + 1, field_rb.count() - len - 1);
+    // TODO: should we support insert default field value when actual field number is less than
+    // schema field number?
+    if (field_rbs.size() != elems.size()) {
+        std::string cmp_str =
+                field_rbs.size() > elems.size() ? "more" : "less";
+        LOG(INFO) << "Actual struct field number {} is {} than schema field number {}.";
+        return Status::InvalidArgument("Actual struct field number {} is {} than schema field number {}.",
+                                       field_rbs.size(), cmp_str, elems.size());
+    }
 
-        if (field.count() >= 2 && ((*field.position() == '"' && *(field.end() - 1) == '"') ||
-                                   (*field.position() == '\'' && *(field.end() - 1) == '\''))) {
-            ReadBuffer field_no_quote(field.position() + 1, field.count() - 2);
-            field_rbs.push_back(field_no_quote);
-        } else {
-            field_rbs.push_back(field);
+    if (is_explicit_names) {
+        if (field_names.size() != field_rbs.size()) {
+            LOG(INFO) << "Struct field name number {} is not equal to field number {}.";
+            return Status::InvalidArgument("Struct field name number {} is not equal to field number {}.",
+                                           field_names.size(), field_rbs.size());
+        }
+        std::unordered_set <std::string> name_set;
+        for (size_t i = 0; i < field_names.size(); i++) {
+            // check duplicate fields
+            auto ret = name_set.insert(field_names[i]);
+            if (!ret.second) {
+                LOG(INFO) << "Struct field name {} is duplicate with others.";
+                return Status::InvalidArgument("Struct field name {} is duplicate with others.", field_names[i]);
+            }
+            // check name valid
+            auto idx = try_get_position_by_name(field_names[i]);
+            if (idx == std::nullopt) {
+                LOG(INFO) << "Cannot find struct field name {} in schema.";
+                return Status::InvalidArgument("Cannot find struct field name {} in schema.", field_names[i]);
+            }
+            field_pos.push_back(idx.value());
+        }
+    } else {
+        for (size_t i = 0; i < field_rbs.size(); i++) {
+            field_pos.push_back(i);
         }
-
-        rb.position() += field_len + 1;
     }
 
     for (size_t idx = 0; idx < elems.size(); idx++) {
-        elems[idx]->from_string(field_rbs[idx], &struct_column->get_column(idx));
+        auto field_rb = field_rbs[field_pos[idx]];
+        LOG(INFO) << "DataTypeStruct::from_string:field_rb.count():" << field_rbs[field_pos[idx]].count();
+        LOG(INFO) << "DataTypeStruct::from_string:field_rb.to_string():" << field_rbs[field_pos[idx]].to_string();
+        // handle empty element
+        if (field_rb.count() == 0) {
+            auto &nested_null_col = reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+            nested_null_col.get_nested_column().insert_default();
+            nested_null_col.get_null_map_data().push_back(0);
+            continue;
+        }
+        // handle null element
+        if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4) == 0) {
+            auto &nested_null_col = reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+            nested_null_col.get_nested_column().insert_default();
+            nested_null_col.get_null_map_data().push_back(1);
+            continue;
+        }
+        auto st = elems[idx]->from_string(field_rb, &struct_column->get_column(idx));
+        if (!st.ok()) {
+            // we should do column revert if error
+            for (size_t j = 0; j < idx; j++) {
+                struct_column->get_column(j).pop_back(1);
+            }
+            return st;
+        }
     }
 
     return Status::OK();
diff --git a/regression-test/data/load_p0/stream_load/struct_malformat.csv b/regression-test/data/load_p0/stream_load/struct_malformat.csv
new file mode 100644
index 00000000000000..8af8629e9e6757
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/struct_malformat.csv
@@ -0,0 +1,5 @@
+1|{"f1":1, "f2":100, "f3":100000, "f4":'a', "f5":"doris", "f6":"2023-02-26", "f7":"2023-02-26 17:58", "f8":1.01, "f9":3.1415926, "f10":1.1}
+2|{"f1":1, "f2":100, "f3":100000, "f4":'a', "f5":"doris", "f6":"2023-02-26", "f7":null, "f8":null, "f9":null, "f10":1.1}
+3|\N
+4|"f1":1, "f2":100, "f3":100000, "f4":'a', "f5":"doris", "f6":"2023-02-26", "f7":null, "f8":null, "f9":null, "f10":1.1
+5|{f1:1, f2:100, f3:100000, f4:'a', f5:"doris", f6:"2023-02-26", f7:"2023-02-26 17:58", f8:1.01, f9:3.1415926, f10:1.1
diff --git a/regression-test/data/load_p0/stream_load/struct_normal.csv b/regression-test/data/load_p0/stream_load/struct_normal.csv
new file mode 100644
index 00000000000000..fe82889afddc83
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/struct_normal.csv
@@ -0,0 +1,13 @@
+1|{"f1":1, "f2":100, "f3":100000, "f4":'a', "f5":"doris", "f6":"2023-02-26", "f7":"2023-02-26 17:58", "f8":1.01, "f9":3.1415926, "f10":1.1}
+2|{"f1":1, "f2":100, "f3":100000, "f4":'a', "f5":"doris", "f6":"2023-02-26", "f7":null, "f8":null, "f9":null, "f10":1.1}
+3|{'f1':1, 'f2':100, 'f3':100000, 'f4':'a', 'f5':"doris", 'f6':"2023-02-26", 'f7':null, 'f8':null, 'f9':null, 'f10':1.1}
+4|{f1:1, f2:100, f3:100000, f4:'a', f5:"doris", f6:"2023-02-26", f7:"2023-02-26 17:58", f8:1.01, f9:3.1415926, f10:1.1}
+5|{f1: 1, f2: 100, f3: 100000, f4: a, f5: doris, f6: 2023-02-26, f7: "2023-02-26 17:58", f8: 1.01, f9: 3.1415926, f10: 1.1}
+6|{"f10":1.1, "f9":3.1415926, "f8":1.01, "f7":"2023-02-26 17:58", "f6":"2023-02-26", "f5":"doris", "f4":'a', "f3":100000, "f2":100, "f1":1}
+7|{f10:1.1, f9:3.1415926, f8:1.01, f7:"2023-02-26 17:58", f6:2023-02-26, f5:doris, f4:a, f3:100000, f2:100, f1:1}
+8|{f10:1.1, f9:3.1415926, f8:1.01, f7:"2023-02-26 17:58", f6:2023-02-26, f5:doris, f4:null, f3:null, f2:null, f1:1}
+9|{"f1":null, "f2":null, "f3":null, "f4":null, "f5":null, "f6":null, "f7":null, "f8":null, "f9":null, "f10":null}
+10|{1, 100, 100000, 'a', "doris", "2023-02-26", "2023-02-26 17:58", 1.01, 3.1415926, 1.1}
+11|{1, 100, 100000, 'a', "doris", "2023-02-26", null, null, null, 1.1}
+12|{null, null, null, null, null, null, null, null, null, null}
+13|\N
diff --git a/regression-test/data/load_p0/stream_load/test_stream_load.out b/regression-test/data/load_p0/stream_load/test_stream_load.out
index 72bc763d81f26b..3e4ccd3fc5aa8f 100644
--- a/regression-test/data/load_p0/stream_load/test_stream_load.out
+++ b/regression-test/data/load_p0/stream_load/test_stream_load.out
@@ -69,6 +69,28 @@
 7	[1, 2, 3, 4, 5]	\N	\N	\N	\N	\N	\N	\N	\N	\N
 8	[1, 2, 3, 4, 5]	\N	\N	\N	\N	\N	[NULL]	\N	[NULL]	\N
 
+-- !all111 --
+1	{1, 100, 100000, 'a', 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+2	{1, 100, 100000, 'a', 'doris', 2023-02-26, NULL, NULL, NULL, 1.1}
+3	\N
+4	\N
+5	\N
+
+-- !all112 --
+1	{1, 100, 100000, 'a', 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+2	{1, 100, 100000, 'a', 'doris', 2023-02-26, NULL, NULL, NULL, 1.1}
+3	{1, 100, 100000, 'a', 'doris', 2023-02-26, NULL, NULL, NULL, 1.1}
+4	{1, 100, 100000, 'a', 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+5	{1, 100, 100000, 'a', 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+6	{1, 100, 100000, 'a', 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+7	{1, 100, 100000, 'a', 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+8	{1, NULL, NULL, NULL, 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+9	{NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
+10	{1, 100, 100000, 'a', 'doris', 2023-02-26, 2023-02-26 17:58:00, 1.01, 3.1415926, 1.1}
+11	{1, 100, 100000, 'a', 'doris', 2023-02-26, NULL, NULL, NULL, 1.1}
+12	{NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
+13	\N
+
 -- !sql1 --
 -2	-50	1	\N	44
 2	-51	1	2	\N
diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
index a04d25622ef402..351bab3afd0a5c 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
@@ -186,12 +186,14 @@ suite("test_stream_load", "p0") {
     def tableName6 = "test_unique_key"
     def tableName7 = "test_unique_key_with_delete"
     def tableName8 = "test_array"
+    def tableName10 = "test_struct"
     sql """ DROP TABLE IF EXISTS ${tableName3} """
     sql """ DROP TABLE IF EXISTS ${tableName4} """
     sql """ DROP TABLE IF EXISTS ${tableName5} """
     sql """ DROP TABLE IF EXISTS ${tableName6} """
     sql """ DROP TABLE IF EXISTS ${tableName7} """
     sql """ DROP TABLE IF EXISTS ${tableName8} """
+    sql """ DROP TABLE IF EXISTS ${tableName10} """
     sql """
     CREATE TABLE IF NOT EXISTS ${tableName3} (
       `k1` int(11) NULL,
@@ -287,6 +289,28 @@ suite("test_stream_load", "p0") {
     "replication_allocation" = "tag.location.default: 1"
     );
     """
+    sql """ADMIN SET FRONTEND CONFIG ('enable_struct_type' = 'true');"""
+    sql """
+    CREATE TABLE IF NOT EXISTS ${tableName10} (
+      `k1` INT(11) NULL COMMENT "",
+      `k2` STRUCT<
+               f1:SMALLINT,
+               f2:INT(11),
+               f3:BIGINT,
+               f4:CHAR,
+               f5:VARCHAR(20),
+               f6:DATE,
+               f7:DATETIME,
+               f8:FLOAT,
+               f9:DOUBLE,
+               f10:DECIMAL(20, 6)> NULL COMMENT ""
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`k1`)
+    DISTRIBUTED BY HASH(`k1`) BUCKETS 3
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+    );
+    """
 
     // load all columns
     streamLoad {
@@ -673,6 +697,86 @@ suite("test_stream_load", "p0") {
     }
     sql "sync"
 
+    // ===== test struct stream load
+    // malformat without strictmode
+    streamLoad {
+        table "${tableName10}"
+
+        set 'column_separator', '|'
+
+        file 'struct_malformat.csv'
+        time 10000 // limit inflight 10s
+
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+            assertEquals(5, json.NumberTotalRows)
+            assertEquals(5, json.NumberLoadedRows)
+            assertEquals(0, json.NumberFilteredRows)
+            assertEquals(0, json.NumberUnselectedRows)
+        }
+    }
+    sql "sync"
+    qt_all111 "SELECT * from ${tableName10} order by k1" // 5
+    sql """truncate table ${tableName10}"""
+    sql """sync"""
+
+    // malformat with strictmode
+    streamLoad {
+        table "${tableName10}"
+
+        set 'column_separator', '|'
+        set 'strict_mode', 'true'
+
+        file 'struct_malformat.csv'
+        time 10000 // limit inflight 10s
+
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("fail", json.Status.toLowerCase())
+            assertEquals(5, json.NumberTotalRows)
+            assertEquals(3, json.NumberLoadedRows)
+            assertEquals(2, json.NumberFilteredRows)
+            assertEquals(0, json.NumberUnselectedRows)
+        }
+    }
+    sql "sync"
+
+    // normal load
+    streamLoad {
+        table "${tableName10}"
+
+        set 'column_separator', '|'
+
+        file 'struct_normal.csv'
+        time 10000 // limit inflight 10s
+
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+            assertEquals(13, json.NumberTotalRows)
+            assertEquals(13, json.NumberLoadedRows)
+            assertEquals(0, json.NumberFilteredRows)
+            assertEquals(0, json.NumberUnselectedRows)
+        }
+    }
+    sql "sync"
+    qt_all112 "SELECT * from ${tableName10} order by k1" // 10
+    sql """truncate table ${tableName10}"""
+    sql """sync"""
+
     // test immutable partition success
     def tableName9 = "test_immutable_partition"
     sql """ DROP TABLE IF EXISTS ${tableName9} """

From 1d7ab87de2cf79bc8cc60cce6851b212d4aab29e Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Sun, 26 Feb 2023 19:34:29 +0800
Subject: [PATCH 2/8] format

---
 be/src/vec/data_types/data_type_map.cpp    |  8 +--
 be/src/vec/data_types/data_type_struct.cpp | 58 ++++++++++------------
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp
index edab0867c15fcd..5c362f5b904a7d 100644
--- a/be/src/vec/data_types/data_type_map.cpp
+++ b/be/src/vec/data_types/data_type_map.cpp
@@ -156,12 +156,12 @@ Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const {
     auto* map_column = assert_cast<ColumnMap*>(column);
 
     if (*rb.position() != '{') {
-        return Status::InvalidArgument("map does not start with '{}' character, found '{}'",
-                                       "{", *rb.position());
+        return Status::InvalidArgument("map does not start with '{}' character, found '{}'", "{",
+                                       *rb.position());
     }
     if (*(rb.end() - 1) != '}') {
-        return Status::InvalidArgument("map does not end with '{}' character, found '{}'",
-                                       "}", *(rb.end() - 1));
+        return Status::InvalidArgument("map does not end with '{}' character, found '{}'", "}",
+                                       *(rb.end() - 1));
     }
 
     if (rb.count() == 2) {
diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 0d4a8f2de490c8..68b39c59beca58 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -73,7 +73,7 @@ std::string DataTypeStruct::do_get_name() const {
     return s.str();
 }
 
-bool next_slot_from_string(ReadBuffer &rb, StringRef &output, bool &is_name, bool &has_quota) {
+bool next_slot_from_string(ReadBuffer& rb, StringRef& output, bool& is_name, bool& has_quota) {
     StringRef element(rb.position(), 0);
     has_quota = false;
     is_name = false;
@@ -141,17 +141,17 @@ bool next_slot_from_string(ReadBuffer &rb, StringRef &output, bool &is_name, boo
     return true;
 }
 
-Status DataTypeStruct::from_string(ReadBuffer &rb, IColumn *column) const {
+Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
     DCHECK(!rb.eof());
-    auto *struct_column = assert_cast<ColumnStruct *>(column);
+    auto* struct_column = assert_cast<ColumnStruct *>(column);
 
     if (*rb.position() != '{') {
-        return Status::InvalidArgument("Struct does not start with '{}' character, found '{}'",
-                                       "{", *rb.position());
+        return Status::InvalidArgument("Struct does not start with '{}' character, found '{}'", "{",
+                                       *rb.position());
     }
     if (*(rb.end() - 1) != '}') {
-        return Status::InvalidArgument("Struct does not end with '{}' character, found '{}'",
-                                       "}", *(rb.end() - 1));
+        return Status::InvalidArgument("Struct does not end with '{}' character, found '{}'", "}",
+                                       *(rb.end() - 1));
     }
 
     // here need handle the empty struct '{}'
@@ -162,31 +162,25 @@ Status DataTypeStruct::from_string(ReadBuffer &rb, IColumn *column) const {
     ++rb.position();
 
     bool is_explicit_names = false;
-    std::vector <std::string> field_names;
-    std::vector <ReadBuffer> field_rbs;
-    std::vector <size_t> field_pos;
+    std::vector<std::string> field_names;
+    std::vector<ReadBuffer> field_rbs;
+    std::vector<size_t> field_pos;
 
     while (!rb.eof()) {
         StringRef slot(rb.position(), rb.count());
         bool has_quota = false;
         bool is_name = false;
         if (!next_slot_from_string(rb, slot, is_name, has_quota)) {
-            LOG(INFO) << "Cannot read struct field from text";
             return Status::InvalidArgument("Cannot read struct field from text '{}'",
                                            slot.to_string());
         }
-        LOG(INFO) << "DataTypeStruct::from_string:slot.to_string():" << slot.to_string();
-        LOG(INFO) << "DataTypeStruct::from_string:is_name:" << is_name;
         if (is_name) {
             std::string name = slot.to_string();
             if (!next_slot_from_string(rb, slot, is_name, has_quota)) {
-                LOG(INFO) << "Cannot read struct field from text";
                 return Status::InvalidArgument("Cannot read struct field from text '{}'",
                                                slot.to_string());
             }
-            LOG(INFO) << "DataTypeStruct::from_string:slot.to_string():" << slot.to_string();
-            LOG(INFO) << "DataTypeStruct::from_string:is_name:" << is_name;
-            ReadBuffer field_rb(const_cast<char *>(slot.data), slot.size);
+            ReadBuffer field_rb(const_cast<char*>(slot.data), slot.size);
             field_names.push_back(name);
             field_rbs.push_back(field_rb);
 
@@ -194,7 +188,7 @@ Status DataTypeStruct::from_string(ReadBuffer &rb, IColumn *column) const {
                 is_explicit_names = true;
             }
         } else {
-            ReadBuffer field_rb(const_cast<char *>(slot.data), slot.size);
+            ReadBuffer field_rb(const_cast<char*>(slot.data), slot.size);
             field_rbs.push_back(field_rb);
         }
     }
@@ -202,32 +196,30 @@ Status DataTypeStruct::from_string(ReadBuffer &rb, IColumn *column) const {
     // TODO: should we support insert default field value when actual field number is less than
     // schema field number?
     if (field_rbs.size() != elems.size()) {
-        std::string cmp_str =
-                field_rbs.size() > elems.size() ? "more" : "less";
-        LOG(INFO) << "Actual struct field number {} is {} than schema field number {}.";
-        return Status::InvalidArgument("Actual struct field number {} is {} than schema field number {}.",
-                                       field_rbs.size(), cmp_str, elems.size());
+        std::string cmp_str = field_rbs.size() > elems.size() ? "more" : "less";
+        return Status::InvalidArgument(
+                "Actual struct field number {} is {} than schema field number {}.",
+                field_rbs.size(), cmp_str, elems.size());
     }
 
     if (is_explicit_names) {
         if (field_names.size() != field_rbs.size()) {
-            LOG(INFO) << "Struct field name number {} is not equal to field number {}.";
             return Status::InvalidArgument("Struct field name number {} is not equal to field number {}.",
                                            field_names.size(), field_rbs.size());
         }
-        std::unordered_set <std::string> name_set;
+        std::unordered_set<std::string> name_set;
         for (size_t i = 0; i < field_names.size(); i++) {
             // check duplicate fields
             auto ret = name_set.insert(field_names[i]);
             if (!ret.second) {
-                LOG(INFO) << "Struct field name {} is duplicate with others.";
-                return Status::InvalidArgument("Struct field name {} is duplicate with others.", field_names[i]);
+                return Status::InvalidArgument("Struct field name {} is duplicate with others.",
+                                               field_names[i]);
             }
             // check name valid
             auto idx = try_get_position_by_name(field_names[i]);
             if (idx == std::nullopt) {
-                LOG(INFO) << "Cannot find struct field name {} in schema.";
-                return Status::InvalidArgument("Cannot find struct field name {} in schema.", field_names[i]);
+                return Status::InvalidArgument("Cannot find struct field name {} in schema.",
+                                               field_names[i]);
             }
             field_pos.push_back(idx.value());
         }
@@ -239,18 +231,18 @@ Status DataTypeStruct::from_string(ReadBuffer &rb, IColumn *column) const {
 
     for (size_t idx = 0; idx < elems.size(); idx++) {
         auto field_rb = field_rbs[field_pos[idx]];
-        LOG(INFO) << "DataTypeStruct::from_string:field_rb.count():" << field_rbs[field_pos[idx]].count();
-        LOG(INFO) << "DataTypeStruct::from_string:field_rb.to_string():" << field_rbs[field_pos[idx]].to_string();
         // handle empty element
         if (field_rb.count() == 0) {
-            auto &nested_null_col = reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+            auto &nested_null_col =
+                    reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
             nested_null_col.get_nested_column().insert_default();
             nested_null_col.get_null_map_data().push_back(0);
             continue;
         }
         // handle null element
         if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4) == 0) {
-            auto &nested_null_col = reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+            auto &nested_null_col =
+                    reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
             nested_null_col.get_nested_column().insert_default();
             nested_null_col.get_null_map_data().push_back(1);
             continue;

From fe8fc19a90fc066cc07639794bb1d7edb8ca824c Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Sun, 26 Feb 2023 19:38:46 +0800
Subject: [PATCH 3/8] format

---
 be/src/vec/data_types/data_type_struct.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 68b39c59beca58..42747a54d583a1 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -204,8 +204,9 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
 
     if (is_explicit_names) {
         if (field_names.size() != field_rbs.size()) {
-            return Status::InvalidArgument("Struct field name number {} is not equal to field number {}.",
-                                           field_names.size(), field_rbs.size());
+            return Status::InvalidArgument(
+                    "Struct field name number {} is not equal to field number {}.",
+                    field_names.size(), field_rbs.size());
         }
         std::unordered_set<std::string> name_set;
         for (size_t i = 0; i < field_names.size(); i++) {
@@ -233,7 +234,7 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
         auto field_rb = field_rbs[field_pos[idx]];
         // handle empty element
         if (field_rb.count() == 0) {
-            auto &nested_null_col =
+            auto& nested_null_col =
                     reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
             nested_null_col.get_nested_column().insert_default();
             nested_null_col.get_null_map_data().push_back(0);
@@ -241,7 +242,7 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
         }
         // handle null element
         if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4) == 0) {
-            auto &nested_null_col =
+            auto& nested_null_col =
                     reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
             nested_null_col.get_nested_column().insert_default();
             nested_null_col.get_null_map_data().push_back(1);

From 5ad80389a5b68d8b1afbdac9c5fcc22e5fb8d92d Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Sun, 26 Feb 2023 19:45:02 +0800
Subject: [PATCH 4/8] format

---
 be/src/vec/data_types/data_type_struct.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 42747a54d583a1..06b3b3e594f196 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -143,7 +143,7 @@ bool next_slot_from_string(ReadBuffer& rb, StringRef& output, bool& is_name, boo
 
 Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
     DCHECK(!rb.eof());
-    auto* struct_column = assert_cast<ColumnStruct *>(column);
+    auto* struct_column = assert_cast<ColumnStruct*>(column);
 
     if (*rb.position() != '{') {
         return Status::InvalidArgument("Struct does not start with '{}' character, found '{}'", "{",
@@ -235,7 +235,7 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
         // handle empty element
         if (field_rb.count() == 0) {
             auto& nested_null_col =
-                    reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+                    reinterpret_cast<ColumnNullable&>(struct_column->get_column(idx));
             nested_null_col.get_nested_column().insert_default();
             nested_null_col.get_null_map_data().push_back(0);
             continue;
@@ -243,7 +243,7 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
         // handle null element
         if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4) == 0) {
             auto& nested_null_col =
-                    reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+                    reinterpret_cast<ColumnNullable&>(struct_column->get_column(idx));
             nested_null_col.get_nested_column().insert_default();
             nested_null_col.get_null_map_data().push_back(1);
             continue;

From 6e365dd7aa8f2e8561e7433f49b449e9d34fac0b Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Mon, 27 Feb 2023 17:21:53 +0800
Subject: [PATCH 5/8] save

---
 be/src/vec/data_types/data_type_struct.cpp | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 06b3b3e594f196..7d77eac2fafd39 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -234,18 +234,13 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
         auto field_rb = field_rbs[field_pos[idx]];
         // handle empty element
         if (field_rb.count() == 0) {
-            auto& nested_null_col =
-                    reinterpret_cast<ColumnNullable&>(struct_column->get_column(idx));
-            nested_null_col.get_nested_column().insert_default();
-            nested_null_col.get_null_map_data().push_back(0);
+            struct_column->get_column(idx).insert_default();
             continue;
         }
         // handle null element
         if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4) == 0) {
-            auto& nested_null_col =
-                    reinterpret_cast<ColumnNullable&>(struct_column->get_column(idx));
-            nested_null_col.get_nested_column().insert_default();
-            nested_null_col.get_null_map_data().push_back(1);
+            auto &nested_null_col = reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+            nested_null_col.insert_null_elements(1);
             continue;
         }
         auto st = elems[idx]->from_string(field_rb, &struct_column->get_column(idx));

From 234046b6b44a22aea3b899241b267fd6dbb6d5ed Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Mon, 27 Feb 2023 17:33:50 +0800
Subject: [PATCH 6/8] save

---
 be/src/vec/data_types/data_type_struct.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 7d77eac2fafd39..6ae2e3318add19 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -239,7 +239,8 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
         }
         // handle null element
         if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4) == 0) {
-            auto &nested_null_col = reinterpret_cast<ColumnNullable &>(struct_column->get_column(idx));
+            auto &nested_null_col =
+                    reinterpret_cast<ColumnNullable&>(struct_column->get_column(idx));
             nested_null_col.insert_null_elements(1);
             continue;
         }

From b22c71352235e813776621fc0ed4718b1f678308 Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Mon, 27 Feb 2023 17:35:39 +0800
Subject: [PATCH 7/8] save

---
 be/src/vec/data_types/data_type_struct.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 6ae2e3318add19..66954687fe2aae 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -239,7 +239,7 @@ Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
         }
         // handle null element
         if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4) == 0) {
-            auto &nested_null_col =
+            auto& nested_null_col =
                     reinterpret_cast<ColumnNullable&>(struct_column->get_column(idx));
             nested_null_col.insert_null_elements(1);
             continue;

From 8e269bc58210688c16f54f313eb4dbbc5f98baac Mon Sep 17 00:00:00 2001
From: xy720 <xuyang25@baidu.com>
Date: Wed, 1 Mar 2023 10:40:08 +0800
Subject: [PATCH 8/8] save

---
 be/src/vec/exprs/vexpr.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index f90993884d2d6f..4e71afbf315404 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -381,6 +381,13 @@ FunctionContext::TypeDesc VExpr::column_type_to_type_desc(const TypeDescriptor&
             out.children.push_back(VExpr::column_type_to_type_desc(t));
         }
         break;
+    case TYPE_STRUCT:
+        CHECK(type.children.size() >= 1);
+        out.type = FunctionContext::TYPE_STRUCT;
+        for (const auto& t : type.children) {
+            out.children.push_back(VExpr::column_type_to_type_desc(t));
+        }
+        break;
     case TYPE_STRING:
         out.type = FunctionContext::TYPE_STRING;
         out.len = type.len;