From c3b6e3b22f705b4fd657be25041609c07a9f5af9 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Thu, 12 Jan 2023 21:48:27 +0800 Subject: [PATCH 1/5] fix struct create --- be/src/vec/columns/column_struct.cpp | 34 +++++++--------------------- be/src/vec/columns/column_struct.h | 13 ++++------- 2 files changed, 13 insertions(+), 34 deletions(-) diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index c7e5c23d2a3a64..4d8eaeb76a81f6 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -54,43 +54,25 @@ ColumnStruct::ColumnStruct(MutableColumns&& mutable_columns) { } } -ColumnStruct::ColumnStruct(Columns&& columns) { - columns.reserve(columns.size()); - for (auto& column : columns) { - if (is_column_const(*column)) { - LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; - } - columns.push_back(std::move(column)); - } -} - -ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) { - columns.reserve(tuple_columns.size()); - for (auto& column : tuple_columns) { - if (is_column_const(*column)) { - LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; - } - columns.push_back(std::move(column)); - } -} - -ColumnStruct::Ptr ColumnStruct::create(Columns& columns) { +ColumnStruct::Ptr ColumnStruct::create(const Columns& columns) { for (const auto& column : columns) { if (is_column_const(*column)) { LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; } } - auto column_struct = ColumnStruct::create(columns); + auto column_struct = ColumnStruct::create(MutableColumns()); + column_struct->columns.assign(columns.begin(), columns.end()); return column_struct; } -ColumnStruct::Ptr ColumnStruct::create(TupleColumns& tuple_columns) { +ColumnStruct::Ptr ColumnStruct::create(const TupleColumns& tuple_columns) { for (const auto& column : tuple_columns) { if (is_column_const(*column)) { LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; } } - auto column_struct = ColumnStruct::create(tuple_columns); + auto column_struct = ColumnStruct::create(MutableColumns()); + column_struct->columns = tuple_columns; return column_struct; } @@ -273,7 +255,7 @@ ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const { new_columns[i] = columns[i]->permute(perm, limit); } - return ColumnStruct::create(new_columns); + return ColumnStruct::create(std::move(new_columns)); } ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const { @@ -284,7 +266,7 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const { new_columns[i] = columns[i]->replicate(offsets); } - return ColumnStruct::create(new_columns); + return ColumnStruct::create(std::move(new_columns)); } MutableColumns ColumnStruct::scatter(ColumnIndex num_columns, const Selector& selector) const { diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 75dade874d40b5..32ce7dc1ae43fc 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -79,8 +79,6 @@ class ColumnStruct final : public COWHelper { template struct Less; - ColumnStruct(Columns&& columns); - ColumnStruct(TupleColumns&& tuple_columns); explicit ColumnStruct(MutableColumns&& mutable_columns); ColumnStruct(const ColumnStruct&) = default; @@ -89,14 +87,13 @@ class ColumnStruct final : public COWHelper { * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. */ using Base = COWHelper; - static Ptr create(Columns& columns); - static Ptr create(MutableColumns& columns); - static Ptr create(TupleColumns& columns); + static Ptr create(const Columns& columns); + static Ptr create(const TupleColumns& columns); static Ptr create(Columns&& arg) { return create(arg); } - template - static MutablePtr create(Args&&... args) { - return Base::create(std::forward(args)...); + template ::value>::type> + static MutablePtr create(Arg&& arg) { + return Base::create(std::forward(arg)); } std::string get_name() const override; From 53f7b45c84f949290af229c2a54037c7dc65a7e7 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Thu, 12 Jan 2023 21:51:22 +0800 Subject: [PATCH 2/5] update --- be/src/vec/columns/column_struct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 4d8eaeb76a81f6..41ccfe867d9031 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -255,7 +255,7 @@ ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const { new_columns[i] = columns[i]->permute(perm, limit); } - return ColumnStruct::create(std::move(new_columns)); + return ColumnStruct::create(new_columns); } ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const { @@ -266,7 +266,7 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const { new_columns[i] = columns[i]->replicate(offsets); } - return ColumnStruct::create(std::move(new_columns)); + return ColumnStruct::create(new_columns); } MutableColumns ColumnStruct::scatter(ColumnIndex num_columns, const Selector& selector) const { From b314101a80e2d81e924f1095f5ac35eadb2affc4 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Thu, 12 Jan 2023 21:54:17 +0800 Subject: [PATCH 3/5] fix code format --- be/src/vec/columns/column_struct.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 32ce7dc1ae43fc..81efdd2f760812 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -91,7 +91,8 @@ class ColumnStruct final : public COWHelper { static Ptr create(const TupleColumns& columns); static Ptr create(Columns&& arg) { return create(arg); } - template ::value>::type> + template ::value>::type> static MutablePtr create(Arg&& arg) { return Base::create(std::forward(arg)); } From b6ab9384d2edd626cec93f2d30de907e59d378c4 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Thu, 12 Jan 2023 23:17:23 +0800 Subject: [PATCH 4/5] add more codes for struct type --- be/CMakeLists.txt | 2 +- be/src/runtime/types.cpp | 29 ++++++++++++-- be/src/vec/columns/column_struct.cpp | 8 ++++ be/src/vec/columns/column_struct.h | 4 +- be/src/vec/data_types/data_type.cpp | 2 + be/src/vec/data_types/data_type_factory.cpp | 14 +++++++ be/src/vec/data_types/data_type_struct.cpp | 43 +++++++++++++++++++++ be/src/vec/data_types/data_type_struct.h | 19 +++------ 8 files changed, 100 insertions(+), 21 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index dfa097223e8082..20f4c026960d50 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -565,7 +565,7 @@ endif() # For CMAKE_BUILD_TYPE=Debug if (OS_MACOSX AND ARCH_ARM) # Using -O0 may meet ARM64 branch out of range errors when linking with tcmalloc. - set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Og") + set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -O0") else() set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -O0") endif() diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index f26b1dcbce4ea2..45b8f8e5f57981 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -150,7 +150,7 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { } void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { - DCHECK(!is_complex_type() || type == TYPE_ARRAY) + DCHECK(!is_complex_type() || type == TYPE_ARRAY || type == TYPE_STRUCT) << "Don't support complex type now, type=" << type; auto node = ptype->add_types(); node->set_type(TTypeNodeType::SCALAR); @@ -170,8 +170,18 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { for (const TypeDescriptor& child : children) { child.to_protobuf(ptype); } + } else if (type == TYPE_STRUCT) { + node->set_type(TTypeNodeType::STRUCT); + DCHECK_EQ(field_names.size(), contains_nulls.size()); + for (size_t i = 0; i < field_names.size(); ++i) { + auto field = node->add_struct_fields(); + field->set_name(field_names[i]); + field->set_contains_null(contains_nulls[i]); + } + for (const TypeDescriptor& child : children) { + child.to_protobuf(ptype); + } } - // TODO(xy): support struct } TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField& types, int* idx) @@ -213,7 +223,20 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField(src); + for (size_t i = 0; i < columns.size(); ++i) { + columns[i]->insert_indices_from(src_concrete.get_column(i), indices_begin, indices_end); + } +} + // const char * ColumnStruct::skip_serialized_in_arena(const char * pos) const { // for (const auto & column : columns) { // pos = column->skip_serialized_in_arena(pos); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 81efdd2f760812..a66d91f3e02f95 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -129,9 +129,7 @@ class ColumnStruct final : public COWHelper { // void update_hash_fast(SipHash & hash) const override; void insert_indices_from(const IColumn& src, const int* indices_begin, - const int* indices_end) override { - LOG(FATAL) << "insert_indices_from not implemented"; - } + const int* indices_end) override; void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation& res) const override { diff --git a/be/src/vec/data_types/data_type.cpp b/be/src/vec/data_types/data_type.cpp index 7a0d67d4a41c68..360fa48a988a96 100644 --- a/be/src/vec/data_types/data_type.cpp +++ b/be/src/vec/data_types/data_type.cpp @@ -145,6 +145,8 @@ PGenericType_TypeId IDataType::get_pdata_type(const IDataType* data_type) { return PGenericType::HLL; case TypeIndex::Array: return PGenericType::LIST; + case TypeIndex::Struct: + return PGenericType::STRUCT; case TypeIndex::FixedLengthObject: return PGenericType::FIXEDLENGTHOBJECT; case TypeIndex::JSONB: diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 36aae47d692e29..7100a679c09d5f 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -334,6 +334,20 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { case PGenericType::FIXEDLENGTHOBJECT: nested = std::make_shared(); break; + case PGenericType::STRUCT: { + size_t col_size = pcolumn.children_size(); + DCHECK(col_size >= 1); + DataTypes dataTypes; + Strings names; + dataTypes.reserve(col_size); + names.reserve(col_size); + for (size_t i = 0; i < col_size; i++) { + dataTypes.push_back(create_data_type(pcolumn.children(i))); + names.push_back(pcolumn.name()); + } + nested = std::make_shared(dataTypes, names); + break; + } default: { LOG(FATAL) << fmt::format("Unknown data type: {}", pcolumn.type()); return nullptr; diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp index 8fd2179bfa1b3f..4364f5c34ff532 100644 --- a/be/src/vec/data_types/data_type_struct.cpp +++ b/be/src/vec/data_types/data_type_struct.cpp @@ -232,6 +232,49 @@ String DataTypeStruct::get_name_by_position(size_t i) const { return names[i - 1]; } +int64_t DataTypeStruct::get_uncompressed_serialized_bytes(const IColumn& column, + int be_exec_version) const { + auto ptr = column.convert_to_full_column_if_const(); + const auto& struct_column = assert_cast(*ptr.get()); + DCHECK(elems.size() == struct_column.tuple_size()); + + int64_t bytes = 0; + for (size_t i = 0; i < elems.size(); ++i) { + bytes += elems[i]->get_uncompressed_serialized_bytes(struct_column.get_column(i), + be_exec_version); + } + return bytes; +} + +char* DataTypeStruct::serialize(const IColumn& column, char* buf, int be_exec_version) const { + auto ptr = column.convert_to_full_column_if_const(); + const auto& struct_column = assert_cast(*ptr.get()); + DCHECK(elems.size() == struct_column.tuple_size()); + + for (size_t i = 0; i < elems.size(); ++i) { + buf = elems[i]->serialize(struct_column.get_column(i), buf, be_exec_version); + } + return buf; +} + +const char* DataTypeStruct::deserialize(const char* buf, IColumn* column, + int be_exec_version) const { + auto* struct_column = assert_cast(column); + DCHECK(elems.size() == struct_column->tuple_size()); + + for (size_t i = 0; i < elems.size(); ++i) { + buf = elems[i]->deserialize(buf, &struct_column->get_column(i), be_exec_version); + } + return buf; +} + +void DataTypeStruct::to_pb_column_meta(PColumnMeta* col_meta) const { + IDataType::to_pb_column_meta(col_meta); + for (size_t i = 0; i < elems.size(); ++i) { + elems[i]->to_pb_column_meta(col_meta->add_children()); + } +} + bool DataTypeStruct::text_can_contain_only_valid_utf8() const { return std::all_of(elems.begin(), elems.end(), [](auto&& elem) { return elem->text_can_contain_only_valid_utf8(); }); diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h index 9405544b76e5a3..88255de38c7acf 100644 --- a/be/src/vec/data_types/data_type_struct.h +++ b/be/src/vec/data_types/data_type_struct.h @@ -90,20 +90,11 @@ class DataTypeStruct final : public IDataType { std::optional try_get_position_by_name(const String& name) const; String get_name_by_position(size_t i) const; - [[noreturn]] int64_t get_uncompressed_serialized_bytes(const IColumn& column, - int be_exec_version) const override { - LOG(FATAL) << "get_uncompressed_serialized_bytes not implemented"; - } - - [[noreturn]] char* serialize(const IColumn& column, char* buf, - int be_exec_version) const override { - LOG(FATAL) << "serialize not implemented"; - } - - [[noreturn]] const char* deserialize(const char* buf, IColumn* column, - int be_exec_version) const override { - LOG(FATAL) << "serialize not implemented"; - } + int64_t get_uncompressed_serialized_bytes(const IColumn& column, + int be_exec_version) const override; + char* serialize(const IColumn& column, char* buf, int be_exec_version) const override; + const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override; + void to_pb_column_meta(PColumnMeta* col_meta) const override; // bool is_parametric() const { return true; } // SerializationPtr do_get_default_serialization() const override; From 0a633f66315283505915fb3f42fe20d814436b88 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Thu, 12 Jan 2023 23:19:52 +0800 Subject: [PATCH 5/5] update --- be/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 20f4c026960d50..dfa097223e8082 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -565,7 +565,7 @@ endif() # For CMAKE_BUILD_TYPE=Debug if (OS_MACOSX AND ARCH_ARM) # Using -O0 may meet ARM64 branch out of range errors when linking with tcmalloc. - set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -O0") + set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Og") else() set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -O0") endif()