From 6e079afc35c325d5f0c9e07848c3c552b4aae21d Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 4 Jan 2023 19:04:55 +0800 Subject: [PATCH 01/11] first commit --- be/src/olap/field.h | 48 ++- .../olap/rowset/segment_v2/column_writer.cpp | 291 ++++++++++++++++++ be/src/olap/rowset/segment_v2/column_writer.h | 60 ++++ .../olap/rowset/segment_v2/segment_writer.cpp | 11 + be/src/olap/tablet_meta.cpp | 6 + be/src/olap/tablet_schema.cpp | 18 ++ be/src/olap/types.cpp | 20 ++ be/src/olap/types.h | 123 ++++++++ be/src/runtime/CMakeLists.txt | 1 + be/src/runtime/map_value.cpp | 50 +++ be/src/runtime/map_value.h | 99 ++++++ be/src/runtime/primitive_type.cpp | 15 +- be/src/runtime/types.cpp | 37 ++- be/src/udf/udf.h | 26 ++ be/src/vec/CMakeLists.txt | 3 + be/src/vec/columns/column.h | 2 + be/src/vec/columns/column_map.cpp | 218 +++++++++++++ be/src/vec/columns/column_map.h | 128 ++++++++ be/src/vec/core/field.h | 32 ++ be/src/vec/core/types.h | 3 + be/src/vec/data_types/data_type.cpp | 2 + be/src/vec/data_types/data_type.h | 5 +- be/src/vec/data_types/data_type_factory.cpp | 27 +- be/src/vec/data_types/data_type_factory.hpp | 4 +- be/src/vec/data_types/data_type_map.cpp | 157 ++++++++++ be/src/vec/data_types/data_type_map.h | 80 +++++ be/src/vec/exprs/vexpr.cpp | 5 + be/src/vec/exprs/vmap_literal.cpp | 51 +++ be/src/vec/exprs/vmap_literal.h | 34 ++ be/src/vec/olap/olap_data_convertor.cpp | 112 ++++++- be/src/vec/olap/olap_data_convertor.h | 19 ++ fe/fe-core/src/main/cup/sql_parser.cup | 35 ++- .../org/apache/doris/analysis/CastExpr.java | 5 + .../java/org/apache/doris/analysis/Expr.java | 9 +- .../org/apache/doris/analysis/MapLiteral.java | 179 +++++++++++ .../java/org/apache/doris/catalog/Column.java | 96 ++++-- .../org/apache/doris/catalog/MapType.java | 48 +++ .../apache/doris/catalog/PrimitiveType.java | 6 + .../java/org/apache/doris/catalog/Type.java | 4 +- .../org/apache/doris/common/util/Util.java | 1 + .../org/apache/doris/mysql/MysqlColType.java | 3 +- 
.../doris/planner/SetOperationNode.java | 3 + fe/fe-core/src/main/jflex/sql_scanner.flex | 4 + gensrc/thrift/Exprs.thrift | 3 + 44 files changed, 2033 insertions(+), 50 deletions(-) create mode 100644 be/src/runtime/map_value.cpp create mode 100644 be/src/runtime/map_value.h create mode 100644 be/src/vec/columns/column_map.cpp create mode 100644 be/src/vec/columns/column_map.h create mode 100644 be/src/vec/data_types/data_type_map.cpp create mode 100644 be/src/vec/data_types/data_type_map.h create mode 100644 be/src/vec/exprs/vmap_literal.cpp create mode 100644 be/src/vec/exprs/vmap_literal.h create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 391f7f45be68c9..8b834854f1845d 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -29,6 +29,7 @@ #include "olap/types.h" #include "olap/utils.h" #include "runtime/collection_value.h" +#include "runtime/map_value.h" #include "runtime/mem_pool.h" #include "util/hash_util.hpp" #include "util/mem_util.hpp" @@ -49,7 +50,7 @@ class Field { _index_size(column.index_length()), _is_nullable(column.is_nullable()), _unique_id(column.unique_id()) { - if (column.type() == OLAP_FIELD_TYPE_ARRAY) { + if (column.type() == OLAP_FIELD_TYPE_ARRAY || column.type() == OLAP_FIELD_TYPE_MAP) { _agg_info = get_aggregate_info(column.aggregation(), column.type(), column.get_sub_column(0).type()); } else { @@ -450,6 +451,34 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { return _type_info->hash_code(cell.cell_ptr(), seed); } +class MapField : public Field { +public: + explicit MapField(const TabletColumn& column) : Field(column) {} + + void consume(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, + ObjectPool* agg_pool) const override { + dst->set_is_null(src_null); + if (src_null) { + return; + } + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } + + // make variable_ptr memory 
allocate to cell_ptr as MapValue + char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { + auto m = (MapValue*)cell_ptr; + + m->set_key_null_signs(reinterpret_cast(variable_ptr)); + m->set_value_null_signs(reinterpret_cast(variable_ptr)); + + return variable_ptr + _length; + } + + size_t get_variable_len() const override { + return _length; + } +}; + class ArrayField : public Field { public: explicit ArrayField(const TabletColumn& column) : Field(column) {} @@ -751,6 +780,14 @@ class FieldFactory { auto* local = new ArrayField(column); local->add_sub_field(std::move(item_field)); return local; + } + case OLAP_FIELD_TYPE_MAP: { + std::unique_ptr key_field(FieldFactory::create(column.get_sub_column(0))); + std::unique_ptr val_field(FieldFactory::create(column.get_sub_column(1))); + auto* local = new MapField(column); + local->add_sub_field(std::move(key_field)); + local->add_sub_field(std::move(val_field)); + return local; } case OLAP_FIELD_TYPE_DECIMAL: [[fallthrough]]; @@ -792,6 +829,15 @@ class FieldFactory { local->add_sub_field(std::move(item_field)); return local; } + case OLAP_FIELD_TYPE_MAP: { + DCHECK(column.get_subtype_count() == 2); + auto* local= new MapField(column); + std::unique_ptr key_field(FieldFactory::create(column.get_sub_column(0))); + std::unique_ptr value_field(FieldFactory::create(column.get_sub_column(1))); + local->add_sub_field(std::move(key_field)); + local->add_sub_field(std::move(value_field)); + return local; + } case OLAP_FIELD_TYPE_DECIMAL: [[fallthrough]]; case OLAP_FIELD_TYPE_DECIMAL32: diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index df820b141ef387..77522b3eecc5f7 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -173,6 +173,144 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* *writer = std::move(writer_local); return Status::OK(); } + case 
FieldType::OLAP_FIELD_TYPE_MAP: { + DCHECK(column->get_subtype_count() == 2); + // todo . here key and value is array only? + const TabletColumn& key_column = column->get_sub_column(0); // field_type is true key and value + const TabletColumn& value_column = column->get_sub_column(1); + + // create length writer + FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; + + ColumnWriterOptions length_options; + length_options.meta = opts.meta->add_children_columns(); + length_options.meta->set_column_id(2); + length_options.meta->set_unique_id(2); + length_options.meta->set_type(length_type); + length_options.meta->set_is_nullable(false); + length_options.meta->set_length( + get_scalar_type_info()->size()); + length_options.meta->set_encoding(DEFAULT_ENCODING); + length_options.meta->set_compression(opts.meta->compression()); + + length_options.need_zone_map = false; + length_options.need_bloom_filter = false; + length_options.need_bitmap_index = false; + + TabletColumn length_column = TabletColumn( + OLAP_FIELD_AGGREGATION_NONE, length_type, length_options.meta->is_nullable(), + length_options.meta->unique_id(), length_options.meta->length()); + length_column.set_name("length"); + length_column.set_index_length(-1); // no short key index + std::unique_ptr bigint_field(FieldFactory::create(length_column)); + auto* length_writer = + new ScalarColumnWriter(length_options, std::move(bigint_field), file_writer); + + // create null writer + ScalarColumnWriter* null_writer = nullptr; + if (opts.meta->is_nullable()) { + FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT; + ColumnWriterOptions null_options; + null_options.meta = opts.meta->add_children_columns(); + null_options.meta->set_column_id(3); + null_options.meta->set_unique_id(3); + null_options.meta->set_type(null_type); + null_options.meta->set_is_nullable(false); + null_options.meta->set_length( + get_scalar_type_info()->size()); + null_options.meta->set_encoding(DEFAULT_ENCODING); + 
null_options.meta->set_compression(opts.meta->compression()); + + null_options.need_zone_map = false; + null_options.need_bloom_filter = false; + null_options.need_bitmap_index = false; + + TabletColumn null_column = TabletColumn( + OLAP_FIELD_AGGREGATION_NONE, null_type, length_options.meta->is_nullable(), + null_options.meta->unique_id(), null_options.meta->length()); + null_column.set_name("nullable"); + null_column.set_index_length(-1); // no short key index + std::unique_ptr null_field(FieldFactory::create(null_column)); + null_writer = + new ScalarColumnWriter(null_options, std::move(null_field), file_writer); + } + + // create key writer + std::unique_ptr key_writer; + ColumnWriterOptions key_opts; + TabletColumn key_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); + { + key_list_column.add_sub_column(const_cast(key_column)); +// key_list_column.add_sub_column(key_column); + key_list_column.set_name("map.key"); + key_list_column.set_index_length(-1); + + + key_opts.meta = opts.meta->mutable_children_columns(0); + key_opts.meta->set_column_id(4); + key_opts.meta->set_unique_id(4); + key_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY); + key_opts.meta->set_length(0); + key_opts.meta->set_encoding(BIT_SHUFFLE); + key_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F); + key_opts.need_zone_map = false; + key_opts.meta->set_is_nullable(true); + + ColumnMetaPB* child_meta = key_opts.meta->add_children_columns(); + child_meta->set_column_id(5); + child_meta->set_unique_id(5); + child_meta->set_type(key_column.type()); + child_meta->set_length(key_column.length()); + child_meta->set_compression(segment_v2::CompressionTypePB::LZ4F); + child_meta->set_encoding(DICT_ENCODING); + child_meta->set_is_nullable(key_column.is_nullable()); + } + + RETURN_IF_ERROR( + ColumnWriter::create(key_opts, &key_list_column, file_writer, &key_writer)); + + + // create value writer + std::unique_ptr value_writer; + ColumnWriterOptions val_opts; + TabletColumn 
val_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); + { + val_list_column.add_sub_column(const_cast(value_column)); + // val_list_column.add_sub_column(value_column); + val_list_column.set_name("map.val"); + val_list_column.set_index_length(-1); + + + val_opts.meta = opts.meta->mutable_children_columns(1); + val_opts.meta->set_column_id(6); + val_opts.meta->set_unique_id(6); + val_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY); + val_opts.meta->set_length(0); + val_opts.meta->set_encoding(BIT_SHUFFLE); + val_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F); + val_opts.need_zone_map = false; + val_opts.meta->set_is_nullable(true); + + ColumnMetaPB* child_v_meta = val_opts.meta->add_children_columns(); + child_v_meta->set_column_id(7); + child_v_meta->set_unique_id(7); + child_v_meta->set_type(value_column.type()); + child_v_meta->set_length(value_column.length()); + child_v_meta->set_compression(segment_v2::CompressionTypePB::LZ4F); + child_v_meta->set_encoding(DEFAULT_ENCODING); + child_v_meta->set_is_nullable(value_column.is_nullable()); + } + + RETURN_IF_ERROR( + ColumnWriter::create(val_opts, &val_list_column, file_writer, &value_writer)); + // finally create map writer + std::unique_ptr writer_local = std::unique_ptr( + new MapColumnWriter(opts, std::move(field), length_writer, null_writer, + std::move(key_writer), std::move(value_writer))); + + *writer = std::move(writer_local); + return Status::OK(); + } default: return Status::NotSupported("unsupported type for ColumnWriter: {}", std::to_string(field->type())); @@ -695,5 +833,158 @@ Status ArrayColumnWriter::finish_current_page() { return Status::NotSupported("array writer has no data, can not finish_current_page"); } +/// ============================= MapColumnWriter =====================//// +MapColumnWriter::MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, + ScalarColumnWriter* offset_writer, + ScalarColumnWriter* null_writer, + std::unique_ptr 
key_writer, + std::unique_ptr value_writer) + : ColumnWriter(std::move(field), opts.meta->is_nullable()), + _key_writer(std::move(key_writer)), + _value_writer(std::move(value_writer)), + _opts(opts) { + _offset_writer.reset(offset_writer); + if (is_nullable()) { + _null_writer.reset(null_writer); + } +} + +Status MapColumnWriter::init() { + RETURN_IF_ERROR(_offset_writer->init()); + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->init()); + } + RETURN_IF_ERROR(_key_writer->init()); + RETURN_IF_ERROR(_value_writer->init()); + return Status::OK(); +} + +Status MapColumnWriter::put_extra_info_in_page(DataPageFooterPB* footer) { +// footer->set_next_array_item_ordinal(_key_writer->get_next_rowid()); +// footer->set_next_array_item_ordinal(_value_writer->get_next_rowid()); + return Status::OK(); +} +uint64_t MapColumnWriter::estimate_buffer_size() { + size_t estimate = _offset_writer->estimate_buffer_size() + + _key_writer->estimate_buffer_size() + + _value_writer->estimate_buffer_size(); + if (is_nullable()) { + estimate += _null_writer->estimate_buffer_size(); + } + return estimate; +} + +Status MapColumnWriter::finish() { + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->finish()); + } + RETURN_IF_ERROR(_offset_writer->finish()); + RETURN_IF_ERROR(_key_writer->finish()); + RETURN_IF_ERROR(_value_writer->finish()); + return Status::OK(); +} + +// todo. 
make keys and values write +Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { + size_t remaining = num_rows; + const auto* col_cursor = reinterpret_cast(*ptr); + while (remaining > 0) { + size_t num_written = 1; + ordinal_t next_item_ordinal = _offset_writer->get_next_rowid(); + RETURN_IF_ERROR(_offset_writer->append_data_in_current_page( + reinterpret_cast(&next_item_ordinal), &num_written)); + if (num_written < + 1) { // page is full, write first item offset and update current length page's start ordinal + RETURN_IF_ERROR(_offset_writer->finish_current_page()); + } else { + // write child item. + if (_key_writer->is_nullable()) { + auto* key_data_ptr = const_cast(col_cursor)->mutable_key_data(); + for (size_t i = 0; i < col_cursor->length(); ++i) { + RETURN_IF_ERROR(_key_writer->append(col_cursor->is_key_null_at(i), key_data_ptr)); + key_data_ptr = (uint8_t*)key_data_ptr + _key_writer->get_field()->size(); + } + } + if (_value_writer->is_nullable()) { + auto* val_data_ptr = const_cast(col_cursor)->mutable_value_data(); + for (size_t i = 0; i < col_cursor->length(); ++i) { + RETURN_IF_ERROR(_value_writer->append(col_cursor->is_val_null_at(i), val_data_ptr)); + val_data_ptr = (uint8_t*)val_data_ptr + _value_writer->get_field()->size(); + } + } + } + remaining -= num_written; + col_cursor += num_written; + *ptr += num_written * sizeof(MapValue); + } + + if (is_nullable()) { + return write_null_column(num_rows, false); + } + return Status::OK(); +} + +Status MapColumnWriter::write_data() { + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->write_data()); + } + RETURN_IF_ERROR(_offset_writer->write_data()); + RETURN_IF_ERROR(_key_writer->write_data()); + RETURN_IF_ERROR(_value_writer->write_data()); + return Status::OK(); +} + +Status MapColumnWriter::write_ordinal_index() { + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->write_ordinal_index()); + } + if (!has_empty_items()) { + 
RETURN_IF_ERROR(_offset_writer->write_ordinal_index()); + RETURN_IF_ERROR(_key_writer->write_ordinal_index()); + RETURN_IF_ERROR(_value_writer->write_ordinal_index()); + } + return Status::OK(); +} + +Status MapColumnWriter::append_nulls(size_t num_rows) { + size_t num_lengths = num_rows; + const ordinal_t offset = get_next_rowid(); + while (num_lengths > 0) { + // TODO llj bulk write + const auto* offset_ptr = reinterpret_cast(&offset); + RETURN_IF_ERROR(_offset_writer->append_data(&offset_ptr, 1)); + --num_lengths; + } + return write_null_column(num_rows, true); +} + +Status MapColumnWriter::write_null_column(size_t num_rows, bool is_null) { + uint8_t null_sign = is_null ? 1 : 0; + while (num_rows > 0) { + // TODO llj bulk write + const uint8_t* null_sign_ptr = &null_sign; + RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, 1)); + --num_rows; + } + return Status::OK(); +} + +Status MapColumnWriter::finish_current_page() { + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->finish_current_page()); + } + RETURN_IF_ERROR(_offset_writer->finish_current_page()); + RETURN_IF_ERROR(_key_writer->finish_current_page()); + RETURN_IF_ERROR(_value_writer->finish_current_page()); + return Status::OK(); +} + +Status MapColumnWriter::write_inverted_index() { + if (_opts.inverted_index) { + return _inverted_index_builder->finish(); + } + return Status::OK(); +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 5ea7ae654c8a21..f95713fa0627c2 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -320,5 +320,65 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { ColumnWriterOptions _opts; }; + +class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { +public: + explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, + 
ScalarColumnWriter* offset_writer, + ScalarColumnWriter* null_writer, + std::unique_ptr key_writer, + std::unique_ptr value_writer); + ~MapColumnWriter() override = default; + + Status init() override; + + Status append_data(const uint8_t** ptr, size_t num_rows) override; + + uint64_t estimate_buffer_size() override; + + Status finish() override; + Status write_data() override; + Status write_ordinal_index() override; + Status write_inverted_index() override; + Status append_nulls(size_t num_rows) override; + + Status finish_current_page() override; + + Status write_zone_map() override { + if (_opts.need_zone_map) { + return Status::NotSupported("map not support zone map"); + } + return Status::OK(); + } + + Status write_bitmap_index() override { + if (_opts.need_bitmap_index) { + return Status::NotSupported("map not support bitmap index"); + } + return Status::OK(); + } + Status write_bloom_filter_index() override { + if (_opts.need_bloom_filter) { + return Status::NotSupported("map not support bloom filter index"); + } + return Status::OK(); + } + ordinal_t get_next_rowid() const override { return _offset_writer->get_next_rowid(); } + +private: + Status put_extra_info_in_page(DataPageFooterPB* header) override; + Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记 + bool has_empty_items() const { return _offset_writer->get_next_rowid() == 0; } + +private: + std::unique_ptr _offset_writer; + std::unique_ptr _null_writer; + std::unique_ptr _key_writer; + std::unique_ptr _value_writer; + + std::unique_ptr _inverted_index_builder; + ColumnWriterOptions _opts; +}; + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index df7f26c34f506d..bf9ca2c5b7d475 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -154,6 +154,17 @@ Status SegmentWriter::init(const std::vector& 
col_ids, bool has_key) { } } + + if (column.type() == FieldType::OLAP_FIELD_TYPE_MAP) { + opts.need_zone_map = false; + if (opts.need_bloom_filter) { + return Status::NotSupported("Do not support bloom filter for map type"); + } + if (opts.need_bitmap_index) { + return Status::NotSupported("Do not support bitmap index for map type"); + } + } + std::unique_ptr writer; RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer)); RETURN_IF_ERROR(writer->init()); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index e3d4c6f3ffb060..a9177096f816e9 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -300,6 +300,12 @@ void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tco ColumnPB* children_column = column->add_children_columns(); init_column_from_tcolumn(0, tcolumn.children_column[0], children_column); } + if (tcolumn.column_type.type == TPrimitiveType::MAP) { + ColumnPB* key_column = column->add_children_columns(); + init_column_from_tcolumn(0, tcolumn.children_column[0], key_column); + ColumnPB* val_column = column->add_children_columns(); + init_column_from_tcolumn(0, tcolumn.children_column[1], val_column); + } } Status TabletMeta::create_from_file(const string& file_path) { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 55f030cbb9b089..a5ebb1902672c1 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -312,6 +312,8 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3 return string_length + sizeof(OLAP_JSONB_MAX_LENGTH); case TPrimitiveType::ARRAY: return OLAP_ARRAY_MAX_LENGTH; + case TPrimitiveType::MAP: + return OLAP_ARRAY_MAX_LENGTH; case TPrimitiveType::DECIMAL32: return 4; case TPrimitiveType::DECIMAL64: @@ -409,6 +411,15 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { child_column.init_from_pb(column.children_columns(0)); add_sub_column(child_column); } + if (_type 
== FieldType::OLAP_FIELD_TYPE_MAP) { + DCHECK(column.children_columns_size() == 2) << "MAP type has more than 2 children types."; + TabletColumn key_column; + TabletColumn value_column; + key_column.init_from_pb(column.children_columns(0)); + value_column.init_from_pb(column.children_columns(1)); + add_sub_column(key_column); + add_sub_column(value_column); + } } void TabletColumn::to_schema_pb(ColumnPB* column) const { @@ -440,6 +451,13 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { ColumnPB* child = column->add_children_columns(); _sub_columns[0].to_schema_pb(child); } + if (_type == OLAP_FIELD_TYPE_MAP) { + DCHECK(_sub_columns.size() == 2) << "MAP type has more than 2 children types."; + ColumnPB* child_key = column->add_children_columns(); + _sub_columns[0].to_schema_pb(child_key); + ColumnPB* child_val = column->add_children_columns(); + _sub_columns[1].to_schema_pb(child_val); + } } void TabletColumn::add_sub_column(TabletColumn& sub_column) { diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index aa2226a84ac0db..1415ad0297f93b 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -178,6 +178,14 @@ TypeInfoPtr get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) { } return create_static_type_info_ptr( get_array_type_info((FieldType)child_column->type(), iterations)); + } else if (UNLIKELY(type == OLAP_FIELD_TYPE_MAP)) { + segment_v2::ColumnMetaPB key_meta = column_meta_pb->children_columns(0); + TypeInfoPtr key_type_info = get_type_info(&key_meta); + segment_v2::ColumnMetaPB value_meta = column_meta_pb->children_columns(1); + TypeInfoPtr value_type_info = get_type_info(&value_meta); + + MapTypeInfo* map_type_info = new MapTypeInfo(std::move(key_type_info), std::move(value_type_info)); + return create_static_type_info_ptr(map_type_info); } else { return create_static_type_info_ptr(get_scalar_type_info(type)); } @@ -210,6 +218,13 @@ TypeInfoPtr get_type_info(const TabletColumn* col) { child_column = 
&child_column->get_sub_column(0); } return create_static_type_info_ptr(get_array_type_info(child_column->type(), iterations)); + } else if (UNLIKELY(type == OLAP_FIELD_TYPE_MAP)) { + const auto* key_column = &col->get_sub_column(0); + TypeInfoPtr key_type = get_type_info(key_column); + const auto* val_column = &col->get_sub_column(1); + TypeInfoPtr value_type = get_type_info(val_column); + MapTypeInfo* map_type_info = new MapTypeInfo(std::move(key_type), std::move(value_type)); + return create_static_type_info_ptr(map_type_info); } else { return create_static_type_info_ptr(get_scalar_type_info(type)); } @@ -218,6 +233,11 @@ TypeInfoPtr get_type_info(const TabletColumn* col) { TypeInfoPtr clone_type_info(const TypeInfo* type_info) { if (is_scalar_type(type_info->type())) { return create_static_type_info_ptr(type_info); + } else if (type_info->type() == OLAP_FIELD_TYPE_MAP) { + const auto map_type_info = dynamic_cast(type_info); + return create_dynamic_type_info_ptr( + new MapTypeInfo(clone_type_info(map_type_info->get_key_type_info()), + clone_type_info(map_type_info->get_value_type_info()))); } else { const auto array_type_info = dynamic_cast(type_info); return create_dynamic_type_info_ptr( diff --git a/be/src/olap/types.h b/be/src/olap/types.h index c580f2ea5319d0..5b4359071896b0 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -31,6 +31,7 @@ #include "olap/olap_define.h" #include "runtime/collection_value.h" #include "runtime/jsonb_value.h" +#include "runtime/map_value.h" #include "runtime/mem_pool.h" #include "util/jsonb_document.h" #include "util/jsonb_utils.h" @@ -430,6 +431,124 @@ class ArrayTypeInfo : public TypeInfo { TypeInfoPtr _item_type_info; const size_t _item_size; }; +///====================== MapType Info ==========================/// +class MapTypeInfo : public TypeInfo { +public: + explicit MapTypeInfo(TypeInfoPtr key_type_info, TypeInfoPtr value_type_info) + : _key_type_info(std::move(key_type_info)), 
_value_type_info(std::move(value_type_info)) {} + ~MapTypeInfo() override = default; + + inline bool equal(const void* left, const void* right) const override { + auto l_value = reinterpret_cast(left); + auto r_value = reinterpret_cast(right); + if (l_value->size() != r_value->size()) { + return false; + } + uint32_t size = l_value->size(); + for (size_t i = 0; i < size; ++i) { + if ((l_value->is_key_null_at(i) && r_value->is_key_null_at(i)) + && (l_value->is_val_null_at(i) && r_value->is_val_null_at(i))) { + continue; + } else { + return false; + } + } + return true; + } + + int cmp(const void* left, const void* right) const override { + auto l_value = reinterpret_cast(left); + auto r_value = reinterpret_cast(right); + uint32_t l_size = l_value->size(); + uint32_t r_size = r_value->size(); + size_t cur = 0; + while (cur < l_size && cur < r_size) { + if ((l_value->is_key_null_at(cur) && r_value->is_key_null_at(cur)) + && (l_value->is_val_null_at(cur) && r_value->is_val_null_at(cur))) { + ++cur; + } else { + return -1; + } + } + if (l_size < r_size) { + return -1; + } else if (l_size > r_size) { + return 1; + } else { + return 0; + } + } + + void shallow_copy(void* dest, const void* src) const override { + auto dest_value = reinterpret_cast(dest); + auto src_value = reinterpret_cast(src); + dest_value->shallow_copy(src_value); + } + + void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override { + DCHECK(false); + + } + + void copy_object(void* dest, const void* src, MemPool* mem_pool) const override { + deep_copy(dest, src, mem_pool); + } + + void direct_copy(void* dest, const void* src) const override { + CHECK(false); + } + + void direct_copy(uint8_t** base, void* dest, const void* src) const { + CHECK(false); + } + + void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); } + + Status convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool, + size_t variable_len = 0) const 
override { + return Status::Error(); + } + + Status from_string(void* buf, const std::string& scan_key, const int precision = 0, + const int scale = 0) const override { + return Status::Error(); + } + + std::string to_string(const void* src) const override { + return "{}"; + } + + void set_to_max(void* buf) const override { + DCHECK(false) << "set_to_max of list is not implemented."; + } + + void set_to_min(void* buf) const override { + DCHECK(false) << "set_to_min of list is not implemented."; + } + + uint32_t hash_code(const void* data, uint32_t seed) const override { + auto map_value = reinterpret_cast(data); + auto size = map_value->size(); + uint32_t result = HashUtil::hash(&size, sizeof(size), seed); + result = seed * result + _key_type_info->hash_code( + map_value->key_data(), seed) + + _value_type_info->hash_code( + map_value->value_data(), seed); + return result; + } + + // todo . is here only to need return 16 for two ptr? + const size_t size() const override { return 16; } + + FieldType type() const override { return OLAP_FIELD_TYPE_MAP; } + + inline const TypeInfo* get_key_type_info() const { return _key_type_info.get(); } + inline const TypeInfo* get_value_type_info() const { return _value_type_info.get(); } + +private: + TypeInfoPtr _key_type_info; + TypeInfoPtr _value_type_info; +}; bool is_scalar_type(FieldType field_type); @@ -575,6 +694,10 @@ template <> struct CppTypeTraits { using CppType = CollectionValue; }; +template <> +struct CppTypeTraits { + using CppType = MapValue; +}; template struct BaseFieldtypeTraits : public CppTypeTraits { using CppType = typename CppTypeTraits::CppType; diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 686c0e391520f9..ef96ae3e12b1d4 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -49,6 +49,7 @@ set(RUNTIME_FILES decimalv2_value.cpp large_int_value.cpp collection_value.cpp + map_value.cpp tuple.cpp tuple_row.cpp fragment_mgr.cpp diff --git 
a/be/src/runtime/map_value.cpp b/be/src/runtime/map_value.cpp new file mode 100644 index 00000000000000..16751b8cef315a --- /dev/null +++ b/be/src/runtime/map_value.cpp @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "map_value.h" + +namespace doris { + +///====================== map-value funcs ======================/// +void MapValue::to_map_val(MapVal* val) const { + val->length = _length; + val->key = _key_data; + val->value = _value_data; + val->key_null_signs = _key_null_signs; + val->value_null_signs = _val_null_signs; +} + +void MapValue::shallow_copy(const MapValue* value) { + _length = value->_length; + _key_null_signs = value->_key_null_signs; + _val_null_signs = value->_val_null_signs; + _key_data = value->_key_data; + _value_data = value->_value_data; +} + +void MapValue::copy_null_signs(const MapValue* other) { + // todo(amory): here need to judge? 
+ memcpy(_key_null_signs, other->_key_null_signs, other->size()); + memcpy(_val_null_signs, other->_val_null_signs, other->size()); +} + +MapValue MapValue::from_map_val(const MapVal& val) { + return MapValue(val.key, val.value, val.length, val.key_null_signs, val.value_null_signs); +} + + +} // namespace doris diff --git a/be/src/runtime/map_value.h b/be/src/runtime/map_value.h new file mode 100644 index 00000000000000..50f4f1a2a99282 --- /dev/null +++ b/be/src/runtime/map_value.h @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include "runtime/primitive_type.h" + +namespace doris_udf { +class FunctionContext; +struct AnyVal; +} // namespace doris_udf + +namespace doris { + +using doris_udf::FunctionContext; +using doris_udf::AnyVal; + +/** + * MapValue is for map type in memory + */ +class MapValue { +public: + MapValue() = default; + + explicit MapValue(int32_t length) + : _key_data(nullptr), _value_data(nullptr), _length(length){} + + MapValue(void* k_data, void* v_data, int32_t length) + : _key_data(k_data), _value_data(v_data), _length(length) {} + + MapValue(void* k_data, void* v_data, int32_t length, bool* _null_signs, bool is_key_null_signs) + : _key_data(k_data), _value_data(v_data), _length(length) { + if (is_key_null_signs) { + _key_null_signs = _null_signs; + } else { + _val_null_signs = _null_signs; + } + } + + MapValue(void* k_data, void* v_data, int32_t length, bool* key_null_signs, bool* value_null_signs) + : _key_data(k_data), _value_data(v_data), _length(length), _key_null_signs(key_null_signs), _val_null_signs(value_null_signs) {} + + + void set_key_has_null(bool has_null) { _key_has_null = has_null; } + void set_val_has_null(bool has_null) { _val_has_null = has_null; } + bool is_key_null_at(int32_t index) const { return this->_key_has_null && this->_key_null_signs[index]; } + bool is_val_null_at(int32_t index) const { return this->_val_has_null && this->_val_null_signs[index]; } + + void to_map_val(MapVal* val) const; + + int32_t size() const { return _length; } + + int32_t length() const { return _length; } + + void shallow_copy(const MapValue* other); + + void copy_null_signs(const MapValue* other); + + static MapValue from_map_val(const MapVal& val); + + const void* key_data() const { return _key_data; } + void* mutable_key_data() const { return _key_data; } + const void* value_data() const { return _value_data; } + void* mutable_value_data() const { return _value_data; } + const bool* key_null_signs() const { return _key_null_signs; } 
+ const bool* value_null_signs() const { return _val_null_signs; } + void set_key_null_signs(bool* null_signs) { _key_null_signs = null_signs; } + void set_value_null_signs(bool* null_signs) { _val_null_signs = null_signs; } + void set_length(int32_t length) { _length = length; } + void set_key(void* data) { _key_data = data; } + void set_value(void* data) { _value_data = data; } + +private: + // child column data; every member has a default so no constructor leaves it indeterminate + void* _key_data = nullptr; + void* _value_data = nullptr; + int32_t _length = 0; + bool _key_has_null = false; + bool _val_has_null = false; + bool* _key_null_signs = nullptr; + bool* _val_null_signs = nullptr; + +};//map-value +} // namespace doris diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 779676a45d9780..3263fe55403fb8 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -21,6 +21,7 @@ #include "runtime/collection_value.h" #include "runtime/define_primitive_type.h" #include "runtime/jsonb_value.h" +#include "runtime/map_value.h" #include "runtime/string_value.h" namespace doris { @@ -53,6 +54,8 @@ PrimitiveType convert_type_to_primitive(FunctionContext::Type type) { return PrimitiveType::TYPE_BOOLEAN; case FunctionContext::Type::TYPE_ARRAY: return PrimitiveType::TYPE_ARRAY; + case FunctionContext::Type::TYPE_MAP: + return PrimitiveType::TYPE_MAP; case FunctionContext::Type::TYPE_OBJECT: return PrimitiveType::TYPE_OBJECT; case FunctionContext::Type::TYPE_HLL: @@ -262,6 +265,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case TPrimitiveType::ARRAY: return TYPE_ARRAY; + case TPrimitiveType::MAP: + return TYPE_MAP; + default: return INVALID_TYPE; } @@ -355,7 +361,8 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) { case TYPE_ARRAY: return TPrimitiveType::ARRAY; - + case TYPE_MAP: + return TPrimitiveType::MAP; default: return TPrimitiveType::INVALID_TYPE; } @@ -449,7 +456,8 @@ std::string type_to_string(PrimitiveType t) { case TYPE_ARRAY: return "ARRAY"; - + case TYPE_MAP: + return "MAP"; default: return "";
}; @@ -589,7 +597,8 @@ int get_slot_size(PrimitiveType type) { return sizeof(JsonBinaryValue); case TYPE_ARRAY: return sizeof(CollectionValue); - + case TYPE_MAP: + return sizeof(MapValue); case TYPE_NULL: case TYPE_BOOLEAN: case TYPE_TINYINT: diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 37f49335698a6b..d75a9f51774c9e 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -78,15 +78,16 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) // ++(*idx); // children.push_back(TypeDescriptor(types, idx)); // break; - // case TTypeNodeType::MAP: - // DCHECK(!node.__isset.scalar_type); - // DCHECK_LT(*idx, types.size() - 2); - // type = TYPE_MAP; - // ++(*idx); - // children.push_back(TypeDescriptor(types, idx)); - // ++(*idx); - // children.push_back(TypeDescriptor(types, idx)); - // break; + case TTypeNodeType::MAP: { + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 2); + type = TYPE_MAP; + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + break; + } default: DCHECK(false) << node.type; } @@ -131,7 +132,7 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { } void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { - DCHECK(!is_complex_type() || type == TYPE_ARRAY) + DCHECK(!is_complex_type() || type == TYPE_ARRAY || type == TYPE_MAP) << "Don't support complex type now, type=" << type; auto node = ptype->add_types(); node->set_type(TTypeNodeType::SCALAR); @@ -150,6 +151,11 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { for (const TypeDescriptor& child : children) { child.to_protobuf(ptype); } + } else if (type == TYPE_MAP) { + node->set_type(TTypeNodeType::MAP); + for (const TypeDescriptor& child : children) { + child.to_protobuf(ptype); + } } } @@ -191,6 +197,14 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField"; return ss.str(); } + case TYPE_MAP: + ss << "MAP<" 
<< children[0].debug_string() << ", " << children[1].debug_string() << ">"; + return ss.str(); default: return type_to_string(type); } diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 49758f40d1efaf..30a1363898b337 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -89,6 +89,7 @@ class FunctionContext { TYPE_DECIMALV2, TYPE_OBJECT, TYPE_ARRAY, + TYPE_MAP, TYPE_QUANTILE_STATE, TYPE_DATEV2, TYPE_DATETIMEV2, @@ -910,6 +911,30 @@ struct CollectionVal : public AnyVal { return val; } }; + +struct MapVal : public AnyVal { + void* key = nullptr; + void* value = nullptr; + uint64_t length = 0; + // item has no null value if has_null is false. + // item ```may``` has null value if has_null is true. +// bool has_null; + // null bitmaps; default to nullptr so constructors that do not set them leave no dangling pointer + bool* key_null_signs = nullptr; + bool* value_null_signs = nullptr; + + MapVal() = default; + + MapVal(void* k, void* v, uint64_t length) + : key(k), value(v), length(length) {}; + + static MapVal null() { + MapVal val; + val.is_null = true; + return val; + } +}; + typedef uint8_t* BufferVal; } // namespace doris_udf @@ -927,6 +952,7 @@ using doris_udf::DateTimeVal; using doris_udf::HllVal; using doris_udf::FunctionContext; using doris_udf::CollectionVal; +using doris_udf::MapVal; using doris_udf::Decimal32Val; using doris_udf::Decimal64Val; using doris_udf::Decimal128Val; diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 6627e1b9c7abda..ef2ddc7ceec3e1 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -54,6 +54,7 @@ set(VEC_FILES columns/column_nullable.cpp columns/column_string.cpp columns/column_vector.cpp + columns/column_map.cpp columns/columns_common.cpp common/demangle.cpp common/exception.cpp @@ -85,6 +86,7 @@ set(VEC_FILES data_types/data_type_number_base.cpp data_types/data_type_string.cpp data_types/data_type_decimal.cpp + data_types/data_type_map.cpp data_types/get_least_supertype.cpp data_types/nested_utils.cpp data_types/data_type_date.cpp @@ -133,6 +135,7 @@ set(VEC_FILES exprs/vexpr_context.cpp exprs/vliteral.cpp
exprs/varray_literal.cpp + exprs/vmap_literal.cpp exprs/vin_predicate.cpp exprs/vbloom_predicate.cpp exprs/vbitmap_predicate.cpp diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 6bd5ac7855f10c..354ff427689137 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -590,6 +590,8 @@ class IColumn : public COW { virtual bool is_column_array() const { return false; } + virtual bool is_column_map() const { return false; } + /// If the only value column can contain is NULL. /// Does not imply type of object, because it can be ColumnNullable(ColumnNothing) or ColumnConst(ColumnNullable(ColumnNothing)) virtual bool only_null() const { return false; } diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp new file mode 100644 index 00000000000000..6306c9d9b3c4c7 --- /dev/null +++ b/be/src/vec/columns/column_map.cpp @@ -0,0 +1,218 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnMap.cpp +// and modified by Doris + +#include "vec/columns/column_map.h" + +namespace doris::vectorized { + +/** A column of map values. 
+ */ +std::string ColumnMap::get_name() const { + return "Map(" + keys->get_name() + ", " + values->get_name() + ")"; +} + +ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values) + : keys(std::move(keys)), values(std::move(values)) { + check_size(); +} + +ColumnArray::Offsets64& ColumnMap::get_offsets() const { + const ColumnArray & column_keys = assert_cast (get_keys()); + // todo . did here check size ? + return const_cast(column_keys.get_offsets()); +} + +void ColumnMap::check_size() const { + const auto * key_array = typeid_cast(keys.get()); + const auto * value_array = typeid_cast(values.get()); + CHECK(key_array) << "ColumnMap keys can be created only from array"; + CHECK(value_array) << "ColumnMap values can be created only from array"; + CHECK_EQ(get_keys_ptr()->size(), get_values_ptr()->size()); +} + +// todo. here to resize every row map +MutableColumnPtr ColumnMap::clone_resized(size_t to_size) const { + auto res = ColumnMap::create(keys->clone_resized(to_size), values->clone_resized(to_size)); + return res; +} + +// Returns row n as a Map field: res[0] receives the keys, res[1] the values (same layout as get()). +Field ColumnMap::operator[](size_t n) const { + Map res(2); + keys->get(n, res[0]); + values->get(n, res[1]); + + return res; +} + +// here to compare to below +void ColumnMap::get(size_t n, Field & res) const { + Map map(2); + keys->get(n, map[0]); + values->get(n, map[1]); + + res = map; +} + +StringRef ColumnMap::get_data_at(size_t n) const { + LOG(FATAL) << "Method get_data_at is not supported for " << get_name(); +} + +void ColumnMap::insert_data(const char*, size_t) { + LOG(FATAL) << "Method insert_data is not supported for " << get_name(); +} + +void ColumnMap::insert(const Field& x) { + const auto& map = doris::vectorized::get(x); + // ({}, {}, {}) + // ([], []) + CHECK_EQ(map.size(), 2); + keys->insert(map[0]); + values->insert(map[1]); +} + +void ColumnMap::insert_default() { + keys->insert_default(); + values->insert_default(); +} + +void ColumnMap::pop_back(size_t n) { +
keys->pop_back(n); + values->pop_back(n); +} + +StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena & arena, char const*& begin) + const { + StringRef res(begin, 0); + auto keys_ref = keys->serialize_value_into_arena(n, arena, begin); + res.data = keys_ref.data - res.size; + res.size += keys_ref.size; + auto value_ref = values->serialize_value_into_arena(n, arena, begin); + res.data = value_ref.data - res.size; + res.size += value_ref.size; + + return res; +} + +void ColumnMap::insert_from(const IColumn& src_, size_t n) { + const ColumnMap& src = assert_cast(src_); + + if ((!get_keys().is_nullable() && src.get_keys().is_nullable()) + || (!get_values().is_nullable() && src.get_values().is_nullable())) { + DCHECK(false); + } else if ((get_keys().is_nullable() && !src.get_keys().is_nullable()) + || (get_values().is_nullable() && !src.get_values().is_nullable())) { + DCHECK(false); + } else { + keys->insert_from(*assert_cast(src_).keys, n); + values->insert_from(*assert_cast(src_).values, n); + } +} + +void ColumnMap::insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) { + for (auto x = indices_begin; x != indices_end; ++x) { + if (*x == -1) { + ColumnMap::insert_default(); + } else { + ColumnMap::insert_from(src, *x); + } + } +} + +const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) { + pos = keys->deserialize_and_insert_from_arena(pos); + pos = values->deserialize_and_insert_from_arena(pos); + + return pos; +} + +void ColumnMap::update_hash_with_value(size_t n, SipHash & hash) const { + keys->update_hash_with_value(n, hash); + values->update_hash_with_value(n, hash); +} + +void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t length) { + keys->insert_range_from(*assert_cast(src).keys, start, length); + values->insert_range_from(*assert_cast(src).values, start, length); +} + +ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) const { + return 
ColumnMap::create(keys->filter(filt, result_size_hint), values->filter(filt, result_size_hint)); +} + +ColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const { + return ColumnMap::create(keys->permute(perm, limit), values->permute(perm, limit)); +} + +ColumnPtr ColumnMap::replicate(const Offsets& offsets) const { + return ColumnMap::create(keys->replicate(offsets), values->replicate(offsets)); +} + +//MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector& selector) const { +// +// MutableColumns keys_scatter = keys->scatter(num_columns, selector); +// MutableColumns values_scatter = values->scatter(num_columns, selector); +// +// MutableColumns res(num_columns); +// +// for (size_t scattered_idx = 0; scattered_idx < num_columns; ++scattered_idx) +// { +// MutableColumns new_columns(2); +// for (size_t map_element_idx = 0; map_element_idx < 2; ++map_element_idx) +// new_columns[map_element_idx] = std::move(scattered_map_elements[map_element_idx][scattered_idx]); +// res[scattered_idx] = ColumnMap::create(std::move(new_columns)); +// } +// +// +// return res; +//} + + +void ColumnMap::reserve(size_t n) { + get_keys().reserve(n); + get_values().reserve(n); +} + +size_t ColumnMap::byte_size() const { + return get_keys().byte_size() + get_values().byte_size(); +} + +size_t ColumnMap::allocated_bytes() const { + return get_keys().allocated_bytes() + get_values().allocated_bytes(); +} + +void ColumnMap::protect() { + get_keys().protect(); + get_values().protect(); +} + +void ColumnMap::get_extremes(Field & min, Field & max) const { + Map min_map(2); + Map max_map(2); + + keys->get_extremes(min_map[0], max_map[0]); + values->get_extremes(min_map[1], max_map[1]); + + min = min_map; + max = max_map; +} + +} \ No newline at end of file diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h new file mode 100644 index 00000000000000..1bb95b352b9376 --- /dev/null +++ b/be/src/vec/columns/column_map.h @@ -0,0 +1,128 
@@ +#pragma once + +#include "vec/columns/column_array.h" +#include "vec/columns/column.h" +#include "vec/columns/column_impl.h" +#include "vec/common/arena.h" +#include "vec/core/field.h" +#include "vec/core/types.h" + +namespace doris::vectorized { + + +/** A column of map values. + */ +class ColumnMap final : public COWHelper { + +public: + /** Create immutable column using immutable arguments. This arguments may be shared with other columns. + * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. + */ + using Base = COWHelper; + + static Ptr create(const ColumnPtr& keys, const ColumnPtr& values) { + return ColumnMap::create(keys->assume_mutable(), values->assume_mutable()); + } + + template ::value>::type> + static MutablePtr create(Args&&... args) { + return Base::create(std::forward(args)...); + } + + std::string get_name() const override; + const char * get_family_name() const override { return "Map"; } + TypeIndex get_data_type() const { return TypeIndex::Map; } + + void for_each_subcolumn(ColumnCallback callback) override { + callback(keys); + callback(values); + } + + MutableColumnPtr clone_resized(size_t size) const override; + + bool can_be_inside_nullable() const override { return true; } + size_t size() const override { return keys->size(); } + Field operator[](size_t n) const override; + void get(size_t n, Field & res) const override; + StringRef get_data_at(size_t n) const override; + + void insert_data(const char* pos, size_t length) override; + void insert_range_from(const IColumn& src, size_t start, size_t length) override; + void insert_from(const IColumn& src_, size_t n) override; + void insert(const Field & x) override; + void insert_default() override; + + void pop_back(size_t n) override; + + StringRef serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const override; + const char * deserialize_and_insert_from_arena(const char * pos) override; + + void update_hash_with_value(size_t n, 
SipHash & hash) const override; + + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; + ColumnPtr permute(const Permutation & perm, size_t limit) const override; + ColumnPtr replicate(const Offsets & offsets) const override; + MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override { + return scatter_impl(num_columns, selector); + } + void get_extremes(Field & min, Field & max) const override; + [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_, + int nan_direction_hint) const override { + LOG(FATAL) << "compare_at not implemented"; + } + void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override { + LOG(FATAL) << "get_permutation not implemented"; + } + void insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) override; + + void append_data_by_selector(MutableColumnPtr& res, + const IColumn::Selector& selector) const override { + return append_data_by_selector_impl(res, selector); + } + + + void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override { + LOG(FATAL) << "replace_column_data not implemented"; + } + void replace_column_data_default(size_t self_row = 0) override { + LOG(FATAL) << "replace_column_data_default not implemented"; + } + void check_size() const; + ColumnArray::Offsets64& get_offsets() const; + void reserve(size_t n) override; + size_t byte_size() const override; + size_t allocated_bytes() const override; + void protect() override; + + /******************** keys and values ***************/ + const ColumnPtr& get_keys_ptr() const { return keys; } + ColumnPtr& get_keys_ptr() { return keys; } + + const IColumn& get_keys() const { return *keys; } + IColumn& get_keys() { return *keys; } + + const ColumnPtr& get_values_ptr() const { return values; } + ColumnPtr& get_values_ptr() { return values; } + + const IColumn& get_values() const { return *values; } + 
IColumn& get_values() { return *values; } + +private: + friend class COWHelper; + + WrappedPtr keys; // nullable + WrappedPtr values; // nullable + + size_t ALWAYS_INLINE offset_at(ssize_t i) const { return get_offsets()[i - 1]; } + size_t ALWAYS_INLINE size_at(ssize_t i) const { + return get_offsets()[i] - get_offsets()[i - 1]; + } + + explicit ColumnMap(MutableColumnPtr && keys, MutableColumnPtr && values); + + ColumnMap(const ColumnMap &) = default; +}; + +} \ No newline at end of file diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index b5155dc55f980c..7af160b3b22483 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -85,6 +85,7 @@ using FieldVector = std::vector; DEFINE_FIELD_VECTOR(Array); DEFINE_FIELD_VECTOR(Tuple); +DEFINE_FIELD_VECTOR(Map); #undef DEFINE_FIELD_VECTOR @@ -308,6 +309,7 @@ class Field { AggregateFunctionState = 22, JSONB = 23, Decimal128I = 24, + Map = 25, }; static const int MIN_NON_POD = 16; @@ -334,6 +336,8 @@ class Field { return "Array"; case Tuple: return "Tuple"; + case Map: + return "Map"; case Decimal32: return "Decimal32"; case Decimal64: @@ -505,6 +509,8 @@ class Field { return get() < rhs.get(); case Types::Tuple: return get() < rhs.get(); + case Types::Map: + return get() < rhs.get(); case Types::Decimal32: return get>() < rhs.get>(); case Types::Decimal64: @@ -550,6 +556,8 @@ class Field { return get() <= rhs.get(); case Types::Tuple: return get() <= rhs.get(); + case Types::Map: + return get() <= rhs.get(); case Types::Decimal32: return get>() <= rhs.get>(); case Types::Decimal64: @@ -587,6 +595,8 @@ class Field { return get() == rhs.get(); case Types::Tuple: return get() == rhs.get(); + case Types::Map: + return get() == rhs.get(); case Types::UInt128: return get() == rhs.get(); case Types::Int128: @@ -677,6 +687,9 @@ class Field { case Types::Tuple: f(field.template get()); return; + case Types::Map: + f(field.template get()); + return; case Types::Decimal32: f(field.template get>()); return;
@@ -749,6 +762,9 @@ class Field { case Types::Tuple: destroy(); break; + case Types::Map: + destroy(); + break; case Types::AggregateFunctionState: destroy(); break; @@ -810,6 +826,10 @@ struct Field::TypeToEnum { static const Types::Which value = Types::Tuple; }; template <> +struct Field::TypeToEnum { + static const Types::Which value = Types::Map; +}; +template <> struct Field::TypeToEnum> { static const Types::Which value = Types::Decimal32; }; @@ -871,6 +891,10 @@ struct Field::EnumToType { using Type = Tuple; }; template <> +struct Field::EnumToType { + using Type = Map; +}; +template <> struct Field::EnumToType { using Type = DecimalField; }; @@ -920,6 +944,10 @@ struct TypeName { static std::string get() { return "Tuple"; } }; template <> +struct TypeName { + static std::string get() { return "Map"; } +}; +template <> struct TypeName { static std::string get() { return "AggregateFunctionState"; } }; @@ -1047,6 +1075,10 @@ struct NearestFieldTypeImpl { using Type = Tuple; }; template <> +struct NearestFieldTypeImpl { + using Type = Map; +}; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 7636d714b398f1..40d742617b60c3 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -80,6 +80,7 @@ enum class TypeIndex { FixedLengthObject, JSONB, Decimal128I, + Map, }; struct Consted { @@ -505,6 +506,8 @@ inline const char* getTypeName(TypeIndex idx) { return "Array"; case TypeIndex::Tuple: return "Tuple"; + case TypeIndex::Map: + return "Map"; case TypeIndex::Set: return "Set"; case TypeIndex::Interval: diff --git a/be/src/vec/data_types/data_type.cpp b/be/src/vec/data_types/data_type.cpp index 7a0d67d4a41c68..ce4f25614a76a4 100644 --- a/be/src/vec/data_types/data_type.cpp +++ b/be/src/vec/data_types/data_type.cpp @@ -149,6 +149,8 @@ PGenericType_TypeId IDataType::get_pdata_type(const IDataType* data_type) { return PGenericType::FIXEDLENGTHOBJECT; case 
TypeIndex::JSONB: return PGenericType::JSONB; + case TypeIndex::Map: + return PGenericType::MAP; default: return PGenericType::UNKNOWN; } diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index e5f0e6aa4cd639..5e338b96589cd0 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -314,6 +314,7 @@ struct WhichDataType { bool is_uuid() const { return idx == TypeIndex::UUID; } bool is_array() const { return idx == TypeIndex::Array; } bool is_tuple() const { return idx == TypeIndex::Tuple; } + bool is_map() const { return idx == TypeIndex::Map; } bool is_set() const { return idx == TypeIndex::Set; } bool is_interval() const { return idx == TypeIndex::Interval; } @@ -355,7 +356,9 @@ inline bool is_tuple(const DataTypePtr& data_type) { inline bool is_array(const DataTypePtr& data_type) { return WhichDataType(data_type).is_array(); } - +inline bool is_map(const DataTypePtr& data_type) { + return WhichDataType(data_type).is_map(); +} inline bool is_nothing(const DataTypePtr& data_type) { return WhichDataType(data_type).is_nothing(); } diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index e622d979d56969..cf661bb85a6c9a 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -19,7 +19,6 @@ // and modified by Doris #include "vec/data_types/data_type_factory.hpp" - #include "data_type_time.h" namespace doris::vectorized { @@ -45,6 +44,10 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc, bool if (col_desc.type() == OLAP_FIELD_TYPE_ARRAY) { DCHECK(col_desc.get_subtype_count() == 1); nested = std::make_shared(create_data_type(col_desc.get_sub_column(0))); + } else if (col_desc.type() == OLAP_FIELD_TYPE_MAP) { + DCHECK(col_desc.get_subtype_count() == 2); + nested = std::make_shared(create_data_type(col_desc.get_sub_column(0)), + create_data_type(col_desc.get_sub_column(1))); } 
else { nested = _create_primitive_data_type(col_desc.type(), col_desc.precision(), col_desc.frac()); @@ -94,8 +97,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo break; case TYPE_TIME: case TYPE_TIMEV2: - nested = std::make_shared(); - break; + nested = std::make_shared(); + break; case TYPE_DOUBLE: nested = std::make_shared(); break; @@ -131,6 +134,12 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared( create_data_type(col_desc.children[0], col_desc.contains_null)); break; + case TYPE_MAP: + DCHECK(col_desc.children.size() == 2); + nested = std::make_shared( + create_data_type(col_desc.children[0], col_desc.contains_null), + create_data_type(col_desc.children[1], col_desc.contains_null)); + break; case INVALID_TYPE: default: DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; @@ -299,6 +308,12 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { case PGenericType::FIXEDLENGTHOBJECT: nested = std::make_shared(); break; + case PGenericType::MAP: + DCHECK(pcolumn.children_size() == 2); + // here to check pcolumn is list? 
+ nested = std::make_shared(create_data_type(pcolumn.children(0).children(0)), + create_data_type(pcolumn.children(1).children(0))); + break; default: { LOG(FATAL) << fmt::format("Unknown data type: {}", pcolumn.type()); return nullptr; @@ -368,6 +383,12 @@ DataTypePtr DataTypeFactory::create_data_type(const arrow::DataType* type, bool nested = std::make_shared( create_data_type(type->field(0)->type().get(), true)); break; + case ::arrow::Type::MAP: + DCHECK(type->num_fields() == 2); + nested = std::make_shared( + create_data_type(type->field(0)->type().get(), true), + create_data_type(type->field(1)->type().get(), true)); + break; default: DCHECK(false) << "invalid arrow type:" << (int)(type->id()); break; diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index ed270b40eaa830..5b698bdfd0ad90 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -33,10 +33,12 @@ #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" +#include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_fixed_length_object.h" #include "vec/data_types/data_type_hll.h" #include "vec/data_types/data_type_jsonb.h" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nothing.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" @@ -123,7 +125,7 @@ class DataTypeFactory { DataTypePtr create_data_type(const TTypeDesc& raw_type) { return create_data_type(TypeDescriptor::from_thrift(raw_type), raw_type.is_nullable); } - + DataTypePtr create_data_type(const FieldType& type, int precision, int scale) { return _create_primitive_data_type(type, precision, scale); } diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp new file mode 100644 index 
00000000000000..e9f8ae7d9d3853 --- /dev/null +++ b/be/src/vec/data_types/data_type_map.cpp @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "data_type_map.h" + +#include "gen_cpp/data.pb.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" +#include "vec/common/assert_cast.h" + +namespace doris::vectorized { + +DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_) +{ + key_type = keys_; + value_type = values_; + + keys = std::make_shared(key_type); + values = std::make_shared(value_type); +} + +std::string DataTypeMap::to_string(const IColumn& column, size_t row_num) const { + const ColumnMap & map_column = assert_cast(column); + const ColumnArray::Offsets64& offsets = map_column.get_offsets(); + + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + + const IColumn & nested_keys = map_column.get_keys(); + const IColumn & nested_values = map_column.get_values(); + + std::stringstream ss; + ss << "{"; + for (size_t i = offset; i < next_offset; ++i) + { + if (i != offset) + ss << ", "; + ss << key_type->to_string(nested_keys, i); + ss << ':'; + ss <<
value_type->to_string(nested_values, i); + } + ss << "}"; + return ss.str(); +} + +void DataTypeMap::to_string(const class doris::vectorized::IColumn& column, size_t row_num, + class doris::vectorized::BufferWritable& ostr) const { + const ColumnMap & map_column = assert_cast(column); + const ColumnArray::Offsets64& offsets = map_column.get_offsets(); + + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + + const IColumn & nested_keys = map_column.get_keys(); + const IColumn & nested_values = map_column.get_values(); + + ostr.write("{", 1); + for (size_t i = offset; i < next_offset; ++i) + { + if (i != offset) + ostr.write(", ", 2); + key_type->to_string(nested_keys, i, ostr); + ostr.write(":", 1); + value_type->to_string(nested_values, i, ostr); + } + ostr.write("}", 1); +} + +Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const { + DCHECK(!rb.eof()); + // only support one level now + auto* map_column = assert_cast(column); + // literal braces are doubled below because the message is a format string with one '{}' argument + if (*rb.position() != '{') { + return Status::InvalidArgument("map does not start with '{{' character, found '{}'", + *rb.position()); + } + RETURN_IF_ERROR(keys->from_string(rb, &map_column->get_keys())); + RETURN_IF_ERROR(values->from_string(rb, &map_column->get_values())); + if (*(rb.end() - 1) != '}') { + return Status::InvalidArgument("map does not end with '}}' character, found '{}'", + *(rb.end() - 1)); + } +// keys->deserializeAsTextQuoted(extractElementColumn(column, 0), istr, settings); +// assertChar(',', istr); +// values->deserializeAsTextQuoted(extractElementColumn(column, 1), istr, settings); +// assertChar('}', istr); + return Status::OK(); +} + +MutableColumnPtr DataTypeMap::create_column() const { + return ColumnMap::create(keys->create_column(), values->create_column()); +} + +void DataTypeMap::to_pb_column_meta(PColumnMeta* col_meta) const { + IDataType::to_pb_column_meta(col_meta); + auto key_children = col_meta->add_children(); + auto value_children =
col_meta->add_children(); + keys->to_pb_column_meta(key_children); + values->to_pb_column_meta(value_children); +} + +bool DataTypeMap::equals(const IDataType& rhs) const { + if (typeid(rhs) != typeid(*this)) + return false; + + const DataTypeMap & rhs_map = static_cast(rhs); + + if (!keys->equals(*rhs_map.keys)) + return false; + + if (!values->equals(*rhs_map.values)) + return false; + + return true; +} + +int64_t DataTypeMap::get_uncompressed_serialized_bytes(const IColumn& column, + int data_version) const { + auto ptr = column.convert_to_full_column_if_const(); + const auto& data_column = assert_cast(*ptr.get()); + return get_keys()->get_uncompressed_serialized_bytes(data_column.get_keys(), data_version) + + get_values()->get_uncompressed_serialized_bytes(data_column.get_values(), data_version); +} + +// serialize to binary +char* DataTypeMap::serialize(const IColumn& column, char* buf, int data_version) const { + auto ptr = column.convert_to_full_column_if_const(); + const auto& map_column = assert_cast(*ptr.get()); + + + buf = get_keys()->serialize(map_column.get_keys(), buf, data_version); + return get_values()->serialize(map_column.get_values(), buf, data_version); +} + +const char* DataTypeMap::deserialize(const char* buf, IColumn* column, int data_version) const { + const auto* map_column = assert_cast(column); + buf = get_keys()->deserialize(buf, map_column->get_keys_ptr()->assume_mutable(), data_version); + return get_values()->deserialize(buf, map_column->get_values_ptr()->assume_mutable(), data_version); +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/data_types/data_type_map.h b/be/src/vec/data_types/data_type_map.h new file mode 100644 index 00000000000000..2a7c06bdc3d2a4 --- /dev/null +++ b/be/src/vec/data_types/data_type_map.h @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeMap.h +// and modified by Doris + +#pragma once + +#include "vec/data_types/data_type.h" + +namespace doris::vectorized { +/** Map data type. + * + * Map's key and value only have types. + * If only one type is set, then key's type is "String" in default. 
+ */ +class DataTypeMap final : public IDataType +{ +private: + DataTypePtr key_type; // type of a single key, as passed to the constructor + DataTypePtr value_type; // type of a single value, as passed to the constructor + DataTypePtr keys; // array type built from key_type in the constructor + DataTypePtr values; // array type built from value_type in the constructor + +public: + static constexpr bool is_parametric = true; + + DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_); + + TypeIndex get_type_id() const override { return TypeIndex::Map; } + std::string do_get_name() const override { return "Map(" + key_type->get_name() + ", " + value_type->get_name()+ ")"; } + const char * get_family_name() const override { return "Map"; } + + bool can_be_inside_nullable() const override { return true; } + MutableColumnPtr create_column() const override; + Field get_default() const override { return Map(); }; + bool equals(const IDataType& rhs) const override; + bool get_is_parametric() const override { return true; } + bool have_subtypes() const override { return true; } + bool is_comparable() const override { return key_type->is_comparable() && value_type->is_comparable(); } + bool can_be_compared_with_collation() const override { return false; } + bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { + return true; + } + + + const DataTypePtr& get_keys() const { return keys; } // container (array) type of the keys + const DataTypePtr& get_values() const { return values; } // container (array) type of the values + + const DataTypePtr & get_key_type() const { return key_type; } // element type of a key + const DataTypePtr & get_value_type() const { return value_type; } // element type of a value + + int64_t get_uncompressed_serialized_bytes(const IColumn& column, + int be_exec_version) const override; + char* serialize(const IColumn& column, char* buf, int be_exec_version) const override; + const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override; + + void to_pb_column_meta(PColumnMeta* col_meta) const override; + + std::string to_string(const IColumn& column, size_t row_num) const override; + void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; + Status 
from_string(ReadBuffer& rb, IColumn* column) const override; + +}; + +} \ No newline at end of file diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 9ad3ccc7e85580..a051b240eebc57 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -29,6 +29,7 @@ #include "vec/exprs/vcase_expr.h" #include "vec/exprs/vcast_expr.h" #include "vec/exprs/vcompound_pred.h" +#include "vec/exprs/vmap_literal.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vin_predicate.h" #include "vec/exprs/vinfo_func.h" @@ -124,6 +125,10 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr *expr = pool->add(new VArrayLiteral(texpr_node)); return Status::OK(); } + case TExprNodeType::MAP_LITERAL: { + *expr = pool->add(new VMapLiteral(texpr_node)); + return Status::OK(); + } case doris::TExprNodeType::SLOT_REF: { *expr = pool->add(new VSlotRef(texpr_node)); break; diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp new file mode 100644 index 00000000000000..02d4ce71f58656 --- /dev/null +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/exprs/vmap_literal.h" + +// Example statement producing map literals: insert into table_map values ({'name':'zhangsan', 'gender':'male'}), ({'name':'lisi', 'gender':'female'}); +namespace doris::vectorized { + +Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, + VExprContext* context) { + DCHECK_EQ(type().children.size(), 2) << "map children type not 2"; + + RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); + // The map Field holds exactly two Array fields: all keys first, then all values. + Field map = Map(); + Field keys = Array(); + Field values = Array(); + // Children alternate key1, value1, key2, value2, ...: even indices are keys, odd indices are values. + for (size_t idx = 0; idx < _children.size(); ++idx ) { + Field item; + ColumnPtrWrapper* const_col_wrapper = nullptr; + RETURN_IF_ERROR(_children[idx]->get_const_col(context, &const_col_wrapper)); + const_col_wrapper->column_ptr->get(0, item); + + if ((idx & 1) == 0) + keys.get().push_back(item); + else + values.get().push_back(item); + } + map.get().push_back(keys); + map.get().push_back(values); + + _column_ptr = _data_type->create_column_const(1, map); // one-row constant column carrying the whole map + return Status::OK(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/vmap_literal.h b/be/src/vec/exprs/vmap_literal.h new file mode 100644 index 00000000000000..a3c45ffb2f6def --- /dev/null +++ b/be/src/vec/exprs/vmap_literal.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include "vec/exprs/vliteral.h" + + +namespace doris { + +namespace vectorized { +class VMapLiteral : public VLiteral { // literal expression for a MAP value; prepare() is overridden for it +public: + VMapLiteral(const TExprNode& node) : VLiteral(node, false) {} + ~VMapLiteral() override = default; + Status prepare(RuntimeState* state, const RowDescriptor& row_desc, + VExprContext* context) override; +}; +} // namespace vectorized + +} // namespace doris diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index e9b4c8fd24b41b..dc8521c9be6397 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -54,7 +54,6 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co case FieldType::OLAP_FIELD_TYPE_CHAR: { return std::make_unique(column.length()); } - case FieldType::OLAP_FIELD_TYPE_MAP: case FieldType::OLAP_FIELD_TYPE_VARCHAR: { return std::make_unique(false); } @@ -119,6 +118,18 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co return std::make_unique( create_olap_column_data_convertor(sub_column)); } + case FieldType::OLAP_FIELD_TYPE_MAP: { + const auto& key_column = column.get_sub_column(0); // sub column 0 describes the keys + const auto& value_column = column.get_sub_column(1); // sub column 1 describes the values + return std::make_unique( + std::make_unique(create_olap_column_data_convertor(key_column)), + std::make_unique(create_olap_column_data_convertor(value_column))); + //const auto& key_column = column.get_sub_column(0); + //const auto& value_column = column.get_sub_column(1); + //return std::make_unique( + // 
create_olap_column_data_convertor(key_column), + // create_olap_column_data_convertor(value_column)); + } default: { DCHECK(false) << "Invalid type in RowBlockV2:" << column.type(); return nullptr; @@ -712,4 +723,103 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorArray::convert_to_olap( return Status::OK(); } +Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { + const ColumnMap* column_map = nullptr; + const DataTypeMap* data_type_map = nullptr; + if (_nullmap) { // nullable source: unwrap the nested map column and the nested map type + const auto* nullable_column = + assert_cast(_typed_column.column.get()); + column_map = + assert_cast(nullable_column->get_nested_column_ptr().get()); + data_type_map = assert_cast( + (assert_cast(_typed_column.type.get())->get_nested_type()) + .get()); + } else { + column_map = assert_cast(_typed_column.column.get()); + data_type_map = assert_cast(_typed_column.type.get()); + } + assert(column_map); + assert(data_type_map); + + return convert_to_olap(_nullmap, column_map, data_type_map); +} + +Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( + const UInt8* null_map, const ColumnMap* column_map, + const DataTypeMap* data_type_map) { + const UInt8* key_null_map = nullptr; + const UInt8* value_null_map = nullptr; + + ColumnPtr key_data = column_map->get_keys_ptr(); + ColumnPtr value_data = column_map->get_values_ptr(); + if (column_map->get_keys().is_nullable()) { // strip the nullable wrapper, keeping its null map for later + const auto& key_nullable_column = + assert_cast(column_map->get_keys()); + key_null_map = key_nullable_column.get_null_map_data().data(); + key_data = key_nullable_column.get_nested_column_ptr(); + } + + if (column_map->get_values().is_nullable()) { // same unwrapping for the values + const auto& val_nullable_column = + assert_cast(column_map->get_values()); + value_null_map = val_nullable_column.get_null_map_data().data(); + value_data = val_nullable_column.get_nested_column_ptr(); + } + + const auto& offsets = column_map->get_offsets(); // use keys offsets + int64_t start_index = _row_pos - 1; // offset entry just before the first converted row + // int64_t 
end_index = _row_pos + _num_rows - 1; + // auto start = offsets[start_index]; + // auto size = offsets[end_index] - start; + + ColumnWithTypeAndName key_typed_column = { + key_data, remove_nullable(data_type_map->get_keys()),""}; + _key_convertor->set_source_column(key_typed_column, _row_pos, _num_rows); // NOTE(review): a row range is passed here while get_data_at() below is indexed by element offset — confirm the units agree + _key_convertor->convert_to_olap(); + + ColumnWithTypeAndName value_typed_column = { + value_data, remove_nullable(data_type_map->get_values()), ""}; + _value_convertor->set_source_column(value_typed_column, _row_pos, _num_rows); + _value_convertor->convert_to_olap(); + + MapValue* map_value = _values.data(); + for (size_t i = 0; i < _num_rows; ++i, ++map_value) { + int64_t cur_pos = _row_pos + i; + int64_t prev_pos = cur_pos - 1; + if (_nullmap && _nullmap[cur_pos]) { + continue; // null row: its MapValue slot is not constructed here + } + auto offset = offsets[prev_pos]; // first entry of this row + auto single_map_size = offsets[cur_pos] - offsets[prev_pos]; // number of entries in this row + new (map_value) MapValue(single_map_size); + + if (single_map_size == 0) { + continue; + } + + if (column_map->get_keys().is_nullable()) { + map_value->set_key_has_null(true); + map_value->set_key_null_signs( + const_cast(reinterpret_cast(key_null_map + offset))); + } else { + map_value->set_key_has_null(false); + } + if (column_map->get_values().is_nullable()) { + map_value->set_val_has_null(true); + map_value->set_value_null_signs( + const_cast(reinterpret_cast(value_null_map + offset))); + } else { + map_value->set_val_has_null(false); + } + + map_value->set_key( + const_cast(_key_convertor->get_data_at(offset - offsets[start_index]))); + map_value->set_value( + const_cast(_value_convertor->get_data_at(offset - offsets[start_index]))); + } + + return Status::OK(); +} + + } // namespace doris::vectorized diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 6898b44a9d31e0..4888d42237fffd 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -20,8 +20,10 @@ #include "olap/types.h" #include 
"runtime/mem_pool.h" #include "vec/columns/column_nullable.h" +#include "vec/columns/column_map.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/types.h" +#include "vec/data_types/data_type_map.h" namespace doris { @@ -373,6 +375,23 @@ class OlapBlockDataConvertor { OlapColumnDataConvertorBaseUPtr _item_convertor; }; + class OlapColumnDataConvertorMap + : public OlapColumnDataConvertorPaddedPODArray { + public: + OlapColumnDataConvertorMap(OlapColumnDataConvertorBaseUPtr key_convertor, + OlapColumnDataConvertorBaseUPtr value_convertor) + : _key_convertor(std::move(key_convertor)), _value_convertor(std::move(value_convertor)) {} + + Status convert_to_olap() override; + + private: + Status convert_to_olap(const UInt8* null_map, const ColumnMap* column_map, + const DataTypeMap* data_type_map); + OlapColumnDataConvertorBaseUPtr _key_convertor; // owned convertor for the keys column + OlapColumnDataConvertorBaseUPtr _value_convertor; // owned convertor for the values column + }; // OlapColumnDataConvertorMap + + private: std::vector _convertors; }; diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 77260ba95fb73a..4af64f0a6ad44c 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -617,7 +617,7 @@ terminal String KW_MTMV, KW_TYPECAST; -terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; +terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, LBRACE, RBRACE, DIVIDE, MOD, ADD, SUBTRACT; terminal BITAND, BITOR, BITXOR, BITNOT; terminal EQUAL, NOT, LESSTHAN, GREATERTHAN, SET_VAR; terminal COMMENTED_PLAN_HINT_START, COMMENTED_PLAN_HINT_END; @@ -688,7 +688,7 @@ nonterminal SelectList select_clause, select_list, select_sublist; nonterminal SelectListItem select_list_item, star_expr; nonterminal Expr expr, non_pred_expr, arithmetic_expr, timestamp_arithmetic_expr, expr_or_default; nonterminal Expr set_expr_or_default; -nonterminal 
ArrayList expr_list, values, row_value, opt_values; +nonterminal ArrayList expr_list, values, row_value, opt_values, kv_list; nonterminal ArrayList func_arg_list; nonterminal ArrayList expr_pipe_list; nonterminal String select_alias, opt_table_alias, lock_alias, opt_alias; @@ -726,6 +726,7 @@ nonterminal ArrayList case_when_clause_list; nonterminal FunctionParams function_params; nonterminal Expr function_call_expr, array_expr; nonterminal ArrayLiteral array_literal; +nonterminal MapLiteral map_literal; nonterminal StructField struct_field; nonterminal ArrayList struct_field_list; nonterminal AnalyticWindow opt_window_clause; @@ -927,6 +928,7 @@ precedence left KW_PARTITION; precedence left KW_PARTITIONS; precedence right KW_TEMPORARY; precedence right LBRACKET; +precedence right LBRACE; precedence left KW_ENGINE; // unused @@ -5824,6 +5826,33 @@ array_expr ::= :} ; +kv_list ::= + expr:k COLON expr:v + {: + ArrayList list = new ArrayList(); + list.add(k); // entries are stored flattened: key first ... + list.add(v); // ... then its value + RESULT = list ; + :} + |kv_list:list COMMA expr:k COLON expr:v + {: + list.add(k); + list.add(v); + RESULT = list; + :} + ; + +map_literal ::= + LBRACE RBRACE + {: + RESULT = new MapLiteral(); + :} + | LBRACE kv_list:list RBRACE + {: + RESULT = new MapLiteral(list.toArray(new LiteralExpr[0])); // NOTE(review): kv_list is built from expr, so a non-literal element would make toArray throw ArrayStoreException — confirm only literals reach here + :} + ; + struct_field ::= ident:name COLON type:type {: RESULT = new StructField(name, type); :} @@ -5863,6 +5892,8 @@ non_pred_expr ::= {: RESULT = a; :} | array_literal:a {: RESULT = a; :} + | map_literal:a + {: RESULT = a; :} | function_call_expr:e {: RESULT = e; :} | KW_DATE STRING_LITERAL:l diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index ab5bc992f8b079..309bf0bc180504 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -318,6 +318,11 @@ public void analyze() throws AnalysisException { type, 
Function.NullableMode.ALWAYS_NULLABLE, Lists.newArrayList(Type.VARCHAR), false, "doris::CastFunctions::cast_to_array_val", null, null, true); + } else if (type.isMapType()) { + fn = ScalarFunction.createBuiltin(getFnName(Type.MAP), + type, Function.NullableMode.ALWAYS_NULLABLE, + Lists.newArrayList(Type.VARCHAR), false, + "doris::CastFunctions::cast_to_map_val", null, null, true); } if (fn == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 9c29c7ffee9a5b..82171ad6ee293d 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1319,7 +1319,7 @@ public void checkReturnsBool(String name, boolean printExpr) throws AnalysisExce } public Expr checkTypeCompatibility(Type targetType) throws AnalysisException { - if (targetType.getPrimitiveType() != PrimitiveType.ARRAY + if (targetType.getPrimitiveType() != PrimitiveType.ARRAY && targetType.getPrimitiveType() != PrimitiveType.MAP && targetType.getPrimitiveType() == type.getPrimitiveType()) { if (targetType.isDecimalV2() && type.isDecimalV2()) { return this; @@ -1785,6 +1785,7 @@ enum ExprSerCode { CAST_EXPR(14), JSON_LITERAL(15), - ARITHMETIC_EXPR(16); + ARITHMETIC_EXPR(16), + MAP_LITERAL(17); // serialization code written by writeTo() and dispatched by readIn() for MapLiteral private static Map codeMap = Maps.newHashMap(); @@ -1836,7 +1837,9 @@ public static void writeTo(Expr expr, DataOutput output) throws IOException { output.writeInt(ExprSerCode.FUNCTION_CALL.getCode()); } else if (expr instanceof ArrayLiteral) { output.writeInt(ExprSerCode.ARRAY_LITERAL.getCode()); - } else if (expr instanceof CastExpr) { + } else if (expr instanceof MapLiteral) { + output.writeInt(ExprSerCode.MAP_LITERAL.getCode()); + } else if (expr instanceof CastExpr) { output.writeInt(ExprSerCode.CAST_EXPR.getCode()); } else if (expr instanceof ArithmeticExpr) { output.writeInt(ExprSerCode.ARITHMETIC_EXPR.getCode()); @@ -1885,6 +1888,8 @@ public static Expr 
readIn(DataInput in) throws IOException { return FunctionCallExpr.read(in); case ARRAY_LITERAL: return ArrayLiteral.read(in); + case MAP_LITERAL: + return MapLiteral.read(in); case CAST_EXPR: return CastExpr.read(in); case ARITHMETIC_EXPR: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java new file mode 100644 index 00000000000000..f7aa6aca7dc15a --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java @@ -0,0 +1,181 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.analysis; + +import org.apache.doris.catalog.MapType; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.thrift.TExprNode; +import org.apache.doris.thrift.TExprNodeType; +import org.apache.doris.thrift.TTypeDesc; +import org.apache.doris.thrift.TTypeNode; + +import org.apache.commons.lang.StringUtils; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + + +// INSERT INTO table_map VALUES ({'key1':1, 'key2':10, 'k3':100}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +// A MapLiteral is the literal of one row; children are stored flattened as key1, value1, key2, value2, ... +public class MapLiteral extends LiteralExpr { + + public MapLiteral() { + type = new MapType(Type.NULL, Type.NULL); + children = new ArrayList<>(); + } + + public MapLiteral(LiteralExpr... exprs) throws AnalysisException { + Type keyType = Type.NULL; + Type valueType = Type.NULL; + children = new ArrayList<>(); + int idx = 0; + for (LiteralExpr expr : exprs) { + if (idx % 2 == 0) { // even position: a key, widen the common key type + if (keyType == Type.NULL) { + keyType = expr.getType(); + } else { + keyType = Type.getAssignmentCompatibleType(keyType, expr.getType(), false); + } + if (keyType == Type.INVALID) { + throw new AnalysisException("Invalid element type in Map"); + } + } else { // odd position: a value, widen the common value type + if (valueType == Type.NULL) { + valueType = expr.getType(); + } else { + valueType = Type.getAssignmentCompatibleType(valueType, expr.getType(), false); + } + if (valueType == Type.INVALID) { + throw new AnalysisException("Invalid element type in Map"); + } + } + children.add(expr); + ++ idx; + } + + type = new MapType(keyType, valueType); + } + + protected MapLiteral(MapLiteral other) { + super(other); + } + + @Override + public Expr uncheckedCastTo(Type targetType) throws AnalysisException { + if (!targetType.isMapType()) { + return super.uncheckedCastTo(targetType); + } + MapLiteral literal = new MapLiteral(this); + Type keyType = ((MapType) 
targetType).getKeyType(); + Type valueType = ((MapType) targetType).getValueType(); + + for (int i = 0; i < children.size(); ++ i) { + Expr child = children.get(i); + if ((i & 1) == 0) { // keys sit at even positions, values at odd ones + literal.children.set(i, child.uncheckedCastTo(keyType)); + } else { + literal.children.set(i, child.uncheckedCastTo(valueType)); + } + } + literal.setType(targetType); + return literal; + } + + @Override + public void checkValueValid() throws AnalysisException { + for (Expr e : children) { + e.checkValueValid(); + } + } + + @Override + protected String toSqlImpl() { + List list = new ArrayList<>(children.size()); + for (int i = 0; i + 1 < children.size(); i += 2) { // bounded by i + 1 so a trailing unpaired child cannot cause an out-of-range get() + list.add(children.get(i).toSqlImpl() + ":" + children.get(i + 1).toSqlImpl()); + } + return "MAP{" + StringUtils.join(list, ", ") + "}"; + } + + @Override + protected void toThrift(TExprNode msg) { + msg.node_type = TExprNodeType.MAP_LITERAL; + TTypeDesc container = new TTypeDesc(); + container.setTypes(new ArrayList()); + type.toThrift(container); + msg.setType(container); + } + + @Override + public Expr clone() { + return new MapLiteral(this); + } + + @Override + public boolean isMinValue() { + return false; + } + + @Override + public int compareLiteral(LiteralExpr expr) { + return 0; // NOTE(review): every literal compares as equal here — presumably a placeholder, confirm + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int size = in.readInt(); + children = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + children.add(Expr.readIn(in)); + } + } + + public static MapLiteral read(DataInput in) throws IOException { + MapLiteral literal = new MapLiteral(); + literal.readFields(in); + return literal; + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeInt(children.size()); + for (Expr e : children) { + Expr.writeTo(e, out); + } + } + + @Override + public String getStringValue() { + List list = new ArrayList<>(children.size()); + for (int i = 0; i + 1 < children.size(); i += 2) { // render key:value pairs, the same shape toSqlImpl() produces + list.add(children.get(i).getStringValue() + ":" + children.get(i + 1).getStringValue()); + } + return 
"MAP{" + StringUtils.join(list, ", ") + "}"; + } + + @Override + public String getStringValueForArray() { + return null; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index cd7cde3cbc3f1a..8bf7d24e6dd0c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -59,6 +59,8 @@ public class Column implements Writable, GsonPostProcessable { public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__"; private static final String COLUMN_ARRAY_CHILDREN = "item"; public static final int COLUMN_UNIQUE_ID_INIT_VALUE = -1; + private static final String COLUMN_MAP_KEY = "key"; + private static final String COLUMN_MAP_VALUE = "value"; @SerializedName(value = "name") private String name; @@ -186,6 +188,11 @@ public void createChildrenColumn(Type type, Column column) { Column c = new Column(COLUMN_ARRAY_CHILDREN, ((ArrayType) type).getItemType()); c.setIsAllowNull(((ArrayType) type).getContainsNull()); column.addChildrenColumn(c); + } else if (type.isMapType()) { + Column k = new Column(COLUMN_MAP_KEY, ((MapType) type).getKeyType()); + Column v = new Column(COLUMN_MAP_VALUE, ((MapType) type).getValueType()); + column.addChildrenColumn(k); + column.addChildrenColumn(v); } } @@ -396,37 +403,79 @@ public TColumn toThrift() { return tColumn; } + + // Builds the thrift TColumn of one child column and appends it to tColumn.children_column, which the caller must have initialised. + private void setChildrenTColumn(Column children, TColumn tColumn) { + TColumn childrenTColumn = new TColumn(); + childrenTColumn.setColumnName(children.name); + + TColumnType childrenTColumnType = new TColumnType(); + childrenTColumnType.setType(children.getDataType().toThrift()); + childrenTColumnType.setLen(children.getStrLen()); + childrenTColumnType.setPrecision(children.getPrecision()); + childrenTColumnType.setScale(children.getScale()); + 
childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); + + childrenTColumn.setColumnType(childrenTColumnType); + childrenTColumn.setIsAllowNull(children.isAllowNull()); + // TODO: If we don't set the aggregate type for children, the type will be + // considered as TAggregationType::SUM after deserializing in BE. + // For now, we make children inherit the aggregate type from their parent. + if (tColumn.getAggregationType() != null) { + childrenTColumn.setAggregationType(tColumn.getAggregationType()); + } + + tColumn.children_column.add(childrenTColumn); + toChildrenThrift(children, childrenTColumn); + } + + private void toChildrenThrift(Column column, TColumn tColumn) { if (column.type.isArrayType()) { Column children = column.getChildren().get(0); - - TColumn childrenTColumn = new TColumn(); - childrenTColumn.setColumnName(children.name); - - TColumnType childrenTColumnType = new TColumnType(); - childrenTColumnType.setType(children.getDataType().toThrift()); - childrenTColumnType.setType(children.getDataType().toThrift()); - childrenTColumnType.setLen(children.getStrLen()); - childrenTColumnType.setPrecision(children.getPrecision()); - childrenTColumnType.setScale(children.getScale()); - - childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); - childrenTColumn.setColumnType(childrenTColumnType); - childrenTColumn.setIsAllowNull(children.isAllowNull()); - // TODO: If we don't set the aggregate type for children, the type will be - // considered as TAggregationType::SUM after deserializing in BE. - // For now, we make children inherit the aggregate type from their parent. 
- if (tColumn.getAggregationType() != null) { - childrenTColumn.setAggregationType(tColumn.getAggregationType()); - } - tColumn.setChildrenColumn(new ArrayList<>()); - tColumn.children_column.add(childrenTColumn); - - toChildrenThrift(children, childrenTColumn); + tColumn.setChildrenColumn(new ArrayList<>()); // the list must exist before setChildrenTColumn() appends to children_column + setChildrenTColumn(children, tColumn); + } else if (column.type.isMapType()) { + Column k = column.getChildren().get(0); + Column v = column.getChildren().get(1); + tColumn.setChildrenColumn(new ArrayList<>()); + setChildrenTColumn(k, tColumn); + setChildrenTColumn(v, tColumn); } } + //private void toChildrenThrift(Column column, TColumn tColumn) { + // if (column.type.isArrayType()) { + // Column children = column.getChildren().get(0); + + // TColumn childrenTColumn = new TColumn(); + // childrenTColumn.setColumnName(children.name); + + // TColumnType childrenTColumnType = new TColumnType(); + // childrenTColumnType.setType(children.getDataType().toThrift()); + // childrenTColumnType.setType(children.getDataType().toThrift()); + // childrenTColumnType.setLen(children.getStrLen()); + // childrenTColumnType.setPrecision(children.getPrecision()); + // childrenTColumnType.setScale(children.getScale()); + + // childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); + // childrenTColumn.setColumnType(childrenTColumnType); + // childrenTColumn.setIsAllowNull(children.isAllowNull()); + // // TODO: If we don't set the aggregate type for children, the type will be + // // considered as TAggregationType::SUM after deserializing in BE. + // // For now, we make children inherit the aggregate type from their parent. 
+ // if (tColumn.getAggregationType() != null) { + // childrenTColumn.setAggregationType(tColumn.getAggregationType()); + // } + + // tColumn.setChildrenColumn(new ArrayList<>()); + // tColumn.children_column.add(childrenTColumn); + + // toChildrenThrift(children, childrenTColumn); + // } + //} + public void checkSchemaChangeAllowed(Column other) throws DdlException { if (Strings.isNullOrEmpty(other.name)) { throw new DdlException("Dest column name is empty"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java index 1a746a23749736..b2e12bfed9bbcd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java @@ -17,18 +17,25 @@ package org.apache.doris.catalog; +import org.apache.doris.thrift.TColumnType; import org.apache.doris.thrift.TTypeDesc; import org.apache.doris.thrift.TTypeNode; import org.apache.doris.thrift.TTypeNodeType; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.gson.annotations.SerializedName; + +import java.util.Objects; /** * Describes a MAP type. MAP types have a scalar key and an arbitrarily-typed value. 
*/ public class MapType extends Type { + + @SerializedName(value = "keyType") private final Type keyType; + @SerializedName(value = "valueType") private final Type valueType; public MapType() { @@ -75,6 +82,30 @@ public String toSql(int depth) { keyType.toSql(depth + 1), valueType.toSql(depth + 1)); } + @Override + public boolean matchesType(Type t) { + if (equals(t)) { + return true; + } + + if (!t.isArrayType()) { + return false; + } + + if ((keyType.isNull() || ((MapType) t).getKeyType().isNull()) + && (valueType.isNull() || ((MapType) t).getKeyType().isNull())) { + return true; + } + + return keyType.matchesType(((MapType) t).keyType) + && (valueType.matchesType(((MapType) t).valueType)); + } + + @Override + public String toString() { + return toSql(0).toUpperCase(); + } + @Override protected String prettyPrint(int lpad) { String leftPadding = Strings.repeat(" ", lpad); @@ -88,6 +119,11 @@ protected String prettyPrint(int lpad) { return String.format("%sMAP<%s,%s>", leftPadding, keyType.toSql(), structStr); } + public static boolean canCastTo(MapType type, MapType targetType) { + return Type.canCastTo(type.getKeyType(), targetType.getKeyType()) + && Type.canCastTo(type.getValueType(), targetType.getValueType()); + } + @Override public void toThrift(TTypeDesc container) { TTypeNode node = new TTypeNode(); @@ -98,4 +134,16 @@ public void toThrift(TTypeDesc container) { keyType.toThrift(container); valueType.toThrift(container); } + + @Override + public TColumnType toColumnTypeThrift() { + TColumnType thrift = new TColumnType(); + thrift.type = PrimitiveType.MAP.toThrift(); + return thrift; + } + + @Override + public int hashCode() { + return Objects.hash(keyType, valueType); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java index 8ae8c76ed5f5ac..3fa1a059102cba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -1102,6 +1102,10 @@ public boolean isArrayType() { return this == ARRAY; } + public boolean isMapType() { + return this == MAP; + } + public boolean isComplexType() { return this == HLL || this == BITMAP; } @@ -1163,6 +1167,8 @@ public MysqlColType toMysqlType() { return MysqlColType.MYSQL_TYPE_BLOB; case JSONB: return MysqlColType.MYSQL_TYPE_JSON; + case MAP: + return MysqlColType.MYSQL_TYPE_MAP; default: return MysqlColType.MYSQL_TYPE_STRING; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 8c5517d01c8ec3..772669826568d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -501,7 +501,9 @@ public static boolean canCastTo(Type sourceType, Type targetType) { return ScalarType.canCastTo((ScalarType) sourceType, (ScalarType) targetType); } else if (sourceType.isArrayType() && targetType.isArrayType()) { return ArrayType.canCastTo((ArrayType) sourceType, (ArrayType) targetType); - } else if (targetType.isArrayType() && !((ArrayType) targetType).getItemType().isScalarType() + } else if (sourceType.isMapType() && targetType.isMapType()) { + return MapType.canCastTo((MapType) sourceType, (MapType) targetType); + } else if (targetType.isArrayType() && !((ArrayType) targetType).getItemType().isScalarType() && !sourceType.isNull()) { // TODO: current not support cast any non-array type(except for null) to nested array type. 
return false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java index 4dcb130087a533..877f4c28d06838 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java @@ -87,6 +87,7 @@ public class Util { TYPE_STRING_MAP.put(PrimitiveType.BITMAP, "bitmap"); TYPE_STRING_MAP.put(PrimitiveType.QUANTILE_STATE, "quantile_state"); TYPE_STRING_MAP.put(PrimitiveType.ARRAY, "Array<%s>"); + TYPE_STRING_MAP.put(PrimitiveType.MAP, "Map<%s,%s>"); TYPE_STRING_MAP.put(PrimitiveType.NULL_TYPE, "null"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java index d451b5ee383416..bc439d47106671 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java @@ -52,7 +52,8 @@ public enum MysqlColType { MYSQL_TYPE_BLOB(252, "BLOB"), MYSQL_TYPE_VARSTRING(253, "VAR STRING"), MYSQL_TYPE_STRING(254, "STRING"), - MYSQL_TYPE_GEOMETRY(255, "GEOMETRY"); + MYSQL_TYPE_GEOMETRY(255, "GEOMETRY"), + MYSQL_TYPE_MAP(256, "MAP"); private MysqlColType(int code, String desc) { this.code = code; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java index 7c365f7e133ea1..93c978a3fce9b4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java @@ -185,6 +185,9 @@ public void finalize(Analyzer analyzer) throws UserException { // corresponding output slot isn't being materialized) materializedResultExprLists.clear(); Preconditions.checkState(resultExprLists.size() == children.size()); + if (analyzer.getDescTbl().getTupleDesc(tupleId) == null) { + 
return; + } List slots = analyzer.getDescTbl().getTupleDesc(tupleId).getSlots(); for (int i = 0; i < resultExprLists.size(); ++i) { List exprList = resultExprLists.get(i); diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index d9c89907de3dfa..13ef418cffdf19 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -498,6 +498,8 @@ import org.apache.doris.qe.SqlModeHelper; tokenIdMap.put(new Integer(SqlParserSymbols.RPAREN), ")"); tokenIdMap.put(new Integer(SqlParserSymbols.LBRACKET), "["); tokenIdMap.put(new Integer(SqlParserSymbols.RBRACKET), "]"); + tokenIdMap.put(new Integer(SqlParserSymbols.LBRACE), "{"); + tokenIdMap.put(new Integer(SqlParserSymbols.RBRACE), "}"); tokenIdMap.put(new Integer(SqlParserSymbols.COLON), ":"); tokenIdMap.put(new Integer(SqlParserSymbols.SEMICOLON), ";"); tokenIdMap.put(new Integer(SqlParserSymbols.FLOATINGPOINT_LITERAL), @@ -659,6 +661,8 @@ EndOfLineComment = "--" !({HintContent}|{ContainsLineTerminator}) {LineTerminato "!" 
{ return newToken(SqlParserSymbols.NOT, null); } "<" { return newToken(SqlParserSymbols.LESSTHAN, null); } ">" { return newToken(SqlParserSymbols.GREATERTHAN, null); } +"{" { return newToken(SqlParserSymbols.LBRACE, null); } +"}" { return newToken(SqlParserSymbols.RBRACE, null); } "\"" { return newToken(SqlParserSymbols.UNMATCHED_STRING_LITERAL, null); } "'" { return newToken(SqlParserSymbols.UNMATCHED_STRING_LITERAL, null); } "`" { return newToken(SqlParserSymbols.UNMATCHED_STRING_LITERAL, null); } diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index 230deb51fffe80..c006af5e9fdd64 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -60,6 +60,9 @@ enum TExprNodeType { // for fulltext search MATCH_PRED, + + // for map + MAP_LITERAL, } //enum TAggregationOp { From 1c10e9458b16f26f2d97c0635cd333953eaee8f8 Mon Sep 17 00:00:00 2001 From: amorynan Date: Mon, 16 Jan 2023 15:06:24 +0800 Subject: [PATCH 02/11] update reader & writer for column map and add element_at function for map --- be/src/exprs/anyval_util.cpp | 6 + be/src/olap/field.h | 5 - .../olap/rowset/segment_v2/column_reader.cpp | 113 +++++++++++++++ be/src/olap/rowset/segment_v2/column_reader.h | 38 +++++ .../olap/rowset/segment_v2/column_writer.cpp | 97 +++---------- be/src/olap/rowset/segment_v2/column_writer.h | 9 +- be/src/olap/types.h | 25 +--- be/src/runtime/map_value.cpp | 12 +- be/src/runtime/map_value.h | 32 +---- be/src/vec/columns/column_map.h | 9 +- be/src/vec/data_types/data_type_factory.cpp | 4 + be/src/vec/data_types/data_type_map.cpp | 10 +- be/src/vec/data_types/data_type_map.h | 3 +- .../functions/array/function_array_element.h | 132 ++++++++++++++++-- be/src/vec/olap/olap_data_convertor.cpp | 36 +---- be/src/vec/olap/olap_data_convertor.h | 2 +- be/src/vec/sink/vmysql_result_writer.cpp | 66 +++++++++ .../org/apache/doris/catalog/FunctionSet.java | 5 +- .../org/apache/doris/catalog/MapType.java | 2 +- gensrc/script/doris_builtins_functions.py | 4 + 
gensrc/script/gen_builtins_functions.py | 6 + 21 files changed, 409 insertions(+), 207 deletions(-) diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index b83e04e9ce789f..26aa171bb1a9f8 100644 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -212,6 +212,12 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip out.children.push_back(column_type_to_type_desc(t)); } break; + case TYPE_MAP: + out.type = FunctionContext::TYPE_MAP; + for (const auto& t : type.children) { + out.children.push_back(column_type_to_type_desc(t)); + } + break; case TYPE_STRING: out.type = FunctionContext::TYPE_STRING; out.len = type.len; diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 8b834854f1845d..3ebef315dd856d 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -466,11 +466,6 @@ class MapField : public Field { // make variable_ptr memory allocate to cell_ptr as MapValue char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { - auto m = (MapValue*)cell_ptr; - - m->set_key_null_signs(reinterpret_cast(variable_ptr)); - m->set_value_null_signs(reinterpret_cast(variable_ptr)); - return variable_ptr + _length; } diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 6f49aa23cf49f9..f1f21b3506c586 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -31,6 +31,7 @@ #include "util/rle_encoding.h" // for RleDecoder #include "vec/columns/column.h" #include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" #include "vec/core/types.h" #include "vec/runtime/vdatetime_value.h" //for VecDateTime @@ -83,6 +84,34 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB& *reader = std::move(array_reader); return Status::OK(); } + case FieldType::OLAP_FIELD_TYPE_MAP: { + // map reader now has 3 sub readers for key(arr), 
value(arr), null(scala) + std::unique_ptr map_reader( + new ColumnReader(opts, meta, num_rows, file_reader)); + std::unique_ptr key_reader; + RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(0), + num_rows, file_reader, + &key_reader)); + std::unique_ptr val_reader; + RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(1), + num_rows, file_reader, + &val_reader)); + std::unique_ptr null_reader; + if (meta.is_nullable()) { + RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(2), + meta.children_columns(2).num_rows(), + file_reader, &null_reader)); + } + map_reader->_sub_readers.resize(meta.children_columns_size()); + + map_reader->_sub_readers[0] = std::move(key_reader); + map_reader->_sub_readers[1] = std::move(val_reader); + if (meta.is_nullable()) { + map_reader->_sub_readers[2] = std::move(null_reader); + } + *reader = std::move(map_reader); + return Status::OK(); + } default: return Status::NotSupported("unsupported type for ColumnReader: {}", std::to_string(type)); @@ -449,6 +478,19 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { null_iterator); return Status::OK(); } + case FieldType::OLAP_FIELD_TYPE_MAP: { + ColumnIterator* key_iterator = nullptr; + RETURN_IF_ERROR(_sub_readers[0]->new_iterator(&key_iterator)); + ColumnIterator* val_iterator = nullptr; + RETURN_IF_ERROR(_sub_readers[1]->new_iterator(&val_iterator)); + ColumnIterator* null_iterator = nullptr; + if (is_nullable()) { + RETURN_IF_ERROR(_sub_readers[2]->new_iterator(&null_iterator)); + } + *iterator = new MapFileColumnIterator(this,null_iterator, + key_iterator, val_iterator); + return Status::OK(); + } default: return Status::NotSupported("unsupported type to create iterator: {}", std::to_string(type)); @@ -456,6 +498,77 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { } } +///====================== MapFileColumnIterator ============================//// +MapFileColumnIterator::MapFileColumnIterator(ColumnReader* 
reader, ColumnIterator* null_iterator, + ColumnIterator* key_iterator, + ColumnIterator* val_iterator) + : _map_reader(reader) { + _key_iterator.reset(key_iterator); + _val_iterator.reset(val_iterator); + if (_map_reader->is_nullable()) { + _null_iterator.reset(null_iterator); + } +} + +Status MapFileColumnIterator::init(const ColumnIteratorOptions& opts) { + RETURN_IF_ERROR(_key_iterator->init(opts)); + RETURN_IF_ERROR(_val_iterator->init(opts)); + if (_map_reader->is_nullable()) { + RETURN_IF_ERROR(_null_iterator->init(opts)); + } + return Status::OK(); +} + +Status MapFileColumnIterator::_peek_one_offset(ordinal_t* offset) { + return Status::OK(); +} + +Status MapFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) { + return Status::OK(); +} + + +Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { + RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(ord)); + RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(ord)); + if (_map_reader->is_nullable()) { + RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord)); + } + return Status::OK(); +} + +Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { + const auto* column_map = vectorized::check_and_get_column( + dst->is_nullable() ? 
static_cast(*dst).get_nested_column() + : *dst); + auto column_key_ptr = column_map->get_keys().assume_mutable(); + auto column_val_ptr = column_map->get_values().assume_mutable(); + RETURN_IF_ERROR(_key_iterator->next_batch(n, column_key_ptr, has_null)); + RETURN_IF_ERROR(_val_iterator->next_batch(n, column_val_ptr, has_null)); + + if (dst->is_nullable()) { + auto null_map_ptr = + static_cast(*dst).get_null_map_column_ptr(); + size_t num_read = *n; + bool null_signs_has_null = false; + RETURN_IF_ERROR(_null_iterator->next_batch(&num_read, null_map_ptr, &null_signs_has_null)); + DCHECK(num_read == *n); + } + return Status::OK(); +} + +Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) { + for (size_t i = 0; i < count; ++i) { + RETURN_IF_ERROR(seek_to_ordinal(rowids[i])); + size_t num_read = 1; + RETURN_IF_ERROR(next_batch(&num_read, dst, nullptr)); + DCHECK(num_read == 1); + } + return Status::OK(); +} + //////////////////////////////////////////////////////////////////////////////// ArrayFileColumnIterator::ArrayFileColumnIterator(ColumnReader* reader, diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index c5dd729d82c82f..5ecb75be09584a 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -393,6 +393,44 @@ class EmptyFileColumnIterator final : public ColumnIterator { ordinal_t get_current_ordinal() const override { return 0; } }; +// This iterator is used to read map value column +class MapFileColumnIterator final : public ColumnIterator { +public: + explicit MapFileColumnIterator(ColumnReader* reader, ColumnIterator* null_iterator, + ColumnIterator* key_iterator, ColumnIterator* val_iterator); + + ~MapFileColumnIterator() override = default; + + Status init(const ColumnIteratorOptions& opts) override; + + Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) 
override; + + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) override; + + Status seek_to_first() override { + RETURN_IF_ERROR(_key_iterator->seek_to_first()); + RETURN_IF_ERROR(_val_iterator->seek_to_first()); + return Status::OK(); + } + + Status seek_to_ordinal(ordinal_t ord) override; + + ordinal_t get_current_ordinal() const override { + return _key_iterator->get_current_ordinal(); + } + +private: + ColumnReader* _map_reader; // need ? + std::unique_ptr _null_iterator; + std::unique_ptr _key_iterator; // ArrayFileColumnIterator + std::unique_ptr _val_iterator; // ArrayFileColumnIterator + + Status _peek_one_offset(ordinal_t* offset); +}; + class ArrayFileColumnIterator final : public ColumnIterator { public: explicit ArrayFileColumnIterator(ColumnReader* reader, FileColumnIterator* offset_reader, diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 77522b3eecc5f7..b0ac786d6a7534 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -179,33 +179,6 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* const TabletColumn& key_column = column->get_sub_column(0); // field_type is true key and value const TabletColumn& value_column = column->get_sub_column(1); - // create length writer - FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; - - ColumnWriterOptions length_options; - length_options.meta = opts.meta->add_children_columns(); - length_options.meta->set_column_id(2); - length_options.meta->set_unique_id(2); - length_options.meta->set_type(length_type); - length_options.meta->set_is_nullable(false); - length_options.meta->set_length( - get_scalar_type_info()->size()); - length_options.meta->set_encoding(DEFAULT_ENCODING); - 
length_options.meta->set_compression(opts.meta->compression()); - - length_options.need_zone_map = false; - length_options.need_bloom_filter = false; - length_options.need_bitmap_index = false; - - TabletColumn length_column = TabletColumn( - OLAP_FIELD_AGGREGATION_NONE, length_type, length_options.meta->is_nullable(), - length_options.meta->unique_id(), length_options.meta->length()); - length_column.set_name("length"); - length_column.set_index_length(-1); // no short key index - std::unique_ptr bigint_field(FieldFactory::create(length_column)); - auto* length_writer = - new ScalarColumnWriter(length_options, std::move(bigint_field), file_writer); - // create null writer ScalarColumnWriter* null_writer = nullptr; if (opts.meta->is_nullable()) { @@ -226,7 +199,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_options.need_bitmap_index = false; TabletColumn null_column = TabletColumn( - OLAP_FIELD_AGGREGATION_NONE, null_type, length_options.meta->is_nullable(), + OLAP_FIELD_AGGREGATION_NONE, null_type, false, null_options.meta->unique_id(), null_options.meta->length()); null_column.set_name("nullable"); null_column.set_index_length(-1); // no short key index @@ -254,7 +227,8 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* key_opts.meta->set_encoding(BIT_SHUFFLE); key_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F); key_opts.need_zone_map = false; - key_opts.meta->set_is_nullable(true); + // no need key array's null map + key_opts.meta->set_is_nullable(false); ColumnMetaPB* child_meta = key_opts.meta->add_children_columns(); child_meta->set_column_id(5); @@ -289,7 +263,8 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* val_opts.meta->set_encoding(BIT_SHUFFLE); val_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F); val_opts.need_zone_map = false; - val_opts.meta->set_is_nullable(true); + // no need map value array nullable + 
val_opts.meta->set_is_nullable(false); ColumnMetaPB* child_v_meta = val_opts.meta->add_children_columns(); child_v_meta->set_column_id(7); @@ -305,7 +280,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* ColumnWriter::create(val_opts, &val_list_column, file_writer, &value_writer)); // finally create map writer std::unique_ptr writer_local = std::unique_ptr( - new MapColumnWriter(opts, std::move(field), length_writer, null_writer, + new MapColumnWriter(opts, std::move(field), null_writer, std::move(key_writer), std::move(value_writer))); *writer = std::move(writer_local); @@ -835,22 +810,19 @@ Status ArrayColumnWriter::finish_current_page() { /// ============================= MapColumnWriter =====================//// MapColumnWriter::MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* offset_writer, ScalarColumnWriter* null_writer, std::unique_ptr key_writer, - std::unique_ptr value_writer) + std::unique_ptr value_writer) : ColumnWriter(std::move(field), opts.meta->is_nullable()), _key_writer(std::move(key_writer)), _value_writer(std::move(value_writer)), _opts(opts) { - _offset_writer.reset(offset_writer); if (is_nullable()) { _null_writer.reset(null_writer); } } Status MapColumnWriter::init() { - RETURN_IF_ERROR(_offset_writer->init()); if (is_nullable()) { RETURN_IF_ERROR(_null_writer->init()); } @@ -859,14 +831,8 @@ Status MapColumnWriter::init() { return Status::OK(); } -Status MapColumnWriter::put_extra_info_in_page(DataPageFooterPB* footer) { -// footer->set_next_array_item_ordinal(_key_writer->get_next_rowid()); -// footer->set_next_array_item_ordinal(_value_writer->get_next_rowid()); - return Status::OK(); -} uint64_t MapColumnWriter::estimate_buffer_size() { - size_t estimate = _offset_writer->estimate_buffer_size() + - _key_writer->estimate_buffer_size() + + size_t estimate = _key_writer->estimate_buffer_size() + _value_writer->estimate_buffer_size(); if (is_nullable()) { 
estimate += _null_writer->estimate_buffer_size(); @@ -878,7 +844,6 @@ Status MapColumnWriter::finish() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->finish()); } - RETURN_IF_ERROR(_offset_writer->finish()); RETURN_IF_ERROR(_key_writer->finish()); RETURN_IF_ERROR(_value_writer->finish()); return Status::OK(); @@ -890,29 +855,12 @@ Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { const auto* col_cursor = reinterpret_cast(*ptr); while (remaining > 0) { size_t num_written = 1; - ordinal_t next_item_ordinal = _offset_writer->get_next_rowid(); - RETURN_IF_ERROR(_offset_writer->append_data_in_current_page( - reinterpret_cast(&next_item_ordinal), &num_written)); - if (num_written < - 1) { // page is full, write first item offset and update current length page's start ordinal - RETURN_IF_ERROR(_offset_writer->finish_current_page()); - } else { - // write child item. - if (_key_writer->is_nullable()) { - auto* key_data_ptr = const_cast(col_cursor)->mutable_key_data(); - for (size_t i = 0; i < col_cursor->length(); ++i) { - RETURN_IF_ERROR(_key_writer->append(col_cursor->is_key_null_at(i), key_data_ptr)); - key_data_ptr = (uint8_t*)key_data_ptr + _key_writer->get_field()->size(); - } - } - if (_value_writer->is_nullable()) { - auto* val_data_ptr = const_cast(col_cursor)->mutable_value_data(); - for (size_t i = 0; i < col_cursor->length(); ++i) { - RETURN_IF_ERROR(_value_writer->append(col_cursor->is_val_null_at(i), val_data_ptr)); - val_data_ptr = (uint8_t*)val_data_ptr + _value_writer->get_field()->size(); - } - } - } + auto* key_data_ptr = const_cast(col_cursor)->mutable_key_data(); + const uint8_t* key_ptr = (const uint8_t*)key_data_ptr; + RETURN_IF_ERROR(_key_writer->append_data(&key_ptr, 1)); + auto* val_data_ptr = const_cast(col_cursor)->mutable_value_data(); + const uint8_t* val_ptr = (const uint8_t*)val_data_ptr; + RETURN_IF_ERROR(_value_writer->append_data(&val_ptr, 1)); remaining -= num_written; col_cursor += num_written; *ptr += 
num_written * sizeof(MapValue); @@ -938,23 +886,13 @@ Status MapColumnWriter::write_ordinal_index() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->write_ordinal_index()); } - if (!has_empty_items()) { - RETURN_IF_ERROR(_offset_writer->write_ordinal_index()); - RETURN_IF_ERROR(_key_writer->write_ordinal_index()); - RETURN_IF_ERROR(_value_writer->write_ordinal_index()); - } + RETURN_IF_ERROR(_key_writer->write_ordinal_index()); + RETURN_IF_ERROR(_value_writer->write_ordinal_index()); + return Status::OK(); } Status MapColumnWriter::append_nulls(size_t num_rows) { - size_t num_lengths = num_rows; - const ordinal_t offset = get_next_rowid(); - while (num_lengths > 0) { - // TODO llj bulk write - const auto* offset_ptr = reinterpret_cast(&offset); - RETURN_IF_ERROR(_offset_writer->append_data(&offset_ptr, 1)); - --num_lengths; - } return write_null_column(num_rows, true); } @@ -973,7 +911,6 @@ Status MapColumnWriter::finish_current_page() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->finish_current_page()); } - RETURN_IF_ERROR(_offset_writer->finish_current_page()); RETURN_IF_ERROR(_key_writer->finish_current_page()); RETURN_IF_ERROR(_value_writer->finish_current_page()); return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index f95713fa0627c2..6cc6259d6ee6b4 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -324,7 +324,6 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* offset_writer, ScalarColumnWriter* null_writer, std::unique_ptr key_writer, std::unique_ptr value_writer); @@ -363,15 +362,9 @@ class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { } return Status::OK(); } - 
ordinal_t get_next_rowid() const override { return _offset_writer->get_next_rowid(); } - -private: - Status put_extra_info_in_page(DataPageFooterPB* header) override; - Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记 - bool has_empty_items() const { return _offset_writer->get_next_rowid() == 0; } + ordinal_t get_next_rowid() const override { return _key_writer->get_next_rowid(); } private: - std::unique_ptr _offset_writer; std::unique_ptr _null_writer; std::unique_ptr _key_writer; std::unique_ptr _value_writer; diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 5b4359071896b0..2c20c50ec45ed3 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -441,19 +441,7 @@ class MapTypeInfo : public TypeInfo { inline bool equal(const void* left, const void* right) const override { auto l_value = reinterpret_cast(left); auto r_value = reinterpret_cast(right); - if (l_value->size() != r_value->size()) { - return false; - } - uint32_t size = l_value->size(); - for (size_t i = 0; i < size; ++i) { - if ((l_value->is_key_null_at(i) && r_value->is_key_null_at(i)) - && (l_value->is_val_null_at(i) && r_value->is_val_null_at(i))) { - continue; - } else { - return false; - } - } - return true; + return l_value->size() == r_value->size(); } int cmp(const void* left, const void* right) const override { @@ -461,16 +449,7 @@ class MapTypeInfo : public TypeInfo { auto r_value = reinterpret_cast(right); uint32_t l_size = l_value->size(); uint32_t r_size = r_value->size(); - size_t cur = 0; - while (cur < l_size && cur < r_size) { - if ((l_value->is_key_null_at(cur) && r_value->is_key_null_at(cur)) - && (l_value->is_val_null_at(cur) && r_value->is_val_null_at(cur))) { - ++cur; - } else { - return -1; - } - } - if (l_size < r_size) { + if (l_size < r_size) { return -1; } else if (l_size > r_size) { return 1; diff --git a/be/src/runtime/map_value.cpp b/be/src/runtime/map_value.cpp index 16751b8cef315a..5287a958226dfe 100644 --- 
a/be/src/runtime/map_value.cpp +++ b/be/src/runtime/map_value.cpp @@ -24,26 +24,16 @@ void MapValue::to_map_val(MapVal* val) const { val->length = _length; val->key = _key_data; val->value = _value_data; - val->key_null_signs = _key_null_signs; - val->value_null_signs = _val_null_signs; } void MapValue::shallow_copy(const MapValue* value) { _length = value->_length; - _key_null_signs = value->_key_null_signs; - _val_null_signs = value->_val_null_signs; _key_data = value->_key_data; _value_data = value->_value_data; } -void MapValue::copy_null_signs(const MapValue* other) { - // todo(amory): here need to judge? - memcpy(_key_null_signs, other->_key_null_signs, other->size()); - memcpy(_val_null_signs, other->_val_null_signs, other->size()); -} - MapValue MapValue::from_map_val(const MapVal& val) { - return MapValue(val.key, val.value, val.length, val.key_null_signs, val.value_null_signs); + return MapValue(val.key, val.value, val.length); } diff --git a/be/src/runtime/map_value.h b/be/src/runtime/map_value.h index 50f4f1a2a99282..488df38ab61b66 100644 --- a/be/src/runtime/map_value.h +++ b/be/src/runtime/map_value.h @@ -43,24 +43,6 @@ class MapValue { MapValue(void* k_data, void* v_data, int32_t length) : _key_data(k_data), _value_data(v_data), _length(length) {} - MapValue(void* k_data, void* v_data, int32_t length, bool* _null_signs, bool is_key_null_signs) - : _key_data(k_data), _value_data(v_data), _length(length) { - if (is_key_null_signs) { - _key_null_signs = _null_signs; - } else { - _val_null_signs = _null_signs; - } - } - - MapValue(void* k_data, void* v_data, int32_t length, bool* key_null_signs, bool* value_null_signs) - : _key_data(k_data), _value_data(v_data), _length(length), _key_null_signs(key_null_signs), _val_null_signs(value_null_signs) {} - - - void set_key_has_null(bool has_null) { _key_has_null = has_null; } - void set_val_has_null(bool has_null) { _val_has_null = has_null; } - bool is_key_null_at(int32_t index) const { return 
this->_key_has_null && this->_key_null_signs[index]; } - bool is_val_null_at(int32_t index) const { return this->_val_has_null && this->_val_null_signs[index]; } - void to_map_val(MapVal* val) const; int32_t size() const { return _length; } @@ -69,31 +51,23 @@ class MapValue { void shallow_copy(const MapValue* other); - void copy_null_signs(const MapValue* other); - static MapValue from_map_val(const MapVal& val); const void* key_data() const { return _key_data; } void* mutable_key_data() const { return _key_data; } const void* value_data() const { return _value_data; } void* mutable_value_data() const { return _value_data; } - const bool* key_null_signs() const { return _key_null_signs; } - const bool* value_null_signs() const { return _val_null_signs; } - void set_key_null_signs(bool* null_signs) { _key_null_signs = null_signs; } - void set_value_null_signs(bool* null_signs) { _val_null_signs = null_signs; } + void set_length(int32_t length) { _length = length; } void set_key(void* data) { _key_data = data; } void set_value(void* data) { _value_data = data; } private: - // child column data + // child column data pointer void* _key_data; void* _value_data; + // length for map size int32_t _length; - bool _key_has_null; - bool _val_has_null; - bool* _key_null_signs; - bool* _val_null_signs; };//map-value } // namespace doris diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 1bb95b352b9376..f24bdc3ac99528 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -39,6 +39,11 @@ class ColumnMap final : public COWHelper { callback(values); } + void clear() override { + keys->clear(); + values->clear(); + } + MutableColumnPtr clone_resized(size_t size) const override; bool can_be_inside_nullable() const override { return true; } @@ -54,7 +59,7 @@ class ColumnMap final : public COWHelper { void insert_default() override; void pop_back(size_t n) override; - + bool is_column_map() const override { return 
true; } StringRef serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const override; const char * deserialize_and_insert_from_arena(const char * pos) override; @@ -125,4 +130,4 @@ class ColumnMap final : public COWHelper { ColumnMap(const ColumnMap &) = default; }; -} \ No newline at end of file +} diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index cf661bb85a6c9a..6a34b50a445346 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -28,6 +28,10 @@ DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) { if (col_desc.type() == OLAP_FIELD_TYPE_ARRAY) { DCHECK(col_desc.get_sub_field_count() == 1); nested = std::make_shared(create_data_type(*col_desc.get_sub_field(0))); + } else if (col_desc.type() == OLAP_FIELD_TYPE_MAP) { + DCHECK(col_desc.get_sub_field_count() == 2); + nested = std::make_shared( + create_data_type(*col_desc.get_sub_field(0)), create_data_type(*col_desc.get_sub_field(1))); } else { nested = _create_primitive_data_type(col_desc.type(), col_desc.get_precision(), col_desc.get_scale()); diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp index e9f8ae7d9d3853..8b81f51779f399 100644 --- a/be/src/vec/data_types/data_type_map.cpp +++ b/be/src/vec/data_types/data_type_map.cpp @@ -50,9 +50,9 @@ std::string DataTypeMap::to_string(const IColumn& column, size_t row_num) const { if (i != offset) ss << ", "; - ss << "'" << key_type->to_string(nested_keys, i); + ss << "'" << keys->to_string(nested_keys, i); ss << ':'; - ss << "'" << value_type->to_string(nested_values, i); + ss << "'" << values->to_string(nested_values, i); } ss << "}"; return ss.str(); @@ -74,9 +74,9 @@ void DataTypeMap::to_string(const class doris::vectorized::IColumn& column, size { if (i != offset) ostr.write(", ", 2); - key_type->to_string(nested_keys, i, ostr); + keys->to_string(nested_keys, i, 
ostr); ostr.write(":", 1); - value_type->to_string(nested_values, i, ostr); + values->to_string(nested_values, i, ostr); } ostr.write("}", 1); } @@ -154,4 +154,4 @@ const char* DataTypeMap::deserialize(const char* buf, IColumn* column, int data_ return get_values()->deserialize(buf, map_column->get_values_ptr()->assume_mutable(), data_version); } -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_map.h b/be/src/vec/data_types/data_type_map.h index 2a7c06bdc3d2a4..cc66f5fa53c84b 100644 --- a/be/src/vec/data_types/data_type_map.h +++ b/be/src/vec/data_types/data_type_map.h @@ -77,4 +77,5 @@ class DataTypeMap final : public IDataType }; -} \ No newline at end of file +} + diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h index 6722e09e9c3e75..1f7be28c584fce 100644 --- a/be/src/vec/functions/array/function_array_element.h +++ b/be/src/vec/functions/array/function_array_element.h @@ -23,8 +23,10 @@ #include "vec/columns/column_array.h" #include "vec/columns/column_const.h" +#include "vec/columns/column_map.h" #include "vec/columns/column_string.h" #include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_number.h" #include "vec/functions/function.h" #include "vec/functions/function_helpers.h" @@ -44,12 +46,18 @@ class FunctionArrayElement : public IFunction { size_t get_number_of_arguments() const override { return 2; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - DCHECK(is_array(arguments[0])) - << "first argument for function: " << name << " should be DataTypeArray"; - DCHECK(is_integer(arguments[1])) - << "second argument for function: " << name << " should be Integer"; - return make_nullable( - check_and_get_data_type(arguments[0].get())->get_nested_type()); + DCHECK(is_array(arguments[0]) || 
is_map(arguments[0])) + << "first argument for function: " << name << " should be DataTypeArray or DataTypeMap"; + if (is_array(arguments[0])) { + DCHECK(is_integer(arguments[1])) << "second argument for function: " << name << " should be Integer for array element"; + return make_nullable( + check_and_get_data_type(arguments[0].get())->get_nested_type()); + } else { + return make_nullable( + check_and_get_data_type(arguments[0].get())->get_value_type()); + } + + } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, @@ -68,8 +76,13 @@ class FunctionArrayElement : public IFunction { } else { args = {col_left, block.get_by_position(arguments[1])}; } - - auto res_column = _execute_non_nullable(args, input_rows_count, src_null_map, dst_null_map); + ColumnPtr res_column = nullptr; + if (args[0].column->is_column_map()) { + res_column = _execute_map(args, input_rows_count, src_null_map, dst_null_map); + }else { + res_column = + _execute_non_nullable(args, input_rows_count, src_null_map, dst_null_map); + } if (!res_column) { return Status::RuntimeError("unsupported types for function {}({}, {})", get_name(), block.get_by_position(arguments[0]).type->get_name(), @@ -81,6 +94,79 @@ class FunctionArrayElement : public IFunction { } private: + //=========================== map element===========================// + ColumnPtr _get_mapped_idx(const ColumnArray& key_column, + const ColumnWithTypeAndName& argument) { + if (key_column.get_data().is_column_string()) { + return _mapped_key_string(key_column, argument); + } + return nullptr; + } + + ColumnPtr _get_mapped_value(const ColumnArray& val_column, + const IColumn& matched_indices, + const UInt8* src_null_map, + UInt8* dst_null_map) { + const UInt8* nested_null_map = nullptr; + ColumnPtr nested_column = nullptr; + if (is_column_nullable(val_column.get_data())) { + const auto& nested_null_column = + reinterpret_cast(val_column.get_data()); + nested_null_map = 
nested_null_column.get_null_map_column().get_data().data(); + nested_column = nested_null_column.get_nested_column_ptr(); + } else { + nested_column = val_column.get_data_ptr(); + } + if (check_column(nested_column)) { + return _execute_number(val_column.get_offsets(), *nested_column, + src_null_map, matched_indices, + nested_null_map, dst_null_map); + } else if (check_column(nested_column)) { + _execute_number(val_column.get_offsets(), *nested_column, + src_null_map, matched_indices, + nested_null_map, dst_null_map); + } + return nullptr; + } + + ColumnPtr _mapped_key_string(const ColumnArray& column, + const ColumnWithTypeAndName& argument) { + auto right_column = argument.column->convert_to_full_column_if_const(); + const ColumnString& match_key = reinterpret_cast(*right_column); + const ColumnArray::Offsets64& offsets = column.get_offsets(); + ColumnPtr nested_ptr = nullptr; + if (is_column_nullable(column.get_data())) { + nested_ptr = reinterpret_cast(column.get_data()).get_nested_column_ptr(); + } else { + nested_ptr = column.get_data_ptr(); + } + const ColumnString& nested_key = reinterpret_cast(*nested_ptr); + size_t rows = offsets.size(); + // prepare return data + auto matched_indices = ColumnVector::create(); + matched_indices->reserve(rows); + + for (size_t i = 0; i < rows; i++) + { + bool matched = false; + size_t begin = offsets[i - 1]; + size_t end = offsets[i]; + for (size_t j = begin; j < end; j++) { + if (nested_key.get_data_at(j) == match_key.get_data_at(i)) { + matched_indices->insert_value(j-begin+1); + matched = true; + break; + } + } + + if (!matched) + matched_indices->insert_value(end-begin+1); // make indices for null + } + + return matched_indices; + } + + template ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, @@ -176,6 +262,36 @@ class FunctionArrayElement : public IFunction { return dst_column; } + ColumnPtr _execute_map(const 
ColumnsWithTypeAndName& arguments, + size_t input_rows_count, const UInt8* src_null_map, + UInt8* dst_null_map) { + + auto left_column = arguments[0].column->convert_to_full_column_if_const(); + DataTypePtr val_type = reinterpret_cast(*arguments[0].type).get_values(); + const auto& map_column = reinterpret_cast(*left_column); + + const ColumnArray& column_keys = assert_cast (map_column.get_keys()); +// const ColumnArray& column_vals = assert_cast (map_column.get_values()); + + const auto& offsets = column_keys.get_offsets(); + const size_t rows = offsets.size(); + + if (rows <= 0) { + return nullptr; + } + + ColumnPtr matched_indices = _get_mapped_idx(column_keys, arguments[1]); + if (!matched_indices) { + return nullptr; + } + DataTypePtr indices_type(std::make_shared()); + ColumnWithTypeAndName indices(matched_indices, indices_type, "indices"); + ColumnWithTypeAndName data(map_column.get_values_ptr(), val_type, "value"); + ColumnsWithTypeAndName args = {data, indices}; + return _execute_non_nullable(args, input_rows_count, src_null_map, dst_null_map); + //return _get_mapped_value(column_vals, *matched_indices, src_null_map, dst_null_map); + } + ColumnPtr _execute_non_nullable(const ColumnsWithTypeAndName& arguments, size_t input_rows_count, const UInt8* src_null_map, UInt8* dst_null_map) { diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index dc8521c9be6397..e7a1f208436ba0 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -741,44 +741,36 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { assert(column_map); assert(data_type_map); - return convert_to_olap(_nullmap, column_map, data_type_map); + return convert_to_olap(column_map, data_type_map); } Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( - const UInt8* null_map, const ColumnMap* column_map, + const ColumnMap* column_map, const DataTypeMap* 
data_type_map) { - const UInt8* key_null_map = nullptr; - const UInt8* value_null_map = nullptr; ColumnPtr key_data = column_map->get_keys_ptr(); ColumnPtr value_data = column_map->get_values_ptr(); if (column_map->get_keys().is_nullable()) { const auto& key_nullable_column = assert_cast(column_map->get_keys()); - key_null_map = key_nullable_column.get_null_map_data().data(); key_data = key_nullable_column.get_nested_column_ptr(); } if (column_map->get_values().is_nullable()) { const auto& val_nullable_column = assert_cast(column_map->get_values()); - value_null_map = val_nullable_column.get_null_map_data().data(); value_data = val_nullable_column.get_nested_column_ptr(); } const auto& offsets = column_map->get_offsets(); // use keys offsets - int64_t start_index = _row_pos - 1; - // int64_t end_index = _row_pos + _num_rows - 1; - // auto start = offsets[start_index]; - // auto size = offsets[end_index] - start; ColumnWithTypeAndName key_typed_column = { - key_data, remove_nullable(data_type_map->get_keys()),""}; + key_data, remove_nullable(data_type_map->get_keys()),"map.key"}; _key_convertor->set_source_column(key_typed_column, _row_pos, _num_rows); _key_convertor->convert_to_olap(); ColumnWithTypeAndName value_typed_column = { - value_data, remove_nullable(data_type_map->get_values()), ""}; + value_data, remove_nullable(data_type_map->get_values()), "map.value"}; _value_convertor->set_source_column(value_typed_column, _row_pos, _num_rows); _value_convertor->convert_to_olap(); @@ -789,7 +781,6 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( if (_nullmap && _nullmap[cur_pos]) { continue; } - auto offset = offsets[prev_pos]; auto single_map_size = offsets[cur_pos] - offsets[prev_pos]; new (map_value) MapValue(single_map_size); @@ -797,25 +788,10 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( continue; } - if (column_map->get_keys().is_nullable()) { - map_value->set_key_has_null(true); - 
map_value->set_key_null_signs( - const_cast(reinterpret_cast(key_null_map + offset))); - } else { - map_value->set_key_has_null(false); - } - if (column_map->get_values().is_nullable()) { - map_value->set_val_has_null(true); - map_value->set_value_null_signs( - const_cast(reinterpret_cast(value_null_map + offset))); - } else { - map_value->set_val_has_null(false); - } - map_value->set_key( - const_cast(_key_convertor->get_data_at(offset - offsets[start_index]))); + const_cast(_key_convertor->get_data_at(i))); map_value->set_value( - const_cast(_value_convertor->get_data_at(offset - offsets[start_index]))); + const_cast(_value_convertor->get_data_at(i))); } return Status::OK(); diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 4888d42237fffd..38532fc4f02f55 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -385,7 +385,7 @@ class OlapBlockDataConvertor { Status convert_to_olap() override; private: - Status convert_to_olap(const UInt8* null_map, const ColumnMap* column_map, + Status convert_to_olap(const ColumnMap* column_map, const DataTypeMap* data_type_map); OlapColumnDataConvertorBaseUPtr _key_convertor; OlapColumnDataConvertorBaseUPtr _value_convertor; diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index 4c7f5028d70273..013e4982339b53 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -26,9 +26,11 @@ #include "vec/columns/column_complex.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" +#include "vec/columns/column_map.h" #include "vec/common/assert_cast.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_map.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" #include "vec/runtime/vdatetime_value.h" @@ -186,6 +188,58 @@ Status 
VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, _buffer.close_dynamic_mode(); result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } + } else if constexpr (type == TYPE_MAP) { + auto& column_map = assert_cast(*column); + auto& offsets = column_map.get_offsets(); + auto& column_key_array = assert_cast(column_map.get_keys()); + auto& column_val_array = assert_cast(column_map.get_values()); + auto& map_type = assert_cast(*nested_type_ptr); + auto& key_nested_type_ptr = map_type.get_key_type(); + auto& val_nested_type_ptr = map_type.get_value_type(); + for (ssize_t i = 0; i < row_size; ++i) { + if (0 != buf_ret) { + return Status::InternalError("pack mysql buffer failed."); + } + _buffer.reset(); + + _buffer.open_dynamic_mode(); + buf_ret = _buffer.push_string("{", 1); + bool begin = true; + for (auto j = offsets[i - 1]; j < offsets[i]; ++j) { + if (!begin) { + buf_ret = _buffer.push_string(", ", 2); + } + const auto& key_data = column_key_array.get_data_ptr(); + if (key_data->is_null_at(j)) { + buf_ret = _buffer.push_string("NULL", strlen("NULL")); + } else { + if (WhichDataType(remove_nullable(key_nested_type_ptr)).is_string()) { + buf_ret = _buffer.push_string("'", 1); + buf_ret = _add_one_cell(key_data, j, key_nested_type_ptr, _buffer); + buf_ret = _buffer.push_string("'", 1); + } else { + buf_ret = _add_one_cell(key_data, j, key_nested_type_ptr, _buffer); + } + } + buf_ret = _buffer.push_string(":", 1); + const auto& val_data = column_val_array.get_data_ptr(); + if (val_data->is_null_at(j)) { + buf_ret = _buffer.push_string("NULL", strlen("NULL")); + } else { + if (WhichDataType(remove_nullable(val_nested_type_ptr)).is_string()) { + buf_ret = _buffer.push_string("'", 1); + buf_ret = _add_one_cell(val_data, j, val_nested_type_ptr, _buffer); + buf_ret = _buffer.push_string("'", 1); + } else { + buf_ret = _add_one_cell(val_data, j, val_nested_type_ptr, _buffer); + } + } + begin = false; + } + buf_ret = _buffer.push_string("}", 1); + 
_buffer.close_dynamic_mode(); + result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); + } } else if constexpr (type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || type == TYPE_DECIMAL128I) { for (int i = 0; i < row_size; ++i) { @@ -655,6 +709,18 @@ Status VMysqlResultWriter::append_block(Block& input_block) { } break; } + case TYPE_MAP: { + if (type_ptr->is_nullable()) { + auto& nested_type = + assert_cast(*type_ptr).get_nested_type(); //for map + status = _add_one_column(column_ptr, result, + nested_type); + } else { + status = _add_one_column(column_ptr, result, + type_ptr); + } + break; + } default: { LOG(WARNING) << "can't convert this type to mysql type. type = " << _output_vexpr_ctxs[i]->root()->type(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 1d93829d83a65c..350f805428306e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1271,10 +1271,9 @@ public static boolean isCastMatchAllowed(Function desc, Function candicate) { final Type[] candicateArgTypes = candicate.getArgs(); if (!(descArgTypes[0] instanceof ScalarType) || !(candicateArgTypes[0] instanceof ScalarType)) { - if (candicateArgTypes[0] instanceof ArrayType) { + if (candicateArgTypes[0] instanceof ArrayType || candicateArgTypes[0] instanceof MapType) { return descArgTypes[0].matchesType(candicateArgTypes[0]); - } - + } return false; } final ScalarType descArgType = (ScalarType) descArgTypes[0]; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java index b2e12bfed9bbcd..2c2f5b461c4113 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java @@ -88,7 +88,7 @@ public boolean matchesType(Type t) { 
return true; } - if (!t.isArrayType()) { + if (!t.isMapType()) { return false; } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 329e306685aba5..b2b9980c994d08 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -142,6 +142,10 @@ [['element_at', '%element_extract%'], 'VARCHAR', ['ARRAY_VARCHAR', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['element_at', '%element_extract%'], 'STRING', ['ARRAY_STRING', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + + # map element + [['element_at', '%element_extract%'], 'INT', ['MAP_STRING_INT', 'STRING'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_BOOLEAN', 'ARRAY_BOOLEAN'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['arrays_overlap'], 'BOOLEAN', ['ARRAY_TINYINT', 'ARRAY_TINYINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['arrays_overlap'], 'BOOLEAN', ['ARRAY_SMALLINT', 'ARRAY_SMALLINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], diff --git a/gensrc/script/gen_builtins_functions.py b/gensrc/script/gen_builtins_functions.py index bd9a82e4c01bd1..ab354734b3dcdc 100755 --- a/gensrc/script/gen_builtins_functions.py +++ b/gensrc/script/gen_builtins_functions.py @@ -53,6 +53,7 @@ package org.apache.doris.builtins;\n\ \n\ import org.apache.doris.catalog.ArrayType;\n\ +import org.apache.doris.catalog.MapType;\n\ import org.apache.doris.catalog.Type;\n\ import org.apache.doris.catalog.Function;\n\ import org.apache.doris.catalog.FunctionSet;\n\ @@ -107,12 +108,17 @@ def add_function(fn_meta_data, user_visible): in[TINYINT] --> out[Type.TINYINT] in[INT] --> out[Type.INT] in[ARRAY_INT] --> out[new ArrayType(Type.INT)] + in[MAP_STRING_INT] --> out[new MapType(Type.STRING,Type.INT)] """ def generate_fe_datatype(str_type): if str_type.startswith("ARRAY_"): vec_type = str_type.split('_', 1); if len(vec_type) > 1 and vec_type[0] == "ARRAY": return "new ArrayType(" + 
generate_fe_datatype(vec_type[1]) + ")" + if str_type.startswith("MAP_"): + vec_type = str_type.split('_', 2) + if len(vec_type) > 2 and vec_type[0] == "MAP": + return "new MapType(" + generate_fe_datatype(vec_type[1]) + "," + generate_fe_datatype(vec_type[2])+")" if str_type == "DECIMALV2": return "Type.MAX_DECIMALV2_TYPE" if str_type == "DECIMAL32": From d3d2b1676f2ea00966f2ed04292e25f13248894d Mon Sep 17 00:00:00 2001 From: amorynan Date: Sun, 29 Jan 2023 15:27:29 +0800 Subject: [PATCH 03/11] update some for map --- be/src/http/http_request.h | 4 +- be/src/olap/field.h | 16 +- be/src/olap/page_cache.cpp | 2 +- .../olap/rowset/segment_v2/column_reader.cpp | 46 +++-- be/src/olap/rowset/segment_v2/column_reader.h | 9 +- .../olap/rowset/segment_v2/column_writer.cpp | 161 +++++++----------- be/src/olap/rowset/segment_v2/column_writer.h | 9 +- .../olap/rowset/segment_v2/segment_writer.cpp | 1 - be/src/olap/tablet_schema.cpp | 2 +- be/src/olap/types.cpp | 5 +- be/src/olap/types.h | 32 ++-- be/src/olap/utils.h | 2 +- be/src/runtime/map_value.cpp | 1 - be/src/runtime/map_value.h | 6 +- be/src/runtime/primitive_type.cpp | 2 +- be/src/runtime/types.cpp | 26 +-- be/src/udf/udf.h | 7 +- be/src/util/binary_cast.hpp | 2 +- be/src/vec/columns/column_map.cpp | 69 ++------ be/src/vec/columns/column_map.h | 68 +++++--- be/src/vec/core/accurate_comparison.h | 29 ++-- be/src/vec/core/field.h | 8 +- be/src/vec/data_types/data_type_factory.cpp | 24 +-- be/src/vec/data_types/data_type_factory.hpp | 4 +- be/src/vec/data_types/data_type_map.cpp | 87 +++++----- be/src/vec/data_types/data_type_map.h | 24 +-- be/src/vec/exprs/vexpr.cpp | 2 +- be/src/vec/exprs/vmap_literal.cpp | 4 +- be/src/vec/exprs/vmap_literal.h | 3 +- .../functions/array/function_array_element.h | 90 +++------- be/src/vec/olap/olap_data_convertor.cpp | 26 ++- be/src/vec/olap/olap_data_convertor.h | 16 +- be/src/vec/sink/vmysql_result_writer.cpp | 49 +----- .../org/apache/doris/analysis/MapLiteral.java | 10 +- 
.../org/apache/doris/mysql/MysqlColType.java | 2 +- .../doris/planner/SetOperationNode.java | 3 - 36 files changed, 345 insertions(+), 506 deletions(-) diff --git a/be/src/http/http_request.h b/be/src/http/http_request.h index 1503e4303a1b69..81085f7cceb049 100644 --- a/be/src/http/http_request.h +++ b/be/src/http/http_request.h @@ -72,7 +72,9 @@ class HttpRequest { void set_handler(HttpHandler* handler) { _handler = handler; } HttpHandler* handler() const { return _handler; } - struct evhttp_request* get_evhttp_request() const { return _ev_req; } + struct evhttp_request* get_evhttp_request() const { + return _ev_req; + } void* handler_ctx() const { return _handler_ctx; } void set_handler_ctx(void* ctx) { diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 3ebef315dd856d..f88e280ecf4ce9 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -50,7 +50,7 @@ class Field { _index_size(column.index_length()), _is_nullable(column.is_nullable()), _unique_id(column.unique_id()) { - if (column.type() == OLAP_FIELD_TYPE_ARRAY || column.type() == OLAP_FIELD_TYPE_MAP) { + if (column.type() == OLAP_FIELD_TYPE_ARRAY || column.type() == OLAP_FIELD_TYPE_MAP) { _agg_info = get_aggregate_info(column.aggregation(), column.type(), column.get_sub_column(0).type()); } else { @@ -464,14 +464,12 @@ class MapField : public Field { _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); } - // make variable_ptr memory allocate to cell_ptr as MapValue + // make variable_ptr memory allocate to cell_ptr as MapValue char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { return variable_ptr + _length; } - size_t get_variable_len() const override { - return _length; - } + size_t get_variable_len() const override { return _length; } }; class ArrayField : public Field { @@ -775,8 +773,8 @@ class FieldFactory { auto* local = new ArrayField(column); local->add_sub_field(std::move(item_field)); return local; - } - case OLAP_FIELD_TYPE_MAP: { + } + case 
OLAP_FIELD_TYPE_MAP: { std::unique_ptr key_field(FieldFactory::create(column.get_sub_column(0))); std::unique_ptr val_field(FieldFactory::create(column.get_sub_column(1))); auto* local = new MapField(column); @@ -824,9 +822,9 @@ class FieldFactory { local->add_sub_field(std::move(item_field)); return local; } - case OLAP_FIELD_TYPE_MAP: { + case OLAP_FIELD_TYPE_MAP: { DCHECK(column.get_subtype_count() == 2); - auto* local= new MapField(column); + auto* local = new MapField(column); std::unique_ptr key_field(FieldFactory::create(column.get_sub_column(0))); std::unique_ptr value_field(FieldFactory::create(column.get_sub_column(1))); local->add_sub_field(std::move(key_field)); diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp index 2813f85dd33fa1..378b9a5b6e3436 100644 --- a/be/src/olap/page_cache.cpp +++ b/be/src/olap/page_cache.cpp @@ -64,7 +64,7 @@ bool StoragePageCache::lookup(const CacheKey& key, PageCacheHandle* handle, void StoragePageCache::insert(const CacheKey& key, const Slice& data, PageCacheHandle* handle, segment_v2::PageTypePB page_type, bool in_memory) { - auto deleter = [](const doris::CacheKey& key, void* value) { delete[] (uint8_t*)value; }; + auto deleter = [](const doris::CacheKey& key, void* value) { delete[](uint8_t*) value; }; CachePriority priority = CachePriority::NORMAL; if (in_memory) { diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index f1f21b3506c586..5890cc0a438917 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -84,18 +84,16 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB& *reader = std::move(array_reader); return Status::OK(); } - case FieldType::OLAP_FIELD_TYPE_MAP: { - // map reader now has 3 sub readers for key(arr), value(arr), null(scala) + case FieldType::OLAP_FIELD_TYPE_MAP: { + // map reader now has 3 sub readers for key(arr), value(arr), 
null(scala) std::unique_ptr map_reader( new ColumnReader(opts, meta, num_rows, file_reader)); - std::unique_ptr key_reader; - RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(0), - num_rows, file_reader, - &key_reader)); - std::unique_ptr val_reader; - RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(1), - num_rows, file_reader, - &val_reader)); + std::unique_ptr key_reader; + RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(0), num_rows, + file_reader, &key_reader)); + std::unique_ptr val_reader; + RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(1), num_rows, + file_reader, &val_reader)); std::unique_ptr null_reader; if (meta.is_nullable()) { RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(2), @@ -111,7 +109,7 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB& } *reader = std::move(map_reader); return Status::OK(); - } + } default: return Status::NotSupported("unsupported type for ColumnReader: {}", std::to_string(type)); @@ -478,7 +476,7 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { null_iterator); return Status::OK(); } - case FieldType::OLAP_FIELD_TYPE_MAP: { + case FieldType::OLAP_FIELD_TYPE_MAP: { ColumnIterator* key_iterator = nullptr; RETURN_IF_ERROR(_sub_readers[0]->new_iterator(&key_iterator)); ColumnIterator* val_iterator = nullptr; @@ -487,8 +485,7 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { if (is_nullable()) { RETURN_IF_ERROR(_sub_readers[2]->new_iterator(&null_iterator)); } - *iterator = new MapFileColumnIterator(this,null_iterator, - key_iterator, val_iterator); + *iterator = new MapFileColumnIterator(this, null_iterator, key_iterator, val_iterator); return Status::OK(); } default: @@ -500,8 +497,8 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { ///====================== MapFileColumnIterator ============================//// 
MapFileColumnIterator::MapFileColumnIterator(ColumnReader* reader, ColumnIterator* null_iterator, - ColumnIterator* key_iterator, - ColumnIterator* val_iterator) + ColumnIterator* key_iterator, + ColumnIterator* val_iterator) : _map_reader(reader) { _key_iterator.reset(key_iterator); _val_iterator.reset(val_iterator); @@ -519,15 +516,10 @@ Status MapFileColumnIterator::init(const ColumnIteratorOptions& opts) { return Status::OK(); } -Status MapFileColumnIterator::_peek_one_offset(ordinal_t* offset) { - return Status::OK(); -} - Status MapFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) { - return Status::OK(); + return Status::NotSupported("Not support next_batch"); } - Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(ord)); RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(ord)); @@ -538,19 +530,19 @@ Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { } Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, - bool* has_null) { + bool* has_null) { const auto* column_map = vectorized::check_and_get_column( dst->is_nullable() ? 
static_cast(*dst).get_nested_column() : *dst); + size_t num_read = *n; auto column_key_ptr = column_map->get_keys().assume_mutable(); auto column_val_ptr = column_map->get_values().assume_mutable(); - RETURN_IF_ERROR(_key_iterator->next_batch(n, column_key_ptr, has_null)); - RETURN_IF_ERROR(_val_iterator->next_batch(n, column_val_ptr, has_null)); + RETURN_IF_ERROR(_key_iterator->next_batch(num_read, column_key_ptr, has_null)); + RETURN_IF_ERROR(_val_iterator->next_batch(num_read, column_val_ptr, has_null)); if (dst->is_nullable()) { auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); - size_t num_read = *n; bool null_signs_has_null = false; RETURN_IF_ERROR(_null_iterator->next_batch(&num_read, null_map_ptr, &null_signs_has_null)); DCHECK(num_read == *n); @@ -559,7 +551,7 @@ Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr } Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, - vectorized::MutableColumnPtr& dst) { + vectorized::MutableColumnPtr& dst) { for (size_t i = 0; i < count; ++i) { RETURN_IF_ERROR(seek_to_ordinal(rowids[i])); size_t num_read = 1; diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 5ecb75be09584a..902e048ff5ac77 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -413,22 +413,19 @@ class MapFileColumnIterator final : public ColumnIterator { Status seek_to_first() override { RETURN_IF_ERROR(_key_iterator->seek_to_first()); RETURN_IF_ERROR(_val_iterator->seek_to_first()); + RETURN_IF_ERROR(_null_iterator->seek_to_first()); return Status::OK(); } Status seek_to_ordinal(ordinal_t ord) override; - ordinal_t get_current_ordinal() const override { - return _key_iterator->get_current_ordinal(); - } + ordinal_t get_current_ordinal() const override { return _key_iterator->get_current_ordinal(); } private: - ColumnReader* _map_reader; // need ? 
+ ColumnReader* _map_reader; std::unique_ptr _null_iterator; std::unique_ptr _key_iterator; // ArrayFileColumnIterator std::unique_ptr _val_iterator; // ArrayFileColumnIterator - - Status _peek_one_offset(ordinal_t* offset); }; class ArrayFileColumnIterator final : public ColumnIterator { diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index b0ac786d6a7534..a2e0449206b3fa 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -173,14 +173,10 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* *writer = std::move(writer_local); return Status::OK(); } - case FieldType::OLAP_FIELD_TYPE_MAP: { + case FieldType::OLAP_FIELD_TYPE_MAP: { DCHECK(column->get_subtype_count() == 2); - // todo . here key and value is array only? - const TabletColumn& key_column = column->get_sub_column(0); // field_type is true key and value - const TabletColumn& value_column = column->get_sub_column(1); - - // create null writer ScalarColumnWriter* null_writer = nullptr; + // create null writer if (opts.meta->is_nullable()) { FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT; ColumnWriterOptions null_options; @@ -198,9 +194,9 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_options.need_bloom_filter = false; null_options.need_bitmap_index = false; - TabletColumn null_column = TabletColumn( - OLAP_FIELD_AGGREGATION_NONE, null_type, false, - null_options.meta->unique_id(), null_options.meta->length()); + TabletColumn null_column = + TabletColumn(OLAP_FIELD_AGGREGATION_NONE, null_type, false, + null_options.meta->unique_id(), null_options.meta->length()); null_column.set_name("nullable"); null_column.set_index_length(-1); // no short key index std::unique_ptr null_field(FieldFactory::create(null_column)); @@ -208,84 +204,49 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const 
TabletColumn* new ScalarColumnWriter(null_options, std::move(null_field), file_writer); } - // create key writer - std::unique_ptr key_writer; - ColumnWriterOptions key_opts; - TabletColumn key_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); - { - key_list_column.add_sub_column(const_cast(key_column)); -// key_list_column.add_sub_column(key_column); - key_list_column.set_name("map.key"); - key_list_column.set_index_length(-1); - - - key_opts.meta = opts.meta->mutable_children_columns(0); - key_opts.meta->set_column_id(4); - key_opts.meta->set_unique_id(4); - key_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY); - key_opts.meta->set_length(0); - key_opts.meta->set_encoding(BIT_SHUFFLE); - key_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F); - key_opts.need_zone_map = false; - // no need key array's null map - key_opts.meta->set_is_nullable(false); - - ColumnMetaPB* child_meta = key_opts.meta->add_children_columns(); - child_meta->set_column_id(5); - child_meta->set_unique_id(5); - child_meta->set_type(key_column.type()); - child_meta->set_length(key_column.length()); - child_meta->set_compression(segment_v2::CompressionTypePB::LZ4F); - child_meta->set_encoding(DICT_ENCODING); - child_meta->set_is_nullable(key_column.is_nullable()); + // create key & value writer + std::vector> inner_writer_list; + for (int i = 0; i < 2; ++i) { + std::unique_ptr inner_array_writer; + ColumnWriterOptions arr_opts; + TabletColumn array_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); + + array_column.set_index_length(-1); + arr_opts.meta = opts.meta->mutable_children_columns(i); + arr_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY); + arr_opts.meta->set_encoding(opts.meta->encoding()); + arr_opts.meta->set_compression(opts.meta->compression()); + arr_opts.need_zone_map = false; + // no need inner array's null map + arr_opts.meta->set_is_nullable(false); + + ColumnMetaPB* child_meta = arr_opts.meta->add_children_columns(); + // type and nullable and 
length. + const TabletColumn& inner_column = + column->get_sub_column(i); // field_type is true key and value + array_column.add_sub_column(const_cast(inner_column)); + array_column.set_name("map.arr"); + child_meta->set_type(inner_column.type()); + child_meta->set_length(inner_column.length()); + + child_meta->set_column_id(arr_opts.meta->column_id() + 1); + child_meta->set_unique_id(arr_opts.meta->column_id() + 1); + child_meta->set_compression(opts.meta->compression()); + child_meta->set_encoding(opts.meta->encoding()); + child_meta->set_is_nullable(true); + RETURN_IF_ERROR(ColumnWriter::create(arr_opts, &array_column, file_writer, + &inner_array_writer)); + inner_writer_list.emplace_back(std::move(inner_array_writer)); } - - RETURN_IF_ERROR( - ColumnWriter::create(key_opts, &key_list_column, file_writer, &key_writer)); - - - // create value writer - std::unique_ptr value_writer; - ColumnWriterOptions val_opts; - TabletColumn val_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); - { - val_list_column.add_sub_column(const_cast(value_column)); - // val_list_column.add_sub_column(value_column); - val_list_column.set_name("map.val"); - val_list_column.set_index_length(-1); - - - val_opts.meta = opts.meta->mutable_children_columns(1); - val_opts.meta->set_column_id(6); - val_opts.meta->set_unique_id(6); - val_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY); - val_opts.meta->set_length(0); - val_opts.meta->set_encoding(BIT_SHUFFLE); - val_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F); - val_opts.need_zone_map = false; - // no need map value array nullable - val_opts.meta->set_is_nullable(false); - - ColumnMetaPB* child_v_meta = val_opts.meta->add_children_columns(); - child_v_meta->set_column_id(7); - child_v_meta->set_unique_id(7); - child_v_meta->set_type(value_column.type()); - child_v_meta->set_length(value_column.length()); - child_v_meta->set_compression(segment_v2::CompressionTypePB::LZ4F); - 
child_v_meta->set_encoding(DEFAULT_ENCODING); - child_v_meta->set_is_nullable(value_column.is_nullable()); - } - - RETURN_IF_ERROR( - ColumnWriter::create(val_opts, &val_list_column, file_writer, &value_writer)); - // finally create map writer - std::unique_ptr writer_local = std::unique_ptr( - new MapColumnWriter(opts, std::move(field), null_writer, - std::move(key_writer), std::move(value_writer))); + // create map writer + std::unique_ptr writer_local = + std::unique_ptr(new MapColumnWriter( + opts, std::move(field), null_writer, std::move(inner_writer_list[0]), + std::move(inner_writer_list[1]))); *writer = std::move(writer_local); return Status::OK(); - } + } default: return Status::NotSupported("unsupported type for ColumnWriter: {}", std::to_string(field->type())); @@ -810,9 +771,9 @@ Status ArrayColumnWriter::finish_current_page() { /// ============================= MapColumnWriter =====================//// MapColumnWriter::MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::unique_ptr key_writer, - std::unique_ptr value_writer) + ScalarColumnWriter* null_writer, + std::unique_ptr key_writer, + std::unique_ptr value_writer) : ColumnWriter(std::move(field), opts.meta->is_nullable()), _key_writer(std::move(key_writer)), _value_writer(std::move(value_writer)), @@ -832,8 +793,7 @@ Status MapColumnWriter::init() { } uint64_t MapColumnWriter::estimate_buffer_size() { - size_t estimate = _key_writer->estimate_buffer_size() + - _value_writer->estimate_buffer_size(); + size_t estimate = _key_writer->estimate_buffer_size() + _value_writer->estimate_buffer_size(); if (is_nullable()) { estimate += _null_writer->estimate_buffer_size(); } @@ -855,17 +815,16 @@ Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { const auto* col_cursor = reinterpret_cast(*ptr); while (remaining > 0) { size_t num_written = 1; - auto* key_data_ptr = const_cast(col_cursor)->mutable_key_data(); - const 
uint8_t* key_ptr = (const uint8_t*)key_data_ptr; - RETURN_IF_ERROR(_key_writer->append_data(&key_ptr, 1)); - auto* val_data_ptr = const_cast(col_cursor)->mutable_value_data(); - const uint8_t* val_ptr = (const uint8_t*)val_data_ptr; - RETURN_IF_ERROR(_value_writer->append_data(&val_ptr, 1)); + auto* key_data_ptr = const_cast(col_cursor)->key_data(); + const uint8_t* key_ptr = (const uint8_t*)key_data_ptr; + RETURN_IF_ERROR(_key_writer->append_data(&key_ptr, 1)); + auto* val_data_ptr = const_cast(col_cursor)->value_data(); + const uint8_t* val_ptr = (const uint8_t*)val_data_ptr; + RETURN_IF_ERROR(_value_writer->append_data(&val_ptr, 1)); remaining -= num_written; col_cursor += num_written; *ptr += num_written * sizeof(MapValue); } - if (is_nullable()) { return write_null_column(num_rows, false); } @@ -876,7 +835,6 @@ Status MapColumnWriter::write_data() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->write_data()); } - RETURN_IF_ERROR(_offset_writer->write_data()); RETURN_IF_ERROR(_key_writer->write_data()); RETURN_IF_ERROR(_value_writer->write_data()); return Status::OK(); @@ -888,7 +846,6 @@ Status MapColumnWriter::write_ordinal_index() { } RETURN_IF_ERROR(_key_writer->write_ordinal_index()); RETURN_IF_ERROR(_value_writer->write_ordinal_index()); - return Status::OK(); } @@ -898,12 +855,12 @@ Status MapColumnWriter::append_nulls(size_t num_rows) { Status MapColumnWriter::write_null_column(size_t num_rows, bool is_null) { uint8_t null_sign = is_null ? 
1 : 0; - while (num_rows > 0) { - // TODO llj bulk write - const uint8_t* null_sign_ptr = &null_sign; - RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, 1)); - --num_rows; + uint8_t* null_sign_arr = new uint8_t[num_rows]; + for (int i = 0; i < num_rows; ++i) { + null_sign_arr[i] = null_sign; } + const uint8_t* null_sign_ptr = reinterpret_cast(null_sign_arr); + RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, num_rows)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 6cc6259d6ee6b4..42ee5060bdc175 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -320,7 +320,6 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { ColumnWriterOptions _opts; }; - class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, @@ -362,13 +361,17 @@ class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { } return Status::OK(); } + + // according key writer to get next rowid ordinal_t get_next_rowid() const override { return _key_writer->get_next_rowid(); } private: - std::unique_ptr _null_writer; + Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记 + std::unique_ptr _key_writer; std::unique_ptr _value_writer; - + // we need null writer to make sure a row is null or not + std::unique_ptr _null_writer; std::unique_ptr _inverted_index_builder; ColumnWriterOptions _opts; }; diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index bf9ca2c5b7d475..591c46517919d7 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -154,7 +154,6 @@ Status SegmentWriter::init(const std::vector& col_ids, bool has_key) { } } - if 
(column.type() == FieldType::OLAP_FIELD_TYPE_MAP) { opts.need_zone_map = false; if (opts.need_bloom_filter) { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index a5ebb1902672c1..d6e810a8c1bce2 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -414,7 +414,7 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { if (_type == FieldType::OLAP_FIELD_TYPE_MAP) { DCHECK(column.children_columns_size() == 2) << "MAP type has more than 2 children types."; TabletColumn key_column; - TabletColumn value_column; + TabletColumn value_column; key_column.init_from_pb(column.children_columns(0)); value_column.init_from_pb(column.children_columns(1)); add_sub_column(key_column); diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index 1415ad0297f93b..ba24742471bfce 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -184,7 +184,8 @@ TypeInfoPtr get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) { segment_v2::ColumnMetaPB value_meta = column_meta_pb->children_columns(1); TypeInfoPtr value_type_info = get_type_info(&value_meta); - MapTypeInfo* map_type_info = new MapTypeInfo(std::move(key_type_info), std::move(value_type_info)); + MapTypeInfo* map_type_info = + new MapTypeInfo(std::move(key_type_info), std::move(value_type_info)); return create_static_type_info_ptr(map_type_info); } else { return create_static_type_info_ptr(get_scalar_type_info(type)); @@ -219,7 +220,7 @@ TypeInfoPtr get_type_info(const TabletColumn* col) { } return create_static_type_info_ptr(get_array_type_info(child_column->type(), iterations)); } else if (UNLIKELY(type == OLAP_FIELD_TYPE_MAP)) { - const auto* key_column = &col->get_sub_column(0); + const auto* key_column = &col->get_sub_column(0); TypeInfoPtr key_type = get_type_info(key_column); const auto* val_column = &col->get_sub_column(1); TypeInfoPtr value_type = get_type_info(val_column); diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 
2c20c50ec45ed3..2399faa104af06 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -435,13 +435,14 @@ class ArrayTypeInfo : public TypeInfo { class MapTypeInfo : public TypeInfo { public: explicit MapTypeInfo(TypeInfoPtr key_type_info, TypeInfoPtr value_type_info) - : _key_type_info(std::move(key_type_info)), _value_type_info(std::move(value_type_info)) {} + : _key_type_info(std::move(key_type_info)), + _value_type_info(std::move(value_type_info)) {} ~MapTypeInfo() override = default; inline bool equal(const void* left, const void* right) const override { auto l_value = reinterpret_cast(left); auto r_value = reinterpret_cast(right); - return l_value->size() == r_value->size(); + return l_value->size() == r_value->size(); } int cmp(const void* left, const void* right) const override { @@ -449,7 +450,7 @@ class MapTypeInfo : public TypeInfo { auto r_value = reinterpret_cast(right); uint32_t l_size = l_value->size(); uint32_t r_size = r_value->size(); - if (l_size < r_size) { + if (l_size < r_size) { return -1; } else if (l_size > r_size) { return 1; @@ -464,22 +465,15 @@ class MapTypeInfo : public TypeInfo { dest_value->shallow_copy(src_value); } - void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override { - DCHECK(false); - - } + void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override { DCHECK(false); } void copy_object(void* dest, const void* src, MemPool* mem_pool) const override { deep_copy(dest, src, mem_pool); } - void direct_copy(void* dest, const void* src) const override { - CHECK(false); - } + void direct_copy(void* dest, const void* src) const override { CHECK(false); } - void direct_copy(uint8_t** base, void* dest, const void* src) const { - CHECK(false); - } + void direct_copy(uint8_t** base, void* dest, const void* src) const { CHECK(false); } void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); } @@ -493,9 +487,7 @@ class MapTypeInfo : public TypeInfo { 
return Status::Error(); } - std::string to_string(const void* src) const override { - return "{}"; - } + std::string to_string(const void* src) const override { return "{}"; } void set_to_max(void* buf) const override { DCHECK(false) << "set_to_max of list is not implemented."; @@ -509,14 +501,12 @@ class MapTypeInfo : public TypeInfo { auto map_value = reinterpret_cast(data); auto size = map_value->size(); uint32_t result = HashUtil::hash(&size, sizeof(size), seed); - result = seed * result + _key_type_info->hash_code( - map_value->key_data(), seed) - + _value_type_info->hash_code( - map_value->value_data(), seed); + result = seed * result + _key_type_info->hash_code(map_value->key_data(), seed) + + _value_type_info->hash_code(map_value->value_data(), seed); return result; } - // todo . is here only to need return 16 for two ptr? + // todo . is here only to need return 16 for two ptr? const size_t size() const override { return 16; } FieldType type() const override { return OLAP_FIELD_TYPE_MAP; } diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 964b973569dcf1..0360fb51eea075 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -110,7 +110,7 @@ void _destruct_object(const void* obj, void*) { template void _destruct_array(const void* array, void*) { - delete[] ((const T*)array); + delete[]((const T*)array); } // 根据压缩类型的不同,执行压缩。dest_buf_len是dest_buf的最大长度, diff --git a/be/src/runtime/map_value.cpp b/be/src/runtime/map_value.cpp index 5287a958226dfe..4949c3dd0e0705 100644 --- a/be/src/runtime/map_value.cpp +++ b/be/src/runtime/map_value.cpp @@ -36,5 +36,4 @@ MapValue MapValue::from_map_val(const MapVal& val) { return MapValue(val.key, val.value, val.length); } - } // namespace doris diff --git a/be/src/runtime/map_value.h b/be/src/runtime/map_value.h index 488df38ab61b66..965b928e0c3e7e 100644 --- a/be/src/runtime/map_value.h +++ b/be/src/runtime/map_value.h @@ -18,6 +18,7 @@ #pragma once #include + #include "runtime/primitive_type.h" namespace 
doris_udf { @@ -37,8 +38,7 @@ class MapValue { public: MapValue() = default; - explicit MapValue(int32_t length) - : _key_data(nullptr), _value_data(nullptr), _length(length){} + explicit MapValue(int32_t length) : _key_data(nullptr), _value_data(nullptr), _length(length) {} MapValue(void* k_data, void* v_data, int32_t length) : _key_data(k_data), _value_data(v_data), _length(length) {} @@ -69,5 +69,5 @@ class MapValue { // length for map size int32_t _length; -};//map-value +}; //map-value } // namespace doris diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 3263fe55403fb8..b289788b2165d8 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -362,7 +362,7 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) { case TYPE_ARRAY: return TPrimitiveType::ARRAY; case TYPE_MAP: - return TPrimitiveType::MAP; + return TPrimitiveType::MAP; default: return TPrimitiveType::INVALID_TYPE; } diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index d75a9f51774c9e..bea4f793330c75 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -79,14 +79,14 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) // children.push_back(TypeDescriptor(types, idx)); // break; case TTypeNodeType::MAP: { - DCHECK(!node.__isset.scalar_type); - DCHECK_LT(*idx, types.size() - 2); - type = TYPE_MAP; - ++(*idx); - children.push_back(TypeDescriptor(types, idx)); - ++(*idx); - children.push_back(TypeDescriptor(types, idx)); - break; + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 2); + type = TYPE_MAP; + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + break; } default: DCHECK(false) << node.type; @@ -132,8 +132,7 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { } void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { - DCHECK(!is_complex_type() || type == TYPE_ARRAY 
|| type == TYPE_MAP) - << "Don't support complex type now, type=" << type; + DCHECK(type == TYPE_STRUCT) << "Don't support complex type now, type=" << type; auto node = ptype->add_types(); node->set_type(TTypeNodeType::SCALAR); auto scalar_type = node->mutable_scalar_type(); @@ -199,12 +198,15 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField"; - return ss.str(); + return ss.str(); default: return type_to_string(type); } diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 30a1363898b337..d3dbbd61cbf433 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -89,7 +89,7 @@ class FunctionContext { TYPE_DECIMALV2, TYPE_OBJECT, TYPE_ARRAY, - TYPE_MAP, + TYPE_MAP, TYPE_QUANTILE_STATE, TYPE_DATEV2, TYPE_DATETIMEV2, @@ -918,15 +918,14 @@ struct MapVal : public AnyVal { uint64_t length; // item has no null value if has_null is false. // item ```may``` has null value if has_null is true. -// bool has_null; + // bool has_null; // null bitmap bool* key_null_signs; bool* value_null_signs; MapVal() = default; - MapVal(void* k, void* v, uint64_t length) - : key(k), value(v), length(length) {}; + MapVal(void* k, void* v, uint64_t length) : key(k), value(v), length(length) {}; static MapVal null() { MapVal val; diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp index 17754a15e92d7f..5da841a45e42cd 100644 --- a/be/src/util/binary_cast.hpp +++ b/be/src/util/binary_cast.hpp @@ -37,7 +37,7 @@ union TypeConverter { }; template -inline constexpr bool match_v = std::is_same_v && std::is_same_v; +inline constexpr bool match_v = std::is_same_v&& std::is_same_v; union DecimalInt128Union { DecimalV2Value decimal; diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index 6306c9d9b3c4c7..fb1412c81c78da 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -34,14 +34,14 @@ ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values) } ColumnArray::Offsets64& 
ColumnMap::get_offsets() const { - const ColumnArray & column_keys = assert_cast (get_keys()); + const ColumnArray& column_keys = assert_cast(get_keys()); // todo . did here check size ? return const_cast(column_keys.get_offsets()); } void ColumnMap::check_size() const { - const auto * key_array = typeid_cast(keys.get()); - const auto * value_array = typeid_cast(values.get()); + const auto* key_array = typeid_cast(keys.get()); + const auto* value_array = typeid_cast(values.get()); CHECK(key_array) << "ColumnMap keys can be created only from array"; CHECK(value_array) << "ColumnMap values can be created only from array"; CHECK_EQ(get_keys_ptr()->size(), get_values_ptr()->size()); @@ -55,15 +55,16 @@ MutableColumnPtr ColumnMap::clone_resized(size_t to_size) const { // to support field functions Field ColumnMap::operator[](size_t n) const { + // Map is FieldVector , see in field.h Map res(2); keys->get(n, res[0]); - values->get(n, res[0]); + values->get(n, res[1]); return res; } // here to compare to below -void ColumnMap::get(size_t n, Field & res) const { +void ColumnMap::get(size_t n, Field& res) const { Map map(2); keys->get(n, map[0]); values->get(n, map[1]); @@ -81,25 +82,19 @@ void ColumnMap::insert_data(const char*, size_t) { void ColumnMap::insert(const Field& x) { const auto& map = doris::vectorized::get(x); - // ({}, {}, {}) - // ([], []) CHECK_EQ(map.size(), 2); keys->insert(map[0]); values->insert(map[1]); } -void ColumnMap::insert_default() { - keys->insert_default(); - values->insert_default(); -} +void ColumnMap::insert_default() {} void ColumnMap::pop_back(size_t n) { keys->pop_back(n); values->pop_back(n); } -StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena & arena, char const*& begin) - const { +StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const { StringRef res(begin, 0); auto keys_ref = keys->serialize_value_into_arena(n, arena, begin); res.data = keys_ref.data - res.size; @@ -114,11 
+109,11 @@ StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena & arena, char co void ColumnMap::insert_from(const IColumn& src_, size_t n) { const ColumnMap& src = assert_cast(src_); - if ((!get_keys().is_nullable() && src.get_keys().is_nullable()) - || (!get_values().is_nullable() && src.get_values().is_nullable())) { + if ((!get_keys().is_nullable() && src.get_keys().is_nullable()) || + (!get_values().is_nullable() && src.get_values().is_nullable())) { DCHECK(false); - } else if ((get_keys().is_nullable() && !src.get_keys().is_nullable()) - || (get_values().is_nullable() && !src.get_values().is_nullable())) { + } else if ((get_keys().is_nullable() && !src.get_keys().is_nullable()) || + (get_values().is_nullable() && !src.get_values().is_nullable())) { DCHECK(false); } else { keys->insert_from(*assert_cast(src_).keys, n); @@ -127,7 +122,7 @@ void ColumnMap::insert_from(const IColumn& src_, size_t n) { } void ColumnMap::insert_indices_from(const IColumn& src, const int* indices_begin, - const int* indices_end) { + const int* indices_end) { for (auto x = indices_begin; x != indices_end; ++x) { if (*x == -1) { ColumnMap::insert_default(); @@ -144,7 +139,7 @@ const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) { return pos; } -void ColumnMap::update_hash_with_value(size_t n, SipHash & hash) const { +void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const { keys->update_hash_with_value(n, hash); values->update_hash_with_value(n, hash); } @@ -155,7 +150,8 @@ void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t lengt } ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) const { - return ColumnMap::create(keys->filter(filt, result_size_hint), values->filter(filt, result_size_hint)); + return ColumnMap::create(keys->filter(filt, result_size_hint), + values->filter(filt, result_size_hint)); } ColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const { @@ -166,26 
+162,6 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets) const { return ColumnMap::create(keys->replicate(offsets), values->replicate(offsets)); } -//MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector& selector) const { -// -// MutableColumns keys_scatter = keys->scatter(num_columns, selector); -// MutableColumns values_scatter = values->scatter(num_columns, selector); -// -// MutableColumns res(num_columns); -// -// for (size_t scattered_idx = 0; scattered_idx < num_columns; ++scattered_idx) -// { -// MutableColumns new_columns(2); -// for (size_t map_element_idx = 0; map_element_idx < 2; ++map_element_idx) -// new_columns[map_element_idx] = std::move(scattered_map_elements[map_element_idx][scattered_idx]); -// res[scattered_idx] = ColumnMap::create(std::move(new_columns)); -// } -// -// -// return res; -//} - - void ColumnMap::reserve(size_t n) { get_keys().reserve(n); get_values().reserve(n); @@ -204,15 +180,4 @@ void ColumnMap::protect() { get_values().protect(); } -void ColumnMap::get_extremes(Field & min, Field & max) const { - Map min_map(2); - Map max_map(2); - - keys->get_extremes(min_map[0], max_map[0]); - values->get_extremes(min_map[1], max_map[1]); - - min = min_map; - max = max_map; -} - -} \ No newline at end of file +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index f24bdc3ac99528..4840ee4f69968f 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -1,7 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnMap.cpp +// and modified by Doris + #pragma once -#include "vec/columns/column_array.h" #include "vec/columns/column.h" +#include "vec/columns/column_array.h" #include "vec/columns/column_impl.h" #include "vec/common/arena.h" #include "vec/core/field.h" @@ -9,13 +29,11 @@ namespace doris::vectorized { - /** A column of map values. */ class ColumnMap final : public COWHelper { - -public: - /** Create immutable column using immutable arguments. This arguments may be shared with other columns. +public: + /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. 
*/ using Base = COWHelper; @@ -31,7 +49,7 @@ class ColumnMap final : public COWHelper { } std::string get_name() const override; - const char * get_family_name() const override { return "Map"; } + const char* get_family_name() const override { return "Map"; } TypeIndex get_data_type() const { return TypeIndex::Map; } void for_each_subcolumn(ColumnCallback callback) override { @@ -49,35 +67,38 @@ class ColumnMap final : public COWHelper { bool can_be_inside_nullable() const override { return true; } size_t size() const override { return keys->size(); } Field operator[](size_t n) const override; - void get(size_t n, Field & res) const override; + void get(size_t n, Field& res) const override; StringRef get_data_at(size_t n) const override; void insert_data(const char* pos, size_t length) override; void insert_range_from(const IColumn& src, size_t start, size_t length) override; void insert_from(const IColumn& src_, size_t n) override; - void insert(const Field & x) override; + void insert(const Field& x) override; void insert_default() override; void pop_back(size_t n) override; bool is_column_map() const override { return true; } - StringRef serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const override; - const char * deserialize_and_insert_from_arena(const char * pos) override; + StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override; + const char* deserialize_and_insert_from_arena(const char* pos) override; - void update_hash_with_value(size_t n, SipHash & hash) const override; + void update_hash_with_value(size_t n, SipHash& hash) const override; - ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; - ColumnPtr permute(const Permutation & perm, size_t limit) const override; - ColumnPtr replicate(const Offsets & offsets) const override; - MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override { + ColumnPtr filter(const Filter& filt, 
ssize_t result_size_hint) const override; + ColumnPtr permute(const Permutation& perm, size_t limit) const override; + ColumnPtr replicate(const Offsets& offsets) const override; + MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override { return scatter_impl(num_columns, selector); } - void get_extremes(Field & min, Field & max) const override; + void get_extremes(Field& min, Field& max) const override { + LOG(FATAL) << "get_extremes not implemented"; + }; [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override { LOG(FATAL) << "compare_at not implemented"; } - void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override { - LOG(FATAL) << "get_permutation not implemented"; + void get_permutation(bool reverse, size_t limit, int nan_direction_hint, + Permutation& res) const override { + LOG(FATAL) << "get_permutation not implemented"; } void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override; @@ -87,7 +108,6 @@ class ColumnMap final : public COWHelper { return append_data_by_selector_impl(res, selector); } - void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override { LOG(FATAL) << "replace_column_data not implemented"; } @@ -101,7 +121,7 @@ class ColumnMap final : public COWHelper { size_t allocated_bytes() const override; void protect() override; - /******************** keys and values ***************/ + /******************** keys and values ***************/ const ColumnPtr& get_keys_ptr() const { return keys; } ColumnPtr& get_keys_ptr() { return keys; } @@ -117,7 +137,7 @@ class ColumnMap final : public COWHelper { private: friend class COWHelper; - WrappedPtr keys; // nullable + WrappedPtr keys; // nullable WrappedPtr values; // nullable size_t ALWAYS_INLINE offset_at(ssize_t i) const { return get_offsets()[i - 1]; } @@ -125,9 +145,9 @@ class ColumnMap final : 
public COWHelper { return get_offsets()[i] - get_offsets()[i - 1]; } - explicit ColumnMap(MutableColumnPtr && keys, MutableColumnPtr && values); + explicit ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values); - ColumnMap(const ColumnMap &) = default; + ColumnMap(const ColumnMap&) = default; }; -} +} // namespace doris::vectorized diff --git a/be/src/vec/core/accurate_comparison.h b/be/src/vec/core/accurate_comparison.h index e52cc4ef6596c5..3f13772041f3cf 100644 --- a/be/src/vec/core/accurate_comparison.h +++ b/be/src/vec/core/accurate_comparison.h @@ -54,14 +54,15 @@ namespace accurate { // Case 1. Is pair of floats or pair of ints or pair of uints template -constexpr bool is_safe_conversion = - (std::is_floating_point_v && std::is_floating_point_v) || - (std::is_integral_v && std::is_integral_v && - !(std::is_signed_v ^ std::is_signed_v)) || - (std::is_same_v && - std::is_same_v) || - (std::is_integral_v && std::is_same_v) || - (std::is_same_v && std::is_integral_v); +constexpr bool is_safe_conversion = (std::is_floating_point_v && std::is_floating_point_v) || + (std::is_integral_v && std::is_integral_v && + !(std::is_signed_v ^ std::is_signed_v)) || + (std::is_same_v && + std::is_same_v) || + (std::is_integral_v && + std::is_same_v) || + (std::is_same_v && + std::is_integral_v); template using bool_if_safe_conversion = std::enable_if_t, bool>; template @@ -69,13 +70,13 @@ using bool_if_not_safe_conversion = std::enable_if_t, /// Case 2. Are params IntXX and UIntYY ? template -constexpr bool is_any_int_vs_uint = std::is_integral_v && std::is_integral_v && - std::is_signed_v && std::is_unsigned_v; +constexpr bool is_any_int_vs_uint = std::is_integral_v&& std::is_integral_v&& + std::is_signed_v&& std::is_unsigned_v; // Case 2a. 
Are params IntXX and UIntYY and sizeof(IntXX) >= sizeof(UIntYY) (in such case will use accurate compare) template -constexpr bool is_le_int_vs_uint = - is_any_int_vs_uint && (sizeof(TInt) <= sizeof(TUInt)); +constexpr bool is_le_int_vs_uint = is_any_int_vs_uint && + (sizeof(TInt) <= sizeof(TUInt)); template using bool_if_le_int_vs_uint_t = std::enable_if_t, bool>; @@ -106,8 +107,8 @@ inline bool_if_le_int_vs_uint_t equalsOpTmpl(TUInt a, TInt b) { // Case 2b. Are params IntXX and UIntYY and sizeof(IntXX) > sizeof(UIntYY) (in such case will cast UIntYY to IntXX and compare) template -constexpr bool is_gt_int_vs_uint = - is_any_int_vs_uint && (sizeof(TInt) > sizeof(TUInt)); +constexpr bool is_gt_int_vs_uint = is_any_int_vs_uint && + (sizeof(TInt) > sizeof(TUInt)); template using bool_if_gt_int_vs_uint = std::enable_if_t, bool>; diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 7af160b3b22483..be7b7b91033cdf 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -309,7 +309,7 @@ class Field { AggregateFunctionState = 22, JSONB = 23, Decimal128I = 24, - Map = 25, + Map = 25, }; static const int MIN_NON_POD = 16; @@ -336,7 +336,7 @@ class Field { return "Array"; case Tuple: return "Tuple"; - case Map: + case Map: return "Map"; case Decimal32: return "Decimal32"; @@ -509,7 +509,7 @@ class Field { return get() < rhs.get(); case Types::Tuple: return get() < rhs.get(); - case Types::Map: + case Types::Map: return get() < rhs.get(); case Types::Decimal32: return get>() < rhs.get>(); @@ -689,7 +689,7 @@ class Field { return; case Types::Map: f(field.template get()); - return; + return; case Types::Decimal32: f(field.template get>()); return; diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 6a34b50a445346..02c05aa4900ff6 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -19,6 +19,7 @@ // and modified by Doris #include 
"vec/data_types/data_type_factory.hpp" + #include "data_type_time.h" namespace doris::vectorized { @@ -29,9 +30,10 @@ DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) { DCHECK(col_desc.get_sub_field_count() == 1); nested = std::make_shared(create_data_type(*col_desc.get_sub_field(0))); } else if (col_desc.type() == OLAP_FIELD_TYPE_MAP) { - DCHECK(col_desc.get_sub_field_count() == 2); + DCHECK(col_desc.get_sub_field_count() == 2); nested = std::make_shared( - create_data_type(*col_desc.get_sub_field(0)), create_data_type(*col_desc.get_sub_field(1))); + create_data_type(*col_desc.get_sub_field(0)), + create_data_type(*col_desc.get_sub_field(1))); } else { nested = _create_primitive_data_type(col_desc.type(), col_desc.get_precision(), col_desc.get_scale()); @@ -50,8 +52,9 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc, bool nested = std::make_shared(create_data_type(col_desc.get_sub_column(0))); } else if (col_desc.type() == OLAP_FIELD_TYPE_MAP) { DCHECK(col_desc.get_subtype_count() == 2); - nested = std::make_shared(create_data_type(col_desc.get_sub_column(0)), - create_data_type(col_desc.get_sub_column(1))); + nested = std::make_shared( + create_data_type(col_desc.get_sub_column(0)), + create_data_type(col_desc.get_sub_column(1))); } else { nested = _create_primitive_data_type(col_desc.type(), col_desc.precision(), col_desc.frac()); @@ -101,8 +104,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo break; case TYPE_TIME: case TYPE_TIMEV2: - nested = std::make_shared(); - break; + nested = std::make_shared(); + break; case TYPE_DOUBLE: nested = std::make_shared(); break; @@ -143,7 +146,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared( create_data_type(col_desc.children[0], col_desc.contains_null), create_data_type(col_desc.children[1], col_desc.contains_null)); - break; + break; case INVALID_TYPE: default: 
DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; @@ -314,9 +317,10 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { break; case PGenericType::MAP: DCHECK(pcolumn.children_size() == 2); - // here to check pcolumn is list? - nested = std::make_shared(create_data_type(pcolumn.children(0).children(0)), - create_data_type(pcolumn.children(1).children(0))); + // here to check pcolumn is list? + nested = std::make_shared( + create_data_type(pcolumn.children(0).children(0)), + create_data_type(pcolumn.children(1).children(0))); break; default: { LOG(FATAL) << fmt::format("Unknown data type: {}", pcolumn.type()); diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index 5b698bdfd0ad90..ed270b40eaa830 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -33,12 +33,10 @@ #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" -#include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_fixed_length_object.h" #include "vec/data_types/data_type_hll.h" #include "vec/data_types/data_type_jsonb.h" -#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nothing.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" @@ -125,7 +123,7 @@ class DataTypeFactory { DataTypePtr create_data_type(const TTypeDesc& raw_type) { return create_data_type(TypeDescriptor::from_thrift(raw_type), raw_type.is_nullable); } - + DataTypePtr create_data_type(const FieldType& type, int precision, int scale) { return _create_primitive_data_type(type, precision, scale); } diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp index 8b81f51779f399..4c6a94a21640ed 100644 --- 
a/be/src/vec/data_types/data_type_map.cpp +++ b/be/src/vec/data_types/data_type_map.cpp @@ -18,15 +18,14 @@ #include "data_type_map.h" #include "gen_cpp/data.pb.h" -#include "vec/data_types/data_type_factory.hpp" #include "vec/columns/column_array.h" #include "vec/columns/column_map.h" #include "vec/common/assert_cast.h" +#include "vec/data_types/data_type_factory.hpp" namespace doris::vectorized { -DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_) -{ +DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_) { key_type = keys_; value_type = values_; @@ -35,71 +34,64 @@ DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_) } std::string DataTypeMap::to_string(const IColumn& column, size_t row_num) const { - const ColumnMap & map_column = assert_cast(column); + const ColumnMap& map_column = assert_cast(column); const ColumnArray::Offsets64& offsets = map_column.get_offsets(); size_t offset = offsets[row_num - 1]; size_t next_offset = offsets[row_num]; - const IColumn & nested_keys = map_column.get_keys(); - const IColumn & nested_values = map_column.get_values(); + auto& keys_arr = assert_cast(map_column.get_keys()); + auto& values_arr = assert_cast(map_column.get_values()); + + const IColumn& nested_keys_column = keys_arr.get_data(); + const IColumn& nested_values_column = values_arr.get_data(); std::stringstream ss; ss << "{"; - for (size_t i = offset; i < next_offset; ++i) - { - if (i != offset) + for (size_t i = offset; i < next_offset; ++i) { + if (i != offset) { ss << ", "; - ss << "'" << keys->to_string(nested_keys, i); - ss << ':'; - ss << "'" << values->to_string(nested_values, i); + } + if (nested_keys_column.is_null_at(i)) { + ss << "NULL"; + } else if (WhichDataType(remove_nullable(key_type)).is_string_or_fixed_string()) { + ss << "'" << key_type->to_string(nested_keys_column, i) << "'"; + } else { + ss << key_type->to_string(nested_keys_column, i); + } + ss << ":"; + if 
(nested_values_column.is_null_at(i)) { + ss << "NULL"; + } else if (WhichDataType(remove_nullable(value_type)).is_string_or_fixed_string()) { + ss << "'" << value_type->to_string(nested_values_column, i) << "'"; + } else { + ss << value_type->to_string(nested_values_column, i); + } } ss << "}"; return ss.str(); } void DataTypeMap::to_string(const class doris::vectorized::IColumn& column, size_t row_num, - class doris::vectorized::BufferWritable& ostr) const { - const ColumnMap & map_column = assert_cast(column); - const ColumnArray::Offsets64& offsets = map_column.get_offsets(); - - size_t offset = offsets[row_num - 1]; - size_t next_offset = offsets[row_num]; - - const IColumn & nested_keys = map_column.get_keys(); - const IColumn & nested_values = map_column.get_values(); - - ostr.write("{", 1); - for (size_t i = offset; i < next_offset; ++i) - { - if (i != offset) - ostr.write(", ", 2); - keys->to_string(nested_keys, i, ostr); - ostr.write(":", 1); - values->to_string(nested_values, i, ostr); - } - ostr.write("}", 1); + class doris::vectorized::BufferWritable& ostr) const { + std::string ss = to_string(column, row_num); + ostr.write(ss.c_str(), strlen(ss.c_str())); } Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const { DCHECK(!rb.eof()); - // only support one level now auto* map_column = assert_cast(column); - // IColumn& nested_column = array_column->get_data(); if (*rb.position() != '{') { return Status::InvalidArgument("map does not start with '{' character, found '{}'", *rb.position()); } + keys->from_string(rb, &map_column->get_keys()); values->from_string(rb, &map_column->get_values()); if (*(rb.end() - 1) != '}') { return Status::InvalidArgument("map does not end with '}' character, found '{}'", *(rb.end() - 1)); } -// keys->deserializeAsTextQuoted(extractElementColumn(column, 0), istr, settings); -// assertChar(',', istr); -// values->deserializeAsTextQuoted(extractElementColumn(column, 1), istr, settings); -// assertChar('}', istr); 
return Status::OK(); } @@ -116,34 +108,36 @@ void DataTypeMap::to_pb_column_meta(PColumnMeta* col_meta) const { } bool DataTypeMap::equals(const IDataType& rhs) const { - if (typeid(rhs) != typeid(*this)) + if (typeid(rhs) != typeid(*this)) { return false; + } - const DataTypeMap & rhs_map = static_cast(rhs); + const DataTypeMap& rhs_map = static_cast(rhs); - if (!keys->equals(*rhs_map.keys)) + if (!keys->equals(*rhs_map.keys)) { return false; + } - if (!values->equals(*rhs_map.values)) + if (!values->equals(*rhs_map.values)) { return false; + } return true; } int64_t DataTypeMap::get_uncompressed_serialized_bytes(const IColumn& column, - int data_version) const { + int data_version) const { auto ptr = column.convert_to_full_column_if_const(); const auto& data_column = assert_cast(*ptr.get()); return get_keys()->get_uncompressed_serialized_bytes(data_column.get_keys(), data_version) + get_values()->get_uncompressed_serialized_bytes(data_column.get_values(), data_version); } -// serialize to binary +// serialize to binary char* DataTypeMap::serialize(const IColumn& column, char* buf, int data_version) const { auto ptr = column.convert_to_full_column_if_const(); const auto& map_column = assert_cast(*ptr.get()); - buf = get_keys()->serialize(map_column.get_keys(), buf, data_version); return get_values()->serialize(map_column.get_values(), buf, data_version); } @@ -151,7 +145,8 @@ char* DataTypeMap::serialize(const IColumn& column, char* buf, int data_version) const char* DataTypeMap::deserialize(const char* buf, IColumn* column, int data_version) const { const auto* map_column = assert_cast(column); buf = get_keys()->deserialize(buf, map_column->get_keys_ptr()->assume_mutable(), data_version); - return get_values()->deserialize(buf, map_column->get_values_ptr()->assume_mutable(), data_version); + return get_values()->deserialize(buf, map_column->get_values_ptr()->assume_mutable(), + data_version); } } // namespace doris::vectorized diff --git 
a/be/src/vec/data_types/data_type_map.h b/be/src/vec/data_types/data_type_map.h index cc66f5fa53c84b..58261b0b3d30ce 100644 --- a/be/src/vec/data_types/data_type_map.h +++ b/be/src/vec/data_types/data_type_map.h @@ -28,12 +28,11 @@ namespace doris::vectorized { * Map's key and value only have types. * If only one type is set, then key's type is "String" in default. */ -class DataTypeMap final : public IDataType -{ +class DataTypeMap final : public IDataType { private: DataTypePtr key_type; DataTypePtr value_type; - DataTypePtr keys; // array + DataTypePtr keys; // array DataTypePtr values; // array public: @@ -42,8 +41,10 @@ class DataTypeMap final : public IDataType DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_); TypeIndex get_type_id() const override { return TypeIndex::Map; } - std::string do_get_name() const override { return "Map(" + key_type->get_name() + ", " + value_type->get_name()+ ")"; } - const char * get_family_name() const override { return "Map"; } + std::string do_get_name() const override { + return "Map(" + key_type->get_name() + ", " + value_type->get_name() + ")"; + } + const char* get_family_name() const override { return "Map"; } bool can_be_inside_nullable() const override { return true; } MutableColumnPtr create_column() const override; @@ -51,18 +52,19 @@ class DataTypeMap final : public IDataType bool equals(const IDataType& rhs) const override; bool get_is_parametric() const override { return true; } bool have_subtypes() const override { return true; } - bool is_comparable() const override { return key_type->is_comparable() && value_type->is_comparable(); } + bool is_comparable() const override { + return key_type->is_comparable() && value_type->is_comparable(); + } bool can_be_compared_with_collation() const override { return false; } bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { return true; } - const DataTypePtr& get_keys() const { return keys; } const DataTypePtr& 
get_values() const { return values; } - const DataTypePtr & get_key_type() const { return key_type; } - const DataTypePtr & get_value_type() const { return value_type; } + const DataTypePtr& get_key_type() const { return key_type; } + const DataTypePtr& get_value_type() const { return value_type; } int64_t get_uncompressed_serialized_bytes(const IColumn& column, int be_exec_version) const override; @@ -74,8 +76,6 @@ class DataTypeMap final : public IDataType std::string to_string(const IColumn& column, size_t row_num) const override; void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; Status from_string(ReadBuffer& rb, IColumn* column) const override; - }; -} - +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index a051b240eebc57..00ff8bdd5f3571 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -29,11 +29,11 @@ #include "vec/exprs/vcase_expr.h" #include "vec/exprs/vcast_expr.h" #include "vec/exprs/vcompound_pred.h" -#include "vec/exprs/vmap_literal.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vin_predicate.h" #include "vec/exprs/vinfo_func.h" #include "vec/exprs/vliteral.h" +#include "vec/exprs/vmap_literal.h" #include "vec/exprs/vruntimefilter_wrapper.h" #include "vec/exprs/vslot_ref.h" #include "vec/exprs/vtuple_is_null_predicate.h" diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp index 02d4ce71f58656..b3cce0b61b69d5 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -21,7 +21,7 @@ namespace doris::vectorized { Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, - VExprContext* context) { + VExprContext* context) { DCHECK_EQ(type().children.size(), 2) << "map children type not 2"; RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); @@ -30,7 +30,7 @@ Status VMapLiteral::prepare(RuntimeState* state, const 
RowDescriptor& row_desc, Field keys = Array(); Field values = Array(); // each child is slot with key1, value1, key2, value2... - for (int idx = 0; idx < _children.size(); ++idx ) { + for (int idx = 0; idx < _children.size(); ++idx) { Field item; ColumnPtrWrapper* const_col_wrapper = nullptr; RETURN_IF_ERROR(_children[idx]->get_const_col(context, &const_col_wrapper)); diff --git a/be/src/vec/exprs/vmap_literal.h b/be/src/vec/exprs/vmap_literal.h index a3c45ffb2f6def..6206d4c58ff2a5 100644 --- a/be/src/vec/exprs/vmap_literal.h +++ b/be/src/vec/exprs/vmap_literal.h @@ -18,7 +18,6 @@ #include "vec/exprs/vliteral.h" - namespace doris { namespace vectorized { @@ -27,7 +26,7 @@ class VMapLiteral : public VLiteral { VMapLiteral(const TExprNode& node) : VLiteral(node, false) {} ~VMapLiteral() override = default; Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - VExprContext* context) override; + VExprContext* context) override; }; } // namespace vectorized diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h index 1f7be28c584fce..ac685b172ba424 100644 --- a/be/src/vec/functions/array/function_array_element.h +++ b/be/src/vec/functions/array/function_array_element.h @@ -22,7 +22,6 @@ #include #include "vec/columns/column_array.h" -#include "vec/columns/column_const.h" #include "vec/columns/column_map.h" #include "vec/columns/column_string.h" #include "vec/data_types/data_type_array.h" @@ -47,17 +46,20 @@ class FunctionArrayElement : public IFunction { DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { DCHECK(is_array(arguments[0]) || is_map(arguments[0])) - << "first argument for function: " << name << " should be DataTypeArray or DataTypeMap"; + << "first argument for function: " << name + << " should be DataTypeArray or DataTypeMap"; if (is_array(arguments[0])) { - DCHECK(is_integer(arguments[1])) << "second argument for function: " << name << " should be 
Integer for array element"; + DCHECK(is_integer(arguments[1])) << "second argument for function: " << name + << " should be Integer for array element"; return make_nullable( check_and_get_data_type(arguments[0].get())->get_nested_type()); - } else { + } else if (is_map(arguments[0])) { return make_nullable( check_and_get_data_type(arguments[0].get())->get_value_type()); + } else { + LOG(ERROR) << "element_at only support array and map so far."; + return nullptr; } - - } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, @@ -79,94 +81,61 @@ class FunctionArrayElement : public IFunction { ColumnPtr res_column = nullptr; if (args[0].column->is_column_map()) { res_column = _execute_map(args, input_rows_count, src_null_map, dst_null_map); - }else { - res_column = - _execute_non_nullable(args, input_rows_count, src_null_map, dst_null_map); + } else { + res_column = _execute_non_nullable(args, input_rows_count, src_null_map, dst_null_map); } if (!res_column) { return Status::RuntimeError("unsupported types for function {}({}, {})", get_name(), block.get_by_position(arguments[0]).type->get_name(), block.get_by_position(arguments[1]).type->get_name()); } - block.replace_by_position( - result, ColumnNullable::create(std::move(res_column), std::move(dst_null_column))); + block.replace_by_position(result, + ColumnNullable::create(res_column, std::move(dst_null_column))); return Status::OK(); } private: //=========================== map element===========================// ColumnPtr _get_mapped_idx(const ColumnArray& key_column, - const ColumnWithTypeAndName& argument) { - if (key_column.get_data().is_column_string()) { - return _mapped_key_string(key_column, argument); - } - return nullptr; - } - - ColumnPtr _get_mapped_value(const ColumnArray& val_column, - const IColumn& matched_indices, - const UInt8* src_null_map, - UInt8* dst_null_map) { - const UInt8* nested_null_map = nullptr; - ColumnPtr nested_column = nullptr; - if 
(is_column_nullable(val_column.get_data())) { - const auto& nested_null_column = - reinterpret_cast(val_column.get_data()); - nested_null_map = nested_null_column.get_null_map_column().get_data().data(); - nested_column = nested_null_column.get_nested_column_ptr(); - } else { - nested_column = val_column.get_data_ptr(); - } - if (check_column(nested_column)) { - return _execute_number(val_column.get_offsets(), *nested_column, - src_null_map, matched_indices, - nested_null_map, dst_null_map); - } else if (check_column(nested_column)) { - _execute_number(val_column.get_offsets(), *nested_column, - src_null_map, matched_indices, - nested_null_map, dst_null_map); - } - return nullptr; + const ColumnWithTypeAndName& argument) { + return _mapped_key(key_column, argument); } - ColumnPtr _mapped_key_string(const ColumnArray& column, - const ColumnWithTypeAndName& argument) { + ColumnPtr _mapped_key(const ColumnArray& column, const ColumnWithTypeAndName& argument) { auto right_column = argument.column->convert_to_full_column_if_const(); - const ColumnString& match_key = reinterpret_cast(*right_column); const ColumnArray::Offsets64& offsets = column.get_offsets(); ColumnPtr nested_ptr = nullptr; if (is_column_nullable(column.get_data())) { - nested_ptr = reinterpret_cast(column.get_data()).get_nested_column_ptr(); + nested_ptr = reinterpret_cast(column.get_data()) + .get_nested_column_ptr(); } else { nested_ptr = column.get_data_ptr(); } - const ColumnString& nested_key = reinterpret_cast(*nested_ptr); size_t rows = offsets.size(); // prepare return data auto matched_indices = ColumnVector::create(); matched_indices->reserve(rows); - for (size_t i = 0; i < rows; i++) - { + for (size_t i = 0; i < rows; i++) { bool matched = false; size_t begin = offsets[i - 1]; size_t end = offsets[i]; for (size_t j = begin; j < end; j++) { - if (nested_key.get_data_at(j) == match_key.get_data_at(i)) { - matched_indices->insert_value(j-begin+1); + if (nested_ptr->compare_at(j, i, 
*right_column, -1) == 0) { + matched_indices->insert_value(j - begin + 1); matched = true; break; } } - if (!matched) - matched_indices->insert_value(end-begin+1); // make indices for null + if (!matched) { + matched_indices->insert_value(end - begin + 1); // make indices for null + } } return matched_indices; } - template ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, @@ -262,16 +231,14 @@ class FunctionArrayElement : public IFunction { return dst_column; } - ColumnPtr _execute_map(const ColumnsWithTypeAndName& arguments, - size_t input_rows_count, const UInt8* src_null_map, - UInt8* dst_null_map) { - + ColumnPtr _execute_map(const ColumnsWithTypeAndName& arguments, size_t input_rows_count, + const UInt8* src_null_map, UInt8* dst_null_map) { auto left_column = arguments[0].column->convert_to_full_column_if_const(); - DataTypePtr val_type = reinterpret_cast(*arguments[0].type).get_values(); + DataTypePtr val_type = + reinterpret_cast(*arguments[0].type).get_values(); const auto& map_column = reinterpret_cast(*left_column); - const ColumnArray& column_keys = assert_cast (map_column.get_keys()); -// const ColumnArray& column_vals = assert_cast (map_column.get_values()); + const ColumnArray& column_keys = assert_cast(map_column.get_keys()); const auto& offsets = column_keys.get_offsets(); const size_t rows = offsets.size(); @@ -289,7 +256,6 @@ class FunctionArrayElement : public IFunction { ColumnWithTypeAndName data(map_column.get_values_ptr(), val_type, "value"); ColumnsWithTypeAndName args = {data, indices}; return _execute_non_nullable(args, input_rows_count, src_null_map, dst_null_map); - //return _get_mapped_value(column_vals, *matched_indices, src_null_map, dst_null_map); } ColumnPtr _execute_non_nullable(const ColumnsWithTypeAndName& arguments, diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index 
e7a1f208436ba0..7f772b929cdcf8 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -119,11 +119,13 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co create_olap_column_data_convertor(sub_column)); } case FieldType::OLAP_FIELD_TYPE_MAP: { - const auto& key_column = column.get_sub_column(0); + const auto& key_column = column.get_sub_column(0); const auto& value_column = column.get_sub_column(1); return std::make_unique( - std::make_unique(create_olap_column_data_convertor(key_column)), - std::make_unique(create_olap_column_data_convertor(value_column))); + std::make_unique( + create_olap_column_data_convertor(key_column)), + std::make_unique( + create_olap_column_data_convertor(value_column))); //const auto& key_column = column.get_sub_column(0); //const auto& value_column = column.get_sub_column(1); //return std::make_unique( @@ -729,8 +731,7 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { if (_nullmap) { const auto* nullable_column = assert_cast(_typed_column.column.get()); - column_map = - assert_cast(nullable_column->get_nested_column_ptr().get()); + column_map = assert_cast(nullable_column->get_nested_column_ptr().get()); data_type_map = assert_cast( (assert_cast(_typed_column.type.get())->get_nested_type()) .get()); @@ -745,9 +746,7 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { } Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( - const ColumnMap* column_map, - const DataTypeMap* data_type_map) { - + const ColumnMap* column_map, const DataTypeMap* data_type_map) { ColumnPtr key_data = column_map->get_keys_ptr(); ColumnPtr value_data = column_map->get_values_ptr(); if (column_map->get_keys().is_nullable()) { @@ -764,8 +763,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( const auto& offsets = column_map->get_offsets(); // use keys offsets - 
ColumnWithTypeAndName key_typed_column = { - key_data, remove_nullable(data_type_map->get_keys()),"map.key"}; + ColumnWithTypeAndName key_typed_column = {key_data, remove_nullable(data_type_map->get_keys()), + "map.key"}; _key_convertor->set_source_column(key_typed_column, _row_pos, _num_rows); _key_convertor->convert_to_olap(); @@ -788,14 +787,11 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( continue; } - map_value->set_key( - const_cast(_key_convertor->get_data_at(i))); - map_value->set_value( - const_cast(_value_convertor->get_data_at(i))); + map_value->set_key(const_cast(_key_convertor->get_data_at(i))); + map_value->set_value(const_cast(_value_convertor->get_data_at(i))); } return Status::OK(); } - } // namespace doris::vectorized diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 38532fc4f02f55..eb0a04c7ef86f6 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -19,8 +19,8 @@ #include "olap/types.h" #include "runtime/mem_pool.h" -#include "vec/columns/column_nullable.h" #include "vec/columns/column_map.h" +#include "vec/columns/column_nullable.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/types.h" #include "vec/data_types/data_type_map.h" @@ -375,22 +375,20 @@ class OlapBlockDataConvertor { OlapColumnDataConvertorBaseUPtr _item_convertor; }; - class OlapColumnDataConvertorMap - : public OlapColumnDataConvertorPaddedPODArray { + class OlapColumnDataConvertorMap : public OlapColumnDataConvertorPaddedPODArray { public: OlapColumnDataConvertorMap(OlapColumnDataConvertorBaseUPtr key_convertor, - OlapColumnDataConvertorBaseUPtr value_convertor) - : _key_convertor(std::move(key_convertor)), _value_convertor(std::move(value_convertor)) {} + OlapColumnDataConvertorBaseUPtr value_convertor) + : _key_convertor(std::move(key_convertor)), + _value_convertor(std::move(value_convertor)) {} Status convert_to_olap() override; 
private: - Status convert_to_olap(const ColumnMap* column_map, - const DataTypeMap* data_type_map); + Status convert_to_olap(const ColumnMap* column_map, const DataTypeMap* data_type_map); OlapColumnDataConvertorBaseUPtr _key_convertor; OlapColumnDataConvertorBaseUPtr _value_convertor; - };//OlapColumnDataConvertorMap - + }; //OlapColumnDataConvertorMap private: std::vector _convertors; diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index 013e4982339b53..522ce0f345e9d1 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -24,9 +24,9 @@ #include "runtime/runtime_state.h" #include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" +#include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" -#include "vec/columns/column_map.h" #include "vec/common/assert_cast.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_decimal.h" @@ -189,13 +189,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } } else if constexpr (type == TYPE_MAP) { - auto& column_map = assert_cast(*column); - auto& offsets = column_map.get_offsets(); - auto& column_key_array = assert_cast(column_map.get_keys()); - auto& column_val_array = assert_cast(column_map.get_values()); auto& map_type = assert_cast(*nested_type_ptr); - auto& key_nested_type_ptr = map_type.get_key_type(); - auto& val_nested_type_ptr = map_type.get_value_type(); for (ssize_t i = 0; i < row_size; ++i) { if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); @@ -203,40 +197,9 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, _buffer.reset(); _buffer.open_dynamic_mode(); - buf_ret = _buffer.push_string("{", 1); - bool begin = true; - for (auto j = offsets[i - 1]; j < offsets[i]; ++j) { - if 
(!begin) { - buf_ret = _buffer.push_string(", ", 2); - } - const auto& key_data = column_key_array.get_data_ptr(); - if (key_data->is_null_at(j)) { - buf_ret = _buffer.push_string("NULL", strlen("NULL")); - } else { - if (WhichDataType(remove_nullable(key_nested_type_ptr)).is_string()) { - buf_ret = _buffer.push_string("'", 1); - buf_ret = _add_one_cell(key_data, j, key_nested_type_ptr, _buffer); - buf_ret = _buffer.push_string("'", 1); - } else { - buf_ret = _add_one_cell(key_data, j, key_nested_type_ptr, _buffer); - } - } - buf_ret = _buffer.push_string(":", 1); - const auto& val_data = column_val_array.get_data_ptr(); - if (val_data->is_null_at(j)) { - buf_ret = _buffer.push_string("NULL", strlen("NULL")); - } else { - if (WhichDataType(remove_nullable(val_nested_type_ptr)).is_string()) { - buf_ret = _buffer.push_string("'", 1); - buf_ret = _add_one_cell(val_data, j, val_nested_type_ptr, _buffer); - buf_ret = _buffer.push_string("'", 1); - } else { - buf_ret = _add_one_cell(val_data, j, val_nested_type_ptr, _buffer); - } - } - begin = false; - } - buf_ret = _buffer.push_string("}", 1); + std::string cell_str = map_type.to_string(*column, i); + buf_ret = _buffer.push_string(cell_str.c_str(), strlen(cell_str.c_str())); + _buffer.close_dynamic_mode(); result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } @@ -709,7 +672,7 @@ Status VMysqlResultWriter::append_block(Block& input_block) { } break; } - case TYPE_MAP: { + case TYPE_MAP: { if (type_ptr->is_nullable()) { auto& nested_type = assert_cast(*type_ptr).get_nested_type(); //for map @@ -720,7 +683,7 @@ Status VMysqlResultWriter::append_block(Block& input_block) { type_ptr); } break; - } + } default: { LOG(WARNING) << "can't convert this type to mysql type. 
type = " << _output_vexpr_ctxs[i]->root()->type(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java index f7aa6aca7dc15a..92f558641a04a4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java @@ -53,7 +53,7 @@ public MapLiteral(LiteralExpr... exprs) throws AnalysisException { if (keyType == Type.NULL) { keyType = expr.getType(); } else { - keyType = Type.getAssignmentCompatibleType(keyType, expr.getType(), false); + keyType = Type.getAssignmentCompatibleType(keyType, expr.getType(), true); } if (keyType == Type.INVALID) { throw new AnalysisException("Invalid element type in Map"); @@ -62,7 +62,7 @@ public MapLiteral(LiteralExpr... exprs) throws AnalysisException { if (valueType == Type.NULL) { valueType = expr.getType(); } else { - valueType = Type.getAssignmentCompatibleType(valueType, expr.getType(), false); + valueType = Type.getAssignmentCompatibleType(valueType, expr.getType(), true); } if (valueType == Type.INVALID) { throw new AnalysisException("Invalid element type in Map"); @@ -90,7 +90,7 @@ public Expr uncheckedCastTo(Type targetType) throws AnalysisException { for (int i = 0; i < children.size(); ++ i) { Expr child = children.get(i); - if ((i & 1) == 0) { + if ((i % 2) == 0) { literal.children.set(i, child.uncheckedCastTo(keyType)); } else { literal.children.set(i, child.uncheckedCastTo(valueType)); @@ -167,9 +167,7 @@ public void write(DataOutput out) throws IOException { @Override public String getStringValue() { - List list = new ArrayList<>(children.size()); - children.forEach(v -> list.add(v.getStringValue())); - return "MAP{" + StringUtils.join(list, ", ") + "}"; + return toSqlImpl(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java index 
bc439d47106671..75b13848ea4619 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlColType.java @@ -53,7 +53,7 @@ public enum MysqlColType { MYSQL_TYPE_VARSTRING(253, "VAR STRING"), MYSQL_TYPE_STRING(254, "STRING"), MYSQL_TYPE_GEOMETRY(255, "GEOMETRY"), - MYSQL_TYPE_MAP(256, "MAP"); + MYSQL_TYPE_MAP(400, "MAP"); private MysqlColType(int code, String desc) { this.code = code; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java index 93c978a3fce9b4..7c365f7e133ea1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java @@ -185,9 +185,6 @@ public void finalize(Analyzer analyzer) throws UserException { // corresponding output slot isn't being materialized) materializedResultExprLists.clear(); Preconditions.checkState(resultExprLists.size() == children.size()); - if (analyzer.getDescTbl().getTupleDesc(tupleId) == null) { - return; - } List slots = analyzer.getDescTbl().getTupleDesc(tupleId).getSlots(); for (int i = 0; i < resultExprLists.size(); ++i) { List exprList = resultExprLists.get(i); From 615aa0796c8588e82f5f37cf7244b827aeda85d3 Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 1 Feb 2023 18:18:33 +0800 Subject: [PATCH 04/11] clean column writer for map and olap data convetor --- .../olap/rowset/segment_v2/column_reader.cpp | 4 +- .../olap/rowset/segment_v2/column_writer.cpp | 107 +++++++++--------- be/src/olap/rowset/segment_v2/column_writer.h | 12 +- be/src/olap/types.h | 2 +- be/src/runtime/map_value.cpp | 10 -- be/src/runtime/map_value.h | 12 -- be/src/runtime/types.cpp | 4 +- be/src/udf/udf.h | 23 ---- be/src/vec/data_types/data_type_factory.cpp | 4 +- be/src/vec/data_types/data_type_factory.hpp | 1 + be/src/vec/exprs/vmap_literal.cpp | 5 +- 
be/src/vec/olap/olap_data_convertor.cpp | 29 ++--- be/src/vec/olap/olap_data_convertor.h | 25 +++- be/src/vec/sink/vmysql_result_writer.cpp | 16 +-- .../java/org/apache/doris/catalog/Column.java | 30 ----- 15 files changed, 105 insertions(+), 179 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index c9efe02d383c58..64145eea4e1e4a 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -573,8 +573,8 @@ Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr size_t num_read = *n; auto column_key_ptr = column_map->get_keys().assume_mutable(); auto column_val_ptr = column_map->get_values().assume_mutable(); - RETURN_IF_ERROR(_key_iterator->next_batch(num_read, column_key_ptr, has_null)); - RETURN_IF_ERROR(_val_iterator->next_batch(num_read, column_val_ptr, has_null)); + RETURN_IF_ERROR(_key_iterator->next_batch(&num_read, column_key_ptr, has_null)); + RETURN_IF_ERROR(_val_iterator->next_batch(&num_read, column_val_ptr, has_null)); if (dst->is_nullable()) { auto null_map_ptr = diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 4bacaa3796ab2a..fc05fc97d80b7a 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -275,7 +275,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_writer = new ScalarColumnWriter(null_options, std::move(null_field), file_writer); } - + // create key & value writer std::vector> inner_writer_list; for (int i = 0; i < 2; ++i) { @@ -285,36 +285,36 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* array_column.set_index_length(-1); arr_opts.meta = opts.meta->mutable_children_columns(i); - arr_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY); - arr_opts.meta->set_encoding(opts.meta->encoding()); - 
arr_opts.meta->set_compression(opts.meta->compression()); - arr_opts.need_zone_map = false; - // no need inner array's null map - arr_opts.meta->set_is_nullable(false); - ColumnMetaPB* child_meta = arr_opts.meta->add_children_columns(); - // type and nullable and length. + // inner column meta from actual opts meta const TabletColumn& inner_column = column->get_sub_column(i); // field_type is true key and value array_column.add_sub_column(const_cast(inner_column)); array_column.set_name("map.arr"); child_meta->set_type(inner_column.type()); child_meta->set_length(inner_column.length()); - child_meta->set_column_id(arr_opts.meta->column_id() + 1); - child_meta->set_unique_id(arr_opts.meta->column_id() + 1); - child_meta->set_compression(opts.meta->compression()); - child_meta->set_encoding(opts.meta->encoding()); + child_meta->set_unique_id(arr_opts.meta->unique_id() + 1); + child_meta->set_compression(arr_opts.meta->compression()); + child_meta->set_encoding(arr_opts.meta->encoding()); child_meta->set_is_nullable(true); + + // set array column meta + arr_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY); + arr_opts.meta->set_encoding(opts.meta->encoding()); + arr_opts.meta->set_compression(opts.meta->compression()); + arr_opts.need_zone_map = false; + // no need inner array's null map + arr_opts.meta->set_is_nullable(false); RETURN_IF_ERROR(ColumnWriter::create(arr_opts, &array_column, file_writer, &inner_array_writer)); - inner_writer_list.emplace_back(std::move(inner_array_writer)); + inner_writer_list.push_back(std::move(inner_array_writer)); } // create map writer + std::unique_ptr sub_column_writer; std::unique_ptr writer_local = std::unique_ptr(new MapColumnWriter( - opts, std::move(field), null_writer, std::move(inner_writer_list[0]), - std::move(inner_writer_list[1]))); + opts, std::move(field), null_writer, inner_writer_list)); *writer = std::move(writer_local); return Status::OK(); @@ -943,7 +943,7 @@ Status ArrayColumnWriter::append_nulls(size_t num_rows) { 
Status ArrayColumnWriter::write_null_column(size_t num_rows, bool is_null) { uint8_t null_sign = is_null ? 1 : 0; - while (num_rows > 0) { + while (is_nullable() && num_rows > 0) { // TODO llj bulk write const uint8_t* null_sign_ptr = &null_sign; RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, 1)); @@ -958,29 +958,34 @@ Status ArrayColumnWriter::finish_current_page() { /// ============================= MapColumnWriter =====================//// MapColumnWriter::MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::unique_ptr key_writer, - std::unique_ptr value_writer) + ScalarColumnWriter* null_writer, + std::vector>& kv_writers) : ColumnWriter(std::move(field), opts.meta->is_nullable()), - _key_writer(std::move(key_writer)), - _value_writer(std::move(value_writer)), _opts(opts) { + CHECK_EQ(kv_writers.size(), 2); if (is_nullable()) { _null_writer.reset(null_writer); } + for (auto& sub_writers : kv_writers) { + _kv_writers.push_back(std::move(sub_writers)); + } } Status MapColumnWriter::init() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->init()); } - RETURN_IF_ERROR(_key_writer->init()); - RETURN_IF_ERROR(_value_writer->init()); + for (auto& sub_writer : _kv_writers) { + RETURN_IF_ERROR(sub_writer->init()); + } return Status::OK(); } uint64_t MapColumnWriter::estimate_buffer_size() { - size_t estimate = _key_writer->estimate_buffer_size() + _value_writer->estimate_buffer_size(); + size_t estimate = 0; + for (auto& sub_writer : _kv_writers) { + estimate += sub_writer->estimate_buffer_size(); + } if (is_nullable()) { estimate += _null_writer->estimate_buffer_size(); } @@ -991,26 +996,19 @@ Status MapColumnWriter::finish() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->finish()); } - RETURN_IF_ERROR(_key_writer->finish()); - RETURN_IF_ERROR(_value_writer->finish()); + for (auto& sub_writer : _kv_writers) { + RETURN_IF_ERROR(sub_writer->finish()); + } return Status::OK(); } // todo. 
make keys and values write Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { - size_t remaining = num_rows; - const auto* col_cursor = reinterpret_cast(*ptr); - while (remaining > 0) { - size_t num_written = 1; - auto* key_data_ptr = const_cast(col_cursor)->key_data(); - const uint8_t* key_ptr = (const uint8_t*)key_data_ptr; - RETURN_IF_ERROR(_key_writer->append_data(&key_ptr, 1)); - auto* val_data_ptr = const_cast(col_cursor)->value_data(); - const uint8_t* val_ptr = (const uint8_t*)val_data_ptr; - RETURN_IF_ERROR(_value_writer->append_data(&val_ptr, 1)); - remaining -= num_written; - col_cursor += num_written; - *ptr += num_written * sizeof(MapValue); + auto kv_ptr = reinterpret_cast(*ptr); + for (size_t i = 0; i < 2; ++i) { + auto data = *(kv_ptr + i); + const uint8_t* val_ptr = (const uint8_t*)data; + RETURN_IF_ERROR(_kv_writers[i]->append_data(&val_ptr, num_rows)); } if (is_nullable()) { return write_null_column(num_rows, false); @@ -1022,8 +1020,9 @@ Status MapColumnWriter::write_data() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->write_data()); } - RETURN_IF_ERROR(_key_writer->write_data()); - RETURN_IF_ERROR(_value_writer->write_data()); + for (auto& sub_writer : _kv_writers) { + RETURN_IF_ERROR(sub_writer->write_data()); + } return Status::OK(); } @@ -1031,23 +1030,26 @@ Status MapColumnWriter::write_ordinal_index() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->write_ordinal_index()); } - RETURN_IF_ERROR(_key_writer->write_ordinal_index()); - RETURN_IF_ERROR(_value_writer->write_ordinal_index()); + for (auto& sub_writer : _kv_writers) { + RETURN_IF_ERROR(sub_writer->write_ordinal_index()); + } return Status::OK(); } Status MapColumnWriter::append_nulls(size_t num_rows) { + for (auto& sub_writer : _kv_writers) { + RETURN_IF_ERROR(sub_writer->append_nulls(num_rows)); + } return write_null_column(num_rows, true); } Status MapColumnWriter::write_null_column(size_t num_rows, bool is_null) { - uint8_t null_sign = is_null 
? 1 : 0; - uint8_t* null_sign_arr = new uint8_t[num_rows]; - for (int i = 0; i < num_rows; ++i) { - null_sign_arr[i] = null_sign; + if (is_nullable()) { + uint8_t null_sign = is_null ? 1 : 0; + std::vector null_signs(num_rows, null_sign); + const uint8_t* null_sign_ptr = null_signs.data(); + RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, num_rows)); } - const uint8_t* null_sign_ptr = reinterpret_cast(null_sign_arr); - RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, num_rows)); return Status::OK(); } @@ -1055,8 +1057,9 @@ Status MapColumnWriter::finish_current_page() { if (is_nullable()) { RETURN_IF_ERROR(_null_writer->finish_current_page()); } - RETURN_IF_ERROR(_key_writer->finish_current_page()); - RETURN_IF_ERROR(_value_writer->finish_current_page()); + for (auto& sub_writer : _kv_writers) { + RETURN_IF_ERROR(sub_writer->finish_current_page()); + } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index d630dbb3791f96..022fb8f05b7e21 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -374,9 +374,9 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::unique_ptr key_writer, - std::unique_ptr value_writer); + ScalarColumnWriter* null_writer, + std::vector>& _kv_writers); + ~MapColumnWriter() override = default; Status init() override; @@ -414,18 +414,18 @@ class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { } // according key writer to get next rowid - ordinal_t get_next_rowid() const override { return _key_writer->get_next_rowid(); } + ordinal_t get_next_rowid() const override { return _kv_writers[0]->get_next_rowid(); } private: Status 
write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记 - std::unique_ptr _key_writer; - std::unique_ptr _value_writer; + std::vector> _kv_writers; // we need null writer to make sure a row is null or not std::unique_ptr _null_writer; std::unique_ptr _inverted_index_builder; ColumnWriterOptions _opts; }; + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 868716cb968198..b759795b2d6394 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -508,7 +508,7 @@ class MapTypeInfo : public TypeInfo { } // todo . is here only to need return 16 for two ptr? - const size_t size() const override { return 16; } + const size_t size() const override { return sizeof(MapValue); } FieldType type() const override { return OLAP_FIELD_TYPE_MAP; } diff --git a/be/src/runtime/map_value.cpp b/be/src/runtime/map_value.cpp index 4949c3dd0e0705..1828fd27b39058 100644 --- a/be/src/runtime/map_value.cpp +++ b/be/src/runtime/map_value.cpp @@ -20,20 +20,10 @@ namespace doris { ///====================== map-value funcs ======================/// -void MapValue::to_map_val(MapVal* val) const { - val->length = _length; - val->key = _key_data; - val->value = _value_data; -} - void MapValue::shallow_copy(const MapValue* value) { _length = value->_length; _key_data = value->_key_data; _value_data = value->_value_data; } -MapValue MapValue::from_map_val(const MapVal& val) { - return MapValue(val.key, val.value, val.length); -} - } // namespace doris diff --git a/be/src/runtime/map_value.h b/be/src/runtime/map_value.h index 965b928e0c3e7e..df0d1b06de27d4 100644 --- a/be/src/runtime/map_value.h +++ b/be/src/runtime/map_value.h @@ -18,19 +18,10 @@ #pragma once #include - #include "runtime/primitive_type.h" -namespace doris_udf { -class FunctionContext; -struct AnyVal; -} // namespace doris_udf - namespace doris { -using doris_udf::FunctionContext; -using doris_udf::AnyVal; - /** * MapValue is for map type in memory */ @@ 
-43,15 +34,12 @@ class MapValue { MapValue(void* k_data, void* v_data, int32_t length) : _key_data(k_data), _value_data(v_data), _length(length) {} - void to_map_val(MapVal* val) const; - int32_t size() const { return _length; } int32_t length() const { return _length; } void shallow_copy(const MapValue* other); - static MapValue from_map_val(const MapVal& val); const void* key_data() const { return _key_data; } void* mutable_key_data() const { return _key_data; } diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index f5eb0186194ffe..5628fe4c918207 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -98,6 +98,7 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) case TTypeNodeType::MAP: { DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 2); + DCHECK(!node.__isset.contains_null); type = TYPE_MAP; ++(*idx); children.push_back(TypeDescriptor(types, idx)); @@ -229,9 +230,6 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField( - create_data_type(col_desc.children[0], col_desc.contains_null), - create_data_type(col_desc.children[1], col_desc.contains_null)); + create_data_type(col_desc.children[0], col_desc.contains_nulls[0]), + create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); break; } case INVALID_TYPE: diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index 9bc5e20d5c40a0..879418a326897c 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -37,6 +37,7 @@ #include "vec/data_types/data_type_fixed_length_object.h" #include "vec/data_types/data_type_hll.h" #include "vec/data_types/data_type_jsonb.h" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nothing.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" diff --git a/be/src/vec/exprs/vmap_literal.cpp 
b/be/src/vec/exprs/vmap_literal.cpp index b3cce0b61b69d5..cbcc70ffab385b 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -36,10 +36,11 @@ Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, RETURN_IF_ERROR(_children[idx]->get_const_col(context, &const_col_wrapper)); const_col_wrapper->column_ptr->get(0, item); - if ((idx & 1) == 0) + if ((idx & 1) == 0) { keys.get().push_back(item); - else + } else { values.get().push_back(item); + } } map.get().push_back(keys); map.get().push_back(values); diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index edd7c339ee7a86..e02a4ea49fb06d 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -808,7 +808,9 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { } Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( - const ColumnMap* column_map, const DataTypeMap* data_type_map) { + const ColumnMap* column_map, + const DataTypeMap* data_type_map) { + ColumnPtr key_data = column_map->get_keys_ptr(); ColumnPtr value_data = column_map->get_values_ptr(); if (column_map->get_keys().is_nullable()) { @@ -823,10 +825,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( value_data = val_nullable_column.get_nested_column_ptr(); } - const auto& offsets = column_map->get_offsets(); // use keys offsets - - ColumnWithTypeAndName key_typed_column = {key_data, remove_nullable(data_type_map->get_keys()), - "map.key"}; + ColumnWithTypeAndName key_typed_column = { + key_data, remove_nullable(data_type_map->get_keys()),"map.key"}; _key_convertor->set_source_column(key_typed_column, _row_pos, _num_rows); _key_convertor->convert_to_olap(); @@ -835,23 +835,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( _value_convertor->set_source_column(value_typed_column, _row_pos, _num_rows); 
_value_convertor->convert_to_olap(); - MapValue* map_value = _values.data(); - for (size_t i = 0; i < _num_rows; ++i, ++map_value) { - int64_t cur_pos = _row_pos + i; - int64_t prev_pos = cur_pos - 1; - if (_nullmap && _nullmap[cur_pos]) { - continue; - } - auto single_map_size = offsets[cur_pos] - offsets[prev_pos]; - new (map_value) MapValue(single_map_size); - - if (single_map_size == 0) { - continue; - } - - map_value->set_key(const_cast(_key_convertor->get_data_at(i))); - map_value->set_value(const_cast(_value_convertor->get_data_at(i))); - } + _results[0] = _key_convertor->get_data(); + _results[1] = _value_convertor->get_data(); return Status::OK(); } diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 1d387df86627f0..2318b0e3693b0b 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -396,20 +396,33 @@ class OlapBlockDataConvertor { OlapColumnDataConvertorBaseUPtr _item_convertor; }; - class OlapColumnDataConvertorMap : public OlapColumnDataConvertorPaddedPODArray { + + class OlapColumnDataConvertorMap + : public OlapColumnDataConvertorBase { public: OlapColumnDataConvertorMap(OlapColumnDataConvertorBaseUPtr key_convertor, - OlapColumnDataConvertorBaseUPtr value_convertor) - : _key_convertor(std::move(key_convertor)), - _value_convertor(std::move(value_convertor)) {} + OlapColumnDataConvertorBaseUPtr value_convertor) + : _key_convertor(std::move(key_convertor)), _value_convertor(std::move(value_convertor)) { + _results.resize(2); + } Status convert_to_olap() override; + const void* get_data() const override { + return _results.data(); + }; + + const void* get_data_at(size_t offset) const override { + LOG(FATAL) << "now not support get_data_at for OlapColumnDataConvertorMap"; + }; private: - Status convert_to_olap(const ColumnMap* column_map, const DataTypeMap* data_type_map); + Status convert_to_olap(const ColumnMap* column_map, + const DataTypeMap* data_type_map); 
OlapColumnDataConvertorBaseUPtr _key_convertor; OlapColumnDataConvertorBaseUPtr _value_convertor; - }; //OlapColumnDataConvertorMap + std::vector _results; + };//OlapColumnDataConvertorMap + private: std::vector _convertors; diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index 686e1c7c584b68..fdc0b57353d838 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -24,7 +24,6 @@ #include "runtime/runtime_state.h" #include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" -#include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" #include "vec/common/assert_cast.h" @@ -191,17 +190,18 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } } else if constexpr (type == TYPE_MAP) { - auto& map_type = assert_cast(*nested_type_ptr); + DCHECK_GE(sub_types.size(), 1); + auto& map_type = assert_cast(*sub_types[0]); for (ssize_t i = 0; i < row_size; ++i) { if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); } _buffer.reset(); - + _buffer.open_dynamic_mode(); std::string cell_str = map_type.to_string(*column, i); buf_ret = _buffer.push_string(cell_str.c_str(), strlen(cell_str.c_str())); - + _buffer.close_dynamic_mode(); result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } @@ -736,11 +736,11 @@ Status VMysqlResultWriter::append_block(Block& input_block) { if (type_ptr->is_nullable()) { auto& nested_type = assert_cast(*type_ptr).get_nested_type(); //for map - status = _add_one_column(column_ptr, result, - nested_type); + status = _add_one_column(column_ptr, result, scale, + {nested_type}); } else { - status = _add_one_column(column_ptr, result, - type_ptr); + status = _add_one_column(column_ptr, result, scale, + {type_ptr}); } break; } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 8048c176e198b0..2c49d8593be505 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -460,36 +460,6 @@ private void toChildrenThrift(Column column, TColumn tColumn) { } } - //private void toChildrenThrift(Column column, TColumn tColumn) { - // if (column.type.isArrayType()) { - // Column children = column.getChildren().get(0); - - // TColumn childrenTColumn = new TColumn(); - // childrenTColumn.setColumnName(children.name); - - // TColumnType childrenTColumnType = new TColumnType(); - // childrenTColumnType.setType(children.getDataType().toThrift()); - // childrenTColumnType.setType(children.getDataType().toThrift()); - // childrenTColumnType.setLen(children.getStrLen()); - // childrenTColumnType.setPrecision(children.getPrecision()); - // childrenTColumnType.setScale(children.getScale()); - - // childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); - // childrenTColumn.setColumnType(childrenTColumnType); - // childrenTColumn.setIsAllowNull(children.isAllowNull()); - // // TODO: If we don't set the aggregate type for children, the type will be - // // considered as TAggregationType::SUM after deserializing in BE. - // // For now, we make children inherit the aggregate type from their parent. 
- // if (tColumn.getAggregationType() != null) { - // childrenTColumn.setAggregationType(tColumn.getAggregationType()); - // } - - // tColumn.setChildrenColumn(new ArrayList<>()); - // tColumn.children_column.add(childrenTColumn); - - // toChildrenThrift(children, childrenTColumn); - // } - //} public void checkSchemaChangeAllowed(Column other) throws DdlException { if (Strings.isNullOrEmpty(other.name)) { From ed5225e124617c5d88a8d955ebe0ea7b3f9e167a Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 2 Feb 2023 08:09:58 +0800 Subject: [PATCH 05/11] update from_string and code format --- be/src/olap/rowset/segment_v2/column_writer.h | 2 +- be/src/vec/columns/column_map.cpp | 7 ++- be/src/vec/data_types/data_type_map.cpp | 56 ++++++++++++++++++- be/src/vec/olap/olap_data_convertor.cpp | 5 -- 4 files changed, 59 insertions(+), 11 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 022fb8f05b7e21..d386b092881f20 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -417,7 +417,7 @@ class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { ordinal_t get_next_rowid() const override { return _kv_writers[0]->get_next_rowid(); } private: - Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记 + Status write_null_column(size_t num_rows, bool is_null); std::vector> _kv_writers; // we need null writer to make sure a row is null or not diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index fb1412c81c78da..f477b9f336add5 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -87,7 +87,10 @@ void ColumnMap::insert(const Field& x) { values->insert(map[1]); } -void ColumnMap::insert_default() {} +void ColumnMap::insert_default() { + keys->insert_default(); + values->insert_default(); +} void ColumnMap::pop_back(size_t n) { 
keys->pop_back(n); @@ -180,4 +183,4 @@ void ColumnMap::protect() { get_values().protect(); } -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp index 4c6a94a21640ed..c40e0362c5d1f6 100644 --- a/be/src/vec/data_types/data_type_map.cpp +++ b/be/src/vec/data_types/data_type_map.cpp @@ -81,17 +81,67 @@ void DataTypeMap::to_string(const class doris::vectorized::IColumn& column, size Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const { DCHECK(!rb.eof()); auto* map_column = assert_cast(column); + if (*rb.position() != '{') { return Status::InvalidArgument("map does not start with '{' character, found '{}'", *rb.position()); } - - keys->from_string(rb, &map_column->get_keys()); - values->from_string(rb, &map_column->get_values()); if (*(rb.end() - 1) != '}') { return Status::InvalidArgument("map does not end with '}' character, found '{}'", *(rb.end() - 1)); } + + std::stringstream keyCharset; + std::stringstream valCharset; + + if (rb.count() == 2) { + // empty map {} , need to make empty array to add offset + keyCharset << "[]"; + valCharset << "[]"; + } else { + // {"aaa": 1, "bbb": 20}, need to handle key and value to make key column arr and value arr + // skip "{" + ++rb.position(); + keyCharset << "["; + valCharset << "["; + while (!rb.eof()) { + size_t kv_len = 0; + auto start = rb.position(); + while (!rb.eof() && *start != ',' && *start != '}') { + kv_len++; + start++; + } + if (kv_len >= rb.count()) { + return Status::InvalidArgument("Invalid Length"); + } + + size_t k_len = 0; + auto k_rb = rb.position(); + while (kv_len > 0 && *k_rb != ':') { + k_len++; + k_rb++; + } + ReadBuffer key_rb(rb.position(), k_len); + ReadBuffer val_rb(k_rb + 1, kv_len - k_len - 1); + + // handle key + keyCharset << key_rb.to_string(); + keyCharset << ","; + + // handle value + valCharset << val_rb.to_string(); + valCharset << 
","; + + rb.position() += kv_len + 1; + } + keyCharset << ']'; + valCharset << ']'; + } + + ReadBuffer kb(keyCharset.str().data(), keyCharset.str().length()); + ReadBuffer vb(valCharset.str().data(), valCharset.str().length()); + keys->from_string(kb, &map_column->get_keys()); + values->from_string(vb, &map_column->get_values()); return Status::OK(); } diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index e02a4ea49fb06d..f9493ade082c35 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -136,11 +136,6 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co create_olap_column_data_convertor(key_column)), std::make_unique( create_olap_column_data_convertor(value_column))); - //const auto& key_column = column.get_sub_column(0); - //const auto& value_column = column.get_sub_column(1); - //return std::make_unique( - // create_olap_column_data_convertor(key_column), - // create_olap_column_data_convertor(value_column)); } default: { DCHECK(false) << "Invalid type in RowBlockV2:" << column.type(); From 817e0abc94497b95a4693f7ea7bf21630828b400 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 2 Feb 2023 12:00:47 +0800 Subject: [PATCH 06/11] use check clang format --- be/src/olap/field.h | 5 +- be/src/olap/rowset/segment_v2/column_reader.h | 8 +- .../olap/rowset/segment_v2/column_writer.cpp | 14 +- be/src/olap/rowset/segment_v2/column_writer.h | 5 +- be/src/runtime/map_value.h | 2 +- be/src/runtime/primitive_type.cpp | 2 +- be/src/runtime/types.cpp | 116 +++--- be/src/vec/data_types/data_type_factory.cpp | 20 +- be/src/vec/exprs/vexpr.cpp | 336 +++++++++--------- be/src/vec/exprs/vmap_literal.cpp | 4 +- be/src/vec/olap/olap_data_convertor.cpp | 8 +- be/src/vec/olap/olap_data_convertor.h | 19 +- 12 files changed, 265 insertions(+), 274 deletions(-) diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 83c194dc90e0e5..56f26c33aae769 100644 --- 
a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -456,7 +456,6 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { return _type_info->hash_code(cell.cell_ptr(), seed); } - class MapField : public Field { public: explicit MapField(const TabletColumn& column) : Field(column) {} @@ -468,14 +467,14 @@ class MapField : public Field { } _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); } - // make variable_ptr memory allocate to cell_ptr as MapValue + // make variable_ptr memory allocate to cell_ptr as MapValue char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { return variable_ptr + _length; } size_t get_variable_len() const override { return _length; } }; - + class StructField : public Field { public: explicit StructField(const TabletColumn& column) : Field(column) {} diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index b2e3987f4119d4..59a5b4ef1a2211 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -400,7 +400,7 @@ class MapFileColumnIterator final : public ColumnIterator { ColumnIterator* key_iterator, ColumnIterator* val_iterator); ~MapFileColumnIterator() override = default; - + Status init(const ColumnIteratorOptions& opts) override; Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override; @@ -409,16 +409,16 @@ class MapFileColumnIterator final : public ColumnIterator { Status read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) override; - + Status seek_to_first() override { RETURN_IF_ERROR(_key_iterator->seek_to_first()); RETURN_IF_ERROR(_val_iterator->seek_to_first()); RETURN_IF_ERROR(_null_iterator->seek_to_first()); return Status::OK(); } - + Status seek_to_ordinal(ordinal_t ord) override; - + ordinal_t get_current_ordinal() const override { return _key_iterator->get_current_ordinal(); } private: diff --git 
a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index fc05fc97d80b7a..8d89f2a763105f 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -275,7 +275,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_writer = new ScalarColumnWriter(null_options, std::move(null_field), file_writer); } - + // create key & value writer std::vector> inner_writer_list; for (int i = 0; i < 2; ++i) { @@ -312,9 +312,8 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* } // create map writer std::unique_ptr sub_column_writer; - std::unique_ptr writer_local = - std::unique_ptr(new MapColumnWriter( - opts, std::move(field), null_writer, inner_writer_list)); + std::unique_ptr writer_local = std::unique_ptr( + new MapColumnWriter(opts, std::move(field), null_writer, inner_writer_list)); *writer = std::move(writer_local); return Status::OK(); @@ -958,10 +957,9 @@ Status ArrayColumnWriter::finish_current_page() { /// ============================= MapColumnWriter =====================//// MapColumnWriter::MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::vector>& kv_writers) - : ColumnWriter(std::move(field), opts.meta->is_nullable()), - _opts(opts) { + ScalarColumnWriter* null_writer, + std::vector>& kv_writers) + : ColumnWriter(std::move(field), opts.meta->is_nullable()), _opts(opts) { CHECK_EQ(kv_writers.size(), 2); if (is_nullable()) { _null_writer.reset(null_writer); diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index d386b092881f20..7d140324ddf6b5 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -374,8 +374,8 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { class MapColumnWriter final 
: public ColumnWriter, public FlushPageCallback { public: explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::vector>& _kv_writers); + ScalarColumnWriter* null_writer, + std::vector>& _kv_writers); ~MapColumnWriter() override = default; @@ -426,6 +426,5 @@ class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { ColumnWriterOptions _opts; }; - } // namespace segment_v2 } // namespace doris diff --git a/be/src/runtime/map_value.h b/be/src/runtime/map_value.h index df0d1b06de27d4..d275316cf63ee8 100644 --- a/be/src/runtime/map_value.h +++ b/be/src/runtime/map_value.h @@ -18,6 +18,7 @@ #pragma once #include + #include "runtime/primitive_type.h" namespace doris { @@ -40,7 +41,6 @@ class MapValue { void shallow_copy(const MapValue* other); - const void* key_data() const { return _key_data; } void* mutable_key_data() const { return _key_data; } const void* value_data() const { return _value_data; } diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 3aa91c18ff8d19..2dd7c438fc4287 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -271,7 +271,7 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case TPrimitiveType::MAP: return TYPE_MAP; - + case TPrimitiveType::STRUCT: return TYPE_STRUCT; diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 5628fe4c918207..386f3b57f041d2 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -98,7 +98,7 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) case TTypeNodeType::MAP: { DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 2); - DCHECK(!node.__isset.contains_null); + DCHECK(!node.__isset.contains_null); type = TYPE_MAP; ++(*idx); children.push_back(TypeDescriptor(types, idx)); @@ -251,67 +251,67 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField"; - return 
ss.str(); } - case TYPE_MAP: - ss << "MAP<" << children[0].debug_string() << ", " << children[1].debug_string() << ">"; - return ss.str(); - case TYPE_STRUCT: { - ss << "STRUCT<"; - for (size_t i = 0; i < children.size(); i++) { - ss << field_names[i]; - ss << ":"; - ss << children[i].debug_string(); - if (i != children.size() - 1) { - ss << ","; + + std::string TypeDescriptor::debug_string() const { + std::stringstream ss; + switch (type) { + case TYPE_CHAR: + ss << "CHAR(" << len << ")"; + return ss.str(); + case TYPE_DECIMALV2: + ss << "DECIMALV2(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL32: + ss << "DECIMAL32(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL64: + ss << "DECIMAL64(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL128I: + ss << "DECIMAL128(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_ARRAY: { + ss << "ARRAY<" << children[0].debug_string() << ">"; + return ss.str(); + } + case TYPE_MAP: + ss << "MAP<" << children[0].debug_string() << ", " << children[1].debug_string() << ">"; + return ss.str(); + case TYPE_STRUCT: { + ss << "STRUCT<"; + for (size_t i = 0; i < children.size(); i++) { + ss << field_names[i]; + ss << ":"; + ss << children[i].debug_string(); + if (i != children.size() - 1) { + ss << ","; + } } + ss << ">"; + return ss.str(); + } + default: + return type_to_string(type); } - ss << ">"; - return ss.str(); - } - default: - return type_to_string(type); } -} -std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type) { - os << type.debug_string(); - return os; -} + std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type) { + os << type.debug_string(); + return os; + } -TTypeDesc create_type_desc(PrimitiveType type, int precision, int scale) { - TTypeDesc type_desc; - std::vector node_type; - node_type.emplace_back(); - TScalarType scalarType; - scalarType.__set_type(to_thrift(type)); - 
scalarType.__set_len(-1); - scalarType.__set_precision(precision); - scalarType.__set_scale(scale); - node_type.back().__set_scalar_type(scalarType); - type_desc.__set_types(node_type); - return type_desc; -} + TTypeDesc create_type_desc(PrimitiveType type, int precision, int scale) { + TTypeDesc type_desc; + std::vector node_type; + node_type.emplace_back(); + TScalarType scalarType; + scalarType.__set_type(to_thrift(type)); + scalarType.__set_len(-1); + scalarType.__set_precision(precision); + scalarType.__set_scale(scale); + node_type.back().__set_scalar_type(scalarType); + type_desc.__set_types(node_type); + return type_desc; + } } // namespace doris diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index af6407af1cb8ba..7899b2afea41c8 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -165,7 +165,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared( create_data_type(col_desc.children[0], col_desc.contains_nulls[0])); break; - case TYPE_STRUCT: { + case TYPE_STRUCT: DCHECK(col_desc.children.size() >= 1); size_t child_size = col_desc.children.size(); DCHECK_EQ(col_desc.field_names.size(), child_size); @@ -186,16 +186,16 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); break; } - case INVALID_TYPE: - default: - DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; - break; - } +case INVALID_TYPE: +default: + DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; + break; +} - if (nested && is_nullable) { - return std::make_shared(nested); - } - return nested; +if (nested && is_nullable) { + return std::make_shared(nested); +} +return nested; } DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type, int precision, diff --git 
a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 101407ae802d44..ac6b4e53c69e4f 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -174,218 +174,220 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr default: return Status::InternalError("Unknown expr node type: {}", texpr_node.node_type); } - return Status::OK(); -} + return Status::OK(); + } -Status VExpr::create_tree_from_thrift(doris::ObjectPool* pool, - const std::vector& nodes, VExpr* parent, - int* node_idx, VExpr** root_expr, VExprContext** ctx) { - // propagate error case - if (*node_idx >= nodes.size()) { - return Status::InternalError("Failed to reconstruct expression tree from thrift."); - } - int num_children = nodes[*node_idx].num_children; - VExpr* expr = nullptr; - RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); - DCHECK(expr != nullptr); - if (parent != nullptr) { - parent->add_child(expr); - } else { - DCHECK(root_expr != nullptr); - DCHECK(ctx != nullptr); - *root_expr = expr; - *ctx = pool->add(new VExprContext(expr)); - } - for (int i = 0; i < num_children; i++) { - *node_idx += 1; - RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); - // we are expecting a child, but have used all nodes - // this means we have been given a bad tree and must fail + Status VExpr::create_tree_from_thrift(doris::ObjectPool * pool, + const std::vector& nodes, VExpr* parent, + int* node_idx, VExpr** root_expr, VExprContext** ctx) { + // propagate error case if (*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); } - } - return Status::OK(); -} - -Status VExpr::create_expr_tree(doris::ObjectPool* pool, const doris::TExpr& texpr, - VExprContext** ctx) { - if (texpr.nodes.size() == 0) { - *ctx = nullptr; + int num_children = nodes[*node_idx].num_children; + VExpr* expr = nullptr; + RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], 
&expr)); + DCHECK(expr != nullptr); + if (parent != nullptr) { + parent->add_child(expr); + } else { + DCHECK(root_expr != nullptr); + DCHECK(ctx != nullptr); + *root_expr = expr; + *ctx = pool->add(new VExprContext(expr)); + } + for (int i = 0; i < num_children; i++) { + *node_idx += 1; + RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); + // we are expecting a child, but have used all nodes + // this means we have been given a bad tree and must fail + if (*node_idx >= nodes.size()) { + return Status::InternalError("Failed to reconstruct expression tree from thrift."); + } + } return Status::OK(); } - int node_idx = 0; - VExpr* e = nullptr; - Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); - if (status.ok() && node_idx + 1 != texpr.nodes.size()) { - status = Status::InternalError( - "Expression tree only partially reconstructed. Not all thrift nodes were used."); - } - if (!status.ok()) { - LOG(ERROR) << "Could not construct expr tree.\n" - << status << "\n" - << apache::thrift::ThriftDebugString(texpr); - } - return status; -} -Status VExpr::create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs) { - ctxs->clear(); - for (int i = 0; i < texprs.size(); ++i) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); - ctxs->push_back(ctx); + Status VExpr::create_expr_tree(doris::ObjectPool * pool, const doris::TExpr& texpr, + VExprContext** ctx) { + if (texpr.nodes.size() == 0) { + *ctx = nullptr; + return Status::OK(); + } + int node_idx = 0; + VExpr* e = nullptr; + Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); + if (status.ok() && node_idx + 1 != texpr.nodes.size()) { + status = Status::InternalError( + "Expression tree only partially reconstructed. 
Not all thrift nodes were " + "used."); + } + if (!status.ok()) { + LOG(ERROR) << "Could not construct expr tree.\n" + << status << "\n" + << apache::thrift::ThriftDebugString(texpr); + } + return status; } - return Status::OK(); -} -Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc) { - for (auto ctx : ctxs) { - RETURN_IF_ERROR(ctx->prepare(state, row_desc)); + Status VExpr::create_expr_trees(ObjectPool * pool, const std::vector& texprs, + std::vector* ctxs) { + ctxs->clear(); + for (int i = 0; i < texprs.size(); ++i) { + VExprContext* ctx = nullptr; + RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); + ctxs->push_back(ctx); + } + return Status::OK(); } - return Status::OK(); -} -void VExpr::close(const std::vector& ctxs, RuntimeState* state) { - for (auto ctx : ctxs) { - ctx->close(state); + Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, + const RowDescriptor& row_desc) { + for (auto ctx : ctxs) { + RETURN_IF_ERROR(ctx->prepare(state, row_desc)); + } + return Status::OK(); } -} -Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->open(state)); + void VExpr::close(const std::vector& ctxs, RuntimeState* state) { + for (auto ctx : ctxs) { + ctx->close(state); + } } - return Status::OK(); -} -Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs) { - DCHECK(new_ctxs != nullptr); - if (!new_ctxs->empty()) { - // 'ctxs' was already cloned into '*new_ctxs', nothing to do. 
- DCHECK_EQ(new_ctxs->size(), ctxs.size()); - for (int i = 0; i < new_ctxs->size(); ++i) { - DCHECK((*new_ctxs)[i]->_is_clone); + Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { + for (int i = 0; i < ctxs.size(); ++i) { + RETURN_IF_ERROR(ctxs[i]->open(state)); } return Status::OK(); } - new_ctxs->resize(ctxs.size()); - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); - } - return Status::OK(); -} -std::string VExpr::debug_string() const { - // TODO: implement partial debug string for member vars - std::stringstream out; - out << " type=" << _type.debug_string(); - out << " codegen=" - << "false"; - if (!_children.empty()) { - out << " children=" << debug_string(_children); + Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, + std::vector* new_ctxs) { + DCHECK(new_ctxs != nullptr); + if (!new_ctxs->empty()) { + // 'ctxs' was already cloned into '*new_ctxs', nothing to do. + DCHECK_EQ(new_ctxs->size(), ctxs.size()); + for (int i = 0; i < new_ctxs->size(); ++i) { + DCHECK((*new_ctxs)[i]->_is_clone); + } + return Status::OK(); + } + new_ctxs->resize(ctxs.size()); + for (int i = 0; i < ctxs.size(); ++i) { + RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); + } + return Status::OK(); } + std::string VExpr::debug_string() const { + // TODO: implement partial debug string for member vars + std::stringstream out; + out << " type=" << _type.debug_string(); + out << " codegen=" + << "false"; - return out.str(); -} - -std::string VExpr::debug_string(const std::vector& exprs) { - std::stringstream out; - out << "["; + if (!_children.empty()) { + out << " children=" << debug_string(_children); + } - for (int i = 0; i < exprs.size(); ++i) { - out << (i == 0 ? 
"" : " ") << exprs[i]->debug_string(); + return out.str(); } - out << "]"; - return out.str(); -} + std::string VExpr::debug_string(const std::vector& exprs) { + std::stringstream out; + out << "["; -std::string VExpr::debug_string(const std::vector& ctxs) { - std::vector exprs; - for (int i = 0; i < ctxs.size(); ++i) { - exprs.push_back(ctxs[i]->root()); + for (int i = 0; i < exprs.size(); ++i) { + out << (i == 0 ? "" : " ") << exprs[i]->debug_string(); + } + + out << "]"; + return out.str(); } - return debug_string(exprs); -} -bool VExpr::is_constant() const { - for (int i = 0; i < _children.size(); ++i) { - if (!_children[i]->is_constant()) { - return false; + std::string VExpr::debug_string(const std::vector& ctxs) { + std::vector exprs; + for (int i = 0; i < ctxs.size(); ++i) { + exprs.push_back(ctxs[i]->root()); } + return debug_string(exprs); } - return true; -} + bool VExpr::is_constant() const { + for (int i = 0; i < _children.size(); ++i) { + if (!_children[i]->is_constant()) { + return false; + } + } -Status VExpr::get_const_col(VExprContext* context, ColumnPtrWrapper** output) { - *output = nullptr; - if (!is_constant()) { - return Status::OK(); + return true; } - if (_constant_col != nullptr) { + Status VExpr::get_const_col(VExprContext * context, ColumnPtrWrapper * *output) { + *output = nullptr; + if (!is_constant()) { + return Status::OK(); + } + + if (_constant_col != nullptr) { + *output = _constant_col.get(); + return Status::OK(); + } + + int result = -1; + Block block; + // If block is empty, some functions will produce no result. So we insert a column with + // single value here. 
+ block.insert({ColumnUInt8::create(1), std::make_shared(), ""}); + RETURN_IF_ERROR(execute(context, &block, &result)); + DCHECK(result != -1); + const auto& column = block.get_by_position(result).column; + _constant_col = std::make_shared(column); *output = _constant_col.get(); return Status::OK(); } - int result = -1; - Block block; - // If block is empty, some functions will produce no result. So we insert a column with - // single value here. - block.insert({ColumnUInt8::create(1), std::make_shared(), ""}); - RETURN_IF_ERROR(execute(context, &block, &result)); - DCHECK(result != -1); - const auto& column = block.get_by_position(result).column; - _constant_col = std::make_shared(column); - *output = _constant_col.get(); - return Status::OK(); -} + void VExpr::register_function_context(doris::RuntimeState * state, VExprContext * context) { + FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); + std::vector arg_types; + for (int i = 0; i < _children.size(); ++i) { + arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->type())); + } -void VExpr::register_function_context(doris::RuntimeState* state, VExprContext* context) { - FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); - std::vector arg_types; - for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->type())); + _fn_context_index = context->register_func(state, return_type, arg_types, 0); } - _fn_context_index = context->register_func(state, return_type, arg_types, 0); -} - -Status VExpr::init_function_context(VExprContext* context, - FunctionContext::FunctionStateScope scope, - const FunctionBasePtr& function) const { - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - if (scope == FunctionContext::FRAGMENT_LOCAL) { - std::vector constant_cols; - for (auto c : _children) { - ColumnPtrWrapper* const_col_wrapper = nullptr; - 
RETURN_IF_ERROR(c->get_const_col(context, &const_col_wrapper)); - constant_cols.push_back(const_col_wrapper); + Status VExpr::init_function_context(VExprContext * context, + FunctionContext::FunctionStateScope scope, + const FunctionBasePtr& function) const { + FunctionContext* fn_ctx = context->fn_context(_fn_context_index); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + std::vector constant_cols; + for (auto c : _children) { + ColumnPtrWrapper* const_col_wrapper = nullptr; + RETURN_IF_ERROR(c->get_const_col(context, &const_col_wrapper)); + constant_cols.push_back(const_col_wrapper); + } + fn_ctx->impl()->set_constant_cols(constant_cols); } - fn_ctx->impl()->set_constant_cols(constant_cols); - } - if (scope == FunctionContext::FRAGMENT_LOCAL) { - RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + } + RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::THREAD_LOCAL)); + return Status::OK(); } - RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::THREAD_LOCAL)); - return Status::OK(); -} -void VExpr::close_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope, - const FunctionBasePtr& function) const { - if (_fn_context_index != -1 && !context->_stale) { - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - function->close(fn_ctx, FunctionContext::THREAD_LOCAL); - if (scope == FunctionContext::FRAGMENT_LOCAL) { - function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL); + void VExpr::close_function_context(VExprContext * context, + FunctionContext::FunctionStateScope scope, + const FunctionBasePtr& function) const { + if (_fn_context_index != -1 && !context->_stale) { + FunctionContext* fn_ctx = context->fn_context(_fn_context_index); + function->close(fn_ctx, FunctionContext::THREAD_LOCAL); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + 
function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL); + } } } -} } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp index cbcc70ffab385b..954142f04de72d 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -38,9 +38,9 @@ Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, if ((idx & 1) == 0) { keys.get().push_back(item); - } else { + } else { values.get().push_back(item); - } + } } map.get().push_back(keys); map.get().push_back(values); diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index f9493ade082c35..75aae961460318 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -803,9 +803,7 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { } Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( - const ColumnMap* column_map, - const DataTypeMap* data_type_map) { - + const ColumnMap* column_map, const DataTypeMap* data_type_map) { ColumnPtr key_data = column_map->get_keys_ptr(); ColumnPtr value_data = column_map->get_values_ptr(); if (column_map->get_keys().is_nullable()) { @@ -820,8 +818,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( value_data = val_nullable_column.get_nested_column_ptr(); } - ColumnWithTypeAndName key_typed_column = { - key_data, remove_nullable(data_type_map->get_keys()),"map.key"}; + ColumnWithTypeAndName key_typed_column = {key_data, remove_nullable(data_type_map->get_keys()), + "map.key"}; _key_convertor->set_source_column(key_typed_column, _row_pos, _num_rows); _key_convertor->convert_to_olap(); diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 2318b0e3693b0b..1192838a6c6649 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -396,33 +396,28 
@@ class OlapBlockDataConvertor { OlapColumnDataConvertorBaseUPtr _item_convertor; }; - - class OlapColumnDataConvertorMap - : public OlapColumnDataConvertorBase { + class OlapColumnDataConvertorMap : public OlapColumnDataConvertorBase { public: OlapColumnDataConvertorMap(OlapColumnDataConvertorBaseUPtr key_convertor, - OlapColumnDataConvertorBaseUPtr value_convertor) - : _key_convertor(std::move(key_convertor)), _value_convertor(std::move(value_convertor)) { + OlapColumnDataConvertorBaseUPtr value_convertor) + : _key_convertor(std::move(key_convertor)), + _value_convertor(std::move(value_convertor)) { _results.resize(2); } Status convert_to_olap() override; - const void* get_data() const override { - return _results.data(); - }; + const void* get_data() const override { return _results.data(); }; const void* get_data_at(size_t offset) const override { LOG(FATAL) << "now not support get_data_at for OlapColumnDataConvertorMap"; }; private: - Status convert_to_olap(const ColumnMap* column_map, - const DataTypeMap* data_type_map); + Status convert_to_olap(const ColumnMap* column_map, const DataTypeMap* data_type_map); OlapColumnDataConvertorBaseUPtr _key_convertor; OlapColumnDataConvertorBaseUPtr _value_convertor; std::vector _results; - };//OlapColumnDataConvertorMap - + }; //OlapColumnDataConvertorMap private: std::vector _convertors; From 50be1da10ef8f41f6994edc32ba6e665eb860f81 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 2 Feb 2023 15:51:54 +0800 Subject: [PATCH 07/11] Revert " use check clang format" This reverts commit 817e0abc94497b95a4693f7ea7bf21630828b400. 
--- be/src/olap/field.h | 5 +- be/src/olap/rowset/segment_v2/column_reader.h | 8 +- .../olap/rowset/segment_v2/column_writer.cpp | 14 +- be/src/olap/rowset/segment_v2/column_writer.h | 5 +- be/src/runtime/map_value.h | 2 +- be/src/runtime/primitive_type.cpp | 2 +- be/src/runtime/types.cpp | 116 +++--- be/src/vec/data_types/data_type_factory.cpp | 20 +- be/src/vec/exprs/vexpr.cpp | 336 +++++++++--------- be/src/vec/exprs/vmap_literal.cpp | 4 +- be/src/vec/olap/olap_data_convertor.cpp | 8 +- be/src/vec/olap/olap_data_convertor.h | 19 +- 12 files changed, 274 insertions(+), 265 deletions(-) diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 56f26c33aae769..83c194dc90e0e5 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -456,6 +456,7 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { return _type_info->hash_code(cell.cell_ptr(), seed); } + class MapField : public Field { public: explicit MapField(const TabletColumn& column) : Field(column) {} @@ -467,14 +468,14 @@ class MapField : public Field { } _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); } - // make variable_ptr memory allocate to cell_ptr as MapValue + // make variable_ptr memory allocate to cell_ptr as MapValue char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { return variable_ptr + _length; } size_t get_variable_len() const override { return _length; } }; - + class StructField : public Field { public: explicit StructField(const TabletColumn& column) : Field(column) {} diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 59a5b4ef1a2211..b2e3987f4119d4 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -400,7 +400,7 @@ class MapFileColumnIterator final : public ColumnIterator { ColumnIterator* key_iterator, ColumnIterator* val_iterator); ~MapFileColumnIterator() override = default; - + Status init(const 
ColumnIteratorOptions& opts) override; Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override; @@ -409,16 +409,16 @@ class MapFileColumnIterator final : public ColumnIterator { Status read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) override; - + Status seek_to_first() override { RETURN_IF_ERROR(_key_iterator->seek_to_first()); RETURN_IF_ERROR(_val_iterator->seek_to_first()); RETURN_IF_ERROR(_null_iterator->seek_to_first()); return Status::OK(); } - + Status seek_to_ordinal(ordinal_t ord) override; - + ordinal_t get_current_ordinal() const override { return _key_iterator->get_current_ordinal(); } private: diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 8d89f2a763105f..fc05fc97d80b7a 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -275,7 +275,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_writer = new ScalarColumnWriter(null_options, std::move(null_field), file_writer); } - + // create key & value writer std::vector> inner_writer_list; for (int i = 0; i < 2; ++i) { @@ -312,8 +312,9 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* } // create map writer std::unique_ptr sub_column_writer; - std::unique_ptr writer_local = std::unique_ptr( - new MapColumnWriter(opts, std::move(field), null_writer, inner_writer_list)); + std::unique_ptr writer_local = + std::unique_ptr(new MapColumnWriter( + opts, std::move(field), null_writer, inner_writer_list)); *writer = std::move(writer_local); return Status::OK(); @@ -957,9 +958,10 @@ Status ArrayColumnWriter::finish_current_page() { /// ============================= MapColumnWriter =====================//// MapColumnWriter::MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::vector>& 
kv_writers) - : ColumnWriter(std::move(field), opts.meta->is_nullable()), _opts(opts) { + ScalarColumnWriter* null_writer, + std::vector>& kv_writers) + : ColumnWriter(std::move(field), opts.meta->is_nullable()), + _opts(opts) { CHECK_EQ(kv_writers.size(), 2); if (is_nullable()) { _null_writer.reset(null_writer); diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 7d140324ddf6b5..d386b092881f20 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -374,8 +374,8 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::vector>& _kv_writers); + ScalarColumnWriter* null_writer, + std::vector>& _kv_writers); ~MapColumnWriter() override = default; @@ -426,5 +426,6 @@ class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { ColumnWriterOptions _opts; }; + } // namespace segment_v2 } // namespace doris diff --git a/be/src/runtime/map_value.h b/be/src/runtime/map_value.h index d275316cf63ee8..df0d1b06de27d4 100644 --- a/be/src/runtime/map_value.h +++ b/be/src/runtime/map_value.h @@ -18,7 +18,6 @@ #pragma once #include - #include "runtime/primitive_type.h" namespace doris { @@ -41,6 +40,7 @@ class MapValue { void shallow_copy(const MapValue* other); + const void* key_data() const { return _key_data; } void* mutable_key_data() const { return _key_data; } const void* value_data() const { return _value_data; } diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 2dd7c438fc4287..3aa91c18ff8d19 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -271,7 +271,7 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case 
TPrimitiveType::MAP: return TYPE_MAP; - + case TPrimitiveType::STRUCT: return TYPE_STRUCT; diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 386f3b57f041d2..5628fe4c918207 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -98,7 +98,7 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) case TTypeNodeType::MAP: { DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 2); - DCHECK(!node.__isset.contains_null); + DCHECK(!node.__isset.contains_null); type = TYPE_MAP; ++(*idx); children.push_back(TypeDescriptor(types, idx)); @@ -251,67 +251,67 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField"; - return ss.str(); - } - case TYPE_MAP: - ss << "MAP<" << children[0].debug_string() << ", " << children[1].debug_string() << ">"; - return ss.str(); - case TYPE_STRUCT: { - ss << "STRUCT<"; - for (size_t i = 0; i < children.size(); i++) { - ss << field_names[i]; - ss << ":"; - ss << children[i].debug_string(); - if (i != children.size() - 1) { - ss << ","; - } +std::string TypeDescriptor::debug_string() const { + std::stringstream ss; + switch (type) { + case TYPE_CHAR: + ss << "CHAR(" << len << ")"; + return ss.str(); + case TYPE_DECIMALV2: + ss << "DECIMALV2(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL32: + ss << "DECIMAL32(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL64: + ss << "DECIMAL64(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL128I: + ss << "DECIMAL128(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_ARRAY: { + ss << "ARRAY<" << children[0].debug_string() << ">"; + return ss.str(); + } + case TYPE_MAP: + ss << "MAP<" << children[0].debug_string() << ", " << children[1].debug_string() << ">"; + return ss.str(); + case TYPE_STRUCT: { + ss << "STRUCT<"; + for (size_t i = 0; i < children.size(); i++) { + ss << field_names[i]; + ss << ":"; + ss << 
children[i].debug_string(); + if (i != children.size() - 1) { + ss << ","; } - ss << ">"; - return ss.str(); - } - default: - return type_to_string(type); } + ss << ">"; + return ss.str(); } - - std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type) { - os << type.debug_string(); - return os; + default: + return type_to_string(type); } +} - TTypeDesc create_type_desc(PrimitiveType type, int precision, int scale) { - TTypeDesc type_desc; - std::vector node_type; - node_type.emplace_back(); - TScalarType scalarType; - scalarType.__set_type(to_thrift(type)); - scalarType.__set_len(-1); - scalarType.__set_precision(precision); - scalarType.__set_scale(scale); - node_type.back().__set_scalar_type(scalarType); - type_desc.__set_types(node_type); - return type_desc; - } +std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type) { + os << type.debug_string(); + return os; +} + +TTypeDesc create_type_desc(PrimitiveType type, int precision, int scale) { + TTypeDesc type_desc; + std::vector node_type; + node_type.emplace_back(); + TScalarType scalarType; + scalarType.__set_type(to_thrift(type)); + scalarType.__set_len(-1); + scalarType.__set_precision(precision); + scalarType.__set_scale(scale); + node_type.back().__set_scalar_type(scalarType); + type_desc.__set_types(node_type); + return type_desc; +} } // namespace doris diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 7899b2afea41c8..af6407af1cb8ba 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -165,7 +165,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared( create_data_type(col_desc.children[0], col_desc.contains_nulls[0])); break; - case TYPE_STRUCT: + case TYPE_STRUCT: { DCHECK(col_desc.children.size() >= 1); size_t child_size = col_desc.children.size(); DCHECK_EQ(col_desc.field_names.size(), child_size); @@ 
-186,16 +186,16 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); break; } -case INVALID_TYPE: -default: - DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; - break; -} + case INVALID_TYPE: + default: + DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; + break; + } -if (nested && is_nullable) { - return std::make_shared(nested); -} -return nested; + if (nested && is_nullable) { + return std::make_shared(nested); + } + return nested; } DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type, int precision, diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index ac6b4e53c69e4f..101407ae802d44 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -174,220 +174,218 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr default: return Status::InternalError("Unknown expr node type: {}", texpr_node.node_type); } - return Status::OK(); - } + return Status::OK(); +} - Status VExpr::create_tree_from_thrift(doris::ObjectPool * pool, - const std::vector& nodes, VExpr* parent, - int* node_idx, VExpr** root_expr, VExprContext** ctx) { - // propagate error case +Status VExpr::create_tree_from_thrift(doris::ObjectPool* pool, + const std::vector& nodes, VExpr* parent, + int* node_idx, VExpr** root_expr, VExprContext** ctx) { + // propagate error case + if (*node_idx >= nodes.size()) { + return Status::InternalError("Failed to reconstruct expression tree from thrift."); + } + int num_children = nodes[*node_idx].num_children; + VExpr* expr = nullptr; + RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); + DCHECK(expr != nullptr); + if (parent != nullptr) { + parent->add_child(expr); + } else { + DCHECK(root_expr != nullptr); + DCHECK(ctx != nullptr); + *root_expr = expr; + *ctx = pool->add(new VExprContext(expr)); + } + for (int i = 0; i < 
num_children; i++) { + *node_idx += 1; + RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); + // we are expecting a child, but have used all nodes + // this means we have been given a bad tree and must fail if (*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); } - int num_children = nodes[*node_idx].num_children; - VExpr* expr = nullptr; - RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); - DCHECK(expr != nullptr); - if (parent != nullptr) { - parent->add_child(expr); - } else { - DCHECK(root_expr != nullptr); - DCHECK(ctx != nullptr); - *root_expr = expr; - *ctx = pool->add(new VExprContext(expr)); - } - for (int i = 0; i < num_children; i++) { - *node_idx += 1; - RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); - // we are expecting a child, but have used all nodes - // this means we have been given a bad tree and must fail - if (*node_idx >= nodes.size()) { - return Status::InternalError("Failed to reconstruct expression tree from thrift."); - } - } - return Status::OK(); } + return Status::OK(); +} - Status VExpr::create_expr_tree(doris::ObjectPool * pool, const doris::TExpr& texpr, - VExprContext** ctx) { - if (texpr.nodes.size() == 0) { - *ctx = nullptr; - return Status::OK(); - } - int node_idx = 0; - VExpr* e = nullptr; - Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); - if (status.ok() && node_idx + 1 != texpr.nodes.size()) { - status = Status::InternalError( - "Expression tree only partially reconstructed. 
Not all thrift nodes were " - "used."); - } - if (!status.ok()) { - LOG(ERROR) << "Could not construct expr tree.\n" - << status << "\n" - << apache::thrift::ThriftDebugString(texpr); - } - return status; +Status VExpr::create_expr_tree(doris::ObjectPool* pool, const doris::TExpr& texpr, + VExprContext** ctx) { + if (texpr.nodes.size() == 0) { + *ctx = nullptr; + return Status::OK(); } + int node_idx = 0; + VExpr* e = nullptr; + Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); + if (status.ok() && node_idx + 1 != texpr.nodes.size()) { + status = Status::InternalError( + "Expression tree only partially reconstructed. Not all thrift nodes were used."); + } + if (!status.ok()) { + LOG(ERROR) << "Could not construct expr tree.\n" + << status << "\n" + << apache::thrift::ThriftDebugString(texpr); + } + return status; +} - Status VExpr::create_expr_trees(ObjectPool * pool, const std::vector& texprs, - std::vector* ctxs) { - ctxs->clear(); - for (int i = 0; i < texprs.size(); ++i) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); - ctxs->push_back(ctx); - } - return Status::OK(); +Status VExpr::create_expr_trees(ObjectPool* pool, const std::vector& texprs, + std::vector* ctxs) { + ctxs->clear(); + for (int i = 0; i < texprs.size(); ++i) { + VExprContext* ctx = nullptr; + RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); + ctxs->push_back(ctx); } + return Status::OK(); +} - Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc) { - for (auto ctx : ctxs) { - RETURN_IF_ERROR(ctx->prepare(state, row_desc)); - } - return Status::OK(); +Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, + const RowDescriptor& row_desc) { + for (auto ctx : ctxs) { + RETURN_IF_ERROR(ctx->prepare(state, row_desc)); } + return Status::OK(); +} - void VExpr::close(const std::vector& ctxs, RuntimeState* state) { - for (auto ctx : ctxs) { - 
ctx->close(state); - } +void VExpr::close(const std::vector& ctxs, RuntimeState* state) { + for (auto ctx : ctxs) { + ctx->close(state); } +} - Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->open(state)); - } - return Status::OK(); +Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { + for (int i = 0; i < ctxs.size(); ++i) { + RETURN_IF_ERROR(ctxs[i]->open(state)); } + return Status::OK(); +} - Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs) { - DCHECK(new_ctxs != nullptr); - if (!new_ctxs->empty()) { - // 'ctxs' was already cloned into '*new_ctxs', nothing to do. - DCHECK_EQ(new_ctxs->size(), ctxs.size()); - for (int i = 0; i < new_ctxs->size(); ++i) { - DCHECK((*new_ctxs)[i]->_is_clone); - } - return Status::OK(); - } - new_ctxs->resize(ctxs.size()); - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); +Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, + std::vector* new_ctxs) { + DCHECK(new_ctxs != nullptr); + if (!new_ctxs->empty()) { + // 'ctxs' was already cloned into '*new_ctxs', nothing to do. 
+ DCHECK_EQ(new_ctxs->size(), ctxs.size()); + for (int i = 0; i < new_ctxs->size(); ++i) { + DCHECK((*new_ctxs)[i]->_is_clone); } return Status::OK(); } - std::string VExpr::debug_string() const { - // TODO: implement partial debug string for member vars - std::stringstream out; - out << " type=" << _type.debug_string(); - out << " codegen=" - << "false"; - - if (!_children.empty()) { - out << " children=" << debug_string(_children); - } + new_ctxs->resize(ctxs.size()); + for (int i = 0; i < ctxs.size(); ++i) { + RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); + } + return Status::OK(); +} +std::string VExpr::debug_string() const { + // TODO: implement partial debug string for member vars + std::stringstream out; + out << " type=" << _type.debug_string(); + out << " codegen=" + << "false"; - return out.str(); + if (!_children.empty()) { + out << " children=" << debug_string(_children); } - std::string VExpr::debug_string(const std::vector& exprs) { - std::stringstream out; - out << "["; + return out.str(); +} - for (int i = 0; i < exprs.size(); ++i) { - out << (i == 0 ? "" : " ") << exprs[i]->debug_string(); - } +std::string VExpr::debug_string(const std::vector& exprs) { + std::stringstream out; + out << "["; - out << "]"; - return out.str(); + for (int i = 0; i < exprs.size(); ++i) { + out << (i == 0 ? 
"" : " ") << exprs[i]->debug_string(); } - std::string VExpr::debug_string(const std::vector& ctxs) { - std::vector exprs; - for (int i = 0; i < ctxs.size(); ++i) { - exprs.push_back(ctxs[i]->root()); - } - return debug_string(exprs); + out << "]"; + return out.str(); +} + +std::string VExpr::debug_string(const std::vector& ctxs) { + std::vector exprs; + for (int i = 0; i < ctxs.size(); ++i) { + exprs.push_back(ctxs[i]->root()); } + return debug_string(exprs); +} - bool VExpr::is_constant() const { - for (int i = 0; i < _children.size(); ++i) { - if (!_children[i]->is_constant()) { - return false; - } +bool VExpr::is_constant() const { + for (int i = 0; i < _children.size(); ++i) { + if (!_children[i]->is_constant()) { + return false; } - - return true; } - Status VExpr::get_const_col(VExprContext * context, ColumnPtrWrapper * *output) { - *output = nullptr; - if (!is_constant()) { - return Status::OK(); - } + return true; +} - if (_constant_col != nullptr) { - *output = _constant_col.get(); - return Status::OK(); - } +Status VExpr::get_const_col(VExprContext* context, ColumnPtrWrapper** output) { + *output = nullptr; + if (!is_constant()) { + return Status::OK(); + } - int result = -1; - Block block; - // If block is empty, some functions will produce no result. So we insert a column with - // single value here. 
- block.insert({ColumnUInt8::create(1), std::make_shared(), ""}); - RETURN_IF_ERROR(execute(context, &block, &result)); - DCHECK(result != -1); - const auto& column = block.get_by_position(result).column; - _constant_col = std::make_shared(column); + if (_constant_col != nullptr) { *output = _constant_col.get(); return Status::OK(); } - void VExpr::register_function_context(doris::RuntimeState * state, VExprContext * context) { - FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); - std::vector arg_types; - for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->type())); - } + int result = -1; + Block block; + // If block is empty, some functions will produce no result. So we insert a column with + // single value here. + block.insert({ColumnUInt8::create(1), std::make_shared(), ""}); + RETURN_IF_ERROR(execute(context, &block, &result)); + DCHECK(result != -1); + const auto& column = block.get_by_position(result).column; + _constant_col = std::make_shared(column); + *output = _constant_col.get(); + return Status::OK(); +} - _fn_context_index = context->register_func(state, return_type, arg_types, 0); +void VExpr::register_function_context(doris::RuntimeState* state, VExprContext* context) { + FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); + std::vector arg_types; + for (int i = 0; i < _children.size(); ++i) { + arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->type())); } - Status VExpr::init_function_context(VExprContext * context, - FunctionContext::FunctionStateScope scope, - const FunctionBasePtr& function) const { - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - if (scope == FunctionContext::FRAGMENT_LOCAL) { - std::vector constant_cols; - for (auto c : _children) { - ColumnPtrWrapper* const_col_wrapper = nullptr; - RETURN_IF_ERROR(c->get_const_col(context, &const_col_wrapper)); - 
constant_cols.push_back(const_col_wrapper); - } - fn_ctx->impl()->set_constant_cols(constant_cols); - } + _fn_context_index = context->register_func(state, return_type, arg_types, 0); +} - if (scope == FunctionContext::FRAGMENT_LOCAL) { - RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); +Status VExpr::init_function_context(VExprContext* context, + FunctionContext::FunctionStateScope scope, + const FunctionBasePtr& function) const { + FunctionContext* fn_ctx = context->fn_context(_fn_context_index); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + std::vector constant_cols; + for (auto c : _children) { + ColumnPtrWrapper* const_col_wrapper = nullptr; + RETURN_IF_ERROR(c->get_const_col(context, &const_col_wrapper)); + constant_cols.push_back(const_col_wrapper); } - RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::THREAD_LOCAL)); - return Status::OK(); + fn_ctx->impl()->set_constant_cols(constant_cols); + } + + if (scope == FunctionContext::FRAGMENT_LOCAL) { + RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); } + RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::THREAD_LOCAL)); + return Status::OK(); +} - void VExpr::close_function_context(VExprContext * context, - FunctionContext::FunctionStateScope scope, - const FunctionBasePtr& function) const { - if (_fn_context_index != -1 && !context->_stale) { - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - function->close(fn_ctx, FunctionContext::THREAD_LOCAL); - if (scope == FunctionContext::FRAGMENT_LOCAL) { - function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL); - } +void VExpr::close_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope, + const FunctionBasePtr& function) const { + if (_fn_context_index != -1 && !context->_stale) { + FunctionContext* fn_ctx = context->fn_context(_fn_context_index); + function->close(fn_ctx, FunctionContext::THREAD_LOCAL); + if (scope == 
FunctionContext::FRAGMENT_LOCAL) { + function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL); } } +} } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp index 954142f04de72d..cbcc70ffab385b 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -38,9 +38,9 @@ Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, if ((idx & 1) == 0) { keys.get().push_back(item); - } else { + } else { values.get().push_back(item); - } + } } map.get().push_back(keys); map.get().push_back(values); diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index 75aae961460318..f9493ade082c35 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -803,7 +803,9 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { } Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( - const ColumnMap* column_map, const DataTypeMap* data_type_map) { + const ColumnMap* column_map, + const DataTypeMap* data_type_map) { + ColumnPtr key_data = column_map->get_keys_ptr(); ColumnPtr value_data = column_map->get_values_ptr(); if (column_map->get_keys().is_nullable()) { @@ -818,8 +820,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( value_data = val_nullable_column.get_nested_column_ptr(); } - ColumnWithTypeAndName key_typed_column = {key_data, remove_nullable(data_type_map->get_keys()), - "map.key"}; + ColumnWithTypeAndName key_typed_column = { + key_data, remove_nullable(data_type_map->get_keys()),"map.key"}; _key_convertor->set_source_column(key_typed_column, _row_pos, _num_rows); _key_convertor->convert_to_olap(); diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 1192838a6c6649..2318b0e3693b0b 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ 
b/be/src/vec/olap/olap_data_convertor.h @@ -396,28 +396,33 @@ class OlapBlockDataConvertor { OlapColumnDataConvertorBaseUPtr _item_convertor; }; - class OlapColumnDataConvertorMap : public OlapColumnDataConvertorBase { + + class OlapColumnDataConvertorMap + : public OlapColumnDataConvertorBase { public: OlapColumnDataConvertorMap(OlapColumnDataConvertorBaseUPtr key_convertor, - OlapColumnDataConvertorBaseUPtr value_convertor) - : _key_convertor(std::move(key_convertor)), - _value_convertor(std::move(value_convertor)) { + OlapColumnDataConvertorBaseUPtr value_convertor) + : _key_convertor(std::move(key_convertor)), _value_convertor(std::move(value_convertor)) { _results.resize(2); } Status convert_to_olap() override; - const void* get_data() const override { return _results.data(); }; + const void* get_data() const override { + return _results.data(); + }; const void* get_data_at(size_t offset) const override { LOG(FATAL) << "now not support get_data_at for OlapColumnDataConvertorMap"; }; private: - Status convert_to_olap(const ColumnMap* column_map, const DataTypeMap* data_type_map); + Status convert_to_olap(const ColumnMap* column_map, + const DataTypeMap* data_type_map); OlapColumnDataConvertorBaseUPtr _key_convertor; OlapColumnDataConvertorBaseUPtr _value_convertor; std::vector _results; - }; //OlapColumnDataConvertorMap + };//OlapColumnDataConvertorMap + private: std::vector _convertors; From 7a06b3ab0e523b61f85024f57ed3700ddc431f86 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 2 Feb 2023 16:03:01 +0800 Subject: [PATCH 08/11] format data_type_factory --- be/src/vec/data_types/data_type_factory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index af6407af1cb8ba..c5728756a8974d 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -165,7 +165,7 @@ DataTypePtr 
DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared( create_data_type(col_desc.children[0], col_desc.contains_nulls[0])); break; - case TYPE_STRUCT: { + case TYPE_STRUCT: DCHECK(col_desc.children.size() >= 1); size_t child_size = col_desc.children.size(); DCHECK_EQ(col_desc.field_names.size(), child_size); From 51ea6e2fe188fc6bb135134a779702a5fe79618f Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 2 Feb 2023 16:19:26 +0800 Subject: [PATCH 09/11] format data_type_factory --- be/src/vec/data_types/data_type_factory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index c5728756a8974d..b6b24c9aeb714f 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -185,7 +185,6 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo create_data_type(col_desc.children[0], col_desc.contains_nulls[0]), create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); break; - } case INVALID_TYPE: default: DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; From 0c5005580d0fa6aad5feeaf5bc4784e27a100335 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 2 Feb 2023 17:36:20 +0800 Subject: [PATCH 10/11] update --- be/src/vec/data_types/data_type_factory.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index b6b24c9aeb714f..b028a201131b2a 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -165,7 +165,13 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared( create_data_type(col_desc.children[0], col_desc.contains_nulls[0])); break; - case TYPE_STRUCT: + case TYPE_MAP: + DCHECK(col_desc.children.size() == 2); + 
nested = std::make_shared( + create_data_type(col_desc.children[0], col_desc.contains_nulls[0]), + create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); + break; + case TYPE_STRUCT: { DCHECK(col_desc.children.size() >= 1); size_t child_size = col_desc.children.size(); DCHECK_EQ(col_desc.field_names.size(), child_size); @@ -179,12 +185,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo } nested = std::make_shared(dataTypes, names); break; - case TYPE_MAP: - DCHECK(col_desc.children.size() == 2); - nested = std::make_shared( - create_data_type(col_desc.children[0], col_desc.contains_nulls[0]), - create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); - break; + } case INVALID_TYPE: default: DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; From 94e9bd5a5d8271bc389a2d9152ae532971a10dc1 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 2 Feb 2023 22:29:52 +0800 Subject: [PATCH 11/11] update format --- be/src/http/http_request.h | 4 +- be/src/olap/field.h | 5 +- be/src/olap/page_cache.cpp | 3 +- be/src/olap/rowset/segment_v2/column_reader.h | 8 +- .../olap/rowset/segment_v2/column_writer.cpp | 14 +- be/src/olap/rowset/segment_v2/column_writer.h | 5 +- be/src/olap/types.cpp | 27 +- be/src/olap/utils.h | 2 +- be/src/runtime/map_value.h | 2 +- be/src/runtime/primitive_type.cpp | 2 +- be/src/runtime/types.cpp | 116 +++--- be/src/util/binary_cast.hpp | 2 +- be/src/vec/core/accurate_comparison.h | 29 +- be/src/vec/data_types/data_type_factory.cpp | 2 +- be/src/vec/exprs/vexpr.cpp | 336 +++++++++--------- be/src/vec/exprs/vmap_literal.cpp | 4 +- be/src/vec/olap/olap_data_convertor.cpp | 8 +- be/src/vec/olap/olap_data_convertor.h | 19 +- .../java/org/apache/doris/analysis/Expr.java | 7 +- .../org/apache/doris/catalog/FunctionSet.java | 2 +- .../java/org/apache/doris/catalog/Type.java | 2 +- 21 files changed, 291 insertions(+), 308 deletions(-) diff --git a/be/src/http/http_request.h 
b/be/src/http/http_request.h index 81085f7cceb049..1503e4303a1b69 100644 --- a/be/src/http/http_request.h +++ b/be/src/http/http_request.h @@ -72,9 +72,7 @@ class HttpRequest { void set_handler(HttpHandler* handler) { _handler = handler; } HttpHandler* handler() const { return _handler; } - struct evhttp_request* get_evhttp_request() const { - return _ev_req; - } + struct evhttp_request* get_evhttp_request() const { return _ev_req; } void* handler_ctx() const { return _handler_ctx; } void set_handler_ctx(void* ctx) { diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 83c194dc90e0e5..56f26c33aae769 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -456,7 +456,6 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { return _type_info->hash_code(cell.cell_ptr(), seed); } - class MapField : public Field { public: explicit MapField(const TabletColumn& column) : Field(column) {} @@ -468,14 +467,14 @@ class MapField : public Field { } _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); } - // make variable_ptr memory allocate to cell_ptr as MapValue + // make variable_ptr memory allocate to cell_ptr as MapValue char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { return variable_ptr + _length; } size_t get_variable_len() const override { return _length; } }; - + class StructField : public Field { public: explicit StructField(const TabletColumn& column) : Field(column) {} diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp index 378b9a5b6e3436..a49043aebbabde 100644 --- a/be/src/olap/page_cache.cpp +++ b/be/src/olap/page_cache.cpp @@ -64,8 +64,7 @@ bool StoragePageCache::lookup(const CacheKey& key, PageCacheHandle* handle, void StoragePageCache::insert(const CacheKey& key, const Slice& data, PageCacheHandle* handle, segment_v2::PageTypePB page_type, bool in_memory) { - auto deleter = [](const doris::CacheKey& key, void* value) { delete[](uint8_t*) value; }; - + auto deleter = 
[](const doris::CacheKey& key, void* value) { delete[] (uint8_t*)value; }; CachePriority priority = CachePriority::NORMAL; if (in_memory) { priority = CachePriority::DURABLE; diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index b2e3987f4119d4..59a5b4ef1a2211 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -400,7 +400,7 @@ class MapFileColumnIterator final : public ColumnIterator { ColumnIterator* key_iterator, ColumnIterator* val_iterator); ~MapFileColumnIterator() override = default; - + Status init(const ColumnIteratorOptions& opts) override; Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override; @@ -409,16 +409,16 @@ class MapFileColumnIterator final : public ColumnIterator { Status read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) override; - + Status seek_to_first() override { RETURN_IF_ERROR(_key_iterator->seek_to_first()); RETURN_IF_ERROR(_val_iterator->seek_to_first()); RETURN_IF_ERROR(_null_iterator->seek_to_first()); return Status::OK(); } - + Status seek_to_ordinal(ordinal_t ord) override; - + ordinal_t get_current_ordinal() const override { return _key_iterator->get_current_ordinal(); } private: diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index fc05fc97d80b7a..8d89f2a763105f 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -275,7 +275,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_writer = new ScalarColumnWriter(null_options, std::move(null_field), file_writer); } - + // create key & value writer std::vector> inner_writer_list; for (int i = 0; i < 2; ++i) { @@ -312,9 +312,8 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* } // create map writer 
std::unique_ptr sub_column_writer; - std::unique_ptr writer_local = - std::unique_ptr(new MapColumnWriter( - opts, std::move(field), null_writer, inner_writer_list)); + std::unique_ptr writer_local = std::unique_ptr( + new MapColumnWriter(opts, std::move(field), null_writer, inner_writer_list)); *writer = std::move(writer_local); return Status::OK(); @@ -958,10 +957,9 @@ Status ArrayColumnWriter::finish_current_page() { /// ============================= MapColumnWriter =====================//// MapColumnWriter::MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::vector>& kv_writers) - : ColumnWriter(std::move(field), opts.meta->is_nullable()), - _opts(opts) { + ScalarColumnWriter* null_writer, + std::vector>& kv_writers) + : ColumnWriter(std::move(field), opts.meta->is_nullable()), _opts(opts) { CHECK_EQ(kv_writers.size(), 2); if (is_nullable()) { _null_writer.reset(null_writer); diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index d386b092881f20..7d140324ddf6b5 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -374,8 +374,8 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* null_writer, - std::vector>& _kv_writers); + ScalarColumnWriter* null_writer, + std::vector>& _kv_writers); ~MapColumnWriter() override = default; @@ -426,6 +426,5 @@ class MapColumnWriter final : public ColumnWriter, public FlushPageCallback { ColumnWriterOptions _opts; }; - } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index dcfcaf7114d6c4..bee790e066e2d1 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -269,22 
+269,19 @@ TypeInfoPtr clone_type_info(const TypeInfo* type_info) { return create_dynamic_type_info_ptr( new MapTypeInfo(clone_type_info(map_type_info->get_key_type_info()), clone_type_info(map_type_info->get_value_type_info()))); - } else { - auto type = type_info->type(); - if (type == OLAP_FIELD_TYPE_STRUCT) { - const auto struct_type_info = dynamic_cast(type_info); - std::vector clone_type_infos; - const std::vector* sub_type_infos = struct_type_info->type_infos(); - clone_type_infos.reserve(sub_type_infos->size()); - for (size_t i = 0; i < sub_type_infos->size(); i++) { - clone_type_infos.push_back(clone_type_info((*sub_type_infos)[i].get())); - } - return create_dynamic_type_info_ptr(new StructTypeInfo(clone_type_infos)); - } else { - const auto array_type_info = dynamic_cast(type_info); - return create_dynamic_type_info_ptr( - new ArrayTypeInfo(clone_type_info(array_type_info->item_type_info()))); + } else if (type_info->type() == OLAP_FIELD_TYPE_STRUCT) { + const auto struct_type_info = dynamic_cast(type_info); + std::vector clone_type_infos; + const std::vector* sub_type_infos = struct_type_info->type_infos(); + clone_type_infos.reserve(sub_type_infos->size()); + for (size_t i = 0; i < sub_type_infos->size(); i++) { + clone_type_infos.push_back(clone_type_info((*sub_type_infos)[i].get())); } + return create_dynamic_type_info_ptr(new StructTypeInfo(clone_type_infos)); + } else if (type_info->type() == OLAP_FIELD_TYPE_ARRAY) { + const auto array_type_info = dynamic_cast(type_info); + return create_dynamic_type_info_ptr( + new ArrayTypeInfo(clone_type_info(array_type_info->item_type_info()))); } } diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 0360fb51eea075..964b973569dcf1 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -110,7 +110,7 @@ void _destruct_object(const void* obj, void*) { template void _destruct_array(const void* array, void*) { - delete[]((const T*)array); + delete[] ((const T*)array); } // 
根据压缩类型的不同,执行压缩。dest_buf_len是dest_buf的最大长度, diff --git a/be/src/runtime/map_value.h b/be/src/runtime/map_value.h index df0d1b06de27d4..d275316cf63ee8 100644 --- a/be/src/runtime/map_value.h +++ b/be/src/runtime/map_value.h @@ -18,6 +18,7 @@ #pragma once #include + #include "runtime/primitive_type.h" namespace doris { @@ -40,7 +41,6 @@ class MapValue { void shallow_copy(const MapValue* other); - const void* key_data() const { return _key_data; } void* mutable_key_data() const { return _key_data; } const void* value_data() const { return _value_data; } diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 3aa91c18ff8d19..2dd7c438fc4287 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -271,7 +271,7 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case TPrimitiveType::MAP: return TYPE_MAP; - + case TPrimitiveType::STRUCT: return TYPE_STRUCT; diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 5628fe4c918207..386f3b57f041d2 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -98,7 +98,7 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) case TTypeNodeType::MAP: { DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 2); - DCHECK(!node.__isset.contains_null); + DCHECK(!node.__isset.contains_null); type = TYPE_MAP; ++(*idx); children.push_back(TypeDescriptor(types, idx)); @@ -251,67 +251,67 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField"; - return ss.str(); } - case TYPE_MAP: - ss << "MAP<" << children[0].debug_string() << ", " << children[1].debug_string() << ">"; - return ss.str(); - case TYPE_STRUCT: { - ss << "STRUCT<"; - for (size_t i = 0; i < children.size(); i++) { - ss << field_names[i]; - ss << ":"; - ss << children[i].debug_string(); - if (i != children.size() - 1) { - ss << ","; + + std::string TypeDescriptor::debug_string() const { + std::stringstream ss; + switch (type) { + 
case TYPE_CHAR: + ss << "CHAR(" << len << ")"; + return ss.str(); + case TYPE_DECIMALV2: + ss << "DECIMALV2(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL32: + ss << "DECIMAL32(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL64: + ss << "DECIMAL64(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_DECIMAL128I: + ss << "DECIMAL128(" << precision << ", " << scale << ")"; + return ss.str(); + case TYPE_ARRAY: { + ss << "ARRAY<" << children[0].debug_string() << ">"; + return ss.str(); + } + case TYPE_MAP: + ss << "MAP<" << children[0].debug_string() << ", " << children[1].debug_string() << ">"; + return ss.str(); + case TYPE_STRUCT: { + ss << "STRUCT<"; + for (size_t i = 0; i < children.size(); i++) { + ss << field_names[i]; + ss << ":"; + ss << children[i].debug_string(); + if (i != children.size() - 1) { + ss << ","; + } } + ss << ">"; + return ss.str(); + } + default: + return type_to_string(type); } - ss << ">"; - return ss.str(); - } - default: - return type_to_string(type); } -} -std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type) { - os << type.debug_string(); - return os; -} + std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type) { + os << type.debug_string(); + return os; + } -TTypeDesc create_type_desc(PrimitiveType type, int precision, int scale) { - TTypeDesc type_desc; - std::vector node_type; - node_type.emplace_back(); - TScalarType scalarType; - scalarType.__set_type(to_thrift(type)); - scalarType.__set_len(-1); - scalarType.__set_precision(precision); - scalarType.__set_scale(scale); - node_type.back().__set_scalar_type(scalarType); - type_desc.__set_types(node_type); - return type_desc; -} + TTypeDesc create_type_desc(PrimitiveType type, int precision, int scale) { + TTypeDesc type_desc; + std::vector node_type; + node_type.emplace_back(); + TScalarType scalarType; + scalarType.__set_type(to_thrift(type)); + scalarType.__set_len(-1); + 
scalarType.__set_precision(precision); + scalarType.__set_scale(scale); + node_type.back().__set_scalar_type(scalarType); + type_desc.__set_types(node_type); + return type_desc; + } } // namespace doris diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp index 5da841a45e42cd..17754a15e92d7f 100644 --- a/be/src/util/binary_cast.hpp +++ b/be/src/util/binary_cast.hpp @@ -37,7 +37,7 @@ union TypeConverter { }; template -inline constexpr bool match_v = std::is_same_v&& std::is_same_v; +inline constexpr bool match_v = std::is_same_v && std::is_same_v; union DecimalInt128Union { DecimalV2Value decimal; diff --git a/be/src/vec/core/accurate_comparison.h b/be/src/vec/core/accurate_comparison.h index 3f13772041f3cf..e52cc4ef6596c5 100644 --- a/be/src/vec/core/accurate_comparison.h +++ b/be/src/vec/core/accurate_comparison.h @@ -54,15 +54,14 @@ namespace accurate { // Case 1. Is pair of floats or pair of ints or pair of uints template -constexpr bool is_safe_conversion = (std::is_floating_point_v && std::is_floating_point_v) || - (std::is_integral_v && std::is_integral_v && - !(std::is_signed_v ^ std::is_signed_v)) || - (std::is_same_v && - std::is_same_v) || - (std::is_integral_v && - std::is_same_v) || - (std::is_same_v && - std::is_integral_v); +constexpr bool is_safe_conversion = + (std::is_floating_point_v && std::is_floating_point_v) || + (std::is_integral_v && std::is_integral_v && + !(std::is_signed_v ^ std::is_signed_v)) || + (std::is_same_v && + std::is_same_v) || + (std::is_integral_v && std::is_same_v) || + (std::is_same_v && std::is_integral_v); template using bool_if_safe_conversion = std::enable_if_t, bool>; template @@ -70,13 +69,13 @@ using bool_if_not_safe_conversion = std::enable_if_t, /// Case 2. Are params IntXX and UIntYY ? 
template -constexpr bool is_any_int_vs_uint = std::is_integral_v&& std::is_integral_v&& - std::is_signed_v&& std::is_unsigned_v; +constexpr bool is_any_int_vs_uint = std::is_integral_v && std::is_integral_v && + std::is_signed_v && std::is_unsigned_v; // Case 2a. Are params IntXX and UIntYY and sizeof(IntXX) >= sizeof(UIntYY) (in such case will use accurate compare) template -constexpr bool is_le_int_vs_uint = is_any_int_vs_uint && - (sizeof(TInt) <= sizeof(TUInt)); +constexpr bool is_le_int_vs_uint = + is_any_int_vs_uint && (sizeof(TInt) <= sizeof(TUInt)); template using bool_if_le_int_vs_uint_t = std::enable_if_t, bool>; @@ -107,8 +106,8 @@ inline bool_if_le_int_vs_uint_t equalsOpTmpl(TUInt a, TInt b) { // Case 2b. Are params IntXX and UIntYY and sizeof(IntXX) > sizeof(UIntYY) (in such case will cast UIntYY to IntXX and compare) template -constexpr bool is_gt_int_vs_uint = is_any_int_vs_uint && - (sizeof(TInt) > sizeof(TUInt)); +constexpr bool is_gt_int_vs_uint = + is_any_int_vs_uint && (sizeof(TInt) > sizeof(TUInt)); template using bool_if_gt_int_vs_uint = std::enable_if_t, bool>; diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index b028a201131b2a..f1cc28003eac85 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -166,7 +166,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo create_data_type(col_desc.children[0], col_desc.contains_nulls[0])); break; case TYPE_MAP: - DCHECK(col_desc.children.size() == 2); + DCHECK(col_desc.children.size() == 2); nested = std::make_shared( create_data_type(col_desc.children[0], col_desc.contains_nulls[0]), create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 101407ae802d44..ac6b4e53c69e4f 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -174,218 +174,220 @@ Status 
VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr default: return Status::InternalError("Unknown expr node type: {}", texpr_node.node_type); } - return Status::OK(); -} + return Status::OK(); + } -Status VExpr::create_tree_from_thrift(doris::ObjectPool* pool, - const std::vector& nodes, VExpr* parent, - int* node_idx, VExpr** root_expr, VExprContext** ctx) { - // propagate error case - if (*node_idx >= nodes.size()) { - return Status::InternalError("Failed to reconstruct expression tree from thrift."); - } - int num_children = nodes[*node_idx].num_children; - VExpr* expr = nullptr; - RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); - DCHECK(expr != nullptr); - if (parent != nullptr) { - parent->add_child(expr); - } else { - DCHECK(root_expr != nullptr); - DCHECK(ctx != nullptr); - *root_expr = expr; - *ctx = pool->add(new VExprContext(expr)); - } - for (int i = 0; i < num_children; i++) { - *node_idx += 1; - RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); - // we are expecting a child, but have used all nodes - // this means we have been given a bad tree and must fail + Status VExpr::create_tree_from_thrift(doris::ObjectPool * pool, + const std::vector& nodes, VExpr* parent, + int* node_idx, VExpr** root_expr, VExprContext** ctx) { + // propagate error case if (*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); } - } - return Status::OK(); -} - -Status VExpr::create_expr_tree(doris::ObjectPool* pool, const doris::TExpr& texpr, - VExprContext** ctx) { - if (texpr.nodes.size() == 0) { - *ctx = nullptr; + int num_children = nodes[*node_idx].num_children; + VExpr* expr = nullptr; + RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); + DCHECK(expr != nullptr); + if (parent != nullptr) { + parent->add_child(expr); + } else { + DCHECK(root_expr != nullptr); + DCHECK(ctx != nullptr); + *root_expr = expr; + *ctx = pool->add(new 
VExprContext(expr)); + } + for (int i = 0; i < num_children; i++) { + *node_idx += 1; + RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); + // we are expecting a child, but have used all nodes + // this means we have been given a bad tree and must fail + if (*node_idx >= nodes.size()) { + return Status::InternalError("Failed to reconstruct expression tree from thrift."); + } + } return Status::OK(); } - int node_idx = 0; - VExpr* e = nullptr; - Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); - if (status.ok() && node_idx + 1 != texpr.nodes.size()) { - status = Status::InternalError( - "Expression tree only partially reconstructed. Not all thrift nodes were used."); - } - if (!status.ok()) { - LOG(ERROR) << "Could not construct expr tree.\n" - << status << "\n" - << apache::thrift::ThriftDebugString(texpr); - } - return status; -} -Status VExpr::create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs) { - ctxs->clear(); - for (int i = 0; i < texprs.size(); ++i) { - VExprContext* ctx = nullptr; - RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); - ctxs->push_back(ctx); + Status VExpr::create_expr_tree(doris::ObjectPool * pool, const doris::TExpr& texpr, + VExprContext** ctx) { + if (texpr.nodes.size() == 0) { + *ctx = nullptr; + return Status::OK(); + } + int node_idx = 0; + VExpr* e = nullptr; + Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); + if (status.ok() && node_idx + 1 != texpr.nodes.size()) { + status = Status::InternalError( + "Expression tree only partially reconstructed. 
Not all thrift nodes were " + "used."); + } + if (!status.ok()) { + LOG(ERROR) << "Could not construct expr tree.\n" + << status << "\n" + << apache::thrift::ThriftDebugString(texpr); + } + return status; } - return Status::OK(); -} -Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc) { - for (auto ctx : ctxs) { - RETURN_IF_ERROR(ctx->prepare(state, row_desc)); + Status VExpr::create_expr_trees(ObjectPool * pool, const std::vector& texprs, + std::vector* ctxs) { + ctxs->clear(); + for (int i = 0; i < texprs.size(); ++i) { + VExprContext* ctx = nullptr; + RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); + ctxs->push_back(ctx); + } + return Status::OK(); } - return Status::OK(); -} -void VExpr::close(const std::vector& ctxs, RuntimeState* state) { - for (auto ctx : ctxs) { - ctx->close(state); + Status VExpr::prepare(const std::vector& ctxs, RuntimeState* state, + const RowDescriptor& row_desc) { + for (auto ctx : ctxs) { + RETURN_IF_ERROR(ctx->prepare(state, row_desc)); + } + return Status::OK(); } -} -Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->open(state)); + void VExpr::close(const std::vector& ctxs, RuntimeState* state) { + for (auto ctx : ctxs) { + ctx->close(state); + } } - return Status::OK(); -} -Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs) { - DCHECK(new_ctxs != nullptr); - if (!new_ctxs->empty()) { - // 'ctxs' was already cloned into '*new_ctxs', nothing to do. 
- DCHECK_EQ(new_ctxs->size(), ctxs.size()); - for (int i = 0; i < new_ctxs->size(); ++i) { - DCHECK((*new_ctxs)[i]->_is_clone); + Status VExpr::open(const std::vector& ctxs, RuntimeState* state) { + for (int i = 0; i < ctxs.size(); ++i) { + RETURN_IF_ERROR(ctxs[i]->open(state)); } return Status::OK(); } - new_ctxs->resize(ctxs.size()); - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); - } - return Status::OK(); -} -std::string VExpr::debug_string() const { - // TODO: implement partial debug string for member vars - std::stringstream out; - out << " type=" << _type.debug_string(); - out << " codegen=" - << "false"; - if (!_children.empty()) { - out << " children=" << debug_string(_children); + Status VExpr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, + std::vector* new_ctxs) { + DCHECK(new_ctxs != nullptr); + if (!new_ctxs->empty()) { + // 'ctxs' was already cloned into '*new_ctxs', nothing to do. + DCHECK_EQ(new_ctxs->size(), ctxs.size()); + for (int i = 0; i < new_ctxs->size(); ++i) { + DCHECK((*new_ctxs)[i]->_is_clone); + } + return Status::OK(); + } + new_ctxs->resize(ctxs.size()); + for (int i = 0; i < ctxs.size(); ++i) { + RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); + } + return Status::OK(); } + std::string VExpr::debug_string() const { + // TODO: implement partial debug string for member vars + std::stringstream out; + out << " type=" << _type.debug_string(); + out << " codegen=" + << "false"; - return out.str(); -} - -std::string VExpr::debug_string(const std::vector& exprs) { - std::stringstream out; - out << "["; + if (!_children.empty()) { + out << " children=" << debug_string(_children); + } - for (int i = 0; i < exprs.size(); ++i) { - out << (i == 0 ? 
"" : " ") << exprs[i]->debug_string(); + return out.str(); } - out << "]"; - return out.str(); -} + std::string VExpr::debug_string(const std::vector& exprs) { + std::stringstream out; + out << "["; -std::string VExpr::debug_string(const std::vector& ctxs) { - std::vector exprs; - for (int i = 0; i < ctxs.size(); ++i) { - exprs.push_back(ctxs[i]->root()); + for (int i = 0; i < exprs.size(); ++i) { + out << (i == 0 ? "" : " ") << exprs[i]->debug_string(); + } + + out << "]"; + return out.str(); } - return debug_string(exprs); -} -bool VExpr::is_constant() const { - for (int i = 0; i < _children.size(); ++i) { - if (!_children[i]->is_constant()) { - return false; + std::string VExpr::debug_string(const std::vector& ctxs) { + std::vector exprs; + for (int i = 0; i < ctxs.size(); ++i) { + exprs.push_back(ctxs[i]->root()); } + return debug_string(exprs); } - return true; -} + bool VExpr::is_constant() const { + for (int i = 0; i < _children.size(); ++i) { + if (!_children[i]->is_constant()) { + return false; + } + } -Status VExpr::get_const_col(VExprContext* context, ColumnPtrWrapper** output) { - *output = nullptr; - if (!is_constant()) { - return Status::OK(); + return true; } - if (_constant_col != nullptr) { + Status VExpr::get_const_col(VExprContext * context, ColumnPtrWrapper * *output) { + *output = nullptr; + if (!is_constant()) { + return Status::OK(); + } + + if (_constant_col != nullptr) { + *output = _constant_col.get(); + return Status::OK(); + } + + int result = -1; + Block block; + // If block is empty, some functions will produce no result. So we insert a column with + // single value here. 
+ block.insert({ColumnUInt8::create(1), std::make_shared(), ""}); + RETURN_IF_ERROR(execute(context, &block, &result)); + DCHECK(result != -1); + const auto& column = block.get_by_position(result).column; + _constant_col = std::make_shared(column); *output = _constant_col.get(); return Status::OK(); } - int result = -1; - Block block; - // If block is empty, some functions will produce no result. So we insert a column with - // single value here. - block.insert({ColumnUInt8::create(1), std::make_shared(), ""}); - RETURN_IF_ERROR(execute(context, &block, &result)); - DCHECK(result != -1); - const auto& column = block.get_by_position(result).column; - _constant_col = std::make_shared(column); - *output = _constant_col.get(); - return Status::OK(); -} + void VExpr::register_function_context(doris::RuntimeState * state, VExprContext * context) { + FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); + std::vector arg_types; + for (int i = 0; i < _children.size(); ++i) { + arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->type())); + } -void VExpr::register_function_context(doris::RuntimeState* state, VExprContext* context) { - FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); - std::vector arg_types; - for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->type())); + _fn_context_index = context->register_func(state, return_type, arg_types, 0); } - _fn_context_index = context->register_func(state, return_type, arg_types, 0); -} - -Status VExpr::init_function_context(VExprContext* context, - FunctionContext::FunctionStateScope scope, - const FunctionBasePtr& function) const { - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - if (scope == FunctionContext::FRAGMENT_LOCAL) { - std::vector constant_cols; - for (auto c : _children) { - ColumnPtrWrapper* const_col_wrapper = nullptr; - 
RETURN_IF_ERROR(c->get_const_col(context, &const_col_wrapper)); - constant_cols.push_back(const_col_wrapper); + Status VExpr::init_function_context(VExprContext * context, + FunctionContext::FunctionStateScope scope, + const FunctionBasePtr& function) const { + FunctionContext* fn_ctx = context->fn_context(_fn_context_index); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + std::vector constant_cols; + for (auto c : _children) { + ColumnPtrWrapper* const_col_wrapper = nullptr; + RETURN_IF_ERROR(c->get_const_col(context, &const_col_wrapper)); + constant_cols.push_back(const_col_wrapper); + } + fn_ctx->impl()->set_constant_cols(constant_cols); } - fn_ctx->impl()->set_constant_cols(constant_cols); - } - if (scope == FunctionContext::FRAGMENT_LOCAL) { - RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + } + RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::THREAD_LOCAL)); + return Status::OK(); } - RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::THREAD_LOCAL)); - return Status::OK(); -} -void VExpr::close_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope, - const FunctionBasePtr& function) const { - if (_fn_context_index != -1 && !context->_stale) { - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - function->close(fn_ctx, FunctionContext::THREAD_LOCAL); - if (scope == FunctionContext::FRAGMENT_LOCAL) { - function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL); + void VExpr::close_function_context(VExprContext * context, + FunctionContext::FunctionStateScope scope, + const FunctionBasePtr& function) const { + if (_fn_context_index != -1 && !context->_stale) { + FunctionContext* fn_ctx = context->fn_context(_fn_context_index); + function->close(fn_ctx, FunctionContext::THREAD_LOCAL); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + 
function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL); + } } } -} } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp index cbcc70ffab385b..954142f04de72d 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -38,9 +38,9 @@ Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, if ((idx & 1) == 0) { keys.get().push_back(item); - } else { + } else { values.get().push_back(item); - } + } } map.get().push_back(keys); map.get().push_back(values); diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index f9493ade082c35..75aae961460318 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -803,9 +803,7 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap() { } Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( - const ColumnMap* column_map, - const DataTypeMap* data_type_map) { - + const ColumnMap* column_map, const DataTypeMap* data_type_map) { ColumnPtr key_data = column_map->get_keys_ptr(); ColumnPtr value_data = column_map->get_values_ptr(); if (column_map->get_keys().is_nullable()) { @@ -820,8 +818,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap( value_data = val_nullable_column.get_nested_column_ptr(); } - ColumnWithTypeAndName key_typed_column = { - key_data, remove_nullable(data_type_map->get_keys()),"map.key"}; + ColumnWithTypeAndName key_typed_column = {key_data, remove_nullable(data_type_map->get_keys()), + "map.key"}; _key_convertor->set_source_column(key_typed_column, _row_pos, _num_rows); _key_convertor->convert_to_olap(); diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 2318b0e3693b0b..1192838a6c6649 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -396,33 +396,28 
@@ class OlapBlockDataConvertor { OlapColumnDataConvertorBaseUPtr _item_convertor; }; - - class OlapColumnDataConvertorMap - : public OlapColumnDataConvertorBase { + class OlapColumnDataConvertorMap : public OlapColumnDataConvertorBase { public: OlapColumnDataConvertorMap(OlapColumnDataConvertorBaseUPtr key_convertor, - OlapColumnDataConvertorBaseUPtr value_convertor) - : _key_convertor(std::move(key_convertor)), _value_convertor(std::move(value_convertor)) { + OlapColumnDataConvertorBaseUPtr value_convertor) + : _key_convertor(std::move(key_convertor)), + _value_convertor(std::move(value_convertor)) { _results.resize(2); } Status convert_to_olap() override; - const void* get_data() const override { - return _results.data(); - }; + const void* get_data() const override { return _results.data(); }; const void* get_data_at(size_t offset) const override { LOG(FATAL) << "now not support get_data_at for OlapColumnDataConvertorMap"; }; private: - Status convert_to_olap(const ColumnMap* column_map, - const DataTypeMap* data_type_map); + Status convert_to_olap(const ColumnMap* column_map, const DataTypeMap* data_type_map); OlapColumnDataConvertorBaseUPtr _key_convertor; OlapColumnDataConvertorBaseUPtr _value_convertor; std::vector _results; - };//OlapColumnDataConvertorMap - + }; //OlapColumnDataConvertorMap private: std::vector _convertors; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 35dde18710804c..ee2c09ed4e967b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1790,10 +1790,9 @@ enum ExprSerCode { ARRAY_LITERAL(13), CAST_EXPR(14), JSON_LITERAL(15), - ARITHMETIC_EXPR(16); - MAP_LITERAL(17); ARITHMETIC_EXPR(16), - STRUCT_LITERAL(17); + STRUCT_LITERAL(17), + MAP_LITERAL(18); private static Map codeMap = Maps.newHashMap(); @@ -1898,7 +1897,7 @@ public static Expr readIn(DataInput 
in) throws IOException { return FunctionCallExpr.read(in); case ARRAY_LITERAL: return ArrayLiteral.read(in); - case MAP_LITERAL: + case MAP_LITERAL: return MapLiteral.read(in); case STRUCT_LITERAL: return StructLiteral.read(in); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 8375da22b2dfa7..1c07867588cb4a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1274,7 +1274,7 @@ public static boolean isCastMatchAllowed(Function desc, Function candicate) { || !(candicateArgTypes[0] instanceof ScalarType)) { if (candicateArgTypes[0] instanceof ArrayType || candicateArgTypes[0] instanceof MapType) { return descArgTypes[0].matchesType(candicateArgTypes[0]); - } + } return false; } final ScalarType descArgType = (ScalarType) descArgTypes[0]; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index b42375ed0c580b..250356a7d02b11 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -532,7 +532,7 @@ public static boolean canCastTo(Type sourceType, Type targetType) { return ArrayType.canCastTo((ArrayType) sourceType, (ArrayType) targetType); } else if (sourceType.isMapType() && targetType.isMapType()) { return MapType.canCastTo((MapType) sourceType, (MapType) targetType); - } else if (targetType.isArrayType() && !((ArrayType) targetType).getItemType().isScalarType() + } else if (targetType.isArrayType() && !((ArrayType) targetType).getItemType().isScalarType() && !sourceType.isNull()) { // TODO: current not support cast any non-array type(except for null) to nested array type. return false;