From 08e7805e128d9a3e89dd4789e53644db0ac8fa38 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Fri, 10 Jun 2022 21:39:20 +0800 Subject: [PATCH 1/5] offset type inside array change from UInt32 to Int64 --- be/src/olap/column_vector.cpp | 6 +++--- be/src/olap/column_vector.h | 4 ++-- be/src/olap/row_block2.cpp | 10 +++++----- be/src/olap/rowset/segment_v2/column_reader.cpp | 2 +- be/src/vec/columns/column.h | 2 +- be/src/vec/sink/mysql_result_writer.cpp | 4 ++-- be/src/vec/utils/arrow_column_to_doris_column.cpp | 8 ++++---- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/be/src/olap/column_vector.cpp b/be/src/olap/column_vector.cpp index 41e91b9d636ad2..f766fabb0d3463 100644 --- a/be/src/olap/column_vector.cpp +++ b/be/src/olap/column_vector.cpp @@ -144,13 +144,13 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T array_type_info->item_type_info(), field->get_sub_field(0), &elements)); std::unique_ptr offsets; - const auto* offsets_type_info = get_scalar_type_info(); + const auto* offsets_type_info = get_scalar_type_info(); RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, offsets_type_info, nullptr, &offsets)); std::unique_ptr local(new ArrayColumnVectorBatch( type_info, is_nullable, - reinterpret_cast*>(offsets.release()), + reinterpret_cast*>(offsets.release()), elements.release())); RETURN_IF_ERROR(local->resize(init_capacity)); *column_vector_batch = std::move(local); @@ -181,7 +181,7 @@ Status ScalarColumnVectorBatch::resize(size_t new_cap) { } ArrayColumnVectorBatch::ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements) : ColumnVectorBatch(type_info, is_nullable), _data(0) { _offsets.reset(offsets); diff --git a/be/src/olap/column_vector.h b/be/src/olap/column_vector.h index 70ee6e3041f9d7..eedc5ad8b78417 100644 --- a/be/src/olap/column_vector.h +++ b/be/src/olap/column_vector.h @@ -178,7 +178,7 @@ class ArrayNullColumnVectorBatch : public ColumnVectorBatch { class ArrayColumnVectorBatch : public ColumnVectorBatch { public: explicit ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements); ~ArrayColumnVectorBatch() override; Status resize(size_t new_cap) override; @@ -249,7 +249,7 @@ class ArrayColumnVectorBatch : public ColumnVectorBatch { std::unique_ptr _elements; // Stores each array's start offsets in _elements. - std::unique_ptr> _offsets; + std::unique_ptr> _offsets; }; } // namespace doris diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index fd4b0ef23ec337..68555c1e4ef358 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -289,8 +289,8 @@ Status RowBlockV2::_copy_data_to_column(int cid, auto& offsets_col = column_array->get_offsets(); offsets_col.reserve(_selected_size); - uint32_t offset = offsets_col.back(); - for (uint16_t j = 0; j < _selected_size; ++j) { + int64_t offset = offsets_col.back(); + for (int64_t j = 0; j < _selected_size; ++j) { uint16_t row_idx = _selection_vector[j]; auto cv = reinterpret_cast(column_block(cid).cell_ptr(row_idx)); if (!nullable_mark_array[j]) { @@ -550,10 +550,10 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t auto nested_col = (*column_array->get_data_ptr()).assume_mutable(); auto& offsets_col = column_array->get_offsets(); - uint32_t offset = offsets_col.back(); - for (uint32_t j = 0; j < selected_size; ++j) { + int64_t offset = offsets_col.back(); + for (int64_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - uint32_t row_idx = j + start; + int64_t row_idx = j + start; auto cv = reinterpret_cast(batch->cell_ptr(row_idx)); offset += cv->length(); _append_data_to_column(array_batch->elements(), array_batch->item_offset(row_idx), diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index d68ffc66c08033..1a3d96ae9d4986 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -396,7 +396,7 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { if (_array_reader->is_nullable()) { RETURN_IF_ERROR(_null_iterator->init(opts)); } - const auto* offset_type_info = get_scalar_type_info(); + const auto* offset_type_info = get_scalar_type_info(); RETURN_IF_ERROR( ColumnVectorBatch::create(1024, false, offset_type_info, nullptr, &_length_batch)); return Status::OK(); diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index ea8b952f8ce503..9a63121bbfb395 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -296,7 +296,7 @@ class IColumn : public COW { * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. */ - using Offset = UInt32; + using Offset = Int64; using Offsets = PaddedPODArray; virtual Ptr replicate(const Offsets& offsets) const = 0; diff --git a/be/src/vec/sink/mysql_result_writer.cpp b/be/src/vec/sink/mysql_result_writer.cpp index 55fa8aaf4118c1..88888793b26506 100644 --- a/be/src/vec/sink/mysql_result_writer.cpp +++ b/be/src/vec/sink/mysql_result_writer.cpp @@ -111,7 +111,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, } else if constexpr (type == TYPE_ARRAY) { auto& column_array = assert_cast(*column); auto& offsets = column_array.get_offsets(); - for (int i = 0; i < row_size; ++i) { + for (size_t i = 0; i < row_size; ++i) { if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); } @@ -128,7 +128,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, _buffer.open_dynamic_mode(); buf_ret = _buffer.push_string("[", 1); bool begin = true; - for (int j = offsets[i - 1]; j < offsets[i]; ++j) { + for (auto j = offsets[i - 1]; j < offsets[i]; ++j) { if (!begin) { buf_ret = _buffer.push_string(", ", 2); } diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp b/be/src/vec/utils/arrow_column_to_doris_column.cpp index 206c279e4c4da1..76741fdb59d004 100644 --- a/be/src/vec/utils/arrow_column_to_doris_column.cpp +++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp @@ -100,8 +100,8 @@ static size_t fill_nullable_column(const arrow::Array* array, size_t array_idx, /// Also internal strings are null terminated. static Status convert_column_with_string_data(const arrow::Array* array, size_t array_idx, MutableColumnPtr& data_column, size_t num_elements) { - PaddedPODArray& column_chars_t = assert_cast(*data_column).get_chars(); - PaddedPODArray& column_offsets = assert_cast(*data_column).get_offsets(); + auto& column_chars_t = assert_cast(*data_column).get_chars(); + auto& column_offsets = assert_cast(*data_column).get_offsets(); auto concrete_array = down_cast(array); std::shared_ptr buffer = concrete_array->value_data(); @@ -121,8 +121,8 @@ static Status convert_column_with_string_data(const arrow::Array* array, size_t static Status convert_column_with_fixed_size_data(const arrow::Array* array, size_t array_idx, MutableColumnPtr& data_column, size_t num_elements) { - PaddedPODArray& column_chars_t = assert_cast(*data_column).get_chars(); - PaddedPODArray& column_offsets = assert_cast(*data_column).get_offsets(); + auto& column_chars_t = assert_cast(*data_column).get_chars(); + auto& column_offsets = assert_cast(*data_column).get_offsets(); auto concrete_array = down_cast(array); uint32_t width = concrete_array->byte_width(); From 821e9ae59ffbef7ac8cf86c0981f1af15e8d13de Mon Sep 17 00:00:00 2001 From: cambyzju Date: Mon, 13 Jun 2022 21:21:53 +0800 Subject: [PATCH 2/5] change collection_value length from u32 to int64 --- be/src/olap/row_block2.cpp | 2 +- be/src/olap/rowset/segment_v2/column_reader.h | 2 +- .../olap/rowset/segment_v2/column_writer.cpp | 4 +-- be/src/runtime/collection_value.cpp | 18 +++++------ be/src/runtime/collection_value.h | 30 +++++++++---------- be/src/udf/udf.h | 4 +-- be/src/vec/columns/column_string.cpp | 4 +-- be/src/vec/data_types/data_type_array.cpp | 4 +-- be/src/vec/data_types/data_type_string.cpp | 18 +++++------ be/test/olap/column_vector_test.cpp | 10 +++---- .../apache/doris/catalog/PrimitiveType.java | 3 +- 11 files changed, 50 insertions(+), 49 deletions(-) diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 68555c1e4ef358..2ace3beb96ffef 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -550,7 +550,7 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t auto nested_col = (*column_array->get_data_ptr()).assume_mutable(); auto& offsets_col = column_array->get_offsets(); - int64_t offset = offsets_col.back(); + auto offset = offsets_col.back(); for (int64_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { int64_t row_idx = j + start; diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 47250a96cb5d15..a01261422367f8 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -369,7 +369,7 @@ class ArrayFileColumnIterator final : public ColumnIterator { : size_to_read; ColumnBlockView ordinal_view(&ordinal_block); RETURN_IF_ERROR(_length_iterator->next_batch(&this_read, &ordinal_view, &has_null)); - auto* ordinals = reinterpret_cast(_length_batch->data()); + auto* ordinals = reinterpret_cast(_length_batch->data()); for (int i = 0; i < this_read; ++i) { item_ordinal += ordinals[i]; } diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 6c76ddff62645b..6e8ca1a3379bf9 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -110,7 +110,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* ColumnWriter::create(item_options, &item_column, _wblock, &item_writer)); // create length writer - FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT; + FieldType length_type = FieldType::OLAP_FIELD_TYPE_BIGINT; ColumnWriterOptions length_options; length_options.meta = opts.meta->add_children_columns(); @@ -119,7 +119,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* length_options.meta->set_type(length_type); length_options.meta->set_is_nullable(false); length_options.meta->set_length( - get_scalar_type_info()->size()); + get_scalar_type_info()->size()); length_options.meta->set_encoding(DEFAULT_ENCODING); length_options.meta->set_compression(opts.meta->compression()); diff --git a/be/src/runtime/collection_value.cpp b/be/src/runtime/collection_value.cpp index 9ea8fc3d436133..71fd1e23b5c038 100644 --- a/be/src/runtime/collection_value.cpp +++ b/be/src/runtime/collection_value.cpp @@ -186,7 +186,7 @@ struct ArrayIteratorFunctionsForString : public GenericArrayIteratorFunctions(item); if (string_value->len) { - int offset = convert_to(string_value->ptr); + int64_t offset = convert_to(string_value->ptr); string_value->ptr = convert_to(tuple_data + offset); } } @@ -448,7 +448,7 @@ size_t CollectionValue::get_byte_size(const TypeDescriptor& item_type) const { return result; } -Status CollectionValue::init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, +Status CollectionValue::init_collection(ObjectPool* pool, int64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [pool](size_t size) -> uint8_t* { return pool->add_array(new uint8_t[size]); }, @@ -456,7 +456,7 @@ Status CollectionValue::init_collection(ObjectPool* pool, uint32_t size, Primiti } Status CollectionValue::init_collection(CollectionValue* value, const AllocateMemFunc& allocate, - uint32_t size, PrimitiveType child_type) { + int64_t size, PrimitiveType child_type) { if (value == nullptr) { return Status::InvalidArgument("collection value is null"); } @@ -477,13 +477,13 @@ Status CollectionValue::init_collection(CollectionValue* value, const AllocateMe return Status::OK(); } -Status CollectionValue::init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, +Status CollectionValue::init_collection(MemPool* pool, int64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [pool](size_t size) { return pool->allocate(size); }, size, child_type); } -Status CollectionValue::init_collection(FunctionContext* context, uint32_t size, +Status CollectionValue::init_collection(FunctionContext* context, int64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [context](size_t size) { return context->allocate(size); }, size, child_type); @@ -506,8 +506,8 @@ void CollectionValue::deep_copy_collection(CollectionValue* shallow_copied_cv, } auto iterator = cv->iterator(item_type.type); - int coll_byte_size = cv->length() * iterator.type_size(); - int nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0; + int64_t coll_byte_size = cv->length() * iterator.type_size(); + int64_t nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0; MemFootprint footprint = gen_mem_footprint(coll_byte_size + nulls_size); int64_t offset = footprint.first; @@ -544,10 +544,10 @@ void CollectionValue::deserialize_collection(CollectionValue* cv, const char* tu return; } // assgin data and null_sign pointer position in tuple_data - int data_offset = convert_to(cv->data()); + int64_t data_offset = convert_to(cv->data()); cv->set_data(convert_to(tuple_data + data_offset)); if (cv->has_null()) { - int null_offset = convert_to(cv->null_signs()); + int64_t null_offset = convert_to(cv->null_signs()); cv->set_null_signs(convert_to(tuple_data + null_offset)); } auto iterator = cv->iterator(item_type.type); diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h index ccf623171fdecd..3bd279022a7b45 100644 --- a/be/src/runtime/collection_value.h +++ b/be/src/runtime/collection_value.h @@ -64,25 +64,25 @@ class CollectionValue { public: CollectionValue() = default; - explicit CollectionValue(uint32_t length) + explicit CollectionValue(int64_t length) : _data(nullptr), _length(length), _has_null(false), _null_signs(nullptr) {} - CollectionValue(void* data, uint32_t length) + CollectionValue(void* data, int64_t length) : _data(data), _length(length), _has_null(false), _null_signs(nullptr) {} - CollectionValue(void* data, uint32_t length, bool* null_signs) + CollectionValue(void* data, int64_t length, bool* null_signs) : _data(data), _length(length), _has_null(true), _null_signs(null_signs) {} - CollectionValue(void* data, uint32_t length, bool has_null, bool* null_signs) + CollectionValue(void* data, int64_t length, bool has_null, bool* null_signs) : _data(data), _length(length), _has_null(has_null), _null_signs(null_signs) {} - bool is_null_at(uint32_t index) const { return this->_has_null && this->_null_signs[index]; } + bool is_null_at(int64_t index) const { return this->_has_null && this->_null_signs[index]; } void to_collection_val(CollectionVal* val) const; - uint32_t size() const { return _length; } + int64_t size() const { return _length; } - uint32_t length() const { return _length; } + int64_t length() const { return _length; } void shallow_copy(const CollectionValue* other); @@ -96,13 +96,13 @@ class CollectionValue { /** * init collection, will alloc (children Type's size + 1) * (children Nums) memory */ - static Status init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, + static Status init_collection(ObjectPool* pool, int64_t size, PrimitiveType child_type, CollectionValue* value); - static Status init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, + static Status init_collection(MemPool* pool, int64_t size, PrimitiveType child_type, CollectionValue* value); - static Status init_collection(FunctionContext* context, uint32_t size, PrimitiveType child_type, + static Status init_collection(FunctionContext* context, int64_t size, PrimitiveType child_type, CollectionValue* value); static CollectionValue from_collection_val(const CollectionVal& val); @@ -123,7 +123,7 @@ class CollectionValue { const bool* null_signs() const { return _null_signs; } void* mutable_data() { return _data; } bool* mutable_null_signs() { return _null_signs; } - void set_length(uint32_t length) { _length = length; } + void set_length(int64_t length) { _length = length; } void set_has_null(bool has_null) { _has_null = has_null; } void set_data(void* data) { _data = data; } void set_null_signs(bool* null_signs) { _null_signs = null_signs; } @@ -131,13 +131,13 @@ class CollectionValue { private: using AllocateMemFunc = std::function; static Status init_collection(CollectionValue* value, const AllocateMemFunc& allocate, - uint32_t size, PrimitiveType child_type); + int64_t size, PrimitiveType child_type); ArrayIterator internal_iterator(PrimitiveType child_type) const; private: // child column data void* _data; - uint32_t _length; + int64_t _length; // item has no null value if has_null is false. // item ```may``` has null value if has_null is true. bool _has_null; @@ -160,7 +160,7 @@ class ArrayIterator { } return false; } - bool seek(uint32_t n) const { + bool seek(int64_t n) const { if (n >= _collection_value->size()) { return false; } @@ -248,7 +248,7 @@ class ArrayIterator { private: CollectionValue* _collection_value; - mutable uint32_t _offset; + mutable int64_t _offset; const int _type_size; const bool _is_type_fixed_width; diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 56e447f3752365..0549a6178a9920 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -742,7 +742,7 @@ struct HllVal : public StringVal { struct CollectionVal : public AnyVal { void* data; - uint32_t length; + int64_t length; // item has no null value if has_null is false. // item ```may``` has null value if has_null is true. bool has_null; @@ -751,7 +751,7 @@ struct CollectionVal : public AnyVal { CollectionVal() = default; - CollectionVal(void* data, uint32_t length, bool has_null, bool* null_signs) + CollectionVal(void* data, int64_t length, bool has_null, bool* null_signs) : data(data), length(length), has_null(has_null), null_signs(null_signs) {}; static CollectionVal null() { diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index c469d7919063ce..a8dd373a768103 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -164,7 +164,7 @@ ColumnPtr ColumnString::permute(const Permutation& perm, size_t limit) const { StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const { - UInt32 string_size = size_at(n); + Int64 string_size = size_at(n); size_t offset = offset_at(n); StringRef res; @@ -178,7 +178,7 @@ StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena, } const char* ColumnString::deserialize_and_insert_from_arena(const char* pos) { - const UInt32 string_size = unaligned_load(pos); + const Int64 string_size = unaligned_load(pos); pos += sizeof(string_size); const size_t old_size = chars.size(); diff --git a/be/src/vec/data_types/data_type_array.cpp b/be/src/vec/data_types/data_type_array.cpp index cc67eb7973b1be..e39f1c569bc68e 100644 --- a/be/src/vec/data_types/data_type_array.cpp +++ b/be/src/vec/data_types/data_type_array.cpp @@ -65,7 +65,7 @@ char* DataTypeArray::serialize(const IColumn& column, char* buf) const { const auto& data_column = assert_cast(*ptr.get()); // row num - *reinterpret_cast(buf) = column.size(); + *reinterpret_cast(buf) = column.size(); buf += sizeof(IColumn::Offset); // offsets memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(IColumn::Offset)); @@ -79,7 +79,7 @@ const char* DataTypeArray::deserialize(const char* buf, IColumn* column) const { auto& offsets = data_column->get_offsets(); // row num - uint32_t row_num = *reinterpret_cast(buf); + IColumn::Offset row_num = *reinterpret_cast(buf); buf += sizeof(IColumn::Offset); // offsets offsets.resize(row_num); diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp index 220b418e4b3a35..b7bf2fdc2e9299 100644 --- a/be/src/vec/data_types/data_type_string.cpp +++ b/be/src/vec/data_types/data_type_string.cpp @@ -90,7 +90,7 @@ bool DataTypeString::equals(const IDataType& rhs) const { int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column) const { auto ptr = column.convert_to_full_column_if_const(); const auto& data_column = assert_cast(*ptr.get()); - return sizeof(uint32_t) * (column.size() + 1) + sizeof(uint64_t) + + return sizeof(IColumn::Offset) * (column.size() + 1) + sizeof(uint64_t) + data_column.get_chars().size(); } @@ -99,11 +99,11 @@ char* DataTypeString::serialize(const IColumn& column, char* buf) const { const auto& data_column = assert_cast(*ptr.get()); // row num - *reinterpret_cast(buf) = column.size(); - buf += sizeof(uint32_t); + *reinterpret_cast(buf) = column.size(); + buf += sizeof(IColumn::Offset); // offsets - memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(uint32_t)); - buf += column.size() * sizeof(uint32_t); + memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(IColumn::Offset)); + buf += column.size() * sizeof(IColumn::Offset); // total length uint64_t value_len = data_column.get_chars().size(); *reinterpret_cast(buf) = value_len; @@ -121,12 +121,12 @@ const char* DataTypeString::deserialize(const char* buf, IColumn* column) const ColumnString::Offsets& offsets = column_string->get_offsets(); // row num - uint32_t row_num = *reinterpret_cast(buf); - buf += sizeof(uint32_t); + IColumn::Offset row_num = *reinterpret_cast(buf); + buf += sizeof(IColumn::Offset); // offsets offsets.resize(row_num); - memcpy(offsets.data(), buf, sizeof(uint32_t) * row_num); - buf += sizeof(uint32_t) * row_num; + memcpy(offsets.data(), buf, sizeof(IColumn::Offset) * row_num); + buf += sizeof(IColumn::Offset) * row_num; // total length uint64_t value_len = *reinterpret_cast(buf); buf += sizeof(uint64_t); diff --git a/be/test/olap/column_vector_test.cpp b/be/test/olap/column_vector_test.cpp index 5faffb041db92f..b744c89bb5c2db 100644 --- a/be/test/olap/column_vector_test.cpp +++ b/be/test/olap/column_vector_test.cpp @@ -95,8 +95,8 @@ void test_read_write_array_column_vector(const TypeInfo* array_type_info, size_t // first write for (size_t i = 0; i < array_init_size; ++i) { - uint32_t len = result[i].length(); - memcpy(offset_cvb->mutable_cell_ptr(1 + i), &len, sizeof(uint32_t)); + int64_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(1 + i), &len, sizeof(int64_t)); } array_cvb->set_null_bits(0, array_init_size, false); array_cvb->get_offset_by_length(0, array_init_size); @@ -114,8 +114,8 @@ void test_read_write_array_column_vector(const TypeInfo* array_type_info, size_t // second write EXPECT_TRUE(array_cvb->resize(array_size).ok()); for (int i = array_init_size; i < array_size; ++i) { - uint32_t len = result[i].length(); - memcpy(offset_cvb->mutable_cell_ptr(i + 1), &len, sizeof(uint32_t)); + int64_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(i + 1), &len, sizeof(int64_t)); } array_cvb->set_null_bits(array_init_size, array_size - array_init_size, false); array_cvb->get_offset_by_length(array_init_size, array_size - array_init_size); @@ -170,7 +170,7 @@ TEST_F(ColumnVectorTest, array_column_vector_test) { auto* item_val = new uint8_t[num_item]; memset(null_signs, 0, sizeof(bool) * 3); - for (int i = 0; i < num_item; ++i) { + for (size_t i = 0; i < num_item; ++i) { item_val[i] = i; if (i % 3 == 0) { size_t array_index = i / 3; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java index a953386630867b..2f8fa65ad4f345 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -55,7 +55,8 @@ public enum PrimitiveType { BITMAP("BITMAP", 16, TPrimitiveType.OBJECT), QUANTILE_STATE("QUANTILE_STATE", 16, TPrimitiveType.QUANTILE_STATE), - ARRAY("ARRAY", 24, TPrimitiveType.ARRAY), + // sizeof(CollectionValue) + ARRAY("ARRAY", 32, TPrimitiveType.ARRAY), MAP("MAP", 24, TPrimitiveType.MAP), STRUCT("STRUCT", 24, TPrimitiveType.STRUCT), STRING("STRING", 16, TPrimitiveType.STRING), From 6bb813db9aef980b6cb5188af9de0bc04fb931f0 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Mon, 13 Jun 2022 23:00:46 +0800 Subject: [PATCH 3/5] be codes format for column_writer.cpp --- be/src/olap/rowset/segment_v2/column_writer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 6e8ca1a3379bf9..7749a46e1927fa 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -118,8 +118,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* length_options.meta->set_unique_id(2); length_options.meta->set_type(length_type); length_options.meta->set_is_nullable(false); - length_options.meta->set_length( - get_scalar_type_info()->size()); + length_options.meta->set_length(get_scalar_type_info()->size()); length_options.meta->set_encoding(DEFAULT_ENCODING); length_options.meta->set_compression(opts.meta->compression()); From 38e2493c94a826a555a59c00325f8bc6b562f086 Mon Sep 17 00:00:00 2001 From: cambyzju Date: Wed, 15 Jun 2022 18:05:23 +0800 Subject: [PATCH 4/5] update offset to uint64 --- be/src/exec/olap_scanner.cpp | 2 +- be/src/olap/column_vector.cpp | 6 ++-- be/src/olap/column_vector.h | 4 +-- be/src/olap/row_block2.cpp | 8 ++--- .../olap/rowset/segment_v2/column_reader.cpp | 2 +- be/src/olap/rowset/segment_v2/column_reader.h | 2 +- .../olap/rowset/segment_v2/column_writer.cpp | 5 +-- be/src/runtime/collection_value.cpp | 12 +++---- be/src/runtime/collection_value.h | 32 +++++++++---------- be/src/runtime/tuple.cpp | 4 +-- be/src/udf/udf.h | 4 +-- be/src/vec/columns/column.h | 2 +- be/src/vec/columns/column_string.cpp | 4 +-- be/src/vec/sink/mysql_result_writer.cpp | 2 +- be/test/olap/column_vector_test.cpp | 8 ++--- 15 files changed, 49 insertions(+), 48 deletions(-) diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index da506d84db22b6..d1dfd8a7fdf538 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -395,7 +395,7 @@ Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) { auto pool = batch->tuple_data_pool(); CollectionValue::deep_copy_collection( slot, item_type, - [pool](int size) -> MemFootprint { + [pool](int64_t size) -> MemFootprint { int64_t offset = pool->total_allocated_bytes(); uint8_t* data = pool->allocate(size); return {offset, data}; diff --git a/be/src/olap/column_vector.cpp b/be/src/olap/column_vector.cpp index f766fabb0d3463..c4962926f29b27 100644 --- a/be/src/olap/column_vector.cpp +++ b/be/src/olap/column_vector.cpp @@ -144,13 +144,13 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T array_type_info->item_type_info(), field->get_sub_field(0), &elements)); std::unique_ptr offsets; - const auto* offsets_type_info = get_scalar_type_info(); + const auto* offsets_type_info = get_scalar_type_info(); RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, offsets_type_info, nullptr, &offsets)); std::unique_ptr local(new ArrayColumnVectorBatch( type_info, is_nullable, - reinterpret_cast*>(offsets.release()), + reinterpret_cast*>(offsets.release()), elements.release())); RETURN_IF_ERROR(local->resize(init_capacity)); *column_vector_batch = std::move(local); @@ -181,7 +181,7 @@ Status ScalarColumnVectorBatch::resize(size_t new_cap) { } ArrayColumnVectorBatch::ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements) : ColumnVectorBatch(type_info, is_nullable), _data(0) { _offsets.reset(offsets); diff --git a/be/src/olap/column_vector.h b/be/src/olap/column_vector.h index eedc5ad8b78417..28139fe7d2e6a2 100644 --- a/be/src/olap/column_vector.h +++ b/be/src/olap/column_vector.h @@ -178,7 +178,7 @@ class ArrayNullColumnVectorBatch : public ColumnVectorBatch { class ArrayColumnVectorBatch : public ColumnVectorBatch { public: explicit ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements); ~ArrayColumnVectorBatch() override; Status resize(size_t new_cap) override; @@ -249,7 +249,7 @@ class ArrayColumnVectorBatch : public ColumnVectorBatch { std::unique_ptr _elements; // Stores each array's start offsets in _elements. - std::unique_ptr> _offsets; + std::unique_ptr> _offsets; }; } // namespace doris diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 2ace3beb96ffef..947bdd40559c9e 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -289,8 +289,8 @@ Status RowBlockV2::_copy_data_to_column(int cid, auto& offsets_col = column_array->get_offsets(); offsets_col.reserve(_selected_size); - int64_t offset = offsets_col.back(); - for (int64_t j = 0; j < _selected_size; ++j) { + uint64_t offset = offsets_col.back(); + for (uint16_t j = 0; j < _selected_size; ++j) { uint16_t row_idx = _selection_vector[j]; auto cv = reinterpret_cast(column_block(cid).cell_ptr(row_idx)); if (!nullable_mark_array[j]) { @@ -551,9 +551,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t auto& offsets_col = column_array->get_offsets(); auto offset = offsets_col.back(); - for (int64_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - int64_t row_idx = j + start; + uint64_t row_idx = j + start; auto cv = reinterpret_cast(batch->cell_ptr(row_idx)); offset += cv->length(); _append_data_to_column(array_batch->elements(), array_batch->item_offset(row_idx), diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 1a3d96ae9d4986..fe6cd94a822d56 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -396,7 +396,7 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { if (_array_reader->is_nullable()) { RETURN_IF_ERROR(_null_iterator->init(opts)); } - const auto* offset_type_info = get_scalar_type_info(); + const auto* offset_type_info = get_scalar_type_info(); RETURN_IF_ERROR( ColumnVectorBatch::create(1024, false, offset_type_info, nullptr, &_length_batch)); return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index a01261422367f8..a365679ee797fd 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -369,7 +369,7 @@ class ArrayFileColumnIterator final : public ColumnIterator { : size_to_read; ColumnBlockView ordinal_view(&ordinal_block); RETURN_IF_ERROR(_length_iterator->next_batch(&this_read, &ordinal_view, &has_null)); - auto* ordinals = reinterpret_cast(_length_batch->data()); + auto* ordinals = reinterpret_cast(_length_batch->data()); for (int i = 0; i < this_read; ++i) { item_ordinal += ordinals[i]; } diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 7749a46e1927fa..c96ccd9a61cb29 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -110,7 +110,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* ColumnWriter::create(item_options, &item_column, _wblock, &item_writer)); // create length writer - FieldType length_type = FieldType::OLAP_FIELD_TYPE_BIGINT; + FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; ColumnWriterOptions length_options; length_options.meta = opts.meta->add_children_columns(); @@ -118,7 +118,8 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* length_options.meta->set_unique_id(2); length_options.meta->set_type(length_type); length_options.meta->set_is_nullable(false); - length_options.meta->set_length(get_scalar_type_info()->size()); + length_options.meta->set_length( + get_scalar_type_info()->size()); length_options.meta->set_encoding(DEFAULT_ENCODING); length_options.meta->set_compression(opts.meta->compression()); diff --git a/be/src/runtime/collection_value.cpp b/be/src/runtime/collection_value.cpp index 71fd1e23b5c038..d8581d8ac5f246 100644 --- a/be/src/runtime/collection_value.cpp +++ b/be/src/runtime/collection_value.cpp @@ -448,7 +448,7 @@ size_t CollectionValue::get_byte_size(const TypeDescriptor& item_type) const { return result; } -Status CollectionValue::init_collection(ObjectPool* pool, int64_t size, PrimitiveType child_type, +Status CollectionValue::init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [pool](size_t size) -> uint8_t* { return pool->add_array(new uint8_t[size]); }, @@ -456,7 +456,7 @@ Status CollectionValue::init_collection(ObjectPool* pool, int64_t size, Primitiv } Status CollectionValue::init_collection(CollectionValue* value, const AllocateMemFunc& allocate, - int64_t size, PrimitiveType child_type) { + uint64_t size, PrimitiveType child_type) { if (value == nullptr) { return Status::InvalidArgument("collection value is null"); } @@ -477,13 +477,13 @@ Status CollectionValue::init_collection(CollectionValue* value, const AllocateMe return Status::OK(); } -Status CollectionValue::init_collection(MemPool* pool, int64_t size, PrimitiveType child_type, +Status CollectionValue::init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [pool](size_t size) { return pool->allocate(size); }, size, child_type); } -Status CollectionValue::init_collection(FunctionContext* context, int64_t size, +Status CollectionValue::init_collection(FunctionContext* context, uint64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [context](size_t size) { return context->allocate(size); }, size, child_type); @@ -506,8 +506,8 @@ void CollectionValue::deep_copy_collection(CollectionValue* shallow_copied_cv, } auto iterator = cv->iterator(item_type.type); - int64_t coll_byte_size = cv->length() * iterator.type_size(); - int64_t nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0; + uint64_t coll_byte_size = cv->length() * iterator.type_size(); + uint64_t nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0; MemFootprint footprint = gen_mem_footprint(coll_byte_size + nulls_size); int64_t offset = footprint.first; diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h index 3bd279022a7b45..3fac161503cea1 100644 --- a/be/src/runtime/collection_value.h +++ b/be/src/runtime/collection_value.h @@ -32,7 +32,7 @@ using doris_udf::FunctionContext; using doris_udf::AnyVal; using MemFootprint = std::pair; -using GenMemFootprintFunc = std::function; +using GenMemFootprintFunc = std::function; struct ArrayIteratorFunctionsBase; class ArrayIterator; @@ -64,25 +64,25 @@ class CollectionValue { public: CollectionValue() = default; - explicit CollectionValue(int64_t length) + explicit CollectionValue(uint64_t length) : _data(nullptr), _length(length), _has_null(false), _null_signs(nullptr) {} - CollectionValue(void* data, int64_t length) + CollectionValue(void* data, uint64_t length) : _data(data), _length(length), _has_null(false), _null_signs(nullptr) {} - CollectionValue(void* data, int64_t length, bool* null_signs) + CollectionValue(void* data, uint64_t length, bool* null_signs) : _data(data), _length(length), _has_null(true), _null_signs(null_signs) {} - CollectionValue(void* data, int64_t length, bool has_null, bool* null_signs) + CollectionValue(void* data, uint64_t length, bool has_null, bool* null_signs) : _data(data), _length(length), _has_null(has_null), _null_signs(null_signs) {} - bool is_null_at(int64_t index) const { return this->_has_null && this->_null_signs[index]; } + bool is_null_at(uint64_t index) const { return this->_has_null && this->_null_signs[index]; } void to_collection_val(CollectionVal* val) const; - int64_t size() const { return _length; } + uint64_t size() const { return _length; } - int64_t length() const { return _length; } + uint64_t length() const { return _length; } void shallow_copy(const CollectionValue* other); @@ -96,13 +96,13 @@ class CollectionValue { /** * init collection, will alloc (children Type's size + 1) * (children Nums) memory */ - static Status init_collection(ObjectPool* pool, int64_t size, PrimitiveType child_type, + static Status init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type, CollectionValue* value); - static Status init_collection(MemPool* pool, int64_t size, PrimitiveType child_type, + static Status init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type, CollectionValue* value); - static Status init_collection(FunctionContext* context, int64_t size, PrimitiveType child_type, + static Status init_collection(FunctionContext* context, uint64_t size, PrimitiveType child_type, CollectionValue* value); static CollectionValue from_collection_val(const CollectionVal& val); @@ -123,7 +123,7 @@ class CollectionValue { const bool* null_signs() const { return _null_signs; } void* mutable_data() { return _data; } bool* mutable_null_signs() { return _null_signs; } - void set_length(int64_t length) { _length = length; } + void set_length(uint64_t length) { _length = length; } void set_has_null(bool has_null) { _has_null = has_null; } void set_data(void* data) { _data = data; } void set_null_signs(bool* null_signs) { _null_signs = null_signs; } @@ -131,13 +131,13 @@ class CollectionValue { private: using AllocateMemFunc = std::function; static Status init_collection(CollectionValue* value, const AllocateMemFunc& allocate, - int64_t size, PrimitiveType child_type); + uint64_t size, PrimitiveType child_type); ArrayIterator internal_iterator(PrimitiveType child_type) const; private: // child column data void* _data; - int64_t _length; + uint64_t _length; // item has no null value if has_null is false. // item ```may``` has null value if has_null is true. bool _has_null; @@ -160,7 +160,7 @@ class ArrayIterator { } return false; } - bool seek(int64_t n) const { + bool seek(uint64_t n) const { if (n >= _collection_value->size()) { return false; } @@ -248,7 +248,7 @@ class ArrayIterator { private: CollectionValue* _collection_value; - mutable int64_t _offset; + mutable uint64_t _offset; const int _type_size; const bool _is_type_fixed_width; diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp index 92ee49ac0b6fed..641c13c4bb2b66 100644 --- a/be/src/runtime/tuple.cpp +++ b/be/src/runtime/tuple.cpp @@ -93,7 +93,7 @@ void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bo // copy collection slot deep_copy_collection_slots( dst, desc, - [pool](int size) -> MemFootprint { + [pool](int64_t size) -> MemFootprint { int64_t offset = pool->total_allocated_bytes(); uint8_t* data = pool->allocate(size); return {offset, data}; @@ -186,7 +186,7 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, // copy collection slots deep_copy_collection_slots( dst, desc, - [offset, data](int size) -> MemFootprint { + [offset, data](int64_t size) -> MemFootprint { MemFootprint footprint = {*offset, reinterpret_cast(*data)}; *offset += size; *data += size; diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 0549a6178a9920..324a1d362aa24f 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -742,7 +742,7 @@ struct HllVal : public StringVal { struct CollectionVal : public AnyVal { void* data; - int64_t length; + uint64_t length; // item has no null value if has_null is false. // item ```may``` has null value if has_null is true. bool has_null; @@ -751,7 +751,7 @@ struct CollectionVal : public AnyVal { CollectionVal() = default; - CollectionVal(void* data, int64_t length, bool has_null, bool* null_signs) + CollectionVal(void* data, uint64_t length, bool has_null, bool* null_signs) : data(data), length(length), has_null(has_null), null_signs(null_signs) {}; static CollectionVal null() { diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 9a63121bbfb395..ee5568bcc62cee 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -296,7 +296,7 @@ class IColumn : public COW { * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. */ - using Offset = Int64; + using Offset = UInt64; using Offsets = PaddedPODArray; virtual Ptr replicate(const Offsets& offsets) const = 0; diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index a8dd373a768103..12701ab5ed3594 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -164,7 +164,7 @@ ColumnPtr ColumnString::permute(const Permutation& perm, size_t limit) const { StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const { - Int64 string_size = size_at(n); + IColumn::Offset string_size = size_at(n); size_t offset = offset_at(n); StringRef res; @@ -178,7 +178,7 @@ StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena, } const char* ColumnString::deserialize_and_insert_from_arena(const char* pos) { - const Int64 string_size = unaligned_load(pos); + const IColumn::Offset string_size = unaligned_load(pos); pos += sizeof(string_size); const size_t old_size = chars.size(); diff --git a/be/src/vec/sink/mysql_result_writer.cpp b/be/src/vec/sink/mysql_result_writer.cpp index 88888793b26506..1902bf9972b03e 100644 --- a/be/src/vec/sink/mysql_result_writer.cpp +++ b/be/src/vec/sink/mysql_result_writer.cpp @@ -111,7 +111,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, } else if constexpr (type == TYPE_ARRAY) { auto& column_array = assert_cast(*column); auto& offsets = column_array.get_offsets(); - for (size_t i = 0; i < row_size; ++i) { + for (ssize_t i = 0; i < row_size; ++i) { if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); } diff --git a/be/test/olap/column_vector_test.cpp b/be/test/olap/column_vector_test.cpp index b744c89bb5c2db..19a28088d80638 100644 --- a/be/test/olap/column_vector_test.cpp +++ b/be/test/olap/column_vector_test.cpp @@ -95,8 +95,8 @@ void test_read_write_array_column_vector(const TypeInfo* array_type_info, size_t // first write for (size_t i = 0; i < array_init_size; ++i) { - int64_t len = result[i].length(); - memcpy(offset_cvb->mutable_cell_ptr(1 + i), &len, sizeof(int64_t)); + uint64_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(1 + i), &len, sizeof(uint64_t)); } array_cvb->set_null_bits(0, array_init_size, false); array_cvb->get_offset_by_length(0, array_init_size); @@ -114,8 +114,8 @@ void test_read_write_array_column_vector(const TypeInfo* array_type_info, size_t // second write EXPECT_TRUE(array_cvb->resize(array_size).ok()); for (int i = array_init_size; i < array_size; ++i) { - int64_t len = result[i].length(); - memcpy(offset_cvb->mutable_cell_ptr(i + 1), &len, sizeof(int64_t)); + uint64_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(i + 1), &len, sizeof(uint64_t)); } array_cvb->set_null_bits(array_init_size, array_size - array_init_size, false); array_cvb->get_offset_by_length(array_init_size, array_size - array_init_size); From a8877a9cd5ed22abecb7fdaf2b25d5e7b7b2411b Mon Sep 17 00:00:00 2001 From: cambyzju Date: Wed, 15 Jun 2022 18:16:13 +0800 Subject: [PATCH 5/5] use ssize_t to index offsets, because -1 is valid --- be/src/vec/columns/column_vector.cpp | 2 +- be/src/vec/functions/array/function_array_size.h | 2 +- be/src/vec/functions/function_string.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index acc7e4b9a2750d..dde2f033a7e3a1 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -327,7 +327,7 @@ ColumnPtr ColumnVector::replicate(const IColumn::Offsets& offsets) const { // vectorized this code to speed up IColumn::Offset counts[size]; - for (size_t i = 0; i < size; ++i) { + for (ssize_t i = 0; i < size; ++i) { counts[i] = offsets[i] - offsets[i - 1]; } diff --git a/be/src/vec/functions/array/function_array_size.h b/be/src/vec/functions/array/function_array_size.h index bffed4460d35f8..1988c5b66a269b 100644 --- a/be/src/vec/functions/array/function_array_size.h +++ b/be/src/vec/functions/array/function_array_size.h @@ -59,7 +59,7 @@ class FunctionArraySize : public IFunction { auto dst_column = ColumnInt64::create(input_rows_count); auto& dst_data = dst_column->get_data(); - for (size_t i = 0; i < offsets.size(); ++i) { + for (ssize_t i = 0; i < offsets.size(); ++i) { dst_data[i] = offsets[i] - offsets[i - 1]; } diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 3027c121d97e5a..3c29cb87458092 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -635,7 +635,7 @@ class FunctionStringRepeat : public IFunction { // fmt::memory_buffer buffer; res_offsets.resize(input_row_size); - for (size_t i = 0; i < input_row_size; ++i) { + for (ssize_t i = 0; i < input_row_size; ++i) { buffer.clear(); const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); int size = offsets[i] - offsets[i - 1] - 1;