Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/exec/olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) {
auto pool = batch->tuple_data_pool();
CollectionValue::deep_copy_collection(
slot, item_type,
[pool](int size) -> MemFootprint {
[pool](int64_t size) -> MemFootprint {
int64_t offset = pool->total_allocated_bytes();
uint8_t* data = pool->allocate(size);
return {offset, data};
Expand Down
6 changes: 3 additions & 3 deletions be/src/olap/column_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,13 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T
array_type_info->item_type_info(), field->get_sub_field(0), &elements));

std::unique_ptr<ColumnVectorBatch> offsets;
const auto* offsets_type_info = get_scalar_type_info<OLAP_FIELD_TYPE_UNSIGNED_INT>();
const auto* offsets_type_info = get_scalar_type_info<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>();
RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, offsets_type_info,
nullptr, &offsets));

std::unique_ptr<ColumnVectorBatch> local(new ArrayColumnVectorBatch(
type_info, is_nullable,
reinterpret_cast<ScalarColumnVectorBatch<uint32_t>*>(offsets.release()),
reinterpret_cast<ScalarColumnVectorBatch<uint64_t>*>(offsets.release()),
elements.release()));
RETURN_IF_ERROR(local->resize(init_capacity));
*column_vector_batch = std::move(local);
Expand Down Expand Up @@ -181,7 +181,7 @@ Status ScalarColumnVectorBatch<ScalarType>::resize(size_t new_cap) {
}

ArrayColumnVectorBatch::ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable,
ScalarColumnVectorBatch<uint32_t>* offsets,
ScalarColumnVectorBatch<uint64_t>* offsets,
ColumnVectorBatch* elements)
: ColumnVectorBatch(type_info, is_nullable), _data(0) {
_offsets.reset(offsets);
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/column_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class ArrayNullColumnVectorBatch : public ColumnVectorBatch {
class ArrayColumnVectorBatch : public ColumnVectorBatch {
public:
explicit ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable,
ScalarColumnVectorBatch<uint32_t>* offsets,
ScalarColumnVectorBatch<uint64_t>* offsets,
ColumnVectorBatch* elements);
~ArrayColumnVectorBatch() override;
Status resize(size_t new_cap) override;
Expand Down Expand Up @@ -249,7 +249,7 @@ class ArrayColumnVectorBatch : public ColumnVectorBatch {
std::unique_ptr<ColumnVectorBatch> _elements;

// Stores each array's start offsets in _elements.
std::unique_ptr<ScalarColumnVectorBatch<uint32_t>> _offsets;
std::unique_ptr<ScalarColumnVectorBatch<uint64_t>> _offsets;
};

} // namespace doris
6 changes: 3 additions & 3 deletions be/src/olap/row_block2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ Status RowBlockV2::_copy_data_to_column(int cid,

auto& offsets_col = column_array->get_offsets();
offsets_col.reserve(_selected_size);
uint32_t offset = offsets_col.back();
uint64_t offset = offsets_col.back();
for (uint16_t j = 0; j < _selected_size; ++j) {
uint16_t row_idx = _selection_vector[j];
auto cv = reinterpret_cast<const CollectionValue*>(column_block(cid).cell_ptr(row_idx));
Expand Down Expand Up @@ -550,10 +550,10 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t
auto nested_col = (*column_array->get_data_ptr()).assume_mutable();

auto& offsets_col = column_array->get_offsets();
uint32_t offset = offsets_col.back();
auto offset = offsets_col.back();
for (uint32_t j = 0; j < selected_size; ++j) {
if (!nullable_mark_array[j]) {
uint32_t row_idx = j + start;
uint64_t row_idx = j + start;
auto cv = reinterpret_cast<const CollectionValue*>(batch->cell_ptr(row_idx));
offset += cv->length();
_append_data_to_column(array_batch->elements(), array_batch->item_offset(row_idx),
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) {
if (_array_reader->is_nullable()) {
RETURN_IF_ERROR(_null_iterator->init(opts));
}
const auto* offset_type_info = get_scalar_type_info<OLAP_FIELD_TYPE_UNSIGNED_INT>();
const auto* offset_type_info = get_scalar_type_info<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>();
RETURN_IF_ERROR(
ColumnVectorBatch::create(1024, false, offset_type_info, nullptr, &_length_batch));
return Status::OK();
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ class ArrayFileColumnIterator final : public ColumnIterator {
: size_to_read;
ColumnBlockView ordinal_view(&ordinal_block);
RETURN_IF_ERROR(_length_iterator->next_batch(&this_read, &ordinal_view, &has_null));
auto* ordinals = reinterpret_cast<uint32_t*>(_length_batch->data());
auto* ordinals = reinterpret_cast<uint64_t*>(_length_batch->data());
for (int i = 0; i < this_read; ++i) {
item_ordinal += ordinals[i];
}
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn*
ColumnWriter::create(item_options, &item_column, _wblock, &item_writer));

// create length writer
FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT;
FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT;

ColumnWriterOptions length_options;
length_options.meta = opts.meta->add_children_columns();
Expand All @@ -119,7 +119,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn*
length_options.meta->set_type(length_type);
length_options.meta->set_is_nullable(false);
length_options.meta->set_length(
get_scalar_type_info<OLAP_FIELD_TYPE_UNSIGNED_INT>()->size());
get_scalar_type_info<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>()->size());
length_options.meta->set_encoding(DEFAULT_ENCODING);
length_options.meta->set_compression(opts.meta->compression());

Expand Down
18 changes: 9 additions & 9 deletions be/src/runtime/collection_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ struct ArrayIteratorFunctionsForString : public GenericArrayIteratorFunctions<ty
static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) {
auto* string_value = static_cast<CppType*>(item);
if (string_value->len) {
int offset = convert_to<int>(string_value->ptr);
int64_t offset = convert_to<int64_t>(string_value->ptr);
string_value->ptr = convert_to<char*>(tuple_data + offset);
}
}
Expand Down Expand Up @@ -448,15 +448,15 @@ size_t CollectionValue::get_byte_size(const TypeDescriptor& item_type) const {
return result;
}

Status CollectionValue::init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type,
Status CollectionValue::init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type,
CollectionValue* value) {
return init_collection(
value, [pool](size_t size) -> uint8_t* { return pool->add_array(new uint8_t[size]); },
size, child_type);
}

Status CollectionValue::init_collection(CollectionValue* value, const AllocateMemFunc& allocate,
uint32_t size, PrimitiveType child_type) {
uint64_t size, PrimitiveType child_type) {
if (value == nullptr) {
return Status::InvalidArgument("collection value is null");
}
Expand All @@ -477,13 +477,13 @@ Status CollectionValue::init_collection(CollectionValue* value, const AllocateMe
return Status::OK();
}

Status CollectionValue::init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type,
Status CollectionValue::init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type,
CollectionValue* value) {
return init_collection(
value, [pool](size_t size) { return pool->allocate(size); }, size, child_type);
}

Status CollectionValue::init_collection(FunctionContext* context, uint32_t size,
Status CollectionValue::init_collection(FunctionContext* context, uint64_t size,
PrimitiveType child_type, CollectionValue* value) {
return init_collection(
value, [context](size_t size) { return context->allocate(size); }, size, child_type);
Expand All @@ -506,8 +506,8 @@ void CollectionValue::deep_copy_collection(CollectionValue* shallow_copied_cv,
}

auto iterator = cv->iterator(item_type.type);
int coll_byte_size = cv->length() * iterator.type_size();
int nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0;
uint64_t coll_byte_size = cv->length() * iterator.type_size();
uint64_t nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0;

MemFootprint footprint = gen_mem_footprint(coll_byte_size + nulls_size);
int64_t offset = footprint.first;
Expand Down Expand Up @@ -544,10 +544,10 @@ void CollectionValue::deserialize_collection(CollectionValue* cv, const char* tu
return;
}
// assign data and null_sign pointer position in tuple_data
int data_offset = convert_to<int>(cv->data());
int64_t data_offset = convert_to<int64_t>(cv->data());
cv->set_data(convert_to<char*>(tuple_data + data_offset));
if (cv->has_null()) {
int null_offset = convert_to<int>(cv->null_signs());
int64_t null_offset = convert_to<int64_t>(cv->null_signs());
cv->set_null_signs(convert_to<bool*>(tuple_data + null_offset));
}
auto iterator = cv->iterator(item_type.type);
Expand Down
32 changes: 16 additions & 16 deletions be/src/runtime/collection_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ using doris_udf::FunctionContext;
using doris_udf::AnyVal;

using MemFootprint = std::pair<int64_t, uint8_t*>;
using GenMemFootprintFunc = std::function<MemFootprint(int size)>;
using GenMemFootprintFunc = std::function<MemFootprint(int64_t size)>;

struct ArrayIteratorFunctionsBase;
class ArrayIterator;
Expand Down Expand Up @@ -64,25 +64,25 @@ class CollectionValue {
public:
CollectionValue() = default;

explicit CollectionValue(uint32_t length)
explicit CollectionValue(uint64_t length)
: _data(nullptr), _length(length), _has_null(false), _null_signs(nullptr) {}

CollectionValue(void* data, uint32_t length)
CollectionValue(void* data, uint64_t length)
: _data(data), _length(length), _has_null(false), _null_signs(nullptr) {}

CollectionValue(void* data, uint32_t length, bool* null_signs)
CollectionValue(void* data, uint64_t length, bool* null_signs)
: _data(data), _length(length), _has_null(true), _null_signs(null_signs) {}

CollectionValue(void* data, uint32_t length, bool has_null, bool* null_signs)
CollectionValue(void* data, uint64_t length, bool has_null, bool* null_signs)
: _data(data), _length(length), _has_null(has_null), _null_signs(null_signs) {}

bool is_null_at(uint32_t index) const { return this->_has_null && this->_null_signs[index]; }
bool is_null_at(uint64_t index) const { return this->_has_null && this->_null_signs[index]; }

void to_collection_val(CollectionVal* val) const;

uint32_t size() const { return _length; }
uint64_t size() const { return _length; }

uint32_t length() const { return _length; }
uint64_t length() const { return _length; }

void shallow_copy(const CollectionValue* other);

Expand All @@ -96,13 +96,13 @@ class CollectionValue {
/**
* init collection, will alloc (children Type's size + 1) * (children Nums) memory
*/
static Status init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type,
static Status init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type,
CollectionValue* value);

static Status init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type,
static Status init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type,
CollectionValue* value);

static Status init_collection(FunctionContext* context, uint32_t size, PrimitiveType child_type,
static Status init_collection(FunctionContext* context, uint64_t size, PrimitiveType child_type,
CollectionValue* value);

static CollectionValue from_collection_val(const CollectionVal& val);
Expand All @@ -123,21 +123,21 @@ class CollectionValue {
const bool* null_signs() const { return _null_signs; }
void* mutable_data() { return _data; }
bool* mutable_null_signs() { return _null_signs; }
void set_length(uint32_t length) { _length = length; }
void set_length(uint64_t length) { _length = length; }
void set_has_null(bool has_null) { _has_null = has_null; }
void set_data(void* data) { _data = data; }
void set_null_signs(bool* null_signs) { _null_signs = null_signs; }

private:
using AllocateMemFunc = std::function<uint8_t*(size_t size)>;
static Status init_collection(CollectionValue* value, const AllocateMemFunc& allocate,
uint32_t size, PrimitiveType child_type);
uint64_t size, PrimitiveType child_type);
ArrayIterator internal_iterator(PrimitiveType child_type) const;

private:
// child column data
void* _data;
uint32_t _length;
uint64_t _length;
// item has no null value if has_null is false.
// item ```may``` have null value if has_null is true.
bool _has_null;
Expand All @@ -160,7 +160,7 @@ class ArrayIterator {
}
return false;
}
bool seek(uint32_t n) const {
bool seek(uint64_t n) const {
if (n >= _collection_value->size()) {
return false;
}
Expand Down Expand Up @@ -248,7 +248,7 @@ class ArrayIterator {

private:
CollectionValue* _collection_value;
mutable uint32_t _offset;
mutable uint64_t _offset;
const int _type_size;
const bool _is_type_fixed_width;

Expand Down
4 changes: 2 additions & 2 deletions be/src/runtime/tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bo
// copy collection slot
deep_copy_collection_slots(
dst, desc,
[pool](int size) -> MemFootprint {
[pool](int64_t size) -> MemFootprint {
int64_t offset = pool->total_allocated_bytes();
uint8_t* data = pool->allocate(size);
return {offset, data};
Expand Down Expand Up @@ -186,7 +186,7 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset,
// copy collection slots
deep_copy_collection_slots(
dst, desc,
[offset, data](int size) -> MemFootprint {
[offset, data](int64_t size) -> MemFootprint {
MemFootprint footprint = {*offset, reinterpret_cast<uint8_t*>(*data)};
*offset += size;
*data += size;
Expand Down
4 changes: 2 additions & 2 deletions be/src/udf/udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ struct HllVal : public StringVal {

struct CollectionVal : public AnyVal {
void* data;
uint32_t length;
uint64_t length;
// item has no null value if has_null is false.
// item ```may``` have null value if has_null is true.
bool has_null;
Expand All @@ -751,7 +751,7 @@ struct CollectionVal : public AnyVal {

CollectionVal() = default;

CollectionVal(void* data, uint32_t length, bool has_null, bool* null_signs)
CollectionVal(void* data, uint64_t length, bool has_null, bool* null_signs)
: data(data), length(length), has_null(has_null), null_signs(null_signs) {};

static CollectionVal null() {
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ class IColumn : public COW<IColumn> {
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
* It is necessary in ARRAY JOIN operation.
*/
using Offset = UInt32;
using Offset = UInt64;
using Offsets = PaddedPODArray<Offset>;
virtual Ptr replicate(const Offsets& offsets) const = 0;

Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ ColumnPtr ColumnString::permute(const Permutation& perm, size_t limit) const {

StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena,
char const*& begin) const {
UInt32 string_size = size_at(n);
IColumn::Offset string_size = size_at(n);
size_t offset = offset_at(n);

StringRef res;
Expand All @@ -178,7 +178,7 @@ StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena,
}

const char* ColumnString::deserialize_and_insert_from_arena(const char* pos) {
const UInt32 string_size = unaligned_load<UInt32>(pos);
const IColumn::Offset string_size = unaligned_load<IColumn::Offset>(pos);
pos += sizeof(string_size);

const size_t old_size = chars.size();
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/columns/column_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets& offsets) const {

// vectorized this code to speed up
IColumn::Offset counts[size];
for (size_t i = 0; i < size; ++i) {
for (ssize_t i = 0; i < size; ++i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

size is size_t, so there is no need to change i to ssize_t

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use offsets[i - 1] inside this for loop, so it is better to use ssize_t: on the first iteration (i == 0) the index is -1, which needs a signed type.

counts[i] = offsets[i] - offsets[i - 1];
}

Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/data_types/data_type_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ char* DataTypeArray::serialize(const IColumn& column, char* buf) const {
const auto& data_column = assert_cast<const ColumnArray&>(*ptr.get());

// row num
*reinterpret_cast<uint32_t*>(buf) = column.size();
*reinterpret_cast<IColumn::Offset*>(buf) = column.size();
buf += sizeof(IColumn::Offset);
// offsets
memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(IColumn::Offset));
Expand All @@ -79,7 +79,7 @@ const char* DataTypeArray::deserialize(const char* buf, IColumn* column) const {
auto& offsets = data_column->get_offsets();

// row num
uint32_t row_num = *reinterpret_cast<const IColumn::Offset*>(buf);
IColumn::Offset row_num = *reinterpret_cast<const IColumn::Offset*>(buf);
buf += sizeof(IColumn::Offset);
// offsets
offsets.resize(row_num);
Expand Down
Loading