Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions be/src/olap/column_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,19 @@ void ArrayColumnVectorBatch::prepare_for_read(size_t start_idx, size_t size, boo
DCHECK(start_idx + size <= capacity());
for (size_t i = 0; i < size; ++i) {
if (!is_null_at(start_idx + i)) {
_data[start_idx + i] = CollectionValue(
_elements->mutable_cell_ptr(*(_offsets->scalar_cell_ptr(start_idx + i))),
*(_offsets->scalar_cell_ptr(start_idx + i + 1)) -
*(_offsets->scalar_cell_ptr(start_idx + i)),
item_has_null,
_elements->is_nullable() ? const_cast<bool*>(&_elements->null_signs()[*(
_offsets->scalar_cell_ptr(start_idx + i))])
: nullptr);
auto next_offset = *(_offsets->scalar_cell_ptr(start_idx + i + 1));
auto offset = *(_offsets->scalar_cell_ptr(start_idx + i));
uint32_t length = next_offset - offset;
if (length == 0) {
_data[start_idx + i] = CollectionValue(length);
} else {
_data[start_idx + i] = CollectionValue(
_elements->mutable_cell_ptr(offset),
length,
item_has_null,
_elements->is_nullable() ? const_cast<bool*>(&_elements->null_signs()[offset])
: nullptr);
}
}
}
}
Expand Down
8 changes: 6 additions & 2 deletions be/src/olap/rowset/segment_v2/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ Status ColumnReader::init() {
"Bad file $0: invalid column index type $1", _path_desc.filepath, index_meta.type()));
}
}
if (_ordinal_index_meta == nullptr) {
if (!is_empty() && _ordinal_index_meta == nullptr) {
return Status::Corruption(strings::Substitute(
"Bad file $0: missing ordinal index for column $1", _path_desc.filepath, _meta.column_id()));
}
Expand Down Expand Up @@ -339,6 +339,10 @@ Status ColumnReader::seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterat
}

Status ColumnReader::new_iterator(ColumnIterator** iterator) {
if (is_empty()) {
*iterator = new EmptyFileColumnIterator();
return Status::OK();
}
if (is_scalar_type((FieldType)_meta.type())) {
*iterator = new FileColumnIterator(this);
return Status::OK();
Expand Down Expand Up @@ -427,7 +431,7 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool

// read item
size_t item_size = array_batch->get_item_size(dst->current_offset(), *n);
if (item_size > 0) {
if (item_size >= 0) {
bool item_has_null = false;
ColumnVectorBatch* item_vector_batch = array_batch->elements();

Expand Down
13 changes: 13 additions & 0 deletions be/src/olap/rowset/segment_v2/column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ class ColumnReader {

PagePointer get_dict_page_pointer() const { return _meta.dict_page(); }

inline bool is_empty() const { return _num_rows == 0; }

private:
ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, uint64_t num_rows,
FilePathDesc path_desc);
Expand Down Expand Up @@ -303,6 +305,17 @@ class FileColumnIterator final : public ColumnIterator {
std::unique_ptr<StringRef[]> _dict_word_info;
};

class EmptyFileColumnIterator final : public ColumnIterator {
public:
Status seek_to_first() override { return Status::OK(); }
Status seek_to_ordinal(ordinal_t ord) override { return Status::OK(); }
Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override {
*n = 0;
return Status::OK();
}
ordinal_t get_current_ordinal() const override { return 0; }
};

class ArrayFileColumnIterator final : public ColumnIterator {
public:
explicit ArrayFileColumnIterator(ColumnReader* reader, FileColumnIterator* length_reader,
Expand Down
10 changes: 6 additions & 4 deletions be/src/olap/rowset/segment_v2/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,8 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
if (num_written <
1) { // page is full, write first item offset and update current length page's start ordinal
RETURN_IF_ERROR(_length_writer->finish_current_page());
_current_length_page_first_ordinal += _lengh_sum_in_cur_page;
_lengh_sum_in_cur_page = 0;
_current_length_page_first_ordinal += _length_sum_in_cur_page;
_length_sum_in_cur_page = 0;
} else {
// write child item.
if (_item_writer->is_nullable()) {
Expand All @@ -539,7 +539,7 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
RETURN_IF_ERROR(_item_writer->append_data(reinterpret_cast<const uint8_t**>(&data),
col_cursor->length()));
}
_lengh_sum_in_cur_page += col_cursor->length();
_length_sum_in_cur_page += col_cursor->length();
}
remaining -= num_written;
col_cursor += num_written;
Expand Down Expand Up @@ -579,7 +579,9 @@ Status ArrayColumnWriter::write_ordinal_index() {
if (is_nullable()) {
RETURN_IF_ERROR(_null_writer->write_ordinal_index());
}
RETURN_IF_ERROR(_item_writer->write_ordinal_index());
if (!has_empty_items()) {
RETURN_IF_ERROR(_item_writer->write_ordinal_index());
}
return Status::OK();
}

Expand Down
5 changes: 4 additions & 1 deletion be/src/olap/rowset/segment_v2/column_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,14 +303,17 @@ class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback {
private:
Status put_extra_info_in_page(DataPageFooterPB* header) override;
inline Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记
inline bool has_empty_items() const {
return _item_writer->get_next_rowid() == 0;
}

private:
std::unique_ptr<ScalarColumnWriter> _length_writer;
std::unique_ptr<ScalarColumnWriter> _null_writer;
std::unique_ptr<ColumnWriter> _item_writer;
ColumnWriterOptions _opts;
ordinal_t _current_length_page_first_ordinal = 0;
ordinal_t _lengh_sum_in_cur_page = 0;
ordinal_t _length_sum_in_cur_page = 0;
};

} // namespace segment_v2
Expand Down
16 changes: 14 additions & 2 deletions be/src/runtime/collection_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
namespace doris {
int sizeof_type(PrimitiveType type) {
switch (type) {
case TYPE_TINYINT:
return sizeof(int8_t);
case TYPE_SMALLINT:
return sizeof(int16_t);
case TYPE_INT:
return sizeof(int32_t);
case TYPE_CHAR:
Expand All @@ -40,6 +44,8 @@ int sizeof_type(PrimitiveType type) {

Status type_check(PrimitiveType type) {
switch (type) {
case TYPE_TINYINT:
case TYPE_SMALLINT:
case TYPE_INT:
case TYPE_CHAR:
case TYPE_VARCHAR:
Expand Down Expand Up @@ -140,7 +146,7 @@ Status CollectionValue::init_collection(FunctionContext* context, uint32_t size,
}

CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) {
return CollectionValue(val.data, val.length, val.null_signs);
return CollectionValue(val.data, val.length, val.has_null, val.null_signs);
}

Status CollectionValue::set(uint32_t i, PrimitiveType type, const AnyVal* value) {
Expand All @@ -160,6 +166,12 @@ Status CollectionValue::set(uint32_t i, PrimitiveType type, const AnyVal* value)
}

switch (type) {
case TYPE_TINYINT:
*reinterpret_cast<int8_t*>(iter.value()) = reinterpret_cast<const TinyIntVal*>(value)->val;
break;
case TYPE_SMALLINT:
*reinterpret_cast<int16_t*>(iter.value()) = reinterpret_cast<const SmallIntVal*>(value)->val;
break;
case TYPE_INT:
*reinterpret_cast<int32_t*>(iter.value()) = reinterpret_cast<const IntVal*>(value)->val;
break;
Expand Down Expand Up @@ -214,7 +226,7 @@ void ArrayIterator::value(AnyVal* dest) {
break;

case TYPE_SMALLINT:
reinterpret_cast<TinyIntVal*>(dest)->val = *reinterpret_cast<const int16_t*>(value());
reinterpret_cast<SmallIntVal*>(dest)->val = *reinterpret_cast<const int16_t*>(value());
break;

case TYPE_INT:
Expand Down
6 changes: 5 additions & 1 deletion be/src/runtime/mysql_result_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,11 @@ int MysqlResultWriter::_add_row_value(int index, const TypeDescriptor& type, voi
buf_ret = _add_row_value(index, children_type, iter.value());
buf_ret = _row_buffer->push_string("'", 1);
} else {
buf_ret = _add_row_value(index, children_type, iter.value());
if (!iter.value()) {
buf_ret = _row_buffer->push_string("NULL", 4);
} else {
buf_ret = _add_row_value(index, children_type, iter.value());
}
}

iter.next();
Expand Down
1 change: 1 addition & 0 deletions be/src/runtime/raw_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ void RawValue::write(const void* value, void* dst, const TypeDescriptor& type, M
ArrayIterator src_iter = src->iterator(children_type);
ArrayIterator val_iter = val->iterator(children_type);

val->set_has_null(src->has_null());
val->copy_null_signs(src);

while (src_iter.has_next() && val_iter.has_next()) {
Expand Down
10 changes: 7 additions & 3 deletions be/src/runtime/row_batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,10 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch)
// assgin data and null_sign pointer position in tuple_data
int data_offset = convert_to<int>(array_val->data());
array_val->set_data(tuple_data + data_offset);
int null_offset = convert_to<int>(array_val->null_signs());
array_val->set_null_signs(convert_to<bool*>(tuple_data + null_offset));
if (array_val->has_null()) {
int null_offset = convert_to<int>(array_val->null_signs());
array_val->set_null_signs(convert_to<bool*>(tuple_data + null_offset));
}

const TypeDescriptor& item_type = slot_collection->type().children.at(0);
if (!item_type.is_string_type()) {
Expand Down Expand Up @@ -590,7 +592,9 @@ size_t RowBatch::total_byte_size() const {
// compute data null_signs size
CollectionValue* array_val =
tuple->get_collection_slot(slot_collection->tuple_offset());
result += array_val->length() * sizeof(bool);
if (array_val->has_null()) {
result += array_val->length() * sizeof(bool);
}

const TypeDescriptor& item_type = slot_collection->type().children.at(0);
result += array_val->length() * item_type.get_slot_size();
Expand Down
29 changes: 13 additions & 16 deletions be/src/runtime/tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,20 @@ void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bo
const TypeDescriptor& item_type = slot_desc->type().children.at(0);

int coll_byte_size = cv->length() * item_type.get_slot_size();
int nulls_size = cv->length() * sizeof(bool);
int nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0;

int64_t offset = pool->total_allocated_bytes();
char* coll_data = (char*)(pool->allocate(coll_byte_size + nulls_size));

// copy data and null_signs
if (nulls_size > 0) {
cv->set_has_null(true);
cv->set_null_signs(convert_to<bool*>(coll_data) + coll_byte_size);
memory_copy(coll_data, cv->null_signs(), nulls_size);
} else {
cv->set_has_null(false);
}
memory_copy(convert_to<bool*>(coll_data), cv->null_signs(), nulls_size);
memory_copy(coll_data + nulls_size, cv->data(), coll_byte_size);

// assgin new null_sign and data location
cv->set_null_signs(convert_ptrs ? convert_to<bool*>(offset) : convert_to<bool*>(coll_data));
cv->set_data(convert_ptrs ? convert_to<char*>(offset + nulls_size)
: coll_data + nulls_size);
if (cv->has_null()) {
cv->set_null_signs(convert_ptrs ? convert_to<bool*>(offset) : convert_to<bool*>(coll_data));
}
cv->set_data(convert_ptrs ? convert_to<char*>(offset + nulls_size) : coll_data + nulls_size);

if (!item_type.is_string_type()) {
continue;
Expand Down Expand Up @@ -212,16 +207,17 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset,
const TypeDescriptor& item_type = slot_desc->type().children.at(0);

int coll_byte_size = cv->length() * item_type.get_slot_size();
int nulls_size = cv->length() * sizeof(bool);
int nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0;

// copy null_sign
memory_copy(*data, cv->null_signs(), nulls_size);
// copy data
memory_copy(*data + nulls_size, cv->data(), coll_byte_size);

if (!item_type.is_string_type()) {
cv->set_null_signs(convert_ptrs ? convert_to<bool*>(*offset)
: convert_to<bool*>(*data));
if (cv->has_null()) {
cv->set_null_signs(convert_ptrs ? convert_to<bool*>(*offset) : convert_to<bool*>(*data));
}
cv->set_data(convert_ptrs ? convert_to<char*>(*offset + nulls_size)
: *data + nulls_size);
*data += coll_byte_size + nulls_size;
Expand Down Expand Up @@ -250,8 +246,9 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset,
}
}
// assgin new null_sign and data location
cv->set_null_signs(convert_ptrs ? convert_to<bool*>(base_offset)
: convert_to<bool*>(base_data));
if (cv->has_null()) {
cv->set_null_signs(convert_ptrs ? convert_to<bool*>(base_offset) : convert_to<bool*>(base_data));
}
cv->set_data(convert_ptrs ? convert_to<char*>(base_offset + nulls_size)
: base_data + nulls_size);
}
Expand Down
24 changes: 24 additions & 0 deletions be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,30 @@ TEST_F(ColumnReaderWriterTest, test_v_default_value) {
test_v_read_default_value<OLAP_FIELD_TYPE_DECIMAL>(v_decimal, &decimal);
}

TEST_F(ColumnReaderWriterTest, test_single_empty_array) {
size_t num_array = 1;
std::unique_ptr<uint8_t[]> array_is_null(new uint8_t[BitmapSize(num_array)]());
CollectionValue array(0);
test_array_nullable_data<OLAP_FIELD_TYPE_TINYINT, BIT_SHUFFLE, BIT_SHUFFLE>(
&array, array_is_null.get(), num_array, "test_single_empty_array");
}

TEST_F(ColumnReaderWriterTest, test_mixed_empty_arrays) {
size_t num_array = 3;
std::unique_ptr<uint8_t[]> array_is_null(new uint8_t[BitmapSize(num_array)]());
std::unique_ptr<CollectionValue[]> collection_values(new CollectionValue[num_array]);
int data[] = {1, 2, 3};
for (int i = 0; i < num_array; ++ i) {
if (i % 2 == 1) {
new (&collection_values[i]) CollectionValue(0);
} else {
new (&collection_values[i]) CollectionValue(&data, 3, false, nullptr);
}
}
test_array_nullable_data<OLAP_FIELD_TYPE_INT, BIT_SHUFFLE, BIT_SHUFFLE>(
collection_values.get(), array_is_null.get(), num_array, "test_mixed_empty_arrays");
}

} // namespace segment_v2
} // namespace doris

Expand Down
14 changes: 14 additions & 0 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ nonterminal CaseExpr case_expr;
nonterminal ArrayList<CaseWhenClause> case_when_clause_list;
nonterminal FunctionParams function_params;
nonterminal Expr function_call_expr, array_expr;
nonterminal ArrayLiteral array_literal;
nonterminal StructField struct_field;
nonterminal ArrayList<StructField> struct_field_list;
nonterminal AnalyticWindow opt_window_clause;
Expand Down Expand Up @@ -4752,6 +4753,17 @@ function_call_expr ::=
:}
;

array_literal ::=
LBRACKET RBRACKET
{:
RESULT = new ArrayLiteral();
:}
| LBRACKET expr_list:list RBRACKET
{:
RESULT = new ArrayLiteral(list.toArray(new LiteralExpr[0]));
:}
;

array_expr ::=
KW_ARRAY LPAREN function_params:params RPAREN
{:
Expand Down Expand Up @@ -4800,6 +4812,8 @@ non_pred_expr ::=
{: RESULT = l; :}
| array_expr:a
{: RESULT = a; :}
| array_literal:a
{: RESULT = a; :}
| function_call_expr:e
{: RESULT = e; :}
| KW_DATE STRING_LITERAL:l
Expand Down
Loading