Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone,
Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
IColumn& column, Slice& slice, int rows, int* num_deserialized,
const FormatOptions& options) const {
if (rows < 1) [[unlikely]] {
return Status::OK();
}
Status st = deserialize_one_cell_from_json(column, slice, options);
if (!st.ok()) {
return st;
Expand All @@ -264,6 +267,9 @@ Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(

void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& column,
int times) const {
if (times < 1) [[unlikely]] {
return;
}
auto& col = static_cast<ColumnVector<UInt64>&>(column);
auto sz = col.size();
UInt64 val = col.get_element(sz - 1);
Expand Down
6 changes: 6 additions & 0 deletions be/src/vec/data_types/serde/data_type_datev2_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, con
Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, Slice& slice,
int rows, int* num_deserialized,
const FormatOptions& options) const {
if (rows < 1) [[unlikely]] {
return Status::OK();
}
Status st = deserialize_one_cell_from_json(column, slice, options);
if (!st.ok()) {
return st;
Expand All @@ -189,6 +192,9 @@ Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column,

void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& column,
int times) const {
if (times < 1) [[unlikely]] {
return;
}
auto& col = static_cast<ColumnVector<UInt32>&>(column);
auto sz = col.size();
UInt32 val = col.get_element(sz - 1);
Expand Down
6 changes: 6 additions & 0 deletions be/src/vec/data_types/serde/data_type_decimal_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ template <typename T>
Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
IColumn& column, Slice& slice, int rows, int* num_deserialized,
const FormatOptions& options) const {
if (rows < 1) [[unlikely]] {
return Status::OK();
}
Status st = deserialize_one_cell_from_json(column, slice, options);
if (!st.ok()) {
return st;
Expand All @@ -293,6 +296,9 @@ Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
template <typename T>
void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(IColumn& column,
int times) const {
if (times < 1) [[unlikely]] {
return;
}
auto& col = static_cast<ColumnDecimal<T>&>(column);
auto sz = col.size();

Expand Down
5 changes: 4 additions & 1 deletion be/src/vec/data_types/serde/data_type_nullable_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,15 @@ Status DataTypeNullableSerDe::deserialize_column_from_hive_text_vector(
Status DataTypeNullableSerDe::deserialize_column_from_fixed_json(
IColumn& column, Slice& slice, int rows, int* num_deserialized,
const FormatOptions& options) const {
if (rows < 1) [[unlikely]] {
return Status::OK();
}
auto& col = static_cast<ColumnNullable&>(column);
Status st = deserialize_one_cell_from_json(column, slice, options);
if (!st.ok()) {
return st;
}
if (rows - 1 != 0) {
if (rows > 1) {
auto& null_map = col.get_null_map_data();
auto& nested_column = col.get_nested_column();

Expand Down
6 changes: 6 additions & 0 deletions be/src/vec/data_types/serde/data_type_number_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ template <typename T>
Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
IColumn& column, Slice& slice, int rows, int* num_deserialized,
const FormatOptions& options) const {
if (rows < 1) [[unlikely]] {
return Status::OK();
}
Status st = deserialize_one_cell_from_json(column, slice, options);
if (!st.ok()) {
return st;
Expand All @@ -241,6 +244,9 @@ Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
template <typename T>
void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(IColumn& column,
int times) const {
if (times < 1) [[unlikely]] {
return;
}
auto& col = static_cast<ColumnVector<T>&>(column);
auto sz = col.size();
T val = col.get_element(sz - 1);
Expand Down
11 changes: 10 additions & 1 deletion be/src/vec/data_types/serde/data_type_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,17 +243,26 @@ class DataTypeSerDe {
// Fills `column` from a single serialized cell that is repeated `rows` times.
//
// One cell is parsed with deserialize_one_cell_from_json(); when more than one row is
// requested, insert_column_last_value_multiple_times() is then asked to repeat the
// column's last value `rows - 1` more times.
//
// Implementations must handle rows == 0, rows == 1 and larger values of rows.
//
// @param column            destination column.
// @param slice             serialized cell, forwarded to deserialize_one_cell_from_json().
// @param rows              number of rows requested; values below 1 are a no-op.
// @param num_deserialized  out-parameter: `rows` on success, 0 when nothing was written
//                          (rows < 1 or the cell failed to parse).
// @param options           format options, forwarded to deserialize_one_cell_from_json().
// @return Status::OK() on success or when rows < 1; otherwise the status returned by
//         deserialize_one_cell_from_json().
virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
                                                  int* num_deserialized,
                                                  const FormatOptions& options) const {
    if (rows < 1) [[unlikely]] {
        // Nothing is parsed or appended; report that explicitly so every exit path
        // leaves the out-parameter in a defined state.
        *num_deserialized = 0;
        return Status::OK();
    }
    Status st = deserialize_one_cell_from_json(column, slice, options);
    if (!st.ok()) {
        *num_deserialized = 0;
        return st;
    }
    // Repeat the parsed value exactly once, and only when extra rows are needed.
    // rows == 1 needs no repetition because the single cell is already in place.
    if (rows > 1) [[likely]] {
        insert_column_last_value_multiple_times(column, rows - 1);
    }
    *num_deserialized = rows;
    return Status::OK();
}
// Insert the last value to the end of this column multiple times.
virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const {
if (times < 1) [[unlikely]] {
return;
}
// Do not simplify this to `column.insert_many_from(column, column.size() - 1, times);`:
// doing so is likely to produce incorrect data.
MutableColumnPtr dum_col = column.clone_empty();
Expand Down
6 changes: 6 additions & 0 deletions be/src/vec/data_types/serde/data_type_string_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
int* num_deserialized,
const FormatOptions& options) const override {
if (rows < 1) [[unlikely]] {
return Status::OK();
}
Status st = deserialize_one_cell_from_json(column, slice, options);
if (!st.ok()) {
return st;
Expand All @@ -229,6 +232,9 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
}

void insert_column_last_value_multiple_times(IColumn& column, int times) const override {
if (times < 1) [[unlikely]] {
return;
}
auto& col = static_cast<ColumnString&>(column);
auto sz = col.size();

Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ Status RowGroupReader::_fill_partition_columns(
auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
Slice slice(value.data(), value.size());
int num_deserialized = 0;
// Be careful when reading empty rows from parquet row groups.
if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows,
&num_deserialized,
_text_formatOptions) != Status::OK()) {
Expand Down