diff --git a/be/src/util/slice.h b/be/src/util/slice.h
index 57865b50e3e65d..84aec06c400aba 100644
--- a/be/src/util/slice.h
+++ b/be/src/util/slice.h
@@ -119,6 +119,51 @@ struct Slice {
         size -= n;
     }
 
+    /// Drop the last "n" bytes from this slice.
+    ///
+    /// @pre n <= size
+    ///
+    /// @note Only the base and bounds of the slice are changed;
+    ///   the data is not modified.
+    ///
+    /// @param [in] n
+    ///   Number of bytes that should be dropped from the end.
+    void remove_suffix(size_t n) {
+        assert(n <= size);
+        size -= n;
+    }
+
+    /// Remove leading spaces (' ') from this slice.
+    ///
+    /// Every leading ' ' byte is skipped; a slice made up only of spaces
+    /// becomes empty.
+    ///
+    /// @note Only the base and bounds of the slice are changed;
+    ///   the data is not modified.
+    void trim_prefix() {
+        // Test the current first byte each time: every step moves the base.
+        while (size > 0 && data[0] == ' ') {
+            data += 1;
+            size -= 1;
+        }
+    }
+
+    /// Remove one pair of matching quote chars ('"' or '\'') when they are
+    /// the first and last bytes of this slice; otherwise do nothing.
+    ///
+    /// An empty quoted value ("" or '') is reduced to an empty slice.
+    ///
+    /// @note Only the base and bounds of the slice are changed;
+    ///   the data is not modified.
+    void trim_quote() {
+        // size >= 2 so that a bare pair of quotes is stripped as well.
+        if (size >= 2 && ((data[0] == '"' && data[size - 1] == '"') ||
+                          (data[0] == '\'' && data[size - 1] == '\''))) {
+            data += 1;
+            size -= 2;
+        }
+    }
+
     /// Truncate the slice to the given number of bytes.
/// /// @pre n <= size diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 93cc45414df648..9397ab6a5b822e 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -32,6 +32,116 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeArraySerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeArraySerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + auto& data_column = assert_cast(*ptr); + auto& offsets = data_column.get_offsets(); + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + + const IColumn& nested_column = data_column.get_data(); + // bool is_nested_string = remove_nullable(nested_column.get_ptr())->is_column_string(); + + bw.write("[", 1); + // nested column field delim should be replaced as collection delim because this field is in array. + // add ' ' to keep same with origin format with array + options.field_delim = options.collection_delim; + options.field_delim += " "; + nested_serde->serialize_column_to_text(nested_column, offset, next_offset, bw, options); + bw.write("]", 1); +} + +Status DataTypeArraySerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DCHECK(!slices.empty()); + int end = num_deserialized && *num_deserialized > 0 ? 
*num_deserialized : slices.size(); + + for (int i = 0; i < end; ++i) { + if (Status st = deserialize_one_cell_from_text(column, slices[i], options); + st != Status::OK()) { + *num_deserialized = i + 1; + return st; + } + } + return Status::OK(); +} + +Status DataTypeArraySerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + DCHECK(!slice.empty()); + auto& array_column = assert_cast(column); + auto& offsets = array_column.get_offsets(); + IColumn& nested_column = array_column.get_data(); + DCHECK(nested_column.is_nullable()); + if (slice[0] != '[') { + return Status::InvalidArgument("Array does not start with '[' character, found '{}'", + slice[0]); + } + if (slice[slice.size - 1] != ']') { + return Status::InvalidArgument("Array does not end with ']' character, found '{}'", + slice[slice.size - 1]); + } + // empty array [] + if (slice.size == 2) { + offsets.push_back(offsets.back()); + return Status::OK(); + } + slice.remove_prefix(1); + slice.remove_suffix(1); + + // deserialize array column from text we have to know how to split from text and support nested + // complex type. + // 1. get item according to collection_delimiter, but if meet collection_delimiter in string, we should ignore it. + // 2. keep a nested level to support nested complex type. + int nested_level = 0; + bool has_quote = false; + std::vector slices; + slice.trim_prefix(); + slices.emplace_back(slice); + size_t slice_size = slice.size; + // pre add total slice can reduce lasted element check. + for (int idx = 0; idx < slice_size; ++idx) { + char c = slice[idx]; + if (c == '"' || c == '\'') { + has_quote = !has_quote; + } else if (!has_quote && (c == '[' || c == '{')) { + ++nested_level; + } else if (!has_quote && (c == ']' || c == '}')) { + --nested_level; + } else if (!has_quote && nested_level == 0 && c == options.collection_delim) { + // if meet collection_delimiter and not in quote, we can make it as an item. 
+ slices.back().remove_suffix(slice_size - idx); + // add next total slice.(slice data will not change, so we can use slice directly) + // skip delimiter + Slice next(slice.data + idx + 1, slice_size - idx - 1); + next.trim_prefix(); + if (options.converted_from_string) slices.back().trim_quote(); + slices.emplace_back(next); + } + } + + if (options.converted_from_string) slices.back().trim_quote(); + + int elem_deserialized = 0; + Status st = nested_serde->deserialize_column_from_text_vector(nested_column, slices, + &elem_deserialized, options); + offsets.emplace_back(offsets.back() + elem_deserialized); + return st; +} + void DataTypeArraySerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const { diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index 28a90dc114b768..222564de0ecbbf 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -39,22 +39,25 @@ class DataTypeArraySerDe : public DataTypeSerDe { DataTypeArraySerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {} void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support serialize array column to buffer"; - } + FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support deserialize from buffer to array"; - return Status::NotSupported("Not support deserialize from buffer to array"); - } + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const 
override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not support write array column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to array"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h b/be/src/vec/data_types/serde/data_type_bitmap_serde.h index 9f1d8f8a71ed53..3a36aad612e243 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h @@ -34,14 +34,26 @@ class Arena; class DataTypeBitMapSerDe : public DataTypeSerDe { public: void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support serialize bitmap column to buffer"; + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override { - LOG(FATAL) << "Not 
support deserialize from buffer to bitmap"; - return Status::NotSupported("Not support deserialize from buffer to bitmap"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_pb(const IColumn& column, PValues& result, int start, @@ -55,11 +67,13 @@ class DataTypeBitMapSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write bitmap column to arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read bitmap column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp b/be/src/vec/data_types/serde/data_type_date64_serde.cpp index e360a86172d15a..6afe531eab9133 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp @@ -28,9 +28,15 @@ namespace doris { namespace vectorized { +void DataTypeDate64SerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT(); +} + void DataTypeDate64SerDe::serialize_one_cell_to_text(const 
IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -54,23 +60,29 @@ void DataTypeDate64SerDe::serialize_one_cell_to_text(const IColumn& column, int char* pos = value.to_string(buf); bw.write(buf, pos - buf - 1); } - bw.commit(); } -Status DataTypeDate64SerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeDate64SerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeDate64SerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& column_data = assert_cast(column); Int64 val = 0; if (options.date_olap_format) { tm time_tm; - char* res = strptime(rb.position(), "%Y-%m-%d", &time_tm); + char* res = strptime(slice.data, "%Y-%m-%d", &time_tm); if (nullptr != res) { val = (time_tm.tm_year + 1900) * 16 * 32 + (time_tm.tm_mon + 1) * 32 + time_tm.tm_mday; } else { // 1400 - 01 - 01 val = 716833; } - } else if (!read_date_text_impl(val, rb)) { + } else if (ReadBuffer rb(slice.data, slice.size); !read_date_text_impl(val, rb)) { return Status::InvalidArgument("parse date fail, string: '{}'", std::string(rb.position(), rb.count()).c_str()); } @@ -78,9 +90,15 @@ Status DataTypeDate64SerDe::deserialize_one_cell_from_text(IColumn& column, Read return Status::OK(); } +void DataTypeDateTimeSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + void DataTypeDateTimeSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& 
options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -109,16 +127,22 @@ void DataTypeDateTimeSerDe::serialize_one_cell_to_text(const IColumn& column, in char* pos = value.to_string(buf); bw.write(buf, pos - buf - 1); } - bw.commit(); } -Status DataTypeDateTimeSerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeDateTimeSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeDateTimeSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& column_data = assert_cast(column); Int64 val = 0; if (options.date_olap_format) { tm time_tm; - char* res = strptime(rb.position(), "%Y-%m-%d %H:%M:%S", &time_tm); + char* res = strptime(slice.data, "%Y-%m-%d %H:%M:%S", &time_tm); if (nullptr != res) { val = ((time_tm.tm_year + 1900) * 10000L + (time_tm.tm_mon + 1) * 100L + time_tm.tm_mday) * @@ -128,7 +152,7 @@ Status DataTypeDateTimeSerDe::deserialize_one_cell_from_text(IColumn& column, Re // 1400 - 01 - 01 val = 14000101000000L; } - } else if (!read_datetime_text_impl(val, rb)) { + } else if (ReadBuffer rb(slice.data, slice.size); !read_datetime_text_impl(val, rb)) { return Status::InvalidArgument("parse datetime fail, string: '{}'", std::string(rb.position(), rb.count()).c_str()); } diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index a52cac5d1594a7..3780004a1898e0 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -43,10 +43,16 @@ class Arena; class DataTypeDate64SerDe : public DataTypeNumberSerDe { void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& 
bw, - const FormatOptions& options) const override; - - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; @@ -64,11 +70,17 @@ class DataTypeDate64SerDe : public DataTypeNumberSerDe { }; class DataTypeDateTimeSerDe : public DataTypeDate64SerDe { + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; + FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; }; } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index b6ddc5744d5523..368932217e3998 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -28,9 +28,15 @@ namespace doris { namespace vectorized { +void 
DataTypeDateTimeV2SerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + void DataTypeDateTimeV2SerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -50,10 +56,15 @@ void DataTypeDateTimeV2SerDe::serialize_one_cell_to_text(const IColumn& column, char* pos = val.to_string(buf); bw.write(buf, pos - buf - 1); } - bw.commit(); } -Status DataTypeDateTimeV2SerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeDateTimeV2SerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} +Status DataTypeDateTimeV2SerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& column_data = assert_cast(column); UInt64 val = 0; @@ -61,13 +72,14 @@ Status DataTypeDateTimeV2SerDe::deserialize_one_cell_from_text(IColumn& column, doris::vectorized::DateV2Value datetimev2_value; std::string date_format = "%Y-%m-%d %H:%i:%s.%f"; if (datetimev2_value.from_date_format_str(date_format.data(), date_format.size(), - rb.position(), rb.count())) { + slice.data, slice.size)) { val = datetimev2_value.to_date_int_val(); } else { val = doris::vectorized::MIN_DATETIME_V2; } - } else if (!read_datetime_v2_text_impl(val, rb)) { + } else if (ReadBuffer rb(slice.data, slice.size); + !read_datetime_v2_text_impl(val, rb)) { return Status::InvalidArgument("parse date fail, string: '{}'", std::string(rb.position(), rb.count()).c_str()); } diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index b0309198d01d30..645ba928ec7ca1 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -46,17 +46,25 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { DataTypeDateTimeV2SerDe(int scale) : scale(scale) {}; void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; + FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "not support read arrow array to uint64 column"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index 6d9d4f341787d5..de0416718667c1 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -28,9 +28,15 @@ namespace doris { namespace vectorized { +void DataTypeDateV2SerDe::serialize_column_to_text(const 
IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + void DataTypeDateV2SerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -42,22 +48,28 @@ void DataTypeDateV2SerDe::serialize_one_cell_to_text(const IColumn& column, int char* pos = val.to_string(buf); // DateTime to_string the end is /0 bw.write(buf, pos - buf - 1); - bw.commit(); } -Status DataTypeDateV2SerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeDateV2SerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeDateV2SerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& column_data = assert_cast(column); UInt32 val = 0; if (options.date_olap_format) { tm time_tm; - char* res = strptime(rb.position(), "%Y-%m-%d", &time_tm); + char* res = strptime(slice.data, "%Y-%m-%d", &time_tm); if (nullptr != res) { val = ((time_tm.tm_year + 1900) << 9) | ((time_tm.tm_mon + 1) << 5) | time_tm.tm_mday; } else { val = doris::vectorized::MIN_DATE_V2; } - } else if (!read_date_v2_text_impl(val, rb)) { + } else if (ReadBuffer rb(slice.data, slice.size); !read_date_v2_text_impl(val, rb)) { return Status::InvalidArgument("parse date fail, string: '{}'", std::string(rb.position(), rb.count()).c_str()); } diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index 689ed08092b514..2e0467a37007c6 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ 
b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -43,11 +43,17 @@ class Arena; class DataTypeDateV2SerDe : public DataTypeNumberSerDe { void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index 9cf891c62e20a4..e356b5dc0ab0a1 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -32,10 +32,17 @@ namespace doris { namespace vectorized { +template +void DataTypeDecimalSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + template void DataTypeDecimalSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -49,14 +56,23 @@ void DataTypeDecimalSerDe::serialize_one_cell_to_text(const IColumn& column, auto length = col.get_element(row_num).to_string(buf, scale, 
scale_multiplier); bw.write(buf, length); } - bw.commit(); } + +template +Status DataTypeDecimalSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + template -Status DataTypeDecimalSerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeDecimalSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& column_data = assert_cast&>(column).get_data(); T val = 0; - if (!read_decimal_text_impl(val, rb, precision, scale)) { + if (ReadBuffer rb(slice.data, slice.size); + !read_decimal_text_impl(val, rb, precision, scale)) { return Status::InvalidArgument("parse decimal fail, string: '{}', primitive type: '{}'", std::string(rb.position(), rb.count()).c_str(), get_primitive_type()); diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 4c2b73e3ab403f..9836bb2f3e6c55 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -69,11 +69,18 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { scale_multiplier(decimal_scale_multiplier(scale)) {} void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; + FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) 
const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h index ca1795181e1b07..aa357aeb806984 100644 --- a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h +++ b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h @@ -37,48 +37,70 @@ class Arena; class DataTypeFixedLengthObjectSerDe : public DataTypeSerDe { public: void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support serialize FixedLengthObject column to buffer"; + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override { - LOG(FATAL) << "Not support deserialize FixedLengthObject column from buffer"; - return Status::NotSupported("Not support deserialize FixedLengthObject column from buffer"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with 
type " + column.get_name()); } Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - return Status::NotSupported("Not support write FixedLengthObject column to pb"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - return Status::NotSupported("Not support read from pb to FixedLengthObject"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); }; void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const override { - LOG(FATAL) << "Not support write FixedLengthObject column to jsonb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override { - LOG(FATAL) << "Not support read from jsonb to FixedLengthObject"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write FixedLengthObject column to arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read FixedLengthObject column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const 
override { - return Status::NotSupported("Not support write object column to mysql"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override { - return Status::NotSupported("Not support write object column to mysql"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } }; } // namespace vectorized diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index 52b59cb3c03d41..8066196f7466ab 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -37,9 +37,14 @@ namespace doris { namespace vectorized { class IColumn; +void DataTypeHLLSerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + void DataTypeHLLSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto col_row = check_column_const_set_readability(column, row_num); ColumnPtr ptr = col_row.first; row_num = col_row.second; @@ -47,14 +52,21 @@ void DataTypeHLLSerDe::serialize_one_cell_to_text(const IColumn& column, int row std::unique_ptr buf = std::make_unique(data.max_serialized_size()); size_t size = data.serialize((uint8*)buf.get()); bw.write(buf.get(), size); - bw.commit(); } -Status DataTypeHLLSerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeHLLSerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status 
DataTypeHLLSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& data_column = assert_cast(column); - HyperLogLog hyper_log_log(Slice(rb.to_string())); + HyperLogLog hyper_log_log(slice); data_column.insert_value(hyper_log_log); return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h b/be/src/vec/data_types/serde/data_type_hll_serde.h index 65f56cd3d0eb97..46f90fc20c0fa0 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.h +++ b/be/src/vec/data_types/serde/data_type_hll_serde.h @@ -34,9 +34,14 @@ class Arena; class DataTypeHLLSerDe : public DataTypeSerDe { public: void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; @@ -50,7 +55,8 @@ class DataTypeHLLSerDe : public DataTypeSerDe { int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read hll column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& 
row_buffer, diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index c85c5c4abe5ea0..9b78d519828438 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -55,9 +55,15 @@ Status DataTypeJsonbSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } +void DataTypeJsonbSerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + void DataTypeJsonbSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -65,14 +71,21 @@ void DataTypeJsonbSerDe::serialize_one_cell_to_text(const IColumn& column, int r const StringRef& s = assert_cast(*ptr).get_data_at(row_num); if (s.size > 0) { bw.write(s.data, s.size); - bw.commit(); } } -Status DataTypeJsonbSerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeJsonbSerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeJsonbSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { JsonBinaryValue value; - RETURN_IF_ERROR(value.from_json_string(rb.position(), rb.count())); + RETURN_IF_ERROR(value.from_json_string(slice.data, slice.size)); auto& column_string = assert_cast(column); column_string.insert_data(value.value(), value.size()); diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h 
b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index 1d612eeb69a0ef..3acb4ea0e1f5f5 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -43,11 +43,17 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { int end) const override; void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index 5b7bb09147f5ae..afa64d954b1d8b 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -29,6 +29,164 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeMapSerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} +void DataTypeMapSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + const ColumnMap& map_column = assert_cast(*ptr); + const ColumnArray::Offsets64& offsets = 
map_column.get_offsets(); + + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + + const IColumn& nested_keys_column = map_column.get_keys(); + const IColumn& nested_values_column = map_column.get_values(); + bw.write("{", 1); + for (size_t i = offset; i < next_offset; ++i) { + if (i != offset) { + bw.write(&options.collection_delim, 1); + bw.write(" ", 1); + } + key_serde->serialize_one_cell_to_text(nested_keys_column, i, bw, options); + bw.write(&options.map_key_delim, 1); + value_serde->serialize_one_cell_to_text(nested_values_column, i, bw, options); + } + bw.write("}", 1); +} + +Status DataTypeMapSerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} +Status DataTypeMapSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + DCHECK(!slice.empty()); + auto& array_column = assert_cast(column); + auto& offsets = array_column.get_offsets(); + IColumn& nested_key_column = array_column.get_keys(); + IColumn& nested_val_column = array_column.get_values(); + DCHECK(nested_key_column.is_nullable()); + DCHECK(nested_val_column.is_nullable()); + if (slice[0] != '{') { + std::stringstream ss; + ss << slice[0] << '\''; + return Status::InvalidArgument("Map does not start with '{' character, found '" + ss.str()); + } + if (slice[slice.size - 1] != '}') { + std::stringstream ss; + ss << slice[slice.size - 1] << '\''; + return Status::InvalidArgument("Map does not end with '}' character, found '" + ss.str()); + } + // empty map + if (slice.size == 2) { + offsets.push_back(offsets.back()); + return Status::OK(); + } + + // remove '{' '}' + slice.remove_prefix(1); + slice.remove_suffix(1); + slice.trim_prefix(); + + // deserialize map column from text we have to know how to split from text and support nested + // complex type. + // 1. 
get item according to collection_delimiter, but if meet collection_delimiter in string, we should ignore it. + // 2. get kv according map_key_delimiter, but if meet map_key_delimiter in string, we should ignore it. + // 3. keep a nested level to support nested complex type. + int nested_level = 0; + bool has_quote = false; + int start_pos = 0; + size_t slice_size = slice.size; + bool key_added = false; + int idx = 0; + int elem_deserialized = 0; + for (; idx < slice_size; ++idx) { + char c = slice[idx]; + if (c == '"' || c == '\'') { + has_quote = !has_quote; + } else if (c == '\\' && idx + 1 < slice_size) { //escaped + ++idx; + } else if (!has_quote && (c == '[' || c == '{')) { + ++nested_level; + } else if (!has_quote && (c == ']' || c == '}')) { + --nested_level; + } else if (!has_quote && nested_level == 0 && c == options.map_key_delim && !key_added) { + // if meet map_key_delimiter and not in quote, we can make it as key elem. + if (idx == start_pos) { + continue; + } + Slice next(slice.data + start_pos, idx - start_pos); + next.trim_prefix(); + if (options.converted_from_string && + (next.starts_with("\"") || next.starts_with("'"))) { + next.remove_prefix(1); + } + if (options.converted_from_string && (next.ends_with("\"") || next.ends_with("'"))) { + next.remove_suffix(1); + } + if (Status st = + key_serde->deserialize_one_cell_from_text(nested_key_column, next, options); + !st.ok()) { + nested_key_column.pop_back(elem_deserialized); + nested_val_column.pop_back(elem_deserialized); + return st; + } + // skip delimiter + start_pos = idx + 1; + key_added = true; + } else if (!has_quote && nested_level == 0 && c == options.collection_delim && key_added) { + // if meet collection_delimiter and not in quote, we can make it as value elem + if (idx == start_pos) { + continue; + } + Slice next(slice.data + start_pos, idx - start_pos); + next.trim_prefix(); + if (options.converted_from_string) next.trim_quote(); + + if (Status st = 
value_serde->deserialize_one_cell_from_text(nested_val_column, next, + options); + !st.ok()) { + nested_key_column.pop_back(elem_deserialized + 1); + nested_val_column.pop_back(elem_deserialized); + return st; + } + // skip delimiter + start_pos = idx + 1; + // reset key_added + key_added = false; + ++elem_deserialized; + } + } + // for last value elem + if (!has_quote && nested_level == 0 && idx == slice_size && idx != start_pos && key_added) { + Slice next(slice.data + start_pos, idx - start_pos); + next.trim_prefix(); + if (options.converted_from_string) next.trim_quote(); + + if (Status st = + value_serde->deserialize_one_cell_from_text(nested_val_column, next, options); + !st.ok()) { + nested_key_column.pop_back(elem_deserialized + 1); + nested_val_column.pop_back(elem_deserialized); + return st; + } + ++elem_deserialized; + } + + offsets.emplace_back(offsets.back() + elem_deserialized); + return Status::OK(); +} + void DataTypeMapSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { auto blob = static_cast(arg); column.deserialize_and_insert_from_arena(blob->getBlob()); diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index b415ec4d27951d..e90ba11f299436 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -40,22 +40,23 @@ class DataTypeMapSerDe : public DataTypeSerDe { : key_serde(_key_serde), value_serde(_value_serde) {} void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support serialize map column to buffer"; - } - - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support deserialize from buffer to map"; - return Status::NotSupported("Not support deserialize from buffer to map"); - } + FormatOptions& 
options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not support write map column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to map"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const override; diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index bc4187ea233110..77d67e764a174e 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -38,9 +38,15 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeNullableSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + void DataTypeNullableSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -48,30 +54,36 @@ 
void DataTypeNullableSerDe::serialize_one_cell_to_text(const IColumn& column, in const auto& col_null = assert_cast(*ptr); if (col_null.is_null_at(row_num)) { bw.write("NULL", 4); - bw.commit(); } else { nested_serde->serialize_one_cell_to_text(col_null.get_nested_column(), row_num, bw, options); } } -Status DataTypeNullableSerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeNullableSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeNullableSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& null_column = assert_cast(column); // TODO(Amory) make null literal configurable - if (rb.count() == 4 && *(rb.position()) == 'N' && *(rb.position() + 1) == 'U' && - *(rb.position() + 2) == 'L' && *(rb.position() + 3) == 'L') { + if (slice.size == 4 && slice[0] == 'N' && slice[1] == 'U' && slice[2] == 'L' && + slice[3] == 'L') { null_column.insert_data(nullptr, 0); return Status::OK(); } - auto st = nested_serde->deserialize_one_cell_from_text(null_column.get_nested_column(), rb, + auto st = nested_serde->deserialize_one_cell_from_text(null_column.get_nested_column(), slice, options); if (!st.ok()) { // fill null if fail null_column.insert_data(nullptr, 0); // 0 is meaningless here return Status::OK(); } - // fill not null if succ + // fill not null if success null_column.get_null_map_data().push_back(0); return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 4ff7630ddfc96c..42e30e2d1bdc48 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -36,11 +36,16 @@ class DataTypeNullableSerDe : public DataTypeSerDe { 
DataTypeNullableSerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {} void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; - - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index d887d90d2d49e8..c83e4769cb0f2e 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -101,9 +101,10 @@ void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const } template -Status DataTypeNumberSerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeNumberSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& column_data = reinterpret_cast(column); + ReadBuffer rb(slice.data, slice.size); if constexpr (std::is_same::value) { // TODO: support for Uint128 return Status::InvalidArgument("uint128 is not support"); @@ -135,10 +136,17 @@ Status DataTypeNumberSerDe::deserialize_one_cell_from_text(IColumn& column, R return Status::OK(); } +template +void DataTypeNumberSerDe::serialize_column_to_text(const IColumn& column, int 
start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + template void DataTypeNumberSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -154,7 +162,14 @@ void DataTypeNumberSerDe::serialize_one_cell_to_text(const IColumn& column, i } else if constexpr (std::is_integral::value || std::numeric_limits::is_iec559) { bw.write_number(data); } - bw.commit(); +} + +template +Status DataTypeNumberSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); } template diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index b3df10f062ff6d..1e27ef03d12c28 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -56,11 +56,16 @@ class DataTypeNumberSerDe : public DataTypeSerDe { using ColumnType = ColumnVector; void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; - - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + Status write_column_to_pb(const 
IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index 22235a1573e831..3dddc06113dcbb 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -37,50 +37,70 @@ class Arena; class DataTypeObjectSerDe : public DataTypeSerDe { public: void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support write object column to buffer"; + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override { - LOG(FATAL) << "Not support read object column from buffer"; - return Status::NotSupported("Not support read object column from buffer"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not 
support write object column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to object"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const override { - LOG(FATAL) << "Not support write object column to json"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override { - LOG(FATAL) << "Not support write json object to column"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write object column to arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read object column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override { - LOG(FATAL) << "Not support write object column to mysql"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& 
row_buffer, int row_idx, bool col_const) const override { - LOG(FATAL) << "Not support write object column to mysql"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } }; } // namespace vectorized diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index 08fe45699cc7d9..4c9dae672beee0 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -41,15 +41,29 @@ template class DataTypeQuantileStateSerDe : public DataTypeSerDe { public: void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support write QuantileState column to buffer"; + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override { - LOG(FATAL) << "Not support read QuantileState column from buffer"; - return Status::NotSupported("Not support read QuantileState column from buffer"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + 
"write_column_to_pb with type " + column.get_name()); } + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; @@ -61,11 +75,13 @@ class DataTypeQuantileStateSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 36a2bd104a1a7d..654b8aeb7bbdcb 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -41,6 +41,23 @@ namespace cctz { class time_zone; } // namespace cctz +#define SERIALIZE_COLUMN_TO_TEXT() \ + for (size_t i = start_idx; i < end_idx; ++i) { \ + if (i != start_idx) { \ + bw.write(options.field_delim.data(), options.field_delim.size()); \ + } \ + serialize_one_cell_to_text(column, i, bw, options); \ + } + +#define DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() \ + for (int i = 0; i < slices.size(); ++i) { \ + if (Status st = deserialize_one_cell_from_text(column, slices[i], options); \ + st != Status::OK()) { \ + return st; \ + } \ + ++*num_deserialized; \ + } + namespace doris { class PValues; class JsonbValue; @@ -70,6 +87,24 @@ class DataTypeSerDe { * 
use this format in olap, because it is more slower, keep this option is for compatibility. */ bool date_olap_format = false; + /** + * field_delim is the separator written between consecutive cells when several cells are serialized into one text run + */ + std::string field_delim = ","; + /** + * collection_delim separates the elements of a collection such as an array or a map + */ + char collection_delim = ','; + /** + * map_key_delim separates a key from its value inside a map, e.g. key:value + */ + char map_key_delim = ':'; + /** + * Used when deserializing from text: if an element of a non-string type is wrapped in "" or '' and this switch is on, + * the surrounding quotes are dropped and the inner text is converted to the element type, + * e.g. int, float, double, date, datetime, timestamp, decimal. + */ + bool converted_from_string = false; }; public: @@ -77,10 +112,18 @@ class DataTypeSerDe { virtual ~DataTypeSerDe(); // Text serializer and deserializer with formatOptions to handle different text format virtual void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const = 0; + FormatOptions& options) const = 0; + + // Serializes the cells in [start_idx, end_idx) of one column in a single call, so nested complex-type loops avoid a virtual call per cell + virtual void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const = 0; - virtual Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + virtual Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const = 0; + // Deserializes a whole vector of text slices in a single call, so nested complex-type loops avoid a virtual call per cell + virtual Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const = 0; // Protobuf serializer and deserializer virtual Status write_column_to_pb(const IColumn& column, PValues& result, int start, diff --git 
a/be/src/vec/data_types/serde/data_type_string_serde.cpp b/be/src/vec/data_types/serde/data_type_string_serde.cpp index dda20b5b655781..24ec0fa53e33d5 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp @@ -34,22 +34,34 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeStringSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + void DataTypeStringSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const { + FormatOptions& options) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; const auto& value = assert_cast(*ptr).get_data_at(row_num); bw.write(value.data, value.size); - bw.commit(); } -Status DataTypeStringSerDe::deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, +Status DataTypeStringSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeStringSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& column_data = assert_cast(column); - column_data.insert_data(rb.position(), rb.count()); + column_data.insert_data(slice.data, slice.size); return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index d377c345b982f6..74822c8f95cc36 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -34,11 +34,18 @@ class Arena; class DataTypeStringSerDe : public DataTypeSerDe { public: void 
serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override; + FormatOptions& options) const override; - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index 9491fc1d497c89..33d14c74115dfb 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -40,22 +40,38 @@ class DataTypeStructSerDe : public DataTypeSerDe { : elemSerDeSPtrs(_elemSerDeSPtrs) {} void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, - const FormatOptions& options) const override { - LOG(FATAL) << "Not support serialize struct column to buffer"; + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "serialize_one_cell_to_text with type " + column.get_name()); } - Status deserialize_one_cell_from_text(IColumn& column, ReadBuffer& rb, + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "serialize_column_to_text with type " + column.get_name()); + } + + Status 
deserialize_one_cell_from_text(IColumn& column, Slice& slice, const FormatOptions& options) const override { - LOG(FATAL) << "Not support deserialize from buffer to struct"; - return Status::NotSupported("Not support deserialize from buffer to struct"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "deserialize_one_cell_from_text with type " + column.get_name()); } + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception( + ErrorCode::NOT_IMPLEMENTED_ERROR, + "deserialize_column_from_text_vector with type " + column.get_name()); + } Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not support write struct column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to strut"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "read_column_from_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const override; diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp index a97208d4793c3f..188d70daa94d7a 100644 --- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp @@ -22,7 +22,9 @@ #include "vec/common/string_buffer.hpp" #include "vec/core/field.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/serde/data_type_serde.h" #include "vec/data_types/serde_utils.h" 
#include "vec/io/reader_buffer.h" @@ -133,7 +135,7 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { } else { data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); } - std::cout << "this type is " << data_type_ptr->get_name() << ": " + std::cout << "========= This type is " << data_type_ptr->get_name() << ": " << fmt::format("{}", type) << std::endl; auto col = data_type_ptr->create_column(); @@ -149,7 +151,7 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { for (int i = 0; i < std::get<1>(type_pair).size(); ++i) { string test_str = std::get<1>(type_pair)[i]; std::cout << "the str : " << test_str << std::endl; - ReadBuffer rb_test(test_str.data(), test_str.size()); + Slice rb_test(test_str.data(), test_str.size()); // deserialize Status st = serde->deserialize_one_cell_from_text(*col, rb_test, default_format_option); @@ -161,6 +163,7 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { EXPECT_EQ(st.ok(), true); // serialize serde->serialize_one_cell_to_text(*col, i, buffer_writer, default_format_option); + buffer_writer.commit(); EXPECT_EQ(ser_col->get_data_at(ser_col->size() - 1).to_string(), std::get<2>(type_pair)[i]); } @@ -181,7 +184,7 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { for (auto pair : date_scala_field_types) { auto type = pair.first; DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); - std::cout << "this type is " << data_type_ptr->get_name() << ": " + std::cout << "========= This type is " << data_type_ptr->get_name() << ": " << fmt::format("{}", type) << std::endl; std::unique_ptr min_wf(WrapperField::create_by_type(type)); @@ -196,9 +199,9 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { string max_s = max_wf->to_string(); string rand_date = rand_wf->to_string(); - ReadBuffer min_rb(min_s.data(), min_s.size()); - ReadBuffer max_rb(max_s.data(), max_s.size()); - ReadBuffer rand_rb(rand_date.data(), rand_date.size()); + Slice min_rb(min_s.data(), min_s.size()); + Slice max_rb(max_s.data(), 
max_s.size()); + Slice rand_rb(rand_date.data(), rand_date.size()); auto col = data_type_ptr->create_column(); DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); @@ -217,8 +220,11 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { ser_col->reserve(3); VectorBufferWriter buffer_writer(*ser_col.get()); serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); serde->serialize_one_cell_to_text(*col, 1, buffer_writer, formatOptions); + buffer_writer.commit(); serde->serialize_one_cell_to_text(*col, 2, buffer_writer, formatOptions); + buffer_writer.commit(); rtrim(min_s); rtrim(max_s); rtrim(rand_date); @@ -252,8 +258,1003 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); serde->serialize_one_cell_to_text(*col, 0, buffer_writer, default_format_option); + buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string()); } } + +// test for array and map +TEST(TextSerde, ComplexTypeSerdeTextTest) { + // array-scala + { + // nested type,test string, expect string(option.converted_from_string=false),expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL, + {"[0, 1,-1,1]", "[true, false]", "[,]", "[1,true,t]", + "[1, false], [,], [1,true,t]"}, + {"[0, 1, NULL, 1]", "[1, 0]", "[NULL, NULL]", "[1, 1, NULL]", + "[1, NULL, NULL, 1, NULL]"}, + {"[0, 1, NULL, 1]", "[1, 0]", "[NULL, NULL]", "[1, 1, NULL]", + "[1, NULL, NULL, 1, NULL]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_TINYINT, + {"[1111, 12, ]", "[,1 , 3]", "[ed, 2,]", "[],[]", "[[]]"}, + {"[NULL, 12, NULL]", "[NULL, 1, 3]", "[NULL, 2, NULL]", "[NULL]", "[NULL]"}, + {"[NULL, 12, NULL]", "[NULL, 1, 3]", "[NULL, 2, NULL]", "[NULL]", + "[NULL]"}), + FieldType_RandStr( + 
FieldType::OLAP_FIELD_TYPE_FLOAT, + {"[0.33, 0.67, 0]", "[3.40282e+38, 3.40282e+38+1]", "[\"3.40282e+38+1\"]", + "[\"3.14\", 0.77]"}, + {"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[NULL, 0.77]"}, + {"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[3.14, 0.77]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"[3.1415926, 0.878787878, 12.44456475432]", + "[2343.12345465746, 2.22507e-308, 2.22507e-308-1, \"2.22507e-308\"]"}, + {"[3.1415926, 0.878787878, 12.44456475432]", + "[2343.12345465746, 2.22507e-308, NULL, NULL]"}, + {"[3.1415926, 0.878787878, 12.44456475432]", + "[2343.12345465746, 2.22507e-308, NULL, 2.22507e-308]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, + {"[\"hello\", \"world\"]", "['a', 'b', 'c']", + "[\"42\",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:" + "Control#86:Bob#31,17:true:Abe " + "Linkedin,BLUE,\"\\N\",\"\u0001\u0002\u0003,\\u0001bc\"]"}, + // last : ["42",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,"\N",",\u0001bc"] + {"[\"hello\", \"world\"]", "['a', 'b', 'c']", + "[\"42\", 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, " + "Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \"\\N\", " + "\"\x1\x2\x3,\\u0001bc\"]"}, + {"[hello, world]", "[a, b, c]", + "[42, 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, " + "Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \\N, " + "\x1\x2\x3,\\u0001bc]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATE, + {"[\\\"2022-07-13\\\",\"2022-07-13 12:30:00\"]", + "[2022-07-13 12:30:00, \"2022-07-13\"]", + "[2022-07-13 12:30:00.000, 2022-07-13]"}, + {"[NULL, NULL]", "[2022-07-13, NULL]", "[2022-07-13, 2022-07-13]"}, + {"[NULL, 2022-07-13]", "[2022-07-13, 2022-07-13]", + "[2022-07-13, 2022-07-13]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATETIME, + {"[\"2022-07-13\",\"2022-07-13 12:30:00\"]", + "[2022-07-13 12:30:00, \"2022-07-13\", 2022-07-13 
12:30:00.0000]"}, + {"[NULL, NULL]", "[2022-07-13 12:30:00, NULL, 2022-07-13 12:30:00]"}, + {"[2022-07-13 00:00:00, 2022-07-13 12:30:00]", + "[2022-07-13 12:30:00, 2022-07-13 00:00:00, 2022-07-13 12:30:00]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DECIMAL, + {"[4, 5.5, 6.67]", + "[012345678901234567.012345678,123456789012345678.01234567, " + "12345678901234567.0123456779,12345678901234567.01234567791," + "1234567890123456789.01234567]", + "[\"012345678901234567.012345678\",\"123456789012345678.01234567\", " + "\"12345678901234567.0123456779\", " + "\"12345678901234567.01234567791\",\"1234567890123456789.01234567\"]", + "[\\1234567890123456789.01234567\\]"}, + {"[4.000000000, 5.500000000, 6.670000000]", + "[12345678901234567.012345678, 123456789012345678.012345670, " + "12345678901234567.012345678, NULL, NULL]", + "[NULL, NULL, NULL, NULL, NULL]", "[NULL]"}, + {"[4.000000000, 5.500000000, 6.670000000]", + "[12345678901234567.012345678, 123456789012345678.012345670, " + "12345678901234567.012345678, NULL, NULL]", + "[12345678901234567.012345678, 123456789012345678.012345670, " + "12345678901234567.012345678, NULL, NULL]", + "[NULL]"}), + }; + // array type + for (auto type_pair : nested_field_types) { + auto type = std::get<0>(type_pair); + DataTypePtr nested_data_type_ptr = + DataTypeFactory::instance().create_data_type(type, 0, 0); + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_data_type_ptr)); + + std::cout << "========= This type is " << array_data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + auto col = array_data_type_ptr->create_column(); + auto col2 = array_data_type_ptr->create_column(); + auto col3 = array_data_type_ptr->create_column(); + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<1>(type_pair).size(); ++i) { + std::string 
rand_str = std::get<1>(type_pair)[i]; + std::string expect_str = std::get<2>(type_pair)[i]; + std::string expect_str_1 = std::get<3>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str_1 << std::endl; + { + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "[]") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, i, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + Status status = array_data_type_ptr->from_string(rb, col2); + EXPECT_EQ(status.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, i, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "[]") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + 
EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, i, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // map-scala-scala + { + // nested key type , nested value type, test string , expect string + typedef std::tuple, std::vector> + FieldType_RandStr; + std::vector nested_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL, + FieldType::OLAP_FIELD_TYPE_STRING, + {"{1: \"amory is 7\", 0: \" doris be better \", -1: \"wrong,\"}", + "{\"1\": \"amory is 7\", \"0\": 1}"}, + {"{1:\"amory is 7\", 0:\" doris be better \", NULL:\"wrong,\"}", + "{NULL:\"amory is 7\", NULL:1}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{\" ,.amory\": 111.2343, \"\": 112., 'dggs': 13.14 , NULL: 12.2222222, " + ": NULL\\}", + "{\"\": NULL, null: 12.44}", "{{}}", "{{}", "}}", "{}, {}"}, + {"{\" ,.amory\":111.2343, \"\":112, 'dggs':13.14, NULL:12.2222222, :NULL}", + "{\"\":NULL, null:12.44}", "{}", "{}", "", "{}"}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_FLOAT, + FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{0.33: 3.1415926,3.1415926: 22}", "{3.14, 15926: 22}", "{3.14}", + "{222:3444},", "{4.12, 677: 455: 356, 67.6:67.7}"}, + {"{0.33:3.1415926, 3.1415925:22}", "{NULL:22}", "{}", "", + "{NULL:NULL, 67.6:67.7}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME, + {"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 " + "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, " + "2022-07-13:'2022-07-13 12:30:00'}", + // escaped char ':' + "{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: " + "2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}"}, + 
{"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, " + "NULL:NULL, 2022-07-13:NULL}", + "{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, " + "2022-07-13:NULL}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL, + {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: NULL}", + "{\"2022-07-13 12:30:00\": \"12.45675432\"}", + "{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}", + "{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"}, + {"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, " + "NULL:NULL}", + "{NULL:NULL}", + "{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}", + "{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}), + }; + + for (auto type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + auto value_type = std::get<1>(type_pair); + DataTypePtr nested_key_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + DataTypePtr nested_value_type_ptr = + DataTypeFactory::instance().create_data_type(value_type, 0, 0); + DataTypePtr map_data_type_ptr = std::make_shared( + make_nullable(nested_key_type_ptr), make_nullable(nested_value_type_ptr)); + + std::cout << "========= This type is " << map_data_type_ptr->get_name() << std::endl; + + auto col2 = map_data_type_ptr->create_column(); + DataTypeSerDeSPtr serde = map_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + { + auto col = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + std::cout << 
st.to_json() << std::endl; + if (expect_str.empty()) { + EXPECT_FALSE(st.ok()); + continue; + } + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + // from_string + { + ReadBuffer rb(rand_str.data(), rand_str.size()); + std::cout << "from string rb: " << rb.to_string() << std::endl; + Status stat = map_data_type_ptr->from_string(rb, col2); + std::cout << stat.to_json() << std::endl; + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, col2->size() - 1, buffer_writer, + formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d.to_string() << std::endl; + } + } + } + + // option with converted_with_string true + typedef std::tuple, std::vector> + FieldType_RandStr; + std::vector field_types = { + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME, + {"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 " + "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, " + "2022-07-13:'2022-07-13 12:30:00'}", + // escaped char ':' + "{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: " + "2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}"}, + {"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, " + "NULL:NULL, 2022-07-13:2022-07-13 12:30:00}", + "{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, " + "2022-07-13:2022-07-13 12:30:00}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL, + {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: 
NULL}", + "{\"2022-07-13 12:30:00\": \"12.45675432\"}", + "{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}", + "{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"}, + {"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, " + "NULL:NULL}", + "{2022-07-13 12:30:00:12.456754320}", + "{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}", + "{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}), + }; + for (auto type_pair : field_types) { + auto key_type = std::get<0>(type_pair); + auto value_type = std::get<1>(type_pair); + DataTypePtr nested_key_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + DataTypePtr nested_value_type_ptr = + DataTypeFactory::instance().create_data_type(value_type, 0, 0); + DataTypePtr map_data_type_ptr = std::make_shared( + make_nullable(nested_key_type_ptr), make_nullable(nested_value_type_ptr)); + + std::cout << "========= This type is " << map_data_type_ptr->get_name() << std::endl; + + auto col2 = map_data_type_ptr->create_column(); + DataTypeSerDeSPtr serde = map_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.converted_from_string = true; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + { + auto col = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + std::cout << st.to_json() << std::endl; + if (expect_str.empty()) { + EXPECT_FALSE(st.ok()); + continue; + } + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + 
buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + } + } +} + +TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { + // array-array + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, std::vector, + std::vector> + FieldType_RandStr; + std::vector nested_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, + {"[[Hello, World],[This, is, a, nested, array]]"}, + {"[[Hello, World], [This, is, a, nested, array]]"}, + {"[NULL, NULL, NULL, NULL, NULL, NULL, NULL]"}, + {"[[Hello, World], [This, is, a, nested, array]]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, + {"[[With, special, \"characters\"], [like, @, #, $, % \"^\", &, *, (, ), " + "-, _], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >, ,, ., /, ?, ~]]"}, + {"[[With, special, \"characters\"], [like, @, #, $, % \"^\", &, *, (, ), " + "-, _], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >, ,, ., /, ?, ~]]"}, + {""}, + {"[[With, special, characters], [like, @, #, $, % \"^\", &, *, (, ), -, " + "_], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >, ,, ., /, ?, ~]]"})}; + // array type + for (auto type_pair : nested_field_types) { + auto type = std::get<0>(type_pair); + DataTypePtr nested_data_type_ptr = + DataTypeFactory::instance().create_data_type(type, 0, 0); + DataTypePtr nested_array_data_type_ptr = + std::make_shared(make_nullable(nested_data_type_ptr)); + + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_array_data_type_ptr)); + + std::cout << "========= This type is " << array_data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < 
std::get<1>(type_pair).size(); ++i) { + std::string rand_str = std::get<1>(type_pair)[i]; + std::string expect_str = std::get<2>(type_pair)[i]; + std::string expect_from_string_str = std::get<3>(type_pair)[i]; + std::string expect_str_1 = std::get<4>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = array_data_type_ptr->create_column(); + Status status = array_data_type_ptr->from_string(rb, col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, 
rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // array-map + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, + std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = {FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"[{\"2cKtIM-L1mOcEm-udR-HcB2\":0.23929040957798242,\"eof2UN-Is0EEuA-H5D-hE58\":0." + "42373055809540094,\"FwUSOB-R8rtK9W-BVG-8wYZ\":0.7680704548628841},{\"qDXU9D-" + "7orr51d-g80-6t5k\":0.6446245786874659,\"bkLjmx-uZ2Ez7F-536-PGqy\":0." + "8880791950937957,\"9Etq4o-FPm37O4-5fk-QWh7\":0.08630489716260481},{\"tu3OMw-" + "mzS0jAx-Dnj-Xm3G\":0.1184199213706042,\"XkhTn0-QFLo8Ks-JXR-k4zk\":0." + "5181239375482816,\"EYC8Dj-GTTp9iB-b4O-QBkO\":0.4491897722178303},{\"sHFGPg-" + "cfA8gya-kfw-IugT\":0.20842299487398452,\"BBQ6e5-OJYRJhC-zki-7rQj\":0." + "3050124830713523,\"mKH57V-YmwCNFq-vs8-vUIX\":0.36446683035480754},{\"HfhEMX-" + "oAMBJCC-YIC-hCqN\":0.8131454631693608,\"xrnTFd-ikONWik-T7J-sL8J\":0." 
+ "37509722558990855,\"SVyEes-77mlzIr-N6c-DkYw\":0.4703053945053086}]"}, + {"[{\"2cKtIM-L1mOcEm-udR-HcB2\":0.23929040957798242, " + "\"eof2UN-Is0EEuA-H5D-hE58\":0.42373055809540094, " + "\"FwUSOB-R8rtK9W-BVG-8wYZ\":0.7680704548628841}, " + "{\"qDXU9D-7orr51d-g80-6t5k\":0.6446245786874659, " + "\"bkLjmx-uZ2Ez7F-536-PGqy\":0.8880791950937957, " + "\"9Etq4o-FPm37O4-5fk-QWh7\":0.08630489716260481}, " + "{\"tu3OMw-mzS0jAx-Dnj-Xm3G\":0.1184199213706042, " + "\"XkhTn0-QFLo8Ks-JXR-k4zk\":0.5181239375482816, " + "\"EYC8Dj-GTTp9iB-b4O-QBkO\":0.4491897722178303}, " + "{\"sHFGPg-cfA8gya-kfw-IugT\":0.20842299487398452, " + "\"BBQ6e5-OJYRJhC-zki-7rQj\":0.3050124830713523, " + "\"mKH57V-YmwCNFq-vs8-vUIX\":0.36446683035480754}, " + "{\"HfhEMX-oAMBJCC-YIC-hCqN\":0.8131454631693608, " + "\"xrnTFd-ikONWik-T7J-sL8J\":0.37509722558990855, " + "\"SVyEes-77mlzIr-N6c-DkYw\":0.4703053945053086}]"}, + {""}, + {"[{2cKtIM-L1mOcEm-udR-HcB2:0.23929040957798242, " + "eof2UN-Is0EEuA-H5D-hE58:0.42373055809540094, " + "FwUSOB-R8rtK9W-BVG-8wYZ:0.7680704548628841}, " + "{qDXU9D-7orr51d-g80-6t5k:0.6446245786874659, " + "bkLjmx-uZ2Ez7F-536-PGqy:0.8880791950937957, " + "9Etq4o-FPm37O4-5fk-QWh7:0.08630489716260481}, " + "{tu3OMw-mzS0jAx-Dnj-Xm3G:0.1184199213706042, " + "XkhTn0-QFLo8Ks-JXR-k4zk:0.5181239375482816, " + "EYC8Dj-GTTp9iB-b4O-QBkO:0.4491897722178303}, " + "{sHFGPg-cfA8gya-kfw-IugT:0.20842299487398452, " + "BBQ6e5-OJYRJhC-zki-7rQj:0.3050124830713523, " + "mKH57V-YmwCNFq-vs8-vUIX:0.36446683035480754}, " + "{HfhEMX-oAMBJCC-YIC-hCqN:0.8131454631693608, " + "xrnTFd-ikONWik-T7J-sL8J:0.37509722558990855, " + "SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086}]"})}; + for (auto type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + DataTypePtr nested_key_data_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + auto val_type = std::get<1>(type_pair); + DataTypePtr nested_value_data_type_ptr = + DataTypeFactory::instance().create_data_type(val_type, 0, 0); + + 
DataTypePtr nested_map_data_type_ptr = + std::make_shared(make_nullable(nested_key_data_type_ptr), + make_nullable(nested_value_data_type_ptr)); + + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_map_data_type_ptr)); + + std::cout << "========= This type is " << array_data_type_ptr->get_name() << std::endl; + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::string expect_from_string_str = std::get<4>(type_pair)[i]; + std::string expect_str_1 = std::get<5>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = array_data_type_ptr->create_column(); + Status status = array_data_type_ptr->from_string(rb, 
col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // map-scala-array (map>) + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, + std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = {FieldType_RandStr( + // map> + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{\"5Srn6n-SP9fOS3-khz-Ljwt\":[0.8537551959339321,0.13473869413865858,0." + "9806016478238296,0.23014415892941564,0.26853530959759686,0.05484935641143551,0." 
+ "11181328816302816,0.26510985318905933,0.6350885463275475,0.18209889263574142]," + "\"vrQmBC-2WlpWML-V5S-OLgM\":[0.6982221340596457,0.9260447299229463,0." + "12488042737255534,0.8859407191137862,0.03201490973378984,0.8371916387557367,0." + "7894434066323907,0.29667576138232743,0.9837777568426148,0.7773721913552772]," + "\"3ZbiXK-VvmhFcg-09V-w3g3\":[0.20509046053951785,0.9175575704931109,0." + "305788438361256,0.9923240410251069,0.6612939841907548,0.5922056063112593,0." + "15750800821536715,0.6374743124669565,0.4158097731627699,0.00302193321816846]," + "\"gMswpS-Ele9wHM-Uxp-VxzC\":[0.14378032144751685,0.627919779177473,0." + "6188731271454715,0.8088384184584442,0.8169160298605824,0.9051151670055427,0." + "558001941204895,0.029409463113641787,0.9532987674717762,0.20833228278241533]," + "\"TT9P9f-PXjQnvN-RBx-xRiS\":[0.8276005878909756,0.470950932860423,0." + "2442851528127543,0.710599416715854,0.3353731152359334,0.622947602340124,0." + "30675353671676797,0.8190741661938367,0.633630372770242,0.9436322366112492]," + "\"gLAnZc-oF7PC9o-ryd-MOXr\":[0.9742716809818137,0.9114038616933997,0." + "47459239268645104,0.6054569900795078,0.5515590901916287,0.8833310208917589,0." + "96476090778518,0.8873874315592357,0.3577701257062156,0.6993447306713452]," + "\"zrq6BY-7FJg3hc-Dd1-bAJn\":[0.1038405592062176,0.6757819253774818,0." + "6386535502499314,0.23598674876945303,0.11046582465777044,0.6426056925348297,0." + "17289073092250662,0.37116009951425233,0.594677969672274,0.49351456402872274]," + "\"gCKqtW-bLaoxgZ-CuW-M2re\":[0.934169137905867,0.12015121444469123,0." + "5009923777544698,0.4689139716802634,0.7226298925299507,0.33486164698864984,0." + "32944768657449996,0.5051366150918063,0.03228636228382431,0.48211773870118435]," + "\"SWqhI2-XnF9jVR-dT1-Yrtt\":[0.8005897112110444,0.899180582368993,0." + "9232176819588501,0.8615673086606942,0.9248122266449379,0.5586489299212893,0." 
+ "40494513773898455,0.4752644689010731,0.6668395567417462,0.9068738374244337]," + "\"Z85F6M-cy5K4GP-7I5-5KS9\":[0.34761241187833714,0.46467162849990507,0." + "009781307454025168,0.3174295126364216,0.6405423361175397,0.33838144910731327,0." + "328860321648657,0.032638966917555856,0.32782524002924884,0.7675689545937956]," + "\"rlcnbo-tFg1FfP-ra6-D9Z8\":[0.7450713997349928,0.792502852203968,0." + "9034039182796755,0.49131654565079996,0.25223293077647946,0.9827253462450637,0." + "1684868582627418,0.0417161505112974,0.8498128570850716,0.8948779001812955]}"}, + {"{\"5Srn6n-SP9fOS3-khz-Ljwt\":[0.8537551959339321, 0.13473869413865858, " + "0.9806016478238296, 0.23014415892941564, 0.26853530959759686, " + "0.05484935641143551, 0.11181328816302816, 0.26510985318905933, " + "0.6350885463275475, 0.18209889263574142], " + "\"vrQmBC-2WlpWML-V5S-OLgM\":[0.6982221340596457, 0.9260447299229463, " + "0.12488042737255534, 0.8859407191137862, 0.03201490973378984, " + "0.8371916387557367, 0.7894434066323907, 0.29667576138232743, 0.9837777568426148, " + "0.7773721913552772], \"3ZbiXK-VvmhFcg-09V-w3g3\":[0.20509046053951785, " + "0.9175575704931109, 0.305788438361256, 0.9923240410251069, 0.6612939841907548, " + "0.5922056063112593, 0.15750800821536715, 0.6374743124669565, 0.4158097731627699, " + "0.00302193321816846], \"gMswpS-Ele9wHM-Uxp-VxzC\":[0.14378032144751685, " + "0.627919779177473, 0.6188731271454715, 0.8088384184584442, 0.8169160298605824, " + "0.9051151670055427, 0.558001941204895, 0.029409463113641787, 0.9532987674717762, " + "0.20833228278241533], \"TT9P9f-PXjQnvN-RBx-xRiS\":[0.8276005878909756, " + "0.470950932860423, 0.2442851528127543, 0.710599416715854, 0.3353731152359334, " + "0.622947602340124, 0.30675353671676797, 0.8190741661938367, 0.633630372770242, " + "0.9436322366112492], \"gLAnZc-oF7PC9o-ryd-MOXr\":[0.9742716809818137, " + "0.9114038616933997, 0.47459239268645104, 0.6054569900795078, 0.5515590901916287, " + "0.8833310208917589, 0.96476090778518, 
0.8873874315592357, 0.3577701257062156, " + "0.6993447306713452], \"zrq6BY-7FJg3hc-Dd1-bAJn\":[0.1038405592062176, " + "0.6757819253774818, 0.6386535502499314, 0.23598674876945303, " + "0.11046582465777044, 0.6426056925348297, 0.17289073092250662, " + "0.37116009951425233, 0.594677969672274, 0.49351456402872274], " + "\"gCKqtW-bLaoxgZ-CuW-M2re\":[0.934169137905867, 0.12015121444469123, " + "0.5009923777544698, 0.4689139716802634, 0.7226298925299507, 0.33486164698864984, " + "0.32944768657449996, 0.5051366150918063, 0.03228636228382431, " + "0.48211773870118435], \"SWqhI2-XnF9jVR-dT1-Yrtt\":[0.8005897112110444, " + "0.899180582368993, 0.9232176819588501, 0.8615673086606942, 0.9248122266449379, " + "0.5586489299212893, 0.40494513773898455, 0.4752644689010731, 0.6668395567417462, " + "0.9068738374244337], \"Z85F6M-cy5K4GP-7I5-5KS9\":[0.34761241187833714, " + "0.46467162849990507, 0.009781307454025168, 0.3174295126364216, " + "0.6405423361175397, 0.33838144910731327, 0.328860321648657, " + "0.032638966917555856, 0.32782524002924884, 0.7675689545937956], " + "\"rlcnbo-tFg1FfP-ra6-D9Z8\":[0.7450713997349928, 0.792502852203968, " + "0.9034039182796755, 0.49131654565079996, 0.25223293077647946, " + "0.9827253462450637, 0.1684868582627418, 0.0417161505112974, 0.8498128570850716, " + "0.8948779001812955]}"}, + {""}, + {"{5Srn6n-SP9fOS3-khz-Ljwt:[0.8537551959339321, 0.13473869413865858, " + "0.9806016478238296, 0.23014415892941564, 0.26853530959759686, " + "0.05484935641143551, 0.11181328816302816, 0.26510985318905933, " + "0.6350885463275475, 0.18209889263574142], " + "vrQmBC-2WlpWML-V5S-OLgM:[0.6982221340596457, 0.9260447299229463, " + "0.12488042737255534, 0.8859407191137862, 0.03201490973378984, " + "0.8371916387557367, 0.7894434066323907, 0.29667576138232743, 0.9837777568426148, " + "0.7773721913552772], 3ZbiXK-VvmhFcg-09V-w3g3:[0.20509046053951785, " + "0.9175575704931109, 0.305788438361256, 0.9923240410251069, 0.6612939841907548, " + "0.5922056063112593, 
0.15750800821536715, 0.6374743124669565, 0.4158097731627699, " + "0.00302193321816846], gMswpS-Ele9wHM-Uxp-VxzC:[0.14378032144751685, " + "0.627919779177473, 0.6188731271454715, 0.8088384184584442, 0.8169160298605824, " + "0.9051151670055427, 0.558001941204895, 0.029409463113641787, 0.9532987674717762, " + "0.20833228278241533], TT9P9f-PXjQnvN-RBx-xRiS:[0.8276005878909756, " + "0.470950932860423, 0.2442851528127543, 0.710599416715854, 0.3353731152359334, " + "0.622947602340124, 0.30675353671676797, 0.8190741661938367, 0.633630372770242, " + "0.9436322366112492], gLAnZc-oF7PC9o-ryd-MOXr:[0.9742716809818137, " + "0.9114038616933997, 0.47459239268645104, 0.6054569900795078, 0.5515590901916287, " + "0.8833310208917589, 0.96476090778518, 0.8873874315592357, 0.3577701257062156, " + "0.6993447306713452], zrq6BY-7FJg3hc-Dd1-bAJn:[0.1038405592062176, " + "0.6757819253774818, 0.6386535502499314, 0.23598674876945303, " + "0.11046582465777044, 0.6426056925348297, 0.17289073092250662, " + "0.37116009951425233, 0.594677969672274, 0.49351456402872274], " + "gCKqtW-bLaoxgZ-CuW-M2re:[0.934169137905867, 0.12015121444469123, " + "0.5009923777544698, 0.4689139716802634, 0.7226298925299507, 0.33486164698864984, " + "0.32944768657449996, 0.5051366150918063, 0.03228636228382431, " + "0.48211773870118435], SWqhI2-XnF9jVR-dT1-Yrtt:[0.8005897112110444, " + "0.899180582368993, 0.9232176819588501, 0.8615673086606942, 0.9248122266449379, " + "0.5586489299212893, 0.40494513773898455, 0.4752644689010731, 0.6668395567417462, " + "0.9068738374244337], Z85F6M-cy5K4GP-7I5-5KS9:[0.34761241187833714, " + "0.46467162849990507, 0.009781307454025168, 0.3174295126364216, " + "0.6405423361175397, 0.33838144910731327, 0.328860321648657, " + "0.032638966917555856, 0.32782524002924884, 0.7675689545937956], " + "rlcnbo-tFg1FfP-ra6-D9Z8:[0.7450713997349928, 0.792502852203968, " + "0.9034039182796755, 0.49131654565079996, 0.25223293077647946, " + "0.9827253462450637, 0.1684868582627418, 0.0417161505112974, 
0.8498128570850716, " + "0.8948779001812955]}"})}; + for (auto type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + DataTypePtr nested_key_data_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + auto val_type = std::get<1>(type_pair); + DataTypePtr nested_value_data_type_ptr = + DataTypeFactory::instance().create_data_type(val_type, 0, 0); + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_value_data_type_ptr)); + + DataTypePtr map_data_type_ptr = std::make_shared( + make_nullable(nested_key_data_type_ptr), make_nullable(array_data_type_ptr)); + + std::cout << "========= This type is " << map_data_type_ptr->get_name() << std::endl; + + DataTypeSerDeSPtr serde = map_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = map_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::string expect_from_string_str = std::get<4>(type_pair)[i]; + std::string expect_str_1 = std::get<5>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + std::cout << "test : " << st.to_json() << std::endl; + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + 
serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = map_data_type_ptr->create_column(); + Status status = map_data_type_ptr->from_string(rb, col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // map-scala-map (map>) + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + 
typedef std::tuple, std::vector, + std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = {FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{\"5H6iPe-CRvVE5Q-QnG-8WQb\":{},\"stDa6g-GML89aZ-w5u-LBe0\":{\"Vlekcq-LDCMo6f-" + "J7U-6rwB\":0.15375824233866453,\"4ljyNE-JMK1bSp-c05-EajL\":0.36153399717116075}," + "\"URvXyY-SMttaG4-Zol-mPak\":{\"xVaeqR-cj8I6EM-3Nt-queD\":0.003968938824538082," + "\"Vt2mSs-wacYDvl-qUi-B7kI\":0.6900852274982441,\"i3cJJh-oskdqti-KGU-U6gC\":0." + "40773692843073994},\"N3R9TI-jtBPGOQ-uRc-aWAD\":{\"xmGI09-FaCFrrR-O5J-29eu\":0." + "7166939407858642,\"fbxIwJ-HLvW94X-tPn-JgKT\":0.05904881148976504,\"ylE7y1-" + "wI3UhjR-ecQ-bNfo\":0.9293354174058581,\"zA0pEV-Lm8g4wq-NJc-TDou\":0." + "4000067127237942}}"}, + {"{\"5H6iPe-CRvVE5Q-QnG-8WQb\":{}, " + "\"stDa6g-GML89aZ-w5u-LBe0\":{\"Vlekcq-LDCMo6f-J7U-6rwB\":0.15375824233866453, " + "\"4ljyNE-JMK1bSp-c05-EajL\":0.36153399717116075}, " + "\"URvXyY-SMttaG4-Zol-mPak\":{\"xVaeqR-cj8I6EM-3Nt-queD\":0.003968938824538082, " + "\"Vt2mSs-wacYDvl-qUi-B7kI\":0.6900852274982441, " + "\"i3cJJh-oskdqti-KGU-U6gC\":0.40773692843073994}, " + "\"N3R9TI-jtBPGOQ-uRc-aWAD\":{\"xmGI09-FaCFrrR-O5J-29eu\":0.7166939407858642, " + "\"fbxIwJ-HLvW94X-tPn-JgKT\":0.05904881148976504, " + "\"ylE7y1-wI3UhjR-ecQ-bNfo\":0.9293354174058581, " + "\"zA0pEV-Lm8g4wq-NJc-TDou\":0.4000067127237942}}"}, + {""}, + {"{5H6iPe-CRvVE5Q-QnG-8WQb:{}, " + "stDa6g-GML89aZ-w5u-LBe0:{Vlekcq-LDCMo6f-J7U-6rwB:0.15375824233866453, " + "4ljyNE-JMK1bSp-c05-EajL:0.36153399717116075}, " + "URvXyY-SMttaG4-Zol-mPak:{xVaeqR-cj8I6EM-3Nt-queD:0.003968938824538082, " + "Vt2mSs-wacYDvl-qUi-B7kI:0.6900852274982441, " + "i3cJJh-oskdqti-KGU-U6gC:0.40773692843073994}, " + "N3R9TI-jtBPGOQ-uRc-aWAD:{xmGI09-FaCFrrR-O5J-29eu:0.7166939407858642, " + "fbxIwJ-HLvW94X-tPn-JgKT:0.05904881148976504, " + "ylE7y1-wI3UhjR-ecQ-bNfo:0.9293354174058581, " + "zA0pEV-Lm8g4wq-NJc-TDou:0.4000067127237942}}"})}; + for (auto 
type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + DataTypePtr nested_key_data_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + auto val_type = std::get<1>(type_pair); + DataTypePtr nested_value_data_type_ptr = + DataTypeFactory::instance().create_data_type(val_type, 0, 0); + + DataTypePtr nested_map_data_type_ptr = + std::make_shared(make_nullable(nested_key_data_type_ptr), + make_nullable(nested_value_data_type_ptr)); + + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(std::make_shared()), + make_nullable(nested_map_data_type_ptr)); + + std::cout << " ========= ========= This type is " << array_data_type_ptr->get_name() + << std::endl; + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::string expect_from_string_str = std::get<4>(type_pair)[i]; + std::string expect_str_1 = std::get<5>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, 
buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = array_data_type_ptr->create_column(); + Status status = array_data_type_ptr->from_string(rb, col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } +} + +TEST(TextSerde, test_slice) { + Slice slice("[\"hello\", \"world\"]"); + slice.remove_prefix(1); + slice.remove_suffix(1); + std::vector slices; + slices.emplace_back(slice); + // size_t slice_size = slice.size; + 
bool has_quote = false; + int nested_level = 0; + + for (int idx = 0; idx < slice.size; ++idx) { + char c = slice[idx]; + std::cout << "c:" << c << " " << fmt::format("{}, {}", c == '[', c == ']') << std::endl; + if (c == '"' || c == '\'') { + has_quote = !has_quote; + } else if (!has_quote && (c == '[' || c == '{')) { + ++nested_level; + } else if (!has_quote && (c == ']' || c == '}')) { + --nested_level; + } else if (!has_quote && nested_level == 0 && c == ',') { + // if meet collection_delimiter and not in quote, we can make it as an item. + slices.back().remove_suffix(slice.size - idx); + // add next total slice.(slice data will not change, so we can use slice directly) + // skip delimiter + std::cout << "back: " << slices.back().to_string() << std::endl; + std::cout << "insert: " << Slice(slice.data + idx + 1, slice.size - idx - 1).to_string() + << std::endl; + Slice next(slice.data + idx + 1, slice.size - idx - 1); + next.trim_prefix(); + slices.emplace_back(next); + } + } + std::cout << "slices size: " << nested_level << std::endl; + for (auto s : slices) { + std::cout << s.to_string() << std::endl; + } +} } // namespace doris::vectorized \ No newline at end of file