diff --git a/be/src/olap/types.h b/be/src/olap/types.h index b01cee2484353e..0701aca675a0d2 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -55,6 +55,8 @@ #include "util/types.h" #include "vec/common/arena.h" #include "vec/core/wide_integer.h" +#include "vec/runtime/ipv4_value.h" +#include "vec/runtime/ipv6_value.h" #include "vec/runtime/vdatetime_value.h" namespace doris { @@ -728,7 +730,7 @@ struct CppTypeTraits { }; template <> struct CppTypeTraits { - using CppType = uint128_t; + using CppType = int128_t; using UnsignedCppType = uint128_t; }; template <> @@ -980,11 +982,8 @@ struct FieldTypeTraits : public BaseFieldTypeTraits { static Status from_string(void* buf, const std::string& scan_key, const int precision, const int scale) { - StringParser::ParseResult result = StringParser::PARSE_SUCCESS; - uint32_t value = StringParser::string_to_unsigned_int(scan_key.c_str(), - scan_key.size(), &result); - - if (result == StringParser::PARSE_FAILURE) { + uint32_t value; + if (!IPv4Value::from_string(value, scan_key)) { return Status::Error( "FieldTypeTraits::from_string meet PARSE_FAILURE"); } @@ -994,10 +993,16 @@ struct FieldTypeTraits static std::string to_string(const void* src) { uint32_t value = *reinterpret_cast(src); - std::stringstream ss; - ss << ((value >> 24) & 0xFF) << '.' << ((value >> 16) & 0xFF) << '.' - << ((value >> 8) & 0xFF) << '.' 
<< (value & 0xFF); - return ss.str(); + IPv4Value ipv4_value(value); + return ipv4_value.to_string(); + } + + static void set_to_max(void* buf) { + *reinterpret_cast(buf) = 0xFFFFFFFF; // 255.255.255.255 + } + + static void set_to_min(void* buf) { + *reinterpret_cast(buf) = 0; // 0.0.0.0 } }; @@ -1006,67 +1011,27 @@ struct FieldTypeTraits : public BaseFieldTypeTraits { static Status from_string(void* buf, const std::string& scan_key, const int precision, const int scale) { - std::istringstream iss(scan_key); - std::string token; - uint128_t result = 0; - int count = 0; - - while (std::getline(iss, token, ':')) { - if (token.empty()) { - count += 8 - count; - break; - } - - if (count > 8) { - return Status::Error( - "FieldTypeTraits::from_string meet PARSE_FAILURE"); - } - - uint16_t value = 0; - std::istringstream ss(token); - if (!(ss >> std::hex >> value)) { - return Status::Error( - "FieldTypeTraits::from_string meet PARSE_FAILURE"); - } - - result = (result << 16) | value; - count++; - } - - if (count < 8) { + int128_t value; + if (!IPv6Value::from_string(value, scan_key)) { return Status::Error( "FieldTypeTraits::from_string meet PARSE_FAILURE"); } - - *reinterpret_cast(buf) = result; + *reinterpret_cast(buf) = value; return Status::OK(); } static std::string to_string(const void* src) { - std::stringstream result; - uint128_t ipv6 = *reinterpret_cast(src); - - for (int i = 0; i < 8; i++) { - uint16_t part = static_cast((ipv6 >> (112 - i * 16)) & 0xFFFF); - result << std::to_string(part); - if (i != 7) { - result << ":"; - } - } - - return result.str(); + int128_t value = *reinterpret_cast(src); + IPv6Value ipv6_value(value); + return ipv6_value.to_string(); } static void set_to_max(void* buf) { - *reinterpret_cast(buf) = - static_cast(999999999999999999ll) * 100000000000000000ll * 1000ll + - static_cast(99999999999999999ll) * 1000ll + 999ll; + *reinterpret_cast(buf) = -1; // all bits set: ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff } static void set_to_min(void* buf) { - *reinterpret_cast(buf) = - 
-(static_cast(999999999999999999ll) * 100000000000000000ll * 1000ll + - static_cast(99999999999999999ll) * 1000ll + 999ll); + *reinterpret_cast(buf) = 0; // :: } }; diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp index 6662f2e0ba7aee..ea58a08bc84465 100644 --- a/be/src/util/arrow/row_batch.cpp +++ b/be/src/util/arrow/row_batch.cpp @@ -89,6 +89,12 @@ Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr(type.precision, type.scale); break; + case TYPE_IPV4: + *result = arrow::uint32(); + break; + case TYPE_IPV6: + *result = arrow::utf8(); + break; case TYPE_BOOLEAN: *result = arrow::boolean(); break; diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp b/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp index 228c8bed16cf78..dc9559ec1d36ed 100644 --- a/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp @@ -20,6 +20,7 @@ #include #include "vec/columns/column_const.h" +#include "vec/io/io_helper.h" namespace doris { namespace vectorized { @@ -61,5 +62,31 @@ Status DataTypeIPv4SerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } +Status DataTypeIPv4SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + IPv4 data = assert_cast(*ptr).get_element(row_num); + IPv4Value ipv4_value(data); + std::string ipv4_str = ipv4_value.to_string(); + bw.write(ipv4_str.c_str(), ipv4_str.length()); + return Status::OK(); +} + +Status DataTypeIPv4SerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = reinterpret_cast(column); + ReadBuffer rb(slice.data, slice.size); + IPv4 val = 0; + if (!read_ipv4_text_impl(val, rb)) { + 
return Status::InvalidArgument("parse ipv4 fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + return Status::OK(); +} + } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.h b/be/src/vec/data_types/serde/data_type_ipv4_serde.h index ce0dd476b93028..654b7d9532cb4e 100644 --- a/be/src/vec/data_types/serde/data_type_ipv4_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.h @@ -46,6 +46,10 @@ class DataTypeIPv4SerDe : public DataTypeNumberSerDe { int row_idx, bool col_const) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, + const FormatOptions& options) const override; private: template diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp index b95c7edfd242ac..7c9d0f4258280f 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp @@ -20,6 +20,7 @@ #include #include "vec/columns/column_const.h" +#include "vec/io/io_helper.h" namespace doris { namespace vectorized { @@ -61,5 +62,31 @@ Status DataTypeIPv6SerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } +Status DataTypeIPv6SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + IPv6 data = assert_cast(*ptr).get_element(row_num); + IPv6Value ipv6_value(data); + std::string 
ipv6_str = ipv6_value.to_string(); + bw.write(ipv6_str.c_str(), ipv6_str.length()); + return Status::OK(); +} + +Status DataTypeIPv6SerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = reinterpret_cast(column); + ReadBuffer rb(slice.data, slice.size); + IPv6 val = 0; + if (!read_ipv6_text_impl(val, rb)) { + return Status::InvalidArgument("parse ipv6 fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + return Status::OK(); +} + } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.h b/be/src/vec/data_types/serde/data_type_ipv6_serde.h index a5b7add05449b5..e48039281c1dde 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.h @@ -49,6 +49,10 @@ class DataTypeIPv6SerDe : public DataTypeNumberSerDe { int row_idx, bool col_const) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, + const FormatOptions& options) const override; private: template diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp index a4921c56316d4a..bd7072ee79a1fc 100644 --- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp @@ -70,6 +70,8 @@ #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_ipv4.h" +#include "vec/data_types/data_type_ipv6.h" #include 
"vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" @@ -98,6 +100,8 @@ void serialize_and_deserialize_arrow_test() { {"k5", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 5, TYPE_DECIMAL32, false}, {"k6", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 6, TYPE_DECIMAL64, false}, {"k12", FieldType::OLAP_FIELD_TYPE_DATETIMEV2, 12, TYPE_DATETIMEV2, false}, + {"k8", FieldType::OLAP_FIELD_TYPE_IPV4, 8, TYPE_IPV4, false}, + {"k9", FieldType::OLAP_FIELD_TYPE_IPV6, 9, TYPE_IPV6, false}, }; } else { cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true}, @@ -445,6 +449,34 @@ void serialize_and_deserialize_arrow_test() { block.insert(type_and_name); } break; + case TYPE_IPV4: + tslot.__set_slotType(type_desc.to_thrift()); + { + auto vec = vectorized::ColumnIPv4::create(); + auto& data = vec->get_data(); + for (int i = 0; i < row_num; ++i) { + data.push_back(i); + } + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, + col_name); + block.insert(std::move(type_and_name)); + } + break; + case TYPE_IPV6: + tslot.__set_slotType(type_desc.to_thrift()); + { + auto vec = vectorized::ColumnIPv6::create(); + auto& data = vec->get_data(); + for (int i = 0; i < row_num; ++i) { + data.push_back(i); + } + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, + col_name); + block.insert(std::move(type_and_name)); + } + break; default: break; } diff --git a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp index 2f337bbed7eeaf..315ecded4900ac 100644 --- a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp @@ -237,15 +237,83 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) { StringRef max_s_d = ser_col->get_data_at(1); StringRef 
rand_s_d = ser_col->get_data_at(2); - std::cout << "min(" << min_s << ") with datat_ype_str:" << min_s_d << std::endl; - std::cout << "max(" << max_s << ") with datat_ype_str:" << max_s_d << std::endl; - std::cout << "rand(" << rand_date << ") with datat_type_str:" << rand_s_d << std::endl; + std::cout << "min(" << min_s << ") with data_type_str:" << min_s_d << std::endl; + std::cout << "max(" << max_s << ") with data_type_str:" << max_s_d << std::endl; + std::cout << "rand(" << rand_date << ") with data_type_str:" << rand_s_d << std::endl; EXPECT_EQ(min_s, min_s_d.to_string()); EXPECT_EQ(max_s, max_s_d.to_string()); EXPECT_EQ(rand_date, rand_s_d.to_string()); } } + // ipv4 and ipv6 type + { + typedef std::pair FieldType_RandStr; + std::vector date_scala_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "127.0.0.1"), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, "2405:9800:9800:66::2")}; + for (auto pair : date_scala_field_types) { + auto type = pair.first; + DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); + std::cout << "========= This type is " << data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + std::unique_ptr min_wf(WrapperField::create_by_type(type)); + std::unique_ptr max_wf(WrapperField::create_by_type(type)); + std::unique_ptr rand_wf(WrapperField::create_by_type(type)); + + min_wf->set_to_min(); + max_wf->set_to_max(); + EXPECT_EQ(rand_wf->from_string(pair.second, 0, 0).ok(), true); + + string min_s = min_wf->to_string(); + string max_s = max_wf->to_string(); + string rand_ip = rand_wf->to_string(); + + Slice min_rb(min_s.data(), min_s.size()); + Slice max_rb(max_s.data(), max_s.size()); + Slice rand_rb(rand_ip.data(), rand_ip.size()); + + auto col = data_type_ptr->create_column(); + DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); + // make use c++ lib equals to wrapper field from_string behavior + DataTypeSerDe::FormatOptions formatOptions; + + Status st 
= serde->deserialize_one_cell_from_json(*col, min_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + st = serde->deserialize_one_cell_from_json(*col, max_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + st = serde->deserialize_one_cell_from_json(*col, rand_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + + auto ser_col = ColumnString::create(); + ser_col->reserve(3); + VectorBufferWriter buffer_writer(*ser_col.get()); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); + buffer_writer.commit(); + st = serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); + buffer_writer.commit(); + st = serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); + buffer_writer.commit(); + rtrim(min_s); + rtrim(max_s); + rtrim(rand_ip); + StringRef min_s_d = ser_col->get_data_at(0); + StringRef max_s_d = ser_col->get_data_at(1); + StringRef rand_s_d = ser_col->get_data_at(2); + + std::cout << "min(" << min_s << ") with data_type_str:" << min_s_d << std::endl; + std::cout << "max(" << max_s << ") with data_type_str:" << max_s_d << std::endl; + std::cout << "rand(" << rand_ip << ") with data_type_str:" << rand_s_d << std::endl; + EXPECT_EQ(min_s, min_s_d.to_string()); + EXPECT_EQ(max_s, max_s_d.to_string()); + EXPECT_EQ(rand_ip, rand_s_d.to_string()); + } + } + // nullable data type with const column { DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type( diff --git a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp index 5ef9b3c52fbf57..5fff0d75ccfedc 100644 --- a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp @@ -54,6 +54,8 @@ #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_hll.h" 
+#include "vec/data_types/data_type_ipv4.h" +#include "vec/data_types/data_type_ipv6.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_quantilestate.h" @@ -61,6 +63,8 @@ #include "vec/data_types/data_type_time_v2.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vslot_ref.h" +#include "vec/runtime/ipv4_value.h" +#include "vec/runtime/ipv6_value.h" #include "vec/runtime/vdatetime_value.h" #include "vec/sink/vmysql_result_writer.cpp" #include "vec/sink/vmysql_result_writer.h" @@ -76,7 +80,9 @@ void serialize_and_deserialize_mysql_test() { {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, false}, {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, TYPE_DECIMAL128I, false}, {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, TYPE_DATETIME, false}, - {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}}; + {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}, + {"k5", FieldType::OLAP_FIELD_TYPE_IPV4, 5, TYPE_IPV4, false}, + {"k6", FieldType::OLAP_FIELD_TYPE_IPV6, 6, TYPE_IPV6, false}}; int row_num = 7; // make desc and generate block vectorized::VExprContextSPtrs _output_vexpr_ctxs; @@ -242,6 +248,40 @@ void serialize_and_deserialize_mysql_test() { block.insert(test_datetime); } break; + case TYPE_IPV4: + tslot.__set_slotType(type_desc.to_thrift()); + { + auto column_vector_ipv4 = vectorized::ColumnVector::create(); + auto& ipv4_data = column_vector_ipv4->get_data(); + for (int i = 0; i < row_num; ++i) { + IPv4Value ipv4_value; + bool res = ipv4_value.from_string("192.168.0." 
+ std::to_string(i)); + ASSERT_TRUE(res); + ipv4_data.push_back(ipv4_value.value()); + } + vectorized::DataTypePtr ipv4_type(std::make_shared()); + vectorized::ColumnWithTypeAndName test_ipv4(column_vector_ipv4->get_ptr(), + ipv4_type, col_name); + block.insert(test_ipv4); + } + break; + case TYPE_IPV6: + tslot.__set_slotType(type_desc.to_thrift()); + { + auto column_vector_ipv6 = vectorized::ColumnVector::create(); + auto& ipv6_data = column_vector_ipv6->get_data(); + for (int i = 0; i < row_num; ++i) { + IPv6Value ipv6_value; + bool res = ipv6_value.from_string("2001:2000:3080:1351::" + std::to_string(i)); + ASSERT_TRUE(res); + ipv6_data.push_back(ipv6_value.value()); + } + vectorized::DataTypePtr ipv6_type(std::make_shared()); + vectorized::ColumnWithTypeAndName test_ipv6(column_vector_ipv6->get_ptr(), + ipv6_type, col_name); + block.insert(test_ipv6); + } + break; default: break; } diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp index 356b984041e9c6..78cd4649998817 100644 --- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp @@ -43,6 +43,8 @@ #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_ipv4.h" +#include "vec/data_types/data_type_ipv6.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_quantilestate.h" @@ -184,6 +186,7 @@ inline void serialize_and_deserialize_pb_test() { check_pb_col(nullable_data_type, *nullable_column.get()); } // int with 1024 batch size + std::cout << "==== int with 1024 batch size === " << std::endl; { auto vec = vectorized::ColumnVector::create(); auto& data = vec->get_data(); @@ -198,6 +201,28 @@ inline void serialize_and_deserialize_pb_test() { ->insert_range_from_not_nullable(*vec, 0, 
1024); check_pb_col(nullable_data_type, *nullable_column.get()); } + // ipv4 + std::cout << "==== ipv4 === " << std::endl; + { + auto vec = vectorized::ColumnVector::create(); + auto& data = vec->get_data(); + for (int i = 0; i < 1024; ++i) { + data.push_back(i); + } + vectorized::DataTypePtr data_type(std::make_shared()); + check_pb_col(data_type, *vec.get()); + } + // ipv6 + std::cout << "==== ipv6 === " << std::endl; + { + auto vec = vectorized::ColumnVector::create(); + auto& data = vec->get_data(); + for (int i = 0; i < 1024; ++i) { + data.push_back(i); + } + vectorized::DataTypePtr data_type(std::make_shared()); + check_pb_col(data_type, *vec.get()); + } } TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTest) { diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp index c6fc2d42f48fb0..ea1626790a3235 100644 --- a/be/test/vec/data_types/serde/data_type_serde_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp @@ -45,6 +45,8 @@ #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_ipv4.h" +#include "vec/data_types/data_type_ipv6.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_quantilestate.h" @@ -192,6 +194,26 @@ inline void serialize_and_deserialize_pb_test() { ->insert_range_from_not_nullable(*vec, 0, 1024); check_pb_col(nullable_data_type, *nullable_column.get()); } + // ipv4 + { + auto vec = vectorized::ColumnVector::create(); + auto& data = vec->get_data(); + for (int i = 0; i < 1024; ++i) { + data.push_back(i); + } + vectorized::DataTypePtr data_type(std::make_shared()); + check_pb_col(data_type, *vec.get()); + } + // ipv6 + { + auto vec = vectorized::ColumnVector::create(); + auto& data = vec->get_data(); + for (int i = 0; i < 1024; ++i) { + data.push_back(i); + } + 
vectorized::DataTypePtr data_type(std::make_shared()); + check_pb_col(data_type, *vec.get()); + } } TEST(DataTypeSerDeTest, DataTypeScalaSerDeTest) { diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp index 0dbe03db2f6b63..d94c36828bdb66 100644 --- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp @@ -238,15 +238,83 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { StringRef max_s_d = ser_col->get_data_at(1); StringRef rand_s_d = ser_col->get_data_at(2); - std::cout << "min(" << min_s << ") with datat_ype_str:" << min_s_d << std::endl; - std::cout << "max(" << max_s << ") with datat_ype_str:" << max_s_d << std::endl; - std::cout << "rand(" << rand_date << ") with datat_type_str:" << rand_s_d << std::endl; + std::cout << "min(" << min_s << ") with data_type_str:" << min_s_d << std::endl; + std::cout << "max(" << max_s << ") with data_type_str:" << max_s_d << std::endl; + std::cout << "rand(" << rand_date << ") with data_type_str:" << rand_s_d << std::endl; EXPECT_EQ(min_s, min_s_d.to_string()); EXPECT_EQ(max_s, max_s_d.to_string()); EXPECT_EQ(rand_date, rand_s_d.to_string()); } } + // ipv4 and ipv6 + { + typedef std::pair FieldType_RandStr; + std::vector ip_scala_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "127.0.0.1"), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, "2405:9800:9800:66::2")}; + for (auto pair : ip_scala_field_types) { + auto type = pair.first; + DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); + std::cout << "========= This type is " << data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + std::unique_ptr min_wf(WrapperField::create_by_type(type)); + std::unique_ptr max_wf(WrapperField::create_by_type(type)); + std::unique_ptr rand_wf(WrapperField::create_by_type(type)); + + min_wf->set_to_min(); + 
max_wf->set_to_max(); + static_cast(rand_wf->from_string(pair.second, 0, 0)); + + string min_s = min_wf->to_string(); + string max_s = max_wf->to_string(); + string rand_ip = rand_wf->to_string(); + + Slice min_rb(min_s.data(), min_s.size()); + Slice max_rb(max_s.data(), max_s.size()); + Slice rand_rb(rand_ip.data(), rand_ip.size()); + + auto col = data_type_ptr->create_column(); + DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); + // make use c++ lib equals to wrapper field from_string behavior + DataTypeSerDe::FormatOptions formatOptions; + + Status st = serde->deserialize_one_cell_from_json(*col, min_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + st = serde->deserialize_one_cell_from_json(*col, max_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + st = serde->deserialize_one_cell_from_json(*col, rand_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + + auto ser_col = ColumnString::create(); + ser_col->reserve(3); + VectorBufferWriter buffer_writer(*ser_col.get()); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); + buffer_writer.commit(); + st = serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); + buffer_writer.commit(); + st = serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); + buffer_writer.commit(); + rtrim(min_s); + rtrim(max_s); + rtrim(rand_ip); + StringRef min_s_d = ser_col->get_data_at(0); + StringRef max_s_d = ser_col->get_data_at(1); + StringRef rand_s_d = ser_col->get_data_at(2); + + std::cout << "min(" << min_s << ") with data_type_str:" << min_s_d << std::endl; + std::cout << "max(" << max_s << ") with data_type_str:" << max_s_d << std::endl; + std::cout << "rand(" << rand_ip << ") with data_type_str:" << rand_s_d << std::endl; + EXPECT_EQ(min_s, min_s_d.to_string()); + EXPECT_EQ(max_s, max_s_d.to_string()); + EXPECT_EQ(rand_ip, rand_s_d.to_string()); + } + } + // nullable data type 
with const column { DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type(