From 81266c7bbef1963cf681667da020b7c4e67b682e Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Thu, 29 Aug 2024 19:37:02 +0800 Subject: [PATCH] [feature](variant) support sub schema for variant type --- be/src/olap/rowset/segment_v2/segment.cpp | 5 +- .../segment_v2/vertical_segment_writer.cpp | 32 +- be/src/olap/tablet_reader.cpp | 2 + be/src/olap/tablet_schema.cpp | 8 +- be/src/olap/tablet_schema.h | 8 + be/src/runtime/types.cpp | 16 + be/src/tools/meta_tool.cpp | 755 +++++++++--------- be/src/vec/columns/column_object.cpp | 61 +- be/src/vec/columns/column_object.h | 10 +- be/src/vec/columns/subcolumn_tree.h | 1 + be/src/vec/common/schema_util.cpp | 69 +- be/src/vec/core/field.h | 34 +- .../vec/data_types/convert_field_to_type.cpp | 12 +- be/src/vec/data_types/data_type.h | 9 + be/src/vec/data_types/data_type_array.cpp | 24 + be/src/vec/data_types/data_type_array.h | 2 + be/src/vec/data_types/data_type_bitmap.h | 5 + be/src/vec/data_types/data_type_date_time.h | 1 + be/src/vec/data_types/data_type_decimal.h | 7 + be/src/vec/data_types/data_type_factory.cpp | 28 +- be/src/vec/data_types/data_type_factory.hpp | 6 +- be/src/vec/data_types/data_type_jsonb.h | 9 + be/src/vec/data_types/data_type_nullable.h | 8 + be/src/vec/data_types/data_type_number_base.h | 8 + be/src/vec/data_types/data_type_string.h | 8 + be/src/vec/data_types/data_type_time.h | 7 + be/src/vec/data_types/data_type_time_v2.h | 11 + be/src/vec/data_types/get_least_supertype.cpp | 4 + .../serde/data_type_array_serde.cpp | 38 +- .../data_types/serde/data_type_array_serde.h | 5 +- .../data_types/serde/data_type_date64_serde.h | 7 + .../serde/data_type_datetimev2_serde.h | 7 + .../data_types/serde/data_type_datev2_serde.h | 7 + .../serde/data_type_decimal_serde.h | 7 + .../data_types/serde/data_type_ipv4_serde.h | 7 + .../data_types/serde/data_type_ipv6_serde.h | 7 + .../serde/data_type_jsonb_serde.cpp | 3 +- .../data_types/serde/data_type_jsonb_serde.h | 2 +- 
.../serde/data_type_nothing_serde.h | 8 + .../serde/data_type_nullable_serde.cpp | 12 +- .../serde/data_type_nullable_serde.h | 7 +- .../data_types/serde/data_type_number_serde.h | 6 +- .../serde/data_type_object_serde.cpp | 33 + .../data_types/serde/data_type_object_serde.h | 5 + .../vec/data_types/serde/data_type_serde.cpp | 86 +- be/src/vec/data_types/serde/data_type_serde.h | 11 +- .../data_types/serde/data_type_string_serde.h | 2 +- .../data_types/serde/data_type_time_serde.h | 7 + be/src/vec/functions/function_cast.h | 35 +- .../serde/data_type_serde_mysql_test.cpp | 2 +- .../org/apache/doris/catalog/ScalarType.java | 6 +- .../org/apache/doris/catalog/StructField.java | 8 +- .../java/org/apache/doris/catalog/Type.java | 29 + .../org/apache/doris/catalog/VariantType.java | 79 +- .../org/apache/doris/nereids/DorisParser.g4 | 8 + fe/fe-core/src/main/cup/sql_parser.cup | 22 + .../doris/alter/SchemaChangeHandler.java | 5 +- .../org/apache/doris/analysis/CastExpr.java | 3 + .../java/org/apache/doris/catalog/Column.java | 50 +- .../nereids/parser/LogicalPlanBuilder.java | 33 +- .../plans/commands/info/ColumnDefinition.java | 13 + .../doris/nereids/types/StructField.java | 19 +- .../doris/nereids/types/VariantType.java | 54 +- regression-test/conf/regression-conf.groovy | 7 - regression-test/data/variant_p0/nested.out | 18 +- .../data/variant_p0/predefine/load.out | 151 ++++ .../data/variant_p0/predefine/sql/q01.out | 34 + .../data/variant_p0/predefine/sql/q02.out | 103 +++ .../variant_github_events_p2/load.groovy | 7 + regression-test/suites/variant_p0/load.groovy | 8 +- .../suites/variant_p0/nested.groovy | 1 - .../suites/variant_p0/predefine/load.groovy | 291 +++++++ .../suites/variant_p0/predefine/sql/q01.sql | 2 + .../suites/variant_p0/predefine/sql/q02.sql | 24 + 74 files changed, 1811 insertions(+), 618 deletions(-) create mode 100644 regression-test/data/variant_p0/predefine/load.out create mode 100644 regression-test/data/variant_p0/predefine/sql/q01.out 
create mode 100644 regression-test/data/variant_p0/predefine/sql/q02.out create mode 100644 regression-test/suites/variant_p0/predefine/load.groovy create mode 100644 regression-test/suites/variant_p0/predefine/sql/q01.sql create mode 100644 regression-test/suites/variant_p0/predefine/sql/q02.sql diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 513c0be4f8cd14..6d63d08db244f3 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -1143,10 +1143,7 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescripto } RETURN_IF_ERROR( iterator_hint->read_by_rowids(single_row_loc.data(), 1, file_storage_column)); - // iterator_hint.reset(nullptr); - // Get it's inner field, for JSONB case - vectorized::Field field = remove_nullable(storage_type)->get_default(); - file_storage_column->get(0, field); + vectorized::Field field = storage_type->get_type_field(*file_storage_column, 0); result->insert(field); } else { int index = (slot->col_unique_id() >= 0) ? 
schema.field_index(slot->col_unique_id()) diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 0846b0fc1186a8..9be60dda587ed1 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -17,6 +17,7 @@ #include "olap/rowset/segment_v2/vertical_segment_writer.h" +#include #include #include @@ -43,7 +44,8 @@ #include "olap/olap_common.h" #include "olap/partial_update_info.h" #include "olap/primary_key_index.h" -#include "olap/row_cursor.h" // RowCursor // IWYU pragma: keep +#include "olap/row_cursor.h" // RowCursor // IWYU pragma: keep +#include "olap/rowset/rowset_fwd.h" #include "olap/rowset/rowset_writer_context.h" // RowsetWriterContext #include "olap/rowset/segment_creator.h" #include "olap/rowset/segment_v2/column_writer.h" // ColumnWriter @@ -70,7 +72,10 @@ #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" #include "vec/io/reader_buffer.h" +#include "vec/json/path_in_data.h" #include "vec/jsonb/serialize.h" #include "vec/olap/olap_data_convertor.h" @@ -1040,6 +1045,10 @@ Status VerticalSegmentWriter::_append_block_with_variant_subcolumns(RowsInBlock& remove_nullable(column_ref)->assume_mutable_ref()); const TabletColumnPtr& parent_column = _tablet_schema->columns()[i]; + std::map typed_columns; + for (const auto& col : parent_column->get_sub_columns()) { + typed_columns[col->name()] = col; + } // generate column info by entry info auto generate_column_info = [&](const auto& entry) { const std::string& column_name = @@ -1050,6 +1059,13 @@ Status VerticalSegmentWriter::_append_block_with_variant_subcolumns(RowsInBlock& auto full_path = full_path_builder.append(parent_column->name_lower_case(), false) .append(entry->path.get_parts(), false) .build(); + // typed 
column takes no effect on nested column + if (typed_columns.contains(entry->path.get_path()) && !entry->path.has_nested_part()) { + TabletColumn typed_column = *typed_columns[entry->path.get_path()]; + typed_column.set_path_info(full_path); + typed_column.set_parent_unique_id(parent_column->unique_id()); + return typed_column; + } return vectorized::schema_util::get_column_by_type( final_data_type_from_object, column_name, vectorized::schema_util::ExtraInfo { @@ -1069,14 +1085,22 @@ Status VerticalSegmentWriter::_append_block_with_variant_subcolumns(RowsInBlock& CHECK(entry->data.is_finalized()); int current_column_id = column_id++; TabletColumn tablet_column = generate_column_info(entry); + vectorized::DataTypePtr storage_type = + vectorized::DataTypeFactory::instance().create_data_type(tablet_column); + vectorized::DataTypePtr finalized_type = entry->data.get_least_common_type(); + vectorized::ColumnPtr current_column = + entry->data.get_finalized_column_ptr()->get_ptr(); + if (!storage_type->equals(*finalized_type)) { + RETURN_IF_ERROR(vectorized::schema_util::cast_column( + {current_column, finalized_type, ""}, storage_type, &current_column)); + } vectorized::schema_util::inherit_column_attributes(*parent_column, tablet_column, _flush_schema); RETURN_IF_ERROR(_create_column_writer(current_column_id /*unused*/, tablet_column, _flush_schema)); RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_column( - {entry->data.get_finalized_column_ptr()->get_ptr(), - entry->data.get_least_common_type(), tablet_column.name()}, - data.row_pos, data.num_rows, current_column_id)); + {current_column->get_ptr(), storage_type, tablet_column.name()}, data.row_pos, + data.num_rows, current_column_id)); // convert column data from engine format to storage layer format auto [status, column] = _olap_data_convertor->convert_column_data(current_column_id); if (!status.ok()) { diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 
a83e0bfdbf4c30..1fc3e81f389450 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -277,6 +277,8 @@ TabletColumn TabletReader::materialize_column(const TabletColumn& orig) { cast_type.type); } column_with_cast_type.set_type(filed_type); + column_with_cast_type.set_precision_frac(cast_type.precision, cast_type.scale); + column_with_cast_type.set_is_decimal(cast_type.precision > 0); return column_with_cast_type; } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 3ec5d22166477f..168da2eefbeb30 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -117,6 +117,10 @@ FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) { return FieldType::OLAP_FIELD_TYPE_JSONB; case PrimitiveType::TYPE_VARIANT: return FieldType::OLAP_FIELD_TYPE_VARIANT; + case PrimitiveType::TYPE_IPV4: + return FieldType::OLAP_FIELD_TYPE_IPV4; + case PrimitiveType::TYPE_IPV6: + return FieldType::OLAP_FIELD_TYPE_IPV6; case PrimitiveType::TYPE_LAMBDA_FUNCTION: return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented case PrimitiveType::TYPE_AGG_STATE: @@ -604,8 +608,10 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { if (_has_default_value) { column->set_default_value(_default_value); } - if (_is_decimal) { + if (_precision >= 0) { column->set_precision(_precision); + } + if (_frac >= 0) { column->set_frac(_frac); } column->set_length(_length); diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index c813d6f0ef8722..a5868173a7ac45 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -182,6 +183,13 @@ class TabletColumn : public MetadataAdder { const std::vector& sparse_columns() const; size_t num_sparse_columns() const { return _num_sparse_columns; } + void set_precision_frac(int32_t precision, int32_t frac) { + _precision = precision; + _frac = frac; + } + + 
void set_is_decimal(bool is_decimal) { _is_decimal = is_decimal; } + Status check_valid() const { if (type() != FieldType::OLAP_FIELD_TYPE_ARRAY && type() != FieldType::OLAP_FIELD_TYPE_STRUCT && diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 7b7154fb38a438..4fc4864765438f 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -28,6 +28,7 @@ #include #include "olap/olap_define.h" +#include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" namespace doris { @@ -113,6 +114,21 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) contains_nulls.push_back(node.contains_nulls[1]); break; } + case TTypeNodeType::VARIANT: { + // complex variant type + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 1); + DCHECK(!node.__isset.contains_nulls); + type = TYPE_VARIANT; + contains_nulls.reserve(node.struct_fields.size()); + for (size_t i = 0; i < node.struct_fields.size(); i++) { + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + field_names.push_back(node.struct_fields[i].name); + contains_nulls.push_back(node.struct_fields[i].contains_null); + } + break; + } default: DCHECK(false) << node.type; } diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 076b4fae18b025..0072d0be4bed16 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -1,377 +1,378 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "gutil/strings/numbers.h" -#include "gutil/strings/split.h" -#include "gutil/strings/substitute.h" -#include "io/fs/file_reader.h" -#include "io/fs/local_file_system.h" -#include "json2pb/pb_to_json.h" -#include "olap/data_dir.h" -#include "olap/olap_define.h" -#include "olap/options.h" -#include "olap/rowset/segment_v2/binary_plain_page.h" -#include "olap/rowset/segment_v2/column_reader.h" -#include "olap/storage_engine.h" -#include "olap/tablet_meta.h" -#include "olap/tablet_meta_manager.h" -#include "olap/utils.h" -#include "util/coding.h" -#include "util/crc32c.h" - -using std::filesystem::path; -using doris::DataDir; -using doris::StorageEngine; -using doris::OlapMeta; -using doris::Status; -using doris::TabletMeta; -using doris::TabletMetaManager; -using doris::Slice; -using strings::Substitute; -using doris::segment_v2::SegmentFooterPB; -using doris::segment_v2::ColumnReader; -using doris::segment_v2::PageHandle; -using doris::segment_v2::PagePointer; -using doris::segment_v2::ColumnReaderOptions; -using doris::segment_v2::ColumnIteratorOptions; -using doris::segment_v2::PageFooterPB; -using doris::io::FileReaderSPtr; - -const std::string HEADER_PREFIX = "tabletmeta_"; - -DEFINE_string(root_path, "", "storage root path"); -DEFINE_string(operation, "get_meta", - "valid operation: get_meta, flag, load_meta, delete_meta, show_meta"); -DEFINE_int64(tablet_id, 0, "tablet_id for tablet meta"); 
-DEFINE_int32(schema_hash, 0, "schema_hash for tablet meta"); -DEFINE_string(json_meta_path, "", "absolute json meta file path"); -DEFINE_string(pb_meta_path, "", "pb meta file path"); -DEFINE_string(tablet_file, "", "file to save a set of tablets"); -DEFINE_string(file, "", "segment file path"); - -std::string get_usage(const std::string& progname) { - std::stringstream ss; - ss << progname << " is the Doris BE Meta tool.\n"; - ss << "Stop BE first before use this tool.\n"; - ss << "Usage:\n"; - ss << "./meta_tool --operation=get_meta --root_path=/path/to/storage/path " - "--tablet_id=tabletid --schema_hash=schemahash\n"; - ss << "./meta_tool --operation=load_meta --root_path=/path/to/storage/path " - "--json_meta_path=path\n"; - ss << "./meta_tool --operation=delete_meta " - "--root_path=/path/to/storage/path --tablet_id=tabletid " - "--schema_hash=schemahash\n"; - ss << "./meta_tool --operation=delete_meta --tablet_file=file_path\n"; - ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n"; - ss << "./meta_tool --operation=show_segment_footer --file=/path/to/segment/file\n"; - return ss.str(); -} - -void show_meta() { - TabletMeta tablet_meta; - Status s = tablet_meta.create_from_file(FLAGS_pb_meta_path); - if (!s.ok()) { - std::cout << "load pb meta file:" << FLAGS_pb_meta_path << " failed" - << ", status:" << s << std::endl; - return; - } - std::string json_meta; - json2pb::Pb2JsonOptions json_options; - json_options.pretty_json = true; - doris::TabletMetaPB tablet_meta_pb; - tablet_meta.to_meta_pb(&tablet_meta_pb); - json2pb::ProtoMessageToJson(tablet_meta_pb, &json_meta, json_options); - std::cout << json_meta << std::endl; -} - -void get_meta(DataDir* data_dir) { - std::string value; - Status s = - TabletMetaManager::get_json_meta(data_dir, FLAGS_tablet_id, FLAGS_schema_hash, &value); - if (s.is()) { - std::cout << "no tablet meta for tablet_id:" << FLAGS_tablet_id - << ", schema_hash:" << FLAGS_schema_hash << std::endl; - return; - } - std::cout 
<< value << std::endl; -} - -void load_meta(DataDir* data_dir) { - // load json tablet meta into meta - Status s = TabletMetaManager::load_json_meta(data_dir, FLAGS_json_meta_path); - if (!s.ok()) { - std::cout << "load meta failed, status:" << s << std::endl; - return; - } - std::cout << "load meta successfully" << std::endl; -} - -void delete_meta(DataDir* data_dir) { - Status s = TabletMetaManager::remove(data_dir, FLAGS_tablet_id, FLAGS_schema_hash); - if (!s.ok()) { - std::cout << "delete tablet meta failed for tablet_id:" << FLAGS_tablet_id - << ", schema_hash:" << FLAGS_schema_hash << ", status:" << s << std::endl; - return; - } - std::cout << "delete meta successfully" << std::endl; -} - -Status init_data_dir(StorageEngine& engine, const std::string& dir, std::unique_ptr* ret) { - std::string root_path; - RETURN_IF_ERROR(doris::io::global_local_filesystem()->canonicalize(dir, &root_path)); - doris::StorePath path; - auto res = parse_root_path(root_path, &path); - if (!res.ok()) { - std::cout << "parse root path failed:" << root_path << std::endl; - return Status::InternalError("parse root path failed"); - } - - auto p = std::make_unique(engine, path.path, path.capacity_bytes, path.storage_medium); - if (p == nullptr) { - std::cout << "new data dir failed" << std::endl; - return Status::InternalError("new data dir failed"); - } - res = p->init(); - if (!res.ok()) { - std::cout << "data_dir load failed" << std::endl; - return Status::InternalError("data_dir load failed"); - } - - p.swap(*ret); - return Status::OK(); -} - -void batch_delete_meta(const std::string& tablet_file) { - // each line in tablet file indicate a tablet to delete, format is: - // data_dir,tablet_id,schema_hash - // eg: - // /data1/palo.HDD,100010,11212389324 - // /data2/palo.HDD,100010,23049230234 - std::ifstream infile(tablet_file); - std::string line = ""; - int err_num = 0; - int delete_num = 0; - int total_num = 0; - StorageEngine engine(doris::EngineOptions {}); - 
std::unordered_map> dir_map; - while (std::getline(infile, line)) { - total_num++; - std::vector v = strings::Split(line, ","); - if (v.size() != 3) { - std::cout << "invalid line in tablet_file: " << line << std::endl; - err_num++; - continue; - } - // 1. get dir - std::string dir; - Status st = doris::io::global_local_filesystem()->canonicalize(v[0], &dir); - if (!st.ok()) { - std::cout << "invalid root dir in tablet_file: " << line << std::endl; - err_num++; - continue; - } - - if (dir_map.find(dir) == dir_map.end()) { - // new data dir, init it - std::unique_ptr data_dir_p; - Status st = init_data_dir(engine, dir, &data_dir_p); - if (!st.ok()) { - std::cout << "invalid root path:" << FLAGS_root_path - << ", error: " << st.to_string() << std::endl; - err_num++; - continue; - } - dir_map[dir] = std::move(data_dir_p); - std::cout << "get a new data dir: " << dir << std::endl; - } - DataDir* data_dir = dir_map[dir].get(); - if (data_dir == nullptr) { - std::cout << "failed to get data dir: " << line << std::endl; - err_num++; - continue; - } - - // 2. 
get tablet id/schema_hash - int64_t tablet_id; - if (!safe_strto64(v[1].c_str(), &tablet_id)) { - std::cout << "invalid tablet id: " << line << std::endl; - err_num++; - continue; - } - int64_t schema_hash; - if (!safe_strto64(v[2].c_str(), &schema_hash)) { - std::cout << "invalid schema hash: " << line << std::endl; - err_num++; - continue; - } - - Status s = TabletMetaManager::remove(data_dir, tablet_id, schema_hash); - if (!s.ok()) { - std::cout << "delete tablet meta failed for tablet_id:" << tablet_id - << ", schema_hash:" << schema_hash << ", status:" << s << std::endl; - err_num++; - continue; - } - - delete_num++; - } - - std::cout << "total: " << total_num << ", delete: " << delete_num << ", error: " << err_num - << std::endl; - return; -} - -Status get_segment_footer(doris::io::FileReader* file_reader, SegmentFooterPB* footer) { - // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4) - std::string file_name = file_reader->path(); - uint64_t file_size = file_reader->size(); - if (file_size < 12) { - return Status::Corruption("Bad segment file {}: file size {} < 12", file_name, file_size); - } - - size_t bytes_read = 0; - uint8_t fixed_buf[12]; - Slice slice(fixed_buf, 12); - RETURN_IF_ERROR(file_reader->read_at(file_size - 12, slice, &bytes_read)); - - // validate magic number - const char* k_segment_magic = "D0R1"; - const uint32_t k_segment_magic_length = 4; - if (memcmp(fixed_buf + 8, k_segment_magic, k_segment_magic_length) != 0) { - return Status::Corruption("Bad segment file {}: magic number not match", file_name); - } - - // read footer PB - uint32_t footer_length = doris::decode_fixed32_le(fixed_buf); - if (file_size < 12 + footer_length) { - return Status::Corruption("Bad segment file {}: file size {} < {}", file_name, file_size, - 12 + footer_length); - } - std::string footer_buf; - footer_buf.resize(footer_length); - Slice slice2(footer_buf); - RETURN_IF_ERROR(file_reader->read_at(file_size - 12 - footer_length, 
slice2, &bytes_read)); - - // validate footer PB's checksum - uint32_t expect_checksum = doris::decode_fixed32_le(fixed_buf + 4); - uint32_t actual_checksum = doris::crc32c::Value(footer_buf.data(), footer_buf.size()); - if (actual_checksum != expect_checksum) { - return Status::Corruption( - "Bad segment file {}: footer checksum not match, actual={} vs expect={}", file_name, - actual_checksum, expect_checksum); - } - - // deserialize footer PB - if (!footer->ParseFromString(footer_buf)) { - return Status::Corruption("Bad segment file {}: failed to parse SegmentFooterPB", - file_name); - } - return Status::OK(); -} - -void show_segment_footer(const std::string& file_name) { - doris::io::FileReaderSPtr file_reader; - Status status = doris::io::global_local_filesystem()->open_file(file_name, &file_reader); - if (!status.ok()) { - std::cout << "open file failed: " << status << std::endl; - return; - } - SegmentFooterPB footer; - status = get_segment_footer(file_reader.get(), &footer); - if (!status.ok()) { - std::cout << "get footer failed: " << status.to_string() << std::endl; - return; - } - std::string json_footer; - json2pb::Pb2JsonOptions json_options; - json_options.pretty_json = true; - bool ret = json2pb::ProtoMessageToJson(footer, &json_footer, json_options); - if (!ret) { - std::cout << "Convert PB to json failed" << std::endl; - return; - } - std::cout << json_footer << std::endl; - return; -} - -int main(int argc, char** argv) { - std::string usage = get_usage(argv[0]); - gflags::SetUsageMessage(usage); - google::ParseCommandLineFlags(&argc, &argv, true); - - if (FLAGS_operation == "show_meta") { - show_meta(); - } else if (FLAGS_operation == "batch_delete_meta") { - std::string tablet_file; - Status st = - doris::io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file); - if (!st.ok()) { - std::cout << "invalid tablet file: " << FLAGS_tablet_file - << ", error: " << st.to_string() << std::endl; - return -1; - } - - 
batch_delete_meta(tablet_file); - } else if (FLAGS_operation == "show_segment_footer") { - if (FLAGS_file == "") { - std::cout << "no file flag for show dict" << std::endl; - return -1; - } - show_segment_footer(FLAGS_file); - } else { - // operations that need root path should be written here - std::set valid_operations = {"get_meta", "load_meta", "delete_meta"}; - if (valid_operations.find(FLAGS_operation) == valid_operations.end()) { - std::cout << "invalid operation:" << FLAGS_operation << std::endl; - return -1; - } - - StorageEngine engine(doris::EngineOptions {}); - std::unique_ptr data_dir; - Status st = init_data_dir(engine, FLAGS_root_path, &data_dir); - if (!st.ok()) { - std::cout << "invalid root path:" << FLAGS_root_path << ", error: " << st.to_string() - << std::endl; - return -1; - } - - if (FLAGS_operation == "get_meta") { - get_meta(data_dir.get()); - } else if (FLAGS_operation == "load_meta") { - load_meta(data_dir.get()); - } else if (FLAGS_operation == "delete_meta") { - delete_meta(data_dir.get()); - } else { - std::cout << "invalid operation: " << FLAGS_operation << "\n" << usage << std::endl; - return -1; - } - } - gflags::ShutDownCommandLineFlags(); - return 0; -} +// // Licensed to the Apache Software Foundation (ASF) under one +// // or more contributor license agreements. See the NOTICE file +// // distributed with this work for additional information +// // regarding copyright ownership. The ASF licenses this file +// // to you under the Apache License, Version 2.0 (the +// // "License"); you may not use this file except in compliance +// // with the License. You may obtain a copy of the License at +// // +// // http://www.apache.org/licenses/LICENSE-2.0 +// // +// // Unless required by applicable law or agreed to in writing, +// // software distributed under the License is distributed on an +// // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// // KIND, either express or implied. 
See the License for the +// // specific language governing permissions and limitations +// // under the License. +// +// #include +// #include +// #include +// +// #include +// #include +// #include +// #include +// #include +// #include +// +// #include "common/status.h" +// #include "gutil/strings/numbers.h" +// #include "gutil/strings/split.h" +// #include "gutil/strings/substitute.h" +// #include "io/fs/file_reader.h" +// #include "io/fs/local_file_system.h" +// #include "json2pb/pb_to_json.h" +// #include "olap/data_dir.h" +// #include "olap/olap_define.h" +// #include "olap/options.h" +// #include "olap/rowset/segment_v2/binary_plain_page.h" +// #include "olap/rowset/segment_v2/column_reader.h" +// #include "olap/storage_engine.h" +// #include "olap/tablet_meta.h" +// #include "olap/tablet_meta_manager.h" +// #include "olap/utils.h" +// #include "util/coding.h" +// #include "util/crc32c.h" +// +// using std::filesystem::path; +// using doris::DataDir; +// using doris::StorageEngine; +// using doris::OlapMeta; +// using doris::Status; +// using doris::TabletMeta; +// using doris::TabletMetaManager; +// using doris::Slice; +// using strings::Substitute; +// using doris::segment_v2::SegmentFooterPB; +// using doris::segment_v2::ColumnReader; +// using doris::segment_v2::PageHandle; +// using doris::segment_v2::PagePointer; +// using doris::segment_v2::ColumnReaderOptions; +// using doris::segment_v2::ColumnIteratorOptions; +// using doris::segment_v2::PageFooterPB; +// using doris::io::FileReaderSPtr; +// +// const std::string HEADER_PREFIX = "tabletmeta_"; +// +// DEFINE_string(root_path, "", "storage root path"); +// DEFINE_string(operation, "get_meta", +// "valid operation: get_meta, flag, load_meta, delete_meta, show_meta"); +// DEFINE_int64(tablet_id, 0, "tablet_id for tablet meta"); +// DEFINE_int32(schema_hash, 0, "schema_hash for tablet meta"); +// DEFINE_string(json_meta_path, "", "absolute json meta file path"); +// DEFINE_string(pb_meta_path, "", "pb 
meta file path"); +// DEFINE_string(tablet_file, "", "file to save a set of tablets"); +// DEFINE_string(file, "", "segment file path"); +// +// std::string get_usage(const std::string& progname) { +// std::stringstream ss; +// ss << progname << " is the Doris BE Meta tool.\n"; +// ss << "Stop BE first before use this tool.\n"; +// ss << "Usage:\n"; +// ss << "./meta_tool --operation=get_meta --root_path=/path/to/storage/path " +// "--tablet_id=tabletid --schema_hash=schemahash\n"; +// ss << "./meta_tool --operation=load_meta --root_path=/path/to/storage/path " +// "--json_meta_path=path\n"; +// ss << "./meta_tool --operation=delete_meta " +// "--root_path=/path/to/storage/path --tablet_id=tabletid " +// "--schema_hash=schemahash\n"; +// ss << "./meta_tool --operation=delete_meta --tablet_file=file_path\n"; +// ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n"; +// ss << "./meta_tool --operation=show_segment_footer --file=/path/to/segment/file\n"; +// return ss.str(); +// } +// +// void show_meta() { +// TabletMeta tablet_meta; +// Status s = tablet_meta.create_from_file(FLAGS_pb_meta_path); +// if (!s.ok()) { +// std::cout << "load pb meta file:" << FLAGS_pb_meta_path << " failed" +// << ", status:" << s << std::endl; +// return; +// } +// std::string json_meta; +// json2pb::Pb2JsonOptions json_options; +// json_options.pretty_json = true; +// doris::TabletMetaPB tablet_meta_pb; +// tablet_meta.to_meta_pb(&tablet_meta_pb); +// json2pb::ProtoMessageToJson(tablet_meta_pb, &json_meta, json_options); +// std::cout << json_meta << std::endl; +// } +// +// void get_meta(DataDir* data_dir) { +// std::string value; +// Status s = +// TabletMetaManager::get_json_meta(data_dir, FLAGS_tablet_id, FLAGS_schema_hash, &value); +// if (s.is()) { +// std::cout << "no tablet meta for tablet_id:" << FLAGS_tablet_id +// << ", schema_hash:" << FLAGS_schema_hash << std::endl; +// return; +// } +// std::cout << value << std::endl; +// } +// +// void load_meta(DataDir* 
data_dir) { +// // load json tablet meta into meta +// Status s = TabletMetaManager::load_json_meta(data_dir, FLAGS_json_meta_path); +// if (!s.ok()) { +// std::cout << "load meta failed, status:" << s << std::endl; +// return; +// } +// std::cout << "load meta successfully" << std::endl; +// } +// +// void delete_meta(DataDir* data_dir) { +// Status s = TabletMetaManager::remove(data_dir, FLAGS_tablet_id, FLAGS_schema_hash); +// if (!s.ok()) { +// std::cout << "delete tablet meta failed for tablet_id:" << FLAGS_tablet_id +// << ", schema_hash:" << FLAGS_schema_hash << ", status:" << s << std::endl; +// return; +// } +// std::cout << "delete meta successfully" << std::endl; +// } +// +// Status init_data_dir(StorageEngine& engine, const std::string& dir, std::unique_ptr* ret) { +// std::string root_path; +// RETURN_IF_ERROR(doris::io::global_local_filesystem()->canonicalize(dir, &root_path)); +// doris::StorePath path; +// auto res = parse_root_path(root_path, &path); +// if (!res.ok()) { +// std::cout << "parse root path failed:" << root_path << std::endl; +// return Status::InternalError("parse root path failed"); +// } +// +// auto p = std::make_unique(engine, path.path, path.capacity_bytes, path.storage_medium); +// if (p == nullptr) { +// std::cout << "new data dir failed" << std::endl; +// return Status::InternalError("new data dir failed"); +// } +// res = p->init(); +// if (!res.ok()) { +// std::cout << "data_dir load failed" << std::endl; +// return Status::InternalError("data_dir load failed"); +// } +// +// p.swap(*ret); +// return Status::OK(); +// } +// +// void batch_delete_meta(const std::string& tablet_file) { +// // each line in tablet file indicate a tablet to delete, format is: +// // data_dir,tablet_id,schema_hash +// // eg: +// // /data1/palo.HDD,100010,11212389324 +// // /data2/palo.HDD,100010,23049230234 +// std::ifstream infile(tablet_file); +// std::string line = ""; +// int err_num = 0; +// int delete_num = 0; +// int total_num = 0; +// 
StorageEngine engine(doris::EngineOptions {}); +// std::unordered_map> dir_map; +// while (std::getline(infile, line)) { +// total_num++; +// std::vector v = strings::Split(line, ","); +// if (v.size() != 3) { +// std::cout << "invalid line in tablet_file: " << line << std::endl; +// err_num++; +// continue; +// } +// // 1. get dir +// std::string dir; +// Status st = doris::io::global_local_filesystem()->canonicalize(v[0], &dir); +// if (!st.ok()) { +// std::cout << "invalid root dir in tablet_file: " << line << std::endl; +// err_num++; +// continue; +// } +// +// if (dir_map.find(dir) == dir_map.end()) { +// // new data dir, init it +// std::unique_ptr data_dir_p; +// Status st = init_data_dir(engine, dir, &data_dir_p); +// if (!st.ok()) { +// std::cout << "invalid root path:" << FLAGS_root_path +// << ", error: " << st.to_string() << std::endl; +// err_num++; +// continue; +// } +// dir_map[dir] = std::move(data_dir_p); +// std::cout << "get a new data dir: " << dir << std::endl; +// } +// DataDir* data_dir = dir_map[dir].get(); +// if (data_dir == nullptr) { +// std::cout << "failed to get data dir: " << line << std::endl; +// err_num++; +// continue; +// } +// +// // 2. 
get tablet id/schema_hash +// int64_t tablet_id; +// if (!safe_strto64(v[1].c_str(), &tablet_id)) { +// std::cout << "invalid tablet id: " << line << std::endl; +// err_num++; +// continue; +// } +// int64_t schema_hash; +// if (!safe_strto64(v[2].c_str(), &schema_hash)) { +// std::cout << "invalid schema hash: " << line << std::endl; +// err_num++; +// continue; +// } +// +// Status s = TabletMetaManager::remove(data_dir, tablet_id, schema_hash); +// if (!s.ok()) { +// std::cout << "delete tablet meta failed for tablet_id:" << tablet_id +// << ", schema_hash:" << schema_hash << ", status:" << s << std::endl; +// err_num++; +// continue; +// } +// +// delete_num++; +// } +// +// std::cout << "total: " << total_num << ", delete: " << delete_num << ", error: " << err_num +// << std::endl; +// return; +// } +// +// Status get_segment_footer(doris::io::FileReader* file_reader, SegmentFooterPB* footer) { +// // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4) +// std::string file_name = file_reader->path(); +// uint64_t file_size = file_reader->size(); +// if (file_size < 12) { +// return Status::Corruption("Bad segment file {}: file size {} < 12", file_name, file_size); +// } +// +// size_t bytes_read = 0; +// uint8_t fixed_buf[12]; +// Slice slice(fixed_buf, 12); +// RETURN_IF_ERROR(file_reader->read_at(file_size - 12, slice, &bytes_read)); +// +// // validate magic number +// const char* k_segment_magic = "D0R1"; +// const uint32_t k_segment_magic_length = 4; +// if (memcmp(fixed_buf + 8, k_segment_magic, k_segment_magic_length) != 0) { +// return Status::Corruption("Bad segment file {}: magic number not match", file_name); +// } +// +// // read footer PB +// uint32_t footer_length = doris::decode_fixed32_le(fixed_buf); +// if (file_size < 12 + footer_length) { +// return Status::Corruption("Bad segment file {}: file size {} < {}", file_name, file_size, +// 12 + footer_length); +// } +// std::string footer_buf; +// 
footer_buf.resize(footer_length); +// Slice slice2(footer_buf); +// RETURN_IF_ERROR(file_reader->read_at(file_size - 12 - footer_length, slice2, &bytes_read)); +// +// // validate footer PB's checksum +// uint32_t expect_checksum = doris::decode_fixed32_le(fixed_buf + 4); +// uint32_t actual_checksum = doris::crc32c::Value(footer_buf.data(), footer_buf.size()); +// if (actual_checksum != expect_checksum) { +// return Status::Corruption( +// "Bad segment file {}: footer checksum not match, actual={} vs expect={}", file_name, +// actual_checksum, expect_checksum); +// } +// +// // deserialize footer PB +// if (!footer->ParseFromString(footer_buf)) { +// return Status::Corruption("Bad segment file {}: failed to parse SegmentFooterPB", +// file_name); +// } +// return Status::OK(); +// } +// +// void show_segment_footer(const std::string& file_name) { +// doris::io::FileReaderSPtr file_reader; +// Status status = doris::io::global_local_filesystem()->open_file(file_name, &file_reader); +// if (!status.ok()) { +// std::cout << "open file failed: " << status << std::endl; +// return; +// } +// SegmentFooterPB footer; +// status = get_segment_footer(file_reader.get(), &footer); +// if (!status.ok()) { +// std::cout << "get footer failed: " << status.to_string() << std::endl; +// return; +// } +// std::string json_footer; +// json2pb::Pb2JsonOptions json_options; +// json_options.pretty_json = true; +// bool ret = json2pb::ProtoMessageToJson(footer, &json_footer, json_options); +// if (!ret) { +// std::cout << "Convert PB to json failed" << std::endl; +// return; +// } +// std::cout << json_footer << std::endl; +// return; +// } +// +// int main(int argc, char** argv) { +// std::string usage = get_usage(argv[0]); +// gflags::SetUsageMessage(usage); +// google::ParseCommandLineFlags(&argc, &argv, true); +// +// if (FLAGS_operation == "show_meta") { +// show_meta(); +// } else if (FLAGS_operation == "batch_delete_meta") { +// std::string tablet_file; +// Status st = +// 
doris::io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file); +// if (!st.ok()) { +// std::cout << "invalid tablet file: " << FLAGS_tablet_file +// << ", error: " << st.to_string() << std::endl; +// return -1; +// } +// +// batch_delete_meta(tablet_file); +// } else if (FLAGS_operation == "show_segment_footer") { +// if (FLAGS_file == "") { +// std::cout << "no file flag for show dict" << std::endl; +// return -1; +// } +// show_segment_footer(FLAGS_file); +// } else { +// // operations that need root path should be written here +// std::set valid_operations = {"get_meta", "load_meta", "delete_meta"}; +// if (valid_operations.find(FLAGS_operation) == valid_operations.end()) { +// std::cout << "invalid operation:" << FLAGS_operation << std::endl; +// return -1; +// } +// +// StorageEngine engine(doris::EngineOptions {}); +// std::unique_ptr data_dir; +// Status st = init_data_dir(engine, FLAGS_root_path, &data_dir); +// if (!st.ok()) { +// std::cout << "invalid root path:" << FLAGS_root_path << ", error: " << st.to_string() +// << std::endl; +// return -1; +// } +// +// if (FLAGS_operation == "get_meta") { +// get_meta(data_dir.get()); +// } else if (FLAGS_operation == "load_meta") { +// load_meta(data_dir.get()); +// } else if (FLAGS_operation == "delete_meta") { +// delete_meta(data_dir.get()); +// } else { +// std::cout << "invalid operation: " << FLAGS_operation << "\n" << usage << std::endl; +// return -1; +// } +// } +// gflags::ShutDownCommandLineFlags(); +// return 0; +// } +// \ No newline at end of file diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index d67a70d2f630f0..7a28222df7de2e 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -45,6 +45,7 @@ #include "exprs/json_functions.h" #include "olap/olap_common.h" #include "util/defer_op.h" +#include "util/jsonb_utils.h" #include "util/simd/bits.h" #include 
"vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/helpers.h" @@ -73,6 +74,7 @@ #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_object.h" #include "vec/data_types/get_least_supertype.h" +#include "vec/functions/function_binary_arithmetic.h" #include "vec/json/path_in_data.h" #ifdef __AVX2__ @@ -85,14 +87,16 @@ namespace doris::vectorized { #include "common/compile_check_begin.h" namespace { -DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool is_nullable) { +DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool is_nullable, + int precision = -1, int scale = -1) { if (type == ColumnObject::MOST_COMMON_TYPE_ID) { // JSONB type MUST NOT wrapped in ARRAY column, it should be top level. // So we ignored num_dimensions. return is_nullable ? make_nullable(std::make_shared()) : std::make_shared(); } - DataTypePtr result = DataTypeFactory::instance().create_data_type(type, is_nullable); + DataTypePtr result = + DataTypeFactory::instance().create_data_type(type, is_nullable, precision, scale); for (size_t i = 0; i < num_dimensions; ++i) { result = std::make_shared(result); if (is_nullable) { @@ -342,7 +346,44 @@ void get_field_info_impl(const Field& field, FieldInfo* info) { }; } +void get_base_field_info(const Field& field, FieldInfo* info) { + if (field.get_type_id() == TypeIndex::Array) { + if (field.safe_get().empty()) { + info->scalar_type_id = TypeIndex::Nothing; + ++info->num_dimensions; + info->have_nulls = true; + info->need_convert = false; + } else { + ++info->num_dimensions; + get_base_field_info(field.safe_get()[0], info); + } + return; + } + + // handle scalar types + info->scalar_type_id = field.get_type_id(); + info->have_nulls = true; + info->need_convert = false; + info->scale = field.get_scale(); + info->precision = field.get_precision(); + + // Currently the jsonb type should be the top level type, so we should not wrap it in array, + // see 
create_array_of_type. + // TODO we need to support array correctly + if (UNLIKELY(field.get_type_id() == TypeIndex::JSONB && info->num_dimensions > 0)) { + info->num_dimensions = 0; + info->need_convert = true; + } +} + void get_field_info(const Field& field, FieldInfo* info) { + if (field.get_type_id() != TypeIndex::Nothing) { + // Predefined schemas may now specify other types such as decimal, datetime, etc., + // so we use the type info carried by the field to create the correct type; predefined types are static + // and their types do not need to be deduced + get_base_field_info(field, info); + return; + } if (field.is_complex_field()) { get_field_info_impl(field, info); } else { @@ -425,7 +466,11 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { type_changed = true; } if (data.empty()) { - add_new_column_part(create_array_of_type(base_type.idx, value_dim, is_nullable)); + // Predefined schemas may now specify other types such as decimal, datetime, etc., + // so we use the specified info to create the correct type; predefined types are static and + // conflict-free, so we can set them directly.
+ add_new_column_part(create_array_of_type(base_type.idx, value_dim, is_nullable, + info.precision, info.scale)); } else if (least_common_type.get_base_type_id() != base_type.idx && !base_type.is_nothing()) { if (schema_util::is_conversion_required_between_integers( base_type.idx, least_common_type.get_base_type_id())) { @@ -948,14 +993,9 @@ void ColumnObject::Subcolumn::get(size_t n, Field& res) const { return; } if (is_finalized()) { - if (least_common_type.get_base_type_id() == TypeIndex::JSONB) { - // JsonbFiled is special case - res = JsonbField(); - } - get_finalized_column().get(n, res); + res = get_least_common_type()->get_type_field(get_finalized_column(), n); return; } - size_t ind = n; if (ind < num_of_defaults_in_prefix) { res = least_common_type.get()->get_default(); @@ -1376,7 +1416,8 @@ Status find_and_set_leave_value(const IColumn* column, const PathInData& path, << ", root: " << std::string(buffer.GetString(), buffer.GetSize()); return Status::NotFound("Not found path {}", path.get_path()); } - RETURN_IF_ERROR(type_serde->write_one_cell_to_json(*column, *target, allocator, mem_pool, row)); + RETURN_IF_ERROR( + type_serde->write_one_cell_to_json(*column, *target, allocator, mem_pool, row, type)); return Status::OK(); } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index e4127197a22b02..9f8b5811943632 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -72,7 +72,11 @@ struct FieldInfo { /// we need to convert scalars to the common type. bool need_convert; /// Number of dimension in array. 0 if field is scalar. 
- size_t num_dimensions; + size_t num_dimensions = 0; + + // decimal info + int scale = 0; + int precision = 0; }; void get_field_info(const Field& field, FieldInfo* info); @@ -120,6 +124,10 @@ class ColumnObject final : public COWHelper { const DataTypePtr& get_least_common_type() const { return least_common_type.get(); } + const TypeIndex& get_least_common_base_type_id() const { + return least_common_type.get_base_type_id(); + } + const DataTypePtr& get_least_common_typeBase() const { return least_common_type.get_base(); } diff --git a/be/src/vec/columns/subcolumn_tree.h b/be/src/vec/columns/subcolumn_tree.h index 8b53d1912f3d69..b9ac0092ee70d3 100644 --- a/be/src/vec/columns/subcolumn_tree.h +++ b/be/src/vec/columns/subcolumn_tree.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include "runtime/exec_env.h" #include "runtime/thread_context.h" diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index fd50af3e1fcd88..568a77b66ebb7b 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -32,8 +32,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -45,6 +47,7 @@ #include "olap/tablet_schema.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" +#include "runtime/runtime_state.h" #include "udf/udf.h" #include "util/defer_op.h" #include "vec/columns/column.h" @@ -156,7 +159,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co } Block tmp_block {arguments}; size_t result_column = tmp_block.columns(); - auto ctx = FunctionContext::create_context(nullptr, {}, {}); + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); // To prevent from null info lost, we should not call function since the function framework will wrap // nullable to Variant instead of the root of Variant @@ -186,8 +190,13 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co 
ctx->set_string_as_jsonb_string(true); ctx->set_jsonb_string_as_string(true); tmp_block.insert({nullptr, type, arg.name}); - RETURN_IF_ERROR( - function->execute(ctx.get(), tmp_block, {0}, result_column, arg.column->size())); + if (!function->execute(ctx.get(), tmp_block, {0}, result_column, arg.column->size())) { + LOG_EVERY_N(WARNING, 100) << fmt::format("cast from {} to {}", arg.type->get_name(), + type->get_name()); + *result = type->create_column_const_with_default_value(arg.column->size()) + ->convert_to_full_column_if_const(); + return Status::OK(); + } *result = tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const(); VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}", arg.name, arg.column->get_name(), (*result)->get_name()); @@ -230,9 +239,15 @@ void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::str column.set_length(data_type->get_size_of_value_in_memory()); return; } - // TODO handle more types like struct/date/datetime/decimal... 
- LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + if (WhichDataType(*data_type).is_decimal()) { + column.set_precision_frac(data_type->get_precision(), data_type->get_scale()); + column.set_is_decimal(true); + return; + } + if (WhichDataType(*data_type).is_date_time_v2()) { + column.set_precision_frac(-1, data_type->get_scale()); + return; + } } TabletColumn get_column_by_type(const vectorized::DataTypePtr& data_type, const std::string& name, @@ -245,6 +260,7 @@ TabletColumn get_column_by_type(const vectorized::DataTypePtr& data_type, const void update_least_schema_internal(const std::map& subcolumns_types, TabletSchemaSPtr& common_schema, bool update_sparse_column, int32_t variant_col_unique_id, + const std::map& typed_columns, std::set* path_set = nullptr) { PathsInData tuple_paths; DataTypes tuple_types; @@ -281,11 +297,21 @@ void update_least_schema_internal(const std::map& subcolu // Append all common type columns of this variant for (int i = 0; i < tuple_paths.size(); ++i) { TabletColumn common_column; - // const std::string& column_name = variant_col_name + "." + tuple_paths[i].get_path(); - get_column_by_type(tuple_types[i], tuple_paths[i].get_path(), common_column, - ExtraInfo {.unique_id = -1, - .parent_unique_id = variant_col_unique_id, - .path_info = tuple_paths[i]}); + // typed paths do not contain the root part + auto path_without_root = tuple_paths[i].copy_pop_front().get_path(); + if (typed_columns.contains(path_without_root) && !tuple_paths[i].has_nested_part()) { + common_column = *typed_columns.at(path_without_root); + // parent unique id and path info may not be initialized in the write path + common_column.set_parent_unique_id(variant_col_unique_id); + common_column.set_path_info(tuple_paths[i]); + common_column.set_name(tuple_paths[i].get_path()); + } else { + // const std::string& column_name = variant_col_name + "."
+ tuple_paths[i].get_path(); + get_column_by_type(tuple_types[i], tuple_paths[i].get_path(), common_column, + ExtraInfo {.unique_id = -1, + .parent_unique_id = variant_col_unique_id, + .path_info = tuple_paths[i]}); + } if (update_sparse_column) { common_schema->mutable_column_by_uid(variant_col_unique_id) .append_sparse_column(common_column); @@ -301,6 +327,11 @@ void update_least_schema_internal(const std::map& subcolu void update_least_common_schema(const std::vector& schemas, TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id, std::set* path_set) { + std::map typed_columns; + for (const TabletColumnPtr& col : + common_schema->column_by_uid(variant_col_unique_id).get_sub_columns()) { + typed_columns[col->name()] = col; + } // Types of subcolumns by path from all tuples. std::map subcolumns_types; for (const TabletSchemaSPtr& schema : schemas) { @@ -308,7 +339,7 @@ void update_least_common_schema(const std::vector& schemas, // Get subcolumns of this variant if (col->has_path_info() && col->parent_unique_id() > 0 && col->parent_unique_id() == variant_col_unique_id) { - subcolumns_types[*col->path_info_ptr()].push_back( + subcolumns_types[*col->path_info_ptr()].emplace_back( DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } @@ -325,18 +356,23 @@ void update_least_common_schema(const std::vector& schemas, col->parent_unique_id() == variant_col_unique_id && // this column have been found in origin columns subcolumns_types.find(*col->path_info_ptr()) != subcolumns_types.end()) { - subcolumns_types[*col->path_info_ptr()].push_back( + subcolumns_types[*col->path_info_ptr()].emplace_back( DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } } update_least_schema_internal(subcolumns_types, common_schema, false, variant_col_unique_id, - path_set); + typed_columns, path_set); } void update_least_sparse_column(const std::vector& schemas, TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id, const 
std::set& path_set) { + std::map typed_columns; + for (const TabletColumnPtr& col : + common_schema->column_by_uid(variant_col_unique_id).get_sub_columns()) { + typed_columns[col->name()] = col; + } // Types of subcolumns by path from all tuples. std::map subcolumns_types; for (const TabletSchemaSPtr& schema : schemas) { @@ -350,12 +386,13 @@ void update_least_sparse_column(const std::vector& schemas, if (col->has_path_info() && col->parent_unique_id() > 0 && col->parent_unique_id() == variant_col_unique_id && path_set.find(*col->path_info_ptr()) == path_set.end()) { - subcolumns_types[*col->path_info_ptr()].push_back( + subcolumns_types[*col->path_info_ptr()].emplace_back( DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } } - update_least_schema_internal(subcolumns_types, common_schema, true, variant_col_unique_id); + update_least_schema_internal(subcolumns_types, common_schema, true, variant_col_unique_id, + typed_columns); } void inherit_column_attributes(const TabletColumn& source, TabletColumn& target, diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 341f65e075ed11..f5d106c3eca803 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -339,6 +339,7 @@ class DecimalField { * Used to represent a single value of one of several types in memory. * Warning! Prefer to use chunks of columns instead of single values. See Column.h */ + class Field { public: struct Types { @@ -448,9 +449,15 @@ class Field { /** Despite the presence of a template constructor, this constructor is still needed, * since, in its absence, the compiler will still generate the default constructor. 
*/ - Field(const Field& rhs) { create(rhs); } + Field(const Field& rhs) { + copy_type_info(rhs); + create(rhs); + } - Field(Field&& rhs) { create(std::move(rhs)); } + Field(Field&& rhs) { + copy_type_info(rhs); + create(std::move(rhs)); + } // Make the constructor with a String parameter explicit to prevent accidentally creating a Field with the wrong string type. // Other types don't require explicit construction to avoid extensive modifications. @@ -458,7 +465,18 @@ class Field { requires(!std::is_same_v, Field>) explicit(std::is_same_v, String>) Field(T&& rhs); + void set_type_info(TypeIndex type, int precision = -1, int scale = -1) { + this->type = type; + this->precision = precision; + this->scale = scale; + } + + int get_precision() const { return precision; } + int get_scale() const { return scale; } + TypeIndex get_type_id() const { return type; } + Field& operator=(const Field& rhs) { + copy_type_info(rhs); if (this != &rhs) { if (which != rhs.which) { destroy(); @@ -476,6 +494,7 @@ class Field { } Field& operator=(Field&& rhs) { + copy_type_info(rhs); if (this != &rhs) { if (which != rhs.which) { destroy(); @@ -689,6 +708,11 @@ class Field { storage; Types::Which which; + // type, scale and precision store the real type of the field, for example, the real type of an Int64 may be DateTimeV2 + // or the real type of a decimal value may be Decimal(27, 9) + TypeIndex type = TypeIndex::Nothing; + int scale = -1; + int precision = -1; /// Assuming there was no allocated state or it was deallocated (see destroy).
template @@ -730,6 +754,12 @@ class Field { dispatch([this](auto& value) { assign_concrete(std::move(value)); }, x); } + void copy_type_info(const Field& rhs) { + this->type = rhs.type; + this->precision = rhs.precision; + this->scale = rhs.scale; + } + ALWAYS_INLINE void destroy() { if (which < Types::MIN_NON_POD) { return; diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp index c625f8c424bba6..9af72b29c65023 100644 --- a/be/src/vec/data_types/convert_field_to_type.cpp +++ b/be/src/vec/data_types/convert_field_to_type.cpp @@ -34,6 +34,7 @@ #include "common/exception.h" #include "common/status.h" #include "util/bitmap_value.h" +#include "util/jsonb_document.h" #include "util/jsonb_writer.h" #include "vec/common/field_visitors.h" #include "vec/common/typeid_cast.h" @@ -113,6 +114,14 @@ class FieldVisitorToJsonb : public StaticVisitor { writer->writeString(x); writer->writeEndString(); } + void operator()(const JsonbField& x, JsonbWriter* writer) const { + const JsonbValue* value = JsonbDocument::createValue(x.get_value(), x.get_size()); + if (value == nullptr) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to create JsonbValue"); + } + writer->writeValue(value); + } + void operator()(const Array& x, JsonbWriter* writer) const; void operator()(const Tuple& x, JsonbWriter* writer) const { @@ -148,9 +157,6 @@ class FieldVisitorToJsonb : public StaticVisitor { void operator()(const Map& x, JsonbWriter* writer) const { throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted"); } - void operator()(const JsonbField& x, JsonbWriter* writer) const { - throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted"); - } }; void FieldVisitorToJsonb::operator()(const Array& x, JsonbWriter* writer) const { diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 7f1ee0cd85032f..6918ce63db47f5 100644 --- 
a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -37,6 +37,7 @@ #include "vec/columns/column_const.h" #include "vec/columns/column_string.h" #include "vec/common/cow.h" +#include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/serde/data_type_serde.h" @@ -120,6 +121,14 @@ class IDataType : private boost::noncopyable { virtual Field get_field(const TExprNode& node) const = 0; + // Return Field which wrapped with the real type. + virtual Field get_type_field(const IColumn& column, size_t row) const { + Field field; + column.get(row, field); + field.set_type_info(get_type_id()); + return field; + } + /// Checks that two instances belong to the same type virtual bool equals(const IDataType& rhs) const = 0; diff --git a/be/src/vec/data_types/data_type_array.cpp b/be/src/vec/data_types/data_type_array.cpp index 254e13a9a8eea2..4402c931e0359a 100644 --- a/be/src/vec/data_types/data_type_array.cpp +++ b/be/src/vec/data_types/data_type_array.cpp @@ -39,6 +39,8 @@ #include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" #include "vec/common/typeid_cast.h" +#include "vec/core/field.h" +#include "vec/core/types.h" #include "vec/data_types/data_type_nullable.h" #include "vec/io/reader_buffer.h" @@ -359,4 +361,26 @@ Status DataTypeArray::from_string(ReadBuffer& rb, IColumn* column) const { return Status::OK(); } +Field DataTypeArray::get_type_field(const IColumn& column, size_t row) const { + const auto& array = assert_cast(column); + size_t offset = array.offset_at(row); + size_t size = array.size_at(row); + + if (size > max_array_size_as_field) { + throw doris::Exception( + ErrorCode::INTERNAL_ERROR, + "Array of size {}, is too large to be manipulated as single field, maximum size {}", + size, max_array_size_as_field); + } + + Array res(size); + + for (size_t i = 0; i < size; ++i) { + res[i] = get_nested_type()->get_type_field(array.get_data(), offset + i); + } + Field typed_res(res); + 
typed_res.set_type_info(TypeIndex::Array); + return typed_res; +} + } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_array.h b/be/src/vec/data_types/data_type_array.h index 7899715ed24def..7fc853136850d0 100644 --- a/be/src/vec/data_types/data_type_array.h +++ b/be/src/vec/data_types/data_type_array.h @@ -73,6 +73,8 @@ class DataTypeArray final : public IDataType { const char* get_family_name() const override { return "Array"; } + Field get_type_field(const IColumn& column, size_t row) const override; + MutableColumnPtr create_column() const override; Field get_default() const override; diff --git a/be/src/vec/data_types/data_type_bitmap.h b/be/src/vec/data_types/data_type_bitmap.h index ce0f327f5450a5..d208255eeb5cc4 100644 --- a/be/src/vec/data_types/data_type_bitmap.h +++ b/be/src/vec/data_types/data_type_bitmap.h @@ -105,6 +105,11 @@ class DataTypeBitMap : public IDataType { __builtin_unreachable(); } + Field get_type_field(const IColumn& column, size_t row) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Unimplemented get_type_field for BitMap"); + } + static void serialize_as_stream(const BitmapValue& value, BufferWritable& buf); static void deserialize_as_stream(BitmapValue& value, BufferReadable& buf); diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h index 03a6a85657935c..8b0398d80a44d9 100644 --- a/be/src/vec/data_types/data_type_date_time.h +++ b/be/src/vec/data_types/data_type_date_time.h @@ -29,6 +29,7 @@ #include "common/status.h" #include "runtime/define_primitive_type.h" +#include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index 74655ff6ee8bec..8ac6232713a445 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ 
b/be/src/vec/data_types/data_type_decimal.h @@ -236,6 +236,13 @@ class DataTypeDecimal final : public IDataType { DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override { return std::make_shared>(scale, precision, nesting_level); }; + Field get_type_field(const IColumn& column, size_t row) const override { + const auto& decimal_column = static_cast&>(column); + Field field; + decimal_column.get(row, field); + field.set_type_info(TypeId::value, static_cast(precision), static_cast(scale)); + return field; + } /// Decimal specific diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 369809d77f68f3..9e6ead352ed36b 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -248,7 +248,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo return nested; } -DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool is_nullable) { +DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool is_nullable, + int precision, int scale) { DataTypePtr nested = nullptr; switch (type_index) { case TypeIndex::UInt8: @@ -297,7 +298,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared(); break; case TypeIndex::DateTimeV2: - nested = std::make_shared(); + nested = std::make_shared(scale > 0 ? scale : 0); break; case TypeIndex::DateTime: nested = std::make_shared(); @@ -309,22 +310,29 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared("", true); break; case TypeIndex::Decimal32: - nested = std::make_shared>(BeConsts::MAX_DECIMAL32_PRECISION, 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL32_PRECISION, + scale > 0 ? 
scale : 0); break; case TypeIndex::Decimal64: - nested = std::make_shared>(BeConsts::MAX_DECIMAL64_PRECISION, 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL64_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::Decimal128V2: - nested = std::make_shared>(BeConsts::MAX_DECIMALV2_PRECISION, - 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMALV2_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::Decimal128V3: - nested = std::make_shared>(BeConsts::MAX_DECIMAL128_PRECISION, - 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL128_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::Decimal256: - nested = std::make_shared>(BeConsts::MAX_DECIMAL256_PRECISION, - 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL256_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::JSONB: nested = std::make_shared(); diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index bf2a78d62420d1..80b52a871f0017 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -50,9 +50,6 @@ enum class TypeIndex; namespace doris::vectorized { class DataTypeFactory { - using DataTypeMap = std::unordered_map; - using InvertedDataTypeMap = std::vector>; - public: static DataTypeFactory& instance() { static DataTypeFactory instance; @@ -60,7 +57,8 @@ class DataTypeFactory { } DataTypePtr create_data_type(const doris::Field& col_desc); - DataTypePtr create_data_type(const TypeIndex& type_index, bool is_nullable = false); + DataTypePtr create_data_type(const TypeIndex& type_index, bool is_nullable = false, + int precision = -1, int scale = -1); DataTypePtr create_data_type(const TabletColumn& col_desc, bool is_nullable = false); DataTypePtr create_data_type(const TypeDescriptor& col_desc, bool is_nullable = true); diff --git 
a/be/src/vec/data_types/data_type_jsonb.h b/be/src/vec/data_types/data_type_jsonb.h index 2afd893c8d129d..b7e93ad129417c 100644 --- a/be/src/vec/data_types/data_type_jsonb.h +++ b/be/src/vec/data_types/data_type_jsonb.h @@ -84,6 +84,15 @@ class DataTypeJsonb final : public IDataType { return Field(String(value.value(), value.size())); } + // Return JsonbField. + Field get_type_field(const IColumn& column, size_t row) const override { + const auto& column_data = static_cast(column); + Field field = + JsonbField(column_data.get_data_at(row).data, column_data.get_data_at(row).size); + field.set_type_info(TypeIndex::JSONB); + return field; + } + bool equals(const IDataType& rhs) const override; bool have_subtypes() const override { return false; } diff --git a/be/src/vec/data_types/data_type_nullable.h b/be/src/vec/data_types/data_type_nullable.h index d366fb5373ba05..6e876d24193ee9 100644 --- a/be/src/vec/data_types/data_type_nullable.h +++ b/be/src/vec/data_types/data_type_nullable.h @@ -86,6 +86,14 @@ class DataTypeNullable final : public IDataType { return nested_data_type->get_field(node); } + Field get_type_field(const IColumn& column, size_t row) const override { + const auto& nullable_column = assert_cast(column); + if (nullable_column.is_null_at(row)) { + return Null(); + } + return nested_data_type->get_type_field(nullable_column.get_nested_column(), row); + } + bool equals(const IDataType& rhs) const override; bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { diff --git a/be/src/vec/data_types/data_type_number_base.h b/be/src/vec/data_types/data_type_number_base.h index a73bd9951891a3..5948fffffaa151 100644 --- a/be/src/vec/data_types/data_type_number_base.h +++ b/be/src/vec/data_types/data_type_number_base.h @@ -162,6 +162,14 @@ class DataTypeNumberBase : public IDataType { return std::make_shared>(nesting_level); }; + // Return Field which wrapped with the real type. 
+ Field get_type_field(const IColumn& column, size_t row) const override { + const auto& column_data = static_cast&>(column); + Field field = column_data.get_data()[row]; + field.set_type_info(get_type_id()); + return field; + } + protected: template void to_string_batch_impl(const IColumn& column, ColumnString& column_to) const { diff --git a/be/src/vec/data_types/data_type_string.h b/be/src/vec/data_types/data_type_string.h index 8b5ea94151f1eb..95c1247e987f24 100644 --- a/be/src/vec/data_types/data_type_string.h +++ b/be/src/vec/data_types/data_type_string.h @@ -31,6 +31,7 @@ #include "runtime/define_primitive_type.h" #include "serde/data_type_string_serde.h" #include "vec/columns/column_string.h" +#include "vec/common/string_ref.h" #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" @@ -92,6 +93,13 @@ class DataTypeString : public IDataType { DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override { return std::make_shared(nesting_level); }; + // Return Field. 
+ Field get_type_field(const IColumn& column, size_t row) const override { + const auto& column_data = static_cast(column); + Field field(String(column_data.get_data_at(row).data, column_data.get_data_at(row).size)); + field.set_type_info(TypeIndex::String); + return field; + } }; } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_time.h b/be/src/vec/data_types/data_type_time.h index 00cb3ee14bf37d..dc59c209fe5661 100644 --- a/be/src/vec/data_types/data_type_time.h +++ b/be/src/vec/data_types/data_type_time.h @@ -79,6 +79,13 @@ class DataTypeTimeV2 final : public DataTypeNumberBase { const char* get_family_name() const override { return "timev2"; } UInt32 get_scale() const override { return _scale; } + Field get_type_field(const IColumn& column, size_t row) const override { + Field field; + column.get(row, field); + field.set_type_info(get_type_id(), 0, static_cast(get_scale())); + return field; + } + private: UInt32 _scale; }; diff --git a/be/src/vec/data_types/data_type_time_v2.h b/be/src/vec/data_types/data_type_time_v2.h index e9f3fd383658ac..c00951ecae3c83 100644 --- a/be/src/vec/data_types/data_type_time_v2.h +++ b/be/src/vec/data_types/data_type_time_v2.h @@ -31,6 +31,7 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "runtime/define_primitive_type.h" +#include "vec/common/assert_cast.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" @@ -161,12 +162,22 @@ class DataTypeDateTimeV2 final : public DataTypeNumberBase { node.date_literal.value); } } + MutableColumnPtr create_column() const override; UInt32 get_scale() const override { return _scale; } void to_pb_column_meta(PColumnMeta* col_meta) const override; + Field get_type_field(const IColumn& column, size_t row) const override { + const auto& column_data = + assert_cast(column); + Field field; + column_data.get(row, field); + field.set_type_info(get_type_id(), 0, 
static_cast(get_scale())); + return field; + } + static void cast_to_date(const UInt64 from, Int64& to); static void cast_to_date_time(const UInt64 from, Int64& to); static void cast_to_date_v2(const UInt64 from, UInt32& to); diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp index 82bea452923ed5..384c706b589f23 100644 --- a/be/src/vec/data_types/get_least_supertype.cpp +++ b/be/src/vec/data_types/get_least_supertype.cpp @@ -252,6 +252,10 @@ void get_least_supertype_jsonb(const DataTypes& types, DataTypePtr* type) { for (const auto& type : types) { type_ids.insert(type->get_type_id()); } + if (type_ids.size() == 1) { + *type = types[0]; + return; + } get_least_supertype_jsonb(type_ids, type); } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index a56eb00dbdd6fb..c5f8dbe62e7484 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -27,6 +27,9 @@ #include "vec/columns/column_const.h" #include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/functions/function_helpers.h" namespace doris { @@ -234,21 +237,28 @@ void DataTypeArraySerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWri Status DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int64_t row_num) const { - // Use allocator instead of stack memory, since rapidjson hold the reference of String value - // otherwise causes stack use after free - auto& column_array = static_cast(column); - if (row_num > column_array.size()) { - return Status::InternalError("row num {} out of range {}!", row_num, column_array.size()); - } - // void* mem = allocator.Malloc(sizeof(vectorized::Field)); - 
void* mem = mem_pool.alloc(sizeof(vectorized::Field)); - if (!mem) { - return Status::InternalError("Malloc failed"); - } - vectorized::Field* array = new (mem) vectorized::Field(column_array[row_num]); + Arena& mem_pool, int64_t row_num, + const DataTypePtr& type) const { + const DataTypePtr& nested_type = + check_and_get_data_type(type.get())->get_nested_type(); + auto res = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = res.first; + row_num = res.second; + + const auto& data_column = assert_cast(*ptr); + const auto& offsets = data_column.get_offsets(); + + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; - convert_field_to_rapidjson(*array, result, allocator); + const IColumn& nested_column = data_column.get_data(); + result.SetArray(); + for (size_t i = offset; i < next_offset; ++i) { + rapidjson::Value val; + RETURN_IF_ERROR(nested_serde->write_one_cell_to_json(nested_column, val, allocator, + mem_pool, i, nested_type)); + result.PushBack(val, allocator); + } return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index cdd2115576030e..fdd79bcabb1e0c 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "common/status.h" @@ -33,6 +34,7 @@ class JsonbValue; namespace vectorized { class IColumn; class Arena; +class IDataType; class DataTypeArraySerDe : public DataTypeSerDe { public: @@ -73,7 +75,8 @@ class DataTypeArraySerDe : public DataTypeSerDe { Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int64_t row_num) const override; + int64_t row_num, + const std::shared_ptr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; void 
read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index c3b97b4273c9cf..a657319379ac8b 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -34,6 +34,7 @@ #include "vec/columns/column_vector.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" +#include "vec/data_types/serde/data_type_serde.h" namespace doris { class JsonbOutStream; @@ -72,6 +73,12 @@ class DataTypeDate64SerDe : public DataTypeNumberSerDe { const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int64_t start, int64_t end, std::vector& buffer_list) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } private: template diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index 484df3df62a3e4..874169b7c8f218 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -82,6 +82,13 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { const FormatOptions& options) const override; void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status 
_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index c1f8bab15ad004..62e5459929dcbf 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -80,6 +80,13 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe { void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 51867ced18fb04..e7dbc10ebf6d39 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -116,6 +116,13 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { int64_t start, int64_t end, std::vector& buffer_list) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const override; diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.h b/be/src/vec/data_types/serde/data_type_ipv4_serde.h index 44093412678165..c1774740ce98ae 100644 --- 
a/be/src/vec/data_types/serde/data_type_ipv4_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.h @@ -61,6 +61,13 @@ class DataTypeIPv4SerDe : public DataTypeNumberSerDe { void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.h b/be/src/vec/data_types/serde/data_type_ipv6_serde.h index 2634c2efbef3d7..13cc5739f45551 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.h @@ -71,6 +71,13 @@ class DataTypeIPv6SerDe : public DataTypeNumberSerDe { void write_one_cell_to_jsonb(const IColumn& column, JsonbWriterT& result, Arena* mem_pool, int unique_id, int64_t row_num) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index e597cdba224376..5053ddd88bdcd6 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -237,7 +237,8 @@ void convert_jsonb_to_rapidjson(const JsonbValue& val, 
rapidjson::Value& target, Status DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int64_t row_num) const { + Arena& mem_pool, int64_t row_num, + const DataTypePtr& type) const { const auto& data = assert_cast(column); const auto jsonb_val = data.get_data_at(row_num); if (jsonb_val.empty()) { diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index 5080b1ba46ed3c..9ffea32515e5b4 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -65,7 +65,7 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { std::vector& buffer_list) const override; Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int64_t row_num) const override; + int64_t row_num, const DataTypePtr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start, int64_t end) const override; diff --git a/be/src/vec/data_types/serde/data_type_nothing_serde.h b/be/src/vec/data_types/serde/data_type_nothing_serde.h index 7bf7d2e64314cc..a9f19757eb60df 100644 --- a/be/src/vec/data_types/serde/data_type_nothing_serde.h +++ b/be/src/vec/data_types/serde/data_type_nothing_serde.h @@ -107,6 +107,14 @@ class DataTypeNothingSerde : public DataTypeSerDe { std::vector& buffer_list) const override { return Status::NotSupported("write_column_to_orc with type " + column.get_name()); } + + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, + const std::shared_ptr& type) const override { + result.SetNull(); + return Status::OK(); + } }; } // 
namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index f21f160fb0a891..05c6a1902c1e64 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -367,14 +367,16 @@ Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone, Status DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int64_t row_num) const { - auto& col = static_cast(column); - auto& nested_col = col.get_nested_column(); + Arena& mem_pool, int64_t row_num, + const DataTypePtr& type) const { + const auto& col = static_cast(column); + const auto& nullable_type = static_cast(*type); + const auto& nested_col = col.get_nested_column(); if (col.is_null_at(row_num)) { result.SetNull(); } else { - RETURN_IF_ERROR(nested_serde->write_one_cell_to_json(nested_col, result, allocator, - mem_pool, row_num)); + RETURN_IF_ERROR(nested_serde->write_one_cell_to_json( + nested_col, result, allocator, mem_pool, row_num, nullable_type.get_nested_type())); } return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 159db890540dbc..604223dd9e8c37 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -19,6 +19,8 @@ #include +#include + #include "common/status.h" #include "data_type_serde.h" #include "util/jsonb_writer.h" @@ -30,7 +32,9 @@ class JsonbValue; namespace vectorized { class IColumn; class Arena; +class IDataType; #include "common/compile_check_begin.h" + class DataTypeNullableSerDe : public DataTypeSerDe { public: DataTypeNullableSerDe(const DataTypeSerDeSPtr& _nested_serde, int nesting_level = 1) @@ -96,7 +100,8 @@ class 
DataTypeNullableSerDe : public DataTypeSerDe { Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int64_t row_num) const override; + int64_t row_num, + const std::shared_ptr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; } diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 203cd9dbf46d67..547aa99aef1320 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -35,6 +35,7 @@ #include "vec/columns/column_vector.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" +#include "vec/data_types/data_type.h" namespace doris { class JsonbOutStream; @@ -104,7 +105,7 @@ class DataTypeNumberSerDe : public DataTypeSerDe { std::vector& buffer_list) const override; Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int64_t row_num) const override; + int64_t row_num, const DataTypePtr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; private: @@ -314,7 +315,8 @@ template Status DataTypeNumberSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int64_t row_num) const { + Arena& mem_pool, int64_t row_num, + const DataTypePtr& type) const { const auto& data = reinterpret_cast(column).get_data(); if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { result.SetInt(data[row_num]); diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp index 
fc536d9ef0df7b..9fa2ac56cf47bf 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp @@ -197,6 +197,39 @@ Status DataTypeObjectSerDe::write_column_to_orc(const std::string& timezone, con return Status::OK(); } +Status DataTypeObjectSerDe::write_one_cell_to_json( + const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int64_t row_num, + const std::shared_ptr& type) const { + const auto& var = assert_cast(column); + if (!var.is_finalized()) { + var.assume_mutable()->finalize(); + } + result.SetObject(); + // sort to make output stable, todo add a config + auto subcolumns = schema_util::get_sorted_subcolumns(var.get_subcolumns()); + for (const auto& entry : subcolumns) { + const auto& subcolumn = entry->data.get_finalized_column(); + const auto& subtype_serde = entry->data.get_least_common_type_serde(); + const auto& subtype = entry->data.get_least_common_type(); + if (subcolumn.is_null_at(row_num)) { + continue; + } + rapidjson::Value key; + key.SetString(entry->path.get_path().data(), (uint32_t)entry->path.get_path().size()); + rapidjson::Value val; + RETURN_IF_ERROR(subtype_serde->write_one_cell_to_json(subcolumn, val, allocator, mem_pool, + row_num, subtype)); + if (val.IsNull() && entry->path.empty()) { + // skip null value with empty key; it indicates the null json value of root in variant map, + // usually padding in nested arrays + continue; + } + result.AddMember(key, val, allocator); + } + return Status::OK(); +} + } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index c08d4d0af0d2c3..2eea8c49f2ce46 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -91,6 +91,11 @@ class DataTypeObjectSerDe : public DataTypeSerDe { int64_t start, int64_t
end, std::vector& buffer_list) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, + const std::shared_ptr& type) const override; + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp b/be/src/vec/data_types/serde/data_type_serde.cpp index 4aab198e70c84b..0df67fc65333a9 100644 --- a/be/src/vec/data_types/serde/data_type_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_serde.cpp @@ -48,88 +48,22 @@ DataTypeSerDeSPtrs create_data_type_serdes(const std::vector& s return serdes; } -void DataTypeSerDe::convert_variant_map_to_rapidjson( - const vectorized::VariantMap& map, rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator) { - target.SetObject(); - for (const auto& item : map) { - if (item.second.is_null()) { - continue; - } - rapidjson::Value key; - key.SetString(item.first.data(), cast_set(item.first.size())); - rapidjson::Value val; - convert_field_to_rapidjson(item.second, val, allocator); - if (val.IsNull() && item.first.empty()) { - // skip null value with empty key, indicate the null json value of root in variant map, - // usally padding in nested arrays - continue; - } - target.AddMember(key, val, allocator); - } -} - -void DataTypeSerDe::convert_array_to_rapidjson(const vectorized::Array& array, - rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator) { - target.SetArray(); - for (const vectorized::Field& item : array) { - rapidjson::Value val; - convert_field_to_rapidjson(item, val, allocator); - target.PushBack(val, allocator); - } -} - -void DataTypeSerDe::convert_field_to_rapidjson(const vectorized::Field& field, - rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator) { - switch (field.get_type()) { - case vectorized::Field::Types::Null: - target.SetNull(); - 
break; - case vectorized::Field::Types::Int64: - target.SetInt64(field.get()); - break; - case vectorized::Field::Types::Float64: - target.SetDouble(field.get()); - break; - case vectorized::Field::Types::JSONB: { - const auto& val = field.get(); - JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), val.get_size()); - convert_jsonb_to_rapidjson(*json_val, target, allocator); - break; - } - case vectorized::Field::Types::String: { - const String& val = field.get(); - target.SetString(val.data(), cast_set(val.size())); - break; - } - case vectorized::Field::Types::Array: { - const vectorized::Array& array = field.get(); - convert_array_to_rapidjson(array, target, allocator); - break; - } - case vectorized::Field::Types::VariantMap: { - const vectorized::VariantMap& map = field.get(); - convert_variant_map_to_rapidjson(map, target, allocator); - break; - } - default: - throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unkown field type: {}", - field.get_type_name()); - break; - } -} - Status DataTypeSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int64_t row_num) const { - return Status::InternalError("Not support write {} to rapidjson", column.get_name()); + Arena& mem_pool, int64_t row_num, + const DataTypePtr& type) const { + const std::string str_rep = type->to_string(column, row_num); + // copy into allocator-owned memory to prevent a heap-use-after-free + void* mem = allocator.Malloc(str_rep.size()); + memcpy(mem, str_rep.data(), str_rep.size()); + result.SetString((const char*)mem, (uint32_t)str_rep.size()); + return Status::OK(); } Status DataTypeSerDe::read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const { - return Status::NotSupported("Not support read {} from rapidjson", column.get_name()); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not support read {} from rapidjson", + column.get_name()); } const std::string
DataTypeSerDe::NULL_IN_COMPLEX_TYPE = "null"; diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 1a089bb73fe99c..5df62e4f410147 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -350,7 +350,8 @@ class DataTypeSerDe { // rapidjson virtual Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int64_t row_num) const; + Arena& mem_pool, int64_t row_num, + const std::shared_ptr& type) const; virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const; virtual DataTypeSerDeSPtrs get_nested_serdes() const { @@ -366,14 +367,6 @@ class DataTypeSerDe { // The _nesting_level of StructSerde is 1 // The _nesting_level of StringSerde is 2 int _nesting_level = 1; - - static void convert_field_to_rapidjson(const vectorized::Field& field, rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator); - static void convert_array_to_rapidjson(const vectorized::Array& array, rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator); - static void convert_variant_map_to_rapidjson(const vectorized::VariantMap& array, - rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator); }; /// Invert values since Arrow interprets 1 as a non-null value, while doris as a null diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 69a8cc2617191d..72d65c34497b8a 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -347,7 +347,7 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { } Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int64_t row_num) const override { + 
int64_t row_num, const DataTypePtr& type) const override { const auto& col = assert_cast(column); const auto& data_ref = col.get_data_at(row_num); result.SetString(data_ref.data, cast_set(data_ref.size)); diff --git a/be/src/vec/data_types/serde/data_type_time_serde.h b/be/src/vec/data_types/serde/data_type_time_serde.h index 0ebe79e522cbda..0c8d1248339805 100644 --- a/be/src/vec/data_types/serde/data_type_time_serde.h +++ b/be/src/vec/data_types/serde/data_type_time_serde.h @@ -28,6 +28,7 @@ namespace doris { class JsonbOutStream; #include "common/compile_check_begin.h" namespace vectorized { + class DataTypeTimeV2SerDe : public DataTypeNumberSerDe { public: DataTypeTimeV2SerDe(int scale = 0, int nesting_level = 1) @@ -38,6 +39,12 @@ class DataTypeTimeV2SerDe : public DataTypeNumberSerDe { Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int64_t row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } private: template diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 48619ff85f83c8..834b569ff90cc9 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -687,6 +687,11 @@ struct ConvertImplStringToJsonbAsJsonbString { ColumnString* dst_str = assert_cast(dst.get()); const auto* from_string = assert_cast(&col_from); JsonbWriter writer; + if (from_string->size() < input_rows_count) { + return Status::RuntimeError( + "Illegal column {} of first argument of conversion function", + col_from.get_name()); + } for (size_t i = 0; i < input_rows_count; i++) { auto str_ref = from_string->get_data_at(i); writer.reset(); @@ -850,7 +855,7 @@ 
struct ConvertNothingToJsonb { } }; -template +template struct ConvertImplFromJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, const uint32_t result, size_t input_rows_count) { @@ -864,16 +869,12 @@ struct ConvertImplFromJsonb { auto& null_map = null_map_col->get_data(); auto col_to = ColumnType::create(); - //IColumn & col_to = *res; - // size_t size = col_from.size(); col_to->reserve(input_rows_count); auto& res = col_to->get_data(); res.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { const auto& val = column_string->get_data_at(i); - // ReadBuffer read_buffer((char*)(val.data), val.size); - // RETURN_IF_ERROR(data_type_to->from_string(read_buffer, col_to)); if (val.size == 0) { null_map[i] = 1; @@ -896,6 +897,15 @@ struct ConvertImplFromJsonb { res[i] = 0; continue; } + if (value->isString()) { + // convert by parse + const auto& data = static_cast(value)->getBlob(); + size_t len = static_cast(value)->getBlobLen(); + ReadBuffer rb((char*)(data), len); + bool parsed = try_parse_impl(res[i], rb, context); + null_map[i] = !parsed; + continue; + } if constexpr (type_index == TypeIndex::UInt8) { // cast from json value to boolean type if (value->isTrue()) { @@ -1802,19 +1812,20 @@ class FunctionCast final : public IFunctionBase { bool jsonb_string_as_string) const { switch (to_type->get_type_id()) { case TypeIndex::UInt8: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int8: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int16: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int32: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int64: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int128: - return &ConvertImplFromJsonb::execute; + return 
&ConvertImplFromJsonb::execute; case TypeIndex::Float64: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::String: if (!jsonb_string_as_string) { // Conversion from String through parsing. diff --git a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp index f05919e4a8f477..f551d3146c1954 100644 --- a/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_mysql_test.cpp @@ -83,7 +83,7 @@ void serialize_and_deserialize_mysql_test() { {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}, {"k5", FieldType::OLAP_FIELD_TYPE_IPV4, 5, TYPE_IPV4, false}, {"k6", FieldType::OLAP_FIELD_TYPE_IPV6, 6, TYPE_IPV6, false}}; - int row_num = 7; + int64_t row_num = 7; // make desc and generate block vectorized::VExprContextSPtrs _output_vexpr_ctxs; _output_vexpr_ctxs.resize(cols.size()); diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java index e9f1b50c0dfad8..45c91bec5bbd8e 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -550,10 +550,8 @@ public static ScalarType createJsonbType() { } public static ScalarType createVariantType() { - // length checked in analysis - ScalarType type = new ScalarType(PrimitiveType.VARIANT); - type.len = MAX_STRING_LENGTH; - return type; + // Not return ScalarType return VariantType instead for compatibility reason + return new VariantType(); } public static ScalarType createVarchar(int len) { diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java index ecbfd30ca23538..c08c93135a8d3c 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java +++ 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java @@ -42,13 +42,17 @@ public class StructField { public static final String DEFAULT_FIELD_NAME = "col"; - public StructField(String name, Type type, String comment, boolean containsNull) { - this.name = name.toLowerCase(); + public StructField(String name, Type type, String comment, boolean containsNull, boolean nameCaseSensitive) { + this.name = (nameCaseSensitive ? name : name.toLowerCase()); this.type = type; this.comment = comment; this.containsNull = containsNull; } + public StructField(String name, Type type, String comment, boolean containsNull) { + this(name, type, comment, containsNull, false); + } + public StructField(String name, Type type) { this(name, type, null, true); } diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 7dfcfd15ebec84..b5dd4f11db914f 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -136,6 +136,8 @@ public abstract class Type { private static final ArrayList arraySubTypes; private static final ArrayList mapSubTypes; private static final ArrayList structSubTypes; + + private static final ArrayList variantSubTypes; private static final ArrayList trivialTypes; static { @@ -171,6 +173,8 @@ public abstract class Type { typeMap.put("MAP", Type.MAP); typeMap.put("OBJECT", Type.UNSUPPORTED); typeMap.put("ARRAY", Type.ARRAY); + typeMap.put("IPV4", Type.IPV4); + typeMap.put("IPV6", Type.IPV6); typeMap.put("QUANTILE_STATE", Type.QUANTILE_STATE); } @@ -307,6 +311,27 @@ public abstract class Type { structSubTypes.add(ARRAY); structSubTypes.add(MAP); structSubTypes.add(STRUCT); + + variantSubTypes = Lists.newArrayList(); + variantSubTypes.add(BOOLEAN); + variantSubTypes.addAll(integerTypes); + variantSubTypes.add(FLOAT); + variantSubTypes.add(DOUBLE); + variantSubTypes.add(DECIMAL32); // same 
DEFAULT_DECIMALV3 + variantSubTypes.add(DECIMAL64); + variantSubTypes.add(DECIMAL128); + variantSubTypes.add(DECIMAL256); + variantSubTypes.add(DATE); + variantSubTypes.add(DATETIME); + variantSubTypes.add(DATEV2); + variantSubTypes.add(DATETIMEV2); + variantSubTypes.add(IPV4); + variantSubTypes.add(IPV6); + variantSubTypes.add(CHAR); + variantSubTypes.add(VARCHAR); + variantSubTypes.add(STRING); + variantSubTypes.add(ARRAY); + variantSubTypes.add(NULL); } public static final Set DATE_SUPPORTED_JAVA_TYPE = Sets.newHashSet(LocalDate.class, java.util.Date.class, @@ -375,6 +400,10 @@ public static ArrayList getStructSubTypes() { return structSubTypes; } + public static ArrayList getVariantSubTypes() { + return variantSubTypes; + } + /** * Return true if this is complex type and support subType */ diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java index 924b197e4d739e..ea0d4915ed9c32 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java @@ -21,22 +21,95 @@ import org.apache.doris.thrift.TTypeNode; import org.apache.doris.thrift.TTypeNodeType; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.annotations.SerializedName; + +import java.util.ArrayList; +import java.util.HashMap; + public class VariantType extends ScalarType { + @SerializedName(value = "fieldMap") + private final HashMap fieldMap = Maps.newHashMap(); + + @SerializedName(value = "fields") + private final ArrayList predefinedFields; + public VariantType() { super(PrimitiveType.VARIANT); + this.predefinedFields = Lists.newArrayList(); + } + + public VariantType(ArrayList fields) { + super(PrimitiveType.VARIANT); + Preconditions.checkNotNull(fields); + 
this.predefinedFields = fields; + for (int i = 0; i < this.predefinedFields.size(); ++i) { + this.predefinedFields.get(i).setPosition(i); + fieldMap.put(this.predefinedFields.get(i).getName(), this.predefinedFields.get(i)); + } + } + + @Override + public String toSql(int depth) { + if (predefinedFields.isEmpty()) { + return "variant"; + } + if (depth >= MAX_NESTING_DEPTH) { + return "variant<...>"; + } + ArrayList fieldsSql = Lists.newArrayList(); + for (StructField f : predefinedFields) { + fieldsSql.add(f.toSql(depth + 1)); + } + return String.format("variant<%s>", Joiner.on(",").join(fieldsSql)); + } + + public ArrayList getPredefinedFields() { + return predefinedFields; } @Override public void toThrift(TTypeDesc container) { - // not use ScalarType's toThrift for compatibility, because VariantType is not extends ScalarType previously + // use ScalarType's toThrift for compatibility, because VariantType use ScalarType to thrift previously + if (predefinedFields.isEmpty()) { + super.toThrift(container); + return; + } TTypeNode node = new TTypeNode(); container.types.add(node); node.setType(TTypeNodeType.VARIANT); + // predefined fields + node.setStructFields(new ArrayList<>()); + for (StructField field : predefinedFields) { + field.toThrift(container, node); + } + } + + @Override + public boolean supportSubType(Type subType) { + for (Type supportedType : Type.getVariantSubTypes()) { + if (subType.getPrimitiveType() == supportedType.getPrimitiveType()) { + return true; + } + } + return false; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof VariantType)) { + return false; + } + VariantType otherVariantType = (VariantType) other; + return otherVariantType.getPredefinedFields().equals(predefinedFields); } @Override - public boolean matchesType(Type t) { - return t.isVariantType() || t.isStringType(); + public boolean matchesType(Type type) { + return type.isVariantType(); } } diff --git 
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 6c37a2b276b0dd..be1e547cbe4dba 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -1709,6 +1709,7 @@ dataType : complex=ARRAY LT dataType GT #complexDataType | complex=MAP LT dataType COMMA dataType GT #complexDataType | complex=STRUCT LT complexColTypeList GT #complexDataType + | VARIANT LT variantSubColTypeList GT #variantPredefinedFields | AGG_STATE LT functionNameIdentifier LEFT_PAREN dataTypes+=dataTypeWithNullable (COMMA dataTypes+=dataTypeWithNullable)* RIGHT_PAREN GT #aggStateDataType @@ -1759,6 +1760,13 @@ complexColType : identifier COLON dataType commentSpec? ; +variantSubColTypeList + : variantSubColType (COMMA variantSubColType)* + ; +variantSubColType + : qualifiedName COLON dataType commentSpec? + ; + commentSpec : COMMENT STRING_LITERAL ; diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 05804343bc3417..9e770aeece85fd 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -43,6 +43,7 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.VariantType; import org.apache.doris.catalog.GeneratedColumnInfo; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.StructField; @@ -817,7 +818,9 @@ nonterminal Expr function_call_expr, array_expr, map_expr; nonterminal ArrayLiteral array_literal; nonterminal MapLiteral map_literal; nonterminal StructField struct_field; +nonterminal StructField variant_field; nonterminal ArrayList struct_field_list; +nonterminal ArrayList variant_field_list; nonterminal StructLiteral struct_literal; nonterminal 
AnalyticWindow opt_window_clause; nonterminal AnalyticWindow.Type window_type; @@ -6952,6 +6955,8 @@ type ::= {: RESULT = new MapType(key_type,value_type); :} | KW_STRUCT LESSTHAN struct_field_list:fields GREATERTHAN {: RESULT = new StructType(fields); :} + | KW_VARIANT LESSTHAN variant_field_list:fields GREATERTHAN + {: RESULT = new VariantType(fields); :} | KW_CHAR LPAREN INTEGER_LITERAL:len RPAREN {: ScalarType type = ScalarType.createCharType(len.intValue()); RESULT = type; @@ -7267,6 +7272,23 @@ struct_field_list ::= :} ; +variant_field ::= + ident:name COLON type:type opt_comment:comment + {: RESULT = new StructField(name, type, comment, true); :} + ; + +variant_field_list ::= + variant_field:field + {: + RESULT = Lists.newArrayList(field); + :} + | variant_field_list:fields COMMA variant_field:field + {: + fields.add(field); + RESULT = fields; + :} + ; + struct_literal ::= LBRACE expr_list:list RBRACE {: diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index eb1cc09f5066c1..1b6d65d3ecee65 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -627,8 +627,9 @@ private boolean processModifyColumn(ModifyColumnClause alterClause, OlapTable ol if (!col.equals(modColumn)) { typeChanged = true; // TODO:the case where columnPos is not empty has not been considered - if (columnPos == null && col.getDataType() == PrimitiveType.VARCHAR - && modColumn.getDataType() == PrimitiveType.VARCHAR) { + if (columnPos == null && ((col.getDataType() == PrimitiveType.VARCHAR + && modColumn.getDataType() == PrimitiveType.VARCHAR) + || (col.getDataType().isVariantType() && modColumn.getDataType().isVariantType()))) { col.checkSchemaChangeAllowed(modColumn); lightSchemaChange = olapTable.getEnableLightSchemaChange(); } diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index de257991ca6ba4..e18a66099e130b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -158,6 +158,9 @@ public CastExpr(Type targetType, Expr e, Void v) { if (from.isComplexType() && type.isJsonbType()) { nullableMode = Function.NullableMode.ALWAYS_NULLABLE; } + if (from.isVariantType() || to.isVariantType()) { + nullableMode = Function.NullableMode.ALWAYS_NULLABLE; + } Preconditions.checkState(nullableMode != null, "cannot find nullable node for cast from " + from + " to " + to); fn = new Function(new FunctionName(getFnName(type)), Lists.newArrayList(e.type), type, diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 0ae6a4f8bdb5eb..fe0597a3b62e96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -333,11 +333,19 @@ public void createChildrenColumn(Type type, Column column) { column.addChildrenColumn(v); } else if (type.isStructType()) { ArrayList fields = ((StructType) type).getFields(); - for (StructField field : fields) { - Column c = new Column(field.getName(), field.getType()); - c.setIsAllowNull(field.getContainsNull()); - column.addChildrenColumn(c); - } + addChildren(column, fields); + } else if (type.isVariantType()) { + // variant may contain predefined structured fields + ArrayList fields = ((VariantType) type).getPredefinedFields(); + addChildren(column, fields); + } + } + + private void addChildren(Column column, ArrayList fields) { + for (StructField field : fields) { + Column c = new Column(field.getName(), field.getType()); + c.setIsAllowNull(field.getContainsNull()); + column.addChildrenColumn(c); } } @@ -671,6 +679,14 @@ private 
void setChildrenTColumn(Column children, TColumn tColumn) { toChildrenThrift(children, childrenTColumn); } + private void addChildren(Column column, TColumn tColumn) { + List childrenColumns = column.getChildren(); + tColumn.setChildrenColumn(new ArrayList<>()); + for (Column c : childrenColumns) { + setChildrenTColumn(c, tColumn); + } + } + private void toChildrenThrift(Column column, TColumn tColumn) { if (column.type.isArrayType()) { Column children = column.getChildren().get(0); @@ -683,11 +699,10 @@ private void toChildrenThrift(Column column, TColumn tColumn) { setChildrenTColumn(k, tColumn); setChildrenTColumn(v, tColumn); } else if (column.type.isStructType()) { - List childrenColumns = column.getChildren(); - tColumn.setChildrenColumn(new ArrayList<>()); - for (Column children : childrenColumns) { - setChildrenTColumn(children, tColumn); - } + addChildren(column, tColumn); + } else if (column.type.isVariantType()) { + // variant may contain predefined structured fields + addChildren(column, tColumn); } } @@ -828,15 +843,22 @@ public OlapFile.ColumnPB toPb(Set bfColumns, List indexes) throws Column v = this.getChildren().get(1); builder.addChildrenColumns(v.toPb(Sets.newHashSet(), Lists.newArrayList())); } else if (this.type.isStructType()) { - List childrenColumns = this.getChildren(); - for (Column c : childrenColumns) { - builder.addChildrenColumns(c.toPb(Sets.newHashSet(), Lists.newArrayList())); - } + addChildren(builder); + } else if (this.type.isVariantType()) { + // variant may contain predefined structured fields + addChildren(builder); } OlapFile.ColumnPB col = builder.build(); return col; } + + private void addChildren(OlapFile.ColumnPB.Builder builder) throws DdlException { + List childrenColumns = this.getChildren(); + for (Column c : childrenColumns) { + builder.addChildrenColumns(c.toPb(Sets.newHashSet(), Lists.newArrayList())); + } + } // CLOUD_CODE_END public void checkSchemaChangeAllowed(Column other) throws DdlException { diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index a9ce9215d4d9be..58dd87243b467f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -322,6 +322,9 @@ import org.apache.doris.nereids.DorisParser.UpdateContext; import org.apache.doris.nereids.DorisParser.UserIdentifyContext; import org.apache.doris.nereids.DorisParser.UserVariableContext; +import org.apache.doris.nereids.DorisParser.VariantPredefinedFieldsContext; +import org.apache.doris.nereids.DorisParser.VariantSubColTypeContext; +import org.apache.doris.nereids.DorisParser.VariantSubColTypeListContext; import org.apache.doris.nereids.DorisParser.WhereClauseContext; import org.apache.doris.nereids.DorisParser.WindowFrameContext; import org.apache.doris.nereids.DorisParser.WindowSpecContext; @@ -706,6 +709,7 @@ import org.apache.doris.nereids.types.StructField; import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.VariantType; import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.RelationUtil; @@ -3266,6 +3270,8 @@ public ColumnDefinition visitColumnDef(ColumnDefContext ctx) { ? visitPrimitiveDataType(((PrimitiveDataTypeContext) ctx.type)) : ctx.type instanceof ComplexDataTypeContext ? visitComplexDataType((ComplexDataTypeContext) ctx.type) + : ctx.type instanceof VariantPredefinedFieldsContext + ? 
visitVariantPredefinedFields((VariantPredefinedFieldsContext) ctx.type) : visitAggStateDataType((AggStateDataTypeContext) ctx.type); colType = colType.conversion(); boolean isKey = ctx.KEY() != null; @@ -4120,6 +4126,30 @@ public DataType visitPrimitiveDataType(PrimitiveDataTypeContext ctx) { }); } + @Override + public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) { + return new VariantType(visitVariantSubColTypeList(ctx.variantSubColTypeList())); + } + + @Override + public List visitVariantSubColTypeList(VariantSubColTypeListContext ctx) { + return ctx.variantSubColType().stream().map( + this::visitVariantSubColType).collect(ImmutableList.toImmutableList()); + } + + @Override + public StructField visitVariantSubColType(VariantSubColTypeContext ctx) { + String comment; + if (ctx.commentSpec() != null) { + comment = ctx.commentSpec().STRING_LITERAL().getText(); + comment = LogicalPlanBuilderAssistant.escapeBackSlash(comment.substring(1, comment.length() - 1)); + } else { + comment = ""; + } + return new StructField(ctx.qualifiedName().getText(), + typedVisit(ctx.dataType()), true, comment, true /*name case-sensitive*/); + } + @Override public DataType visitComplexDataType(ComplexDataTypeContext ctx) { return ParserUtils.withOrigin(ctx, () -> { @@ -4150,7 +4180,8 @@ public StructField visitComplexColType(ComplexColTypeContext ctx) { } else { comment = ""; } - return new StructField(ctx.identifier().getText(), typedVisit(ctx.dataType()), true, comment); + return new StructField(ctx.identifier().getText(), + typedVisit(ctx.dataType()), true, comment, false /*name case-insensitive*/); } private String parseConstant(ConstantContext context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java index 047ef2c1b51f5b..86f7228d7aef44 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java @@ -640,6 +640,19 @@ private void validateScalarType(ScalarType scalarType) { } break; } + case VARIANT: + ArrayList predefinedFields = + ((org.apache.doris.catalog.VariantType) scalarType).getPredefinedFields(); + Set fieldNames = new HashSet<>(); + for (org.apache.doris.catalog.StructField field : predefinedFields) { + Type fieldType = field.getType(); + validateNestedType(scalarType, fieldType); + if (!fieldNames.add(field.getName())) { + throw new AnalysisException("Duplicate field name " + field.getName() + + " in variant " + scalarType.toSql()); + } + } + break; case INVALID_TYPE: throw new AnalysisException("Invalid type."); default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java index e095f25aa66bc8..7567d9295a0be9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java @@ -32,19 +32,28 @@ public class StructField { private final boolean nullable; private final String comment; + private final boolean nameCaseSensitive; + /** * StructField Constructor * @param name The name of this field * @param dataType The data type of this field * @param nullable Indicates if values of this field can be `null` values + * @param nameCaseSensitive Indicates if name is case-sensitive */ - public StructField(String name, DataType dataType, boolean nullable, String comment) { - this.name = Objects.requireNonNull(name, "name should not be null").toLowerCase(Locale.ROOT); + public StructField(String name, DataType dataType, boolean nullable, String comment, boolean nameCaseSensitive) { + this.nameCaseSensitive = nameCaseSensitive; + this.name =
nameCaseSensitive ? Objects.requireNonNull(name, "name should not be null") + : Objects.requireNonNull(name, "name should not be null").toLowerCase(Locale.ROOT); this.dataType = Objects.requireNonNull(dataType, "dataType should not be null"); this.nullable = nullable; this.comment = Objects.requireNonNull(comment, "comment should not be null"); } + public StructField(String name, DataType dataType, boolean nullable, String comment) { + this(name, dataType, nullable, comment, false); + } + public String getName() { return name; } @@ -69,16 +78,16 @@ public StructField conversion() { } public StructField withDataType(DataType dataType) { - return new StructField(name, dataType, nullable, comment); + return new StructField(name, dataType, nullable, comment, nameCaseSensitive); } public StructField withDataTypeAndNullable(DataType dataType, boolean nullable) { - return new StructField(name, dataType, nullable, comment); + return new StructField(name, dataType, nullable, comment, nameCaseSensitive); } public org.apache.doris.catalog.StructField toCatalogDataType() { return new org.apache.doris.catalog.StructField( - name, dataType.toCatalogDataType(), comment, nullable); + name, dataType.toCatalogDataType(), comment, nullable, nameCaseSensitive); } public String toSql() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java index 63752594998b3a..2bbaa7982bcf80 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -18,26 +18,62 @@ package org.apache.doris.nereids.types; import org.apache.doris.catalog.Type; -import org.apache.doris.nereids.annotation.Developing; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.types.coercion.PrimitiveType; +import com.google.common.base.Suppliers; +import 
com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; +import java.util.stream.Collectors; /** * Variant type in Nereids. * Why Variant is not complex type? Since it's nested structure is not pre-defined, then using * primitive type will be easy to handle meta info in FE. + * Also, could predefine some fields of nested columns. + * Example: VARIANT <`a.b`:INT, a.c:DATETIMEV2> + * */ -@Developing public class VariantType extends PrimitiveType { public static final VariantType INSTANCE = new VariantType(); public static final int WIDTH = 24; + private final List predefinedFields; + private final Supplier> pathToFields; + + // No predefined fields + public VariantType() { + predefinedFields = Lists.newArrayList(); + pathToFields = Suppliers.memoize(Maps::newTreeMap); + } + + /** + * Contains predefined fields like struct + */ + public VariantType(List fields) { + this.predefinedFields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); + this.pathToFields = Suppliers.memoize(() -> this.predefinedFields.stream().collect(ImmutableMap.toImmutableMap( + StructField::getName, f -> f, (f1, f2) -> { + throw new AnalysisException("The name of the struct field cannot be repeated." 
+ + " same name fields are " + f1 + " and " + f2); + }))); + } + @Override public Type toCatalogDataType() { - return Type.VARIANT; + return new org.apache.doris.catalog.VariantType(predefinedFields.stream() + .map(StructField::toCatalogDataType) + .collect(Collectors.toCollection(ArrayList::new))); } @Override @@ -46,8 +82,11 @@ public boolean acceptsType(DataType other) { } @Override - public String simpleString() { - return "variant"; + public String toSql() { + if (predefinedFields.isEmpty()) { + return "VARIANT"; + } + return "VARIANT<" + predefinedFields.stream().map(StructField::toSql).collect(Collectors.joining(",")) + ">"; } @Override @@ -71,11 +110,6 @@ public int width() { return WIDTH; } - @Override - public String toSql() { - return "VARIANT"; - } - @Override public String toString() { return toSql(); diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index bc001126bceadd..ab9bb0beb91869 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -259,10 +259,3 @@ lakesoulMinioEndpoint="*******" metaServiceToken = "greedisgood9999" instanceId = "default_instance_id" multiClusterInstance = "default_instance_id" - -storageProvider = "oss" -cbsS3Ak = "*******" -cbsS3Sk = "*******" -cbsS3Endpoint = "oss-cn-beijing.aliyuncs.com" -cbsS3Bucket = "test-bucket" -cbsS3Prefix = "test-cluster-prefix" diff --git a/regression-test/data/variant_p0/nested.out b/regression-test/data/variant_p0/nested.out index d0e0e9c822ddc2..e97b4eb8b10ec5 100644 --- a/regression-test/data/variant_p0/nested.out +++ b/regression-test/data/variant_p0/nested.out @@ -157,15 +157,15 @@ v.xx tinyint Yes false \N NONE 10 {"xx":10} -- !sql -- -[] -[{"ba":"11111"}, {"a":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[] -[{"baaa":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"ba":"11111"}, {"a":"1111"}, 
{"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"mmm":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"ba":"11111"}, {"a":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"yyy":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"yyy":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] +{} +[{"ba":"11111"},{"a":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +{} +[{"baaa":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"ba":"11111"},{"a":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"mmm":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"ba":"11111"},{"a":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"yyy":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"yyy":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] -- !explode_sql -- 19 10 diff --git a/regression-test/data/variant_p0/predefine/load.out b/regression-test/data/variant_p0/predefine/load.out new file mode 100644 index 00000000000000..70b10435cdc87a --- /dev/null +++ b/regression-test/data/variant_p0/predefine/load.out @@ -0,0 +1,151 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +1 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +2 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +3 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} +4 4 {"a":{"b":{"c":678910,"d":33.222}}} +5 5 {} +6 6 \N +7 7 {"xxx":12345} +8 8 {"yyy":111.111} +9 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +10 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +11 4 {"a":{"b":{"c":678910,"d":33.222}}} +12 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} + +-- !sql -- +id bigint No true \N +type varchar(30) Yes false \N NONE +v1 variant Yes false \N NONE +v1.a.b.c int Yes false \N NONE +v1.a.b.d double Yes false \N NONE +v1.dcm decimal(38,0) Yes false \N NONE +v1.dt datetime Yes false \N NONE +v1.ip ipv4 Yes false \N NONE +v1.ss text Yes false \N NONE +v1.xxx smallint Yes false \N NONE +v1.yyy double Yes false \N NONE + +-- !sql -- +127.0.0.1 +127.0.0.1 +127.0.0.1 +127.0.0.1 +127.0.0.1 +127.0.0.1 + +-- !sql -- +123.456000000 +123.456000000 + +-- !sql -- +123.456000000 +456.123000000 +789.123000000 +\N +\N +\N +\N +\N +456.123000000 +123.456000000 +\N +789.123000000 + +-- !sql -- +2022-01-01 11:11:11 +2022-01-01 11:11:11 + +-- !sql -- +2022-01-01 11:11:11 +2022-01-01 11:11:11 + +-- !sql -- +2 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +9 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} + +-- !sql -- +1 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +2 2 
{"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +3 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} +9 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +10 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +12 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} + +-- !sql -- +id bigint No true \N +v1 variant Yes false \N NONE +v1.PREDEFINE_COL1 smallint Yes false \N NONE +v1.PREDEFINE_COL2 double Yes false \N NONE +v1.PREDEFINE_COL3 text Yes false \N NONE +v1.PREDEFINE_COL4 text Yes false \N NONE +v1.predefine_col1 smallint Yes false \N NONE +v1.predefine_col2 double Yes false \N NONE +v1.predefine_col3 text Yes false \N NONE +v1.predefine_col4 text Yes false \N NONE + +-- !sql -- +1 {"predefine_col1":1024} +2 {"predefine_col2":1.11111} +3 {"predefine_col3":"11111.00000"} +4 {"predefine_col4":"2020-01-01-01"} +5 {"PREDEFINE_COL1":1024} +6 {"PREDEFINE_COL2":1.11111} +7 {"PREDEFINE_COL3":"11111.00000"} +8 {"PREDEFINE_COL4":"2020-01-01-01"} + +-- !sql -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 
11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !sql -- +101 {"a":1} {"dcm":"1111111"} \N +102 {"a":1} {"dcm":"1111111"} {"dcm":1111111} +103 {"a":1} {"dcm":"1111111"} {"dt":"2021-01-01 11:11:11"} + +-- !sql -- +id bigint No true \N +v1 variant Yes false \N NONE +v2 variant Yes false \N NONE +v3 variant Yes false \N NONE +v1.PREDEFINE_COL1 smallint Yes false \N NONE 
+v1.PREDEFINE_COL2 double Yes false \N NONE +v1.PREDEFINE_COL3 text Yes false \N NONE +v1.PREDEFINE_COL4 text Yes false \N NONE +v1.a tinyint Yes false \N NONE +v1.predefine_col1 smallint Yes false \N NONE +v1.predefine_col2 double Yes false \N NONE +v1.predefine_col3 text Yes false \N NONE +v1.predefine_col4 text Yes false \N NONE +v2.dcm decimal(9,0) Yes false \N NONE +v3.dcm decimal(9,0) Yes false \N NONE +v3.dt datetime Yes false \N NONE + +-- !sql -- +1 {"nested":[{"a":123,"b":"456"}]} +1 {"nested":[{"a":123,"b":"456"}]} +1 {"nested":[{"a":123,"b":"456"}]} +1 {"nested":[{"a":123,"b":"456"}]} +1 {"nested":[{"a":123,"b":"456"}]} + +-- !sql -- +[{"a":123,"b":"456"}] +[{"a":123,"b":"456"}] +[{"a":123,"b":"456"}] +[{"a":123,"b":"456"}] +[{"a":123,"b":"456"}] + +-- !sql -- +[123] +[123] +[123] +[123] +[123] + +-- !sql -- +id bigint No true \N +v variant Yes false \N NONE +v.auto_type int Yes false \N NONE + diff --git a/regression-test/data/variant_p0/predefine/sql/q01.out b/regression-test/data/variant_p0/predefine/sql/q01.out new file mode 100644 index 00000000000000..54f9bace38b13d --- /dev/null +++ b/regression-test/data/variant_p0/predefine/sql/q01.out @@ -0,0 +1,34 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !q01 -- +0 + +-- !q01_2 -- +id bigint No true \N +v1 variant,array_string:array,array_decimal:array,array_datetime:array,array_datetimev2:array,array_date:array,array_datev2:array,array_ipv4:array,array_ipv6:array,array_float:array,array_boolean:array,int_:int,string_:text,decimal_:decimalv3(27,9),datetime_:datetime,datetimev2_:datetimev2(6),date_:date,datev2_:datev2,ipv4_:ipv4,ipv6_:ipv6,float_:float,boolean_:boolean,varchar_:varchar(65533)> Yes false \N NONE +v1.array_boolean array Yes false \N NONE +v1.array_date array Yes false \N NONE +v1.array_datetime array Yes false \N NONE +v1.array_datetimev2 array Yes false \N NONE +v1.array_datev2 array Yes false \N NONE +v1.array_decimal array Yes false \N NONE +v1.array_float array Yes false \N NONE +v1.array_int array Yes false \N NONE +v1.array_ipv4 array Yes false \N NONE +v1.array_ipv6 array Yes false \N NONE +v1.array_string array Yes false \N NONE +v1.boolean_ boolean Yes false \N NONE +v1.date_ date Yes false \N NONE +v1.datetime_ datetime Yes false \N NONE +v1.datetimev2_ datetime Yes false \N NONE +v1.datev2_ date Yes false \N NONE +v1.decimal_ decimal(38,0) Yes false \N NONE +v1.ext_1 double Yes false \N NONE +v1.ext_2 text Yes false \N NONE +v1.ext_3 array Yes false \N NONE +v1.float_ float Yes false \N NONE +v1.int_ int Yes false \N NONE +v1.ipv4_ ipv4 Yes false \N NONE +v1.ipv6_ ipv6 Yes false \N NONE +v1.string_ text Yes false \N NONE +v1.varchar_ varchar(65533) Yes false \N NONE + diff --git a/regression-test/data/variant_p0/predefine/sql/q02.out b/regression-test/data/variant_p0/predefine/sql/q02.out new file mode 100644 index 00000000000000..df34d03b16352f --- /dev/null +++ b/regression-test/data/variant_p0/predefine/sql/q02.out @@ -0,0 +1,103 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !q02 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +10 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +11 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +12 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +13 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +14 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +15 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_2 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 
{"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_3 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_4 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 
11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_5 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello 
world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_6 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 
00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_7 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 
11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_8 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello 
world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_9 -- +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_10 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 
00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_11 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} + +-- !q02_12 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_13 -- +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_14 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_15 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_16 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_17 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_18 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra 
field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_19 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_20 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 
00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} + +-- !q02_21 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 
00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_22 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} + diff --git a/regression-test/suites/variant_github_events_p2/load.groovy b/regression-test/suites/variant_github_events_p2/load.groovy index 8e6c05ad3e91d1..47c56f72a11c79 100644 --- 
a/regression-test/suites/variant_github_events_p2/load.groovy +++ b/regression-test/suites/variant_github_events_p2/load.groovy @@ -236,4 +236,11 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){ // query with inverted index qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int) from github_events where v["repo"]["name"] match 'xpressengine' order by 1;""" qt_sql """select count() from github_events where v["repo"]["name"] match 'apache' order by 1;""" + + // specify schema + // sql "alter table github_events2 modify column v variant<`payload.comment.id`:int,`payload.commits.url`:text,`payload.forkee.has_pages`:tinyint>" + // load_json_data.call("github_events2", """${getS3Url() + '/regression/gharchive.m/2022-11-07-23.json'}""") + // qt_sql "select * from github_events2 WHERE 1=1 ORDER BY k DESC LIMIT 10" + // qt_sql "select v['payload']['commits'] from github_events2 WHERE 1=1 ORDER BY k DESC LIMIT 10" + // qt_sql "select v['payload']['commits']['url'] from github_events2 WHERE 1=1 ORDER BY k DESC LIMIT 10" } \ No newline at end of file diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index cd5e9ee523d922..76dea91d8756c4 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -441,10 +441,10 @@ suite("regression_test_variant", "p0"){ sql """insert into var_as_key values(2, '{"b" : 11}')""" qt_sql "select * from var_as_key order by k" - test { - sql """select * from ghdata where cast(v['actor']['url'] as ipv4) = '127.0.0.1'""" - exception("Invalid type for variant column: 36") - } + // test { + // sql """select * from ghdata where cast(v['actor']['url'] as ipv4) = '127.0.0.1'""" + // exception("Invalid type for variant column: 36") + // } if (!isCloudMode()) { test { diff --git a/regression-test/suites/variant_p0/nested.groovy b/regression-test/suites/variant_p0/nested.groovy index 
7df361c5731644..9faed4c1fb6377 100644 --- a/regression-test/suites/variant_p0/nested.groovy +++ b/regression-test/suites/variant_p0/nested.groovy @@ -21,7 +21,6 @@ suite("regression_test_variant_nested", "p0"){ getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); try { - def table_name = "var_nested" sql "DROP TABLE IF EXISTS ${table_name}" diff --git a/regression-test/suites/variant_p0/predefine/load.groovy b/regression-test/suites/variant_p0/predefine/load.groovy new file mode 100644 index 00000000000000..db698477cc261c --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/load.groovy @@ -0,0 +1,291 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_predefine_schema", "p0"){ + sql """DROP TABLE IF EXISTS test_predefine""" + sql """ + CREATE TABLE `test_predefine` ( + `id` bigint NOT NULL, + `type` varchar(30) NULL, + `v1` variant NULL, + INDEX idx_var_sub(`v1`) USING INVERTED PROPERTIES("parser" = "english", "sub_column_path" = "a.b.c") ) + ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( "replication_allocation" = "tag.location.default: 1"); + """ + + sql """insert into test_predefine values(1, '1', '{"a" : {"b" : {"c" : "123456", "d" : "11.111"}}, "ss" : 199991111, "dcm" : 123.456, "dt" : "2021-01-01 00:00:00", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(2, '2', '{"a" : {"b" : {"c" : 678910, "d" : 22.222}}, "ss" : "29999111", "dcm" : "456.123", "dt" : "2022-01-01 11:11:11", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(3, '3', '{"dcm" : 789.123, "dt" : "2025-01-01 11:11:11.1", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(4, '4', '{"a" : {"b" : {"c" : "678910", "d" : "33.222"}}}')""" + sql """insert into test_predefine values(5, '5', 'null')""" + sql """insert into test_predefine values(6, '6', null)""" + sql """insert into test_predefine values(7, '7', '{"xxx" : 12345}')""" + sql """insert into test_predefine values(8, '8', '{"yyy" : 111.111}')""" + sql """insert into test_predefine values(9, '2', '{"a" : {"b" : {"c" : 678910, "d" : 22.222}}, "ss" : "29999111", "dcm" : "456.123", "dt" : "2022-01-01 11:11:11", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(10, '1', '{"a" : {"b" : {"c" : "123456", "d" : "11.111"}}, "ss" : 199991111, "dcm" : 123.456, "dt" : "2021-01-01 00:00:00", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(12, '3', '{"dcm" : 789.123, "dt" : "2025-01-01 11:11:11.1", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(11, '4', '{"a" : {"b" : {"c" : "678910", "d" : "33.222"}}}')""" + qt_sql """select * 
from test_predefine order by id""" + sql """set describe_extend_variant_column = true""" + qt_sql "desc test_predefine" + + qt_sql """select cast(v1['ip'] as ipv4) from test_predefine where cast(v1['ip'] as ipv4) = '127.0.0.1';""" + qt_sql """select cast(v1['dcm'] as decimal) from test_predefine where cast(v1['dcm'] as decimal) = '123.456';""" + qt_sql """select v1['dcm'] from test_predefine order by id;""" + qt_sql """select v1['dt'] from test_predefine where cast(v1['dt'] as datetime) = '2022-01-01 11:11:11';""" + qt_sql """select v1['dt'] from test_predefine where cast(v1['dt'] as datetime) = '2022-01-01 11:11:11' order by id limit 10""" + qt_sql """select * from test_predefine where cast(v1['dt'] as datetime) = '2022-01-01 11:11:11' order by id limit 10;""" + qt_sql """select * from test_predefine where v1['dt'] is not null order by id limit 10;""" + + sql """DROP TABLE IF EXISTS test_predefine1""" + sql """ + CREATE TABLE `test_predefine1` ( + `id` bigint NOT NULL, + `v1` variant NULL, + INDEX idx_var_sub(`v1`) USING INVERTED PROPERTIES("parser" = "english", "sub_column_path" = "a.b.c") ) + ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "variant_enable_flatten_nested" = "true"); + """ + sql """insert into test_predefine1 values(1, '{"predefine_col1" : 1024}')""" + sql """insert into test_predefine1 values(2, '{"predefine_col2" : 1.11111}')""" + sql """insert into test_predefine1 values(3, '{"predefine_col3" : "11111.00000"}')""" + sql """insert into test_predefine1 values(4, '{"predefine_col4" : "2020-01-01-01"}')""" + + sql """insert into test_predefine1 values(5, '{"PREDEFINE_COL1" : 1024}')""" + sql """insert into test_predefine1 values(6, '{"PREDEFINE_COL2" : 1.11111}')""" + sql """insert into test_predefine1 values(7, '{"PREDEFINE_COL3" : "11111.00000"}')""" + sql """insert into test_predefine1 values(8, '{"PREDEFINE_COL4" : "2020-01-01-01"}')""" + sql """select * 
from test_predefine1 order by id limit 1""" + qt_sql """desc test_predefine1""" + qt_sql """select * from test_predefine1 order by id""" + + + // complex types with scalar types + sql "DROP TABLE IF EXISTS test_predefine2" + sql """ + CREATE TABLE `test_predefine2` ( + `id` bigint NOT NULL, + `v1` variant< + array_int:array, + array_string:array, + array_decimal:array, + array_datetime:array, + array_datetimev2:array, + array_date:array, + array_datev2:array, + array_ipv4:array, + array_ipv6:array, + array_float:array, + array_boolean:array, + int_:int, + string_:string, + decimal_:decimalv3(27,9), + datetime_:datetime, + datetimev2_:datetimev2(6), + date_:date, + datev2_:datev2, + ipv4_:ipv4, + ipv6_:ipv6, + float_:float, + boolean_:boolean, + varchar_:varchar + > NULL + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ( "replication_allocation" = "tag.location.default: 1"); + """ + def json1 = """ + { + "array_int" : [1, 2, 3], + "array_string" : ["a", "b", "c"], + "array_decimal" : [1.1, 2.2, 3.3], + "array_datetime" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_date" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_datev2" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_ipv4" : ["127.0.0.1", "172.0.1.1"], + "array_ipv6" : ["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"], + "array_float" : ["1.11111"], + "array_boolean" : [true, false, true], + "int_" : 11111122, + "string_" : 12111222113.0, + "decimal_" : 188118222.011121933, + "datetime_" : "2022-01-01 11:11:11", + "datetimev2_" : "2022-01-01 11:11:11.999999", + "date_" : "2022-01-01", + "datev2_" : "2022-01-01", + "ipv4_" : "127.0.0.1", + "ipv6_" : "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe", + "float_" : "128.111", + "boolean_" : true, + "varchar_" : "hello world" + } + """ + def json2 = """ + { + "array_int" : ["1", "2", 3], + 
"array_string" : ["a", "b", "c"], + "array_decimal" : [1.1, 2.2, 3.3], + "array_datetime" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_date" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_datev2" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_ipv4" : ["127.0.0.1", "172.0.1.1"], + "array_ipv6" : ["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"], + "array_float" : [2.22222], + "array_boolean" : [1, 0, 1, 0, 1], + "int_" : "3333333333", + "string_" : 12111222113.0, + "decimal_" : "219911111111.011121933", + "datetime_" : "2022-01-01 11:11:11", + "datetimev2_" : "2022-01-01 11:11:11.999999", + "date_" : "2022-01-01", + "datev2_" : "2022-01-01", + "ipv4_" : "127.0.0.1", + "ipv6_" : "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe", + "float_" : 1.111111111, + "boolean_" : true, + "varchar_" : "world hello" + } + """ + def json3 = """ + { + "array_int" : ["1", "2", 3], + "array_string" : ["a", "b", "c"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "int_" : "3333333333", + "decimal_" : "219911111111.011121933", + "date_" : "2022-01-01", + "ipv4_" : "127.0.0.1", + "float_" : 1.111111111, + "boolean_" : true, + "varchar_" : "world hello" + } + """ + def json4 = """ + { + "array_int" : ["1", "2", 3], + "array_string" : ["a", "b", "c"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "ipv4_" : "127.0.0.1", + "float_" : 1.111111111, + "varchar_" : "world hello", + "ext_1" : 1.111111, + "ext_2" : "this is an extra field", + "ext_3" : [1, 2, 3] + } + """ + sql "insert into test_predefine2 values(1, '${json1}')" + sql "insert into test_predefine2 values(2, '${json2}')" + sql "insert into test_predefine2 values(3, '${json3}')" + sql "insert into test_predefine2 values(4, '${json4}')" + + qt_sql """select * from test_predefine2 order by id""" + + for 
(int i = 10; i < 100; i++) { + sql "insert into test_predefine2 values(${i}, '${json4}')" + } + + // // schema change + // // 1. add column + sql "alter table test_predefine1 add column v2 variant default null" + sql """insert into test_predefine1 values(101, '{"a" :1}', '{"dcm": 1111111}')""" + sql "alter table test_predefine1 add column v3 variant default null" + sql """insert into test_predefine1 values(102, '{"a" :1}', '{"dcm": 1111111}', '{"dcm": 1111111}');""" + // 2. alter column type + sql "alter table test_predefine1 modify column v3 variant" + sql """insert into test_predefine1 values(103, '{"a" :1}', '{"dcm": 1111111}', '{"dt": "2021-01-01 11:11:11"}');""" + qt_sql """select * from test_predefine1 where id >= 100 order by id""" + // 3. drop column + qt_sql "desc test_predefine1" + sql "alter table test_predefine1 drop column v3" + + sql "DROP TABLE IF EXISTS test_predefine3" + sql """CREATE TABLE `test_predefine3` ( + `id` bigint NOT NULL, + `v` variant NULL) + ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "variant_enable_flatten_nested" = "true");""" + + // test alter nested no effect at present + sql "truncate table test_predefine3" + sql """insert into test_predefine3 values (1, '{"nested" : [{"a" : 123, "b" : "456"}]}')""" + sql "alter table test_predefine3 modify column v variant<`nested.a`: string>" + sql """insert into test_predefine3 values (1, '{"nested" : [{"a" : 123, "b" : "456"}]}')""" + sql """insert into test_predefine3 values (1, '{"nested" : [{"a" : 123, "b" : "456"}]}')""" + sql """insert into test_predefine3 values (1, '{"nested" : [{"a" : 123, "b" : "456"}]}')""" + sql """insert into test_predefine3 values (1, '{"nested" : [{"a" : 123, "b" : "456"}]}')""" + qt_sql "select * from test_predefine3" + qt_sql "select v['nested'] from test_predefine3" + qt_sql "select v['nested']['a'] from test_predefine3" + + // test use auto type detect first then 
alter to modify type + sql "truncate table test_predefine3" + sql """insert into test_predefine3 values (1, '{"auto_type" : 1234.1111}')""" + sql "alter table test_predefine3 modify column v variant<`auto_type`: int>" + sql """insert into test_predefine3 values (1, '{"auto_type" : "124511111"}')""" + sql """insert into test_predefine3 values (1, '{"auto_type" : 1111122334}')""" + sql """insert into test_predefine3 values (1, '{"auto_type" : 111223341111}')""" + sql """insert into test_predefine3 values (1, '{"auto_type" : true}')""" + sql """insert into test_predefine3 values (1, '{"auto_type" : 1}')""" + sql """insert into test_predefine3 values (1, '{"auto_type" : 256}')""" + sql """insert into test_predefine3 values (1, '{"auto_type" : 12345}')""" + sql """insert into test_predefine3 values (1, '{"auto_type" : 1.0}')""" + qt_sql """desc test_predefine3""" + + // test array + sql """ + CREATE TABLE `region_insert` ( + `k` bigint NULL, + `var` variant> NULL, + `OfvZr` variant NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 5 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_medium" = "hdd", + "storage_format" = "V2", + "inverted_index_storage_format" = "V2", + "light_schema_change" = "true", + "store_row_column" = "true", + "row_store_page_size" = "16384", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728" + ); + """ + sql """ + insert into region_insert (k,var,OfvZr) values(1550,'{"key_48":"2024-12-17 20:27:12","key_11":"2024-12-17 20:27:12","key_53":"2024-12-17 20:27:12","key_30":"2024-12-17 20:27:12","key_3":"2024-12-17 20:27:12","key_93":"1HYdNTPvNA","key_40":true,"key_61":"N5LU74i0Nb","key_55":"2024-12-17 20:27:12","key_45":"mMj4f8k8gH","key_58":"2024-12-17 20:27:12","key_71":true,"key_51":"2024-12-17 
20:27:12","key_79":"2024-12-17 20:27:12","key_7":"8QJFB23Rug","key_75":31,"key_50":"2024-12-17 20:27:12","key_24":86,"key_33":98,"key_69":16,"key_57":86,"key_86":"2024-12-17 20:27:12","key_99":24,"key_66":"oTZgDxKvcc","key_18":false,"key_49":"2024-12-17 20:27:12","key_2":false,"key_64":"h3DxAvBG8D","key_87":87,"key_37":42,"key_29":"wb29lruo8E","key_96":88,"key_9":83,"key_52":6,"key_97":"X7y409riGJ","key_72":false,"key_26":"2024-12-17 20:27:12","key_12":66,"key_88":false,"key_32":false,"key_6":true,"key_80":false,"key_89":"2024-12-17 20:27:12","key_1":false,"key_35":"2024-12-17 20:27:12","key_23":70,"key_95":23,"key_76":false,"key_92":true,"key_47":"zYM9IJXSxk","key_22":"2024-12-17 20:27:12","key_38":"P9arsVnb3q","key_56":"LU4SdelM46","key_28":24,"key_4":"GKXCKn1Kf9","key_83":29,"key_20":90,"key_43":"VA8xyYskJ1","key_81":22,"key_16":"2024-12-17 20:27:12","key_82":true,"key_84":"2024-12-17 20:27:12"}','{"key_87":"900oLqWX9Q","key_32":63,"key_79":true,"key_42":3,"key_98":20,"key_35":false,"key_19":"2024-12-17 20:27:12","key_89":"NO0TLqKAvS","key_77":"2024-12-17 20:27:12","key_34":false,"key_43":false,"key_30":true,"key_21":"2024-12-17 20:27:12","key_3":"oDDa0SZ7Bs","key_72":"2024-12-17 20:27:12","key_67":38,"key_82":"2024-12-17 20:27:12","key_37":"VWLDmiZbMr","key_16":true,"key_58":"42Mju9EbAS","key_94":false,"key_50":"cqv3qYmYuJ","key_28":28,"key_78":43,"key_2":"omTAZB0CxT","key_75":"4tAlWmcvnY","key_40":50,"key_33":"2024-12-17 20:27:12","key_70":"2024-12-17 20:27:12","key_25":"2024-12-17 20:27:12","key_54":false,"key_11":"2024-12-17 20:27:12","key_5":"ritjh4q9pJ","key_51":"DzQGqKQ95I","key_73":false,"key_10":"bPI94fvfL4","key_26":"AF5DtNU5Dj","key_80":66,"key_9":69,"key_83":false,"key_59":48,"key_24":"2024-12-17 20:27:12","key_84":36,"key_17":true,"key_44":18,"key_97":"JBw2ZZhDtF","key_74":15,"key_96":true,"key_62":"2024-12-17 20:27:12","key_65":"6iWPCv8FDR","key_53":"2024-12-17 20:27:12","key_95":false,"key_56":"3zyjHDYMJG","key_60":false,"key_23":"2024-12-17 
20:27:12","key_8":"zbNpgWWYWS","key_81":"2024-12-17 20:27:12"}') + """ + // test bf with bool + sql """ + CREATE TABLE `test_bf_with_bool` ( + `k` bigint NULL, + `var` variant + ) ENGINE=OLAP + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 5 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "bloom_filter_columns" = "var" + ); + """ +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/predefine/sql/q01.sql b/regression-test/suites/variant_p0/predefine/sql/q01.sql new file mode 100644 index 00000000000000..ccc510f2a08689 --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/sql/q01.sql @@ -0,0 +1,2 @@ +set describe_extend_variant_column = true; +desc test_predefine2; \ No newline at end of file diff --git a/regression-test/suites/variant_p0/predefine/sql/q02.sql b/regression-test/suites/variant_p0/predefine/sql/q02.sql new file mode 100644 index 00000000000000..290af22ded6b56 --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/sql/q02.sql @@ -0,0 +1,24 @@ +select * from test_predefine2 order by id limit 10; +select * from test_predefine2 where array_contains(cast(v1['array_int'] as array), 1) order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_string'] as array), 'b') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_decimal'] as array), 1.1) order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_datetime'] as array), '2021-01-01 00:00:00') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_datetimev2'] as array), '2021-01-01 00:00:00') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_date'] as array), '2021-01-01') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_datev2'] as array), '2021-01-01') order by id limit 4; +-- select * from 
test_predefine2 where array_contains(cast(v1['array_ipv4'] as array), '127.0.0.1') order by id limit 4; +-- select * from test_predefine2 where array_contains(cast(v1['array_ipv6'] as array), 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe') order by id limit 4; +select * from test_predefine2 where cast(v1['array_float'] as array)[1] >= 1.11111 order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_boolean'] as array), 1) order by id limit 4; +select * from test_predefine2 where cast(v1['int_'] as int) = 11111122 order by id limit 4; +select * from test_predefine2 where cast(v1['string_'] as string) = '12111222113' order by id limit 4; +select * from test_predefine2 where cast(v1['decimal_'] as decimal) >= 188118222.011121933 order by id limit 4; +select * from test_predefine2 where cast(v1['datetime_'] as datetime) = '2022-01-01 11:11:11' order by id limit 4; +select * from test_predefine2 where cast(v1['datetimev2_'] as datetimev2(6)) = '2022-01-01 11:11:11.999999' order by id limit 4; +select * from test_predefine2 where cast(v1['date_'] as date) = '2022-01-01' order by id limit 4; +select * from test_predefine2 where cast(v1['datev2_'] as datev2) = '2022-01-01' order by id limit 4; +select * from test_predefine2 where cast(v1['ipv4_'] as ipv4) = '127.0.0.1' order by id limit 4; +select * from test_predefine2 where cast(v1['ipv6_'] as ipv6) = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe' order by id limit 4; +select * from test_predefine2 where cast(v1['float_'] as float) >= 128.11 order by id limit 4; +select * from test_predefine2 where cast(v1['boolean_'] as boolean) = 1 order by id limit 4; +select * from test_predefine2 where cast(v1['varchar_'] as varchar) = 'hello world' order by id limit 4; \ No newline at end of file