Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/vec/columns/column_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char con

const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
size_t array_size = unaligned_load<size_t>(pos);
pos += 2 * sizeof(array_size);
pos += sizeof(array_size);

for (size_t i = 0; i < array_size; ++i) {
pos = get_keys().deserialize_and_insert_from_arena(pos);
Expand Down
157 changes: 157 additions & 0 deletions be/test/vec/jsonb/serialize_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,10 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_map.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_struct.h"
#include "vec/columns/column_vector.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
Expand All @@ -56,9 +58,11 @@
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_hll.h"
#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
#include "vec/data_types/data_type_time_v2.h"
#include "vec/data_types/serde/data_type_serde.h"
#include "vec/runtime/vdatetime_value.h"
Expand Down Expand Up @@ -177,6 +181,159 @@ TEST(BlockSerializeTest, Array) {
EXPECT_EQ(block.dump_data(), new_block.dump_data());
}

TEST(BlockSerializeTest, Map) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: function 'TEST' exceeds recommended size/complexity thresholds [readability-function-size]

TEST(BlockSerializeTest, Map) {
^
Additional context

be/test/vec/jsonb/serialize_test.cpp:183: 82 lines including whitespace and comments (threshold 80)

TEST(BlockSerializeTest, Map) {
^

// Round-trip test, part 1: build a map column, serialize the block to jsonb, and prepare the
// slot descriptor used for reading it back.
// Tablet schema with a single MAP column named "m" (unique id 1).
TabletSchema schema;
TabletColumn map;
map.set_name("m");
map.set_unique_id(1);
map.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
schema.append_column(map);
// map string string: key type `s` and value type `d` are both Nullable(String)
DataTypePtr s = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
DataTypePtr d = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
DataTypePtr m = std::make_shared<DataTypeMap>(s, d);
// k1/v1 and k2/v2 are the key and value arrays of the two map rows. The data mixes the
// literal strings "null"/"NULL" with real Null() values.
Array k1, k2, v1, v2;
k1.push_back("null");
k1.push_back("doris");
k1.push_back("clever amory");
v1.push_back("ss");
v1.push_back(Null());
v1.push_back("NULL");
k2.push_back("hello amory");
k2.push_back("NULL");
k2.push_back("cute amory");
k2.push_back("doris");
v2.push_back("s");
v2.push_back("0");
v2.push_back("sf");
v2.push_back(Null());
// Each Map field is assembled by pushing its key array first, then its value array.
Map m1, m2;
m1.push_back(k1);
m1.push_back(v1);
m2.push_back(k2);
m2.push_back(v2);
// Two-row map column, inserted into `block` as the single column "test_map".
MutableColumnPtr map_column = m->create_column();
map_column->reserve(2);
map_column->insert(m1);
map_column->insert(m2);
vectorized::ColumnWithTypeAndName type_and_name(map_column->get_ptr(), m, "test_map");
vectorized::Block block;
block.insert(type_and_name);

// String column handed to block_to_jsonb below to hold the serialized output.
MutableColumnPtr col = ColumnString::create();
// serialize
std::cout << "serialize to jsonb" << std::endl;
JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast<ColumnString&>(*col.get()),
block.columns(),
create_data_type_serdes(block.get_data_types()));
// deserialize
// NOTE(review): the second constructor argument (`true`) presumably makes read_desc own the
// slots added to it -- confirm, since the slot below is allocated with a raw `new`.
TupleDescriptor read_desc(PTupleDescriptor(), true);
// slot: describes column "m" on the read side
TSlotDescriptor tslot;
tslot.__set_colName("m");
tslot.nullIndicatorBit = -1;
tslot.nullIndicatorByte = 0;
TypeDescriptor type_desc(TYPE_MAP);
type_desc.children.push_back(TypeDescriptor(TYPE_STRING));
// NOTE(review): the value child is declared TYPE_INT although the column built above uses
// string values -- confirm this mismatch is intentional.
type_desc.children.push_back(TypeDescriptor(TYPE_INT));
type_desc.contains_nulls.push_back(true);
type_desc.contains_nulls.push_back(true);
// Same unique id (1) as the tablet column appended to `schema` above.
tslot.__set_col_unique_id(1);
tslot.__set_slotType(type_desc.to_thrift());
SlotDescriptor* slot = new SlotDescriptor(tslot);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: use auto when initializing with new to avoid duplicating the type name [modernize-use-auto]

Suggested change
SlotDescriptor* slot = new SlotDescriptor(tslot);
auto* slot = new SlotDescriptor(tslot);

// Register the slot with the read-side tuple descriptor.
read_desc.add_slot(slot);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: use auto when initializing with new to avoid duplicating the type name [modernize-use-auto]

Suggested change
auto* slot = new SlotDescriptor(tslot);

// Round-trip test, part 2: deserialize the jsonb column into a fresh block and compare.
// Destination block obtained from block.clone_empty(); jsonb_to_block fills it below.
Block new_block = block.clone_empty();
// For every slot in read_desc: map its column unique id to its slot index and record its
// default value at that same index.
std::unordered_map<uint32_t, uint32_t> col_uid_to_idx;
std::vector<std::string> default_values;
default_values.resize(read_desc.slots().size());
// NOTE(review): `int i` is compared against an unsigned size() here.
for (int i = 0; i < read_desc.slots().size(); ++i) {
col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i;
default_values[i] = read_desc.slots()[i]->col_default_value();
std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" << i << std::endl;
}
// Print both blocks before deserialization (diagnostic output only).
std::cout << block.dump_data() << std::endl;
std::cout << new_block.dump_data() << std::endl;
std::cout << "deserialize from jsonb" << std::endl;
JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()),
static_cast<ColumnString&>(*col.get()), col_uid_to_idx,
new_block, default_values);
// Print both blocks again, then require that their dumps are identical.
std::cout << block.dump_data() << std::endl;
std::cout << new_block.dump_data() << std::endl;
EXPECT_EQ(block.dump_data(), new_block.dump_data());
}

// Round-trip test for a struct column: build a block, serialize it to jsonb, then prepare the
// slot descriptor used for reading it back.
TEST(BlockSerializeTest, Struct) {
// Tablet schema with a single STRUCT column named "struct" (unique id 1).
TabletSchema schema;
TabletColumn struct_col;
struct_col.set_name("struct");
struct_col.set_unique_id(1);
struct_col.set_type(FieldType::OLAP_FIELD_TYPE_STRUCT);
schema.append_column(struct_col);
vectorized::Block block;
{
// Struct of three nullable fields: String, Int128 and UInt8.
DataTypePtr s = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
DataTypePtr d = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt128>());
DataTypePtr m = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m});
// Two rows; note that t2's first field is the literal string "null", not a Null() value.
Tuple t1, t2;
t1.push_back(String("amory cute"));
t1.push_back(__int128_t(37));
t1.push_back(true);
t2.push_back("null");
t2.push_back(__int128_t(26));
t2.push_back(false);
// Two-row struct column, inserted into `block` as the single column "test_struct".
MutableColumnPtr struct_column = st->create_column();
struct_column->reserve(2);
struct_column->insert(t1);
struct_column->insert(t2);
vectorized::ColumnWithTypeAndName type_and_name(struct_column->get_ptr(), st,
"test_struct");
block.insert(type_and_name);
}

// String column handed to block_to_jsonb below to hold the serialized output.
MutableColumnPtr col = ColumnString::create();
// serialize
std::cout << "serialize to jsonb" << std::endl;
JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast<ColumnString&>(*col.get()),
block.columns(),
create_data_type_serdes(block.get_data_types()));
// deserialize
// NOTE(review): the second constructor argument (`true`) presumably makes read_desc own the
// slots added to it -- confirm, since the slot below is allocated with a raw `new`.
TupleDescriptor read_desc(PTupleDescriptor(), true);
// slot: describes column "struct" on the read side
TSlotDescriptor tslot;
tslot.__set_colName("struct");
tslot.nullIndicatorBit = -1;
tslot.nullIndicatorByte = 0;
// Sub-types mirror the three struct fields built above (string, 128-bit int, boolean).
// NOTE(review): the trailing `true` is presumably the "nullable" flag -- confirm.
TypeDescriptor type_desc(TYPE_STRUCT);
type_desc.add_sub_type(TYPE_STRING, "name", true);
type_desc.add_sub_type(TYPE_LARGEINT, "age", true);
type_desc.add_sub_type(TYPE_BOOLEAN, "is", true);
// Same unique id (1) as the tablet column appended to `schema` above.
tslot.__set_col_unique_id(1);
tslot.__set_slotType(type_desc.to_thrift());
SlotDescriptor* slot = new SlotDescriptor(tslot);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: use auto when initializing with new to avoid duplicating the type name [modernize-use-auto]

Suggested change
SlotDescriptor* slot = new SlotDescriptor(tslot);
auto* slot = new SlotDescriptor(tslot);

// Register the slot with the read-side tuple descriptor.
read_desc.add_slot(slot);

// Destination block obtained from block.clone_empty(); it is passed to jsonb_to_block later.
Block new_block = block.clone_empty();
// Lookup from column unique id to slot index, plus one default-value entry per slot.
std::unordered_map<uint32_t, uint32_t> col_uid_to_idx;
std::vector<std::string> default_values;
default_values.resize(read_desc.slots().size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: use auto when initializing with new to avoid duplicating the type name [modernize-use-auto]

Suggested change
default_values.resize(read_desc.slots().size());
auto* slot = new SlotDescriptor(tslot);

// For every slot in read_desc: map its column unique id to its slot index and record its
// default value at that same index.
// NOTE(review): `int i` is compared against an unsigned size() here.
for (int i = 0; i < read_desc.slots().size(); ++i) {
col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i;
default_values[i] = read_desc.slots()[i]->col_default_value();
std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" << i << std::endl;
}
// Print both blocks before deserialization (diagnostic output only).
std::cout << block.dump_data() << std::endl;
std::cout << new_block.dump_data() << std::endl;
std::cout << "deserialize from jsonb" << std::endl;
JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()),
static_cast<ColumnString&>(*col.get()), col_uid_to_idx,
new_block, default_values);
// Print both blocks again, then require that their dumps are identical.
std::cout << block.dump_data() << std::endl;
std::cout << new_block.dump_data() << std::endl;
EXPECT_EQ(block.dump_data(), new_block.dump_data());
}

TEST(BlockSerializeTest, JsonbBlock) {
vectorized::Block block;
TabletSchema schema;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2}
2 doris2 {"jsonk3":333,"jsonk4":444} [300, 400] {"k2":20} {"a": 3, "b": 4}

-- !sql --
1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2}

-- !sql --
1 apache doris {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2}
2 apache doris 2.0 {"jsonk3":333,"jsonk4":444} [300, 400] {"k2":20} {"a": 3, "b": 4}

-- !sql --
1 apache doris {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2}

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1 apache doris
2 apache doris 2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import org.apache.commons.lang3.StringUtils

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite: a merge-on-write unique-key table with the row store column enabled and
// JSONB / ARRAY / MAP / STRUCT columns is queried before and after a partial-column stream load.
suite("test_nested_type_with_rowstore") {
// This test case aims to test nested types with the old planner, so Nereids is disabled.
sql """set enable_nereids_planner=false"""
sql """ DROP TABLE IF EXISTS ct_table;"""
sql """CREATE TABLE ct_table ( `id` int(11) NOT NULL COMMENT "用户 ID", `c_varchar` varchar(65533) NULL COMMENT "用户姓名", `c_jsonb` JSONB NULL, `c_array` ARRAY<INT> NULL, `c_map` MAP<STRING, INT> NULL, `c_struct` STRUCT<a:INT, b:INT> NULL) UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", "store_row_column" = "true");"""

// Rows are inserted in reverse id order (2, then 1); the full-table queries below sort by id.
sql """ insert into ct_table values(2, "doris2", '{"jsonk3": 333, "jsonk4": 444}', [300, 400], {"k2": 20}, {3, 4});"""
sql """ insert into ct_table values(1, "doris1", '{"jsonk1": 123, "jsonk2": 456}', [100, 200], {"k1": 10}, {1, 2});"""

qt_sql """ select * from ct_table order by id;"""
// Point query: equality lookup on the unique key column `id`.
qt_sql """ select * from ct_table where id = 1"""

// Column refresh: partial-column stream load that supplies only `id` and `c_varchar`
// from varchar.tsv.
streamLoad {
table "ct_table"
time 10000
set 'partial_columns', 'true'
set 'strict_mode', 'false'
set 'columns', 'id,c_varchar'
file 'varchar.tsv'

// Rethrow any load exception, then require a successful load of 2 rows with none filtered.
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(2, json.NumberTotalRows)
assertEquals(0, json.NumberFilteredRows)
}
}

// Select and check: run the same two queries again after the partial update.
qt_sql """ select * from ct_table order by id;"""
// Point query: equality lookup on the unique key column `id`.
qt_sql """ select * from ct_table where id = 1"""
}