From e8193c0de441ac406939d91c56dd5497563fd1ce Mon Sep 17 00:00:00 2001 From: amory Date: Wed, 26 Mar 2025 22:44:09 +0800 Subject: [PATCH] [fix](struct)Fixed the issue of inserting into a struct type string literal with one more subfield causing BE coredump (#49485) Fixed the issue of inserting into a struct type string literal with one more subfield causing BE coredump. A situation like the one below will make BE core dump: ``` create table t(a int, b int, s struct) PROPERTIES ("replication_allocation" = "tag.location.default: 1"); insert into t values(1,1,'{1,2}'); ``` core info: ``` [WARNING!] /sys/kernel/mm/transparent_hugepage/enabled: [always] madvise never, Doris not recommend turning on THP, which may cause the BE process to use more memory and cannot be freed in time. Turn off THP: `echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled` start BE in local mode ================================================================= ==2818976==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x603000f17a90 at pc 0x55eba7fdd69c bp 0x7f03243dbd30 sp 0x7f03243dbd28 READ of size 8 at 0x603000f17a90 thread T928 (brpc_light) #0 0x55eba7fdd69b in std::__shared_ptr::get() const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:1291:16 #1 0x55eba7fdd649 in std::__shared_ptr_access::_M_get() const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:990:66 #2 0x55eba7fda316 in std::__shared_ptr_access::operator->() const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:984:9 #3 0x55ebcfc8b19e in doris::vectorized::DataTypeStructSerDe::deserialize_one_cell_from_json(doris::vectorized::IColumn&, doris::Slice&, doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_struct_serde.cpp:200:25 #4 0x55ebdac49c8b in doris::vectorized::ConvertImplGenericFromString::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned int, unsigned long) /mnt/disk1/wangqiannan/amory/doris/be/src/vec/functions/function_cast.h:618:36 #5 0x55ebda2234d0 in doris::Status std::__invoke_impl> const&, unsigned int, unsigned long), doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long>(std::__invoke_other, doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned int, unsigned long), doris::FunctionContext*&&, doris::vectorized::Block&, std::vector> const&, unsigned long&&, unsigned long&&) /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14 #6 0x55ebda22323a in std::enable_if> const&, unsigned int, unsigned long), doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long>, doris::Status>::type std::__invoke_r> const&, unsigned int, unsigned long), doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long>(doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned int, unsigned long), doris::FunctionContext*&&, doris::vectorized::Block&, std::vector> const&, unsigned long&&, unsigned long&&) /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:114:9 #7 0x55ebda222e71 in std::_Function_handler> const&, unsigned long, unsigned long), doris::Status (*)(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned int, unsigned long)>::_M_invoke(std::_Any_data const&, doris::FunctionContext*&&, doris::vectorized::Block&, std::vector> const&, unsigned long&&, unsigned long&&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9 #8 0x55ebd9b3f1d2 in std::function> const&, unsigned long, unsigned long)>::operator()(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9 #9 0x55ebda1ad7c7 in doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*, std::shared_ptr const&, std::shared_ptr const&, bool) const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned int, unsigned long)::operator()(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned int, unsigned long) const /mnt/disk1/wangqiannan/amory/doris/be/src/vec/functions/function_cast.h:2241:17 #10 0x55ebda1acee2 in doris::Status std::__invoke_impl const&, std::shared_ptr const&, bool) const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned int, unsigned long)&, doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long>(std::__invoke_other, doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*, std::shared_ptr const&, std::shared_ptr const&, bo@@@ ``` --- .../serde/data_type_struct_serde.cpp | 6 +- .../data/insert_p0/test_struct_insert.out | 40 +++++++ .../insert_p0/test_struct_insert.groovy | 101 +++++++++++++++++- 3 files changed, 144 insertions(+), 3 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index c88b0088e5a8d4..aefea80f0c753b 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -158,7 +158,8 @@ Status 
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic } Slice next(slice.data + start_pos, idx - start_pos); next.trim_prefix(); - if (field_pos > elem_size) { + // field_pos should always less than elem_size, if not, we should return error + if (field_pos >= elem_size) { // we should do column revert if error for (size_t j = 0; j < field_pos; j++) { struct_column.get_column(j).pop_back(1); @@ -188,7 +189,8 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic (key_added || !is_explicit_names)) { Slice next(slice.data + start_pos, idx - start_pos); next.trim_prefix(); - if (field_pos > elem_size) { + /// field_pos should always less than elem_size, if not, we should return error + if (field_pos >= elem_size) { // we should do column revert if error for (size_t j = 0; j < field_pos; j++) { struct_column.get_column(j).pop_back(1); diff --git a/regression-test/data/insert_p0/test_struct_insert.out b/regression-test/data/insert_p0/test_struct_insert.out index 3dc160ef7f0339..91230f415e2b58 100644 --- a/regression-test/data/insert_p0/test_struct_insert.out +++ b/regression-test/data/insert_p0/test_struct_insert.out @@ -5,3 +5,43 @@ 3 \N {"f1":null, "f2":null, "f3":null} {"f1":"2023-02-23", "f2":"2023-02-23 00:10:19", "f3":"2023-02-23", "f4":"2023-02-23 00:10:19"} {"f1":"", "f2":"", "f3":""} 4 \N \N {"f1":null, "f2":null, "f3":null, "f4":null} {"f1":"abc", "f2":"def", "f3":"hij"} +-- !select -- +1 {"a":1} {"a":1, "b":"a"} {"a":1, "s":{"a":1}} {"a":1, "s":{"a":1, "b":"a"}} {"a":1, "s":{"b":1, "s":{"c":1}}} {"a":1, "b":{"c":1, "d":"a"}, "e":1} + +-- !select -- +1 {"a":1} {"a":1, "b":"a"} {"a":1, "s":{"a":1}} {"a":1, "s":{"a":1, "b":"a"}} {"a":1, "s":{"b":1, "s":{"c":1}}} {"a":1, "b":{"c":1, "d":"a"}, "e":1} +2 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":{"a":31}} {"a":40, "s":{"a":41, "b":"nested"}} {"a":50, "s":{"b":51, "s":{"c":52}}} {"a":60, "b":{"c":61, "d":"text"}, "e":70} + +-- !select -- +1 {"a":1} {"a":1, "b":"a"} 
{"a":1, "s":{"a":1}} {"a":1, "s":{"a":1, "b":"a"}} {"a":1, "s":{"b":1, "s":{"c":1}}} {"a":1, "b":{"c":1, "d":"a"}, "e":1} +2 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":{"a":31}} {"a":40, "s":{"a":41, "b":"nested"}} {"a":50, "s":{"b":51, "s":{"c":52}}} {"a":60, "b":{"c":61, "d":"text"}, "e":70} +3 \N {"a":30, "b":"valid"} \N {"a":40, "s":{"a":41, "b":"valid"}} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} + +-- !select -- +1 {"a":1} {"a":1, "b":"a"} {"a":1, "s":{"a":1}} {"a":1, "s":{"a":1, "b":"a"}} {"a":1, "s":{"b":1, "s":{"c":1}}} {"a":1, "b":{"c":1, "d":"a"}, "e":1} +2 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":{"a":31}} {"a":40, "s":{"a":41, "b":"nested"}} {"a":50, "s":{"b":51, "s":{"c":52}}} {"a":60, "b":{"c":61, "d":"text"}, "e":70} +3 \N {"a":30, "b":"valid"} \N {"a":40, "s":{"a":41, "b":"valid"}} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} +4 {"a":null} {"a":40, "b":"50"} {"a":50, "s":{"a":null}} {"a":60, "s":{"a":70, "b":"80"}} {"a":90, "s":{"b":null, "s":{"c":100}}} {"a":100, "b":{"c":110, "d":"120"}, "e":null} + +-- !select -- +1 {"a":1} {"a":1, "b":"a"} {"a":1, "s":{"a":1}} {"a":1, "s":{"a":1, "b":"a"}} {"a":1, "s":{"b":1, "s":{"c":1}}} {"a":1, "b":{"c":1, "d":"a"}, "e":1} +2 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":{"a":31}} {"a":40, "s":{"a":41, "b":"nested"}} {"a":50, "s":{"b":51, "s":{"c":52}}} {"a":60, "b":{"c":61, "d":"text"}, "e":70} +3 \N {"a":30, "b":"valid"} \N {"a":40, "s":{"a":41, "b":"valid"}} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} +4 {"a":null} {"a":40, "b":"50"} {"a":50, "s":{"a":null}} {"a":60, "s":{"a":70, "b":"80"}} {"a":90, "s":{"b":null, "s":{"c":100}}} {"a":100, "b":{"c":110, "d":"120"}, "e":null} +5 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":null} {"a":40, "s":null} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} + +-- !select -- +1 {"a":1} {"a":1, "b":"a"} {"a":1, "s":{"a":1}} {"a":1, "s":{"a":1, "b":"a"}} {"a":1, "s":{"b":1, "s":{"c":1}}} {"a":1, "b":{"c":1, 
"d":"a"}, "e":1} +2 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":{"a":31}} {"a":40, "s":{"a":41, "b":"nested"}} {"a":50, "s":{"b":51, "s":{"c":52}}} {"a":60, "b":{"c":61, "d":"text"}, "e":70} +3 \N {"a":30, "b":"valid"} \N {"a":40, "s":{"a":41, "b":"valid"}} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} +4 {"a":null} {"a":40, "b":"50"} {"a":50, "s":{"a":null}} {"a":60, "s":{"a":70, "b":"80"}} {"a":90, "s":{"b":null, "s":{"c":100}}} {"a":100, "b":{"c":110, "d":"120"}, "e":null} +5 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":null} {"a":40, "s":null} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} + +-- !select -- +1 {"a":1} {"a":1, "b":"a"} {"a":1, "s":{"a":1}} {"a":1, "s":{"a":1, "b":"a"}} {"a":1, "s":{"b":1, "s":{"c":1}}} {"a":1, "b":{"c":1, "d":"a"}, "e":1} +2 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":{"a":31}} {"a":40, "s":{"a":41, "b":"nested"}} {"a":50, "s":{"b":51, "s":{"c":52}}} {"a":60, "b":{"c":61, "d":"text"}, "e":70} +3 \N {"a":30, "b":"valid"} \N {"a":40, "s":{"a":41, "b":"valid"}} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} +4 {"a":null} {"a":40, "b":"50"} {"a":50, "s":{"a":null}} {"a":60, "s":{"a":70, "b":"80"}} {"a":90, "s":{"b":null, "s":{"c":100}}} {"a":100, "b":{"c":110, "d":"120"}, "e":null} +5 {"a":10} {"a":20, "b":"valid"} {"a":30, "s":null} {"a":40, "s":null} {"a":50, "s":{"b":51, "s":null}} {"a":60, "b":null, "e":70} +7 {"a":10} {"a":20, "b":""} {"a":30, "s":null} {"a":40, "s":{"a":41, "b":"nested"}} {"a":50, "s":{"b":51, "s":{"c":52}}} {"a":60, "b":{"c":61, "d":""}, "e":70} + diff --git a/regression-test/suites/insert_p0/test_struct_insert.groovy b/regression-test/suites/insert_p0/test_struct_insert.groovy index a845c978580761..e2ef485fbcb3e3 100644 --- a/regression-test/suites/insert_p0/test_struct_insert.groovy +++ b/regression-test/suites/insert_p0/test_struct_insert.groovy @@ -73,4 +73,103 @@ suite("test_struct_insert") { // select the table and check whether the data is correct qt_select "select 
* from ${testTable} order by k1" -} \ No newline at end of file + + sql "DROP TABLE IF EXISTS test_struct_insert_into" + sql """ + CREATE TABLE IF NOT EXISTS test_struct_insert_into ( + id INT, + s STRUCT, + s1 STRUCT, + s2 struct>, + s3 struct>, + s4 STRUCT>>, + s5 STRUCT, e:INT> + ) PROPERTIES ("replication_allocation" = "tag.location.default: 1"); + """ + + // insert into table + // right cases + sql "INSERT INTO test_struct_insert_into VALUES(1, {1}, {1, 'a'}, {1, {1}}, {1, {1, 'a'}}, {1, {1, {1}}}, {1, {1, 'a'}, 1})" + + qt_select "select * from test_struct_insert_into order by id" + + sql """INSERT INTO test_struct_insert_into VALUES ( + 2, + '{10}', -- s: right + '{20, "valid"}', -- s1: right + '{30, {31}}', -- s2: right(outer a=30,s.a=31) + '{40, {41, "nested"}}', -- s3: right(a=40,s.a=41, s.b="nested") + '{50, {51, {52}}}', -- s4: right(a=50 -> s.b=51 -> s.s.c=52) + '{60, {61, "text"}, 70}' -- s5: right(a=60, b.c=61, b.d="text", e=70) + );""" + + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 3, + '{10, 20}', -- s: more -> NULL + '{30, "valid"}', -- s1: right + NULL, -- s2: NULL + '{40, {41, "valid"}}', -- s3: right + '{50, {51, null}}', -- s4: s.s is null + '{60, NULL, 70}' -- s5: b is NULL + ); + """ + + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 4, + '{"invalid"}', -- s.a type invalid cast -> s.a is NULL + '{40, 50}', -- right + '{50, {"invalid"}}',-- s2.s.a type invalid cast -> s2.s is {"a":null} + '{60, {70, 80}}', -- right + '{90, {"invalid", {100}}}', -- s4.s.b type invalid cast -> s4.s is NULL + '{100, {110, 120}, "invalid"}' -- s5.e type invalid cast -> s5.e is NULL + ); + """ + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 5, + '{10}', + '{20, "valid"}', + '{30, {31, 32}}', -- s2.s more -> s2.s is NULL + '{40, {41, 
"nested", 42}}', -- s3.s more -> s3.s is NULL + '{50, {51, {52, 53}}}', -- s4.s.s more -> s4.s.s is NULL + '{60, {61, "text", 62}, 70}' -- s5.b more -> s5.b is NULL + ); + """ + qt_select "select * from test_struct_insert_into order by id" + test { + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 6, + '{10}', + '{20}', -- s1 less -> s1 is NULL + '{30, {31}}', -- s2 right + '{40, {41}}', -- s3.s less -> s3.s is NULL + '{50, {51}}', -- s4.s less -> s4.s is NULL + '{60, {61}, 70}' -- s5.b less -> s5.b is NULL + ); + """ + exception("Size of offsets doesn't match size of column") + } + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 7, + '{10}', -- right + '{20, }', -- s1.b is empty + '{30, }', -- s2.s is empty + '{40, {41, "nested"}}', -- right + '{50, {51, {52}}}', -- right + '{60, {61, }, 70}' -- s5.b.d is empty + ); + """ + qt_select "select * from test_struct_insert_into order by id" +}