From c73d89d4ea07ead28dce1317dca43bf24a871066 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 17 Mar 2025 15:57:12 +0800 Subject: [PATCH 1/3] [Fix](partial update) use correct default value for missing columns in partial update (#49066) related PR: https://github.com/apache/doris/pull/48625 In partial update, we should fill the columns which are not nullable and don't have a user-defined default value with their type's default value when the row's delete sign is marked. --- .../test_partial_update_complex_type.out | 20 +++++++++++ .../test_partial_update_complex_type.groovy | 35 ++++++++++++++++--- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out index ae7ed3b79b904f..6f6a385a359772 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out @@ -21,6 +21,16 @@ 4 \N \N [1, 2, 3, 4, 5] \N 6 \N \N [] {"a":1000000, "b":10000000} +-- !sql -- +3 doris3 {"jsonk3":456,"jsonk5":789} [600, 400] {"a":2, "b":7} {"cccc":10} 4 0 + +-- !sql -- +1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"a":1, "b":2} {"b":3} 3 0 +1 \N null [null] {"a":null, "b":null} {null:null} 5 1 +2 doris2 {"jsonk2":333,"jsonk4":444} [300, 400] {"a":3, "b":4} {"a":2} 2 0 +2 \N null [null] {"a":null, "b":null} {null:null} 5 1 +3 doris3 {"jsonk3":456,"jsonk5":789} [600, 400] {"a":2, "b":7} {"cccc":10} 4 0 + -- !update_varchar -- 1 apache doris {"jsonk1":123,"jsonk2":456} [100, 200] {"a":1, "b":2} 2 apache doris 2.0 {"jsonk3":333,"jsonk4":444} [300, 400] {"a":3, "b":4} @@ -43,3 +53,13 @@ 4 \N \N [1, 2, 3, 4, 5] \N 6 \N \N [] {"a":1000000, "b":10000000} +-- !sql -- +3 doris3 {"jsonk3":456,"jsonk5":789} [600, 400] {"a":2, "b":7} {"cccc":10} 4 0 + +-- !sql -- +1 doris1 
{"jsonk1":123,"jsonk2":456} [100, 200] {"a":1, "b":2} {"b":3} 3 0 +1 \N null [null] {"a":null, "b":null} {null:null} 5 1 +2 doris2 {"jsonk2":333,"jsonk4":444} [300, 400] {"a":3, "b":4} {"a":2} 2 0 +2 \N null [null] {"a":null, "b":null} {null:null} 5 1 +3 doris3 {"jsonk3":456,"jsonk5":789} [600, 400] {"a":2, "b":7} {"cccc":10} 4 0 + diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_complex_type.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_complex_type.groovy index ebd32310fd2b45..fc3cb097080e8c 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_complex_type.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_complex_type.groovy @@ -1,4 +1,3 @@ - // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -32,7 +31,7 @@ suite("test_primary_key_partial_update_complex_type", "p0") { // 2. 
the combination of map type and row store may result in bugs, so we skip map type in temporary // // create table - sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ DROP TABLE IF EXISTS ${tableName} FORCE""" sql """ CREATE TABLE ${tableName} ( `id` int(11) NOT NULL COMMENT "用户 ID", `c_varchar` varchar(65533) NULL COMMENT "用户姓名", @@ -120,8 +119,36 @@ suite("test_primary_key_partial_update_complex_type", "p0") { select * from ${tableName} order by id; """ - // drop table - sql """ DROP TABLE IF EXISTS ${tableName} """ + // create table for NOT NULL tests + def tableName2 = "${tableName}_not_null" + sql """ DROP TABLE IF EXISTS ${tableName2} FORCE""" + sql """ CREATE TABLE ${tableName2} ( + `id` int(11) NOT NULL COMMENT "用户 ID", + `c_varchar` varchar(65533) NULL COMMENT "用户姓名", + `c_jsonb` JSONB NOT NULL, + `c_array` ARRAY NOT NULL, + `c_struct` STRUCT NOT NULL, + `c_map` MAP not null) + UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", + "store_row_column" = "${use_row_store}"); """ + + sql """insert into ${tableName2} values(2, "doris2", '{"jsonk2": 333, "jsonk4": 444}', [300, 400], {3, 4}, {'a': 2})""" + sql """insert into ${tableName2} values(1, "doris1", '{"jsonk1": 123, "jsonk2": 456}', [100, 200], {1, 2}, {'b': 3})""" + sql """insert into ${tableName2} values(3, "doris3", '{"jsonk3": 456, "jsonk5": 789}', [600, 400], {2, 7}, {'cccc': 10})""" + String sql1 = "delete from ${tableName2} where id<=2;" + explain { + sql sql1 + contains "IS_PARTIAL_UPDATE: true" + } + sql(sql1) + + qt_sql """ select *,__DORIS_VERSION_COL__,__DORIS_DELETE_SIGN__ from ${tableName2} order by id,__DORIS_VERSION_COL__;""" + sql "set skip_delete_bitmap=true;" + sql "set skip_delete_sign=true;" + qt_sql """ select *,__DORIS_VERSION_COL__,__DORIS_DELETE_SIGN__ from ${tableName2} order by id,__DORIS_VERSION_COL__;""" + sql "set skip_delete_bitmap=false;" + sql "set skip_delete_sign=false;" } } } From 
52d1f34698cf542a92fe61e072308932df16e130 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 17 Mar 2025 16:37:35 +0800 Subject: [PATCH 2/3] update --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 3 ++- be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp | 3 ++- be/src/olap/tablet.cpp | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 00aec4d533addb..c458b3f28cd525 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -768,7 +768,8 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f // If the control flow reaches this branch, the column neither has default value // nor is nullable. It means that the row's delete sign is marked, and the value // columns are useless and won't be read. So we can just put arbitary values in the cells - mutable_full_columns[cids_missing[i]]->insert_default(); + mutable_full_columns[cids_missing[i]]->insert( + tablet_column.get_vec_type()->get_default()); } } continue; diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 8898c08aa80943..0fe7afd9be3e5a 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -704,7 +704,8 @@ Status VerticalSegmentWriter::_fill_missing_columns( // If the control flow reaches this branch, the column neither has default value // nor is nullable. It means that the row's delete sign is marked, and the value // columns are useless and won't be read. 
So we can just put arbitary values in the cells - mutable_full_columns[missing_cids[i]]->insert_default(); + mutable_full_columns[missing_cids[i]]->insert( + tablet_column.get_vec_type()->get_default()); } } continue; diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 8b671961f2c029..6c3edc7bb67b89 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -3362,7 +3362,7 @@ Status Tablet::generate_new_block_for_partial_update( assert_cast(mutable_column.get()) ->insert_null_elements(1); } else { - mutable_column->insert_default(); + mutable_column->insert(rs_column.get_vec_type()->get_default()); } } else { mutable_column->insert_from( From 035b31c152ba1b84c143efd819ee8af3f777e50c Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 17 Mar 2025 21:00:43 +0800 Subject: [PATCH 3/3] fix case output --- .../partial_update/test_partial_update_complex_type.out | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out index 6f6a385a359772..65c3d527b7c0c9 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_complex_type.out @@ -26,9 +26,9 @@ -- !sql -- 1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"a":1, "b":2} {"b":3} 3 0 -1 \N null [null] {"a":null, "b":null} {null:null} 5 1 +1 \N {} [null] {"a":null, "b":null} {null:null} 5 1 2 doris2 {"jsonk2":333,"jsonk4":444} [300, 400] {"a":3, "b":4} {"a":2} 2 0 -2 \N null [null] {"a":null, "b":null} {null:null} 5 1 +2 \N {} [null] {"a":null, "b":null} {null:null} 5 1 3 doris3 {"jsonk3":456,"jsonk5":789} [600, 400] {"a":2, "b":7} {"cccc":10} 4 0 -- !update_varchar -- @@ -58,8 +58,8 @@ -- !sql -- 1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"a":1, "b":2} {"b":3} 3 0 -1 \N 
null [null] {"a":null, "b":null} {null:null} 5 1 +1 \N {} [null] {"a":null, "b":null} {null:null} 5 1 2 doris2 {"jsonk2":333,"jsonk4":444} [300, 400] {"a":3, "b":4} {"a":2} 2 0 -2 \N null [null] {"a":null, "b":null} {null:null} 5 1 +2 \N {} [null] {"a":null, "b":null} {null:null} 5 1 3 doris3 {"jsonk3":456,"jsonk5":789} [600, 400] {"a":2, "b":7} {"cccc":10} 4 0