From adc48901b4ca4a38dfa4e14880e163e30deec54c Mon Sep 17 00:00:00 2001 From: amorynan Date: Mon, 18 Mar 2024 15:19:17 +0800 Subject: [PATCH 1/2] fix array agg with other agg function --- .../aggregate_function_collect.h | 14 ++--- .../data/query_p0/aggregate/array_agg.out | 53 ++++++++++--------- .../query_p0/aggregate/array_agg.groovy | 14 +++-- 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h index 7e3c7207a7d27c..e85288bc91d707 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h @@ -390,7 +390,7 @@ struct AggregateFunctionArrayAggData { void deserialize_and_merge(const IColumn& column, size_t row_num) { auto& to_arr = assert_cast(column); auto& to_nested_col = to_arr.get_data(); - auto col_null = reinterpret_cast(&to_nested_col); + auto col_null = assert_cast(&to_nested_col); const auto& vec = assert_cast(col_null->get_nested_column()); auto start = to_arr.get_offsets()[row_num - 1]; auto end = start + to_arr.get_offsets()[row_num] - to_arr.get_offsets()[row_num - 1]; @@ -556,8 +556,8 @@ class AggregateFunctionCollect const size_t num_rows) const override { if constexpr (ShowNull::value) { for (size_t i = 0; i != num_rows; ++i) { - this->data(places[i]).deserialize_and_merge(*assert_cast(column), - i); + this->data(places[i] + offset) + .deserialize_and_merge(*assert_cast(column), i); } } else { return BaseHelper::deserialize_and_merge_vec(places, offset, rhs, column, arena, @@ -596,9 +596,9 @@ class AggregateFunctionCollect Arena* arena, const size_t num_rows) const override { if constexpr (ShowNull::value) { for (size_t i = 0; i != num_rows; ++i) { - if (places[i]) { - this->data(places[i]).deserialize_and_merge( - *assert_cast(column), i); + if (places[i] + offset) { + this->data(places[i] + offset) + .deserialize_and_merge(*assert_cast(column), i); } } } else { @@ -671,4 +671,4 @@ class AggregateFunctionCollect using IAggregateFunction::argument_types; }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/regression-test/data/query_p0/aggregate/array_agg.out b/regression-test/data/query_p0/aggregate/array_agg.out index f79368e1d0189d..7a811738f1a23e 100644 --- a/regression-test/data/query_p0/aggregate/array_agg.out +++ b/regression-test/data/query_p0/aggregate/array_agg.out @@ -1,36 +1,36 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql1 -- -["LC", "LB", "alex"] -["LC", "LB", "LA"] -["LC", null, "LA"] -["LC", "LB", "LA"] -[null, "LC", "LB", "LA"] -[null, "LC", "LC", "LC", "LC"] -[null, "LC", "LC", "LC", "LC"] +3 ["LC", "LB", "alex"] +3 ["LC", "LB", "LA"] +3 ["LC", null, "LA"] +3 ["LC", "LB", "LA"] +4 [null, "LC", "LB", "LA"] +5 [null, "LC", "LC", "LC", "LC"] +5 [null, "LC", "LC", "LC", "LC"] -- !sql2 -- -["alex", null, "LC", "LC", "LC", "LC"] -["LB"] -["LC"] -["LA"] -["LB"] -["LC"] -["LA"] -["LC"] -["LA"] -["LB"] -["LC"] -["LA"] -["LB"] -[null, "LC"] -[null, "LC", "LC"] -[null, "LC", "LC"] +0 ["alex", "LC", "LC", null, "LC", "LC"] +1 ["LB"] +1 ["LC"] +1 ["LA"] +1 ["LB"] +1 ["LC"] +1 ["LA"] +1 ["LC"] +1 ["LA"] +1 ["LB"] +1 ["LC"] +1 ["LA"] +1 ["LB"] +2 [null, "LC"] +3 [null, "LC", "LC"] +3 [null, "LC", "LC"] -- !sql3 -- -["LC", "LB", "alex", "LC", "LB", "LA", "LC", null, "LA", "LC", "LB", "LA", null, "LC", "LB", "LA", null, "LC", "LC", "LC", "LC", null, "LC", "LC", "LC", "LC"] +["LC", null, "LA", null, "LC", "LC", "LC", "LC", null, "LC", "LB", "LA", "LC", "LB", "alex", null, "LC", "LC", "LC", "LC", "LC", "LB", "LA", "LC", "LB", "LA"] -- !sql4 -- -["V1_3", "V1_2", null, "V2_3", "V2_2", "V2_1", "V3_3", null, "V3_1", "V4_3", "V4_2", "V4_1", "V5_3", "V5_3", "V5_2", "V5_1", "V6_3", null, "V6_3", null, "V6_3", "V7_3", null, "V7_3", null, "V7_3"] +["V4_3", "V4_2", "V4_1", "V1_3", "V1_2", null, "V7_3", null, "V7_3", null, "V7_3", "V2_3", "V2_2", "V2_1", "V3_3", null, "V3_1", "V6_3", null, "V6_3", null, "V6_3", "V5_3", "V5_3", "V5_2", "V5_1"] -- !sql5 -- 1 [2, 1, null] @@ -44,6 +44,9 @@ -- !sql6 -- [""] +-- !sql6_1 -- +\N [""] + -- !sql7 -- ["LC", "LB", "alex"] ["LC", "LB", "LA"] diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy b/regression-test/suites/query_p0/aggregate/array_agg.groovy index ee4fcb9d455ad0..8798ea7d6474c6 100644 --- a/regression-test/suites/query_p0/aggregate/array_agg.groovy +++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy @@ -27,7 +27,7 @@ suite("array_agg") { ) ENGINE=OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' - DISTRIBUTED BY HASH(`id`) BUCKETS 1 + DISTRIBUTED BY HASH(`id`) BUCKETS 10 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "storage_format" = "V2", @@ -46,7 +46,7 @@ suite("array_agg") { ) ENGINE=OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' - DISTRIBUTED BY HASH(`id`) BUCKETS 1 + DISTRIBUTED BY HASH(`id`) BUCKETS 10 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "storage_format" = "V2", @@ -67,7 +67,7 @@ suite("array_agg") { )ENGINE=OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' - DISTRIBUTED BY HASH(`id`) BUCKETS 1 + DISTRIBUTED BY HASH(`id`) BUCKETS 10 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "storage_format" = "V2", @@ -158,10 +158,10 @@ suite("array_agg") { """ qt_sql1 """ - SELECT array_agg(`label_name`) FROM `test_array_agg` GROUP BY `id` order by id; + SELECT count(id), array_agg(`label_name`) FROM `test_array_agg` GROUP BY `id` order by id; """ qt_sql2 """ - SELECT array_agg(label_name) FROM `test_array_agg` GROUP BY value_field order by value_field; + SELECT count(value_field), array_agg(label_name) FROM `test_array_agg` GROUP BY value_field order by value_field; """ qt_sql3 """ SELECT array_agg(`label_name`) FROM `test_array_agg`; @@ -177,6 +177,10 @@ suite("array_agg") { select array_agg(label_name) from test_array_agg_decimal where id=7; """ + qt_sql6_1 """ + select sum(o_totalprice), array_agg(label_name) from test_array_agg_decimal where id=7; + """ + qt_sql7 """ select array_agg(label_name) from test_array_agg_decimal group by id order by id; """ From 69c9f1a7b19ba6849666a1ae802552d45ec1d7de Mon Sep 17 00:00:00 2001 From: amorynan Date: Tue, 19 Mar 2024 10:18:59 +0800 Subject: [PATCH 2/2] fix case with stable output --- .../data/query_p0/aggregate/array_agg.out | 59 +++++++++---- .../query_p0/aggregate/array_agg.groovy | 84 ++++++++++++++++--- 2 files changed, 114 insertions(+), 29 deletions(-) diff --git a/regression-test/data/query_p0/aggregate/array_agg.out b/regression-test/data/query_p0/aggregate/array_agg.out index 7a811738f1a23e..5f019f755e0e95 100644 --- a/regression-test/data/query_p0/aggregate/array_agg.out +++ b/regression-test/data/query_p0/aggregate/array_agg.out @@ -1,36 +1,36 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql1 -- -3 ["LC", "LB", "alex"] 3 ["LC", "LB", "LA"] -3 ["LC", null, "LA"] 3 ["LC", "LB", "LA"] +3 ["LC", "LB", "alex"] +3 ["LC", null, "LA"] 4 [null, "LC", "LB", "LA"] 5 [null, "LC", "LC", "LC", "LC"] 5 [null, "LC", "LC", "LC", "LC"] -- !sql2 -- -0 ["alex", "LC", "LC", null, "LC", "LC"] -1 ["LB"] -1 ["LC"] +0 ["alex", null, "LC", "LC", "LC", "LC"] 1 ["LA"] -1 ["LB"] -1 ["LC"] 1 ["LA"] -1 ["LC"] 1 ["LA"] -1 ["LB"] -1 ["LC"] 1 ["LA"] 1 ["LB"] +1 ["LB"] +1 ["LB"] +1 ["LB"] +1 ["LC"] +1 ["LC"] +1 ["LC"] +1 ["LC"] 2 [null, "LC"] 3 [null, "LC", "LC"] 3 [null, "LC", "LC"] -- !sql3 -- -["LC", null, "LA", null, "LC", "LC", "LC", "LC", null, "LC", "LB", "LA", "LC", "LB", "alex", null, "LC", "LC", "LC", "LC", "LC", "LB", "LA", "LC", "LB", "LA"] +["LC", "LB", "alex", "LC", "LB", "LA", "LC", null, "LA", "LC", "LB", "LA", null, "LC", "LB", "LA", null, "LC", "LC", "LC", "LC", null, "LC", "LC", "LC", "LC"] -- !sql4 -- -["V4_3", "V4_2", "V4_1", "V1_3", "V1_2", null, "V7_3", null, "V7_3", null, "V7_3", "V2_3", "V2_2", "V2_1", "V3_3", null, "V3_1", "V6_3", null, "V6_3", null, "V6_3", "V5_3", "V5_3", "V5_2", "V5_1"] +["V1_3", "V1_2", null, "V2_3", "V2_2", "V2_1", "V3_3", null, "V3_1", "V4_3", "V4_2", "V4_1", "V5_3", "V5_3", "V5_2", "V5_1", "V6_3", null, "V6_3", null, "V6_3", "V7_3", null, "V7_3", null, "V7_3"] -- !sql5 -- 1 [2, 1, null] @@ -48,13 +48,13 @@ \N [""] -- !sql7 -- -["LC", "LB", "alex"] +[""] +[""] ["LC", "LB", "LA"] -["LC", null, "LA"] ["LC", "LB", "LA"] ["LC", "LB", "LA"] -[""] -[""] +["LC", "LB", "alex"] +["LC", null, "LA"] -- !sql8 -- [null] @@ -68,3 +68,30 @@ 7 [null] 8 [null] +-- !sql11 -- +3 3 +3 3 +3 3 +3 3 +4 4 +5 5 +5 5 + +-- !sql21 -- +0 6 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 2 +3 3 +3 3 + diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy b/regression-test/suites/query_p0/aggregate/array_agg.groovy index 8798ea7d6474c6..1484125d0da6cc 100644 --- a/regression-test/suites/query_p0/aggregate/array_agg.groovy +++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy @@ -17,6 +17,7 @@ suite("array_agg") { sql "DROP TABLE IF EXISTS `test_array_agg`;" + sql "DROP TABLE IF EXISTS `test_array_agg1`;" sql "DROP TABLE IF EXISTS `test_array_agg_int`;" sql "DROP TABLE IF EXISTS `test_array_agg_decimal`;" sql """ @@ -27,7 +28,7 @@ suite("array_agg") { ) ENGINE=OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' - DISTRIBUTED BY HASH(`id`) BUCKETS 10 + DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "storage_format" = "V2", @@ -46,7 +47,7 @@ suite("array_agg") { ) ENGINE=OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' - DISTRIBUTED BY HASH(`id`) BUCKETS 10 + DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "storage_format" = "V2", @@ -67,7 +68,7 @@ suite("array_agg") { )ENGINE=OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' - DISTRIBUTED BY HASH(`id`) BUCKETS 10 + DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "storage_format" = "V2", @@ -157,43 +158,100 @@ suite("array_agg") { (8, "", NULL,0,NULL,"alexcoco2"); """ - qt_sql1 """ + order_qt_sql1 """ SELECT count(id), array_agg(`label_name`) FROM `test_array_agg` GROUP BY `id` order by id; """ - qt_sql2 """ + order_qt_sql2 """ SELECT count(value_field), array_agg(label_name) FROM `test_array_agg` GROUP BY value_field order by value_field; """ - qt_sql3 """ + order_qt_sql3 """ SELECT array_agg(`label_name`) FROM `test_array_agg`; """ - qt_sql4 """ + order_qt_sql4 """ SELECT array_agg(`value_field`) FROM `test_array_agg`; """ - qt_sql5 """ + order_qt_sql5 """ SELECT id, array_agg(age) FROM test_array_agg_int GROUP BY id order by id; """ - qt_sql6 """ + order_qt_sql6 """ select array_agg(label_name) from test_array_agg_decimal where id=7; """ - qt_sql6_1 """ + order_qt_sql6_1 """ select sum(o_totalprice), array_agg(label_name) from test_array_agg_decimal where id=7; """ - qt_sql7 """ + order_qt_sql7 """ select array_agg(label_name) from test_array_agg_decimal group by id order by id; """ - qt_sql8 """ + order_qt_sql8 """ select array_agg(age) from test_array_agg_decimal where id=7; """ - qt_sql9 """ + order_qt_sql9 """ select id,array_agg(o_totalprice) from test_array_agg_decimal group by id order by id; """ + + // test for bucket 10 + sql """ CREATE TABLE `test_array_agg1` ( + `id` int(11) NOT NULL, + `label_name` varchar(32) default null, + `value_field` string default null + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); """ + + sql """ + insert into `test_array_agg1` values + (1, "alex",NULL), + (1, "LB", "V1_2"), + (1, "LC", "V1_3"), + (2, "LA", "V2_1"), + (2, "LB", "V2_2"), + (2, "LC", "V2_3"), + (3, "LA", "V3_1"), + (3, NULL, NULL), + (3, "LC", "V3_3"), + (4, "LA", "V4_1"), + (4, "LB", "V4_2"), + (4, "LC", "V4_3"), + (5, "LA", "V5_1"), + (5, "LB", "V5_2"), + (5, "LC", "V5_3"), + (5, NULL, "V5_3"), + (6, "LC", "V6_3"), + (6, "LC", NULL), + (6, "LC", "V6_3"), + (6, "LC", NULL), + (6, NULL, "V6_3"), + (7, "LC", "V7_3"), + (7, "LC", NULL), + (7, "LC", "V7_3"), + (7, "LC", NULL), + (7, NULL, "V7_3"); + """ + + order_qt_sql11 """ + SELECT count(id), size(array_agg(`label_name`)) FROM `test_array_agg` GROUP BY `id` order by id; + """ + order_qt_sql21 """ + SELECT count(value_field), size(array_agg(label_name)) FROM `test_array_agg` GROUP BY value_field order by value_field; + """ + + sql "DROP TABLE `test_array_agg`" + sql "DROP TABLE `test_array_agg1`" sql "DROP TABLE `test_array_agg_int`" sql "DROP TABLE `test_array_agg_decimal`" }