Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 20 additions & 10 deletions be/src/vec/aggregate_functions/aggregate_function_collect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,18 @@ AggregateFunctionPtr do_create_agg_function_collect(bool distinct, const DataTyp
}
}

if (distinct) {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectSetData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
} else {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectListData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
if constexpr (!std::is_same_v<T, void>) {
if (distinct) {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectSetData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
} else {
return creator_without_type::create<AggregateFunctionCollect<
AggregateFunctionCollectListData<T, HasLimit>, HasLimit, std::false_type>>(
argument_types, result_is_nullable);
}
}
return nullptr;
}

template <typename HasLimit, typename ShowNull>
Expand All @@ -69,15 +72,21 @@ AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& n
if (which.is_date_or_datetime()) {
return do_create_agg_function_collect<Int64, HasLimit, ShowNull>(distinct, argument_types,
result_is_nullable);
} else if (which.is_date_v2()) {
} else if (which.is_date_v2() || which.is_ipv4()) {
return do_create_agg_function_collect<UInt32, HasLimit, ShowNull>(distinct, argument_types,
result_is_nullable);
} else if (which.is_date_time_v2()) {
} else if (which.is_date_time_v2() || which.is_ipv6()) {
return do_create_agg_function_collect<UInt64, HasLimit, ShowNull>(distinct, argument_types,
result_is_nullable);
} else if (which.is_string()) {
return do_create_agg_function_collect<StringRef, HasLimit, ShowNull>(
distinct, argument_types, result_is_nullable);
} else {
// generic serialize which will not use specializations, ShowNull::value always means array_agg
if constexpr (ShowNull::value) {
return do_create_agg_function_collect<void, HasLimit, ShowNull>(
distinct, argument_types, result_is_nullable);
}
}

LOG(WARNING) << fmt::format("unsupported input type {} for aggregate function {}",
Expand Down Expand Up @@ -107,6 +116,7 @@ AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
}

void register_aggregate_function_collect_list(AggregateFunctionSimpleFactory& factory) {
// notice: array_agg only differs from collect_list in that array_agg will show null elements in array
factory.register_function_both("collect_list", create_aggregate_function_collect);
factory.register_function_both("collect_set", create_aggregate_function_collect);
factory.register_function_both("array_agg", create_aggregate_function_collect);
Expand Down
72 changes: 70 additions & 2 deletions be/src/vec/aggregate_functions/aggregate_function_collect.h
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,71 @@ struct AggregateFunctionArrayAggData<StringRef> {
}
};

template <>
struct AggregateFunctionArrayAggData<void> {
using ElementType = StringRef;
using Self = AggregateFunctionArrayAggData<void>;
MutableColumnPtr column_data;

AggregateFunctionArrayAggData() {}

AggregateFunctionArrayAggData(const DataTypes& argument_types) {
DataTypePtr column_type = argument_types[0];
column_data = column_type->create_column();
}

void add(const IColumn& column, size_t row_num) { column_data->insert_from(column, row_num); }

void deserialize_and_merge(const IColumn& column, size_t row_num) {
auto& to_arr = assert_cast<const ColumnArray&>(column);
auto& to_nested_col = to_arr.get_data();
auto start = to_arr.get_offsets()[row_num - 1];
auto end = start + to_arr.get_offsets()[row_num] - to_arr.get_offsets()[row_num - 1];
for (auto i = start; i < end; ++i) {
column_data->insert_from(to_nested_col, i);
}
}

void reset() { column_data->clear(); }

void insert_result_into(IColumn& to) const {
auto& to_arr = assert_cast<ColumnArray&>(to);
auto& to_nested_col = to_arr.get_data();
size_t num_rows = column_data->size();
for (size_t i = 0; i < num_rows; ++i) {
to_nested_col.insert_from(*column_data, i);
}
to_arr.get_offsets().push_back(to_nested_col.size());
}

void write(BufferWritable& buf) const {
const size_t size = column_data->size();
write_binary(size, buf);
for (size_t i = 0; i < size; i++) {
write_string_binary(column_data->get_data_at(i), buf);
}
}

void read(BufferReadable& buf) {
size_t size = 0;
read_binary(size, buf);
column_data->reserve(size);

StringRef s;
for (size_t i = 0; i < size; i++) {
read_string_binary(s, buf);
column_data->insert_data(s.data, s.size);
}
}

void merge(const Self& rhs) {
const auto size = rhs.column_data->size();
for (size_t i = 0; i < size; i++) {
column_data->insert_from(*rhs.column_data, i);
}
}
};

// ShowNull exists only to support array_agg, which must keep NULL elements in its result.
// TODO: support ORDER BY sorting for array_agg.
template <typename Data, typename HasLimit, typename ShowNull>
Expand Down Expand Up @@ -546,7 +611,8 @@ class AggregateFunctionCollect

void create(AggregateDataPtr __restrict place) const override {
if constexpr (ShowNull::value) {
if constexpr (IsDecimalNumber<typename Data::ElementType>) {
if constexpr (IsDecimalNumber<typename Data::ElementType> ||
std::is_same_v<Data, AggregateFunctionArrayAggData<void>>) {
new (place) Data(argument_types);
} else {
new (place) Data();
Expand Down Expand Up @@ -719,13 +785,15 @@ class AggregateFunctionCollect

for (size_t i = 0; i < num_rows; ++i) {
col_null->get_null_map_data().push_back(col_src.get_null_map_data()[i]);
if constexpr (std::is_same_v<StringRef, typename Data::ElementType>) {
if constexpr (std::is_same_v<Data, AggregateFunctionArrayAggData<StringRef>>) {
auto& vec = assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(
col_null->get_nested_column());
const auto& vec_src =
assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
col_src.get_nested_column());
vec.insert_from(vec_src, i);
} else if constexpr (std::is_same_v<Data, AggregateFunctionArrayAggData<void>>) {
to_nested_col.insert_from(col_src.get_nested_column(), i);
} else {
using ColVecType = ColumnVectorOrDecimal<typename Data::ElementType>;
auto& vec = assert_cast<ColVecType&, TypeCheckOnRelease::DISABLE>(
Expand Down
156 changes: 156 additions & 0 deletions regression-test/data/query_p0/aggregate/array_agg.out
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,159 @@
3 3
3 3

-- !sql_array_agg_array --
1 [["plum", "banana", "apple"], ["grape", "banana", null, "plum", "cherry"], ["apple", "banana", "kiwi", null], ["apple", "banana", "cherry", "kiwi", null], ["cherry", null], null]
10 [null, ["apple", "banana", null, "cherry", "grape"], ["cherry", "berry", null], ["peach"]]
11 [["grape", "apple", "kiwi"], null, null, null]
12 [["melon", "papaya", "kiwi"], ["plum", null, "kiwi", "banana"], ["plum", null, "mango"], ["plum", null]]
13 [["apple", null], null, ["peach", "cherry", "papaya", "kiwi", null], ["plum", null]]
14 [["orange", "mango", "plum"], ["apple", "melon"], ["orange", "grape", null], ["orange", "banana", null]]
15 [null, ["banana", "peach", "plum", null], null, ["strawberry", null]]
16 [["peach", "kiwi", null, "berry"], null, ["plum", "grape", null], ["kiwi", null]]
17 [["banana", "plum", null], ["papaya"], null, ["apple", "kiwi", null, "papaya"]]
18 [["apple", null], null, ["apple", "mango", null], null]
19 [["kiwi", "mango", null], ["pear", "grape"], ["cherry", null, "plum"], ["banana", "mango", "cherry"]]
2 [null, ["apple", null, "banana"], ["orange", "grape", null], null]
20 [["grape", null], ["kiwi", null], ["kiwi", "plum", "orange", null], ["papaya", "orange", "kiwi", null]]
21 [["kiwi", null]]
22 [["orange", "peach", null, "kiwi"]]
23 [["berry", "grape", null]]
24 [null]
25 [["mango", "plum", "apple", null]]
26 [["banana", null]]
27 [["orange", "kiwi", "plum", null]]
28 [null]
29 [["apple", null, "grape", "peach"]]
3 [["mango", null], ["orange"], ["apple", "kiwi", "papaya"], ["plum", "peach", null]]
30 [["kiwi", "banana", null]]
31 [["cherry", "berry", null, "plum"]]
32 [null]
33 [["apple", null, "kiwi", "orange"]]
34 [["grape", "plum", null]]
35 [["banana", null]]
36 [["kiwi", "orange", "plum", null]]
37 [null]
38 [["apple", null]]
39 [["plum", "grape", null]]
4 [["mango", null, "orange", "plum", "berry", "kiwi"], ["orange", "grape", "mango", "berry"], ["plum", "kiwi", null, "peach", "berry"], null]
40 [["banana", "kiwi", "peach", null]]
41 [["grape", null, "plum"]]
42 [["orange", "kiwi", "peach", null]]
43 [null]
44 [["apple", "banana", null]]
45 [["grape", null]]
46 [["plum", "kiwi", null, "orange"]]
47 [null]
48 [["mango", null]]
49 [["kiwi", "plum", "banana", null]]
5 [["peach", "melon", null], ["apple", null, "kiwi"], ["grape", "kiwi", null], null]
50 [null]
6 [null, null, ["cherry", "apple", null, "plum"], null]
7 [["papaya", "cherry", "apple", null], ["melon"], ["melon", null, "papaya", "grape", "kiwi", "berry", null], ["orange", "grape", "kiwi"]]
8 [["plum", "peach", null, "orange"], ["banana", null], ["berry", "cherry"], ["banana", "mango", null]]
9 [["orange", "kiwi", "berry", null, "plum"], ["apple", "kiwi", "plum", null, "mango"], ["kiwi", null], null]

-- !sql_array_agg_map --
1 [{"key5":null}, {"key2":15, "key3":8}, {"key1":10, "key2":5}, {"key1":10, "key2":20}, {"key2":null}, null]
10 [{"key3":5, "key4":null}, {"key1":null, "key6":9}, {"key2":10, "key7":null}, {"key1":10}]
11 [{"key1":9}, {"key4":5, "key5":null}, {"key1":3, "key5":null}, {"key4":null}]
12 [null, {"key4":25}, {"key2":20, "key3":null}, {"key1":null, "key2":5}]
13 [{"key2":null, "key3":7}, null, null, {"key3":null, "key5":10}]
14 [{"key6":5}, {"key5":15, "key6":25}, {"key1":3, "key6":8}, {"key4":3, "key5":null}]
15 [{"key1":18, "key6":22}, {"key2":4}, {"key3":null}, null]
16 [{"key2":20}, {"key2":2}, {"key2":8, "key3":null}, {"key7":7, "key3":null}]
17 [{"key4":8}, {"key6":9, "key7":null}, {"key1":10, "key4":14}, {"key7":null}]
18 [{"key1":11}, {"key1":10, "key2":null}, {"key2":2}, {"key2":null, "key5":10}]
19 [{"key7":9}, {"key1":1, "key2":2, "key3":3}, {"key1":null, "key7":6}, {"key3":7, "key4":null}]
2 [{"key1":null, "key5":25}, {"key1":10, "key2":null, "key3":20}, {"key2":null, "key3":7}, {"key3":null}]
20 [{"key1":null, "key3":6}, {"key1":1, "key9":6}, {"key1":14}, {"key5":3, "key7":null}]
21 [{"key1":10, "key6":2}]
22 [{"key3":null}]
23 [{"key1":8}]
24 [{"key2":15, "key4":null}]
25 [{"key7":18}]
26 [{"key3":12}]
27 [{"key5":10}]
28 [{"key1":14}]
29 [{"key2":4, "key4":null}]
3 [{"key1":12}, {"key1":5}, {"key3":null}, {"key1":5, "key4":null}]
30 [{"key6":6}]
31 [{"key3":null}]
32 [{"key2":9, "key7":null}]
33 [{"key1":7}]
34 [{"key4":20}]
35 [{"key1":12, "key5":null}]
36 [{"key3":11}]
37 [{"key1":null}]
38 [{"key2":3, "key6":9}]
39 [{"key5":8}]
4 [{"key2":30}, null, {"key4":15}, {"key3":7, "key4":null}]
40 [{"key1":15}]
41 [{"key3":7}]
42 [{"key4":5}]
43 [{"key1":2, "key7":null}]
44 [{"key2":14}]
45 [{"key4":12}]
46 [{"key6":10}]
47 [{"key2":null}]
48 [{"key5":9}]
49 [{"key1":13}]
5 [{"key1":10}, {"key1":7, "key2":8}, null, {"key2":8, "key5":null}]
50 [{"key7":8}]
6 [{"key4":7, "key6":null}, {"key1":1, "key2":2, "key3":null, "key4":4}, {"key3":null, "key6":12}, {"key2":null, "key3":25}]
7 [{"key1":12, "key3":6}, null, {"key4":15, "key5":null}, {"key1":5}]
8 [{"key1":6, "key7":12}, {"key2":9}, {"key1":null, "key5":50}, null]
9 [{"key2":null, "key5":40}, null, {"key2":14, "key5":7}, {"key1":10, "key2":20, "key3":30, "key4":40, "key5":50, "key6":60, "key7":null}]

-- !sql_array_agg_struct --
1 [{"id":1}, {"id":1}, {"id":1}, {"id":1}, {"id":1}, null]
10 [{"id":10}, {"id":10}, {"id":10}, {"id":null}]
11 [{"id":11}, {"id":11}, {"id":11}, {"id":null}]
12 [{"id":12}, {"id":12}, {"id":12}, {"id":null}]
13 [{"id":13}, {"id":13}, {"id":13}, {"id":null}]
14 [{"id":14}, {"id":null}, {"id":14}, {"id":null}]
15 [{"id":15}, {"id":null}, {"id":15}, {"id":null}]
16 [{"id":16}, {"id":16}, {"id":16}, {"id":16}]
17 [{"id":17}, {"id":17}, {"id":17}, {"id":17}]
18 [{"id":18}, {"id":null}, {"id":18}, {"id":18}]
19 [{"id":19}, {"id":null}, {"id":19}, {"id":19}]
2 [{"id":2}, {"id":null}, {"id":2}, {"id":2}]
20 [{"id":20}, {"id":20}, {"id":null}, {"id":null}]
21 [{"id":21}]
22 [{"id":22}]
23 [{"id":23}]
24 [{"id":24}]
25 [{"id":25}]
26 [{"id":26}]
27 [{"id":27}]
28 [{"id":28}]
29 [{"id":29}]
3 [{"id":3}, {"id":3}, {"id":3}, {"id":3}]
30 [{"id":30}]
31 [{"id":31}]
32 [{"id":32}]
33 [{"id":33}]
34 [{"id":34}]
35 [{"id":35}]
36 [{"id":36}]
37 [{"id":37}]
38 [{"id":38}]
39 [{"id":39}]
4 [{"id":null}, {"id":4}, {"id":4}, {"id":4}]
40 [{"id":40}]
41 [{"id":41}]
42 [{"id":42}]
43 [{"id":43}]
44 [{"id":44}]
45 [{"id":45}]
46 [{"id":46}]
47 [{"id":47}]
48 [{"id":48}]
49 [{"id":49}]
5 [{"id":5}, {"id":null}, {"id":5}, {"id":5}]
50 [{"id":50}]
6 [{"id":6}, {"id":6}, {"id":6}, {"id":6}]
7 [{"id":null}, {"id":null}, {"id":null}, {"id":7}]
8 [{"id":8}, {"id":8}, {"id":8}, {"id":8}]
9 [{"id":9}, {"id":9}, {"id":9}, {"id":9}]

Loading