From 546aa3582f068cc68831ac728ae68bdfffe02af8 Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Thu, 20 Mar 2025 19:45:50 +0800 Subject: [PATCH 1/3] [feature](window) support lead/lag default value support column --- .../aggregate_function_window.h | 59 +++++++++---------- .../expressions/functions/window/Lag.java | 10 +--- .../expressions/functions/window/Lead.java | 10 +--- .../functions/window/WindowFunction.java | 6 +- 4 files changed, 32 insertions(+), 53 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index ca5b3bb07652bb..f11504f8fc74c5 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -354,15 +355,10 @@ struct LeadLagData { static constexpr bool result_nullable = result_is_nullable; void reset() { _data_value.reset(); - _default_value.reset(); _is_inited = false; + _offset_value = 0; } - bool default_is_null() { return _default_value.is_null(); } - - // here _ptr pointer default column from third - void set_value_from_default() { this->_data_value = _default_value; } - void insert_result_into(IColumn& to) const { if constexpr (result_is_nullable) { if (_data_value.is_null()) { @@ -379,8 +375,6 @@ struct LeadLagData { } } - void set_is_null() { this->_data_value.reset(); } - void set_value(const IColumn** columns, size_t pos) { if constexpr (arg_is_nullable) { if (assert_cast(columns[0]) @@ -394,40 +388,44 @@ struct LeadLagData { _data_value.set_value(columns[0], pos); } - void check_default(const IColumn* column) { - if (!_is_inited) { - if (is_column_nullable(*column)) { - const auto* nullable_column = - assert_cast(column); - if (nullable_column->is_null_at(0)) { - _default_value.reset(); - } else { - _default_value.set_value(nullable_column->get_nested_column_ptr().get(), 0); - } + void set_default_value(const IColumn* column, size_t pos) { + // eg: lead(column, 10, default_value), column size maybe 3 rows + // offset value 10 is from second argument, pos: 11 is calculated as frame_end + pos = pos - _offset_value; + if (is_column_nullable(*column)) { + const auto* nullable_column = + assert_cast(column); + if (nullable_column->is_null_at(pos)) { + this->_data_value.reset(); } else { - _default_value.set_value(column, 0); + this->_data_value.set_value(nullable_column->get_nested_column_ptr().get(), pos); } + } else { + this->_data_value.set_value(column, pos); + } + } + + void offset_value(const IColumn* column) { + if (!_is_inited) { + const auto* column_number = assert_cast(column); + _offset_value = column_number->get_data()[0]; _is_inited = true; } } private: BaseValue _data_value; - BaseValue _default_value; bool _is_inited = false; + int64_t _offset_value = 0; }; template struct WindowFunctionLeadImpl : Data { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, const IColumn** columns) { - this->check_default(columns[2]); if (frame_end > partition_end) { //output default value, win end is under partition - if (this->default_is_null()) { - this->set_is_null(); - } else { - this->set_value_from_default(); - } + this->offset_value(columns[1]); + this->set_default_value(columns[2], frame_end - 1); return; } this->set_value(columns, frame_end - 1); @@ -440,13 +438,10 @@ template struct WindowFunctionLagImpl : Data { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, const IColumn** columns) { - this->check_default(columns[2]); + // window start is beyond partition if (partition_start >= frame_end) { //[unbound preceding(0), offset preceding(-123)] - if (this->default_is_null()) { // win start is beyond partition - this->set_is_null(); - } else { - this->set_value_from_default(); - } + this->offset_value(columns[1]); + this->set_default_value(columns[2], frame_end - 1); return; } this->set_value(columns, frame_end - 1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java index fc74cadebfce04..e9711758917897 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java @@ -68,11 +68,6 @@ public Expression getOffset() { return child(1); } - public Expression getDefaultValue() { - Preconditions.checkArgument(children.size() == 3); - return child(2); - } - @Override public boolean nullable() { if (children.size() == 3 && child(2).nullable()) { @@ -99,7 +94,7 @@ public void checkLegalityBeforeTypeCoercion() { return; } if (children().size() >= 2) { - checkValidParams(getOffset(), true); + checkValidParams(getOffset()); if (getOffset() instanceof Literal) { if (((Literal) getOffset()).getDouble() < 0) { throw new AnalysisException( @@ -109,9 +104,6 @@ public void checkLegalityBeforeTypeCoercion() { throw new AnalysisException( "The offset parameter of LAG must be a constant positive integer: " + this.toSql()); } - if (children().size() >= 3) { - checkValidParams(getDefaultValue(), false); - } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java index 251141a68cb222..6434e7849cc263 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java @@ -70,11 +70,6 @@ public Expression getOffset() { return child(1); } - public Expression getDefaultValue() { - Preconditions.checkArgument(children.size() == 3); - return child(2); - } - @Override public boolean nullable() { if (children.size() == 3 && child(2).nullable()) { @@ -94,7 +89,7 @@ public void checkLegalityBeforeTypeCoercion() { return; } if (children().size() >= 2) { - checkValidParams(getOffset(), true); + checkValidParams(getOffset()); if (getOffset() instanceof Literal) { if (((Literal) getOffset()).getDouble() < 0) { throw new AnalysisException( @@ -104,9 +99,6 @@ public void checkLegalityBeforeTypeCoercion() { throw new AnalysisException( "The offset parameter of LAG must be a constant positive integer: " + this.toSql()); } - if (children().size() >= 3) { - checkValidParams(getDefaultValue(), false); - } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/WindowFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/WindowFunction.java index 1265f685b26e69..13b4ec58476849 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/WindowFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/WindowFunction.java @@ -59,14 +59,14 @@ public int computeHashCode() { /** * LAG/LEAD param must be const, and offset must be number */ - protected void checkValidParams(Expression param, boolean isOffset) { + protected void checkValidParams(Expression param) { DataType type = param.getDataType(); - if (isOffset == true && !type.isNumericType()) { + if (!type.isNumericType()) { throw new AnalysisException("The offset of LAG/LEAD must be a number: " + this.toSql()); } if (!param.isConstant()) { throw new AnalysisException( - "The parameter 2 or parameter 3 of LAG/LEAD must be a constant value: " + this.toSql()); + "The parameter 2 of LAG/LEAD must be a constant value: " + this.toSql()); } } } From 182a8bab04b91279da0f5ad6ce3029b5313e44ca Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Mon, 24 Mar 2025 11:42:30 +0800 Subject: [PATCH 2/3] [improve](function) support lead/lag function input column as third params --- .../aggregate_function_window.h | 25 ++++++------ .../correctness_p0/test_lag_lead_window.out | 11 ++++++ .../test_lag_lead_window.groovy | 39 +++++++++++++++++++ 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index f11504f8fc74c5..8dec2b447b8df4 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -21,14 +21,11 @@ #pragma once #include -#include -#include #include #include #include #include -#include #include "gutil/integral_types.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -388,10 +385,8 @@ struct LeadLagData { _data_value.set_value(columns[0], pos); } - void set_default_value(const IColumn* column, size_t pos) { - // eg: lead(column, 10, default_value), column size maybe 3 rows - // offset value 10 is from second argument, pos: 11 is calculated as frame_end - pos = pos - _offset_value; + void set_value_from_default(const IColumn* column, size_t pos) { + DCHECK_GE(pos, 0); if (is_column_nullable(*column)) { const auto* nullable_column = assert_cast(column); @@ -405,7 +400,7 @@ struct LeadLagData { } } - void offset_value(const IColumn* column) { + void set_offset_value(const IColumn* column) { if (!_is_inited) { const auto* column_number = assert_cast(column); _offset_value = column_number->get_data()[0]; @@ -413,6 +408,8 @@ struct LeadLagData { } } + int64_t get_offset_value() const { return _offset_value; } + private: BaseValue _data_value; bool _is_inited = false; @@ -424,8 +421,11 @@ struct WindowFunctionLeadImpl : Data { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, const IColumn** columns) { if (frame_end > partition_end) { //output default value, win end is under partition - this->offset_value(columns[1]); - this->set_default_value(columns[2], frame_end - 1); + this->set_offset_value(columns[1]); + // eg: lead(column, 10, default_value), column size maybe 3 rows + // offset value 10 is from second argument, pos: 11 is calculated as frame_end + auto pos = frame_end - 1 - this->get_offset_value(); + this->set_value_from_default(columns[2], pos); return; } this->set_value(columns, frame_end - 1); @@ -440,8 +440,9 @@ struct WindowFunctionLagImpl : Data { int64_t frame_end, const IColumn** columns) { // window start is beyond partition if (partition_start >= frame_end) { //[unbound preceding(0), offset preceding(-123)] - this->offset_value(columns[1]); - this->set_default_value(columns[2], frame_end - 1); + this->set_offset_value(columns[1]); + auto pos = frame_end - 1 + this->get_offset_value(); + this->set_value_from_default(columns[2], pos); return; } this->set_value(columns, frame_end - 1); diff --git a/regression-test/data/correctness_p0/test_lag_lead_window.out b/regression-test/data/correctness_p0/test_lag_lead_window.out index 041314a1c6535d..f9927b708c9130 100644 --- a/regression-test/data/correctness_p0/test_lag_lead_window.out +++ b/regression-test/data/correctness_p0/test_lag_lead_window.out @@ -9,6 +9,11 @@ a aa /wyyt-image/2021/11/13/595345040188712460.jpg b aa /wyyt-image/2022/04/13/1434607674511761493.jpg /wyyt-image/2022/04/13/1434607674511761493.jpg c cc /wyyt-image/2022/04/13/1434607674511761493.jpg +-- !select_default3 -- +a /wyyt-image/2021/11/13/595345040188712460.jpg aa aa aa +b /wyyt-image/2022/04/13/1434607674511761493.jpg aa /wyyt-image/2022/04/13/1434607674511761493.jpg aa +c /wyyt-image/2022/04/13/1434607674511761493.jpg cc cc /wyyt-image/2022/04/13/1434607674511761493.jpg + -- !select_default -- c 2022-09-06T00:00:02 2022-09-06T00:00:01 b 2022-09-06T00:00:01 2022-09-06T00:00 @@ -49,3 +54,9 @@ a 2022-09-06T00:00 \N b 2022-09-06T00:00:01 \N c 2022-09-06T00:00:02 \N +-- !select_lead_7 -- +2023-01-01 1 10 20 10 +2023-01-02 1 20 30 10 +2023-01-03 1 30 \N 20 +2023-01-04 1 \N \N 30 + diff --git a/regression-test/suites/correctness_p0/test_lag_lead_window.groovy b/regression-test/suites/correctness_p0/test_lag_lead_window.groovy index 1dfccca58ee6f9..0cf731e7d9ea5a 100644 --- a/regression-test/suites/correctness_p0/test_lag_lead_window.groovy +++ b/regression-test/suites/correctness_p0/test_lag_lead_window.groovy @@ -41,6 +41,12 @@ suite("test_lag_lead_window") { lead(cc,1,'') over (PARTITION by cc order by aa) as lead_cc from ${tableName} order by aa; """ + + qt_select_default3 """ select aa,cc,bb,lead(cc,1,bb) over (PARTITION by cc order by aa) as lead_res, + lag(cc,1,bb) over (PARTITION by cc order by aa) as lag_res + from ${tableName} + order by aa; """ + sql """ DROP TABLE IF EXISTS test1 """ sql """ CREATE TABLE IF NOT EXISTS test1 (id varchar(255), create_time datetime) DISTRIBUTED BY HASH(id) PROPERTIES("replication_num" = "1"); """ @@ -61,4 +67,37 @@ suite("test_lag_lead_window") { sql """ DROP TABLE IF EXISTS test1 """ + + qt_select_lead_7 """ SELECT + sale_date, + product_id, + quantity, + LEAD (quantity, 1, quantity) OVER ( + PARTITION BY + product_id + ORDER BY + sale_date + ) AS next_day_quantity, + LAG (quantity, 1, quantity) OVER ( + PARTITION BY + product_id + ORDER BY + sale_date + ) AS pre_day_quantity + FROM + ( + select 1 AS product_id, '2023-01-01' AS sale_date, 10 AS quantity + UNION ALL + select 1, '2023-01-02', 20 + UNION ALL + select 1, '2023-01-03', 30 + UNION ALL + select 1, '2023-01-04', NULL + ) AS t + ORDER BY + product_id, + sale_date; + """ + + } From bfbf7ebcc9dfcdfe08b361e96428e16867def5db Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Mon, 24 Mar 2025 13:01:01 +0800 Subject: [PATCH 3/3] update --- .../nereids/trees/expressions/functions/window/Lag.java | 5 +++++ .../nereids/trees/expressions/functions/window/Lead.java | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java index e9711758917897..58ff2b820f03f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java @@ -68,6 +68,11 @@ public Expression getOffset() { return child(1); } + public Expression getDefaultValue() { + Preconditions.checkArgument(children.size() == 3); + return child(2); + } + @Override public boolean nullable() { if (children.size() == 3 && child(2).nullable()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java index 6434e7849cc263..b0de4ad571b8e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java @@ -70,6 +70,11 @@ public Expression getOffset() { return child(1); } + public Expression getDefaultValue() { + Preconditions.checkArgument(children.size() == 3); + return child(2); + } + @Override public boolean nullable() { if (children.size() == 3 && child(2).nullable()) {