From b381614ee150532febe44f5a5370bed493088329 Mon Sep 17 00:00:00 2001 From: WangCong <1018957763@qq.com> Date: Sun, 13 Oct 2019 14:41:49 +0800 Subject: [PATCH 1/2] Fix UDAF variable arguments support bug --- be/src/exprs/agg_fn.cc | 3 +- be/src/exprs/agg_fn.h | 6 + be/src/exprs/new_agg_fn_evaluator.cc | 198 ++++++++++++++++++++------- 3 files changed, 154 insertions(+), 53 deletions(-) diff --git a/be/src/exprs/agg_fn.cc b/be/src/exprs/agg_fn.cc index 8d194745cef808..0c561fe133719b 100644 --- a/be/src/exprs/agg_fn.cc +++ b/be/src/exprs/agg_fn.cc @@ -35,7 +35,8 @@ AggFn::AggFn(const TExprNode& tnode, const SlotDescriptor& intermediate_slot_des : Expr(tnode), is_merge_(tnode.agg_expr.is_merge_agg), intermediate_slot_desc_(intermediate_slot_desc), - output_slot_desc_(output_slot_desc) { + output_slot_desc_(output_slot_desc), + _vararg_start_idx(tnode.__isset.vararg_start_idx ? tnode.vararg_start_idx : -1) { // TODO(pengyubing) arg_type_descs_ is used for codegen // arg_type_descs_(AnyValUtil::column_type_to_type_desc( // TypeDescriptor::from_thrift(tnode.agg_expr.arg_types))) { diff --git a/be/src/exprs/agg_fn.h b/be/src/exprs/agg_fn.h index 602728ec09d877..bed805d5b4611c 100644 --- a/be/src/exprs/agg_fn.h +++ b/be/src/exprs/agg_fn.h @@ -150,6 +150,10 @@ class AggFn : public Expr { virtual std::string DebugString() const; static std::string DebugString(const std::vector& exprs); + const int get_vararg_start_idx() const { + return _vararg_start_idx; + } + private: friend class Expr; friend class NewAggFnEvaluator; @@ -178,6 +182,8 @@ class AggFn : public Expr { void* serialize_fn_ = nullptr; void* get_value_fn_ = nullptr; void* finalize_fn_ = nullptr; + + int _vararg_start_idx; AggFn(const TExprNode& node, const SlotDescriptor& intermediate_slot_desc, const SlotDescriptor& output_slot_desc); diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index bfc7a2fa80257f..9bb1d4baa2a871 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ b/be/src/exprs/new_agg_fn_evaluator.cc @@ -66,6 +66,25 @@ typedef void (*UpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, typedef void (*UpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); + +typedef void (*VarargUpdateFn0)(FunctionContext*, int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn1)(FunctionContext*, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn2)(FunctionContext*, const AnyVal&, const AnyVal&, int num_varargs, + const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, + const AnyVal*, AnyVal*); + typedef StringVal (*SerializeFn)(FunctionContext*, const StringVal&); typedef AnyVal (*GetValueFn)(FunctionContext*, const AnyVal&); typedef AnyVal (*FinalizeFn)(FunctionContext*, const AnyVal&); @@ -394,58 +413,133 @@ void NewAggFnEvaluator::Update(const TupleRow* row, Tuple* dst, void* fn) { // TODO: this part is not so good and not scalable. It can be replaced with // codegen but we can also consider leaving it for the first few cases for // debugging. - switch (input_evals_.size()) { - case 0: - reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - break; - case 1: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], staging_intermediate_val_); - break; - case 2: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], staging_intermediate_val_); - break; - case 3: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], staging_intermediate_val_); - break; - case 4: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], staging_intermediate_val_); - break; - case 5: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], staging_intermediate_val_); - break; - case 6: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], staging_intermediate_val_); - break; - case 7: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - *staging_input_vals_[6], staging_intermediate_val_); - break; - case 8: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - *staging_input_vals_[6], *staging_input_vals_[7], - staging_intermediate_val_); - break; - default: - DCHECK(false) << "NYI"; - } + if (agg_fn_.get_vararg_start_idx() == -1) { + switch (input_evals_.size()) { + case 0: + reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + break; + case 1: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], staging_intermediate_val_); + break; + case 2: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + staging_intermediate_val_); + break; + case 3: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], staging_intermediate_val_); + break; + case 4: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + staging_intermediate_val_); + break; + case 5: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], staging_intermediate_val_); + break; + case 6: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], *staging_input_vals_[5], + staging_intermediate_val_); + break; + case 7: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], *staging_input_vals_[5], + *staging_input_vals_[6], staging_intermediate_val_); + break; + case 8: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], *staging_input_vals_[5], + *staging_input_vals_[6], *staging_input_vals_[7], + staging_intermediate_val_); + break; + default: + DCHECK(false) << "NYI"; + } + } else { + int num_varargs = input_evals_.size() - agg_fn_.get_vararg_start_idx(); + const AnyVal* varargs = *(staging_input_vals_.data() + agg_fn_.get_vararg_start_idx()); + switch (agg_fn_.get_vararg_start_idx()) { + case 0: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + num_varargs, varargs, + staging_intermediate_val_); + break; + case 1: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], + num_varargs, varargs, + staging_intermediate_val_); + break; + case 2: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + num_varargs, varargs, + staging_intermediate_val_); + break; + case 3: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], + num_varargs, varargs, + staging_intermediate_val_); + break; + case 4: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + num_varargs, varargs, + staging_intermediate_val_); + break; + case 5: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], + num_varargs, varargs, + staging_intermediate_val_); + break; + case 6: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], *staging_input_vals_[5], + num_varargs, varargs, + staging_intermediate_val_); + break; + case 7: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], *staging_input_vals_[5], + *staging_input_vals_[6], + num_varargs, varargs, + staging_intermediate_val_); + break; + case 8: + reinterpret_cast(fn)(agg_fn_ctx_.get(), + *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], + *staging_input_vals_[4], *staging_input_vals_[5], + *staging_input_vals_[6], *staging_input_vals_[7], + num_varargs, varargs, + staging_intermediate_val_); + break; + } + } SetDstSlot(staging_intermediate_val_, slot_desc, dst); } From 125c6a0dd89f7fd46de20acd1c16fff49d47e41f Mon Sep 17 00:00:00 2001 From: WangCong <1018957763@qq.com> Date: Sun, 13 Oct 2019 15:39:27 +0800 Subject: [PATCH 2/2] add missing default case --- be/src/exprs/new_agg_fn_evaluator.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index 9bb1d4baa2a871..d92eabf24ea08d 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ b/be/src/exprs/new_agg_fn_evaluator.cc @@ -538,6 +538,8 @@ void NewAggFnEvaluator::Update(const TupleRow* row, Tuple* dst, void* fn) { num_varargs, varargs, staging_intermediate_val_); break; + default: + DCHECK(false) << "NYI"; } } SetDstSlot(staging_intermediate_val_, slot_desc, dst);