From ede7b50496dea89f9a778ddaf7ae91c11b7087d6 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Fri, 3 Mar 2023 12:08:42 +0800 Subject: [PATCH 1/3] [refactor][functioncontext] remove duplicate type definition in function context --- be/src/exprs/function_filter.h | 8 +- be/src/exprs/json_functions.h | 2 - be/src/exprs/math_functions.h | 4 +- be/src/exprs/string_functions.h | 2 +- be/src/olap/like_column_predicate.cpp | 11 +- be/src/olap/like_column_predicate.h | 4 +- be/src/olap/olap_define.h | 25 -- be/src/olap/rowset/beta_rowset_writer.cpp | 2 +- be/src/runtime/buffer_control_block.cpp | 2 + be/src/runtime/collection_value.h | 6 +- be/src/runtime/datetime_value.h | 4 +- be/src/runtime/decimalv2_value.h | 2 - be/src/runtime/descriptor_helper.h | 9 +- be/src/runtime/descriptors.cpp | 50 +-- be/src/runtime/descriptors.h | 23 -- be/src/runtime/primitive_type.cpp | 127 ------- be/src/runtime/primitive_type.h | 5 - be/src/runtime/types.cpp | 1 + be/src/runtime/types.h | 10 +- be/src/udf/CMakeLists.txt | 2 +- be/src/udf/uda_test_harness.h | 270 --------------- be/src/udf/udf.cpp | 37 +- be/src/udf/udf.h | 85 +---- be/src/udf/udf_debug.h | 50 --- be/src/udf/udf_internal.h | 20 +- be/src/util/bitmap_value.h | 16 +- be/src/util/counts.h | 6 +- be/src/vec/common/string_ref.h | 1 - .../vec/exec/format/parquet/schema_desc.cpp | 1 + be/src/vec/exec/scan/new_es_scanner.cpp | 5 +- be/src/vec/exec/scan/new_olap_scan_node.cpp | 4 +- be/src/vec/exec/scan/new_olap_scan_node.h | 2 +- be/src/vec/exec/scan/new_olap_scanner.cpp | 5 +- be/src/vec/exec/scan/vfile_scanner.cpp | 6 +- be/src/vec/exec/scan/vmeta_scanner.cpp | 5 +- be/src/vec/exec/scan/vscan_node.cpp | 4 +- be/src/vec/exec/scan/vscan_node.h | 2 +- be/src/vec/exec/scan/vscanner.cpp | 8 + be/src/vec/exec/scan/vscanner.h | 4 +- be/src/vec/exprs/vexpr.cpp | 127 +------ be/src/vec/exprs/vexpr.h | 1 - be/src/vec/exprs/vexpr_context.cpp | 14 +- be/src/vec/exprs/vexpr_context.h | 17 +- be/src/vec/functions/function_timestamp.cpp | 2 +- be/src/vec/functions/in.h | 11 +- be/src/vec/runtime/vdatetime_value.h | 7 +- be/test/CMakeLists.txt | 6 +- be/test/runtime/datetime_value_test.cpp | 4 +- be/test/testutil/desc_tbl_builder.cc | 14 +- be/test/testutil/function_utils.cpp | 9 +- be/test/testutil/function_utils.h | 9 +- be/test/udf/uda_test.cpp | 315 ------------------ be/test/udf/udf_test.cpp | 193 ----------- be/test/util/counts_test.cpp | 4 +- be/test/vec/function/function_test_util.cpp | 36 +- be/test/vec/function/function_test_util.h | 20 +- gensrc/proto/descriptors.proto | 2 +- 57 files changed, 176 insertions(+), 1445 deletions(-) delete mode 100644 be/src/udf/uda_test_harness.h delete mode 100644 be/src/udf/udf_debug.h delete mode 100644 be/test/udf/uda_test.cpp delete mode 100644 be/test/udf/udf_test.cpp diff --git a/be/src/exprs/function_filter.h b/be/src/exprs/function_filter.h index 84758e793c956c..ba240498a564cd 100644 --- a/be/src/exprs/function_filter.h +++ b/be/src/exprs/function_filter.h @@ -25,8 +25,8 @@ namespace doris { class FunctionFilter { public: - FunctionFilter(bool opposite, const std::string& col_name, doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal string_param) + FunctionFilter(bool opposite, const std::string& col_name, doris::FunctionContext* fn_ctx, + doris::StringVal string_param) : _opposite(opposite), _col_name(col_name), _fn_ctx(fn_ctx), @@ -35,8 +35,8 @@ class FunctionFilter { bool _opposite; std::string _col_name; // these pointer's life time controlled by scan node - doris_udf::FunctionContext* _fn_ctx; - doris_udf::StringVal + doris::FunctionContext* _fn_ctx; + doris::StringVal _string_param; // only one param from conjunct, because now only support like predicate }; diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h index a070b136b5c505..32a2f93814f18b 100644 --- a/be/src/exprs/json_functions.h +++ b/be/src/exprs/json_functions.h @@ -70,8 +70,6 @@ struct JsonPath { } }; -using namespace doris_udf; - class JsonFunctions { public: /** diff --git a/be/src/exprs/math_functions.h b/be/src/exprs/math_functions.h index bfddcf43fdf781..8d1563a17ec78d 100644 --- a/be/src/exprs/math_functions.h +++ b/be/src/exprs/math_functions.h @@ -32,8 +32,8 @@ class MathFunctions { // Converts src_num in decimal to dest_base, // and fills expr_val.string_val with the result. - static doris_udf::StringVal decimal_to_base(doris_udf::FunctionContext* ctx, int64_t src_num, - int8_t dest_base); + static doris::StringVal decimal_to_base(doris::FunctionContext* ctx, int64_t src_num, + int8_t dest_base); // Converts src_num representing a number in src_base but encoded in decimal // into its actual decimal number. diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 373686dbb64c51..14515b8dc560c4 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -34,7 +34,7 @@ namespace doris { class StringFunctions { public: - static bool set_re2_options(const doris_udf::StringVal& match_parameter, std::string* error_str, + static bool set_re2_options(const doris::StringVal& match_parameter, std::string* error_str, re2::RE2::Options* opts); // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. diff --git a/be/src/olap/like_column_predicate.cpp b/be/src/olap/like_column_predicate.cpp index cb5a127f82135c..d2057534c90a04 100644 --- a/be/src/olap/like_column_predicate.cpp +++ b/be/src/olap/like_column_predicate.cpp @@ -25,21 +25,20 @@ namespace doris { template <> LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id, - doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal val) + doris::FunctionContext* fn_ctx, doris::StringVal val) : ColumnPredicate(column_id, opposite), pattern(reinterpret_cast(val.ptr), val.len) { _state = reinterpret_cast( - fn_ctx->get_function_state(doris_udf::FunctionContext::THREAD_LOCAL)); + fn_ctx->get_function_state(doris::FunctionContext::THREAD_LOCAL)); _state->search_state.clone(_like_state); } template <> LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id, - doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal val) + doris::FunctionContext* fn_ctx, + doris::StringVal val) : ColumnPredicate(column_id, opposite), pattern(val) { _state = reinterpret_cast( - fn_ctx->get_function_state(doris_udf::FunctionContext::THREAD_LOCAL)); + fn_ctx->get_function_state(doris::FunctionContext::THREAD_LOCAL)); } template diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h index a38f9228fb07ee..941c00723df8cd 100644 --- a/be/src/olap/like_column_predicate.h +++ b/be/src/olap/like_column_predicate.h @@ -28,8 +28,8 @@ namespace doris { template class LikeColumnPredicate : public ColumnPredicate { public: - LikeColumnPredicate(bool opposite, uint32_t column_id, doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal val); + LikeColumnPredicate(bool opposite, uint32_t column_id, doris::FunctionContext* fn_ctx, + doris::StringVal val); ~LikeColumnPredicate() override = default; PredicateType type() const override { return PredicateType::EQ; } diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index fbc4f245572d29..ecfceda960eb2d 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -125,8 +125,6 @@ static const uint64_t GB_EXCHANGE_BYTE = 1024 * 1024 * 1024; // bloom filter fpp static const double BLOOM_FILTER_DEFAULT_FPP = 0.05; -#define OLAP_GOTO(label) goto label - enum ColumnFamilyIndex { DEFAULT_COLUMN_FAMILY_INDEX = 0, DORIS_COLUMN_FAMILY_INDEX, @@ -186,29 +184,6 @@ const std::string REMOTE_TABLET_GC_PREFIX = "tgc_"; type_t(const type_t&); #endif -// 没有使用的变量不报warning -#define OLAP_UNUSED_ARG(a) (void)(a) - -// thread-safe(gcc only) method for obtaining singleton -#define DECLARE_SINGLETON(classname) \ -public: \ - static classname* instance() { \ - classname* p_instance = nullptr; \ - try { \ - static classname s_instance; \ - p_instance = &s_instance; \ - } catch (...) { \ - p_instance = nullptr; \ - } \ - return p_instance; \ - } \ - \ -protected: \ - classname(); \ - \ -private: \ - ~classname(); - #define SAFE_DELETE(ptr) \ do { \ if (nullptr != ptr) { \ diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 684d42a1f829ef..02e4d318419002 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -66,7 +66,7 @@ BetaRowsetWriter::~BetaRowsetWriter() { * when the job is cancelled. Although it is meaningless to continue segcompaction when the job * is cancelled, the objects involved in the job should be preserved during segcompaction to * avoid crashs for memory issues. */ - OLAP_UNUSED_ARG(_wait_flying_segcompaction()); + _wait_flying_segcompaction(); // TODO(lingbin): Should wrapper exception logic, no need to know file ops directly. if (!_already_built) { // abnormal exit, remove all files generated diff --git a/be/src/runtime/buffer_control_block.cpp b/be/src/runtime/buffer_control_block.cpp index 25c268aaf2dc99..7fa0c50923d546 100644 --- a/be/src/runtime/buffer_control_block.cpp +++ b/be/src/runtime/buffer_control_block.cpp @@ -19,7 +19,9 @@ #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/internal_service.pb.h" +#include "runtime/exec_env.h" #include "runtime/raw_value.h" +#include "runtime/thread_context.h" #include "service/brpc.h" #include "util/thrift_util.h" diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h index 15e29b88f86daf..6b59066f97c340 100644 --- a/be/src/runtime/collection_value.h +++ b/be/src/runtime/collection_value.h @@ -21,13 +21,13 @@ #include "runtime/primitive_type.h" -namespace doris_udf { +namespace doris { class FunctionContext; -} // namespace doris_udf +} // namespace doris namespace doris { -using doris_udf::FunctionContext; +using doris::FunctionContext; using MemFootprint = std::pair; using GenMemFootprintFunc = std::function; diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h index 1e3ee94c596d6a..057bba982b147e 100644 --- a/be/src/runtime/datetime_value.h +++ b/be/src/runtime/datetime_value.h @@ -527,12 +527,12 @@ class DateTimeValue { DateTimeValue& operator--() { return *this += -1; } - void to_datetime_val(doris_udf::DateTimeVal* tv) const { + void to_datetime_val(doris::DateTimeVal* tv) const { tv->packed_time = to_int64_datetime_packed(); tv->type = _type; } - static DateTimeValue from_datetime_val(const doris_udf::DateTimeVal& tv) { + static DateTimeValue from_datetime_val(const doris::DateTimeVal& tv) { DateTimeValue value; value.from_packed_time(tv.packed_time); if (tv.type == TIME_DATE) { diff --git a/be/src/runtime/decimalv2_value.h b/be/src/runtime/decimalv2_value.h index 9d670b26ea3cb4..44d93c7e1128bd 100644 --- a/be/src/runtime/decimalv2_value.h +++ b/be/src/runtime/decimalv2_value.h @@ -47,8 +47,6 @@ enum DecimalError { enum DecimalRoundMode { HALF_UP = 1, HALF_EVEN = 2, CEILING = 3, FLOOR = 4, TRUNCATE = 5 }; -using namespace doris_udf; - class DecimalV2Value { public: friend DecimalV2Value operator+(const DecimalV2Value& v1, const DecimalV2Value& v2); diff --git a/be/src/runtime/descriptor_helper.h b/be/src/runtime/descriptor_helper.h index 13892d154aa3c8..ba3f17c57ac13a 100644 --- a/be/src/runtime/descriptor_helper.h +++ b/be/src/runtime/descriptor_helper.h @@ -117,17 +117,11 @@ class TTupleDescriptorBuilder { } } int null_bytes = (num_nullables + 7) / 8; - int offset = null_bytes; int null_offset = 0; for (int i = 0; i < _slot_descs.size(); ++i) { auto& slot_desc = _slot_descs[i]; - int size = get_slot_size(thrift_to_type(slot_desc.slotType.types[0].scalar_type.type)); - int align = (size > 16) ? 16 : size; - offset = ((offset + align - 1) / align) * align; slot_desc.id = tb->next_slot_id(); slot_desc.parent = _tuple_id; - slot_desc.byteOffset = offset; - offset += size; if (slot_desc.nullIndicatorByte >= 0) { slot_desc.nullIndicatorBit = null_offset % 8; slot_desc.nullIndicatorByte = null_offset / 8; @@ -140,7 +134,8 @@ class TTupleDescriptorBuilder { } _tuple_desc.id = _tuple_id; - _tuple_desc.byteSize = offset; + // Useless not set it. + _tuple_desc.byteSize = 0; _tuple_desc.numNullBytes = null_bytes; _tuple_desc.numNullSlots = _slot_descs.size(); diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index 01ed59eaac178e..1658a3bf6c7246 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -55,13 +55,11 @@ SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc) _type(TypeDescriptor::from_thrift(tdesc.slotType)), _parent(tdesc.parent), _col_pos(tdesc.columnPos), - _tuple_offset(tdesc.byteOffset), _null_indicator_offset(tdesc.nullIndicatorByte, tdesc.nullIndicatorBit), _col_name(tdesc.colName), _col_name_lower_case(to_lower(tdesc.colName)), _col_unique_id(tdesc.col_unique_id), _slot_idx(tdesc.slotIdx), - _slot_size(_type.get_slot_size()), _field_idx(-1), _is_materialized(tdesc.isMaterialized), _is_key(tdesc.is_key), @@ -72,13 +70,11 @@ SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc) _type(TypeDescriptor::from_protobuf(pdesc.slot_type())), _parent(pdesc.parent()), _col_pos(pdesc.column_pos()), - _tuple_offset(pdesc.byte_offset()), _null_indicator_offset(pdesc.null_indicator_byte(), pdesc.null_indicator_bit()), _col_name(pdesc.col_name()), _col_name_lower_case(to_lower(pdesc.col_name())), _col_unique_id(pdesc.col_unique_id()), _slot_idx(pdesc.slot_idx()), - _slot_size(_type.get_slot_size()), _field_idx(-1), _is_materialized(pdesc.is_materialized()), _is_key(pdesc.is_key()), @@ -89,7 +85,7 @@ void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const { pslot->set_parent(_parent); _type.to_protobuf(pslot->mutable_slot_type()); pslot->set_column_pos(_col_pos); - pslot->set_byte_offset(_tuple_offset); + pslot->set_byte_offset(0); pslot->set_null_indicator_byte(_null_indicator_offset.byte_offset); pslot->set_null_indicator_bit(_null_indicator_offset.bit_offset); DCHECK_LE(_null_indicator_offset.bit_offset, 8); @@ -115,8 +111,7 @@ vectorized::DataTypePtr SlotDescriptor::get_data_type_ptr() const { std::string SlotDescriptor::debug_string() const { std::stringstream out; out << "Slot(id=" << _id << " type=" << _type << " col=" << _col_pos - << ", colname=" << _col_name << " offset=" << _tuple_offset - << " null=" << _null_indicator_offset.debug_string() << ")"; + << ", colname=" << _col_name << " null=" << _null_indicator_offset.debug_string() << ")"; return out.str(); } @@ -256,7 +251,6 @@ std::string JdbcTableDescriptor::debug_string() const { TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slots) : _id(tdesc.id), _table_desc(nullptr), - _byte_size(tdesc.byteSize), _num_null_bytes(tdesc.numNullBytes), _num_materialized_slots(0), _slots(), @@ -273,7 +267,6 @@ TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slots) TupleDescriptor::TupleDescriptor(const PTupleDescriptor& pdesc, bool own_slots) : _id(pdesc.id()), _table_desc(nullptr), - _byte_size(pdesc.byte_size()), _num_null_bytes(pdesc.num_null_bytes()), _num_materialized_slots(0), _slots(), @@ -313,22 +306,11 @@ std::vector TupleDescriptor::slots_ordered_by_idx() const { return sorted_slots; } -bool TupleDescriptor::layout_equals(const TupleDescriptor& other_desc) const { - if (byte_size() != other_desc.byte_size()) return false; - if (slots().size() != other_desc.slots().size()) return false; - - std::vector slots = slots_ordered_by_idx(); - std::vector other_slots = other_desc.slots_ordered_by_idx(); - for (int i = 0; i < slots.size(); ++i) { - if (!slots[i]->layout_equals(*other_slots[i])) return false; - } - return true; -} - void TupleDescriptor::to_protobuf(PTupleDescriptor* ptuple) const { ptuple->Clear(); ptuple->set_id(_id); - ptuple->set_byte_size(_byte_size); + // Useless not set + ptuple->set_byte_size(0); ptuple->set_num_null_bytes(_num_null_bytes); ptuple->set_table_id(-1); ptuple->set_num_null_slots(_num_null_slots); @@ -336,7 +318,7 @@ void TupleDescriptor::to_protobuf(PTupleDescriptor* ptuple) const { std::string TupleDescriptor::debug_string() const { std::stringstream out; - out << "Tuple(id=" << _id << " size=" << _byte_size; + out << "Tuple(id=" << _id; if (_table_desc != nullptr) { //out << " " << _table_desc->debug_string(); } @@ -501,19 +483,6 @@ bool RowDescriptor::equals(const RowDescriptor& other_desc) const { return true; } -bool RowDescriptor::layout_is_prefix_of(const RowDescriptor& other_desc) const { - if (_tuple_desc_map.size() > other_desc._tuple_desc_map.size()) return false; - for (int i = 0; i < _tuple_desc_map.size(); ++i) { - if (!_tuple_desc_map[i]->layout_equals(*other_desc._tuple_desc_map[i])) return false; - } - return true; -} - -bool RowDescriptor::layout_equals(const RowDescriptor& other_desc) const { - if (_tuple_desc_map.size() != other_desc._tuple_desc_map.size()) return false; - return layout_is_prefix_of(other_desc); -} - std::string RowDescriptor::debug_string() const { std::stringstream ss; @@ -674,15 +643,6 @@ SlotDescriptor* DescriptorTbl::get_slot_descriptor(SlotId id) const { } } -bool SlotDescriptor::layout_equals(const SlotDescriptor& other_desc) const { - if (type().type != other_desc.type().type) return false; - if (is_nullable() != other_desc.is_nullable()) return false; - if (slot_size() != other_desc.slot_size()) return false; - if (tuple_offset() != other_desc.tuple_offset()) return false; - if (!null_indicator_offset().equals(other_desc.null_indicator_offset())) return false; - return true; -} - std::string DescriptorTbl::debug_string() const { std::stringstream out; out << "tuples:\n"; diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index fd75a5ad032987..e03aaac7f4bca3 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -93,20 +93,13 @@ class SlotDescriptor { int col_pos() const { return _col_pos; } // Returns the field index in the generated llvm struct for this slot's tuple int field_idx() const { return _field_idx; } - int tuple_offset() const { return _tuple_offset; } const NullIndicatorOffset& null_indicator_offset() const { return _null_indicator_offset; } bool is_materialized() const { return _is_materialized; } bool is_nullable() const { return _null_indicator_offset.bit_mask != 0; } - int slot_size() const { return _slot_size; } - const std::string& col_name() const { return _col_name; } const std::string& col_name_lower_case() const { return _col_name_lower_case; } - /// Return true if the physical layout of this descriptor matches the physical layout - /// of other_desc, but not necessarily ids. - bool layout_equals(const SlotDescriptor& other_desc) const; - void to_protobuf(PSlotDescriptor* pslot) const; std::string debug_string() const; @@ -133,7 +126,6 @@ class SlotDescriptor { const TypeDescriptor _type; const TupleId _parent; const int _col_pos; - const int _tuple_offset; const NullIndicatorOffset _null_indicator_offset; const std::string _col_name; const std::string _col_name_lower_case; @@ -144,9 +136,6 @@ class SlotDescriptor { // this is provided by the FE const int _slot_idx; - // the byte size of this slot. - const int _slot_size; - // the idx of the slot in the llvm codegen'd tuple struct // this is set by TupleDescriptor during codegen and takes into account // leading null bytes. @@ -348,10 +337,6 @@ class TupleDescriptor { TupleId id() const { return _id; } - /// Return true if the physical layout of this descriptor matches that of other_desc, - /// but not necessarily the id. - bool layout_equals(const TupleDescriptor& other_desc) const; - std::string debug_string() const; void to_protobuf(PTupleDescriptor* ptuple) const; @@ -510,14 +495,6 @@ class RowDescriptor { // Return true if the tuple ids of this descriptor match tuple ids of other desc. bool equals(const RowDescriptor& other_desc) const; - /// Return true if the physical layout of this descriptor matches the physical layout - /// of other_desc, but not necessarily the ids. - bool layout_equals(const RowDescriptor& other_desc) const; - - /// Return true if the tuples of this descriptor are a prefix of the tuples of - /// other_desc. Tuples are compared by their physical layout and not by ids. - bool layout_is_prefix_of(const RowDescriptor& other_desc) const; - std::string debug_string() const; int get_column_id(int slot_id) const; diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 7a90ffbcf2d234..7085dce8e7aec9 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -27,71 +27,6 @@ namespace doris { -PrimitiveType convert_type_to_primitive(FunctionContext::Type type) { - switch (type) { - case FunctionContext::Type::INVALID_TYPE: - return PrimitiveType::INVALID_TYPE; - case FunctionContext::Type::TYPE_DOUBLE: - return PrimitiveType::TYPE_DOUBLE; - case FunctionContext::Type::TYPE_NULL: - return PrimitiveType::TYPE_NULL; - case FunctionContext::Type::TYPE_CHAR: - return PrimitiveType::TYPE_CHAR; - case FunctionContext::Type::TYPE_VARCHAR: - return PrimitiveType::TYPE_VARCHAR; - case FunctionContext::Type::TYPE_STRING: - return PrimitiveType::TYPE_STRING; - case FunctionContext::Type::TYPE_DATETIME: - return PrimitiveType::TYPE_DATETIME; - case FunctionContext::Type::TYPE_DECIMALV2: - return PrimitiveType::TYPE_DECIMALV2; - case FunctionContext::Type::TYPE_DECIMAL32: - return PrimitiveType::TYPE_DECIMAL32; - case FunctionContext::Type::TYPE_DECIMAL64: - return PrimitiveType::TYPE_DECIMAL64; - case FunctionContext::Type::TYPE_DECIMAL128I: - return PrimitiveType::TYPE_DECIMAL128I; - case FunctionContext::Type::TYPE_BOOLEAN: - return PrimitiveType::TYPE_BOOLEAN; - case FunctionContext::Type::TYPE_ARRAY: - return PrimitiveType::TYPE_ARRAY; - case FunctionContext::Type::TYPE_MAP: - return PrimitiveType::TYPE_MAP; - case FunctionContext::Type::TYPE_STRUCT: - return PrimitiveType::TYPE_STRUCT; - case FunctionContext::Type::TYPE_OBJECT: - return PrimitiveType::TYPE_OBJECT; - case FunctionContext::Type::TYPE_HLL: - return PrimitiveType::TYPE_HLL; - case FunctionContext::Type::TYPE_QUANTILE_STATE: - return PrimitiveType::TYPE_QUANTILE_STATE; - case FunctionContext::Type::TYPE_TINYINT: - return PrimitiveType::TYPE_TINYINT; - case FunctionContext::Type::TYPE_SMALLINT: - return PrimitiveType::TYPE_SMALLINT; - case FunctionContext::Type::TYPE_INT: - return PrimitiveType::TYPE_INT; - case FunctionContext::Type::TYPE_BIGINT: - return PrimitiveType::TYPE_BIGINT; - case FunctionContext::Type::TYPE_LARGEINT: - return PrimitiveType::TYPE_LARGEINT; - case FunctionContext::Type::TYPE_DATE: - return PrimitiveType::TYPE_DATE; - case FunctionContext::Type::TYPE_DATEV2: - return PrimitiveType::TYPE_DATEV2; - case FunctionContext::Type::TYPE_DATETIMEV2: - return PrimitiveType::TYPE_DATETIMEV2; - case FunctionContext::Type::TYPE_TIMEV2: - return PrimitiveType::TYPE_TIMEV2; - case FunctionContext::Type::TYPE_JSONB: - return PrimitiveType::TYPE_JSONB; - default: - DCHECK(false); - } - - return PrimitiveType::INVALID_TYPE; -} - bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs) { if (lhs == TYPE_VARCHAR) { return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL || rhs == TYPE_OBJECT || @@ -591,66 +526,4 @@ PrimitiveType get_primitive_type(vectorized::TypeIndex v_type) { } } -int get_slot_size(PrimitiveType type) { - switch (type) { - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: - case TYPE_OBJECT: - case TYPE_HLL: - case TYPE_QUANTILE_STATE: - return sizeof(StringRef); - case TYPE_JSONB: - return sizeof(JsonBinaryValue); - case TYPE_VARIANT: - return sizeof(StringRef); - case TYPE_ARRAY: - return sizeof(CollectionValue); - case TYPE_MAP: - return sizeof(MapValue); - case TYPE_STRUCT: - return sizeof(StructValue); - - case TYPE_NULL: - case TYPE_BOOLEAN: - case TYPE_TINYINT: - return 1; - - case TYPE_SMALLINT: - return 2; - - case TYPE_INT: - case TYPE_DATEV2: - case TYPE_FLOAT: - case TYPE_DECIMAL32: - return 4; - - case TYPE_BIGINT: - case TYPE_DOUBLE: - case TYPE_TIME: - case TYPE_DECIMAL64: - case TYPE_DATETIMEV2: - case TYPE_TIMEV2: - return 8; - - case TYPE_LARGEINT: - return sizeof(__int128); - - case TYPE_DATE: - case TYPE_DATETIME: - // This is the size of the slot, the actual size of the data is 12. - return sizeof(DateTimeValue); - - case TYPE_DECIMALV2: - case TYPE_DECIMAL128I: - return 16; - - case INVALID_TYPE: - default: - DCHECK(false); - } - - return 0; -} - } // namespace doris diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index f6dacd1858b01c..a1bab5e662e9c0 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -35,8 +35,6 @@ class DecimalV2Value; struct StringRef; struct JsonBinaryValue; -PrimitiveType convert_type_to_primitive(FunctionContext::Type type); - constexpr bool is_enumeration_type(PrimitiveType type) { switch (type) { case TYPE_FLOAT: @@ -98,9 +96,6 @@ constexpr bool has_variable_type(PrimitiveType type) { type == TYPE_QUANTILE_STATE || type == TYPE_STRING; } -// Returns the byte size of type when in a tuple -int get_slot_size(PrimitiveType type); - bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs); PrimitiveType get_primitive_type(vectorized::TypeIndex v_type); diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index fca1c86954a723..b65051a680dd44 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -22,6 +22,7 @@ #include +#include "runtime/primitive_type.h" namespace doris { TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index 079ccc5c699034..db2a722b330baa 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -24,14 +24,16 @@ #include #include "common/config.h" -#include "runtime/primitive_type.h" +#include "gen_cpp/Types_types.h" +#include "gen_cpp/types.pb.h" +#include "olap/olap_define.h" +#include "runtime/define_primitive_type.h" namespace doris { extern const int HLL_COLUMN_DEFAULT_LEN; struct TPrimitiveType; -class PTypeDesc; // Describes a type. Includes the enum, children types, and any type-specific metadata // (e.g. precision and scale for decimals). @@ -40,7 +42,7 @@ struct TypeDescriptor { PrimitiveType type; /// Only set if type == TYPE_CHAR or type == TYPE_VARCHAR int len; - static constexpr int MAX_VARCHAR_LENGTH = OLAP_VARCHAR_MAX_LENGTH; + static constexpr int MAX_VARCHAR_LENGTH = 65535; static constexpr int MAX_CHAR_LENGTH = 255; static constexpr int MAX_CHAR_INLINE_LENGTH = 128; @@ -204,8 +206,6 @@ struct TypeDescriptor { bool is_variant_type() const { return type == TYPE_VARIANT; } - int get_slot_size() const { return ::doris::get_slot_size(type); } - static inline int get_decimal_byte_size(int precision) { DCHECK_GT(precision, 0); if (precision <= MAX_DECIMAL4_PRECISION) { diff --git a/be/src/udf/CMakeLists.txt b/be/src/udf/CMakeLists.txt index a0a012c6296f5d..c66e5a21dc7ed9 100755 --- a/be/src/udf/CMakeLists.txt +++ b/be/src/udf/CMakeLists.txt @@ -47,7 +47,7 @@ set (UDF_TEST_LINK_LIBS -lboost_date_time gtest) -set_target_properties(DorisUdf PROPERTIES PUBLIC_HEADER "udf.h;uda_test_harness.h") +set_target_properties(DorisUdf PROPERTIES PUBLIC_HEADER "udf.h") INSTALL(TARGETS DorisUdf ARCHIVE DESTINATION ${OUTPUT_DIR}/udf LIBRARY DESTINATION ${OUTPUT_DIR}/udf/lib diff --git a/be/src/udf/uda_test_harness.h b/be/src/udf/uda_test_harness.h deleted file mode 100644 index 26b3ec6f897178..00000000000000 --- a/be/src/udf/uda_test_harness.h +++ /dev/null @@ -1,270 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/udf/uda-test-harness.h -// and modified by Doris - -#pragma once - -#include -#include -#include - -#include "udf/udf.h" -#include "udf/udf_debug.h" - -namespace doris_udf { - -enum UdaExecutionMode { - ALL = 0, - SINGLE_NODE = 1, - ONE_LEVEL = 2, - TWO_LEVEL = 3, -}; - -template -class UdaTestHarnessBase { -public: - typedef void (*InitFn)(FunctionContext* context, INTERMEDIATE* result); - - typedef void (*MergeFn)(FunctionContext* context, const INTERMEDIATE& src, INTERMEDIATE* dst); - - typedef const INTERMEDIATE (*SerializeFn)(FunctionContext* context, const INTERMEDIATE& type); - - typedef RESULT (*FinalizeFn)(FunctionContext* context, const INTERMEDIATE& value); - - // UDA test harness allows for custom comparator to validate results. UDAs - // can specify a custom comparator to, for example, tolerate numerical imprecision. - // Returns true if x and y should be treated as equal. - typedef bool (*ResultComparator)(const RESULT& x, const RESULT& y); - - void set_result_comparator(ResultComparator fn) { _result_comparator_fn = fn; } - - // This must be called if the INTERMEDIATE is TYPE_FIXED_BUFFER - void set_intermediate_size(int byte_size) { _fixed_buffer_byte_size = byte_size; } - - // Returns the failure string if any. - const std::string& get_error_msg() const { return _error_msg; } - -protected: - UdaTestHarnessBase(InitFn init_fn, MergeFn merge_fn, SerializeFn serialize_fn, - FinalizeFn finalize_fn) - : _init_fn(init_fn), - _merge_fn(merge_fn), - _serialize_fn(serialize_fn), - _finalize_fn(finalize_fn), - _result_comparator_fn(nullptr), - _num_input_values(0) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const RESULT& expected, UdaExecutionMode mode); - - // Returns false if there is an error set in the context. - bool check_context(FunctionContext* context); - - // Verifies x == y, using the custom comparator if set. - bool check_result(const RESULT& x, const RESULT& y); - - // Runs the UDA on a single node. The entire execution happens in 1 context. - // The UDA does a update on all the input values and then a finalize. - RESULT execute_single_node(); - - // Runs the UDA, simulating a single level aggregation. The values are processed - // on num_nodes + 1 contexts. There are num_nodes that do update and serialize. - // There is a final context that does merge and finalize. - RESULT execute_one_level(int num_nodes); - - // Runs the UDA, simulating a two level aggregation with num1 in the first level and - // num2 in the second. The values are processed in num1 + num2 contexts. - RESULT execute_two_level(int num1, int num2); - - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst) = 0; - -private: - // UDA functions - InitFn _init_fn; - MergeFn _merge_fn; - SerializeFn _serialize_fn; - FinalizeFn _finalize_fn; - - // Customer comparator, nullptr if default == should be used. - ResultComparator _result_comparator_fn; - - // Set during execute() by subclass - int _num_input_values; - - // Buffer len for intermediate results if the type is TYPE_FIXED_BUFFER - int _fixed_buffer_byte_size; - - // Error message if anything went wrong during the execution. - std::string _error_msg; -}; - -template -class UdaTestHarness : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT& input, INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - UdaTestHarness(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - bool execute(const std::vector& values, const RESULT& expected) { - return execute(values, expected, ALL); - } - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values, const RESULT& expected, UdaExecutionMode mode); - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - // T needs to be compatible (i.e. castable to) with INPUT - template - bool execute(const std::vector& values, const RESULT& expected) { - return execute(values, expected, ALL); - } - template - bool execute(const std::vector& values, const RESULT& expected, UdaExecutionMode mode) { - _input.resize(values.size()); - BaseClass::_num_input_values = _input.size(); - - for (int i = 0; i < values.size(); ++i) { - _input[i] = &values[i]; - } - - return BaseClass::execute(expected, mode); - } - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - // Set during execute() - std::vector _input; -}; - -template -class UdaTestHarness2 : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT1& input1, const INPUT2& input2, - INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - ~UdaTestHarness2() {} - UdaTestHarness2(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const RESULT& expected, UdaExecutionMode mode); - - bool execute(const std::vector& values1, const std::vector& values2, - const RESULT& expected) { - return execute(values1, values2, expected, ALL); - } - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - const std::vector* _input1; - const std::vector* _input2; -}; - -template -class UdaTestHarness3 : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT1& input1, const INPUT2& input2, - const INPUT3& input3, INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - ~UdaTestHarness3() {} - UdaTestHarness3(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const RESULT& expected) { - return execute(values1, values2, values3, expected, ALL); - } - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const RESULT& expected, UdaExecutionMode mode); - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - const std::vector* _input1; - const std::vector* _input2; - const std::vector* _input3; -}; - -template -class UdaTestHarness4 : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT1& input1, const INPUT2& input2, - const INPUT3& input3, const INPUT4& input4, INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - ~UdaTestHarness4() {} - UdaTestHarness4(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const std::vector& values4, - const RESULT& expected) { - return execute(values1, values2, values3, values4, expected, ALL); - } - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const std::vector& values4, - const RESULT& expected, UdaExecutionMode mode); - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - const std::vector* _input1; - const std::vector* _input2; - const std::vector* _input3; - const std::vector* _input4; -}; - -} // namespace doris_udf - -#include "udf/uda_test_harness_impl.hpp" diff --git a/be/src/udf/udf.cpp b/be/src/udf/udf.cpp index f7b3c6f552659c..d547e604328b02 100644 --- a/be/src/udf/udf.cpp +++ b/be/src/udf/udf.cpp @@ -34,6 +34,7 @@ // binary. For example, it would be unfortunate if they had a random dependency // on libhdfs. #include "runtime/runtime_state.h" +#include "runtime/types.h" #include "udf/udf_internal.h" #include "util/debug_util.h" @@ -41,7 +42,6 @@ namespace doris { FunctionContextImpl::FunctionContextImpl() : _state(nullptr), - _version(doris_udf::FunctionContext::V2_0), _num_warnings(0), _thread_local_fn_state(nullptr), _fragment_local_fn_state(nullptr) {} @@ -51,43 +51,36 @@ void FunctionContextImpl::set_constant_cols( _constant_cols = constant_cols; } -doris_udf::FunctionContext* FunctionContextImpl::create_context( - RuntimeState* state, const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types) { - auto* ctx = new doris_udf::FunctionContext(); +std::unique_ptr FunctionContextImpl::create_context( + RuntimeState* state, const doris::TypeDescriptor& return_type, + const std::vector& arg_types) { + auto ctx = std::unique_ptr(new doris::FunctionContext()); ctx->_impl->_state = state; ctx->_impl->_return_type = return_type; ctx->_impl->_arg_types = arg_types; return ctx; } -FunctionContext* FunctionContextImpl::clone() { - doris_udf::FunctionContext* new_context = create_context(_state, _return_type, _arg_types); +std::unique_ptr FunctionContextImpl::clone() { + auto new_context = create_context(_state, _return_type, _arg_types); new_context->_impl->_constant_cols = _constant_cols; new_context->_impl->_fragment_local_fn_state = _fragment_local_fn_state; return new_context; } +const doris::TypeDescriptor& FunctionContextImpl::get_return_type() const { + return _return_type; +} + } // namespace doris -namespace doris_udf { +namespace doris { static const int MAX_WARNINGS = 1000; FunctionContext::FunctionContext() { _impl = std::make_unique(); } -FunctionContext::DorisVersion FunctionContext::version() const { - return _impl->_version; -} - -FunctionContext::UniqueId FunctionContext::query_id() const { - UniqueId id; - id.hi = _impl->_state->query_id().hi; - id.lo = _impl->_state->query_id().lo; - return id; -} - void FunctionContext::set_function_state(FunctionStateScope scope, std::shared_ptr ptr) { switch (scope) { case THREAD_LOCAL: @@ -131,7 +124,7 @@ bool FunctionContext::add_warning(const char* warning_msg) { } } -const FunctionContext::TypeDesc* FunctionContext::get_arg_type(int arg_idx) const { +const doris::TypeDescriptor* FunctionContext::get_arg_type(int arg_idx) const { if (arg_idx < 0 || arg_idx >= _impl->_arg_types.size()) { return nullptr; } @@ -156,7 +149,7 @@ int FunctionContext::get_num_args() const { return _impl->_arg_types.size(); } -const FunctionContext::TypeDesc& FunctionContext::get_return_type() const { +const doris::TypeDescriptor& FunctionContext::get_return_type() const { return _impl->_return_type; } @@ -180,4 +173,4 @@ StringVal FunctionContext::create_temp_string_val(int64_t len) { std::ostream& operator<<(std::ostream& os, const StringVal& string_val) { return os << string_val.to_string(); } -} // namespace doris_udf +} // namespace doris diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index d92ac31daea4b5..339ab655da1aff 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -39,9 +39,10 @@ class BitmapValue; class DecimalV2Value; class DateTimeValue; class CollectionValue; +struct TypeDescriptor; } // namespace doris -namespace doris_udf { +namespace doris { // All input and output values will be one of the structs below. The struct is a simple // object containing a boolean to store if the value is nullptr and the value itself. The @@ -56,64 +57,6 @@ struct DecimalV2Val; // and manage memory. class FunctionContext { public: - enum DorisVersion { - V2_0, - }; - - enum Type { - INVALID_TYPE = 0, - TYPE_NULL, - TYPE_BOOLEAN, - TYPE_TINYINT, - TYPE_SMALLINT, - TYPE_INT, - TYPE_BIGINT, - TYPE_LARGEINT, - TYPE_FLOAT, - TYPE_DOUBLE, - TYPE_DECIMAL [[deprecated]], - TYPE_DATE, - TYPE_DATETIME, - TYPE_CHAR, - TYPE_VARCHAR, - TYPE_HLL, - TYPE_STRING, - TYPE_FIXED_BUFFER, - TYPE_DECIMALV2, - TYPE_OBJECT, - TYPE_ARRAY, - TYPE_MAP, - TYPE_STRUCT, - TYPE_QUANTILE_STATE, - TYPE_DATEV2, - TYPE_DATETIMEV2, - TYPE_TIMEV2, - TYPE_DECIMAL32, - TYPE_DECIMAL64, - TYPE_DECIMAL128I, - TYPE_JSONB, - TYPE_VARIANT - }; - - struct TypeDesc { - Type type; - - /// Only valid if type == TYPE_DECIMAL - int precision; - int scale; - - /// Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR - int len; - - // only valid if type == TYPE_ARRAY - std::vector children; - }; - - struct UniqueId { - int64_t hi; - int64_t lo; - }; - enum FunctionStateScope { /// Indicates that the function state for this FunctionContext's UDF is shared across /// the plan fragment (a query is divided into multiple plan fragments, each of which @@ -136,12 +79,6 @@ class FunctionContext { THREAD_LOCAL, }; - // Returns the version of Doris that's currently running. - DorisVersion version() const; - - // Returns the query_id for the current query. - UniqueId query_id() const; - // Sets an error for this UDF. If this is called, this will trigger the // query to fail. // Note: when you set error for the UDFs used in Data Load, you should @@ -177,7 +114,7 @@ class FunctionContext { // Returns the return type information of this function. For UDAs, this is the final // return type of the UDA (e.g., the type returned by the finalize function). - const TypeDesc& get_return_type() const; + const doris::TypeDescriptor& get_return_type() const; // Returns the number of arguments to this function (not including the FunctionContext* // argument). @@ -185,7 +122,7 @@ class FunctionContext { // Returns the type information for the arg_idx-th argument (0-indexed, not including // the FunctionContext* argument). Returns nullptr if arg_idx is invalid. - const TypeDesc* get_arg_type(int arg_idx) const; + const doris::TypeDescriptor* get_arg_type(int arg_idx) const; // Returns true if the arg_idx-th input argument (0 indexed, not including the // FunctionContext* argument) is a constant (e.g. 5, "string", 1 + 1). @@ -430,10 +367,10 @@ struct DecimalV2Val : public AnyVal { bool operator!=(const DecimalV2Val& other) const { return !(*this == other); } }; -using doris_udf::BigIntVal; -using doris_udf::DoubleVal; -using doris_udf::StringVal; -using doris_udf::DecimalV2Val; -using doris_udf::DateTimeVal; -using doris_udf::FunctionContext; -} // namespace doris_udf +using doris::BigIntVal; +using doris::DoubleVal; +using doris::StringVal; +using doris::DecimalV2Val; +using doris::DateTimeVal; +using doris::FunctionContext; +} // namespace doris diff --git a/be/src/udf/udf_debug.h b/be/src/udf/udf_debug.h deleted file mode 100644 index 3cdbd3aecca3d2..00000000000000 --- a/be/src/udf/udf_debug.h +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/udf/udf-debug.h -// and modified by Doris - -#pragma once - -#include -#include - -#include "udf/udf.h" - -namespace doris_udf { - -template -std::string debug_string(const T& val) { - if (val.is_null) { - return "NULL"; - } - - std::stringstream ss; - ss << val.val; - return ss.str(); -} - -template <> -std::string debug_string(const StringVal& val) { - if (val.is_null) { - return "NULL"; - } - - return std::string(reinterpret_cast(val.ptr), val.len); -} - -} // namespace doris_udf diff --git a/be/src/udf/udf_internal.h b/be/src/udf/udf_internal.h index afc2e3ca031462..5cdae856b5081d 100644 --- a/be/src/udf/udf_internal.h +++ b/be/src/udf/udf_internal.h @@ -27,12 +27,14 @@ #include #include +#include "runtime/types.h" #include "udf/udf.h" namespace doris { class RuntimeState; struct ColumnPtrWrapper; +struct TypeDescriptor; // This class actually implements the interface of FunctionContext. This is split to // hide the details from the external header. @@ -41,9 +43,9 @@ class FunctionContextImpl { public: /// Create a FunctionContext for a UDA. Identical to the UDF version except for the /// intermediate type. Caller is responsible for deleting it. - static doris_udf::FunctionContext* create_context( - RuntimeState* state, const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types); + static std::unique_ptr create_context( + RuntimeState* state, const doris::TypeDescriptor& return_type, + const std::vector& arg_types); ~FunctionContextImpl() {} @@ -52,7 +54,7 @@ class FunctionContextImpl { /// Returns a new FunctionContext with the same constant args, fragment-local state, and /// debug flag as this FunctionContext. The caller is responsible for calling delete on /// it. - doris_udf::FunctionContext* clone(); + std::unique_ptr clone(); void set_constant_cols(const std::vector>& cols); @@ -60,7 +62,7 @@ class FunctionContextImpl { std::string& string_result() { return _string_result; } - const doris_udf::FunctionContext::TypeDesc& get_return_type() const { return _return_type; } + const doris::TypeDescriptor& get_return_type() const; bool check_overflow_for_decimal() const { return _check_overflow_for_decimal; } @@ -69,14 +71,12 @@ class FunctionContextImpl { } private: - friend class doris_udf::FunctionContext; + friend class doris::FunctionContext; // We use the query's runtime state to report errors and warnings. nullptr for test // contexts. RuntimeState* _state; - doris_udf::FunctionContext::DorisVersion _version; - // Empty if there's no error std::string _error_msg; @@ -88,10 +88,10 @@ class FunctionContextImpl { std::shared_ptr _fragment_local_fn_state; // Type descriptor for the return type of the function. - doris_udf::FunctionContext::TypeDesc _return_type; + doris::TypeDescriptor _return_type; // Type descriptors for each argument of the function. - std::vector _arg_types; + std::vector _arg_types; std::vector> _constant_cols; diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 1e263d594e166d..d95ff00f1db5f1 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1628,14 +1628,14 @@ class BitmapValue { return true; } - doris_udf::BigIntVal minimum() const { + doris::BigIntVal minimum() const { switch (_type) { case SINGLE: - return doris_udf::BigIntVal(_sv); + return doris::BigIntVal(_sv); case BITMAP: - return doris_udf::BigIntVal(_bitmap.minimum()); + return doris::BigIntVal(_bitmap.minimum()); default: - return doris_udf::BigIntVal::null(); + return doris::BigIntVal::null(); } } @@ -1673,14 +1673,14 @@ class BitmapValue { return ss.str(); } - doris_udf::BigIntVal maximum() const { + doris::BigIntVal maximum() const { switch (_type) { case SINGLE: - return doris_udf::BigIntVal(_sv); + return doris::BigIntVal(_sv); case BITMAP: - return doris_udf::BigIntVal(_bitmap.maximum()); + return doris::BigIntVal(_bitmap.maximum()); default: - return doris_udf::BigIntVal::null(); + return doris::BigIntVal::null(); } } diff --git a/be/src/util/counts.h b/be/src/util/counts.h index 5f4e9fe30e4693..0818dbeed3d0c3 100644 --- a/be/src/util/counts.h +++ b/be/src/util/counts.h @@ -107,9 +107,9 @@ class Counts { return (higher - position) * lower_key + (position - lower) * higher_key; } - doris_udf::DoubleVal terminate(double quantile) const { + doris::DoubleVal terminate(double quantile) const { if (_counts.empty()) { - return doris_udf::DoubleVal::null(); + return doris::DoubleVal::null(); } std::vector> elems(_counts.begin(), _counts.end()); @@ -126,7 +126,7 @@ class Counts { long max_position = total - 1; double position = max_position * quantile; - return doris_udf::DoubleVal(get_percentile(elems, position)); + return doris::DoubleVal(get_percentile(elems, position)); } private: diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h index 61cdcd8ea0e917..158d7ed5de11dd 100644 --- a/be/src/vec/common/string_ref.h +++ b/be/src/vec/common/string_ref.h @@ -193,7 +193,6 @@ inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2 } // unnamed namespace -using namespace doris_udf; /// The thing to avoid creating strings to find substrings in the hash table. /// User should make sure data source is const. /// maybe considering rewrite it with std::span / std::basic_string_view is meaningful. diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp index a4265ca197094a..1c0644ec678673 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.cpp +++ b/be/src/vec/exec/format/parquet/schema_desc.cpp @@ -18,6 +18,7 @@ #include "schema_desc.h" #include "common/logging.h" +#include "util/slice.h" namespace doris::vectorized { diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index 6e5a9a266b49fb..22db2280bee74b 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -42,10 +42,7 @@ NewEsScanner::NewEsScanner(RuntimeState* state, NewEsScanNode* parent, int64_t l Status NewEsScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index ae91bca9819004..4ad652f088400c 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -294,7 +294,7 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() { Status NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx, + doris::FunctionContext** fn_ctx, VScanNode::PushDownType& pdt) { // Now only `like` function filters is supported to push down if (fn_call->fn().name.function_name != "like") { @@ -303,7 +303,7 @@ Status NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_c } const auto& children = fn_call->children(); - doris_udf::FunctionContext* func_cxt = expr_ctx->fn_context(fn_call->fn_context_index()); + doris::FunctionContext* func_cxt = expr_ctx->fn_context(fn_call->fn_context_index()); DCHECK(func_cxt != nullptr); DCHECK(children.size() == 2); for (size_t i = 0; i < children.size(); i++) { diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index b179f31060e543..9ec95e6d0adf18 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -46,7 +46,7 @@ class NewOlapScanNode : public VScanNode { Status _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx, + doris::FunctionContext** fn_ctx, PushDownType& pdt) override; PushDownType _should_push_down_bloom_filter() override { return PushDownType::ACCEPTABLE; } diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 23d448ad0abbac..b5f5bd4863731e 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -52,10 +52,7 @@ Status NewOlapScanner::prepare(const TPaloScanRange& scan_range, const std::vector& filters, const FilterPredicates& filter_predicates, const std::vector& function_filters) { - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); // set limit to reduce end of rowset and segment mem use _tablet_reader = std::make_unique(); diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 066760c5fe6d32..936053d98b55e3 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -60,6 +60,7 @@ VFileScanner::VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t Status VFileScanner::prepare( VExprContext** vconjunct_ctx_ptr, std::unordered_map* colname_to_value_range) { + RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); _colname_to_value_range = colname_to_value_range; _get_block_timer = ADD_TIMER(_parent->_scanner_profile, "FileScannerGetBlockTime"); @@ -79,11 +80,6 @@ Status VFileScanner::prepare( _io_ctx->query_id = &_state->query_id(); _io_ctx->enable_file_cache = _state->query_options().enable_file_cache; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } - if (_is_load) { _src_block_mem_reuse = true; _src_row_desc.reset(new RowDescriptor(_state->desc_tbl(), diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index 89bd558ae67dc7..b08dd47622be71 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -44,10 +44,7 @@ Status VMetaScanner::open(RuntimeState* state) { Status VMetaScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { VLOG_CRITICAL << "VMetaScanner::prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); if (_scan_range.meta_scan_range.__isset.iceberg_params) { RETURN_IF_ERROR(_fetch_iceberg_metadata_batch()); diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index d10cf9898739fa..27edacfb4c4bb0 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -355,7 +355,6 @@ Status VScanNode::_append_rf_into_conjuncts(std::vector& vexprs) { RETURN_IF_ERROR(new_vconjunct_ctx_ptr->prepare(_state, _row_descriptor)); RETURN_IF_ERROR(new_vconjunct_ctx_ptr->open(_state)); if (_vconjunct_ctx_ptr) { - (*_vconjunct_ctx_ptr)->mark_as_stale(); _stale_vexpr_ctxs.push_back(std::move(_vconjunct_ctx_ptr)); } _vconjunct_ctx_ptr.reset(new doris::vectorized::VExprContext*); @@ -459,7 +458,6 @@ Status VScanNode::_normalize_conjuncts() { if (new_root) { (*_vconjunct_ctx_ptr)->set_root(new_root); } else { - (*_vconjunct_ctx_ptr)->mark_as_stale(); _stale_vexpr_ctxs.push_back(std::move(_vconjunct_ctx_ptr)); _vconjunct_ctx_ptr.reset(nullptr); } @@ -640,7 +638,7 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct } if (TExprNodeType::FUNCTION_CALL == fn_expr->node_type()) { - doris_udf::FunctionContext* fn_ctx = nullptr; + doris::FunctionContext* fn_ctx = nullptr; StringVal val; PushDownType temp_pdt; RETURN_IF_ERROR(_should_push_down_function_filter( diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 476c7c37da7c8a..4fb023a5dc0e43 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -158,7 +158,7 @@ class VScanNode : public ExecNode { virtual Status _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx, + doris::FunctionContext** fn_ctx, PushDownType& pdt) { pdt = PushDownType::UNACCEPTABLE; return Status::OK(); diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index 9af349caf8f4e5..68dbbbfe9ef310 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -33,6 +33,14 @@ VScanner::VScanner(RuntimeState* state, VScanNode* parent, int64_t limit, Runtim _is_load = (_input_tuple_desc != nullptr); } +Status VScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { + if (vconjunct_ctx_ptr != nullptr) { + // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. + RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); + } + return Status::OK(); +} + Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { // only empty block should be here DCHECK(block->rows() == 0); diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 86c9cd60e5b569..e36968e6f97a3d 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -57,6 +57,9 @@ class VScanner { // Filter the output block finally. Status _filter_output_block(Block* block); + // Not virtual, all child will call this method explictly + Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr); + public: VScanNode* get_parent() { return _parent; } @@ -117,7 +120,6 @@ class VScanner { protected: void _discard_conjuncts() { if (_vconjunct_ctx) { - _vconjunct_ctx->mark_as_stale(); _stale_vexpr_ctxs.push_back(_vconjunct_ctx); _vconjunct_ctx = nullptr; } diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 786bb9375e29a6..be6900bc1c6bca 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -292,124 +292,6 @@ Status VExpr::clone_if_not_exists(const std::vector& ctxs, Runtim return Status::OK(); } -FunctionContext::TypeDesc VExpr::column_type_to_type_desc(const TypeDescriptor& type) { - FunctionContext::TypeDesc out; - switch (type.type) { - case TYPE_BOOLEAN: - out.type = FunctionContext::TYPE_BOOLEAN; - break; - case TYPE_TINYINT: - out.type = FunctionContext::TYPE_TINYINT; - break; - case TYPE_SMALLINT: - out.type = FunctionContext::TYPE_SMALLINT; - break; - case TYPE_INT: - out.type = FunctionContext::TYPE_INT; - break; - case TYPE_BIGINT: - out.type = FunctionContext::TYPE_BIGINT; - break; - case TYPE_LARGEINT: - out.type = FunctionContext::TYPE_LARGEINT; - break; - case TYPE_FLOAT: - out.type = FunctionContext::TYPE_FLOAT; - break; - case TYPE_TIME: - case TYPE_TIMEV2: - case TYPE_DOUBLE: - out.type = FunctionContext::TYPE_DOUBLE; - break; - case TYPE_DATE: - out.type = FunctionContext::TYPE_DATE; - break; - case TYPE_DATETIME: - out.type = FunctionContext::TYPE_DATETIME; - break; - case TYPE_DATEV2: - out.type = FunctionContext::TYPE_DATEV2; - break; - case TYPE_DATETIMEV2: - out.type = FunctionContext::TYPE_DATETIMEV2; - break; - case TYPE_DECIMAL32: - out.type = FunctionContext::TYPE_DECIMAL32; - out.precision = type.precision; - out.scale = type.scale; - break; - case TYPE_DECIMAL64: - out.type = FunctionContext::TYPE_DECIMAL64; - out.precision = type.precision; - out.scale = type.scale; - break; - case TYPE_DECIMAL128I: - out.type = FunctionContext::TYPE_DECIMAL128I; - out.precision = type.precision; - out.scale = type.scale; - break; - case TYPE_VARCHAR: - out.type = FunctionContext::TYPE_VARCHAR; - out.len = type.len; - break; - case TYPE_HLL: - out.type = FunctionContext::TYPE_HLL; - out.len = type.len; - break; - case TYPE_OBJECT: - out.type = FunctionContext::TYPE_OBJECT; - break; - case TYPE_QUANTILE_STATE: - out.type = FunctionContext::TYPE_QUANTILE_STATE; - break; - case TYPE_CHAR: - out.type = FunctionContext::TYPE_CHAR; - out.len = type.len; - break; - case TYPE_DECIMALV2: - out.type = FunctionContext::TYPE_DECIMALV2; - // out.precision = type.precision; - // out.scale = type.scale; - break; - case TYPE_NULL: - out.type = FunctionContext::TYPE_NULL; - break; - case TYPE_ARRAY: - out.type = FunctionContext::TYPE_ARRAY; - for (const auto& t : type.children) { - out.children.push_back(VExpr::column_type_to_type_desc(t)); - } - break; - case TYPE_MAP: - CHECK(type.children.size() == 2); - // only support map key is scalar - CHECK(!type.children[0].is_complex_type()); - out.type = FunctionContext::TYPE_MAP; - for (const auto& t : type.children) { - out.children.push_back(VExpr::column_type_to_type_desc(t)); - } - break; - case TYPE_STRUCT: - CHECK(type.children.size() >= 1); - out.type = FunctionContext::TYPE_STRUCT; - for (const auto& t : type.children) { - out.children.push_back(VExpr::column_type_to_type_desc(t)); - } - break; - case TYPE_STRING: - out.type = FunctionContext::TYPE_STRING; - out.len = type.len; - break; - case TYPE_JSONB: - out.type = FunctionContext::TYPE_JSONB; - out.len = type.len; - break; - default: - DCHECK(false) << "Unknown type: " << type; - } - return out; -} - std::string VExpr::debug_string() const { // TODO: implement partial debug string for member vars std::stringstream out; @@ -479,13 +361,12 @@ Status VExpr::get_const_col(VExprContext* context, } void VExpr::register_function_context(doris::RuntimeState* state, VExprContext* context) { - FunctionContext::TypeDesc return_type = VExpr::column_type_to_type_desc(_type); - std::vector arg_types; + std::vector arg_types; for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(VExpr::column_type_to_type_desc(_children[i]->type())); + arg_types.push_back(_children[i]->type()); } - _fn_context_index = context->register_func(state, return_type, arg_types); + _fn_context_index = context->register_function_context(state, _type, arg_types); } Status VExpr::init_function_context(VExprContext* context, @@ -511,7 +392,7 @@ Status VExpr::init_function_context(VExprContext* context, void VExpr::close_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope, const FunctionBasePtr& function) const { - if (_fn_context_index != -1 && !context->_stale) { + if (_fn_context_index != -1) { FunctionContext* fn_ctx = context->fn_context(_fn_context_index); function->close(fn_ctx, FunctionContext::THREAD_LOCAL); if (scope == FunctionContext::FRAGMENT_LOCAL) { diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 1eb527c6006fe7..b5e08d8499e202 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -180,7 +180,6 @@ class VExpr { } protected: - static FunctionContext::TypeDesc column_type_to_type_desc(const TypeDescriptor& type); /// Simple debug string that provides no expr subclass-specific information std::string debug_string(const std::string& expr_name) const { std::stringstream out; diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index 2103e633912a86..f8cab779067137 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -28,15 +28,12 @@ VExprContext::VExprContext(VExpr* expr) _prepared(false), _opened(false), _closed(false), - _last_result_column_id(-1), - _stale(false) {} + _last_result_column_id(-1) {} VExprContext::~VExprContext() { + // Do not delete this code, this code here is used to check if forget to close the opened context + // Or there will be memory leak DCHECK(!_prepared || _closed) << get_stack_trace(); - - for (int i = 0; i < _fn_contexts.size(); ++i) { - delete _fn_contexts[i]; - } } doris::Status VExprContext::execute(doris::vectorized::Block* block, int* result_column_id) { @@ -95,8 +92,9 @@ void VExprContext::clone_fn_contexts(VExprContext* other) { } } -int VExprContext::register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type, - const std::vector& arg_types) { +int VExprContext::register_function_context(RuntimeState* state, + const doris::TypeDescriptor& return_type, + const std::vector& arg_types) { _fn_contexts.push_back(FunctionContextImpl::create_context(state, return_type, arg_types)); _fn_contexts.back()->impl()->set_check_overflow_for_decimal( state->check_overflow_for_decimal()); diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 4c7d8e039fe970..e1b9bf69d7be6c 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -41,15 +41,15 @@ class VExprContext { /// retrieve the created context. Exprs that need a FunctionContext should call this in /// Prepare() and save the returned index. 'varargs_buffer_size', if specified, is the /// size of the varargs buffer in the created FunctionContext (see udf-internal.h). - int register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type, - const std::vector& arg_types); + int register_function_context(RuntimeState* state, const doris::TypeDescriptor& return_type, + const std::vector& arg_types); /// Retrieves a registered FunctionContext. 'i' is the index returned by the call to - /// register_func(). This should only be called by VExprs. + /// register_function_context(). This should only be called by VExprs. FunctionContext* fn_context(int i) { DCHECK_GE(i, 0); DCHECK_LT(i, _fn_contexts.size()); - return _fn_contexts[i]; + return _fn_contexts[i].get(); } [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, @@ -71,11 +71,6 @@ class VExprContext { void clone_fn_contexts(VExprContext* other); - void mark_as_stale() { - DCHECK(!_stale); - _stale = true; - } - private: friend class VExpr; @@ -92,13 +87,11 @@ class VExprContext { /// FunctionContexts for each registered expression. The FunctionContexts are created /// and owned by this VExprContext. - std::vector _fn_contexts; + std::vector> _fn_contexts; int _last_result_column_id; /// The depth of expression-tree. int _depth_num = 0; - - bool _stale; }; } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 704fc8f3ff0b83..4ec1b30cc12eeb 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -118,7 +118,7 @@ struct StrToDate { } if constexpr (std::is_same_v) { if (context->impl()->get_return_type().type == - doris_udf::FunctionContext::Type::TYPE_DATETIME) { + doris::PrimitiveType::TYPE_DATETIME) { ts_val.to_datetime(); } else { ts_val.cast_to_date(); diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index 9a8b096513b82c..02b2a7cb620484 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -68,14 +68,13 @@ class FunctionIn : public IFunction { } std::shared_ptr state = std::make_shared(); context->set_function_state(scope, state); - if (context->get_arg_type(0)->type == FunctionContext::Type::TYPE_CHAR || - context->get_arg_type(0)->type == FunctionContext::Type::TYPE_VARCHAR || - context->get_arg_type(0)->type == FunctionContext::Type::TYPE_STRING) { + if (context->get_arg_type(0)->type == doris::PrimitiveType::TYPE_CHAR || + context->get_arg_type(0)->type == doris::PrimitiveType::TYPE_VARCHAR || + context->get_arg_type(0)->type == doris::PrimitiveType::TYPE_STRING) { // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly state->hybrid_set.reset(new StringValueSet()); } else { - state->hybrid_set.reset( - create_set(convert_type_to_primitive(context->get_arg_type(0)->type))); + state->hybrid_set.reset(create_set(context->get_arg_type(0)->type)); } DCHECK(context->get_num_args() >= 1); @@ -197,7 +196,7 @@ class FunctionIn : public IFunction { } std::unique_ptr hybrid_set( - create_set(convert_type_to_primitive(context->get_arg_type(0)->type))); + create_set(context->get_arg_type(0)->type)); bool null_in_set = false; for (const auto& set_column : set_columns) { diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 8cd7a664607486..4060bae8cb8d3a 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -567,7 +567,7 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes VecDateTimeValue& operator--() { return *this += -1; } - void to_datetime_val(doris_udf::DateTimeVal* tv) const { + void to_datetime_val(doris::DateTimeVal* tv) const { tv->packed_time = to_int64_datetime_packed(); tv->type = _type; } @@ -584,7 +584,7 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes ((uint64_t)minute() << 26) | ((uint64_t)second() << 20)); } - static VecDateTimeValue from_datetime_val(const doris_udf::DateTimeVal& tv) { + static VecDateTimeValue from_datetime_val(const doris::DateTimeVal& tv) { VecDateTimeValue value; value.from_packed_time(tv.packed_time); if (tv.type == TIME_DATE) { @@ -1095,8 +1095,7 @@ class DateV2Value { bool get_date_from_daynr(uint64_t); - static DateV2Value from_datetimev2_val( - const doris_udf::DateTimeV2Val& tv) { + static DateV2Value from_datetimev2_val(const doris::DateTimeV2Val& tv) { DCHECK(is_datetime); DateV2Value value; value.from_datetime(tv.datetimev2_value); diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 4e4de45271963f..054cb5f74b0107 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -163,10 +163,7 @@ set(TESTUTIL_TEST_FILES testutil/function_utils.cpp testutil/run_all_tests.cpp ) -set(UDF_TEST_FILES - # udf/udf_test.cpp - # udf/uda_test.cpp -) + set(UTIL_TEST_FILES util/bit_util_test.cpp util/brpc_client_cache_test.cpp @@ -286,7 +283,6 @@ add_executable(doris_be_test ${OLAP_TEST_FILES} ${RUNTIME_TEST_FILES} ${TESTUTIL_TEST_FILES} - ${UDF_TEST_FILES} ${UTIL_TEST_FILES} ${VEC_TEST_FILES} ) diff --git a/be/test/runtime/datetime_value_test.cpp b/be/test/runtime/datetime_value_test.cpp index d71923e8f8e215..f2e5163a72a323 100644 --- a/be/test/runtime/datetime_value_test.cpp +++ b/be/test/runtime/datetime_value_test.cpp @@ -1451,14 +1451,14 @@ TEST_F(DateTimeValueTest, packed_time) { } { - doris_udf::DateTimeVal tv; + doris::DateTimeVal tv; tv.packed_time = 1830650338932162560L; tv.type = TIME_DATETIME; DateTimeValue v1 = DateTimeValue::from_datetime_val(tv); v1.to_string(buf); EXPECT_STREQ("2001-02-03 12:34:56", buf); - doris_udf::DateTimeVal tv2; + doris::DateTimeVal tv2; v1.to_datetime_val(&tv2); EXPECT_TRUE(tv == tv2); diff --git a/be/test/testutil/desc_tbl_builder.cc b/be/test/testutil/desc_tbl_builder.cc index 5a7fcb46e7376a..86602cac6b8e73 100644 --- a/be/test/testutil/desc_tbl_builder.cc +++ b/be/test/testutil/desc_tbl_builder.cc @@ -36,7 +36,7 @@ TupleDescBuilder& DescriptorTblBuilder::declare_tuple() { // item_id of -1 indicates no itemTupleId static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDescriptor& type, - int slot_idx, int byte_offset, int item_id) { + int slot_idx, int item_id) { int null_byte = slot_idx / 8; int null_bit = slot_idx % 8; TSlotDescriptor slot_desc; @@ -45,7 +45,7 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes slot_desc.__set_slotType(type.to_thrift()); // For now no tests depend on the materialized path being populated correctly. // slot_desc.__set_materializedPath(vector()); - slot_desc.__set_byteOffset(byte_offset); + slot_desc.__set_byteOffset(0); slot_desc.__set_nullIndicatorByte(null_byte); slot_desc.__set_nullIndicatorBit(null_bit); slot_desc.__set_slotIdx(slot_idx); @@ -56,10 +56,10 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes return slot_desc; } -static TTupleDescriptor make_tuple_descriptor(int id, int byte_size, int num_null_bytes) { +static TTupleDescriptor make_tuple_descriptor(int id, int num_null_bytes) { TTupleDescriptor tuple_desc; tuple_desc.__set_id(id); - tuple_desc.__set_byteSize(byte_size); + tuple_desc.__set_byteSize(0); tuple_desc.__set_numNullBytes(num_null_bytes); return tuple_desc; } @@ -91,7 +91,6 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& } int num_null_bytes = BitUtil::ceil(slot_types.size(), 8); - int byte_offset = num_null_bytes; int tuple_id = *next_tuple_id; ++(*next_tuple_id); @@ -105,13 +104,12 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& // } thrift_desc_tbl->slotDescriptors.push_back( - make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, byte_offset, item_id)); + make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, item_id)); thrift_desc_tbl->__isset.slotDescriptors = true; - byte_offset += slot_types[i].get_slot_size(); ++(*slot_id); } - TTupleDescriptor result = make_tuple_descriptor(tuple_id, byte_offset, num_null_bytes); + TTupleDescriptor result = make_tuple_descriptor(tuple_id, num_null_bytes); thrift_desc_tbl->tupleDescriptors.push_back(result); return result; } diff --git a/be/test/testutil/function_utils.cpp b/be/test/testutil/function_utils.cpp index 87ff68ad70d666..7953614e97db61 100644 --- a/be/test/testutil/function_utils.cpp +++ b/be/test/testutil/function_utils.cpp @@ -31,13 +31,13 @@ FunctionUtils::FunctionUtils() { globals.__set_timestamp_ms(1565026737805); globals.__set_time_zone("Asia/Shanghai"); _state = new RuntimeState(globals); - doris_udf::FunctionContext::TypeDesc return_type; - std::vector arg_types; + doris::TypeDescriptor return_type; + std::vector arg_types; _fn_ctx = FunctionContextImpl::create_context(_state, return_type, arg_types); } -FunctionUtils::FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, +FunctionUtils::FunctionUtils(const doris::TypeDescriptor& return_type, + const std::vector& arg_types, int varargs_buffer_size) { TQueryGlobals globals; globals.__set_now_string("2019-08-06 01:38:57"); @@ -48,7 +48,6 @@ FunctionUtils::FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_ } FunctionUtils::~FunctionUtils() { - delete _fn_ctx; if (_state) { delete _state; } diff --git a/be/test/testutil/function_utils.h b/be/test/testutil/function_utils.h index d22489a7e694c1..710ef111233407 100644 --- a/be/test/testutil/function_utils.h +++ b/be/test/testutil/function_utils.h @@ -29,16 +29,15 @@ class RuntimeState; class FunctionUtils { public: FunctionUtils(); - FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, - int varargs_buffer_size); + FunctionUtils(const doris::TypeDescriptor& return_type, + const std::vector& arg_types, int varargs_buffer_size); ~FunctionUtils(); - doris_udf::FunctionContext* get_fn_ctx() { return _fn_ctx; } + doris::FunctionContext* get_fn_ctx() { return _fn_ctx.get(); } private: RuntimeState* _state = nullptr; - doris_udf::FunctionContext* _fn_ctx = nullptr; + std::unique_ptr _fn_ctx; }; } // namespace doris diff --git a/be/test/udf/uda_test.cpp b/be/test/udf/uda_test.cpp deleted file mode 100644 index fe8e83618d2a6b..00000000000000 --- a/be/test/udf/uda_test.cpp +++ /dev/null @@ -1,315 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include - -#include "common/logging.h" -#include "udf/uda_test_harness.h" - -namespace doris_udf { - -//-------------------------------- Count ------------------------------------ -// Example of implementing Count(int_col). -// The input type is: int -// The intermediate type is bigint -// the return type is bigint -void CountInit(FunctionContext* context, BigIntVal* val) { - val->is_null = false; - val->val = 0; -} - -void CountUpdate(FunctionContext* context, const IntVal& input, BigIntVal* val) { - // BigIntVal is the same ptr as what was passed to CountInit - if (input.is_null) { - return; - } - - ++val->val; -} - -void CountMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) { - dst->val += src.val; -} - -BigIntVal CountFinalize(FunctionContext* context, const BigIntVal& val) { - return val; -} - -//-------------------------------- Count(...) ------------------------------------ -// Example of implementing Count(...) -// The input type is: multiple ints -// The intermediate type is bigint -// the return type is bigint -void Count2Update(FunctionContext* context, const IntVal& input1, const IntVal& input2, - BigIntVal* val) { - val->val += (!input1.is_null + !input2.is_null); -} -void Count3Update(FunctionContext* context, const IntVal& input1, const IntVal& input2, - const IntVal& input3, BigIntVal* val) { - val->val += (!input1.is_null + !input2.is_null + !input3.is_null); -} -void Count4Update(FunctionContext* context, const IntVal& input1, const IntVal& input2, - const IntVal& input3, const IntVal& input4, BigIntVal* val) { - val->val += (!input1.is_null + !input2.is_null + !input3.is_null + !input4.is_null); -} - -//-------------------------------- Min(String) ------------------------------------ -// Example of implementing MIN for strings. -// The input type is: STRING -// The intermediate type is BufferVal -// the return type is STRING -// This is a little more sophisticated since the result buffers are reused (it grows -// to the longest result string). -struct MinState { - uint8_t* value; - int len; - int buffer_len; - - void set(FunctionContext* context, const StringVal& val) { - if (buffer_len < val.len) { - context->free(value); - value = context->allocate(val.len); - buffer_len = val.len; - } - - memcpy(value, val.ptr, val.len); - len = val.len; - } -}; - -// Initialize the MinState scratch space -void MinInit(FunctionContext* context, BufferVal* val) { - MinState* state = reinterpret_cast(*val); - state->value = nullptr; - state->buffer_len = 0; -} - -// Update the min value, comparing with the current value in MinState -void MinUpdate(FunctionContext* context, const StringVal& input, BufferVal* val) { - if (input.is_null) { - return; - } - - MinState* state = reinterpret_cast(*val); - - if (state->value == nullptr) { - state->set(context, input); - return; - } - - int cmp = memcmp(input.ptr, state->value, std::min(input.len, state->len)); - - if (cmp < 0 || (cmp == 0 && input.len < state->len)) { - state->set(context, input); - } -} - -// Serialize the state into the min string -const BufferVal MinSerialize(FunctionContext* context, const BufferVal& intermediate) { - return intermediate; -} - -// Merge is the same as Update since the serialized format is the raw input format -void MinMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst) { - const MinState* src_state = reinterpret_cast(src); - - if (src_state->value == nullptr) { - return; - } - - MinUpdate(context, StringVal(src_state->value, src_state->len), dst); -} - -// Finalize also just returns the string so is the same as MinSerialize. -StringVal MinFinalize(FunctionContext* context, const BufferVal& val) { - const MinState* state = reinterpret_cast(val); - - if (state->value == nullptr) { - return StringVal::null(); - } - - StringVal result = StringVal(context, state->len); - memcpy(result.ptr, state->value, state->len); - return result; -} - -//----------------------------- Bits after Xor ------------------------------------ -// Example of a UDA that xors all the input bits and then returns the number of -// resulting bits that are set. This illustrates where the result and intermediate -// are the same type, but a transformation is still needed in Finalize() -// The input type is: double -// The intermediate type is bigint -// the return type is bigint -void XorInit(FunctionContext* context, BigIntVal* val) { - val->is_null = false; - val->val = 0; -} - -void XorUpdate(FunctionContext* context, const double* input, BigIntVal* val) { - // BigIntVal is the same ptr as what was passed to CountInit - if (input == nullptr) { - return; - } - - val->val |= *reinterpret_cast(input); -} - -void XorMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) { - dst->val |= src.val; -} - -BigIntVal XorFinalize(FunctionContext* context, const BigIntVal& val) { - int64_t set_bits = 0; - // Do popcnt on val - // set_bits = popcnt(val.val); - return BigIntVal(set_bits); -} - -//--------------------------- HLL(Distinct Estimate) --------------------------------- -// Example of implementing distinct estimate. As an example, we will compress the -// intermediate buffer. -// Note: this is not the actual algorithm but a sketch of how it would be implemented -// with the UDA interface. -// The input type is: bigint -// The intermediate type is string (fixed at 256 bytes) -// the return type is bigint -void DistinctEstimateInit(FunctionContext* context, StringVal* val) { - // Since this is known, this will be allocated to 256 bytes. - EXPECT_EQ(val->len, 256); - memset(val->ptr, 0, 256); -} - -void DistinctEstimatUpdate(FunctionContext* context, const int64_t* input, StringVal* val) { - if (input == nullptr) { - return; - } - - for (int i = 0; i < 256; ++i) { - int hash = 0; - // Hash(input) with the ith hash function - // hash = Hash(*input, i); - val->ptr[i] = hash; - } -} - -StringVal DistinctEstimatSerialize(FunctionContext* context, const StringVal& intermediate) { - int compressed_size = 0; - uint8_t* result = nullptr; // SnappyCompress(intermediate.ptr, intermediate.len); - return StringVal(result, compressed_size); -} - -void DistinctEstimateMerge(FunctionContext* context, const StringVal& src, StringVal* dst) { - uint8_t* src_uncompressed = nullptr; // SnappyUncompress(src.ptr, src.len); - - for (int i = 0; i < 256; ++i) { - dst->ptr[i] ^= src_uncompressed[i]; - } -} - -BigIntVal DistinctEstimateFinalize(FunctionContext* context, const StringVal& val) { - int64_t set_bits = 0; - // Do popcnt on val - // set_bits = popcnt(val.val); - return BigIntVal(set_bits); -} - -TEST(CountTest, Basic) { - UdaTestHarness test(CountInit, CountUpdate, CountMerge, nullptr, - CountFinalize); - std::vector no_nulls; - no_nulls.resize(1000); - - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(no_nulls.size()))) << test; - EXPECT_FALSE(test.execute(no_nulls, BigIntVal(100))) << test; -} - -TEST(CountMultiArgTest, Basic) { - int num = 1000; - std::vector no_nulls; - no_nulls.resize(num); - - UdaTestHarness2 test2(CountInit, Count2Update, CountMerge, - nullptr, CountFinalize); - EXPECT_TRUE(test2.execute(no_nulls, no_nulls, BigIntVal(2 * num))); - EXPECT_FALSE(test2.execute(no_nulls, no_nulls, BigIntVal(100))); - - UdaTestHarness3 test3( - CountInit, Count3Update, CountMerge, nullptr, CountFinalize); - EXPECT_TRUE(test3.execute(no_nulls, no_nulls, no_nulls, BigIntVal(3 * num))); - - UdaTestHarness4 test4( - CountInit, Count4Update, CountMerge, nullptr, CountFinalize); - EXPECT_TRUE(test4.execute(no_nulls, no_nulls, no_nulls, no_nulls, BigIntVal(4 * num))); -} - -bool FuzzyCompare(const BigIntVal& r1, const BigIntVal& r2) { - if (r1.is_null && r2.is_null) { - return true; - } - - if (r1.is_null || r2.is_null) { - return false; - } - - return abs(r1.val - r2.val) <= 1; -} - -TEST(CountTest, FuzzyEquals) { - UdaTestHarness test(CountInit, CountUpdate, CountMerge, nullptr, - CountFinalize); - std::vector no_nulls; - no_nulls.resize(1000); - - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(1000))) << test; - EXPECT_FALSE(test.execute(no_nulls, BigIntVal(999))) << test; - - test.set_result_comparator(FuzzyCompare); - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(1000))) << test; - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(999))) << test; - EXPECT_FALSE(test.execute(no_nulls, BigIntVal(998))) << test; -} - -TEST(MinTest, Basic) { - UdaTestHarness test(MinInit, MinUpdate, MinMerge, MinSerialize, - MinFinalize); - test.set_intermediate_size(sizeof(MinState)); - - std::vector values; - values.push_back(StringVal("BBB")); - EXPECT_TRUE(test.execute(values, StringVal("BBB"))) << test; - - values.push_back(StringVal("AA")); - EXPECT_TRUE(test.execute(values, StringVal("AA"))) << test; - - values.push_back(StringVal("CCC")); - EXPECT_TRUE(test.execute(values, StringVal("AA"))) << test; - - values.push_back(StringVal("ABCDEF")); - values.push_back(StringVal("AABCDEF")); - values.push_back(StringVal("A")); - EXPECT_TRUE(test.execute(values, StringVal("A"))) << test; - - values.clear(); - values.push_back(StringVal::null()); - EXPECT_TRUE(test.execute(values, StringVal::null())) << test; - - values.push_back(StringVal("ZZZ")); - EXPECT_TRUE(test.execute(values, StringVal("ZZZ"))) << test; -} -} // namespace doris_udf diff --git a/be/test/udf/udf_test.cpp b/be/test/udf/udf_test.cpp deleted file mode 100644 index bf5d7e1fbcbef1..00000000000000 --- a/be/test/udf/udf_test.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include - -#include "common/logging.h" -#include "udf/udf_test_harness.hpp" - -namespace doris_udf { - -DoubleVal zero_udf(FunctionContext* context) { - return DoubleVal(0); -} - -StringVal log_udf(FunctionContext* context, const StringVal& arg1) { - std::cerr << (arg1.is_null ? "nullptr" : std::string((char*)arg1.ptr, arg1.len)) << std::endl; - return arg1; -} - -StringVal upper_udf(FunctionContext* context, const StringVal& input) { - if (input.is_null) { - return StringVal::null(); - } - - // Create a new StringVal object that's the same length as the input - StringVal result = StringVal(context, input.len); - - for (int i = 0; i < input.len; ++i) { - result.ptr[i] = toupper(input.ptr[i]); - } - - return result; -} - -FloatVal min3(FunctionContext* context, const FloatVal& f1, const FloatVal& f2, - const FloatVal& f3) { - bool is_null = true; - float v = 0.0; - - if (!f1.is_null) { - if (is_null) { - v = f1.val; - is_null = false; - } else { - v = std::min(v, f1.val); - } - } - - if (!f2.is_null) { - if (is_null) { - v = f2.val; - is_null = false; - } else { - v = std::min(v, f2.val); - } - } - - if (!f3.is_null) { - if (is_null) { - v = f3.val; - is_null = false; - } else { - v = std::min(v, f3.val); - } - } - - return is_null ? FloatVal::null() : FloatVal(v); -} - -StringVal concat(FunctionContext* context, int n, const StringVal* args) { - int size = 0; - bool all_null = true; - - for (int i = 0; i < n; ++i) { - if (args[i].is_null) { - continue; - } - - size += args[i].len; - all_null = false; - } - - if (all_null) { - return StringVal::null(); - } - - int offset = 0; - StringVal result(context, size); - - for (int i = 0; i < n; ++i) { - if (args[i].is_null) { - continue; - } - - memcpy(result.ptr + offset, args[i].ptr, args[i].len); - offset += args[i].len; - } - - return result; -} - -IntVal num_var_args(FunctionContext*, const BigIntVal& dummy, int n, const IntVal* args) { - return IntVal(n); -} - -IntVal validat_udf(FunctionContext* context) { - EXPECT_EQ(context->version(), FunctionContext::V2_0); - return IntVal::null(); -} - -IntVal validate_fail(FunctionContext* context) { - context->set_error("Fail"); - return IntVal::null(); -} - -IntVal validate_mem(FunctionContext* context) { - EXPECT_TRUE(context->allocate(0) == nullptr); - uint8_t* buffer = context->allocate(10); - EXPECT_TRUE(buffer != nullptr); - memset(buffer, 0, 10); - context->free(buffer); - return IntVal::null(); -} - -TEST(UdfTest, TestFunctionContext) { - EXPECT_TRUE(UdfTestHarness::validat_udf(validat_udf, IntVal::null())); - EXPECT_FALSE(UdfTestHarness::validat_udf(validate_fail, IntVal::null())); - EXPECT_TRUE(UdfTestHarness::validat_udf(validate_mem, IntVal::null())); -} - -TEST(UdfTest, TestValidate) { - EXPECT_TRUE(UdfTestHarness::validat_udf(zero_udf, DoubleVal(0))); - EXPECT_FALSE(UdfTestHarness::validat_udf(zero_udf, DoubleVal(10))); - - EXPECT_TRUE((UdfTestHarness::validat_udf(log_udf, StringVal("abcd"), - StringVal("abcd")))); - - EXPECT_TRUE((UdfTestHarness::validat_udf(upper_udf, StringVal("abcd"), - StringVal("ABCD")))); - - EXPECT_TRUE((UdfTestHarness::validat_udf( - min3, FloatVal(1), FloatVal(2), FloatVal(3), FloatVal(1)))); - EXPECT_TRUE((UdfTestHarness::validat_udf( - min3, FloatVal(1), FloatVal::null(), FloatVal(3), FloatVal(1)))); - EXPECT_TRUE((UdfTestHarness::validat_udf( - min3, FloatVal::null(), FloatVal::null(), FloatVal::null(), FloatVal::null()))); -} - -// TEST(UdfTest, TestTimestampVal) { -// boost::gregorian::date d(2003, 3, 15); -// TimestampVal t1(*(int32_t*)&d); -// EXPECT_TRUE((UdfTestHarness::validat_udf(time_to_string, t1, -// "2003-03-15 00:00:00"))); - -// TimestampVal t2(*(int32_t*)&d, 1000L * 1000L * 5000L); -// EXPECT_TRUE((UdfTestHarness::validat_udf(time_to_string, t2, -// "2003-03-15 00:00:05"))); -// } - -TEST(UdfTest, TestVarArgs) { - std::vector input; - input.push_back(StringVal("Hello")); - input.push_back(StringVal("World")); - - EXPECT_TRUE((UdfTestHarness::validat_udf(concat, input, - StringVal("HelloWorld")))); - - input.push_back(StringVal("More")); - EXPECT_TRUE((UdfTestHarness::validat_udf(concat, input, - StringVal("HelloWorldMore")))); - - std::vector args; - args.resize(10); - EXPECT_TRUE((UdfTestHarness::validat_udf( - num_var_args, BigIntVal(0), args, IntVal(args.size())))); -} -} // namespace doris_udf diff --git a/be/test/util/counts_test.cpp b/be/test/util/counts_test.cpp index 4f9d8d8235036d..d4d9e5895a2a4a 100644 --- a/be/test/util/counts_test.cpp +++ b/be/test/util/counts_test.cpp @@ -39,7 +39,7 @@ TEST_F(TCountsTest, TotalTest) { counts.increment(19, 1); counts.increment(7, 2); - doris_udf::DoubleVal result = counts.terminate(0.2); + doris::DoubleVal result = counts.terminate(0.2); EXPECT_EQ(1, result.val); uint8_t* writer = new uint8_t[counts.serialized_size()]; uint8_t* type_reader = writer; @@ -47,7 +47,7 @@ TEST_F(TCountsTest, TotalTest) { Counts other; other.unserialize(type_reader); - doris_udf::DoubleVal result1 = other.terminate(0.2); + doris::DoubleVal result1 = other.terminate(0.2); EXPECT_EQ(result.val, result1.val); Counts other1; diff --git a/be/test/vec/function/function_test_util.cpp b/be/test/vec/function/function_test_util.cpp index ae35124e66b5e7..e1551fe77fe376 100644 --- a/be/test/vec/function/function_test_util.cpp +++ b/be/test/vec/function/function_test_util.cpp @@ -50,7 +50,7 @@ uint64_t str_to_datetime_v2(std::string datetime_str, std::string datetime_forma size_t type_index_to_data_type(const std::vector& input_types, size_t index, ut_type::UTDataTypeDesc& ut_desc, DataTypePtr& type) { - doris_udf::FunctionContext::TypeDesc& desc = ut_desc.type_desc; + doris::TypeDescriptor& desc = ut_desc.type_desc; if (index >= input_types.size()) { return -1; } @@ -73,71 +73,71 @@ size_t type_index_to_data_type(const std::vector& input_types, size_t i switch (tp) { case TypeIndex::String: - desc.type = doris_udf::FunctionContext::TYPE_STRING; + desc.type = doris::PrimitiveType::TYPE_STRING; type = std::make_shared(); return 1; case TypeIndex::JSONB: - desc.type = doris_udf::FunctionContext::TYPE_JSONB; + desc.type = doris::PrimitiveType::TYPE_JSONB; type = std::make_shared(); return 1; case TypeIndex::BitMap: - desc.type = doris_udf::FunctionContext::TYPE_OBJECT; + desc.type = doris::PrimitiveType::TYPE_OBJECT; type = std::make_shared(); return 1; case TypeIndex::UInt8: - desc.type = doris_udf::FunctionContext::TYPE_BOOLEAN; + desc.type = doris::PrimitiveType::TYPE_BOOLEAN; type = std::make_shared(); return 1; case TypeIndex::Int8: - desc.type = doris_udf::FunctionContext::TYPE_TINYINT; + desc.type = doris::PrimitiveType::TYPE_TINYINT; type = std::make_shared(); return 1; case TypeIndex::Int16: - desc.type = doris_udf::FunctionContext::TYPE_SMALLINT; + desc.type = doris::PrimitiveType::TYPE_SMALLINT; type = std::make_shared(); return 1; case TypeIndex::Int32: - desc.type = doris_udf::FunctionContext::TYPE_INT; + desc.type = doris::PrimitiveType::TYPE_INT; type = std::make_shared(); return 1; case TypeIndex::Int64: - desc.type = doris_udf::FunctionContext::TYPE_BIGINT; + desc.type = doris::PrimitiveType::TYPE_BIGINT; type = std::make_shared(); return 1; case TypeIndex::Int128: - desc.type = doris_udf::FunctionContext::TYPE_LARGEINT; + desc.type = doris::PrimitiveType::TYPE_LARGEINT; type = std::make_shared(); return 1; case TypeIndex::Float32: - desc.type = doris_udf::FunctionContext::TYPE_FLOAT; + desc.type = doris::PrimitiveType::TYPE_FLOAT; type = std::make_shared(); return 1; case TypeIndex::Float64: - desc.type = doris_udf::FunctionContext::TYPE_DOUBLE; + desc.type = doris::PrimitiveType::TYPE_DOUBLE; type = std::make_shared(); return 1; case TypeIndex::Decimal128: - desc.type = doris_udf::FunctionContext::TYPE_DECIMALV2; + desc.type = doris::PrimitiveType::TYPE_DECIMALV2; type = std::make_shared>(); return 1; case TypeIndex::DateTime: - desc.type = doris_udf::FunctionContext::TYPE_DATETIME; + desc.type = doris::PrimitiveType::TYPE_DATETIME; type = std::make_shared(); return 1; case TypeIndex::Date: - desc.type = doris_udf::FunctionContext::TYPE_DATE; + desc.type = doris::PrimitiveType::TYPE_DATE; type = std::make_shared(); return 1; case TypeIndex::DateV2: - desc.type = doris_udf::FunctionContext::TYPE_DATEV2; + desc.type = doris::PrimitiveType::TYPE_DATEV2; type = std::make_shared(); return 1; case TypeIndex::DateTimeV2: - desc.type = doris_udf::FunctionContext::TYPE_DATETIMEV2; + desc.type = doris::PrimitiveType::TYPE_DATETIMEV2; type = std::make_shared(); return 1; case TypeIndex::Array: { - desc.type = doris_udf::FunctionContext::TYPE_ARRAY; + desc.type = doris::PrimitiveType::TYPE_ARRAY; ut_type::UTDataTypeDesc sub_desc; DataTypePtr sub_type = nullptr; ++index; diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 9cb7a2c5bdf02a..66178dd1303111 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -150,7 +150,7 @@ constexpr TypeIndex get_type_index() { struct UTDataTypeDesc { DataTypePtr data_type; - doris_udf::FunctionContext::TypeDesc type_desc; + doris::TypeDescriptor type_desc; std::string col_name; bool is_const = false; bool is_nullable = true; @@ -206,7 +206,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty // 1.2 prepare args for function call ColumnNumbers arguments; - std::vector arg_types; + std::vector arg_types; std::vector> constant_col_ptrs; std::vector> constant_cols; for (size_t i = 0; i < descs.size(); ++i) { @@ -229,22 +229,22 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty func_name, block.get_columns_with_type_and_name(), return_type); EXPECT_TRUE(func != nullptr); - doris_udf::FunctionContext::TypeDesc fn_ctx_return; + doris::TypeDescriptor fn_ctx_return; if constexpr (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_BOOLEAN; + fn_ctx_return.type = doris::PrimitiveType::TYPE_BOOLEAN; } else if constexpr (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_INT; + fn_ctx_return.type = doris::PrimitiveType::TYPE_INT; } else if constexpr (std::is_same_v || std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DOUBLE; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DOUBLE; } else if constexpr (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DATETIME; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIME; } else if (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DATEV2; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2; } else if (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DATETIMEV2; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2; } else { - fn_ctx_return.type = doris_udf::FunctionContext::INVALID_TYPE; + fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE; } FunctionUtils fn_utils(fn_ctx_return, arg_types, 0); diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto index 74234cb442f91d..9660a6707db81b 100644 --- a/gensrc/proto/descriptors.proto +++ b/gensrc/proto/descriptors.proto @@ -28,7 +28,7 @@ message PSlotDescriptor { required int32 parent = 2; // tuple id which this slot is belong to required PTypeDesc slot_type = 3; required int32 column_pos = 4; // in originating table - required int32 byte_offset = 5; // into tuple + required int32 byte_offset = 5; // into tuple, not used any more required int32 null_indicator_byte = 6; required int32 null_indicator_bit = 7; required string col_name = 8; From 5fb7f73c202d997167446a3b57e159a6b4006138 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Sun, 5 Mar 2023 09:07:45 +0800 Subject: [PATCH 2/3] f --- be/src/udf/udf.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 339ab655da1aff..093048372cd1b4 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -27,11 +27,8 @@ #include #include #include - -// This is the only Doris header required to develop UDFs and UDAs. This header -// contains the types that need to be used and the FunctionContext object. The context -// object serves as the interface object between the UDF/UDA and the doris process. namespace doris { + class FunctionContextImpl; struct ColumnPtrWrapper; struct StringRef; @@ -40,10 +37,6 @@ class DecimalV2Value; class DateTimeValue; class CollectionValue; struct TypeDescriptor; -} // namespace doris - -namespace doris { - // All input and output values will be one of the structs below. The struct is a simple // object containing a boolean to store if the value is nullptr and the value itself. The // value is unspecified if the nullptr boolean is set. From e63c37fb82ba020d750eed9ad27b691431566ef7 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Sun, 5 Mar 2023 16:33:10 +0800 Subject: [PATCH 3/3] f --- be/src/vec/exec/scan/new_es_scanner.cpp | 2 +- be/src/vec/exec/scan/new_olap_scanner.cpp | 2 +- be/src/vec/exec/scan/vfile_scanner.cpp | 2 +- be/src/vec/exec/scan/vmeta_scanner.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index 22db2280bee74b..18233dde6c867c 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -42,7 +42,7 @@ NewEsScanner::NewEsScanner(RuntimeState* state, NewEsScanNode* parent, int64_t l Status NewEsScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; - RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index b5f5bd4863731e..80af93eff2bc35 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -52,7 +52,7 @@ Status NewOlapScanner::prepare(const TPaloScanRange& scan_range, const std::vector& filters, const FilterPredicates& filter_predicates, const std::vector& function_filters) { - RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); // set limit to reduce end of rowset and segment mem use _tablet_reader = std::make_unique(); diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 936053d98b55e3..72522119f31502 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -60,7 +60,7 @@ VFileScanner::VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t Status VFileScanner::prepare( VExprContext** vconjunct_ctx_ptr, std::unordered_map* colname_to_value_range) { - RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); _colname_to_value_range = colname_to_value_range; _get_block_timer = ADD_TIMER(_parent->_scanner_profile, "FileScannerGetBlockTime"); diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index b08dd47622be71..86e60edf111220 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -44,7 +44,7 @@ Status VMetaScanner::open(RuntimeState* state) { Status VMetaScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { VLOG_CRITICAL << "VMetaScanner::prepare"; - RETURN_IF_ERROR(VScanner::prepare(_state, &_vconjunct_ctx)); + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); if (_scan_range.meta_scan_range.__isset.iceberg_params) { RETURN_IF_ERROR(_fetch_iceberg_metadata_batch());