diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 0ede1306a94452..90cebf58e1a7d7 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -390,6 +390,7 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { break; } case TYPE_VARCHAR: + case TYPE_OBJECT: case TYPE_HLL: { Slice* slice = reinterpret_cast(ptr); StringValue *slot = tuple->get_string_slot(slot_desc->tuple_offset()); diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 24b4b5ee0b4fb6..410844c62f7b63 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -712,12 +712,6 @@ int OlapTableSink::_validate_data(RuntimeState* state, RowBatch* batch, Bitmap* case TYPE_VARCHAR: { // Fixed length string StringValue* str_val = (StringValue*)slot; - // todo(kks): varchar(0) means bitmap_union agg type - // we will remove this special handle when we add a special type for bitmap_union - if (desc->type().type == TYPE_VARCHAR && desc->type().len == 0) { - continue; - } - if (str_val->len > desc->type().len) { std::stringstream ss; ss << "the length of input is too long than schema. 
" diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp index aee822eb29bea1..e0ef163931addd 100755 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -329,7 +329,8 @@ inline void AggFnEvaluator::set_any_val( case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL: + case TYPE_HLL: + case TYPE_OBJECT: reinterpret_cast(slot)->to_string_val( reinterpret_cast(dst)); return; @@ -404,6 +405,7 @@ inline void AggFnEvaluator::set_output_slot(const AnyVal* src, case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: *reinterpret_cast(slot) = StringValue::from_string_val(*reinterpret_cast(src)); return; @@ -598,7 +600,8 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_OBJECT: { StringVal* value = reinterpret_cast(_staging_input_vals[i]); memcpy(begin, value->ptr, value->len); begin += value->len; @@ -940,7 +943,8 @@ void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* s case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL :{ + case TYPE_HLL: + case TYPE_OBJECT: { typedef StringVal(*Fn)(FunctionContext*, AnyVal*); StringVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index e8791e5a83fd2f..ebe6ebfda4a443 100755 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -83,6 +83,7 @@ AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type) { case TYPE_CHAR: case TYPE_HLL: case TYPE_VARCHAR: + case TYPE_OBJECT: return pool->add(new StringVal); case TYPE_DECIMAL: @@ -143,7 +144,9 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip case TYPE_HLL: out.type = FunctionContext::TYPE_HLL; out.len = type.len; - break; + break; + case TYPE_OBJECT: + out.type = FunctionContext::TYPE_OBJECT; 
case TYPE_CHAR: out.type = FunctionContext::TYPE_CHAR; out.len = type.len; diff --git a/be/src/exprs/anyval_util.h b/be/src/exprs/anyval_util.h index 3b18f602c5f4ab..61f8778a509475 100755 --- a/be/src/exprs/anyval_util.h +++ b/be/src/exprs/anyval_util.h @@ -184,44 +184,6 @@ class AnyValUtil { return HashUtil::murmur_hash64A(&v.val, 8, seed); } - static doris_udf::FunctionContext::Type primitive_type_to_type(const PrimitiveType& type) { - switch (type) { - case TYPE_NULL: - return doris_udf::FunctionContext::TYPE_NULL; - case TYPE_BOOLEAN: - return doris_udf::FunctionContext::TYPE_BOOLEAN; - case TYPE_TINYINT: - return doris_udf::FunctionContext::TYPE_TINYINT; - case TYPE_SMALLINT: - return doris_udf::FunctionContext::TYPE_SMALLINT; - case TYPE_INT: - return doris_udf::FunctionContext::TYPE_INT; - case TYPE_BIGINT: - return doris_udf::FunctionContext::TYPE_BIGINT; - case TYPE_LARGEINT: - return doris_udf::FunctionContext::TYPE_LARGEINT; - case TYPE_FLOAT: - return doris_udf::FunctionContext::TYPE_FLOAT; - case TYPE_DOUBLE: - return doris_udf::FunctionContext::TYPE_DOUBLE; - case TYPE_DATE: - return doris_udf::FunctionContext::TYPE_DATE; - case TYPE_DATETIME: - return doris_udf::FunctionContext::TYPE_DATETIME; - case TYPE_HLL: - case TYPE_CHAR: - case TYPE_VARCHAR: - return doris_udf::FunctionContext::TYPE_STRING; - case TYPE_DECIMAL: - return doris_udf::FunctionContext::TYPE_DECIMAL; - case TYPE_DECIMALV2: - return doris_udf::FunctionContext::TYPE_DECIMALV2; - break; - default: - DCHECK(false) << "Unknown type: " << type; - } - return doris_udf::FunctionContext::TYPE_NULL; - } // Returns the byte size of *Val for type t. 
static int any_val_size(const TypeDescriptor& t) { switch (t.type) { @@ -249,6 +211,7 @@ class AnyValUtil { case TYPE_DOUBLE: return sizeof(doris_udf::DoubleVal); + case TYPE_OBJECT: case TYPE_HLL: case TYPE_CHAR: case TYPE_VARCHAR: @@ -281,6 +244,7 @@ class AnyValUtil { case TYPE_LARGEINT: return alignof(LargeIntVal); case TYPE_FLOAT: return alignof(FloatVal); case TYPE_DOUBLE: return alignof(DoubleVal); + case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: case TYPE_CHAR: @@ -377,6 +341,7 @@ class AnyValUtil { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: reinterpret_cast(slot)->to_string_val( reinterpret_cast(dst)); return; diff --git a/be/src/exprs/case_expr.cpp b/be/src/exprs/case_expr.cpp index 70967d67254e32..178df684990cbd 100644 --- a/be/src/exprs/case_expr.cpp +++ b/be/src/exprs/case_expr.cpp @@ -327,6 +327,7 @@ void CaseExpr::get_child_val(int child_idx, ExprContext* ctx, TupleRow* row, Any case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: *reinterpret_cast(dst) = _children[child_idx]->get_string_val(ctx, row); break; case TYPE_DECIMAL: @@ -373,6 +374,7 @@ bool CaseExpr::any_val_eq(const TypeDescriptor& type, const AnyVal* v1, const An case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: return AnyValUtil::equals(type, *reinterpret_cast(v1), *reinterpret_cast(v2)); case TYPE_DECIMAL: diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index 35f93fedd2ec81..663a095a80260d 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -154,6 +154,7 @@ Expr::Expr(const TypeDescriptor& type) : case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: _node_type = (TExprNodeType::STRING_LITERAL); break; @@ -212,6 +213,7 @@ Expr::Expr(const TypeDescriptor& type, bool is_slotref) : case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: _node_type = (TExprNodeType::STRING_LITERAL); break; @@ -379,13 +381,6 @@ Status Expr::create_expr(ObjectPool* pool, const 
TExprNode& texpr_node, Expr** e *expr = pool->add(new ScalarFnCall(texpr_node)); } return Status::OK(); - //case TExprNodeType::AGG_EXPR: { - // if (!texpr_node.__isset.agg_expr) { - // return Status::InternalError("Aggregation expression not set in thrift node"); - // } - // *expr = pool->add(new AggregateExpr(texpr_node)); - // return Status::OK(); - //} case TExprNodeType::CASE_EXPR: { if (!texpr_node.__isset.case_expr) { @@ -752,7 +747,8 @@ doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) { } case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_OBJECT: { _constant_val.reset(new StringVal(get_string_val(context, NULL))); break; } diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index 2c5118322befd1..6c21b89025cfcc 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -335,10 +335,11 @@ void* ExprContext::get_value(Expr* e, TupleRow* row) { } case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_OBJECT: { doris_udf::StringVal v = e->get_string_val(this, row); if (v.is_null) { - return NULL; + return nullptr; } _result.string_val.ptr = reinterpret_cast(v.ptr); _result.string_val.len = v.len; diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index b5f605dd0f2842..293d3cc23945ee 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ b/be/src/exprs/new_agg_fn_evaluator.cc @@ -268,6 +268,7 @@ void NewAggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_ case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: *reinterpret_cast(slot) = StringValue::from_string_val(*reinterpret_cast(src)); return; @@ -369,7 +370,8 @@ inline void NewAggFnEvaluator::set_any_val( case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL: + case TYPE_HLL: + case TYPE_OBJECT: reinterpret_cast(slot)->to_string_val( reinterpret_cast(dst)); return; @@ -642,7 +644,8 @@ void 
NewAggFnEvaluator::SerializeOrFinalize(Tuple* src, } case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL:{ + case TYPE_HLL: + case TYPE_OBJECT: { typedef StringVal(*Fn)(FunctionContext*, AnyVal*); StringVal v = reinterpret_cast(fn)( agg_fn_ctx_.get(), staging_intermediate_val_); diff --git a/be/src/olap/aggregate_func.cpp b/be/src/olap/aggregate_func.cpp index 0b019342f24eac..569c75d6e3975d 100644 --- a/be/src/olap/aggregate_func.cpp +++ b/be/src/olap/aggregate_func.cpp @@ -131,7 +131,8 @@ AggregateFuncResolver::AggregateFuncResolver() { add_aggregate_mapping(); // Bitmap Aggregate Function - add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); //for backward compatibility } AggregateFuncResolver::~AggregateFuncResolver() { diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 35405817501a0b..01fd685c0e7740 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -446,7 +446,7 @@ struct AggregateFuncTraits -struct AggregateFuncTraits { +struct AggregateFuncTraits { static void init(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, ObjectPool* agg_pool) { DCHECK_EQ(src_null, false); dst->set_not_null(); @@ -456,6 +456,7 @@ struct AggregateFuncTraitssize = 0; auto* bitmap = new RoaringBitmap(src_slice->data); + dst_slice->data = (char*) bitmap; mem_pool->mem_tracker()->consume(sizeof(RoaringBitmap)); @@ -491,6 +492,13 @@ struct AggregateFuncTraits +struct AggregateFuncTraits : + public AggregateFuncTraits {}; + + template struct AggregateTraits : public AggregateFuncTraits { static const FieldAggregationMethod agg_method = aggMethod; diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 34203f28a8f295..85ffcf0c7f46e3 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -139,7 +139,8 @@ enum FieldType { OLAP_FIELD_TYPE_UNKNOWN = 21, // UNKNOW Type OLAP_FIELD_TYPE_NONE = 22, OLAP_FIELD_TYPE_HLL = 23, - OLAP_FIELD_TYPE_BOOL = 24 + 
OLAP_FIELD_TYPE_BOOL = 24, + OLAP_FIELD_TYPE_OBJECT = 25 }; // 定义Field支持的所有聚集方法 diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp index 2fa3143c533fec..aa00e599c6027e 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -92,7 +92,7 @@ void RowBlock::_compute_layout() { // All field has a nullbyte in memory if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL - || column.type() == OLAP_FIELD_TYPE_CHAR) { + || column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_OBJECT) { // 变长部分额外计算下实际最大的字符串长度(此处length已经包括记录Length的2个字节) memory_size += sizeof(Slice) + sizeof(char); } else { diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index 96bb78ea7a92db..9fbf5972a3c292 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -18,9 +18,11 @@ #include "olap/rowset/segment_v2/encoding_info.h" #include "olap/olap_common.h" +#include "olap/rowset/segment_v2/binary_dict_page.h" +#include "olap/rowset/segment_v2/binary_plain_page.h" #include "olap/rowset/segment_v2/bitshuffle_page.h" +#include "olap/rowset/segment_v2/plain_page.h" #include "olap/rowset/segment_v2/rle_page.h" -#include "olap/rowset/segment_v2/binary_dict_page.h" #include "gutil/strings/substitute.h" namespace doris { @@ -38,9 +40,23 @@ struct TypeEncodingTraits { }; template struct TypeEncodingTraits { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { + *builder = new PlainPageBuilder(opts); + return Status::OK(); + } + static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { + *decoder = new PlainPageDecoder(data, opts); + return Status::OK(); + } +}; + +template +struct TypeEncodingTraits { + static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { + *builder = new 
BinaryPlainPageBuilder(opts); return Status::OK(); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { + *decoder = new BinaryPlainPageDecoder(data, opts); return Status::OK(); } }; @@ -149,6 +165,7 @@ EncodingInfoResolver::EncodingInfoResolver() { _add_map(); _add_map(); _add_map(); + _add_map(); } EncodingInfoResolver::~EncodingInfoResolver() { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 60ae7a95f545e5..acd726e7174374 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -70,6 +70,8 @@ FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { type = OLAP_FIELD_TYPE_LIST; } else if (0 == upper_type_str.compare("MAP")) { type = OLAP_FIELD_TYPE_MAP; + } else if (0 == upper_type_str.compare("OBJECT")) { + type = OLAP_FIELD_TYPE_OBJECT; } else { LOG(WARNING) << "invalid type string. [type='" << type_str << "']"; type = OLAP_FIELD_TYPE_UNKNOWN; @@ -160,6 +162,7 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) { case OLAP_FIELD_TYPE_BOOL: return "BOOLEAN"; + case OLAP_FIELD_TYPE_HLL: return "HLL"; @@ -172,6 +175,9 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) { case OLAP_FIELD_TYPE_MAP: return "MAP"; + case OLAP_FIELD_TYPE_OBJECT: + return "OBJECT"; + default: return "UNKNOWN"; } @@ -226,6 +232,8 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3 return 4; case TPrimitiveType::DOUBLE: return 8; + case TPrimitiveType::OBJECT: + return 16; case TPrimitiveType::CHAR: return string_length; case TPrimitiveType::VARCHAR: diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index 9ff8afcc62d086..fa4e4341fd02e4 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -76,6 +76,7 @@ TypeInfoResolver::TypeInfoResolver() { add_mapping(); add_mapping(); add_mapping(); + add_mapping(); } TypeInfoResolver::~TypeInfoResolver() {} diff --git 
a/be/src/olap/types.h b/be/src/olap/types.h index 3bb0aa22eabead..9a6405b4975f69 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -156,6 +156,9 @@ template<> struct CppTypeTraits { template<> struct CppTypeTraits { using CppType = Slice; }; +template<> struct CppTypeTraits { + using CppType = Slice; +}; template struct BaseFieldtypeTraits : public CppTypeTraits { @@ -579,6 +582,15 @@ struct FieldTypeTraits : public FieldTypeTraits +struct FieldTypeTraits : public FieldTypeTraits { + /* + * Object type only used as value, so + * cmp/from_string/set_to_max/set_to_min function + * in this struct has no significance + */ +}; + // Instantiate this template to get static access to the type traits. template struct TypeTraits : public FieldTypeTraits { diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 8971ef11c19aea..e6ac071e186091 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -89,6 +89,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case TPrimitiveType::HLL: return TYPE_HLL; + case TPrimitiveType::OBJECT: + return TYPE_OBJECT; + default: return INVALID_TYPE; } @@ -152,6 +155,9 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) { case TYPE_HLL: return TPrimitiveType::HLL; + + case TYPE_OBJECT: + return TPrimitiveType::OBJECT; default: return TPrimitiveType::INVALID_TYPE; @@ -213,8 +219,13 @@ std::string type_to_string(PrimitiveType t) { case TYPE_CHAR: return "CHAR"; + case TYPE_HLL: return "HLL"; + + case TYPE_OBJECT: + return "OBJECT"; + default: return ""; }; @@ -279,6 +290,9 @@ std::string type_to_odbc_string(PrimitiveType t) { case TYPE_HLL: return "hll"; + + case TYPE_OBJECT: + return "object"; }; return "unknown"; diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index b5fbfbbd1ac369..f40c85a58415c8 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -56,6 +56,7 @@ enum PrimitiveType { 
TYPE_DECIMALV2, /* 20 */ TYPE_TIME, /* 21 */ + TYPE_OBJECT, }; inline bool is_enumeration_type(PrimitiveType type) { @@ -99,6 +100,7 @@ inline bool is_enumeration_type(PrimitiveType type) { // Returns the byte size of 'type' Returns 0 for variable length types. inline int get_byte_size(PrimitiveType type) { switch (type) { + case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: return 0; @@ -139,6 +141,7 @@ inline int get_byte_size(PrimitiveType type) { inline int get_real_byte_size(PrimitiveType type) { switch (type) { + case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: return 0; @@ -181,6 +184,7 @@ inline int get_real_byte_size(PrimitiveType type) { // Returns the byte size of type when in a tuple inline int get_slot_size(PrimitiveType type) { switch (type) { + case TYPE_OBJECT: case TYPE_HLL: case TYPE_CHAR: case TYPE_VARCHAR: @@ -225,9 +229,19 @@ inline int get_slot_size(PrimitiveType type) { } inline bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs) { - if (lhs == TYPE_CHAR || lhs == TYPE_VARCHAR || lhs == TYPE_HLL) { + if (lhs == TYPE_VARCHAR) { + return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR + || rhs == TYPE_HLL || rhs == TYPE_OBJECT; + } + + if (lhs == TYPE_OBJECT) { + return rhs == TYPE_VARCHAR || rhs == TYPE_OBJECT; + } + + if (lhs == TYPE_CHAR || lhs == TYPE_HLL) { return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL; } + return lhs == rhs; } diff --git a/be/src/runtime/raw_value.cpp b/be/src/runtime/raw_value.cpp index 40a65e42bc205b..3f2c36d3d0236b 100644 --- a/be/src/runtime/raw_value.cpp +++ b/be/src/runtime/raw_value.cpp @@ -204,6 +204,7 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_OBJECT: case TYPE_HLL: { string_val = reinterpret_cast(value); std::stringstream ss; @@ -285,6 +286,7 @@ void RawValue::write(const void* value, void* dst, const TypeDescriptor& type, M *reinterpret_cast(dst) = *reinterpret_cast(value); break; + case 
TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: case TYPE_CHAR: { diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index 95ff9cf616a570..bb2335e121bded 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -175,7 +175,7 @@ struct TypeDescriptor { void to_protobuf(PTypeDesc* ptype) const; inline bool is_string_type() const { - return type == TYPE_VARCHAR || type == TYPE_CHAR || type == TYPE_HLL; + return type == TYPE_VARCHAR || type == TYPE_CHAR || type == TYPE_HLL || type == TYPE_OBJECT; } inline bool is_date_type() const { @@ -187,7 +187,7 @@ struct TypeDescriptor { } inline bool is_var_len_string_type() const { - return type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_CHAR; + return type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_OBJECT; } inline bool is_complex_type() const { @@ -205,6 +205,7 @@ struct TypeDescriptor { case TYPE_MAP: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: return 0; case TYPE_NULL: @@ -245,6 +246,7 @@ struct TypeDescriptor { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_OBJECT: return sizeof(StringValue); case TYPE_NULL: diff --git a/be/src/runtime/vectorized_row_batch.cpp b/be/src/runtime/vectorized_row_batch.cpp index 91543119b499c9..ae371e75e910ac 100644 --- a/be/src/runtime/vectorized_row_batch.cpp +++ b/be/src/runtime/vectorized_row_batch.cpp @@ -56,7 +56,8 @@ void VectorizedRowBatch::dump_to_row_block(RowBlock* row_block) { size_t field_size = 0; if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || - column.type() == OLAP_FIELD_TYPE_HLL) { + column.type() == OLAP_FIELD_TYPE_HLL || + column.type() == OLAP_FIELD_TYPE_OBJECT) { field_size = sizeof(Slice); } else { field_size = column.length(); @@ -102,7 +103,8 @@ void VectorizedRowBatch::dump_to_row_block(RowBlock* row_block) { size_t field_size = 0; if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || - column.type() == 
OLAP_FIELD_TYPE_HLL) { + column.type() == OLAP_FIELD_TYPE_HLL || + column.type() == OLAP_FIELD_TYPE_OBJECT) { field_size = sizeof(Slice); } else { field_size = column.length(); diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 4cf5d9bbf5eaf1..80656271d14aef 100755 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -73,7 +73,8 @@ class FunctionContext { TYPE_HLL, TYPE_STRING, TYPE_FIXED_BUFFER, - TYPE_DECIMALV2 + TYPE_DECIMALV2, + TYPE_OBJECT }; struct TypeDesc { diff --git a/be/src/util/symbols_util.cpp b/be/src/util/symbols_util.cpp index b74fa908e15266..eb623a760500ea 100644 --- a/be/src/util/symbols_util.cpp +++ b/be/src/util/symbols_util.cpp @@ -152,6 +152,7 @@ static void append_any_val_type( case TYPE_VARCHAR: case TYPE_CHAR: case TYPE_HLL: + case TYPE_OBJECT: append_mangled_token("StringVal", s); break; case TYPE_DATE: diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md index 54adce61497425..482ac683a7150a 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md @@ -49,7 +49,7 @@ mysql> select bitmap_union_int (id2) from bitmap_udaf; CREATE TABLE `bitmap_test` ( `id` int(11) NULL COMMENT "", - `id2` varchar(0) bitmap_union NULL // 注意: bitmap_union的varchar长度需要指定为0 + `id2` bitmap bitmap_union NULL ) ENGINE=OLAP AGGREGATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 10; diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md index 63eeb65d311b48..7a9e9649390a32 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md @@ -50,9 +50,13 @@ HLL (1~16385个字节) 
hll列类型,不需要指定长度和默认值、长度根据数据的聚合 程度系统内控制,并且HLL列只能通过配套的hll_union_agg、Hll_cardinality、hll_hash进行查询或使用 + BITMAP + bitmap 列类型,不需要指定长度和默认值 + BITMAP 列只能通过配套的 BITMAP_UNION、BITMAP_COUNT、TO_BITMAP 进行查询或使用 agg_type:聚合类型,如果不指定,则该列为 key 列。否则,该列为 value 列 - SUM、MAX、MIN、REPLACE、HLL_UNION(仅用于HLL列,为HLL独有的聚合方式)、BITMAP_UNION(列类型需要定义为VARCHAR(20)) + SUM、MAX、MIN、REPLACE、HLL_UNION(仅用于HLL列,为HLL独有的聚合方式)、 + BITMAP_UNION(仅用于 BITMAP 列,为 BITMAP 独有的聚合方式) 该类型只对聚合模型(key_desc的type为AGGREGATE KEY)有用,其它模型不需要指定这个。 是否允许为NULL: 默认不允许为 NULL。NULL 值在导入数据中用 \N 来表示 @@ -339,8 +343,8 @@ ( k1 TINYINT, k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 VARCHAR(0) BITMAP_UNION, // 注意: bitmap_union的varchar长度需要指定为0 - v2 VARCHAR(0) BITMAP_UNION + v1 BITMAP BITMAP_UNION, + v2 BITMAP BITMAP_UNION ) ENGINE=olap AGGREGATE KEY(k1, k2) diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md index a167f0d79e3c39..99dd2ccc708850 100644 --- a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md @@ -45,7 +45,7 @@ mysql> select bitmap_union_int (id2) from bitmap_udaf; CREATE TABLE `bitmap_test` ( `id` int(11) NULL COMMENT "", - `id2` varchar(0) bitmap_union NULL // NOTICE: bitmap_union's varchar length must be 0. + `id2` bitmap bitmap_union NULL ) ENGINE=OLAP AGGREGATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 10; diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md index 150216421caf0b..6bfda4c7ea8fe8 100644 --- a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md @@ -53,11 +53,14 @@ VARCHAR[(length)] Variable length string. 
Range: 1 ~ 65533 HLL (1~16385 Bytes) - HLLL tpye, No need to specify length. + HLL type, No need to specify length. This type can only be queried by hll_union_agg, hll_cardinality, hll_hash functions. + BITMAP + BITMAP type, No need to specify length. + This type can only be queried by BITMAP_UNION, BITMAP_COUNT, TO_BITMAP functions. agg_type: Aggregation type. If not specified, the column is key column. Otherwise, the column is value column. - SUM、MAX、MIN、REPLACE、HLL_UNION(Only for HLL type), BITMAP_UNION(Type should be VARCHAR(0)) + SUM、MAX、MIN、REPLACE、HLL_UNION(Only for HLL type), BITMAP_UNION(Only for BITMAP type) Allow NULL: Default is NOT NULL. NULL value should be represented as `\N` in load source file. @@ -388,8 +391,8 @@ ( k1 TINYINT, k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 VARCHAR(0) BITMAP_UNION, // 注意: bitmap_union的varchar长度需要指定为0 - v2 VARCHAR(0) BITMAP_UNION + v1 BITMAP BITMAP_UNION, + v2 BITMAP BITMAP_UNION ) ENGINE=olap AGGREGATE KEY(k1, k2) diff --git a/fe/src/main/cup/sql_parser.cup b/fe/src/main/cup/sql_parser.cup index 4ac0e37a208676..7e34b704bc347a 100644 --- a/fe/src/main/cup/sql_parser.cup +++ b/fe/src/main/cup/sql_parser.cup @@ -191,7 +191,7 @@ parser code {: // Total keywords of doris terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_AND, KW_ANTI, KW_AS, KW_ASC, KW_AUTHORS, - KW_BACKEND, KW_BACKUP, KW_BETWEEN, KW_BEGIN, KW_BIGINT, KW_BITMAP_UNION, KW_BOOLEAN, KW_BOTH, KW_BROKER, KW_BACKENDS, KW_BY, + KW_BACKEND, KW_BACKUP, KW_BETWEEN, KW_BEGIN, KW_BIGINT, KW_BITMAP, KW_BITMAP_UNION, KW_BOOLEAN, KW_BOTH, KW_BROKER, KW_BACKENDS, KW_BY, KW_CANCEL, KW_CASE, KW_CAST, KW_CHAIN, KW_CHAR, KW_CHARSET, KW_CLUSTER, KW_CLUSTERS, KW_COLLATE, KW_COLLATION, KW_COLUMN, KW_COLUMNS, KW_COMMENT, KW_COMMIT, KW_COMMITTED, KW_CONFIG, KW_CONNECTION, KW_CONNECTION_ID, KW_CONSISTENT, KW_COUNT, KW_CREATE, KW_CROSS, KW_CURRENT, KW_CURRENT_USER, @@ -3322,6 +3322,8 @@ type ::= {: RESULT = Type.DATETIME; :} | KW_TIME {: RESULT = Type.TIME; :} + 
| KW_BITMAP + {: RESULT = Type.BITMAP; :} | KW_STRING {: RESULT = ScalarType.createVarcharType(-1); :} | KW_VARCHAR LPAREN INTEGER_LITERAL:len RPAREN @@ -3996,6 +3998,8 @@ keyword ::= {: RESULT = id; :} | KW_BEGIN:id {: RESULT = id; :} + | KW_BITMAP:id + {: RESULT = id; :} | KW_BITMAP_UNION:id {: RESULT = id; :} | KW_BOOLEAN:id diff --git a/fe/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index 9e691d94049695..483008b983aa7c 100644 --- a/fe/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -273,6 +273,11 @@ public void analyze(Analyzer analyzer) throws AnalysisException, UserException { hasHll = true; } + if (columnDef.getType().isBitmapType()) { + if (columnDef.isKey()) { + throw new AnalysisException("BITMAP can't be used as keys, "); + } + } if (columnDef.getAggregateType() == BITMAP_UNION) { if (columnDef.isKey()) { diff --git a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index d1520127cae366..f624d5434858bd 100644 --- a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.AggregateFunction; import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Function; import org.apache.doris.catalog.FunctionSet; @@ -383,6 +384,35 @@ private void analyzeBuiltinAggFunction(Analyzer analyzer) throws AnalysisExcepti throw new AnalysisException("BITMAP_UNION_INT params only support TINYINT or SMALLINT or INT"); } + if ((fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_COUNT))) { + if (children.size() != 1) { + throw new 
AnalysisException("BITMAP_COUNT function could only have one child"); + } + + if (getChild(0) instanceof SlotRef) { + SlotRef slotRef = (SlotRef) getChild(0); + Column column = slotRef.getDesc().getColumn(); + if (column != null && column.getAggregationType() != AggregateType.BITMAP_UNION) { + throw new AnalysisException("BITMAP_COUNT function require the column is BITMAP_UNION aggregate type"); + } else if (slotRef.getDesc().getSourceExprs().size() == 1) { + Expr sourceExpr = slotRef.getDesc().getSourceExprs().get(0); + if (sourceExpr instanceof FunctionCallExpr) { + FunctionCallExpr functionExpr = (FunctionCallExpr) sourceExpr; + if (!functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION)) { + throw new AnalysisException("BITMAP_COUNT function only support BITMAP_UNION function as it's child"); + } + } + } + } else if (getChild(0) instanceof FunctionCallExpr) { + FunctionCallExpr functionCallExpr = (FunctionCallExpr) getChild(0); + if (!functionCallExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION)) { + throw new AnalysisException("BITMAP_COUNT function only support BITMAP_UNION function as it's child"); + } + } else { + throw new AnalysisException("BITMAP_COUNT only support BITMAP_UNION(column) or BITMAP_COUNT(BITMAP_UNION(column))"); + } + } + if ((fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION))) { if (children.size() != 1) { throw new AnalysisException("BITMAP_UNION function could only have one child"); diff --git a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java index d5544495b5abdd..a6d91cf188e8e9 100644 --- a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java @@ -409,6 +409,9 @@ private void analyzeSubquery(Analyzer analyzer) throws UserException { if (col.getType().isHllType() && !mentionedColumns.contains(col.getName())) { throw new 
AnalysisException (" hll column " + col.getName() + " mush in insert into columns"); } + if (col.getType().isBitmapType() && !mentionedColumns.contains(col.getName())) { + throw new AnalysisException (" object column " + col.getName() + " mush in insert into columns"); + } } } diff --git a/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java index ec8f6f81d10e21..cf5f4eb5d03816 100644 --- a/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -852,6 +852,12 @@ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException { "GROUP BY expression must not contain hll column: " + groupingExprsCopy.get(i).toSql()); } + + if (groupingExprsCopy.get(i).type.isBitmapType()) { + throw new AnalysisException( + "GROUP BY expression must not contain bitmap column: " + + groupingExprsCopy.get(i).toSql()); + } } } @@ -947,7 +953,12 @@ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException { if (sortInfo.getOrderingExprs().get(i).type.isHllType()) { throw new AnalysisException( - "ORDER BY expression not contain hll column."); + "ORDER BY expression could not contain hll column."); + } + + if (sortInfo.getOrderingExprs().get(i).type.isBitmapType()) { + throw new AnalysisException( + "ORDER BY expression could not contain bitmap column."); } } } diff --git a/fe/src/main/java/org/apache/doris/analysis/SlotRef.java b/fe/src/main/java/org/apache/doris/analysis/SlotRef.java index 26b590815b47df..7ad53951079c38 100644 --- a/fe/src/main/java/org/apache/doris/analysis/SlotRef.java +++ b/fe/src/main/java/org/apache/doris/analysis/SlotRef.java @@ -87,10 +87,6 @@ public Expr clone() { return new SlotRef(this); } - public boolean isHllType() { - return this.type == Type.HLL; - } - public SlotDescriptor getDesc() { Preconditions.checkState(isAnalyzed); Preconditions.checkNotNull(desc); diff --git 
a/fe/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/src/main/java/org/apache/doris/analysis/TypeDef.java index 15b1731e7d6841..9c58bc34602d4c 100644 --- a/fe/src/main/java/org/apache/doris/analysis/TypeDef.java +++ b/fe/src/main/java/org/apache/doris/analysis/TypeDef.java @@ -93,9 +93,7 @@ private void analyzeScalarType(ScalarType scalarType) int len = scalarType.getLength(); // len is decided by child, when it is -1. - // todo(kks) : varchar(0) for bitmap_union agg type, - // we should forbid the len equal zero when we add a special type for bitmap_union - if (len < 0) { + if (len <= 0) { throw new AnalysisException(name + " size must be > 0: " + len); } if (scalarType.getLength() > maxLen) { diff --git a/fe/src/main/java/org/apache/doris/catalog/AggregateType.java b/fe/src/main/java/org/apache/doris/catalog/AggregateType.java index ebd67064a26eeb..020ec92090783d 100644 --- a/fe/src/main/java/org/apache/doris/catalog/AggregateType.java +++ b/fe/src/main/java/org/apache/doris/catalog/AggregateType.java @@ -91,7 +91,7 @@ public enum AggregateType { compatibilityMap.put(HLL_UNION, EnumSet.copyOf(primitiveTypeList)); primitiveTypeList.clear(); - primitiveTypeList.add(PrimitiveType.VARCHAR); + primitiveTypeList.add(PrimitiveType.BITMAP); compatibilityMap.put(BITMAP_UNION, EnumSet.copyOf(primitiveTypeList)); compatibilityMap.put(NONE, EnumSet.allOf(PrimitiveType.class)); diff --git a/fe/src/main/java/org/apache/doris/catalog/Function.java b/fe/src/main/java/org/apache/doris/catalog/Function.java index b356ee6e2e6045..bab63be3cb9723 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Function.java +++ b/fe/src/main/java/org/apache/doris/catalog/Function.java @@ -454,6 +454,7 @@ public static String getUdfTypeName(PrimitiveType t) { case VARCHAR: case CHAR: case HLL: + case BITMAP: return "string_val"; case DATE: case DATETIME: @@ -492,6 +493,7 @@ public static String getUdfType(PrimitiveType t) { case VARCHAR: case CHAR: case HLL: + case BITMAP: return 
"StringVal"; case DATE: case DATETIME: diff --git a/fe/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/src/main/java/org/apache/doris/catalog/PrimitiveType.java index a3031e5802c387..3520803746304c 100644 --- a/fe/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ b/fe/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -54,9 +54,12 @@ public enum PrimitiveType { HLL("HLL", 16, TPrimitiveType.HLL), TIME("TIME", 8, TPrimitiveType.TIME), + // we use OBJECT type represent BITMAP type in Backend + BITMAP("BITMAP", 16, TPrimitiveType.OBJECT), // Unsupported scalar types. BINARY("BINARY", -1, TPrimitiveType.BINARY); + private static final int DATE_INDEX_LEN = 3; private static final int DATETIME_INDEX_LEN = 8; private static final int VARCHAR_INDEX_LEN = 20; @@ -239,6 +242,7 @@ public enum PrimitiveType { builder.put(VARCHAR, DECIMALV2); builder.put(VARCHAR, VARCHAR); builder.put(VARCHAR, HLL); + builder.put(VARCHAR, BITMAP); // Decimal builder.put(DECIMAL, BOOLEAN); builder.put(DECIMAL, TINYINT); @@ -268,6 +272,10 @@ public enum PrimitiveType { builder.put(HLL, HLL); builder.put(HLL, VARCHAR); + // BITMAP + builder.put(BITMAP, BITMAP); + builder.put(BITMAP, VARCHAR); + //TIME builder.put(TIME, TIME); builder.put(TIME, DOUBLE); @@ -316,6 +324,7 @@ public enum PrimitiveType { supportedTypes.add(TIME); supportedTypes.add(DECIMAL); supportedTypes.add(DECIMALV2); + supportedTypes.add(BITMAP); } public static ArrayList getIntegerTypes() { @@ -503,6 +512,8 @@ public static boolean isImplicitCast(PrimitiveType type, PrimitiveType target) { compatibilityMatrix[HLL.ordinal()][HLL.ordinal()] = HLL; compatibilityMatrix[HLL.ordinal()][TIME.ordinal()] = INVALID_TYPE; + compatibilityMatrix[BITMAP.ordinal()][BITMAP.ordinal()] = BITMAP; + compatibilityMatrix[TIME.ordinal()][TIME.ordinal()] = TIME; } @@ -562,6 +573,8 @@ public static PrimitiveType fromThrift(TPrimitiveType tPrimitiveType) { return CHAR; case HLL: return HLL; + case OBJECT: + return 
BITMAP; default: return INVALID_TYPE; } @@ -637,90 +650,6 @@ public boolean isDecimalV2Type() { return this == DECIMALV2; } - public PrimitiveType getNumResultType() { - switch (this) { - case BOOLEAN: - case TINYINT: - case SMALLINT: - case INT: - case BIGINT: - return BIGINT; - case LARGEINT: - return LARGEINT; - case FLOAT: - case DOUBLE: - case DATE: - case DATETIME: - case CHAR: - case VARCHAR: - return DOUBLE; - case DECIMAL: - return DECIMAL; - case DECIMALV2: - return DECIMALV2; - case HLL: - return HLL; - default: - return INVALID_TYPE; - - } - } - - public PrimitiveType getResultType() { - switch (this) { - case BOOLEAN: - case TINYINT: - case SMALLINT: - case INT: - case BIGINT: - return BIGINT; - case LARGEINT: - return LARGEINT; - case FLOAT: - case DOUBLE: - return DOUBLE; - case DATE: - case DATETIME: - case CHAR: - case VARCHAR: - case TIME: - return VARCHAR; - case DECIMAL: - return DECIMAL; - case DECIMALV2: - return DECIMALV2; - case HLL: - return HLL; - default: - return INVALID_TYPE; - - } - } - - public PrimitiveType getMaxResolutionType() { - if (this == BOOLEAN) { - return BOOLEAN; - } else if (this == LARGEINT) { - // if (this == LARGEINT) { - return LARGEINT; - } else if (isFixedPointType()) { - return BIGINT; - } else if (isDecimalType()) { - return DECIMAL; - } else if (isDecimalV2Type()) { - return DECIMALV2; - } else if (isDateType()) { - return DATETIME; - // Timestamps get summed as DOUBLE for AVG. 
- } else if (isFloatingPointType()) { - return DOUBLE; - } else if (isNull()) { - return NULL_TYPE; - } else { - return INVALID_TYPE; - } - } - public boolean isNumericType() { return isFixedPointType() || isFloatingPointType() || isDecimalType() || isDecimalV2Type(); } @@ -800,41 +729,4 @@ public int getOlapColumnIndexSize() { return this.getSlotSize(); } } - - public static PrimitiveType getCmpType(PrimitiveType t1, PrimitiveType t2) { - PrimitiveType t1ResultType = t1.getResultType(); - PrimitiveType t2ResultType = t2.getResultType(); - - // Following logical is compatible with MySQL. - if (t1ResultType == PrimitiveType.VARCHAR && t2ResultType == PrimitiveType.VARCHAR) { - return PrimitiveType.VARCHAR; - } - - if (t1ResultType == PrimitiveType.HLL && t2ResultType == PrimitiveType.HLL) { - return PrimitiveType.HLL; - } - - if (t1ResultType == PrimitiveType.BIGINT && t2ResultType == PrimitiveType.BIGINT) { - return getAssignmentCompatibleType(t1, t2); - } - if ((t1ResultType == PrimitiveType.BIGINT - || t1ResultType == PrimitiveType.DECIMAL) - && (t2ResultType == PrimitiveType.BIGINT - || t2ResultType == PrimitiveType.DECIMAL)) { - return PrimitiveType.DECIMAL; - } - if ((t1ResultType == PrimitiveType.BIGINT - || t1ResultType == PrimitiveType.DECIMALV2) - && (t2ResultType == PrimitiveType.BIGINT - || t2ResultType == PrimitiveType.DECIMALV2)) { - return PrimitiveType.DECIMALV2; - } - if ((t1ResultType == PrimitiveType.BIGINT - || t1ResultType == PrimitiveType.LARGEINT) - && (t2ResultType == PrimitiveType.BIGINT - || t2ResultType == PrimitiveType.LARGEINT)) { - return PrimitiveType.LARGEINT; - } - return PrimitiveType.DOUBLE; - } } diff --git a/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java b/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java index 3d4618aef78d08..4aa2aaaef49d65 100644 --- a/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java +++ b/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java @@ -166,6 +166,7 @@ 
public static ScalarFunction createBuiltinOperator( case CHAR: case VARCHAR: case HLL: + case BITMAP: beFn += "_string_val"; break; case DATE: diff --git a/fe/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/src/main/java/org/apache/doris/catalog/ScalarType.java index 3078e406b6d57a..21bcfd25d0203b 100644 --- a/fe/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -121,6 +121,8 @@ public static ScalarType createType(PrimitiveType type) { return createVarcharType(); case HLL: return createHllType(); + case BITMAP: + return BITMAP; case DATE: return DATE; case DATETIME: @@ -166,6 +168,8 @@ public static ScalarType createType(String type) { return createVarcharType(); case "HLL": return createHllType(); + case "BITMAP": + return BITMAP; case "DATE": return DATE; case "DATETIME": @@ -331,6 +335,7 @@ public String toSql(int depth) { case DATE: case DATETIME: case HLL: + case BITMAP: stringBuilder.append(type.toString().toLowerCase()); break; default: @@ -751,6 +756,8 @@ public int getStorageLayoutBytes() { return len; case HLL: return 16385; + case BITMAP: + return 1024; // this is a estimated value default: return 0; } diff --git a/fe/src/main/java/org/apache/doris/catalog/Type.java b/fe/src/main/java/org/apache/doris/catalog/Type.java index c84aab101ad362..475fab7cf125d9 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/src/main/java/org/apache/doris/catalog/Type.java @@ -76,6 +76,7 @@ public abstract class Type { public static final ScalarType VARCHAR = ScalarType.createVarcharType(-1); public static final ScalarType HLL = ScalarType.createHllType(); public static final ScalarType CHAR = (ScalarType) ScalarType.createCharType(-1); + public static final ScalarType BITMAP = new ScalarType(PrimitiveType.BITMAP); private static ArrayList integerTypes; private static ArrayList numericTypes; @@ -112,6 +113,7 @@ public abstract class Type { supportedTypes.add(DOUBLE); 
supportedTypes.add(VARCHAR); supportedTypes.add(HLL); + supportedTypes.add(BITMAP); supportedTypes.add(CHAR); supportedTypes.add(DATE); supportedTypes.add(DATETIME); @@ -191,6 +193,10 @@ public boolean isHllType() { return isScalarType(PrimitiveType.HLL); } + public boolean isBitmapType() { + return isScalarType(PrimitiveType.BITMAP); + } + public boolean isScalarType() { return this instanceof ScalarType; } @@ -330,41 +336,6 @@ public boolean matchesType(Type t) { return false; } -// /** -// * Gets the ColumnType from the given FieldSchema by using Impala's SqlParser. -// * Returns null if the FieldSchema could not be parsed. -// * The type can either be: -// * - Supported by Impala, in which case the type is returned. -// * - A type Impala understands but is not yet implemented (e.g. date), the type is -// * returned but type.IsSupported() returns false. -// * - A type Impala can't understand at all in which case null is returned. -// */ -// public static Type parseColumnType(String typeStr) { -// // Wrap the type string in a CREATE TABLE stmt and use Impala's Parser -// // to get the ColumnType. -// // Pick a table name that can't be used. -// String stmt = String.format("CREATE TABLE $DUMMY ($DUMMY %s)", typeStr); -// SqlScanner input = new SqlScanner(new StringReader(stmt)); -// SqlParser parser = new SqlParser(input); -// CreateTableStmt createTableStmt; -// try { -// Object o = parser.parse().value; -// if (!(o instanceof CreateTableStmt)) { -// // Should never get here. -// throw new IllegalStateException("Couldn't parse create table stmt."); -// } -// createTableStmt = (CreateTableStmt) o; -// if (createTableStmt.getColumnDefs().isEmpty()) { -// // Should never get here. 
-// throw new IllegalStateException("Invalid create table stmt."); -// } -// } catch (Exception e) { -// return null; -// } -// TypeDef typeDef = createTableStmt.getColumnDefs().get(0).getTypeDef(); -// return typeDef.getType(); -// } - /** * Returns true if t1 can be implicitly cast to t2 according to Impala's casting rules. * Implicit casts are always allowed when no loss of precision would result (i.e. every @@ -483,6 +454,8 @@ public static Type fromPrimitiveType(PrimitiveType type) { return Type.VARCHAR; case HLL: return Type.HLL; + case BITMAP: + return Type.BITMAP; default: return null; } @@ -752,6 +725,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[BOOLEAN.ordinal()][VARCHAR.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[BOOLEAN.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[BOOLEAN.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[BOOLEAN.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // TINYINT compatibilityMatrix[TINYINT.ordinal()][SMALLINT.ordinal()] = PrimitiveType.SMALLINT; @@ -769,6 +743,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[TINYINT.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[TINYINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[TINYINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[TINYINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // SMALLINT compatibilityMatrix[SMALLINT.ordinal()][INT.ordinal()] = PrimitiveType.INT; @@ -785,6 +760,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[SMALLINT.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[SMALLINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[SMALLINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[SMALLINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // INT 
compatibilityMatrix[INT.ordinal()][BIGINT.ordinal()] = PrimitiveType.BIGINT; @@ -804,6 +780,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[INT.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[INT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[INT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[INT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // BIGINT // 64 bit integer does not fit in mantissa of double or float. @@ -824,6 +801,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[BIGINT.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[BIGINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[BIGINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[BIGINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // LARGEINT compatibilityMatrix[LARGEINT.ordinal()][FLOAT.ordinal()] = PrimitiveType.DOUBLE; @@ -836,6 +814,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[LARGEINT.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.DECIMALV2; compatibilityMatrix[LARGEINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[LARGEINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[LARGEINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // FLOAT compatibilityMatrix[FLOAT.ordinal()][DOUBLE.ordinal()] = PrimitiveType.DOUBLE; @@ -847,6 +826,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[FLOAT.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[FLOAT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[FLOAT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[FLOAT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // DOUBLE compatibilityMatrix[DOUBLE.ordinal()][DATE.ordinal()] = PrimitiveType.INVALID_TYPE; @@ -857,6 
+837,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[DOUBLE.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DOUBLE.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DOUBLE.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; + compatibilityMatrix[DOUBLE.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // DATE compatibilityMatrix[DATE.ordinal()][DATETIME.ordinal()] = PrimitiveType.DATETIME; @@ -866,6 +847,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[DATE.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.DECIMALV2; compatibilityMatrix[DATE.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DATE.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[DATE.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // DATETIME compatibilityMatrix[DATETIME.ordinal()][CHAR.ordinal()] = PrimitiveType.INVALID_TYPE; @@ -874,6 +856,7 @@ public Integer getNumPrecRadix() { compatibilityMatrix[DATETIME.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.DECIMALV2; compatibilityMatrix[DATETIME.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DATETIME.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[DATETIME.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // We can convert some but not all string values to timestamps. 
// CHAR @@ -882,24 +865,32 @@ public Integer getNumPrecRadix() { compatibilityMatrix[CHAR.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[CHAR.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[CHAR.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[CHAR.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // VARCHAR compatibilityMatrix[VARCHAR.ordinal()][DECIMAL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[VARCHAR.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // DECIMAL compatibilityMatrix[DECIMAL.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.DECIMALV2; compatibilityMatrix[DECIMAL.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DECIMAL.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[DECIMAL.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // DECIMALV2 compatibilityMatrix[DECIMALV2.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DECIMALV2.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[DECIMALV2.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; // HLL compatibilityMatrix[HLL.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[HLL.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + + // BITMAP + compatibilityMatrix[BITMAP.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; // Check all of the necessary entries that should be filled. 
// ignore binary @@ -939,6 +930,7 @@ public Type getResultType() { case CHAR: case VARCHAR: case HLL: + case BITMAP: return VARCHAR; case DECIMAL: return DECIMAL; diff --git a/fe/src/main/java/org/apache/doris/common/util/Util.java b/fe/src/main/java/org/apache/doris/common/util/Util.java index cd2de44a6dc6f6..ce21fec6bf7fb9 100644 --- a/fe/src/main/java/org/apache/doris/common/util/Util.java +++ b/fe/src/main/java/org/apache/doris/common/util/Util.java @@ -67,6 +67,7 @@ public class Util { TYPE_STRING_MAP.put(PrimitiveType.DECIMALV2, "decimal(%d,%d)"); TYPE_STRING_MAP.put(PrimitiveType.HLL, "varchar(%d)"); TYPE_STRING_MAP.put(PrimitiveType.BOOLEAN, "bool"); + TYPE_STRING_MAP.put(PrimitiveType.BITMAP, "bitmap"); } private static class CmdWorker extends Thread { diff --git a/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java b/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java index 8c22d9d51d8778..e8c99461097a75 100644 --- a/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java +++ b/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java @@ -543,6 +543,9 @@ public Map toDppColumn() { case HLL: columnType = "HLL"; break; + case BITMAP: + columnType = "BITMAP"; + break; case DECIMAL: columnType = "DECIMAL"; break; diff --git a/fe/src/main/jflex/sql_scanner.flex b/fe/src/main/jflex/sql_scanner.flex index 705b8999161566..a82d55fc6caa4f 100644 --- a/fe/src/main/jflex/sql_scanner.flex +++ b/fe/src/main/jflex/sql_scanner.flex @@ -76,6 +76,7 @@ import org.apache.doris.common.util.SqlUtils; keywordMap.put("begin", new Integer(SqlParserSymbols.KW_BEGIN)); keywordMap.put("between", new Integer(SqlParserSymbols.KW_BETWEEN)); keywordMap.put("bigint", new Integer(SqlParserSymbols.KW_BIGINT)); + keywordMap.put("bitmap", new Integer(SqlParserSymbols.KW_BITMAP)); keywordMap.put("boolean", new Integer(SqlParserSymbols.KW_BOOLEAN)); keywordMap.put("hll", new Integer(SqlParserSymbols.KW_HLL)); keywordMap.put("both", new 
Integer(SqlParserSymbols.KW_BOTH)); diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index d2c979e1d94328..66c0241c5d53c8 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -48,11 +48,12 @@ message MetadataPairPB { enum EncodingTypePB { UNKNOWN_ENCODING = 0; DEFAULT_ENCODING = 1; - PLAIN_ENCODING = 2; + PLAIN_ENCODING = 2; // for non-binary type PREFIX_ENCODING = 3; RLE = 4; DICT_ENCODING = 5; BIT_SHUFFLE = 6; + BINARY_PLAIN_ENCODING = 7; } enum CompressionTypePB { diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index 20529cebd9b6fe..da3a2d6303f623 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -73,7 +73,8 @@ enum TPrimitiveType { VARCHAR, HLL, DECIMALV2, - TIME + TIME, + OBJECT } enum TTypeNodeType {