Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions be/src/exec/olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
break;
}
case TYPE_VARCHAR:
case TYPE_OBJECT:
case TYPE_HLL: {
Slice* slice = reinterpret_cast<Slice*>(ptr);
StringValue *slot = tuple->get_string_slot(slot_desc->tuple_offset());
Expand Down
6 changes: 0 additions & 6 deletions be/src/exec/tablet_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -712,12 +712,6 @@ int OlapTableSink::_validate_data(RuntimeState* state, RowBatch* batch, Bitmap*
case TYPE_VARCHAR: {
// Fixed length string
StringValue* str_val = (StringValue*)slot;
// todo(kks): varchar(0) means bitmap_union agg type
// we will remove this special handle when we add a special type for bitmap_union
if (desc->type().type == TYPE_VARCHAR && desc->type().len == 0) {
continue;
}

if (str_val->len > desc->type().len) {
std::stringstream ss;
ss << "the length of input is too long than schema. "
Expand Down
10 changes: 7 additions & 3 deletions be/src/exprs/agg_fn_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ inline void AggFnEvaluator::set_any_val(

case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_HLL:
case TYPE_OBJECT:
reinterpret_cast<const StringValue*>(slot)->to_string_val(
reinterpret_cast<StringVal*>(dst));
return;
Expand Down Expand Up @@ -404,6 +405,7 @@ inline void AggFnEvaluator::set_output_slot(const AnyVal* src,
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
*reinterpret_cast<StringValue*>(slot) =
StringValue::from_string_val(*reinterpret_cast<const StringVal*>(src));
return;
Expand Down Expand Up @@ -598,7 +600,8 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) {

case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL: {
case TYPE_HLL:
case TYPE_OBJECT: {
StringVal* value = reinterpret_cast<StringVal*>(_staging_input_vals[i]);
memcpy(begin, value->ptr, value->len);
begin += value->len;
Expand Down Expand Up @@ -940,7 +943,8 @@ void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* s

case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL :{
case TYPE_HLL:
case TYPE_OBJECT: {
typedef StringVal(*Fn)(FunctionContext*, AnyVal*);
StringVal v = reinterpret_cast<Fn>(fn)(agg_fn_ctx, _staging_intermediate_val);
set_output_slot(&v, dst_slot_desc, dst);
Expand Down
5 changes: 4 additions & 1 deletion be/src/exprs/anyval_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type) {
case TYPE_CHAR:
case TYPE_HLL:
case TYPE_VARCHAR:
case TYPE_OBJECT:
return pool->add(new StringVal);

case TYPE_DECIMAL:
Expand Down Expand Up @@ -143,7 +144,9 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip
case TYPE_HLL:
out.type = FunctionContext::TYPE_HLL;
out.len = type.len;
break;
break;
case TYPE_OBJECT:
out.type = FunctionContext::TYPE_OBJECT;
case TYPE_CHAR:
out.type = FunctionContext::TYPE_CHAR;
out.len = type.len;
Expand Down
41 changes: 3 additions & 38 deletions be/src/exprs/anyval_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,44 +184,6 @@ class AnyValUtil {
return HashUtil::murmur_hash64A(&v.val, 8, seed);
}

static doris_udf::FunctionContext::Type primitive_type_to_type(const PrimitiveType& type) {
switch (type) {
case TYPE_NULL:
return doris_udf::FunctionContext::TYPE_NULL;
case TYPE_BOOLEAN:
return doris_udf::FunctionContext::TYPE_BOOLEAN;
case TYPE_TINYINT:
return doris_udf::FunctionContext::TYPE_TINYINT;
case TYPE_SMALLINT:
return doris_udf::FunctionContext::TYPE_SMALLINT;
case TYPE_INT:
return doris_udf::FunctionContext::TYPE_INT;
case TYPE_BIGINT:
return doris_udf::FunctionContext::TYPE_BIGINT;
case TYPE_LARGEINT:
return doris_udf::FunctionContext::TYPE_LARGEINT;
case TYPE_FLOAT:
return doris_udf::FunctionContext::TYPE_FLOAT;
case TYPE_DOUBLE:
return doris_udf::FunctionContext::TYPE_DOUBLE;
case TYPE_DATE:
return doris_udf::FunctionContext::TYPE_DATE;
case TYPE_DATETIME:
return doris_udf::FunctionContext::TYPE_DATETIME;
case TYPE_HLL:
case TYPE_CHAR:
case TYPE_VARCHAR:
return doris_udf::FunctionContext::TYPE_STRING;
case TYPE_DECIMAL:
return doris_udf::FunctionContext::TYPE_DECIMAL;
case TYPE_DECIMALV2:
return doris_udf::FunctionContext::TYPE_DECIMALV2;
break;
default:
DCHECK(false) << "Unknown type: " << type;
}
return doris_udf::FunctionContext::TYPE_NULL;
}
// Returns the byte size of *Val for type t.
static int any_val_size(const TypeDescriptor& t) {
switch (t.type) {
Expand Down Expand Up @@ -249,6 +211,7 @@ class AnyValUtil {
case TYPE_DOUBLE:
return sizeof(doris_udf::DoubleVal);

case TYPE_OBJECT:
case TYPE_HLL:
case TYPE_CHAR:
case TYPE_VARCHAR:
Expand Down Expand Up @@ -281,6 +244,7 @@ class AnyValUtil {
case TYPE_LARGEINT: return alignof(LargeIntVal);
case TYPE_FLOAT: return alignof(FloatVal);
case TYPE_DOUBLE: return alignof(DoubleVal);
case TYPE_OBJECT:
case TYPE_HLL:
case TYPE_VARCHAR:
case TYPE_CHAR:
Expand Down Expand Up @@ -377,6 +341,7 @@ class AnyValUtil {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
reinterpret_cast<const StringValue*>(slot)->to_string_val(
reinterpret_cast<doris_udf::StringVal*>(dst));
return;
Expand Down
2 changes: 2 additions & 0 deletions be/src/exprs/case_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ void CaseExpr::get_child_val(int child_idx, ExprContext* ctx, TupleRow* row, Any
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
*reinterpret_cast<StringVal*>(dst) = _children[child_idx]->get_string_val(ctx, row);
break;
case TYPE_DECIMAL:
Expand Down Expand Up @@ -373,6 +374,7 @@ bool CaseExpr::any_val_eq(const TypeDescriptor& type, const AnyVal* v1, const An
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
return AnyValUtil::equals(type, *reinterpret_cast<const StringVal*>(v1),
*reinterpret_cast<const StringVal*>(v2));
case TYPE_DECIMAL:
Expand Down
12 changes: 4 additions & 8 deletions be/src/exprs/expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ Expr::Expr(const TypeDescriptor& type) :
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
_node_type = (TExprNodeType::STRING_LITERAL);
break;

Expand Down Expand Up @@ -212,6 +213,7 @@ Expr::Expr(const TypeDescriptor& type, bool is_slotref) :
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
_node_type = (TExprNodeType::STRING_LITERAL);
break;

Expand Down Expand Up @@ -379,13 +381,6 @@ Status Expr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, Expr** e
*expr = pool->add(new ScalarFnCall(texpr_node));
}
return Status::OK();
//case TExprNodeType::AGG_EXPR: {
// if (!texpr_node.__isset.agg_expr) {
// return Status::InternalError("Aggregation expression not set in thrift node");
// }
// *expr = pool->add(new AggregateExpr(texpr_node));
// return Status::OK();
//}

case TExprNodeType::CASE_EXPR: {
if (!texpr_node.__isset.case_expr) {
Expand Down Expand Up @@ -752,7 +747,8 @@ doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) {
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL: {
case TYPE_HLL:
case TYPE_OBJECT: {
_constant_val.reset(new StringVal(get_string_val(context, NULL)));
break;
}
Expand Down
5 changes: 3 additions & 2 deletions be/src/exprs/expr_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,10 +335,11 @@ void* ExprContext::get_value(Expr* e, TupleRow* row) {
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL: {
case TYPE_HLL:
case TYPE_OBJECT: {
doris_udf::StringVal v = e->get_string_val(this, row);
if (v.is_null) {
return NULL;
return nullptr;
}
_result.string_val.ptr = reinterpret_cast<char*>(v.ptr);
_result.string_val.len = v.len;
Expand Down
7 changes: 5 additions & 2 deletions be/src/exprs/new_agg_fn_evaluator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ void NewAggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
*reinterpret_cast<StringValue*>(slot) =
StringValue::from_string_val(*reinterpret_cast<const StringVal*>(src));
return;
Expand Down Expand Up @@ -369,7 +370,8 @@ inline void NewAggFnEvaluator::set_any_val(

case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_HLL:
case TYPE_OBJECT:
reinterpret_cast<const StringValue*>(slot)->to_string_val(
reinterpret_cast<StringVal*>(dst));
return;
Expand Down Expand Up @@ -642,7 +644,8 @@ void NewAggFnEvaluator::SerializeOrFinalize(Tuple* src,
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:{
case TYPE_HLL:
case TYPE_OBJECT: {
typedef StringVal(*Fn)(FunctionContext*, AnyVal*);
StringVal v = reinterpret_cast<Fn>(fn)(
agg_fn_ctx_.get(), staging_intermediate_val_);
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/aggregate_func.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ AggregateFuncResolver::AggregateFuncResolver() {
add_aggregate_mapping<OLAP_FIELD_AGGREGATION_HLL_UNION, OLAP_FIELD_TYPE_HLL>();

// Bitmap Aggregate Function
add_aggregate_mapping<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_VARCHAR>();
add_aggregate_mapping<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_OBJECT>();
add_aggregate_mapping<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_VARCHAR>(); //for backward compatibility
}

AggregateFuncResolver::~AggregateFuncResolver() {
Expand Down
10 changes: 9 additions & 1 deletion be/src/olap/aggregate_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_HLL_UNION, OLAP_FIELD_TYPE_HLL
// when data load, after bitmap_init fucntion, bitmap_union column won't be null
// so when init, update bitmap, the src is not null
template <>
struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_VARCHAR> {
struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_OBJECT> {
static void init(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, ObjectPool* agg_pool) {
DCHECK_EQ(src_null, false);
dst->set_not_null();
Expand All @@ -456,6 +456,7 @@ struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_
// we use zero size represent this slice is a agg object
dst_slice->size = 0;
auto* bitmap = new RoaringBitmap(src_slice->data);

dst_slice->data = (char*) bitmap;

mem_pool->mem_tracker()->consume(sizeof(RoaringBitmap));
Expand Down Expand Up @@ -491,6 +492,13 @@ struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_
}
};


// for backward compatibility
template <>
struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_VARCHAR> :
public AggregateFuncTraits<OLAP_FIELD_AGGREGATION_BITMAP_UNION, OLAP_FIELD_TYPE_OBJECT> {};


template<FieldAggregationMethod aggMethod, FieldType fieldType>
struct AggregateTraits : public AggregateFuncTraits<aggMethod, fieldType> {
static const FieldAggregationMethod agg_method = aggMethod;
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ enum FieldType {
OLAP_FIELD_TYPE_UNKNOWN = 21, // UNKNOW Type
OLAP_FIELD_TYPE_NONE = 22,
OLAP_FIELD_TYPE_HLL = 23,
OLAP_FIELD_TYPE_BOOL = 24
OLAP_FIELD_TYPE_BOOL = 24,
OLAP_FIELD_TYPE_OBJECT = 25
};

// 定义Field支持的所有聚集方法
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/row_block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ void RowBlock::_compute_layout() {

// All field has a nullbyte in memory
if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL
|| column.type() == OLAP_FIELD_TYPE_CHAR) {
|| column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_OBJECT) {
// 变长部分额外计算下实际最大的字符串长度(此处length已经包括记录Length的2个字节)
memory_size += sizeof(Slice) + sizeof(char);
} else {
Expand Down
19 changes: 18 additions & 1 deletion be/src/olap/rowset/segment_v2/encoding_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
#include "olap/rowset/segment_v2/encoding_info.h"

#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/binary_dict_page.h"
#include "olap/rowset/segment_v2/binary_plain_page.h"
#include "olap/rowset/segment_v2/bitshuffle_page.h"
#include "olap/rowset/segment_v2/plain_page.h"
#include "olap/rowset/segment_v2/rle_page.h"
#include "olap/rowset/segment_v2/binary_dict_page.h"
#include "gutil/strings/substitute.h"

namespace doris {
Expand All @@ -38,9 +40,23 @@ struct TypeEncodingTraits { };
template<FieldType type>
struct TypeEncodingTraits<type, PLAIN_ENCODING> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new PlainPageBuilder<type>(opts);
return Status::OK();
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) {
*decoder = new PlainPageDecoder<type>(data, opts);
return Status::OK();
}
};

template<FieldType type>
struct TypeEncodingTraits<type, BINARY_PLAIN_ENCODING> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
*builder = new BinaryPlainPageBuilder(opts);
return Status::OK();
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) {
*decoder = new BinaryPlainPageDecoder(data, opts);
return Status::OK();
}
};
Expand Down Expand Up @@ -149,6 +165,7 @@ EncodingInfoResolver::EncodingInfoResolver() {
_add_map<OLAP_FIELD_TYPE_DATETIME, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_DECIMAL, BIT_SHUFFLE>();
_add_map<OLAP_FIELD_TYPE_DECIMAL, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_OBJECT, BINARY_PLAIN_ENCODING>();
}

EncodingInfoResolver::~EncodingInfoResolver() {
Expand Down
8 changes: 8 additions & 0 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
type = OLAP_FIELD_TYPE_LIST;
} else if (0 == upper_type_str.compare("MAP")) {
type = OLAP_FIELD_TYPE_MAP;
} else if (0 == upper_type_str.compare("OBJECT")) {
type = OLAP_FIELD_TYPE_OBJECT;
} else {
LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
type = OLAP_FIELD_TYPE_UNKNOWN;
Expand Down Expand Up @@ -160,6 +162,7 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) {

case OLAP_FIELD_TYPE_BOOL:
return "BOOLEAN";

case OLAP_FIELD_TYPE_HLL:
return "HLL";

Expand All @@ -172,6 +175,9 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) {
case OLAP_FIELD_TYPE_MAP:
return "MAP";

case OLAP_FIELD_TYPE_OBJECT:
return "OBJECT";

default:
return "UNKNOWN";
}
Expand Down Expand Up @@ -226,6 +232,8 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3
return 4;
case TPrimitiveType::DOUBLE:
return 8;
case TPrimitiveType::OBJECT:
return 16;
case TPrimitiveType::CHAR:
return string_length;
case TPrimitiveType::VARCHAR:
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ TypeInfoResolver::TypeInfoResolver() {
add_mapping<OLAP_FIELD_TYPE_CHAR>();
add_mapping<OLAP_FIELD_TYPE_VARCHAR>();
add_mapping<OLAP_FIELD_TYPE_HLL>();
add_mapping<OLAP_FIELD_TYPE_OBJECT>();
}

TypeInfoResolver::~TypeInfoResolver() {}
Expand Down
Loading