Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions be/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -459,11 +459,17 @@ set(DORIS_LINK_LIBS
Util
DorisGen
Webserver
TestUtil
Geo
Plugin
${WL_END_GROUP}
)
# Link TestUtil only when MAKE_TEST is "ON": re-set DORIS_LINK_LIBS to its
# current contents followed by TestUtil.
if (${MAKE_TEST} STREQUAL "ON")
set(DORIS_LINK_LIBS
${DORIS_LINK_LIBS}
TestUtil
)
endif()


# COMMON_THIRDPARTY are third-party dependencies that can run on all platforms.
# When adding new dependencies, if you don’t know whether it can run on all platforms,
Expand Down Expand Up @@ -611,6 +617,7 @@ if (${MAKE_TEST} STREQUAL "ON")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -DGTEST_USE_OWN_TR1_TUPLE=0")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage -lgcov")
add_definitions(-DBE_TEST)
add_subdirectory(${SRC_DIR}/testutil)
endif ()

add_subdirectory(${SRC_DIR}/agent)
Expand All @@ -625,10 +632,12 @@ add_subdirectory(${SRC_DIR}/http)
add_subdirectory(${SRC_DIR}/olap)
add_subdirectory(${SRC_DIR}/runtime)
add_subdirectory(${SRC_DIR}/service)
add_subdirectory(${SRC_DIR}/testutil)
#add_subdirectory(${SRC_DIR}/tools)
add_subdirectory(${SRC_DIR}/udf)
add_subdirectory(${SRC_DIR}/tools)

# Add the tools subdirectory only when MAKE_TEST is "OFF".
if (${MAKE_TEST} STREQUAL "OFF")
add_subdirectory(${SRC_DIR}/tools)
endif()

add_subdirectory(${SRC_DIR}/util)
add_subdirectory(${SRC_DIR}/plugin)

Expand Down
5 changes: 3 additions & 2 deletions be/src/exec/es/es_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ std::string ExtLiteral::value_to_string() {
break;
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING:
ss << get_string();
break;
case TYPE_DATE:
Expand Down Expand Up @@ -134,7 +135,7 @@ double ExtLiteral::get_double() {
}

std::string ExtLiteral::get_string() {
DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR);
DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR || _type == TYPE_STRING);
return (reinterpret_cast<StringValue*>(_value))->to_string();
}

Expand Down Expand Up @@ -331,7 +332,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) {
}

PrimitiveType type = expr->type().type;
if (type != TYPE_VARCHAR && type != TYPE_CHAR) {
if (type != TYPE_VARCHAR && type != TYPE_CHAR && type != TYPE_STRING) {
return Status::InternalError("build disjuncts failed: like value is not a string");
}
std::string col = slot_desc->col_name();
Expand Down
3 changes: 2 additions & 1 deletion be/src/exec/es/es_scroll_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple,
}
switch (type) {
case TYPE_CHAR:
case TYPE_VARCHAR: {
case TYPE_VARCHAR:
case TYPE_STRING: {
// Sometimes Elasticsearch users post non-string values to an Elasticsearch index.
// Because we read the value from _source, we cannot handle every JSON type, so we just convert the value back to its original string representation.
// This may be tricky, but it lets us work around the issue.
Expand Down
6 changes: 4 additions & 2 deletions be/src/exec/es_scan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,8 @@ bool EsScanNode::to_ext_literal(PrimitiveType slot_type, void* value, TExtLitera
}

case TYPE_CHAR:
case TYPE_VARCHAR: {
case TYPE_VARCHAR:
case TYPE_STRING: {
node_type = (TExprNodeType::STRING_LITERAL);
TStringLiteral string_literal;
string_literal.__set_value((reinterpret_cast<StringValue*>(value))->debug_string());
Expand Down Expand Up @@ -762,7 +763,8 @@ Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple,
int val_idx = cols_next_val_idx[i]++;
switch (slot_desc->type().type) {
case TYPE_CHAR:
case TYPE_VARCHAR: {
case TYPE_VARCHAR:
case TYPE_STRING: {
if (val_idx >= col.string_vals.size()) {
return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING"));
}
Expand Down
1 change: 1 addition & 0 deletions be/src/exec/merge_join_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Status MergeJoinNode::prepare(RuntimeState* state) {

case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING:
_cmp_func.push_back(compare_value<StringValue>);
break;

Expand Down
5 changes: 3 additions & 2 deletions be/src/exec/odbc_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ Status ODBCConnector::query() {
DataBinding* column_data = new DataBinding;
column_data->target_type = SQL_C_CHAR;
auto type = _tuple_desc->slots()[i]->type().type;
column_data->buffer_length = (type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_VARCHAR)
column_data->buffer_length = (type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_STRING)
? BIG_COLUMN_SIZE_BUFFER
: SMALL_COLUMN_SIZE_BUFFER;
column_data->target_value_ptr = malloc(sizeof(char) * column_data->buffer_length);
Expand Down Expand Up @@ -254,7 +254,8 @@ Status ODBCConnector::append(const std::string& table_name, RowBatch* batch,
break;
}
case TYPE_VARCHAR:
case TYPE_CHAR: {
case TYPE_CHAR:
case TYPE_STRING: {
const auto* string_val = (const StringValue*)(item);

if (string_val->ptr == NULL) {
Expand Down
12 changes: 5 additions & 7 deletions be/src/exec/olap_rewrite_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, MemPool* poo
const TColumnType& column_type = _column_types[i];
switch (column_type.type) {
case TPrimitiveType::CHAR:
case TPrimitiveType::VARCHAR: {
case TPrimitiveType::VARCHAR:
case TPrimitiveType::STRING: {
// Fixed length string
StringValue* str_val = (StringValue*)src_value;
if (str_val->len > column_type.len) {
Expand All @@ -145,8 +146,7 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, MemPool* poo
<< "schema length: " << column_type.len << "; "
<< "actual length: " << str_val->len << "; ";
return false;
}
StringValue* dst_val = (StringValue*)tuple->get_slot(slot_desc->tuple_offset());
} StringValue* dst_val = (StringValue*)tuple->get_slot(slot_desc->tuple_offset());
if (column_type.type == TPrimitiveType::CHAR) {
dst_val->ptr = (char*)pool->allocate(column_type.len);
memcpy(dst_val->ptr, str_val->ptr, str_val->len);
Expand All @@ -156,10 +156,8 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, MemPool* poo
dst_val->ptr = (char*)pool->allocate(column_type.len);
memcpy(dst_val->ptr, str_val->ptr, str_val->len);
dst_val->len = str_val->len;
}
break;
}
case TPrimitiveType::DECIMALV2: {
} break;
} case TPrimitiveType::DECIMALV2: {
DecimalV2Value* dec_val = (DecimalV2Value*)src_value;
DecimalV2Value* dst_val = (DecimalV2Value*)tuple->get_slot(slot_desc->tuple_offset());
if (dec_val->greater_than_scale(column_type.scale)) {
Expand Down
9 changes: 6 additions & 3 deletions be/src/exec/olap_scan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,8 @@ Status OlapScanNode::normalize_conjuncts() {

case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL: {
case TYPE_HLL:
case TYPE_STRING: {
ColumnValueRange<StringValue> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
normalize_predicate(range, slots[slot_idx]);
Expand Down Expand Up @@ -952,7 +953,8 @@ Status OlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range, P
case TYPE_SMALLINT:
case TYPE_INT:
case TYPE_BIGINT:
case TYPE_LARGEINT: {
case TYPE_LARGEINT:
case TYPE_STRING: {
func(temp_range, reinterpret_cast<T*>(value));
break;
}
Expand Down Expand Up @@ -1250,7 +1252,8 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
case TYPE_INT:
case TYPE_BIGINT:
case TYPE_LARGEINT:
case TYPE_BOOLEAN: {
case TYPE_BOOLEAN:
case TYPE_STRING: {
range->add_range(to_olap_filter_type(pred->op(), child_idx),
*reinterpret_cast<T*>(value));
break;
Expand Down
3 changes: 2 additions & 1 deletion be/src/exec/olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,8 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
}
case TYPE_VARCHAR:
case TYPE_OBJECT:
case TYPE_HLL: {
case TYPE_HLL:
case TYPE_STRING: {
Slice* slice = reinterpret_cast<Slice*>(ptr);
StringValue* slot = tuple->get_string_slot(slot_desc->tuple_offset());
slot->ptr = slice->data;
Expand Down
1 change: 1 addition & 0 deletions be/src/exec/olap_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ inline CompareLargeFunc get_compare_func(PrimitiveType type) {

case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING:
return compare_large<StringValue>;

default:
Expand Down
3 changes: 2 additions & 1 deletion be/src/exec/parquet_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,8 @@ Status ParquetWriterWrapper::_write_one_row(TupleRow* row) {
break;
}
case TYPE_CHAR:
case TYPE_VARCHAR: {
case TYPE_VARCHAR:
case TYPE_STRING: {
if (_str_schema[index][1] != "byte_array") {
std::stringstream ss;
ss << "project field type is char/varchar, should use byte_array, but the "
Expand Down
2 changes: 1 addition & 1 deletion be/src/exec/partitioned_hash_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ uint32_t PartitionedHashTableCtx::HashVariableLenRow(const uint8_t* expr_values,
// non-string and null slots are already part of 'expr_values'.
// if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING
PrimitiveType type = build_exprs_[i]->type().type;
if (type != TYPE_CHAR && type != TYPE_VARCHAR) {
if (type != TYPE_CHAR && type != TYPE_VARCHAR && type != TYPE_STRING) {
continue;
}

Expand Down
9 changes: 7 additions & 2 deletions be/src/exec/schema_scanner/schema_columns_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ std::string SchemaColumnsScanner::to_mysql_data_type_string(TColumnDesc& desc) {
case TPrimitiveType::DOUBLE:
return "double";
case TPrimitiveType::VARCHAR:
case TPrimitiveType::STRING:
return "varchar";
case TPrimitiveType::CHAR:
return "char";
Expand Down Expand Up @@ -151,6 +152,8 @@ std::string SchemaColumnsScanner::type_to_string(TColumnDesc& desc) {
} else {
return "varchar(20)";
}
case TPrimitiveType::STRING:
return "string";
case TPrimitiveType::CHAR:
if (desc.__isset.columnLength) {
return "char(" + std::to_string(desc.columnLength) + ")";
Expand Down Expand Up @@ -261,7 +264,8 @@ Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) {
// For string columns, the maximum length in characters.
{
int data_type = _desc_result.columns[_column_index].columnDesc.columnType;
if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR) {
if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR ||
data_type == TPrimitiveType::STRING) {
void* slot = tuple->get_slot(_tuple_desc->slots()[8]->tuple_offset());
int64_t* str_slot = reinterpret_cast<int64_t*>(slot);
if (_desc_result.columns[_column_index].columnDesc.__isset.columnLength) {
Expand All @@ -277,7 +281,8 @@ Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) {
// For string columns, the maximum length in bytes.
{
int data_type = _desc_result.columns[_column_index].columnDesc.columnType;
if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR) {
if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR ||
data_type == TPrimitiveType::STRING) {
void* slot = tuple->get_slot(_tuple_desc->slots()[9]->tuple_offset());
int64_t* str_slot = reinterpret_cast<int64_t*>(slot);
if (_desc_result.columns[_column_index].columnDesc.__isset.columnLength) {
Expand Down
15 changes: 14 additions & 1 deletion be/src/exec/tablet_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,7 @@ Status OlapTableSink::prepare(RuntimeState* state) {
case TYPE_DATETIME:
case TYPE_HLL:
case TYPE_OBJECT:
case TYPE_STRING:
_need_validate_data = true;
break;
default:
Expand Down Expand Up @@ -698,7 +699,6 @@ Status OlapTableSink::send(RuntimeState* state, RowBatch* input_batch) {
_convert_batch(state, input_batch, _output_batch.get());
batch = _output_batch.get();
}

int num_invalid_rows = 0;
if (_need_validate_data) {
SCOPED_RAW_TIMER(&_validate_data_ns);
Expand Down Expand Up @@ -939,6 +939,19 @@ int OlapTableSink::_validate_data(RuntimeState* state, RowBatch* batch, Bitmap*
}
break;
}
case TYPE_STRING: {
StringValue* str_val = (StringValue*)slot;
if (str_val->len > desc->type().MAX_STRING_LENGTH) {
ss << "the length of input is too long than schema. "
<< "column_name: " << desc->col_name() << "; "
<< "first 128 bytes of input_str: [" << std::string(str_val->ptr, 128) << "] "
<< "schema length: " << desc->type().MAX_STRING_LENGTH << "; "
<< "actual length: " << str_val->len << "; ";
row_valid = false;
continue;
}
break;
}
case TYPE_DECIMALV2: {
DecimalV2Value dec_val(reinterpret_cast<const PackedInt128*>(slot)->value);
if (dec_val.greater_than_scale(desc->type().scale)) {
Expand Down
3 changes: 2 additions & 1 deletion be/src/exec/text_converter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tu
switch (slot_desc->type().type) {
case TYPE_HLL:
case TYPE_VARCHAR:
case TYPE_CHAR: {
case TYPE_CHAR:
case TYPE_STRING: {
StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
str_slot->ptr = const_cast<char*>(data);
str_slot->len = len;
Expand Down
8 changes: 6 additions & 2 deletions be/src/exprs/agg_fn_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ inline void AggFnEvaluator::set_any_val(const void* slot, const TypeDescriptor&
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
case TYPE_STRING:
reinterpret_cast<const StringValue*>(slot)->to_string_val(
reinterpret_cast<StringVal*>(dst));
return;
Expand Down Expand Up @@ -383,6 +384,7 @@ inline void AggFnEvaluator::set_output_slot(const AnyVal* src, const SlotDescrip
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
case TYPE_STRING:
*reinterpret_cast<StringValue*>(slot) =
StringValue::from_string_val(*reinterpret_cast<const StringVal*>(src));
return;
Expand Down Expand Up @@ -563,7 +565,8 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT: {
case TYPE_OBJECT:
case TYPE_STRING: {
StringVal* value = reinterpret_cast<StringVal*>(_staging_input_vals[i]);
memcpy(begin, value->ptr, value->len);
begin += value->len;
Expand Down Expand Up @@ -890,7 +893,8 @@ void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* s
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT: {
case TYPE_OBJECT:
case TYPE_STRING: {
typedef StringVal (*Fn)(FunctionContext*, AnyVal*);
StringVal v = reinterpret_cast<Fn>(fn)(agg_fn_ctx, _staging_intermediate_val);
set_output_slot(&v, dst_slot_desc, dst);
Expand Down
5 changes: 5 additions & 0 deletions be/src/exprs/anyval_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type) {
case TYPE_HLL:
case TYPE_VARCHAR:
case TYPE_OBJECT:
case TYPE_STRING:
return pool->add(new StringVal);

case TYPE_DECIMALV2:
Expand Down Expand Up @@ -162,6 +163,10 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip
out.children.push_back(column_type_to_type_desc(t));
}
break;
case TYPE_STRING:
out.type = FunctionContext::TYPE_STRING;
out.len = type.len;
break;
default:
DCHECK(false) << "Unknown type: " << type;
}
Expand Down
3 changes: 3 additions & 0 deletions be/src/exprs/anyval_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ class AnyValUtil {
case TYPE_HLL:
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING:
return sizeof(doris_udf::StringVal);

case TYPE_DATE:
Expand Down Expand Up @@ -280,6 +281,7 @@ class AnyValUtil {
case TYPE_HLL:
case TYPE_VARCHAR:
case TYPE_CHAR:
case TYPE_STRING:
return alignof(StringVal);
case TYPE_DATETIME:
case TYPE_DATE:
Expand Down Expand Up @@ -376,6 +378,7 @@ class AnyValUtil {
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
case TYPE_STRING:
reinterpret_cast<const StringValue*>(slot)->to_string_val(
reinterpret_cast<doris_udf::StringVal*>(dst));
return;
Expand Down
Loading