diff --git a/be/src/common/config.h b/be/src/common/config.h index 4529ad2e8508f9..15a990769a5ab3 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -419,6 +419,11 @@ namespace config { // same cache size configuration. // TODO(cmy): use different config to set different client cache if necessary. CONF_Int32(max_client_cache_size_per_host, "10"); + + // Controls how rows containing incorrect data are handled during load. + // If set to true, rows with incorrect data from the file are filtered out. + // If set to false, the incorrect values are loaded as NULL. + CONF_Bool(enable_load_strict, "true"); } // namespace config } // namespace doris diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index e56c7033beb146..caf194dd125dfa 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -586,8 +586,8 @@ bool BrokerScanner::fill_dest_tuple(const Slice& line, Tuple* dest_tuple, MemPoo } ExprContext* ctx = _dest_expr_ctx[ctx_idx++]; - void* value = ctx->get_value(_src_tuple_row); - if (value == nullptr) { + // if src slot is null + if (_src_tuple_row->get_tuple(0)->is_null(slot_desc->null_indicator_offset())) { if (slot_desc->is_nullable()) { dest_tuple->set_null(slot_desc->null_indicator_offset()); continue; @@ -600,9 +600,27 @@ bool BrokerScanner::fill_dest_tuple(const Slice& line, Tuple* dest_tuple, MemPoo return false; } } - dest_tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = dest_tuple->get_slot(slot_desc->tuple_offset()); - RawValue::write(value, slot, slot_desc->type(), mem_pool); + // if src slot is not null + else { + void* value = ctx->get_value(_src_tuple_row); + // the current slot holds incorrect data + if ((value == nullptr) && (config::enable_load_strict)) { + std::stringstream error_msg; + error_msg << "column(" << slot_desc->col_name() << ") value is incorrect"; + _state->append_error_msg_to_file( + std::string(line.data, line.size), error_msg.str()); + 
_counter->num_rows_filtered++; + return false; + } + else if (value == nullptr) { + dest_tuple->set_null(slot_desc->null_indicator_offset()); + continue; + } + dest_tuple->set_not_null(slot_desc->null_indicator_offset()); + void* slot = dest_tuple->get_slot(slot_desc->tuple_offset()); + RawValue::write(value, slot, slot_desc->type(), mem_pool); + } + } return true; } diff --git a/docs/documentation/cn/administrator-guide/config/be_config.md b/docs/documentation/cn/administrator-guide/config/be_config.md new file mode 100644 index 00000000000000..7457f867b08087 --- /dev/null +++ b/docs/documentation/cn/administrator-guide/config/be_config.md @@ -0,0 +1,20 @@ +# be 配置项说明 +## enable_load_strict 配置说明 + +be 的配置中有一个参数 enable_load_strict,用于控制所有导入方式在类型转换遇到错误数据时,是否严格过滤(filter)。 + +enable_load_strict 参数只对导入中的类型转换有效。对于类型转换来说,如果 enable_load_strict 为 true,则错误的数据将被 filter;如果为 false,则错误的数据将被置为 NULL 后导入。 + +对于导入的某列包含函数变换的,导入的值和函数的结果一致,strict 对其不产生影响(其中 strftime 等 broker 系统支持的函数也属于这类)。 + +### strict 与类型转换关系 + +这里以列类型为 int 来举例。 +注:以下结果适用于表中的列允许导入空值的情况。 + +source data | source data example | string to int | enable_load_strict | load_data +------------|---------------------|-----------------|--------------------|--------- +空值 | \N | N/A | true or false | NULL +not null | aaa | NULL | true | filtered +not null | aaa | NULL | false | NULL +not null | 1 | 1 | true or false | correct data