From 17ab3f63e89c2dd21f2bea5a3d0a943aaafdb6a2 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 3 Jun 2019 10:57:34 +0800 Subject: [PATCH 1/2] Change strategy of incorrect data This change adds a config named enable_load_strict which is used to reject incorrect data. When the config is set to false, the incorrect data will be loaded as NULL, just like before. When the config is set to true, the incorrect data will be filtered. --- be/src/common/config.h | 5 +++++ be/src/exec/broker_scanner.cpp | 28 +++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 4529ad2e8508f9..15a990769a5ab3 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -419,6 +419,11 @@ namespace config { // same cache size configuration. // TODO(cmy): use different config to set different client cache if necessary. CONF_Int32(max_client_cache_size_per_host, "10"); + + // This config controls strict handling of incorrect data when loading. + // If it is set to true, the incorrect data from file will be filtered. + // If it is set to false, the incorrect data will be loaded and set to NULL. 
+ CONF_Bool(enable_load_strict, "true"); } // namespace config } // namespace doris diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index e56c7033beb146..caf194dd125dfa 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -586,8 +586,8 @@ bool BrokerScanner::fill_dest_tuple(const Slice& line, Tuple* dest_tuple, MemPoo } ExprContext* ctx = _dest_expr_ctx[ctx_idx++]; - void* value = ctx->get_value(_src_tuple_row); - if (value == nullptr) { + // if src slot is null + if (_src_tuple_row->get_tuple(0)->is_null(slot_desc->null_indicator_offset())) { if (slot_desc->is_nullable()) { dest_tuple->set_null(slot_desc->null_indicator_offset()); continue; @@ -600,9 +600,27 @@ bool BrokerScanner::fill_dest_tuple(const Slice& line, Tuple* dest_tuple, MemPoo return false; } } - dest_tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = dest_tuple->get_slot(slot_desc->tuple_offset()); - RawValue::write(value, slot, slot_desc->type(), mem_pool); + // if src slot is not null + else { + void* value = ctx->get_value(_src_tuple_row); + // current slot is a incorrect data + if ((value == nullptr) && (config::enable_load_strict)) { + std::stringstream error_msg; + error_msg << "column(" << slot_desc->col_name() << ") value is incorrect"; + _state->append_error_msg_to_file( + std::string(line.data, line.size), error_msg.str()); + _counter->num_rows_filtered++; + return false; + } + else if (value == nullptr) { + dest_tuple->set_null(slot_desc->null_indicator_offset()); + continue; + } + dest_tuple->set_not_null(slot_desc->null_indicator_offset()); + void* slot = dest_tuple->get_slot(slot_desc->tuple_offset()); + RawValue::write(value, slot, slot_desc->type(), mem_pool); + } + } return true; } From 568da9b43aa1ea412730ce4eb461e659149e72a3 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Tue, 4 Jun 2019 13:03:15 +0800 Subject: [PATCH 2/2] Add introduction of enable_load_strict --- 
.../administrator-guide/config/be_config.md | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 docs/documentation/cn/administrator-guide/config/be_config.md diff --git a/docs/documentation/cn/administrator-guide/config/be_config.md b/docs/documentation/cn/administrator-guide/config/be_config.md new file mode 100644 index 00000000000000..7457f867b08087 --- /dev/null +++ b/docs/documentation/cn/administrator-guide/config/be_config.md @@ -0,0 +1,20 @@ +# be 配置项说明 +## enable_load_strict 配置说明 + +be 的配置中有一个参数 enable_load_strict 用于限制所有导入方式中类型转换如果遇到错误数据,是否严格 filter。 + +enable_load_strict 参数只对导入中的类型转换有效,对于类型转换来说,如果 enable_load_strict 为 true,则错误的数据将被 filter;如果为 false,则错误的数据将被置为 NULL 后导入。 + +对于导入的某列包含函数变换的,导入的值和函数的结果一致,strict 对其不产生影响(其中 strftime 等 broker 系统支持的函数也属于这类)。 + +### strict 与类型转换关系 + +这里以列类型为 int 来举例 +注:当表中的列允许导入空值时 + +source data | source data example | string to int | enable_load_strict | load_data +------------|---------------------|-----------------|--------------------|--------- +空值 | \N | N/A | true or false | NULL +not null | aaa | NULL | true | filtered +not null | aaa | NULL | false | NULL +not null | 1 | 1 | true or false | correct data