From 3eb2d82995096a1295cd8d064f605904bc3c0fba Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Wed, 18 Sep 2019 18:26:00 +0800
Subject: [PATCH 1/8] hll default value

---
 be/src/olap/aggregate_func.h                      | 15 ++++++++++++---
 .../org/apache/doris/planner/BrokerScanNode.java  |  2 +-
 .../apache/doris/planner/StreamLoadScanNode.java  |  3 ++-
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h
index 25ffdd0390c977..0fa49b4a661903 100644
--- a/be/src/olap/aggregate_func.h
+++ b/be/src/olap/aggregate_func.h
@@ -408,7 +408,11 @@ struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_HLL_UNION, OLAP_FIELD_TYPE_HLL
         dst_slice->size = sizeof(HyperLogLog);
         // use 'placement new' to allocate HyperLogLog on arena, so that we can control the memory usage.
         char* mem = arena->Allocate(dst_slice->size);
-        dst_slice->data = (char*) new (mem) HyperLogLog(src_slice->data);
+        if (src_slice->empty()) {
+            dst_slice->data = (char*) new (mem) HyperLogLog();
+        } else {
+            dst_slice->data = (char*) new (mem) HyperLogLog(src_slice->data);
+        }
     }
 
     static void update(RowCursorCell* dst, const RowCursorCell& src, Arena* arena) {
@@ -420,8 +424,13 @@ struct AggregateFuncTraits<OLAP_FIELD_AGGREGATION_HLL_UNION, OLAP_FIELD_TYPE_HLL
-            HyperLogLog src_hll = HyperLogLog(src_slice->data);
-            dst_hll->merge(src_hll);
+            if (src_slice->empty()) {
+                HyperLogLog src_hll = HyperLogLog();
+                dst_hll->merge(src_hll);
+            } else {
+                HyperLogLog src_hll = HyperLogLog(src_slice->data);
+                dst_hll->merge(src_hll);
+            }
         } else { // for stream load
             auto* src_hll = reinterpret_cast<HyperLogLog*>(src_slice->data);
             dst_hll->merge(*src_hll);
diff --git a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java
index 5aab4a8e4db71f..e07d0ebb9be14e 100644
--- a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java
@@ -271,7 +271,7 @@ private void finalizeParams(ParamCreateContext context) throws UserException, An
         }
 
         // check hll_hash
-        if (destSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL) {
+        if (destSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL && exprMap.get(destSlotDesc.getColumn().getName()) != null) {
             if (!(expr instanceof FunctionCallExpr)) {
                 throw new AnalysisException("HLL column must use hll_hash function, like "
                         + destSlotDesc.getColumn().getName() + "=hll_hash(xxx)");
diff --git a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java
index 440b13a4df8b3e..bbaeb4ddccd535 100644
--- a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java
@@ -179,8 +179,9 @@ private void finalizeParams() throws UserException {
                 }
             }
         }
+
         // check hll_hash
-        if (dstSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL) {
+        if (dstSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL && exprsByName.get(dstSlotDesc.getColumn().getName()) != null) {
             if (!(expr instanceof FunctionCallExpr)) {
                 throw new AnalysisException("HLL column must use hll_hash function, like "
                         + dstSlotDesc.getColumn().getName() + "=hll_hash(xxx)");

From 9996ebebb7b42fc92130ea5a69cd523d73b17421 Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Mon, 23 Sep 2019 18:33:31 +0800
Subject: [PATCH 2/8] add empty_hll

---
 be/src/exprs/hll_function.cpp                     | 14 ++++++++++++++
 be/src/exprs/hll_function.h                       |  5 +++++
 .../org/apache/doris/planner/BrokerScanNode.java  |  6 +++---
 .../apache/doris/planner/StreamLoadScanNode.java  |  6 +++---
 gensrc/script/doris_builtins_functions.py         |  2 ++
 5 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/be/src/exprs/hll_function.cpp b/be/src/exprs/hll_function.cpp
index d8c759bd57c641..07b4651391e778 100644
--- a/be/src/exprs/hll_function.cpp
+++ b/be/src/exprs/hll_function.cpp
@@ -25,6 +25,17 @@ namespace doris {
 using doris_udf::BigIntVal;
 using doris_udf::StringVal;
 
+const std::string HllFunctions::hll_empty_buffer = init_empty_hll();
+std::string HllFunctions::init_empty_hll() {
+    const int HLL_EMPTY_SIZE = 1;
+    std::string buf;
+    std::unique_ptr<HyperLogLog> hll;
+    hll.reset(new HyperLogLog());
+    buf.resize(HLL_EMPTY_SIZE);
+    hll->serialize((char*)buf.c_str());
+    return buf;
+}
+
 void HllFunctions::init() {
 }
 
@@ -50,6 +61,9 @@ void HllFunctions::hll_init(FunctionContext *, StringVal* dst) {
     dst->len = sizeof(HyperLogLog);
     dst->ptr = (uint8_t*)new HyperLogLog();
 }
+StringVal HllFunctions::empty_hll(FunctionContext* ctx) {
+    return AnyValUtil::from_string_temp(ctx, hll_empty_buf);
+}
 
 template <typename T>
 void HllFunctions::hll_update(FunctionContext *, const T &src, StringVal* dst) {
diff --git a/be/src/exprs/hll_function.h b/be/src/exprs/hll_function.h
index e08cbff7bea716..65b2eb456b3880 100644
--- a/be/src/exprs/hll_function.h
+++ b/be/src/exprs/hll_function.h
@@ -18,6 +18,7 @@
 #ifndef DORIS_BE_SRC_QUERY_EXPRS_HLL_FUNCTION_H
 #define DORIS_BE_SRC_QUERY_EXPRS_HLL_FUNCTION_H
 
+#include <string>
 #include "udf/udf.h"
 
 namespace doris {
@@ -26,6 +27,7 @@ class HllFunctions {
 public:
     static void init();
     static StringVal hll_hash(FunctionContext* ctx, const StringVal& dest_base);
+    static StringVal empty_hll(FunctionContext* ctx);
     static void hll_init(FunctionContext*, StringVal* dst);
 
     template <typename T>
@@ -38,6 +40,9 @@ class HllFunctions {
     static StringVal hll_serialize(FunctionContext* ctx, const StringVal& src);
 
     static BigIntVal hll_cardinality(FunctionContext* ctx, const StringVal& src);
+
+    const static std::string hll_empty_buffer;
+    static std::string init_empty_hll();
 };
 }
 
diff --git a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java
index e07d0ebb9be14e..3dff76965766fc 100644
--- a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java
@@ -271,7 +271,7 @@ private void finalizeParams(ParamCreateContext context) throws UserException, An
         }
 
         // check hll_hash
-        if (destSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL && exprMap.get(destSlotDesc.getColumn().getName()) != null) {
+        if (destSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL) {
             if (!(expr instanceof FunctionCallExpr)) {
                 throw new AnalysisException("HLL column must use hll_hash function, like "
                         + destSlotDesc.getColumn().getName() + "=hll_hash(xxx)");
             }
             FunctionCallExpr fn = (FunctionCallExpr) expr;
-            if (!fn.getFnName().getFunction().equalsIgnoreCase("hll_hash")) {
+            if (!fn.getFnName().getFunction().equalsIgnoreCase("hll_hash") && !fn.getFnName().getFunction().equalsIgnoreCase("empty_hll")) {
                 throw new AnalysisException("HLL column must use hll_hash function, like "
-                        + destSlotDesc.getColumn().getName() + "=hll_hash(xxx)");
+                        + destSlotDesc.getColumn().getName() + "=hll_hash(xxx) or " + destSlotDesc.getColumn().getName() + "=empty_hll()");
             }
             expr.setType(Type.HLL);
         }
diff --git a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java
index bbaeb4ddccd535..d15e49667710b5 100644
--- a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java
@@ -181,15 +181,15 @@ private void finalizeParams() throws UserException {
         }
 
         // check hll_hash
-        if (dstSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL && exprsByName.get(dstSlotDesc.getColumn().getName()) != null) {
+        if (dstSlotDesc.getType().getPrimitiveType() == PrimitiveType.HLL) {
             if (!(expr instanceof FunctionCallExpr)) {
                 throw new AnalysisException("HLL column must use hll_hash function, like "
                         + dstSlotDesc.getColumn().getName() + "=hll_hash(xxx)");
             }
             FunctionCallExpr fn = (FunctionCallExpr) expr;
-            if (!fn.getFnName().getFunction().equalsIgnoreCase("hll_hash")) {
+            if (!fn.getFnName().getFunction().equalsIgnoreCase("hll_hash") && !fn.getFnName().getFunction().equalsIgnoreCase("empty_hll")) {
                 throw new AnalysisException("HLL column must use hll_hash function, like "
-                        + dstSlotDesc.getColumn().getName() + "=hll_hash(xxx)");
+                        + dstSlotDesc.getColumn().getName() + "=hll_hash(xxx) or " + dstSlotDesc.getColumn().getName() + "=empty_hll()");
             }
             expr.setType(Type.HLL);
         }
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 9fc365107682d6..f8a87e6d54edac 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -587,6 +587,8 @@
        '_ZN5doris12HllFunctions15hll_cardinalityEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
    [['hll_hash'], 'VARCHAR', ['VARCHAR'],
        '_ZN5doris12HllFunctions8hll_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
+   [['empty_hll'], 'VARCHAR', [],
+       '_ZN5doris12HllFunctions9empty_hllEPN9doris_udf15FunctionContextE'],
 
    #bitmap function
 

From 6ed3dfb3dbfc71c571c62fe9034cd063f4fc2048 Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Mon, 23 Sep 2019 18:43:52 +0800
Subject: [PATCH 3/8] fix

---
 be/src/exprs/hll_function.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/be/src/exprs/hll_function.cpp b/be/src/exprs/hll_function.cpp
index 033697aaad3767..e516e8fc6d5cfe 100644
--- a/be/src/exprs/hll_function.cpp
+++ b/be/src/exprs/hll_function.cpp
@@ -32,7 +32,7 @@ std::string HllFunctions::init_empty_hll() {
     std::unique_ptr<HyperLogLog> hll;
     hll.reset(new HyperLogLog());
     buf.resize(HLL_EMPTY_SIZE);
-    hll->serialize((char*)buf.c_str());
+    hll->serialize((uint8_t*)buf.c_str());
     return buf;
 }
 
@@ -60,7 +60,7 @@ void HllFunctions::hll_init(FunctionContext *, StringVal* dst) {
     dst->ptr = (uint8_t*)new HyperLogLog();
 }
 StringVal HllFunctions::empty_hll(FunctionContext* ctx) {
-    return AnyValUtil::from_string_temp(ctx, hll_empty_buf);
+    return AnyValUtil::from_string_temp(ctx, hll_empty_buffer);
 }
 
 template <typename T>

From 710567474dfdbeaf6e76ef042d4702f7779f4b1e Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Mon, 23 Sep 2019 18:43:52 +0800
Subject: [PATCH 4/8] fix

---
 be/src/exprs/hll_function.cpp | 14 ++------------
 be/src/exprs/hll_function.h   |  4 ----
 be/src/olap/hll.h             |  8 ++++++++
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/be/src/exprs/hll_function.cpp b/be/src/exprs/hll_function.cpp
index 033697aaad3767..84d71dcda1bb99 100644
--- a/be/src/exprs/hll_function.cpp
+++ b/be/src/exprs/hll_function.cpp
@@ -25,17 +25,6 @@ namespace doris {
 using doris_udf::BigIntVal;
 using doris_udf::StringVal;
 
-const std::string HllFunctions::hll_empty_buffer = init_empty_hll();
-std::string HllFunctions::init_empty_hll() {
-    const int HLL_EMPTY_SIZE = 1;
-    std::string buf;
-    std::unique_ptr<HyperLogLog> hll;
-    hll.reset(new HyperLogLog());
-    buf.resize(HLL_EMPTY_SIZE);
-    hll->serialize((char*)buf.c_str());
-    return buf;
-}
-
 void HllFunctions::init() {
 }
 
@@ -60,7 +49,8 @@ void HllFunctions::hll_init(FunctionContext *, StringVal* dst) {
     dst->ptr = (uint8_t*)new HyperLogLog();
 }
 StringVal HllFunctions::empty_hll(FunctionContext* ctx) {
-    return AnyValUtil::from_string_temp(ctx, hll_empty_buf);
+    HyperLogLog hll;
+    return AnyValUtil::from_string_temp(ctx, hll.empty());
 }
 
 template <typename T>
diff --git a/be/src/exprs/hll_function.h b/be/src/exprs/hll_function.h
index 65b2eb456b3880..da0e5e9d2bd9fd 100644
--- a/be/src/exprs/hll_function.h
+++ b/be/src/exprs/hll_function.h
@@ -18,7 +18,6 @@
 #ifndef DORIS_BE_SRC_QUERY_EXPRS_HLL_FUNCTION_H
 #define DORIS_BE_SRC_QUERY_EXPRS_HLL_FUNCTION_H
 
-#include <string>
 #include "udf/udf.h"
 
 namespace doris {
@@ -40,9 +39,6 @@ class HllFunctions {
     static StringVal hll_serialize(FunctionContext* ctx, const StringVal& src);
 
     static BigIntVal hll_cardinality(FunctionContext* ctx, const StringVal& src);
-
-    const static std::string hll_empty_buffer;
-    static std::string init_empty_hll();
 };
 }
 
diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h
index 21807f4f075b69..11869c70f535e6 100644
--- a/be/src/olap/hll.h
+++ b/be/src/olap/hll.h
@@ -103,6 +103,14 @@ class HyperLogLog {
 
     int64_t estimate_cardinality();
 
+    std::string empty() {
+        const int HLL_EMPTY_SIZE = 1;
+        std::string buf;
+        buf.resize(HLL_EMPTY_SIZE);
+        this->serialize((uint8_t*)buf.c_str());
+        return buf;
+    }
+
     // only for debug
     std::string to_string() {
         switch (_type) {

From 9bf922f3d48c4f51d31c652e7c5afa4184c8c6ea Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Tue, 24 Sep 2019 21:37:24 +0800
Subject: [PATCH 5/8] Add empty_hll doc

---
 .../cn/sql-reference/sql-statements/Data Definition/HLL.md | 3 +++
 .../sql-statements/Data Manipulation/BROKER LOAD.md        | 7 ++++---
 .../sql-statements/Data Manipulation/STREAM LOAD.md        | 4 ++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md
index 16b40d7fbb444e..de26c468ac9840 100644
--- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md
+++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md
@@ -18,6 +18,9 @@
 HLL_HASH(column_name)
 生成HLL列类型,用于insert或导入的时候,导入的使用见相关说明
+
+EMPTY_HLL()
+生成空HLL列,用于insert或导入的时候补充默认值,导入的使用见相关说明
 
 ## example
 1. 首先创建一张含有hll列的表
diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md
index fd1e4991ebe51c..5dc7ec024bc1c5 100644
--- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md
+++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md
@@ -286,8 +286,8 @@
     7. 导入数据到含有HLL列的表,可以是表中的列或者数据里面的列
 
-    如果表中有三列分别是(id,v1,v2)。其中v1和v2列是hll列。导入的源文件有3列。则(column_list)中声明第一列为id,第二三列为一个临时命名的k1,k2。
-    在SET中必须给表中的hll列特殊声明 hll_hash。表中的v1列等于原始数据中的hll_hash(k1)列。
+    如果表中有三列分别是(id,v1,v2,v3)。其中v1和v2列是hll列。导入的源文件有3列。则(column_list)中声明第一列为id,第二三列为一个临时命名的k1,k2。
+    在SET中必须给表中的hll列特殊声明 hll_hash。表中的v1列等于原始数据中的hll_hash(k1)列, 表中的v3列在原始数据中并没有对应的值,使用empty_hll补充默认值。
 
     LOAD LABEL example_db.label7
     (
     DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
     INTO TABLE `my_table`
     PARTITION (p1, p2)
     COLUMNS TERMINATED BY ","
     (id, k1, k2)
     SET (
       v1 = hll_hash(k1),
-      v2 = hll_hash(k2)
+      v2 = hll_hash(k2),
+      v3 = empty_hll()
     )
     )
     WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
index f1d02d6400b0b4..507373bf637100 100644
--- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
+++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
@@ -90,8 +90,8 @@
     6. 使用streaming方式导入(用户是defalut_cluster中的)
         seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load
 
-    7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列
-        curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load
+    7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用empty_hll补充数据中没有的列
+        curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load
 
     8. 导入数据进行严格模式过滤,并设置时区为 Africa/Abidjan
         curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load

From 25f3a6a4985247d8b2f70c15d8f5a37991ba909e Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Tue, 24 Sep 2019 22:16:42 +0800
Subject: [PATCH 6/8] fix

---
 be/src/exprs/hll_function.cpp | 3 +--
 be/src/olap/hll.h             | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/be/src/exprs/hll_function.cpp b/be/src/exprs/hll_function.cpp
index 84d71dcda1bb99..e91f947b345d28 100644
--- a/be/src/exprs/hll_function.cpp
+++ b/be/src/exprs/hll_function.cpp
@@ -49,8 +49,7 @@ void HllFunctions::hll_init(FunctionContext *, StringVal* dst) {
     dst->ptr = (uint8_t*)new HyperLogLog();
 }
 StringVal HllFunctions::empty_hll(FunctionContext* ctx) {
-    HyperLogLog hll;
-    return AnyValUtil::from_string_temp(ctx, hll.empty());
+    return AnyValUtil::from_string_temp(ctx, HyperLogLog::empty());
 }
 
 template <typename T>
diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h
index 11869c70f535e6..e6c88d5e9ce1b9 100644
--- a/be/src/olap/hll.h
+++ b/be/src/olap/hll.h
@@ -103,11 +103,11 @@ class HyperLogLog {
 
     int64_t estimate_cardinality();
 
-    std::string empty() {
-        const int HLL_EMPTY_SIZE = 1;
+    static std::string empty() {
+        static HyperLogLog hll;
         std::string buf;
         buf.resize(HLL_EMPTY_SIZE);
-        this->serialize((uint8_t*)buf.c_str());
+        hll.serialize((uint8_t*)buf.c_str());
         return buf;
     }
 

From c403e5de5f231016c0a1a941592b0a06d956c0ab Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Tue, 24 Sep 2019 22:33:23 +0800
Subject: [PATCH 7/8] Add empty_hll doc

---
 .../en/sql-reference/sql-statements/Data Definition/HLL_EN.md | 3 +++
 .../sql-statements/Data Manipulation/BROKER LOAD_EN.md        | 4 ++--
 .../sql-statements/Data Manipulation/STREAM LOAD_EN.md        | 4 ++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md
index 4061060964542b..b6c7044d037a1e 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md
@@ -19,6 +19,9 @@ This function is used to estimate the cardinality of a single HLL sequence
 HLL_HASH(column_name)
 Generate HLL column types for insert or import, see the instructions for the use of imports
 
+EMPTY_HLL()
+Generate empty HLL column types for insert or import, see the instructions for the use of imports
+
 ## example
 1. First create a table with HLL columns
 create table test(
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md
index 7eefcfabde8322..0936e82be21c6d 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md
@@ -302,9 +302,9 @@
     7. Load data into tables containing HLL columns, which can be columns in tables or columns in data
 
-    If there are three columns in the table (id, v1, v2). The V1 and V2 columns are HLL columns. The imported source file has three columns. Then (column_list) declares that the first column is id, and the second and third columns are temporarily named k1, k2.
+    If there are three columns in the table (id, v1, v2, v3). The V1 and V2 columns are HLL columns. The imported source file has three columns. Then (column_list) declares that the first column is id, and the second and third columns are temporarily named k1, k2.
 
-    In SET, the HLL column in the table must be specifically declared hll_hash. The V1 column in the table is equal to the hll_hash (k1) column in the original data.
+    In SET, the HLL column in the table must be specifically declared hll_hash. The V1 column in the table is equal to the hll_hash (k1) column in the original data.The v3 column in the table does not have a corresponding value in the original data, and empty_hll is used to supplement the default value.
 
     LOAD LABEL example_db.label7
     (
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md
index a294ae25a3a646..531c3bd3c83bc6 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md
@@ -145,9 +145,9 @@ Where url is the url given by ErrorURL.
 
     ```Seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load```
 
-7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns
+7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns,you can also use empty_hll to supplement columns that are not in the data
 
-    ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load```
+    ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
 
 8. load data for strict mode filtering and set the time zone to Africa/Abidjan

From cbe315358c6034644d55b6b71a3468730f053db5 Mon Sep 17 00:00:00 2001
From: HangyuanLiu <460660596@qq.com>
Date: Tue, 24 Sep 2019 22:35:24 +0800
Subject: [PATCH 8/8] Add empty_hll doc

---
 .../sql-statements/Data Manipulation/BROKER LOAD_EN.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md
index 0936e82be21c6d..2b820848de0785 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md
@@ -315,7 +315,8 @@
     (id, k1, k2)
     SET (
       v1 = hll_hash(k1),
-      v2 = hll_hash(k2)
+      v2 = hll_hash(k2),
+      v3 = empty_hll()
     )
     )
     WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
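
Taken together, the series gives HLL columns a usable default: the BE treats an empty source Slice as a default-constructed HyperLogLog (PATCH 1/8), and the new empty_hll() builtin (PATCH 2/8 through 6/8) returns the one-byte buffer produced by HyperLogLog::empty(). The sketch below is an editorial illustration of the invariant this relies on, not part of the patches; the "olap/hll.h" include path, the deserializing HyperLogLog(char*) constructor, and the exact merge()/estimate_cardinality() signatures are assumptions read off the hunks above rather than verified against the tree.

```cpp
// Illustration only -- not part of the patch series.
// Assumed: "olap/hll.h" include path, a HyperLogLog(char*) deserializing
// constructor, merge(const HyperLogLog&), estimate_cardinality(), and the
// static HyperLogLog::empty() added in PATCH 6/8.
#include <string>
#include "olap/hll.h"

int main() {
    doris::HyperLogLog dst;                               // empty sketch, as the BE builds for a missing value
    std::string buf = doris::HyperLogLog::empty();        // 1-byte serialized empty sketch (what empty_hll() returns)

    // Deserialize the buffer the same way AggregateFuncTraits does with a Slice,
    // then merge it: an empty sketch contributes nothing to the estimate.
    doris::HyperLogLog src(const_cast<char*>(buf.c_str()));
    dst.merge(src);

    return dst.estimate_cardinality() == 0 ? 0 : 1;       // estimate is expected to stay 0
}
```

On the load side, the documentation hunks above show the user-facing form of the same default: `v3 = empty_hll()` in a BROKER LOAD SET clause, or `v2=empty_hll()` in a stream load `columns` header, for HLL columns that have no corresponding field in the source data.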