From 8ac58ef22059be56f3c44b79284adc8215a41081 Mon Sep 17 00:00:00 2001 From: kangkaisen Date: Wed, 28 Aug 2019 18:24:01 +0800 Subject: [PATCH] Make bitmap_union agg column support insert into and broker load --- be/src/exec/tablet_sink.cpp | 6 +++ .../aggregate-functions/bitmap.md | 4 +- .../Data Definition/CREATE TABLE.md | 4 +- .../org/apache/doris/analysis/InsertStmt.java | 41 +++++++++++++++++++ .../org/apache/doris/analysis/SelectStmt.java | 5 +++ .../org/apache/doris/analysis/TypeDef.java | 5 ++- .../apache/doris/planner/BrokerScanNode.java | 2 + .../org/apache/doris/planner/ScanNode.java | 22 ++++++++++ .../doris/planner/StreamLoadScanNode.java | 3 ++ .../apache/doris/catalog/ColumnTypeTest.java | 2 +- 10 files changed, 88 insertions(+), 6 deletions(-) diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 9fa0a307cc4083..2ca158bd087c5f 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -707,6 +707,12 @@ int OlapTableSink::_validate_data(RuntimeState* state, RowBatch* batch, Bitmap* case TYPE_VARCHAR: { // Fixed length string StringValue* str_val = (StringValue*)slot; + // todo(kks): varchar(0) means bitmap_union agg type + // we will remove this special handle when we add a special type for bitmap_union + if (desc->type().type == TYPE_VARCHAR && desc->type().len == 0) { + continue; + } + if (str_val->len > desc->type().len) { std::stringstream ss; ss << "the length of input is too long than schema. " diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md index 36e81eb0eea1aa..54adce61497425 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md @@ -5,7 +5,7 @@ `TO_BITMAP(expr)` : 将TINYINT,SMALLINT和INT类型的列转为Bitmap -`BITMAP_UNION(expr)` : 计算两个Bitmap的交集,返回值是序列化后的Bitmap值 +`BITMAP_UNION(expr)` : 计算两个Bitmap的并集,返回值是序列化后的Bitmap值 `BITMAP_COUNT(expr)` : 计算Bitmap中不同值的个数 @@ -49,7 +49,7 @@ mysql> select bitmap_union_int (id2) from bitmap_udaf; CREATE TABLE `bitmap_test` ( `id` int(11) NULL COMMENT "", - `id2` varchar(20) bitmap_union NULL + `id2` varchar(0) bitmap_union NULL // 注意: bitmap_union的varchar长度需要指定为0 ) ENGINE=OLAP AGGREGATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 10; diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md index 43864f50f09b4e..b19290b89f4d9e 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md @@ -301,8 +301,8 @@ ( k1 TINYINT, k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 VARCHAR(20) BITMAP_UNION, - v2 VARCHAR(20) BITMAP_UNION + v1 VARCHAR(0) BITMAP_UNION, // 注意: bitmap_union的varchar长度需要指定为0 + v2 VARCHAR(0) BITMAP_UNION ) ENGINE=olap AGGREGATE KEY(k1, k2) diff --git a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java index dc3016de419f84..5568eb2e0a7d3b 100644 --- a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java @@ -17,10 +17,12 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.BrokerTable; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.FunctionSet; import org.apache.doris.catalog.MysqlTable; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -400,6 +402,11 @@ public void analyzeSubquery(Analyzer analyzer) throws UserException { Expr expr = queryStmt.getResultExprs().get(i); checkHllCompatibility(column, expr); } + + if (column.getAggregationType() == AggregateType.BITMAP_UNION) { + Expr expr = queryStmt.getResultExprs().get(i); + checkBitmapCompatibility(column, expr); + } } } } @@ -419,6 +426,10 @@ private void analyzeRow(Analyzer analyzer, List targetColumns, ArrayList checkHllCompatibility(col, expr); } + if (col.getAggregationType() == AggregateType.BITMAP_UNION) { + checkBitmapCompatibility(col, expr); + } + if (expr instanceof DefaultValueExpr) { if (targetColumns.get(i).getDefaultValue() == null) { throw new AnalysisException("Column has no default value, column=" + targetColumns.get(i).getName()); @@ -478,6 +489,36 @@ private void checkHllCompatibility(Column col, Expr expr) throws AnalysisExcepti } } + private void checkBitmapCompatibility(Column col, Expr expr) throws AnalysisException { + boolean isCompatible = false; + final String bitmapMismatchLog = "Column's agg type is bitmap_union," + + " SelectList must contains bitmap_union column, to_bitmap or bitmap_union function's result, column=" + col.getName(); + if (expr instanceof SlotRef) { + final SlotRef slot = (SlotRef) expr; + Column column = slot.getDesc().getColumn(); + if (column != null && column.getAggregationType() == AggregateType.BITMAP_UNION) { + isCompatible = true; // select * from bitmap_table + } else if (slot.getDesc().getSourceExprs().size() == 1) { + Expr sourceExpr = slot.getDesc().getSourceExprs().get(0); + if (sourceExpr instanceof FunctionCallExpr) { + FunctionCallExpr functionExpr = (FunctionCallExpr) sourceExpr; + if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION)) { + isCompatible = true; // select id, bitmap_union(id2) from bitmap_table group by id + } + } + } + } else if (expr instanceof FunctionCallExpr) { + final FunctionCallExpr functionExpr = (FunctionCallExpr) expr; + if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) { + isCompatible = true; // select id, to_bitmap(id2) from table; + } + } + + if (!isCompatible) { + throw new AnalysisException(bitmapMismatchLog); + } + } + private Expr checkTypeCompatibility(Column col, Expr expr) throws AnalysisException { if (col.getDataType().equals(expr.getType().getPrimitiveType())) { return expr; diff --git a/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java index 9aec70fd6f791c..1a1af2915fafb4 100644 --- a/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -17,6 +17,7 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; @@ -730,6 +731,10 @@ private void expandStar(TableName tblName, TupleDescriptor desc) throws Analysis throw new AnalysisException ( "hll only use in HLL_UNION_AGG or HLL_CARDINALITY , HLL_HASH and so on."); } + if (col.getAggregationType() == AggregateType.BITMAP_UNION && !fromInsert) { + throw new AnalysisException ( + "BITMAP_UNION agg column only use in TO_BITMAP or BITMAP_UNION , BITMAP_COUNT and so on."); + } resultExprs.add(new SlotRef(tblName, col.getName())); colLabels.add(col.getName()); } diff --git a/fe/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/src/main/java/org/apache/doris/analysis/TypeDef.java index 6fe23c19e97c6c..15b1731e7d6841 100644 --- a/fe/src/main/java/org/apache/doris/analysis/TypeDef.java +++ b/fe/src/main/java/org/apache/doris/analysis/TypeDef.java @@ -92,7 +92,10 @@ private void analyzeScalarType(ScalarType scalarType) } int len = scalarType.getLength(); // len is decided by child, when it is -1. - if (len <= 0) { + + // todo(kks) : varchar(0) for bitmap_union agg type, + // we should forbid the len equal zero when we add a special type for bitmap_union + if (len < 0) { throw new AnalysisException(name + " size must be > 0: " + len); } if (scalarType.getLength() > maxLen) { diff --git a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java index 736097cb463e3e..ef342ec767d588 100644 --- a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java @@ -463,6 +463,8 @@ private void finalizeParams(ParamCreateContext context) throws UserException, An expr.setType(Type.HLL); } + checkBitmapCompatibility(destSlotDesc, expr); + // analyze negative if (isNegative && destSlotDesc.getColumn().getAggregationType() == AggregateType.SUM) { expr = new ArithmeticExpr(ArithmeticExpr.Operator.MULTIPLY, expr, new IntLiteral(-1)); diff --git a/fe/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/src/main/java/org/apache/doris/planner/ScanNode.java index 75f6ab3fb866bc..3e9bc86651181c 100644 --- a/fe/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/ScanNode.java @@ -18,9 +18,13 @@ package org.apache.doris.planner; import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.FunctionCallExpr; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.TupleDescriptor; +import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.FunctionSet; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TScanRangeLocations; @@ -76,6 +80,24 @@ protected Expr castToSlot(SlotDescriptor slotDesc, Expr expr) throws UserExcepti } } + protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException { + boolean isCompatible = true; + if (slotDesc.getColumn().getAggregationType() == AggregateType.BITMAP_UNION) { + if (!(expr instanceof FunctionCallExpr)) { + isCompatible = false; + } else { + FunctionCallExpr fn = (FunctionCallExpr) expr; + if (!fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) { + isCompatible = false; + } + } + } + if (!isCompatible) { + throw new AnalysisException("bitmap_union column must use to_bitmap function, like " + + slotDesc.getColumn().getName() + "=to_bitmap(xxx)"); + } + } + /** * Returns all scan ranges plus their locations. Needs to be preceded by a call to * finalize(). diff --git a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java index 9e9e77e1302c9a..b012d083e520b9 100644 --- a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java @@ -288,6 +288,9 @@ private void finalizeParams() throws UserException { } expr.setType(Type.HLL); } + + checkBitmapCompatibility(dstSlotDesc, expr); + if (negative && dstSlotDesc.getColumn().getAggregationType() == AggregateType.SUM) { expr = new ArithmeticExpr(ArithmeticExpr.Operator.MULTIPLY, expr, new IntLiteral(-1)); expr.analyze(analyzer); diff --git a/fe/src/test/java/org/apache/doris/catalog/ColumnTypeTest.java b/fe/src/test/java/org/apache/doris/catalog/ColumnTypeTest.java index dc5a1d91dddf31..46959454b920be 100644 --- a/fe/src/test/java/org/apache/doris/catalog/ColumnTypeTest.java +++ b/fe/src/test/java/org/apache/doris/catalog/ColumnTypeTest.java @@ -95,7 +95,7 @@ public void testCharType() throws AnalysisException { @Test(expected = AnalysisException.class) public void testCharInvalid() throws AnalysisException { - TypeDef type = TypeDef.createVarchar(0); + TypeDef type = TypeDef.createVarchar(-1); type.analyze(null); Assert.fail("No Exception throws"); }