From f92864371192ccc8ea943f1fd801365933556836 Mon Sep 17 00:00:00 2001 From: gipeshka Date: Wed, 2 Oct 2019 16:23:08 +0200 Subject: [PATCH 1/7] 8613 hll sketch to accept array as input --- .../hll/HllSketchBuildAggregator.java | 6 ++ .../hll/HllSketchAggregatorTest.java | 76 ++++++++++++++++++- 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java index 4f772608008a..66a51f7f9848 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java @@ -107,6 +107,12 @@ static void updateSketch(final HllSketch sketch, final Object value) for (String v : list) { sketch.update(v.toCharArray()); } + } else if (value instanceof String[]) { + // noinspection unchecked + String[] list = (String[]) value; + for (String v : list) { + sketch.update(v.toCharArray()); + } } else if (value instanceof char[]) { sketch.update((char[]) value); } else if (value instanceof byte[]) { diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index e2c55be53552..dc1f64bea63a 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -21,13 +21,16 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.segment.transform.ExpressionTransform; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -42,6 +45,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; @RunWith(Parameterized.class) public class HllSketchAggregatorTest @@ -112,6 +116,34 @@ public void buildSketchesAtIngestionTime() throws Exception Assert.assertEquals(200, (double) row.get(0), 0.1); } + @Test + public void buildSketchesAtIngestionTimeMultiValueWithTransformations() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()), + buildParserJson( + Collections.singletonList("dim"), + Arrays.asList("timestamp", "dim", "multiDim", "id"), + ImmutableList.of( + new ExpressionTransform( + "multiDim", + "array_append(multiDim, '0')", + ExprMacroTable.nil() + ) + ) + ), + buildAggregatorJson("HLLSketchBuild", "multiDim", !ROUND), + 0, // minTimestamp + Granularities.NONE, + 200, // maxRowCount + buildGroupByQueryJson("HLLSketchMerge", "sketch", !ROUND) + ); + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + ResultRow row = results.get(0); + Assert.assertEquals(200, (double) row.get(0), 0.1); + } + @Test public void buildSketchesAtQueryTime() throws Exception { @@ -151,7 +183,7 @@ public void buildSketchesAtQueryTimeMultiValue() throws Exception List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); - Assert.assertEquals(14, (double) row.get(0), 0.1); + Assert.assertEquals(15, (double) row.get(0), 0.1); } @Test @@ -221,6 +253,48 @@ private static String buildParserJson(List dimensions, List colu return toJson(object); } + private static String buildParserJson( + List dimensions, + List columns, + List transforms + ) + { + List> transformsObjects = transforms.stream().map( transform -> + ImmutableMap.of( + "type", "expression", + "name", transform.getName(), + "expression", transform.getExpression() + ) + ).collect(Collectors.toList()); + + Map transformsObject = ImmutableMap.of( + "transforms", transformsObjects + ); + + Map timestampSpec = ImmutableMap.of( + "column", "timestamp", + "format", "yyyyMMdd" + ); + Map dimensionsSpec = ImmutableMap.of( + "dimensions", dimensions, + "dimensionExclusions", Collections.emptyList(), + "spatialDimensions", Collections.emptyList() + ); + Map parseSpec = ImmutableMap.of( + "format", "tsv", + "timestampSpec", timestampSpec, + "dimensionsSpec", dimensionsSpec, + "columns", columns, + "listDelimiter", "," + ); + Map object = ImmutableMap.of( + "type", "string", + "parseSpec", parseSpec, + "transformationSpec", transformsObject + ); + return toJson(object); + } + private static String toJson(Object object) { final String json; From 48a7dc87ba2413a10b6b025e8921dcdb62bc2075 Mon Sep 17 00:00:00 2001 From: gipeshka Date: Wed, 2 Oct 2019 16:39:22 +0200 Subject: [PATCH 2/7] adjust test --- .../aggregation/datasketches/hll/HllSketchAggregatorTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index dc1f64bea63a..6de3bd6eb5ed 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -141,7 +141,7 @@ public void buildSketchesAtIngestionTimeMultiValueWithTransformations() throws E List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); - Assert.assertEquals(200, (double) row.get(0), 0.1); + Assert.assertEquals(15, (double) row.get(0), 0.1); } @Test @@ -183,7 +183,7 @@ public void buildSketchesAtQueryTimeMultiValue() throws Exception List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); - Assert.assertEquals(15, (double) row.get(0), 0.1); + Assert.assertEquals(14, (double) row.get(0), 0.1); } @Test From 8bdc388d63963bf973cd3bc6c85a9102e4591767 Mon Sep 17 00:00:00 2001 From: gipeshka Date: Wed, 2 Oct 2019 16:49:22 +0200 Subject: [PATCH 3/7] adjust cs --- .../aggregation/datasketches/hll/HllSketchAggregatorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 6de3bd6eb5ed..5ce67b99d1b5 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -259,7 +259,7 @@ private static String buildParserJson( List transforms ) { - List> transformsObjects = transforms.stream().map( transform -> + List> transformsObjects = transforms.stream().map(transform -> ImmutableMap.of( "type", "expression", "name", transform.getName(), From 5a04e54cebd9ef3de323fe27453a6ee423677f06 Mon Sep 17 00:00:00 2001 From: gipeshka Date: Wed, 2 Oct 2019 16:54:42 +0200 Subject: [PATCH 4/7] fix wrong configuration name --- .../aggregation/datasketches/hll/HllSketchAggregatorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 5ce67b99d1b5..495c9f76700e 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -290,7 +290,7 @@ private static String buildParserJson( Map object = ImmutableMap.of( "type", "string", "parseSpec", parseSpec, - "transformationSpec", transformsObject + "transformSpec", transformsObject ); return toJson(object); } From 3935574a801b4c497cc3d140d6081033bdf33193 Mon Sep 17 00:00:00 2001 From: gipeshka Date: Wed, 2 Oct 2019 17:25:25 +0200 Subject: [PATCH 5/7] try to use long 0 --- .../aggregation/datasketches/hll/HllSketchAggregatorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 495c9f76700e..91e74e0d541a 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -127,7 +127,7 @@ public void buildSketchesAtIngestionTimeMultiValueWithTransformations() throws E ImmutableList.of( new ExpressionTransform( "multiDim", - "array_append(multiDim, '0')", + "array_append(multiDim, 0)", ExprMacroTable.nil() ) ) From a86004f37ac83476a7e25fa724d30598eef61dd3 Mon Sep 17 00:00:00 2001 From: gipeshka Date: Mon, 7 Oct 2019 17:36:15 +0200 Subject: [PATCH 6/7] try it without shadowing --- .../datasketches/hll/HllSketchAggregatorTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 91e74e0d541a..76305dd04840 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -126,13 +126,13 @@ public void buildSketchesAtIngestionTimeMultiValueWithTransformations() throws E Arrays.asList("timestamp", "dim", "multiDim", "id"), ImmutableList.of( new ExpressionTransform( - "multiDim", - "array_append(multiDim, 0)", + "multiDimAppended", + "array_append(multiDim, '0')", ExprMacroTable.nil() ) ) ), - buildAggregatorJson("HLLSketchBuild", "multiDim", !ROUND), + buildAggregatorJson("HLLSketchBuild", "multiDimAppended", !ROUND), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount From 1834ed2b79eba9f684800ee370938ef0034f320b Mon Sep 17 00:00:00 2001 From: gipeshka Date: Mon, 7 Oct 2019 17:38:38 +0200 Subject: [PATCH 7/7] lets use 15 --- .../aggregation/datasketches/hll/HllSketchAggregatorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 76305dd04840..7a37a1464373 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -127,7 +127,7 @@ public void buildSketchesAtIngestionTimeMultiValueWithTransformations() throws E ImmutableList.of( new ExpressionTransform( "multiDimAppended", - "array_append(multiDim, '0')", + "array_append(multiDim, '15')", ExprMacroTable.nil() ) )