diff --git a/docs/content/development/datasketches-aggregators.md b/docs/content/development/datasketches-aggregators.md index c82a4349c060..62c70ae6cc74 100644 --- a/docs/content/development/datasketches-aggregators.md +++ b/docs/content/development/datasketches-aggregators.md @@ -5,6 +5,7 @@ layout: doc_page ## DataSketches aggregator Druid aggregators based on [datasketches]()http://datasketches.github.io/) library. Note that sketch algorithms are approxiate, see details in the "Accuracy" section of the datasketches doc. At ingestion time, this aggregator creates the theta sketch objects which get stored in Druid segments. Logically speaking, a theta sketch object can be thought of as a Set data structure. At query time, sketches are read and aggregated(set unioned) together. In the end, by default, you receive the estimate of number of unique entries in the sketch object. Also, You can use post aggregators to do union, intersection or difference on sketch columns in the same row. +Note that you can use `thetaSketch` aggregator on columns which were not ingested using same, it will return estimated cardinality of the column. It is recommended to use it at ingestion time as well to make querying faster. ### Aggregators diff --git a/extensions/datasketches/pom.xml b/extensions/datasketches/pom.xml index 30a375f6a45a..85c280ffc17c 100644 --- a/extensions/datasketches/pom.xml +++ b/extensions/datasketches/pom.xml @@ -18,102 +18,103 @@ ~ under the License. --> - - 4.0.0 + + 4.0.0 - io.druid.extensions - druid-datasketches - druid-datasketches - Druid Aggregators based on datasketches lib http://datasketches.github.io/ + io.druid.extensions + druid-datasketches + druid-datasketches + Druid Aggregators based on datasketches lib http://datasketches.github.io/ - - io.druid - druid - 0.9.0-SNAPSHOT - ../../pom.xml - + + io.druid + druid + 0.9.0-SNAPSHOT + ../../pom.xml + - - - com.yahoo.datasketches - sketches-core - 0.1.1 - - - io.druid - druid-api - ${druid.api.version} - provided - - - io.druid - druid-processing - ${project.parent.version} - provided - + + + com.yahoo.datasketches + sketches-core + 0.2.2 + + + io.druid + druid-api + ${druid.api.version} + provided + + + io.druid + druid-processing + ${project.parent.version} + provided + - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - provided - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - provided - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - provided - - - com.fasterxml.jackson.datatype - jackson-datatype-guava - ${jackson.version} - provided - - - com.fasterxml.jackson.datatype - jackson-datatype-joda - ${jackson.version} - provided - - - com.fasterxml.jackson.dataformat - jackson-dataformat-smile - ${jackson.version} - provided - - - com.fasterxml.jackson.jaxrs - jackson-jaxrs-json-provider - ${jackson.version} - provided - - - com.fasterxml.jackson.jaxrs - jackson-jaxrs-smile-provider - ${jackson.version} - provided - + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + provided + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + provided + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + provided + + + com.fasterxml.jackson.datatype + jackson-datatype-guava + ${jackson.version} + provided + + + com.fasterxml.jackson.datatype + jackson-datatype-joda + ${jackson.version} + provided + + + com.fasterxml.jackson.dataformat + jackson-dataformat-smile + ${jackson.version} + provided + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + ${jackson.version} + provided + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-smile-provider + ${jackson.version} + provided + - - - junit - junit - test - - - io.druid - druid-processing - ${project.parent.version} - test-jar - test - - + + + junit + junit + test + + + io.druid + druid-processing + ${project.parent.version} + test-jar + test + + diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchAggregator.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchAggregator.java index 3b8a9cc1f66c..e81b2ff978c7 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchAggregator.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchAggregator.java @@ -19,17 +19,22 @@ package io.druid.query.aggregation.datasketches.theta; +import com.metamx.common.ISE; +import com.metamx.common.logger.Logger; import com.yahoo.sketches.Family; import com.yahoo.sketches.memory.Memory; -import com.yahoo.sketches.theta.SetOpReturnState; import com.yahoo.sketches.theta.SetOperation; import com.yahoo.sketches.theta.Sketch; import com.yahoo.sketches.theta.Union; import io.druid.query.aggregation.Aggregator; import io.druid.segment.ObjectColumnSelector; +import java.util.List; + public class SketchAggregator implements Aggregator { + private static final Logger logger = new Logger(SketchAggregator.class); + private final ObjectColumnSelector selector; private final String name; private final int size; @@ -48,21 +53,11 @@ public SketchAggregator(String name, ObjectColumnSelector selector, int size) public void aggregate() { Object update = selector.get(); - - if(update == null) { + if (update == null) { return; } - SetOpReturnState success; - if (update instanceof Memory) { - success = union.update((Memory) update); - } else { - success = union.update((Sketch) update); - } - - if(success != SetOpReturnState.Success) { - throw new IllegalStateException("Sketch Aggregation failed with state " + success); - } + updateUnion(union, update); } @Override @@ -105,4 +100,31 @@ public void close() { union = null; } + + static void updateUnion(Union union, Object update) + { + if (update instanceof Memory) { + union.update((Memory) update); + } else if (update instanceof Sketch) { + union.update((Sketch) update); + } else if (update instanceof String) { + union.update((String) update); + } else if (update instanceof byte[]) { + union.update((byte[]) update); + } else if (update instanceof Double) { + union.update(((Double) update)); + } else if (update instanceof Integer || update instanceof Long) { + union.update(((Number) update).longValue()); + } else if (update instanceof int[]) { + union.update((int[]) update); + } else if (update instanceof long[]) { + union.update((long[]) update); + } else if (update instanceof List) { + for (Object entry : (List) update) { + union.update(entry.toString()); + } + } else { + throw new ISE("Illegal type received while theta sketch merging [%s]", update.getClass()); + } + } } diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java index 849a8722175f..0ae9aef13cd6 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java @@ -19,13 +19,12 @@ package io.druid.query.aggregation.datasketches.theta; +import com.metamx.common.logger.Logger; import com.yahoo.sketches.Family; import com.yahoo.sketches.memory.Memory; import com.yahoo.sketches.memory.MemoryRegion; import com.yahoo.sketches.memory.NativeMemory; -import com.yahoo.sketches.theta.SetOpReturnState; import com.yahoo.sketches.theta.SetOperation; -import com.yahoo.sketches.theta.Sketch; import com.yahoo.sketches.theta.Union; import io.druid.query.aggregation.BufferAggregator; import io.druid.segment.ObjectColumnSelector; @@ -36,6 +35,8 @@ public class SketchBufferAggregator implements BufferAggregator { + private static final Logger logger = new Logger(SketchAggregator.class); + private final ObjectColumnSelector selector; private final int size; private final int maxIntermediateSize; @@ -59,28 +60,19 @@ public void init(ByteBuffer buf, int position) } Memory mem = new MemoryRegion(nm, position, maxIntermediateSize); - unions.put(position, (Union) SetOperation.builder().setMemory(mem).build(size, Family.UNION)); + unions.put(position, (Union) SetOperation.builder().initMemory(mem).build(size, Family.UNION)); } @Override public void aggregate(ByteBuffer buf, int position) { Object update = selector.get(); - if(update == null) { + if (update == null) { return; } Union union = getUnion(buf, position); - SetOpReturnState success; - if (update instanceof Memory) { - success = union.update((Memory) update); - } else { - success = union.update((Sketch) update); - } - - if(success != SetOpReturnState.Success) { - throw new IllegalStateException("Sketch Buffer Aggregation failed with state " + update); - } + SketchAggregator.updateUnion(union, update); } @Override @@ -98,7 +90,7 @@ public Object get(ByteBuffer buf, int position) private Union getUnion(ByteBuffer buf, int position) { Union union = unions.get(position); - if(union == null) { + if (union == null) { Memory mem = new MemoryRegion(nm, position, maxIntermediateSize); union = (Union) SetOperation.wrap(mem); unions.put(position, union); @@ -119,7 +111,8 @@ public long getLong(ByteBuffer buf, int position) } @Override - public void close() { + public void close() + { unions.clear(); } diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBuildComplexMetricSerde.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBuildComplexMetricSerde.java index 1b82c1118b53..534d26024304 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBuildComplexMetricSerde.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBuildComplexMetricSerde.java @@ -19,16 +19,9 @@ package io.druid.query.aggregation.datasketches.theta; -import com.metamx.common.IAE; -import com.yahoo.sketches.memory.Memory; -import com.yahoo.sketches.theta.Sketch; -import com.yahoo.sketches.theta.Sketches; -import com.yahoo.sketches.theta.UpdateSketch; import io.druid.data.input.InputRow; import io.druid.segment.serde.ComplexMetricExtractor; -import java.util.List; - /** */ public class SketchBuildComplexMetricSerde extends SketchMergeComplexMetricSerde @@ -48,33 +41,7 @@ public Class extractedClass() @Override public Object extractValue(InputRow inputRow, String metricName) { - Object obj = inputRow.getRaw(metricName); - if (obj == null || obj instanceof Sketch || obj instanceof Memory) { - return obj; - } - - UpdateSketch sketch = Sketches.updateSketchBuilder().build(4096); - if (obj instanceof String) { - sketch.update((String) obj); - } else if (obj instanceof byte[]) { - sketch.update((byte[]) obj); - } else if (obj instanceof Double) { - sketch.update(((Double) obj)); - } else if (obj instanceof Integer || obj instanceof Long) { - sketch.update(((Number) obj).longValue()); - } else if (obj instanceof int[]) { - sketch.update((int[]) obj); - } else if (obj instanceof long[]) { - sketch.update((long[]) obj); - } else if (obj instanceof List) { - for (Object entry : (List) obj) { - sketch.update(entry.toString()); - } - } else { - throw new IAE("Unknown object type[%s] received for ingestion.", obj.getClass()); - } - - return sketch; + return inputRow.getRaw(metricName); } }; } diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchJsonSerializer.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchJsonSerializer.java index b28c44b00305..76db0d990560 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchJsonSerializer.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchJsonSerializer.java @@ -30,7 +30,8 @@ public class SketchJsonSerializer extends JsonSerializer { @Override - public void serialize(Sketch sketch, JsonGenerator jgen, SerializerProvider provider) throws IOException, JsonProcessingException + public void serialize(Sketch sketch, JsonGenerator jgen, SerializerProvider provider) + throws IOException, JsonProcessingException { jgen.writeBinary(sketch.toByteArray()); } diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java index dadb1ca15fec..4eec21b2cc7e 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java @@ -86,6 +86,7 @@ public boolean getIsInputThetaSketch() * sketch. * * @param object the sketch object + * * @return sketch object */ @Override diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchOperations.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchOperations.java index f2d5867e21fb..e6e6e2ca142b 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchOperations.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchOperations.java @@ -25,7 +25,6 @@ import com.yahoo.sketches.memory.NativeMemory; import com.yahoo.sketches.theta.AnotB; import com.yahoo.sketches.theta.Intersection; -import com.yahoo.sketches.theta.SetOpReturnState; import com.yahoo.sketches.theta.SetOperation; import com.yahoo.sketches.theta.Sketch; import com.yahoo.sketches.theta.Sketches; @@ -74,13 +73,13 @@ public static Sketch deserializeFromBase64EncodedString(String str) public static Sketch deserializeFromByteArray(byte[] data) { NativeMemory mem = new NativeMemory(data); - if(Sketch.getSerializationVersion(mem) < 3) { + if (Sketch.getSerializationVersion(mem) < 3) { return Sketches.heapifySketch(mem); } else { return Sketches.wrapSketch(mem); } } - + public static Sketch sketchSetOperation(Func func, int sketchSize, Sketch... sketches) { //in the code below, I am returning SetOp.getResult(false, null) @@ -91,34 +90,29 @@ public static Sketch sketchSetOperation(Func func, int sketchSize, Sketch... ske switch (func) { case UNION: Union union = (Union) SetOperation.builder().build(sketchSize, Family.UNION); - for(Sketch sketch : sketches) { - SetOpReturnState success = union.update(sketch); - if(success != SetOpReturnState.Success) { - throw new IllegalStateException("Sketch operation failed " + func); - } + for (Sketch sketch : sketches) { + union.update(sketch); } return union.getResult(false, null); case INTERSECT: Intersection intersection = (Intersection) SetOperation.builder().build(sketchSize, Family.INTERSECTION); - for(Sketch sketch : sketches) { - SetOpReturnState success = intersection.update(sketch); - if(success != SetOpReturnState.Success) { - throw new IllegalStateException("Sketch operation failed " + func); - } + for (Sketch sketch : sketches) { + intersection.update(sketch); } return intersection.getResult(false, null); case NOT: - if(sketches.length < 2) { - throw new IllegalArgumentException("A-Not-B requires atleast 2 sketches"); + if (sketches.length < 1) { + throw new IllegalArgumentException("A-Not-B requires atleast 1 sketch"); + } + + if (sketches.length == 1) { + return sketches[0]; } Sketch result = sketches[0]; for (int i = 1; i < sketches.length; i++) { AnotB anotb = (AnotB) SetOperation.builder().build(sketchSize, Family.A_NOT_B); - SetOpReturnState success = anotb.update(result, sketches[i]); - if(success != SetOpReturnState.Success) { - throw new IllegalStateException("Sketch operation failed " + func); - } + anotb.update(result, sketches[i]); result = anotb.getResult(false, null); } return result; diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchSetPostAggregator.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchSetPostAggregator.java index d0c866db90dc..550fe2f55573 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchSetPostAggregator.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchSetPostAggregator.java @@ -24,7 +24,6 @@ import com.google.common.collect.Sets; import com.metamx.common.IAE; import com.metamx.common.logger.Logger; - import com.yahoo.sketches.Util; import com.yahoo.sketches.theta.Sketch; import io.druid.query.aggregation.PostAggregator; @@ -118,7 +117,17 @@ public int getSize() @Override public String toString() { - return "SketchSetPostAggregator{" + "name='" + name + '\'' + ", fields=" + fields + ", func=" + func + ", size=" + maxSketchSize +"}"; + return "SketchSetPostAggregator{" + + "name='" + + name + + '\'' + + ", fields=" + + fields + + ", func=" + + func + + ", size=" + + maxSketchSize + + "}"; } @Override diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SynchronizedUnion.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SynchronizedUnion.java index bd343b0ee208..f0af78a23e1f 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SynchronizedUnion.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SynchronizedUnion.java @@ -21,7 +21,6 @@ import com.yahoo.sketches.memory.Memory; import com.yahoo.sketches.theta.CompactSketch; -import com.yahoo.sketches.theta.SetOpReturnState; import com.yahoo.sketches.theta.Sketch; import com.yahoo.sketches.theta.Union; @@ -37,15 +36,51 @@ public SynchronizedUnion(Union delegate) } @Override - public synchronized SetOpReturnState update(Sketch sketch) + public synchronized void update(Sketch sketchIn) { - return delegate.update(sketch); + delegate.update(sketchIn); } @Override - public synchronized SetOpReturnState update(Memory memory) + public synchronized void update(Memory mem) { - return delegate.update(memory); + delegate.update(mem); + } + + @Override + public synchronized void update(long datum) + { + delegate.update(datum); + } + + @Override + public synchronized void update(double datum) + { + delegate.update(datum); + } + + @Override + public synchronized void update(String datum) + { + delegate.update(datum); + } + + @Override + public synchronized void update(byte[] data) + { + delegate.update(data); + } + + @Override + public synchronized void update(int[] data) + { + delegate.update(data); + } + + @Override + public synchronized void update(long[] data) + { + delegate.update(data); } @Override @@ -54,6 +89,12 @@ public synchronized CompactSketch getResult(boolean b, Memory memory) return delegate.getResult(b, memory); } + @Override + public synchronized CompactSketch getResult() + { + return delegate.getResult(); + } + @Override public synchronized byte[] toByteArray() { diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchModule.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchModule.java index 816f3967c92f..f9182d2bc7d5 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchModule.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchModule.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.databind.jsontype.NamedType; import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.inject.Binder; - import com.yahoo.sketches.theta.Sketch; import io.druid.initialization.DruidModule; import io.druid.query.aggregation.datasketches.theta.SketchBuildComplexMetricSerde; diff --git a/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 913854064076..38671f363edf 100644 --- a/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -123,6 +123,43 @@ public void testSketchDataIngestAndQuery() throws Exception ); } + @Test + public void testThetaCardinalityOnSimpleColumn() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(SketchAggregationTest.class.getClassLoader().getResource("simple_test_data.tsv").getFile()), + readFileFromClasspathAsString("simple_test_data_record_parser2.json"), + "[" + + " {" + + " \"type\": \"count\"," + + " \"name\": \"count\"" + + " }" + + "]", + 0, + QueryGranularity.NONE, + 5, + readFileFromClasspathAsString("simple_test_data_group_by_query.json") + ); + + List results = Sequences.toList(seq, Lists.newArrayList()); + Assert.assertEquals(1, results.size()); + Assert.assertEquals( + new MapBasedRow( + DateTime.parse("2014-10-19T00:00:00.000Z"), + ImmutableMap + .builder() + .put("sketch_count", 50.0) + .put("sketchEstimatePostAgg", 50.0) + .put("sketchUnionPostAggEstimate", 50.0) + .put("sketchIntersectionPostAggEstimate", 50.0) + .put("sketchAnotBPostAggEstimate", 0.0) + .put("non_existing_col_validation", 0.0) + .build() + ), + results.get(0) + ); + } + @Test public void testSketchMergeAggregatorFactorySerde() throws Exception { @@ -146,7 +183,8 @@ public void testSketchMergeFinalization() throws Exception Assert.assertEquals(sketch, agg.finalizeComputation(sketch)); } - private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception{ + private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception + { Assert.assertEquals( agg, helper.getObjectMapper().readValue( @@ -183,7 +221,8 @@ public void testSketchSetPostAggregatorSerde() throws Exception ); } - private void assertPostAggregatorSerde(PostAggregator agg) throws Exception{ + private void assertPostAggregatorSerde(PostAggregator agg) throws Exception + { Assert.assertEquals( agg, helper.getObjectMapper().readValue( diff --git a/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java b/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java index 3469d61332d3..c683f6c10dd0 100644 --- a/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java +++ b/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java @@ -139,7 +139,8 @@ public void testSketchBuildAggregatorFactorySerde() throws Exception assertAggregatorFactorySerde(new OldSketchBuildAggregatorFactory("name", "fieldName", 16)); } - private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception{ + private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception + { Assert.assertEquals( agg, helper.getObjectMapper().readValue( @@ -176,7 +177,8 @@ public void testSketchSetPostAggregatorSerde() throws Exception ); } - private void assertPostAggregatorSerde(PostAggregator agg) throws Exception{ + private void assertPostAggregatorSerde(PostAggregator agg) throws Exception + { Assert.assertEquals( agg, helper.getObjectMapper().readValue( diff --git a/extensions/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json b/extensions/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json index 5339421e37f2..362652cac339 100644 --- a/extensions/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json +++ b/extensions/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json @@ -4,8 +4,18 @@ "granularity": "ALL", "dimensions": [], "aggregations": [ - { "type": "sketchMerge", "name": "sketch_count", "fieldName": "pty_country", "size": 16384 }, - { "type": "sketchMerge", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } + { + "type": "sketchMerge", + "name": "sketch_count", + "fieldName": "pty_country", + "size": 16384 + }, + { + "type": "sketchMerge", + "name": "non_existing_col_validation", + "fieldName": "non_existing_col", + "size": 16384 + } ], "postAggregations": [ { @@ -19,8 +29,7 @@ { "type": "sketchEstimate", "name": "sketchIntersectionPostAggEstimate", - "field": - { + "field": { "type": "sketchSetOper", "name": "sketchIntersectionPostAgg", "func": "INTERSECT", @@ -40,8 +49,7 @@ { "type": "sketchEstimate", "name": "sketchAnotBPostAggEstimate", - "field": - { + "field": { "type": "sketchSetOper", "name": "sketchAnotBUnionPostAgg", "func": "NOT", @@ -61,8 +69,7 @@ { "type": "sketchEstimate", "name": "sketchUnionPostAggEstimate", - "field": - { + "field": { "type": "sketchSetOper", "name": "sketchUnionPostAgg", "func": "UNION", @@ -80,5 +87,7 @@ } } ], - "intervals": [ "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" ] + "intervals": [ + "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" + ] } diff --git a/extensions/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json b/extensions/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json index 5af6e3093208..027e0a610cb6 100644 --- a/extensions/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json +++ b/extensions/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json @@ -4,8 +4,18 @@ "granularity": "ALL", "dimensions": [], "aggregations": [ - { "type": "sketchMerge", "name": "sids_sketch_count", "fieldName": "sids_sketch", "size": 16384 }, - { "type": "sketchMerge", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } + { + "type": "sketchMerge", + "name": "sids_sketch_count", + "fieldName": "sids_sketch", + "size": 16384 + }, + { + "type": "sketchMerge", + "name": "non_existing_col_validation", + "fieldName": "non_existing_col", + "size": 16384 + } ], "postAggregations": [ { @@ -19,8 +29,7 @@ { "type": "sketchEstimate", "name": "sketchIntersectionPostAggEstimate", - "field": - { + "field": { "type": "sketchSetOper", "name": "sketchIntersectionPostAgg", "func": "INTERSECT", @@ -40,8 +49,7 @@ { "type": "sketchEstimate", "name": "sketchAnotBPostAggEstimate", - "field": - { + "field": { "type": "sketchSetOper", "name": "sketchAnotBUnionPostAgg", "func": "NOT", @@ -60,8 +68,7 @@ { "type": "sketchEstimate", "name": "sketchUnionPostAggEstimate", - "field": - { + "field": { "type": "sketchSetOper", "name": "sketchUnionPostAgg", "func": "UNION", @@ -79,5 +86,7 @@ } } ], - "intervals": [ "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" ] + "intervals": [ + "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" + ] } diff --git a/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json b/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json index 1badd1382baf..1b8f80ef02f7 100644 --- a/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json +++ b/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json @@ -4,8 +4,18 @@ "granularity": "ALL", "dimensions": [], "aggregations": [ - { "type": "thetaSketch", "name": "sketch_count", "fieldName": "pty_country", "size": 16384 }, - { "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } + { + "type": "thetaSketch", + "name": "sketch_count", + "fieldName": "pty_country", + "size": 16384 + }, + { + "type": "thetaSketch", + "name": "non_existing_col_validation", + "fieldName": "non_existing_col", + "size": 16384 + } ], "postAggregations": [ { @@ -19,8 +29,7 @@ { "type": "thetaSketchEstimate", "name": "sketchIntersectionPostAggEstimate", - "field": - { + "field": { "type": "thetaSketchSetOp", "name": "sketchIntersectionPostAgg", "func": "INTERSECT", @@ -40,8 +49,7 @@ { "type": "thetaSketchEstimate", "name": "sketchAnotBPostAggEstimate", - "field": - { + "field": { "type": "thetaSketchSetOp", "name": "sketchAnotBUnionPostAgg", "func": "NOT", @@ -61,8 +69,7 @@ { "type": "thetaSketchEstimate", "name": "sketchUnionPostAggEstimate", - "field": - { + "field": { "type": "thetaSketchSetOp", "name": "sketchUnionPostAgg", "func": "UNION", @@ -80,5 +87,7 @@ } } ], - "intervals": [ "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" ] + "intervals": [ + "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" + ] } diff --git a/extensions/datasketches/src/test/resources/simple_test_data_record_parser.json b/extensions/datasketches/src/test/resources/simple_test_data_record_parser.json index fbe1cd6f04ad..0c9983af072c 100644 --- a/extensions/datasketches/src/test/resources/simple_test_data_record_parser.json +++ b/extensions/datasketches/src/test/resources/simple_test_data_record_parser.json @@ -1,16 +1,22 @@ { - "type" : "string", - "parseSpec" : { - "format" : "tsv", - "timestampSpec" : { - "column" : "timestamp", - "format" : "yyyyMMddHH" - }, - "dimensionsSpec" : { - "dimensions": ["product"], - "dimensionExclusions" : [], - "spatialDimensions" : [] - }, - "columns": ["timestamp", "product", "pty_country"] - } + "type": "string", + "parseSpec": { + "format": "tsv", + "timestampSpec": { + "column": "timestamp", + "format": "yyyyMMddHH" + }, + "dimensionsSpec": { + "dimensions": [ + "product" + ], + "dimensionExclusions": [], + "spatialDimensions": [] + }, + "columns": [ + "timestamp", + "product", + "pty_country" + ] + } } diff --git a/extensions/datasketches/src/test/resources/simple_test_data_record_parser2.json b/extensions/datasketches/src/test/resources/simple_test_data_record_parser2.json new file mode 100644 index 000000000000..6f0ebf1a041e --- /dev/null +++ b/extensions/datasketches/src/test/resources/simple_test_data_record_parser2.json @@ -0,0 +1,23 @@ +{ + "type": "string", + "parseSpec": { + "format": "tsv", + "timestampSpec": { + "column": "timestamp", + "format": "yyyyMMddHH" + }, + "dimensionsSpec": { + "dimensions": [ + "product", + "pty_country" + ], + "dimensionExclusions": [], + "spatialDimensions": [] + }, + "columns": [ + "timestamp", + "product", + "pty_country" + ] + } +} diff --git a/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json b/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json index 2a7251ef6db8..d4d908a2f28f 100644 --- a/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json +++ b/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json @@ -4,8 +4,18 @@ "granularity": "ALL", "dimensions": [], "aggregations": [ - { "type": "thetaSketch", "name": "sids_sketch_count", "fieldName": "sids_sketch", "size": 16384 }, - { "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } + { + "type": "thetaSketch", + "name": "sids_sketch_count", + "fieldName": "sids_sketch", + "size": 16384 + }, + { + "type": "thetaSketch", + "name": "non_existing_col_validation", + "fieldName": "non_existing_col", + "size": 16384 + } ], "postAggregations": [ { @@ -19,8 +29,7 @@ { "type": "thetaSketchEstimate", "name": "sketchIntersectionPostAggEstimate", - "field": - { + "field": { "type": "thetaSketchSetOp", "name": "sketchIntersectionPostAgg", "func": "INTERSECT", @@ -40,8 +49,7 @@ { "type": "thetaSketchEstimate", "name": "sketchAnotBPostAggEstimate", - "field": - { + "field": { "type": "thetaSketchSetOp", "name": "sketchAnotBUnionPostAgg", "func": "NOT", @@ -60,8 +68,7 @@ { "type": "thetaSketchEstimate", "name": "sketchUnionPostAggEstimate", - "field": - { + "field": { "type": "thetaSketchSetOp", "name": "sketchUnionPostAgg", "func": "UNION", @@ -79,5 +86,7 @@ } } ], - "intervals": [ "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" ] + "intervals": [ + "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" + ] } diff --git a/extensions/datasketches/src/test/resources/sketch_test_data_record_parser.json b/extensions/datasketches/src/test/resources/sketch_test_data_record_parser.json index f485cb29261b..156c5d622267 100644 --- a/extensions/datasketches/src/test/resources/sketch_test_data_record_parser.json +++ b/extensions/datasketches/src/test/resources/sketch_test_data_record_parser.json @@ -1,16 +1,22 @@ { - "type" : "string", - "parseSpec" : { - "format" : "tsv", - "timestampSpec" : { - "column" : "timestamp", - "format" : "yyyyMMddHH" - }, - "dimensionsSpec" : { - "dimensions": ["product"], - "dimensionExclusions" : [], - "spatialDimensions" : [] - }, - "columns": ["timestamp", "product", "sketch"] - } + "type": "string", + "parseSpec": { + "format": "tsv", + "timestampSpec": { + "column": "timestamp", + "format": "yyyyMMddHH" + }, + "dimensionsSpec": { + "dimensions": [ + "product" + ], + "dimensionExclusions": [], + "spatialDimensions": [] + }, + "columns": [ + "timestamp", + "product", + "sketch" + ] + } }