From ff940de0c180b22b8b0b41509a9bb32a259f42ff Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Wed, 21 Aug 2019 15:59:00 -0500 Subject: [PATCH 1/3] Reset union in AggregateCombiner --- .../theta/SketchAggregatorFactory.java | 1 + .../theta/SketchAggregationTest.java | 35 +++++++++++++++++++ .../src/test/resources/empty_sketch_data.tsv | 16 +++++++++ .../empty_sketch_data_record_parser.json | 24 +++++++++++++ .../empty_sketch_group_by_query.json | 20 +++++++++++ .../empty_sketch_test_data_aggregators.json | 9 +++++ 6 files changed, 105 insertions(+) create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index b01cf207f7fa..23aba36f2892 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -107,6 +107,7 @@ public AggregateCombiner makeAggregateCombiner() public void reset(ColumnValueSelector selector) { union.reset(); + combined.invalidateCache(); fold(selector); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 980a0932edcc..e2dd685f04d6 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -146,6 +146,41 @@ public void testSketchDataIngestAndGpByQuery() throws Exception ); } + @Test + public void testEmptySketchAggregateCombine() throws Exception + { + final String groupByQueryString = readFileFromClasspathAsString("empty_sketch_group_by_query.json"); + final GroupByQuery groupByQuery = (GroupByQuery) helper.getObjectMapper() + .readValue(groupByQueryString, Query.class); + + final Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(SketchAggregationTest.class.getClassLoader().getResource("empty_sketch_data.tsv").getFile()), + readFileFromClasspathAsString("empty_sketch_data_record_parser.json"), + readFileFromClasspathAsString("empty_sketch_test_data_aggregators.json"), + 0, + Granularities.NONE, + 5, + groupByQueryString + ); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals( + ResultRow.fromLegacyRow( + new MapBasedRow( + DateTimes.of("2019-07-14T00:00:00.000Z"), + ImmutableMap + .builder() + .put("product", "product_b") + .put("sketch_count", 0.0) + .build() + ), + groupByQuery + ), + results.get(0) + ); + } + @Test public void testThetaCardinalityOnSimpleColumn() throws Exception { diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv new file mode 100644 index 000000000000..f8020612ff7d --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv @@ -0,0 +1,16 @@ +2019071401 product_c LN AgMDAAAazJMFAAAAAACAP5fNgSSfQV0umrd4HAzZ90Kd3R29Vl6VSJi331EESX9Lok02RyQmAVg= +2019071401 product_c LN AgMDAAAazJMDAAAAAACAPyVpTh4+JHwJFNDKR6ZFClXojoB1xfTMaA== +2019071401 product_d ZN +2019071401 product_a DN AgMDAAAazJMCAAAAAACAP2kLLSWTMxpfEgn3z/0XznI= +2019071401 product_b ZN +2019071401 product_a CN AgMDAAAazJMHAAAAAACAP7Q7KD3RviIEFd3Mbh+LWimFuB2bxV6WNIgtLk8Cxj1Pk7yxRxlRoV2mjOvxZPnHXlddhXxKoIpu +2019071401 product_a GN AgMDAAAazJMCAAAAAACAP6g0M3E1MIgaMEfeNTWualE= +2019071401 product_b ZN +2019071401 product_a LN AgMDAAAazJMCAAAAAACAP6g0M3E1MIgaMEfeNTWualE= +2019071401 product_a SN AgMDAAAazJMCAAAAAACAP/eILNqf8ikXEgTMlbEhTVM= +2019071401 product_d LN AgMDAAAazJMDAAAAAACAP0CuXW2EE1EDffmgfmwHIhfMinjqFt/4aA== +2019071401 product_a SN AgMDAAAazJMCAAAAAACAP7n4gqkOxPQSBgJI/SODoVY= +2019071401 product_a LN AgMDAAAazJMCAAAAAACAPxp1eGUU6zom22Wxo51cx3o= +2019071401 product_d ZN +2019071401 product_a ZN AgMDAAAazJMCAAAAAACAP/eILNqf8ikXEgTMlbEhTVM= +2019071401 product_c LN AgMDAAAazJMFAAAAAACAP5fNgSSfQV0umrd4HAzZ90Kd3R29Vl6VSJi331EESX9Lok02RyQmAVg= diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json b/extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json new file mode 100644 index 000000000000..42a00c3cd93f --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json @@ -0,0 +1,24 @@ +{ + "type": "string", + "parseSpec": { + "format": "tsv", + "timestampSpec": { + "column": "timestamp", + "format": "yyyyMMddHH" + }, + "dimensionsSpec": { + "dimensions": [ + "product", + "product_code" + ], + "dimensionExclusions": [], + "spatialDimensions": [] + }, + "columns": [ + "timestamp", + "product", + "product_code", + "product_sketch" + ] + } +} diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json b/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json new file mode 100644 index 000000000000..7d2622a7763b --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json @@ -0,0 +1,20 @@ +{ + "queryType": "groupBy", + "dataSource": "test_datasource", + "granularity":"ALL", + "dimensions": ["product"], + "filter" : { + "type" : "selector", "dimension" : "product", "value" : "product_b" + }, + "aggregations": [ + { + "type": "thetaSketch", + "name": "sketch_count", + "fieldName": "product_sketch", + "size": 16384 + } + ], + "intervals": [ + "2019-07-14T00:00:00.000Z/2019-07-15T00:00:00.000Z" + ] +} diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json b/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json new file mode 100644 index 000000000000..1d0980183637 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json @@ -0,0 +1,9 @@ +[ + { + "type": "thetaSketch", + "name": "product_sketch", + "fieldName": "product_sketch", + "isInputThetaSketch": true, + "size": 16384 + } +] From 3cbec22b66041c80b3e00786446673442b57b780 Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Thu, 22 Aug 2019 10:49:45 -0500 Subject: [PATCH 2/3] Use newer sketch objects for test --- .../src/test/resources/empty_sketch_data.tsv | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv index f8020612ff7d..f48ce89cb18f 100644 --- a/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv @@ -1,16 +1,16 @@ -2019071401 product_c LN AgMDAAAazJMFAAAAAACAP5fNgSSfQV0umrd4HAzZ90Kd3R29Vl6VSJi331EESX9Lok02RyQmAVg= -2019071401 product_c LN AgMDAAAazJMDAAAAAACAPyVpTh4+JHwJFNDKR6ZFClXojoB1xfTMaA== +2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ= +2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/EOmCmVerjm4= 2019071401 product_d ZN -2019071401 product_a DN AgMDAAAazJMCAAAAAACAP2kLLSWTMxpfEgn3z/0XznI= +2019071401 product_a DN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= 2019071401 product_b ZN -2019071401 product_a CN AgMDAAAazJMHAAAAAACAP7Q7KD3RviIEFd3Mbh+LWimFuB2bxV6WNIgtLk8Cxj1Pk7yxRxlRoV2mjOvxZPnHXlddhXxKoIpu -2019071401 product_a GN AgMDAAAazJMCAAAAAACAP6g0M3E1MIgaMEfeNTWualE= +2019071401 product_a CN AwEDAAAAAgABAAAAAAAAAP////////9/o31ldGC0E2s= +2019071401 product_a GN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= 2019071401 product_b ZN -2019071401 product_a LN AgMDAAAazJMCAAAAAACAP6g0M3E1MIgaMEfeNTWualE= -2019071401 product_a SN AgMDAAAazJMCAAAAAACAP/eILNqf8ikXEgTMlbEhTVM= -2019071401 product_d LN AgMDAAAazJMDAAAAAACAP0CuXW2EE1EDffmgfmwHIhfMinjqFt/4aA== -2019071401 product_a SN AgMDAAAazJMCAAAAAACAP7n4gqkOxPQSBgJI/SODoVY= -2019071401 product_a LN AgMDAAAazJMCAAAAAACAPxp1eGUU6zom22Wxo51cx3o= +2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38= +2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/jDwgknTL/S0= +2019071401 product_d LN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= +2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/4EyBvXLM/xs= +2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38= 2019071401 product_d ZN -2019071401 product_a ZN AgMDAAAazJMCAAAAAACAP/eILNqf8ikXEgTMlbEhTVM= -2019071401 product_c LN AgMDAAAazJMFAAAAAACAP5fNgSSfQV0umrd4HAzZ90Kd3R29Vl6VSJi331EESX9Lok02RyQmAVg= +2019071401 product_a ZN AwEDAAAAAgABAAAAAAAAAP////////9//mseeN0UsgU= +2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/Rn5CRuhP3h4= From fdf3a146b5cee23b8ae2402d67e9d2a81a44fd9c Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Thu, 22 Aug 2019 16:14:28 -0500 Subject: [PATCH 3/3] Add empty sketch objects --- .../datasketches/src/test/resources/empty_sketch_data.tsv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv index f48ce89cb18f..e9d175d28e20 100644 --- a/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv @@ -1,16 +1,16 @@ 2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ= 2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/EOmCmVerjm4= -2019071401 product_d ZN +2019071401 product_d ZN AQMDAAAezJM= 2019071401 product_a DN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= -2019071401 product_b ZN +2019071401 product_b ZN AQMDAAAezJM= 2019071401 product_a CN AwEDAAAAAgABAAAAAAAAAP////////9/o31ldGC0E2s= 2019071401 product_a GN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= -2019071401 product_b ZN +2019071401 product_b ZN AQMDAAAezJM= 2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38= 2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/jDwgknTL/S0= 2019071401 product_d LN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= 2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/4EyBvXLM/xs= 2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38= -2019071401 product_d ZN +2019071401 product_d ZN AQMDAAAezJM= 2019071401 product_a ZN AwEDAAAAAgABAAAAAAAAAP////////9//mseeN0UsgU= 2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/Rn5CRuhP3h4=