From 0408c6c1537b2b7d2100262862a33ee84e44b45a Mon Sep 17 00:00:00 2001
From: Alexander Saydakov <13126686+AlexanderSaydakov@users.noreply.github.com>
Date: Thu, 25 Apr 2019 14:28:41 -0700
Subject: [PATCH] handle empty sketches (#7526)

* handle empty sketches

* return array of NaN in case of empty sketch

* noinspection ForLoopReplaceableByForEach in tests

* style fixes
---
 ...oublesSketchToHistogramPostAggregator.java |  5 ++
 ...oublesSketchToQuantilesPostAggregator.java |  5 ++
 ...esSketchToHistogramPostAggregatorTest.java | 84 ++++++++++++++++++
 ...esSketchToQuantilesPostAggregatorTest.java | 86 +++++++++++++++++++
 4 files changed, 180 insertions(+)
 create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregatorTest.java
 create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregatorTest.java

diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregator.java
index 2ec10b595f77..da8f3c28345f 100644
--- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregator.java
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregator.java
@@ -56,6 +56,11 @@ public DoublesSketchToHistogramPostAggregator(
   public Object compute(final Map<String, Object> combinedAggregators)
   {
     final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
+    if (sketch.isEmpty()) {
+      final double[] histogram = new double[splitPoints.length + 1];
+      Arrays.fill(histogram, Double.NaN);
+      return histogram;
+    }
     final double[] histogram = sketch.getPMF(splitPoints);
     for (int i = 0; i < histogram.length; i++) {
       histogram[i] *= sketch.getN();
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregator.java
index 60a7064011f0..e5089cbd07fb 100644
--- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregator.java
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregator.java
@@ -75,6 +75,11 @@ public double[] getFractions()
   public Object compute(final Map<String, Object> combinedAggregators)
   {
     final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
+    if (sketch.isEmpty()) {
+      final double[] quantiles = new double[fractions.length];
+      Arrays.fill(quantiles, Double.NaN);
+      return quantiles;
+    }
     return sketch.getQuantiles(fractions);
   }
 
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregatorTest.java
new file mode 100644
index 000000000000..b5aeec985fb3
--- /dev/null
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToHistogramPostAggregatorTest.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.quantiles;
+
+import org.apache.druid.query.aggregation.Aggregator;
+import org.apache.druid.query.aggregation.PostAggregator;
+import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl;
+import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class DoublesSketchToHistogramPostAggregatorTest
+{
+  @Test
+  public void emptySketch()
+  {
+    final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null);
+    final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
+
+    final Map<String, Object> fields = new HashMap<>();
+    fields.put("sketch", agg.get());
+
+    final PostAggregator postAgg = new DoublesSketchToHistogramPostAggregator(
+        "histogram",
+        new FieldAccessPostAggregator("field", "sketch"),
+        new double[] {3.5}
+    );
+
+    final double[] histogram = (double[]) postAgg.compute(fields);
+    Assert.assertNotNull(histogram);
+    Assert.assertEquals(2, histogram.length);
+    Assert.assertTrue(Double.isNaN(histogram[0]));
+    Assert.assertTrue(Double.isNaN(histogram[1]));
+  }
+
+  @Test
+  public void normalCase()
+  {
+    final double[] values = new double[] {1, 2, 3, 4, 5, 6};
+    final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values);
+
+    final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
+    //noinspection ForLoopReplaceableByForEach
+    for (int i = 0; i < values.length; i++) {
+      agg.aggregate();
+      selector.increment();
+    }
+
+    final Map<String, Object> fields = new HashMap<>();
+    fields.put("sketch", agg.get());
+
+    final PostAggregator postAgg = new DoublesSketchToHistogramPostAggregator(
+        "histogram",
+        new FieldAccessPostAggregator("field", "sketch"),
+        new double[] {3.5} // splits distribution in two buckets of equal mass
+    );
+
+    final double[] histogram = (double[]) postAgg.compute(fields);
+    Assert.assertNotNull(histogram);
+    Assert.assertEquals(2, histogram.length);
+    Assert.assertEquals(3.0, histogram[0], 0);
+    Assert.assertEquals(3.0, histogram[1], 0);
+  }
+}
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregatorTest.java
new file mode 100644
index 000000000000..7a4edab5e443
--- /dev/null
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchToQuantilesPostAggregatorTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.quantiles;
+
+import org.apache.druid.query.aggregation.Aggregator;
+import org.apache.druid.query.aggregation.PostAggregator;
+import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl;
+import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class DoublesSketchToQuantilesPostAggregatorTest
+{
+  @Test
+  public void emptySketch()
+  {
+    final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null);
+    final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
+
+    final Map<String, Object> fields = new HashMap<>();
+    fields.put("sketch", agg.get());
+
+    final PostAggregator postAgg = new DoublesSketchToQuantilesPostAggregator(
+        "quantiles",
+        new FieldAccessPostAggregator("field", "sketch"),
+        new double[] {0, 0.5, 1}
+    );
+
+    final double[] quantiles = (double[]) postAgg.compute(fields);
+    Assert.assertNotNull(quantiles);
+    Assert.assertEquals(3, quantiles.length);
+    Assert.assertTrue(Double.isNaN(quantiles[0]));
+    Assert.assertTrue(Double.isNaN(quantiles[1]));
+    Assert.assertTrue(Double.isNaN(quantiles[2]));
+  }
+
+  @Test
+  public void normalCase()
+  {
+    final double[] values = new double[] {1, 2, 3, 4, 5};
+    final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values);
+
+    final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
+    //noinspection ForLoopReplaceableByForEach
+    for (int i = 0; i < values.length; i++) {
+      agg.aggregate();
+      selector.increment();
+    }
+
+    final Map<String, Object> fields = new HashMap<>();
+    fields.put("sketch", agg.get());
+
+    final PostAggregator postAgg = new DoublesSketchToQuantilesPostAggregator(
+        "quantiles",
+        new FieldAccessPostAggregator("field", "sketch"),
+        new double[] {0, 0.5, 1}
+    );
+
+    final double[] quantiles = (double[]) postAgg.compute(fields);
+    Assert.assertNotNull(quantiles);
+    Assert.assertEquals(3, quantiles.length);
+    Assert.assertEquals(1.0, quantiles[0], 0);
+    Assert.assertEquals(3.0, quantiles[1], 0);
+    Assert.assertEquals(5.0, quantiles[2], 0);
+  }
+}