From f277a54a5cae71ed5f5685b157e0141015bc5f05 Mon Sep 17 00:00:00 2001 From: turu Date: Thu, 11 Feb 2016 23:46:24 +0100 Subject: [PATCH] removed unsafe heuristics from hll compareTo and provided unit test for regression --- .../hyperloglog/HyperLogLogCollector.java | 17 +------- .../hyperloglog/HyperLogLogCollectorTest.java | 39 +++++++++++++++++++ 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/processing/src/main/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollector.java b/processing/src/main/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollector.java index 37166e938f9f..4803b2ca765e 100644 --- a/processing/src/main/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollector.java +++ b/processing/src/main/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollector.java @@ -681,21 +681,6 @@ private static short mergeAndStoreByteRegister( @Override public int compareTo(HyperLogLogCollector other) { - final int lhsOffset = (int) this.getRegisterOffset() & 0xffff; - final int rhsOffset = (int) other.getRegisterOffset() & 0xffff; - - if (lhsOffset == rhsOffset) { - final int lhsNumNonZero = (int) this.getNumNonZeroRegisters() & 0xff; - final int rhsNumNonZero = (int) this.getNumNonZeroRegisters() & 0xff; - int retVal = Double.compare(lhsNumNonZero, rhsNumNonZero); - - if (retVal == 0) { - retVal = Double.compare(this.estimateCardinality(), other.estimateCardinality()); - } - - return retVal; - } else { - return Double.compare(lhsOffset, rhsOffset); - } + return Double.compare(this.estimateCardinality(), other.estimateCardinality()); } } diff --git a/processing/src/test/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollectorTest.java b/processing/src/test/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollectorTest.java index 9f7f92be6c47..25bd2ee82906 100644 --- a/processing/src/test/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollectorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/hyperloglog/HyperLogLogCollectorTest.java @@ -776,6 +776,45 @@ public void testCompare2() throws Exception } } + @Test + public void testCompareToShouldBehaveConsistentlyWithEstimatedCardinalitiesEvenInToughCases() throws Exception { + // given + Random rand = new Random(0); + HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar"); + Comparator comparator = factory.getComparator(); + + for (int i = 0; i < 1000; ++i) { + // given + HyperLogLogCollector leftCollector = HyperLogLogCollector.makeLatestCollector(); + int j = rand.nextInt(9000) + 5000; + for (int l = 0; l < j; ++l) { + leftCollector.add(fn.hashLong(rand.nextLong()).asBytes()); + } + + HyperLogLogCollector rightCollector = HyperLogLogCollector.makeLatestCollector(); + int k = rand.nextInt(9000) + 5000; + for (int l = 0; l < k; ++l) { + rightCollector.add(fn.hashLong(rand.nextLong()).asBytes()); + } + + // when + final int orderedByCardinality = Double.compare(leftCollector.estimateCardinality(), + rightCollector.estimateCardinality()); + final int orderedByComparator = comparator.compare(leftCollector, rightCollector); + + // then, assert hyperloglog comparator behaves consistently with estimated cardinalities + Assert.assertEquals( + String.format("orderedByComparator=%d, orderedByCardinality=%d,\n" + + "Left={cardinality=%f, hll=%s},\n" + + "Right={cardinality=%f, hll=%s},\n", orderedByComparator, orderedByCardinality, + leftCollector.estimateCardinality(), leftCollector, + rightCollector.estimateCardinality(), rightCollector), + orderedByCardinality, + orderedByComparator + ); + } + } + @Test public void testMaxOverflow() { HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();