From da48a713aeff4ec46267acc79a3c5cdb59c943f2 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 26 Aug 2014 18:14:34 -0700 Subject: [PATCH 1/2] Fix overzealous timeseries zero-filling. When the index's maxTime is not aligned with the query granularity, gran.next can cause an extra time bucket to get zero-filled. Truncating first prevents that. --- .../segment/QueryableIndexStorageAdapter.java | 5 +- .../IncrementalIndexStorageAdapter.java | 5 +- .../timeseries/TimeseriesQueryRunnerTest.java | 47 +++++++++++++++++++ .../test/java/io/druid/segment/TestIndex.java | 3 +- 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 9dcd136e024c..c174a64e20e6 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -138,7 +138,10 @@ public Sequence makeCursors(Filter filter, Interval interval, QueryGranu { Interval actualInterval = interval; - final Interval dataInterval = new Interval(getMinTime().getMillis(), gran.next(getMaxTime().getMillis())); + final Interval dataInterval = new Interval( + getMinTime().getMillis(), + gran.next(gran.truncate(getMaxTime().getMillis())) + ); if (!actualInterval.overlaps(dataInterval)) { return Sequences.empty(); diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 1c452496ffb3..6d742509ca27 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -133,8 +133,11 @@ public Sequence makeCursors(final Filter filter, final Interval interval Interval actualIntervalTmp = 
interval; + final Interval dataInterval = new Interval( + getMinTime().getMillis(), + gran.next(gran.truncate(getMaxTime().getMillis())) + ); - final Interval dataInterval = new Interval(getMinTime().getMillis(), gran.next(getMaxTime().getMillis())); if (!actualIntervalTmp.overlaps(dataInterval)) { return Sequences.empty(); } diff --git a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java index 708a7de10541..e6cdcd391d63 100644 --- a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -465,6 +465,53 @@ public void testTimeseriesGranularityNotAlignedOnSegmentBoundariesWithFilter() TestHelper.assertExpectedResults(expectedResults1, results1); } + @Test + public void testTimeseriesQueryGranularityNotAlignedWithRollupGranularity() + { + TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .filters(QueryRunnerTestHelper.providerDimension, "spot", "upfront", "total_market") + .granularity( + new PeriodGranularity( + new Period("PT1H"), + new DateTime(60000), + DateTimeZone.UTC + ) + ) + .intervals( + Arrays.asList( + new Interval( + "2011-04-15T00:00:00.000Z/2012" + ) + ) + ) + .aggregators( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory( + "idx", + "index" + ) + ) + ) + .build(); + + List> expectedResults1 = Arrays.asList( + new Result( + new DateTime("2011-04-14T23:01Z"), + new TimeseriesResultValue( + ImmutableMap.of("rows", 13L, "idx", 4717L) + ) + ) + ); + + Iterable> results1 = Sequences.toList( + runner.run(query1), + Lists.>newArrayList() + ); + TestHelper.assertExpectedResults(expectedResults1, results1); + } + @Test public void testTimeseriesWithVaryingGranWithFilter() { diff --git 
a/processing/src/test/java/io/druid/segment/TestIndex.java b/processing/src/test/java/io/druid/segment/TestIndex.java index 3d37545027cf..bb83dac77ce3 100644 --- a/processing/src/test/java/io/druid/segment/TestIndex.java +++ b/processing/src/test/java/io/druid/segment/TestIndex.java @@ -35,7 +35,6 @@ import io.druid.query.aggregation.DoubleSumAggregatorFactory; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde; -import io.druid.segment.column.ColumnConfig; import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.serde.ComplexMetrics; import org.joda.time.DateTime; @@ -69,7 +68,7 @@ public class TestIndex }; public static final String[] DIMENSIONS = new String[]{"provider", "quALIty", "plAcEmEnT", "pLacementish"}; public static final String[] METRICS = new String[]{"iNdEx"}; - private static final Interval DATA_INTERVAL = new Interval("2011-01-12T00:00:00.000Z/2011-04-16T00:00:00.000Z"); + private static final Interval DATA_INTERVAL = new Interval("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z"); private static final AggregatorFactory[] METRIC_AGGS = new AggregatorFactory[]{ new DoubleSumAggregatorFactory(METRICS[0], METRICS[0]), new HyperUniquesAggregatorFactory("quality_uniques", "quality") From 0eea1dc08d10cd748b8738d8d5dada2a0a217242 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 26 Aug 2014 18:47:52 -0700 Subject: [PATCH 2/2] Test for desired timeseries zero-filling behavior. 
--- .../timeseries/TimeseriesQueryRunnerTest.java | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java index e6cdcd391d63..1f4ba61eac06 100644 --- a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -21,7 +21,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.metamx.common.Granularity; import com.metamx.common.guava.Sequences; import io.druid.granularity.PeriodGranularity; import io.druid.granularity.QueryGranularity; @@ -465,6 +467,74 @@ public void testTimeseriesGranularityNotAlignedOnSegmentBoundariesWithFilter() TestHelper.assertExpectedResults(expectedResults1, results1); } + @Test + public void testTimeseriesQueryZeroFilling() + { + TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .filters(QueryRunnerTestHelper.providerDimension, "spot", "upfront", "total_market") + .granularity(QueryGranularity.HOUR) + .intervals( + Arrays.asList( + new Interval( + "2011-04-14T00:00:00.000Z/2011-05-01T00:00:00.000Z" + ) + ) + ) + .aggregators( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory( + "idx", + "index" + ) + ) + ) + .build(); + + List> lotsOfZeroes = Lists.newArrayList(); + for (final Long millis : QueryGranularity.HOUR.iterable( + new DateTime("2011-04-14T01").getMillis(), + new DateTime("2011-04-15").getMillis() + )) { + lotsOfZeroes.add( + new Result<>( + new DateTime(millis), + new TimeseriesResultValue( + ImmutableMap.of("rows", 0L, "idx", 0L) + ) + ) + ); + } + List> expectedResults1 = Lists.newArrayList( + 
Iterables.concat( + Arrays.asList( + new Result<>( + new DateTime("2011-04-14T00"), + new TimeseriesResultValue( + ImmutableMap.of("rows", 13L, "idx", 4907L) + ) + ) + ), + lotsOfZeroes, + Arrays.asList( + new Result<>( + new DateTime("2011-04-15T00"), + new TimeseriesResultValue( + ImmutableMap.of("rows", 13L, "idx", 4717L) + ) + ) + ) + ) + ); + + Iterable> results1 = Sequences.toList( + runner.run(query1), + Lists.>newArrayList() + ); + TestHelper.assertExpectedResults(expectedResults1, results1); + } + @Test public void testTimeseriesQueryGranularityNotAlignedWithRollupGranularity() {