From d59c36a65d1deeac8fd2b47a752e444bec94030c Mon Sep 17 00:00:00 2001 From: Keuntae Park Date: Fri, 29 Apr 2016 18:34:06 +0900 Subject: [PATCH 1/5] support query granularity and interval for search query --- .../druid/query/search/SearchQueryRunner.java | 31 ++++++++++++++++--- .../query/search/SearchQueryRunnerTest.java | 16 +++++----- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 49b3a011fd60..583ddc424868 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -27,6 +27,8 @@ import com.google.common.collect.Maps; import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.ImmutableBitmap; +import com.metamx.collections.bitmap.MutableBitmap; +import com.metamx.common.IAE; import com.metamx.common.ISE; import com.metamx.common.guava.Accumulator; import com.metamx.common.guava.FunctionalIterable; @@ -53,9 +55,11 @@ import io.druid.segment.StorageAdapter; import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.Column; +import io.druid.segment.column.GenericColumn; import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import org.apache.commons.lang.mutable.MutableInt; +import org.joda.time.Interval; import java.util.Arrays; import java.util.List; @@ -90,6 +94,11 @@ public Sequence> run( final SearchQuerySpec searchQuerySpec = query.getQuery(); final int limit = query.getLimit(); final boolean descending = query.isDescending(); + final List intervals = query.getQuerySegmentSpec().getIntervals(); + if (intervals.size() != 1) { + throw new IAE("Should only have one interval, got[%s]", intervals); + } + final Interval interval = intervals.get(0); // Closing this will cause segfaults in unit tests. 
final QueryableIndex index = segment.asQueryableIndex(); @@ -109,6 +118,22 @@ public Sequence> run( final ImmutableBitmap baseFilter = filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index)); + MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap(); + final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME); + final GenericColumn timeValues = timeColumn.getGenericColumn(); + + for (int i = 0; i < timeValues.length(); i++) + { + long time = timeValues.getLongSingleValueRow(i); + if (interval.contains(time)) + { + timeBitmap.add(i); + } + } + final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap); + final ImmutableBitmap timeFilteredBitmap = + (baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter); + for (DimensionSpec dimension : dimsToSearch) { final Column column = index.getColumn(dimension.getDimension()); if (column == null) { @@ -127,9 +152,7 @@ public Sequence> run( continue; } ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); - if (baseFilter != null) { - bitmap = bitmapFactory.intersection(Arrays.asList(baseFilter, bitmap)); - } + bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap)); if (bitmap.size() > 0) { MutableInt counter = new MutableInt(bitmap.size()); MutableInt prev = retVal.put(new SearchHit(dimension.getOutputName(), dimVal), counter); @@ -165,7 +188,7 @@ public Sequence> run( dimsToSearch = dimensions; } - final Sequence cursors = adapter.makeCursors(filter, segment.getDataInterval(), QueryGranularities.ALL, descending); + final Sequence cursors = adapter.makeCursors(filter, interval, query.getGranularity(), descending); final TreeMap retVal = cursors.accumulate( Maps.newTreeMap(query.getSort().getComparator()), diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java index 
b801f3b3899a..58006a739299 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java @@ -163,14 +163,14 @@ public Sequence> run( ); List expectedHits = Lists.newLinkedList(); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 186)); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 558)); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 186)); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 186)); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 186)); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 372)); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 186)); - expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 372)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 91)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 273)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 91)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 91)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 91)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 182)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 91)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 182)); checkSearchQuery(searchQuery, mergedRunner, expectedHits); } From d5c76b695bffb2e4ea9d35d23c0ff335bccf94fd Mon Sep 17 00:00:00 2001 From: Keuntae Park Date: Mon, 2 May 2016 10:07:10 +0900 Subject: [PATCH 2/5] skip unnecessary bitmap 
calculation when query interval contains whole the data interval of the given segments. --- .../druid/query/search/SearchQueryRunner.java | 32 ++++++++++++------- .../query/search/SearchQueryRunnerTest.java | 2 +- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 583ddc424868..1db97b4c31f2 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -118,21 +118,27 @@ public Sequence> run( final ImmutableBitmap baseFilter = filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index)); - MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap(); - final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME); - final GenericColumn timeValues = timeColumn.getGenericColumn(); - - for (int i = 0; i < timeValues.length(); i++) + ImmutableBitmap timeFilteredBitmap; + if (!interval.contains(segment.getDataInterval())) { - long time = timeValues.getLongSingleValueRow(i); - if (interval.contains(time)) + MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap(); + final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME); + final GenericColumn timeValues = timeColumn.getGenericColumn(); + + for (int i = 0; i < timeValues.length(); i++) { - timeBitmap.add(i); + long time = timeValues.getLongSingleValueRow(i); + if (interval.contains(time)) + { + timeBitmap.add(i); + } } + final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap); + timeFilteredBitmap = + (baseFilter == null) ? 
finalTimeBitmap : finalTimeBitmap.intersection(baseFilter); + } else { + timeFilteredBitmap = baseFilter; } - final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap); - final ImmutableBitmap timeFilteredBitmap = - (baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter); for (DimensionSpec dimension : dimsToSearch) { final Column column = index.getColumn(dimension.getDimension()); @@ -152,7 +158,9 @@ public Sequence> run( continue; } ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); - bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap)); + if (timeFilteredBitmap != null) { + bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap)); + } if (bitmap.size() > 0) { MutableInt counter = new MutableInt(bitmap.size()); MutableInt prev = retVal.put(new SearchHit(dimension.getOutputName(), dimVal), counter); diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java index 58006a739299..13c9ff660574 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java @@ -174,7 +174,7 @@ public Sequence> run( checkSearchQuery(searchQuery, mergedRunner, expectedHits); } - + @Test public void testSearchSameValueInMultiDims() { From 76ec00a77499dc6c3f554f5c04675da3d5834507 Mon Sep 17 00:00:00 2001 From: Keuntae Park Date: Mon, 2 May 2016 13:54:55 +0900 Subject: [PATCH 3/5] use binary search to find start and end index for the given interval --- .../druid/query/search/SearchQueryRunner.java | 44 +++++++++++++++---- .../query/search/SearchQueryRunnerTest.java | 2 +- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 
1db97b4c31f2..c6758dfe6d55 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -35,7 +35,6 @@ import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; import com.metamx.emitter.EmittingLogger; -import io.druid.granularity.QueryGranularities; import io.druid.query.Druids; import io.druid.query.Query; import io.druid.query.QueryRunner; @@ -125,14 +124,13 @@ public Sequence> run( final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME); final GenericColumn timeValues = timeColumn.getGenericColumn(); - for (int i = 0; i < timeValues.length(); i++) - { - long time = timeValues.getLongSingleValueRow(i); - if (interval.contains(time)) - { - timeBitmap.add(i); - } + int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true)); + int endIndex = Math.min(timeValues.length() - 1, getStartIndexOfTime(timeValues, interval.getEndMillis(), false)); + + for (int i = startIndex; i <= endIndex; i++) { + timeBitmap.add(i); } + final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap); timeFilteredBitmap = (baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter); @@ -250,6 +248,36 @@ public TreeMap accumulate(TreeMap return makeReturnResult(limit, retVal); } + private int getStartIndexOfTime(GenericColumn timeValues, long time, boolean inclusive) + { + int low = 0; + int high = timeValues.length() - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + long midVal = timeValues.getLongSingleValueRow(mid); + + if (midVal < time) + low = mid + 1; + else if (midVal > time) + high = mid - 1; + else { // key found + int i; + // rewind the index of the same time values + for (i = mid - 1; i >= 0; i--) { + long prev = timeValues.getLongSingleValueRow(i); + if (time != prev) { + break; + } + } + return inclusive ? i + 1 : i; + } + } + // key not found. 
+ // return insert index + return low; + } + private Sequence> makeReturnResult( int limit, TreeMap retVal) { diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java index 13c9ff660574..58006a739299 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java @@ -174,7 +174,7 @@ public Sequence> run( checkSearchQuery(searchQuery, mergedRunner, expectedHits); } - + @Test public void testSearchSameValueInMultiDims() { From 9333ab9df6050ef5053f39090cd633b34e2f0c26 Mon Sep 17 00:00:00 2001 From: Keuntae Park Date: Wed, 1 Jun 2016 16:33:28 +0900 Subject: [PATCH 4/5] fix based on comment --- .../src/main/java/io/druid/query/search/SearchQueryRunner.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index c6758dfe6d55..d01ae85a6736 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -118,8 +118,7 @@ public Sequence> run( filter == null ? 
null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index)); ImmutableBitmap timeFilteredBitmap; - if (!interval.contains(segment.getDataInterval())) - { + if (!interval.contains(segment.getDataInterval())) { MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap(); final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME); final GenericColumn timeValues = timeColumn.getGenericColumn(); From c360596f8aafe21c16722a591032001be8c30b87 Mon Sep 17 00:00:00 2001 From: Keuntae Park Date: Wed, 31 Aug 2016 11:11:13 +0900 Subject: [PATCH 5/5] bug fix based on the review comments and add unit tests --- .../druid/query/search/SearchQueryRunner.java | 4 +-- .../search/SearchQueryRunnerWithCaseTest.java | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index d01ae85a6736..a690cfaf092d 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -247,7 +247,7 @@ public TreeMap accumulate(TreeMap return makeReturnResult(limit, retVal); } - private int getStartIndexOfTime(GenericColumn timeValues, long time, boolean inclusive) + protected int getStartIndexOfTime(GenericColumn timeValues, long time, boolean inclusive) { int low = 0; int high = timeValues.length() - 1; @@ -274,7 +274,7 @@ else if (midVal > time) } // key not found. // return insert index - return low; + return inclusive ? 
low : low - 1; } private Sequence> makeReturnResult( diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerWithCaseTest.java b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerWithCaseTest.java index ef375244f073..86791f614975 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerWithCaseTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerWithCaseTest.java @@ -19,6 +19,7 @@ package io.druid.query.search; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -36,6 +37,7 @@ import io.druid.segment.TestIndex; import io.druid.segment.incremental.IncrementalIndex; import org.joda.time.DateTime; +import org.joda.time.Interval; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; @@ -155,6 +157,34 @@ public void testSearchSameValueInMultiDims() checkSearchQuery(searchQuery, expectedResults); } + @Test + public void testSearchIntervals() + { + SearchQuery searchQuery; + Druids.SearchQueryBuilder builder = testBuilder() + .dimensions(Arrays.asList(qualityDimension)) + .intervals("2011-01-12T00:00:00.000Z/2011-01-13T00:00:00.000Z"); + Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + + searchQuery = builder.query("otive").build(); + expectedResults.put(qualityDimension, Sets.newHashSet("AutoMotive")); + checkSearchQuery(searchQuery, expectedResults); + } + + @Test + public void testSearchNoOverrappingIntervals() + { + SearchQuery searchQuery; + Druids.SearchQueryBuilder builder = testBuilder() + .dimensions(Arrays.asList(qualityDimension)) + .intervals("2011-01-10T00:00:00.000Z/2011-01-11T00:00:00.000Z"); + Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + + searchQuery = builder.query("business").build(); + expectedResults.put(qualityDimension, Sets.newHashSet()); + checkSearchQuery(searchQuery, 
expectedResults); + } + @Test public void testFragmentSearch() {