From 97e52358e4ab7fe98aeb2d53d27a5a1de97a7c7c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 19 Feb 2018 22:26:33 -0800 Subject: [PATCH 1/3] Lazy-ify ValueMatcher BitSet optimization for string dimensions. The idea is that if the prior evaluated filters are decently selective, such that they mean we won't see all possible values of the later filters, then the eager version of the optimization is too wasteful. This involves checking an extra bitset, but the overhead is small even if the lazy-ification is useless. --- .../benchmark/FilterPartitionBenchmark.java | 168 ++++++------------ .../druid/segment/DimensionSelectorUtils.java | 18 +- .../column/SimpleDictionaryEncodedColumn.java | 19 +- 3 files changed, 88 insertions(+), 117 deletions(-) diff --git a/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java index bd7e8415495e..576d17f27eb1 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java @@ -23,6 +23,7 @@ import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; import com.google.common.io.Files; import io.druid.benchmark.datagen.BenchmarkDataGenerator; import io.druid.benchmark.datagen.BenchmarkSchemaInfo; @@ -35,7 +36,6 @@ import io.druid.java.util.common.guava.Sequences; import io.druid.java.util.common.logger.Logger; import io.druid.js.JavaScriptConfig; -import io.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.extraction.ExtractionFn; @@ -73,6 +73,7 @@ import io.druid.segment.filter.SelectorFilter; import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.serde.ComplexMetrics; +import io.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.commons.io.FileUtils; import org.joda.time.Interval; import org.openjdk.jmh.annotations.Benchmark; @@ -242,12 +243,7 @@ public void stringRead(Blackhole blackhole) throws Exception { StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, null); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } @Benchmark @@ -258,11 +254,7 @@ public void longRead(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, null); - Sequence> longListSeq = readCursorsLong(cursors, blackhole); - List strings = longListSeq.limit(1).toList().get(0); - for (Long st : strings) { - blackhole.consume(st); - } + readCursorsLong(cursors, blackhole); } @Benchmark @@ -273,11 +265,7 @@ public void timeFilterNone(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, timeFilterNone); - Sequence> longListSeq = readCursorsLong(cursors, blackhole); - List strings = longListSeq.limit(1).toList().get(0); - for (Long st : strings) { - blackhole.consume(st); - } + readCursorsLong(cursors, blackhole); } @Benchmark @@ -288,11 +276,7 @@ public void timeFilterHalf(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, timeFilterHalf); - Sequence> longListSeq = readCursorsLong(cursors, blackhole); - List strings = longListSeq.limit(1).toList().get(0); - for (Long st : strings) { - blackhole.consume(st); - } + readCursorsLong(cursors, blackhole); } @Benchmark @@ -303,11 +287,7 @@ public void timeFilterAll(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, timeFilterAll); - Sequence> longListSeq = readCursorsLong(cursors, blackhole); - List strings = longListSeq.limit(1).toList().get(0); - for (Long st : strings) { - blackhole.consume(st); - } + readCursorsLong(cursors, blackhole); } @Benchmark @@ -319,12 +299,7 @@ public void readWithPreFilter(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, filter); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } @Benchmark @@ -336,12 +311,7 @@ public void readWithPostFilter(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, filter); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } @Benchmark @@ -353,12 +323,7 @@ public void readWithExFnPreFilter(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, filter); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } @Benchmark @@ -370,12 +335,24 @@ public void readWithExFnPostFilter(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, filter); + readCursors(cursors, blackhole); + } - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void readAndFilter(Blackhole blackhole) + { + Filter andFilter = new AndFilter( + ImmutableList.of( + new SelectorFilter("dimUniform", "199"), + new NoBitmapSelectorDimFilter("dimUniform", "super-199", JS_EXTRACTION_FN).toFilter() + ) + ); + + StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); + Sequence cursors = makeCursors(sa, andFilter); + readCursors(cursors, blackhole); } @Benchmark @@ -389,12 +366,7 @@ public void readOrFilter(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, orFilter); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } @Benchmark @@ -408,12 +380,7 @@ public void readOrFilterCNF(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, Filters.convertToCNF(orFilter)); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } @Benchmark @@ -450,12 +417,7 @@ public void readComplexOrFilter(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, dimFilter3.toFilter()); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } @Benchmark @@ -492,12 +454,7 @@ public void readComplexOrFilterCNF(Blackhole blackhole) throws Exception StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); Sequence cursors = makeCursors(sa, Filters.convertToCNF(dimFilter3.toFilter())); - - Sequence> stringListSeq = readCursors(cursors, blackhole); - List strings = stringListSeq.limit(1).toList().get(0); - for (String st : strings) { - blackhole.consume(st); - } + readCursors(cursors, blackhole); } private Sequence makeCursors(StorageAdapter sa, Filter filter) @@ -505,55 +462,46 @@ private Sequence makeCursors(StorageAdapter sa, Filter filter) return sa.makeCursors(filter, schemaInfo.getDataInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null); } - private Sequence> readCursors(Sequence cursors, final Blackhole blackhole) + private void readCursors(Sequence cursors, Blackhole blackhole) { - return Sequences.map( + final Sequence voids = Sequences.map( cursors, - new Function>() - { - @Override - public List apply(Cursor input) - { - List strings = new ArrayList(); - List selectors = new ArrayList<>(); - selectors.add( - input.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimSequential", null)) - ); - //selectors.add(input.makeDimensionSelector(new DefaultDimensionSpec("dimB", null))); - while (!input.isDone()) { - for (DimensionSelector selector : selectors) { - IndexedInts row = selector.getRow(); - blackhole.consume(selector.lookupName(row.get(0))); - //strings.add(selector.lookupName(row.get(0))); - } - input.advance(); + input -> { + List selectors = new ArrayList<>(); + selectors.add( + input.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimSequential", null)) + ); + while (!input.isDone()) { + for (DimensionSelector selector : selectors) { + IndexedInts row = selector.getRow(); + blackhole.consume(selector.lookupName(row.get(0))); } - return strings; + input.advance(); } + return null; } ); + + blackhole.consume(voids.toList()); } - private Sequence> readCursorsLong(Sequence cursors, final Blackhole blackhole) + private void readCursorsLong(Sequence cursors, final Blackhole blackhole) { - return Sequences.map( + final Sequence voids = Sequences.map( cursors, - new Function>() - { - @Override - public List apply(Cursor input) - { - List longvals = new ArrayList(); - BaseLongColumnValueSelector selector = input.getColumnSelectorFactory().makeColumnValueSelector("sumLongSequential"); - while (!input.isDone()) { - long rowval = selector.getLong(); - blackhole.consume(rowval); - input.advance(); - } - return longvals; + input -> { + BaseLongColumnValueSelector selector = input.getColumnSelectorFactory() + .makeColumnValueSelector("sumLongSequential"); + while (!input.isDone()) { + long rowval = selector.getLong(); + blackhole.consume(rowval); + input.advance(); } + return null; } ); + + blackhole.consume(voids.toList()); } private static class NoBitmapSelectorFilter extends SelectorFilter diff --git a/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java b/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java index f83be70241b7..f39748a21f6b 100644 --- a/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java @@ -170,8 +170,11 @@ private static ValueMatcher makeDictionaryEncodedValueMatcherGeneric( Predicate predicate ) { - final BitSet predicateMatchingValueIds = makePredicateMatchingSet(selector, predicate); + final BitSet checkedIds = new BitSet(selector.getValueCardinality()); + final BitSet matchingIds = new BitSet(selector.getValueCardinality()); final boolean matchNull = predicate.apply(null); + + // Lazy matcher; only check an id if matches() is called. return new ValueMatcher() { @Override @@ -184,7 +187,18 @@ public boolean matches() return matchNull; } else { for (int i = 0; i < size; ++i) { - if (predicateMatchingValueIds.get(row.get(i))) { + final int id = row.get(i); + final boolean matches; + + if (checkedIds.get(id)) { + matches = matchingIds.get(id); + } else { + matches = predicate.apply(selector.lookupName(id)); + checkedIds.set(id); + matchingIds.set(id, matches); + } + + if (matches) { return true; } } diff --git a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java index 3d67beb22a2b..b65038b7991a 100644 --- a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java +++ b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java @@ -262,16 +262,25 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public ValueMatcher makeValueMatcher(final Predicate predicate) { - final BitSet predicateMatchingValueIds = DimensionSelectorUtils.makePredicateMatchingSet( - this, - predicate - ); + final BitSet checkedIds = new BitSet(getCardinality()); + final BitSet matchingIds = new BitSet(getCardinality()); + + // Lazy matcher; only check an id if matches() is called. return new ValueMatcher() { @Override public boolean matches() { - return predicateMatchingValueIds.get(getRowValue()); + final int id = getRowValue(); + + if (checkedIds.get(id)) { + return matchingIds.get(id); + } else { + final boolean matches = predicate.apply(lookupName(id)); + checkedIds.set(id); + matchingIds.set(id, matches); + return matches; + } } @Override From ff824ffa8b46ef283f9c607a3f062487f44a77f8 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 20 Feb 2018 00:38:05 -0800 Subject: [PATCH 2/3] Remove import. --- .../main/java/io/druid/benchmark/FilterPartitionBenchmark.java | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java index 576d17f27eb1..6d7c8e19d11a 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java @@ -20,7 +20,6 @@ package io.druid.benchmark; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; From bd1635c735cc55718dded72d6942bb469cba9851 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 21 Feb 2018 07:35:48 -0800 Subject: [PATCH 3/3] Minor transformation --- .../main/java/io/druid/segment/DimensionSelectorUtils.java | 4 +++- .../druid/segment/column/SimpleDictionaryEncodedColumn.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java b/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java index f39748a21f6b..28fc350aa3c7 100644 --- a/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java @@ -195,7 +195,9 @@ public boolean matches() } else { matches = predicate.apply(selector.lookupName(id)); checkedIds.set(id); - matchingIds.set(id, matches); + if (matches) { + matchingIds.set(id); + } } if (matches) { diff --git a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java index b65038b7991a..e5da89a7be95 100644 --- a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java +++ b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java @@ -278,7 +278,9 @@ public boolean matches() } else { final boolean matches = predicate.apply(lookupName(id)); checkedIds.set(id); - matchingIds.set(id, matches); + if (matches) { + matchingIds.set(id); + } return matches; } }