From 55f7dd4629b254301f0ade6552635e468c6c7d0d Mon Sep 17 00:00:00 2001 From: binlijin Date: Fri, 22 Jan 2016 10:40:09 +0800 Subject: [PATCH] optimize create inverted indexes --- .../java/io/druid/segment/IndexMerger.java | 98 ++++++++++++++++--- .../java/io/druid/segment/IndexMergerV9.java | 49 ++++++---- .../io/druid/segment/IndexableAdapter.java | 3 +- .../QueryableIndexIndexableAdapter.java | 79 ++------------- .../segment/RowboatFilteringIndexAdapter.java | 5 +- .../segment/column/BitmapIndexSeeker.java | 30 ------ .../column/EmptyBitmapIndexSeeker.java | 34 ------- .../druid/segment/data/EmptyIndexedInts.java | 6 +- .../incremental/IncrementalIndexAdapter.java | 44 +++------ .../io/druid/segment/IndexMergerTest.java | 27 +++++ .../QueryableIndexIndexableAdapterTest.java | 40 ++------ .../IncrementalIndexAdapterTest.java | 26 +---- 12 files changed, 184 insertions(+), 257 deletions(-) delete mode 100644 processing/src/main/java/io/druid/segment/column/BitmapIndexSeeker.java delete mode 100644 processing/src/main/java/io/druid/segment/column/EmptyBitmapIndexSeeker.java diff --git a/processing/src/main/java/io/druid/segment/IndexMerger.java b/processing/src/main/java/io/druid/segment/IndexMerger.java index 3be34d6360c5..ffaf43d80baa 100644 --- a/processing/src/main/java/io/druid/segment/IndexMerger.java +++ b/processing/src/main/java/io/druid/segment/IndexMerger.java @@ -56,7 +56,6 @@ import io.druid.common.utils.JodaUtils; import io.druid.common.utils.SerializerUtils; import io.druid.query.aggregation.AggregatorFactory; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilitiesImpl; import io.druid.segment.column.ValueType; @@ -970,19 +969,28 @@ public Rowboat apply(@Nullable Rowboat input) tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory); } - BitmapIndexSeeker[] bitmapIndexSeeker = new BitmapIndexSeeker[indexes.size()]; + DictIdSeeker[] dictIdSeeker = new DictIdSeeker[indexes.size()]; for (int j = 0; j < indexes.size(); j++) { - bitmapIndexSeeker[j] = indexes.get(j).getBitmapIndexSeeker(dimension); + IntBuffer dimConversion = dimConversions.get(j).get(dimension); + if (dimConversion != null) { + dictIdSeeker[j] = new DictIdSeeker((IntBuffer) dimConversion.asReadOnlyBuffer().rewind()); + } else { + dictIdSeeker[j] = new DictIdSeeker(null); + } } - for (String dimVal : IndexedIterable.create(dimVals)) { + //Iterate all dim values's dictionary id in ascending order which in line with dim values's compare result. + for (int dictId = 0; dictId < dimVals.size(); dictId++) { progress.progress(); List> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size()); for (int j = 0; j < indexes.size(); ++j) { - convertedInverteds.add( - new ConvertingIndexedInts( - bitmapIndexSeeker[j].seek(dimVal), rowNumConversions.get(j) - ) - ); + int seekedDictId = dictIdSeeker[j].seek(dictId); + if (seekedDictId != DictIdSeeker.NOT_EXIST) { + convertedInverteds.add( + new ConvertingIndexedInts( + indexes.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j) + ) + ); + } } MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap(); @@ -999,13 +1007,16 @@ public Rowboat apply(@Nullable Rowboat input) bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset) ); - if (isSpatialDim && dimVal != null) { - List stringCoords = Lists.newArrayList(SPLITTER.split(dimVal)); - float[] coords = new float[stringCoords.size()]; - for (int j = 0; j < coords.length; j++) { - coords[j] = Float.valueOf(stringCoords.get(j)); + if (isSpatialDim) { + String dimVal = dimVals.get(dictId); + if (dimVal != null) { + List stringCoords = Lists.newArrayList(SPLITTER.split(dimVal)); + float[] coords = new float[stringCoords.size()]; + for (int j = 0; j < coords.length; j++) { + coords[j] = Float.valueOf(stringCoords.get(j)); + } + tree.insert(coords, bitset); } - tree.insert(coords, bitset); } } writer.close(); @@ -1193,6 +1204,63 @@ public IntBuffer getConversionBuffer() } } + /** + * Get old dictId from new dictId, and only support access in order + */ + public static class DictIdSeeker + { + static final int NOT_EXIST = -1; + static final int NOT_INIT = -1; + private final IntBuffer dimConversions; + private int currIndex; + private int currVal; + private int lastVal; + + DictIdSeeker( + IntBuffer dimConversions + ) + { + this.dimConversions = dimConversions; + this.currIndex = 0; + this.currVal = NOT_INIT; + this.lastVal = NOT_INIT; + } + + public int seek(int dictId) + { + if (dimConversions == null) { + return NOT_EXIST; + } + if (lastVal != NOT_INIT) { + if (dictId <= lastVal) { + throw new ISE("Value dictId[%d] is less than the last value dictId[%d] I have, cannot be.", + dictId, lastVal + ); + } + return NOT_EXIST; + } + if (currVal == NOT_INIT) { + currVal = dimConversions.get(); + } + if (currVal == dictId) { + int ret = currIndex; + ++currIndex; + if (dimConversions.hasRemaining()) { + currVal = dimConversions.get(); + } else { + lastVal = dictId; + } + return ret; + } else if (currVal < dictId) { + throw new ISE("Skipped currValue dictId[%d], currIndex[%d]; incoming value dictId[%d]", + currVal, currIndex, dictId + ); + } else { + return NOT_EXIST; + } + } + } + public static class ConvertingIndexedInts implements Iterable { private final IndexedInts baseIndex; diff --git a/processing/src/main/java/io/druid/segment/IndexMergerV9.java b/processing/src/main/java/io/druid/segment/IndexMergerV9.java index a5231902a3fe..9154901d124f 100644 --- a/processing/src/main/java/io/druid/segment/IndexMergerV9.java +++ b/processing/src/main/java/io/druid/segment/IndexMergerV9.java @@ -43,7 +43,6 @@ import io.druid.collections.CombiningIterable; import io.druid.common.utils.JodaUtils; import io.druid.query.aggregation.AggregatorFactory; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilitiesImpl; @@ -59,7 +58,6 @@ import io.druid.segment.data.IOPeon; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedIntsWriter; -import io.druid.segment.data.IndexedIterable; import io.druid.segment.data.IndexedRTree; import io.druid.segment.data.TmpFileIOPeon; import io.druid.segment.data.VSizeIndexedIntsWriter; @@ -220,7 +218,7 @@ public Metadata apply(IndexableAdapter input) ); makeInvertedIndexes( adapters, progress, mergedDimensions, indexSpec, v9TmpDir, rowNumConversions, - nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters + nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters, dimConversions ); /************ Finalize Build Columns *************/ @@ -499,7 +497,8 @@ private void makeInvertedIndexes( final ArrayList nullRowsList, final ArrayList> dimValueWriters, final ArrayList> bitmapIndexWriters, - final ArrayList> spatialIndexWriters + final ArrayList> spatialIndexWriters, + final ArrayList> dimConversions ) throws IOException { final String section = "build inverted index"; @@ -527,24 +526,33 @@ private void makeInvertedIndexes( tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory); } - BitmapIndexSeeker[] bitmapIndexSeeker = new BitmapIndexSeeker[adapters.size()]; + DictIdSeeker[] dictIdSeeker = new DictIdSeeker[adapters.size()]; for (int j = 0; j < adapters.size(); j++) { - bitmapIndexSeeker[j] = adapters.get(j).getBitmapIndexSeeker(dimension); + IntBuffer dimConversion = dimConversions.get(j).get(dimension); + if (dimConversion != null) { + dictIdSeeker[j] = new DictIdSeeker((IntBuffer)dimConversion.asReadOnlyBuffer().rewind()); + } else { + dictIdSeeker[j] = new DictIdSeeker(null); + } } ImmutableBitmap nullRowBitmap = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap( nullRowsList.get(dimIndex) ); - for (String dimVal : IndexedIterable.create(dimVals)) { + //Iterate all dim values's dictionary id in ascending order which in line with dim values's compare result. + for (int dictId = 0; dictId < dimVals.size(); dictId++) { progress.progress(); List> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size()); for (int j = 0; j < adapters.size(); ++j) { - convertedInverteds.add( - new ConvertingIndexedInts( - bitmapIndexSeeker[j].seek(dimVal), rowNumConversions.get(j) - ) - ); + int seekedDictId = dictIdSeeker[j].seek(dictId); + if (seekedDictId != DictIdSeeker.NOT_EXIST) { + convertedInverteds.add( + new ConvertingIndexedInts( + adapters.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j) + ) + ); + } } MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap(); @@ -558,19 +566,22 @@ private void makeInvertedIndexes( } ImmutableBitmap bitmapToWrite = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset); - if (dimVal == null) { + if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) { bitmapIndexWriters.get(dimIndex).write(nullRowBitmap.union(bitmapToWrite)); } else { bitmapIndexWriters.get(dimIndex).write(bitmapToWrite); } - if (spatialIndexWriter != null && dimVal != null) { - List stringCoords = Lists.newArrayList(SPLITTER.split(dimVal)); - float[] coords = new float[stringCoords.size()]; - for (int j = 0; j < coords.length; j++) { - coords[j] = Float.valueOf(stringCoords.get(j)); + if (spatialIndexWriter != null) { + String dimVal = dimVals.get(dictId); + if (dimVal != null) { + List stringCoords = Lists.newArrayList(SPLITTER.split(dimVal)); + float[] coords = new float[stringCoords.size()]; + for (int j = 0; j < coords.length; j++) { + coords[j] = Float.valueOf(stringCoords.get(j)); + } + tree.insert(coords, bitset); } - tree.insert(coords, bitset); } } if (spatialIndexWriter != null) { diff --git a/processing/src/main/java/io/druid/segment/IndexableAdapter.java b/processing/src/main/java/io/druid/segment/IndexableAdapter.java index 8ca187d61946..6b9cae56a3f9 100644 --- a/processing/src/main/java/io/druid/segment/IndexableAdapter.java +++ b/processing/src/main/java/io/druid/segment/IndexableAdapter.java @@ -19,7 +19,6 @@ package io.druid.segment; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedInts; @@ -44,7 +43,7 @@ public interface IndexableAdapter IndexedInts getBitmapIndex(String dimension, String value); - BitmapIndexSeeker getBitmapIndexSeeker(String dimension); + IndexedInts getBitmapIndex(String dimension, int dictId); String getMetricType(String metric); diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java index d05c0e95c6dd..b18727511e29 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java @@ -27,12 +27,10 @@ import com.metamx.common.guava.CloseQuietly; import com.metamx.common.logger.Logger; import io.druid.segment.column.BitmapIndex; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ComplexColumn; import io.druid.segment.column.DictionaryEncodedColumn; -import io.druid.segment.column.EmptyBitmapIndexSeeker; import io.druid.segment.column.GenericColumn; import io.druid.segment.column.IndexedFloatsGenericColumn; import io.druid.segment.column.IndexedLongsGenericColumn; @@ -40,7 +38,6 @@ import io.druid.segment.data.ArrayBasedIndexedInts; import io.druid.segment.data.BitmapCompressedIndexedInts; import io.druid.segment.data.EmptyIndexedInts; -import io.druid.segment.data.GenericIndexed; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedIterable; @@ -303,12 +300,12 @@ public IndexedInts getBitmapIndex(String dimension, String value) final Column column = input.getColumn(dimension); if (column == null) { - return new EmptyIndexedInts(); + return EmptyIndexedInts.EMPTY_INDEXED_INTS; } final BitmapIndex bitmaps = column.getBitmapIndex(); if (bitmaps == null) { - return new EmptyIndexedInts(); + return EmptyIndexedInts.EMPTY_INDEXED_INTS; } return new BitmapCompressedIndexedInts(bitmaps.getBitmap(value)); @@ -339,79 +336,23 @@ public ColumnCapabilities getCapabilities(String column) } @Override - public BitmapIndexSeeker getBitmapIndexSeeker(String dimension) + public IndexedInts getBitmapIndex(String dimension, int dictId) { final Column column = input.getColumn(dimension); - if (column == null) { - return new EmptyBitmapIndexSeeker(); + return EmptyIndexedInts.EMPTY_INDEXED_INTS; } final BitmapIndex bitmaps = column.getBitmapIndex(); if (bitmaps == null) { - return new EmptyBitmapIndexSeeker(); + return EmptyIndexedInts.EMPTY_INDEXED_INTS; } - final Indexed dimSet = getDimValueLookup(dimension); - - // BitmapIndexSeeker is the main performance boost comes from. - // In the previous version of index merge, during the creation of invert index, we do something like - // merge sort of multiply bitmap indexes. It simply iterator all the previous sorted values, - // and "binary find" the id in each bitmap indexes, which involves disk IO and is really slow. - // Suppose we have N (which is 100 in our test) small segments, each have M (which is 50000 in our case) rows. - // In high cardinality scenario, we will almost have N * M uniq values. So the complexity will be O(N * M * M * LOG(M)). - - // There are 2 properties we did not use during the merging: - // 1. We always travel the dimension values sequentially - // 2. One single dimension value is valid only in one index when cardinality is high enough - // So we introduced the BitmapIndexSeeker, which can only seek value sequentially and can never seek back. - // By using this and the help of "getDimValueLookup", we only need to translate all dimension value to its ID once, - // and the translation is done by self-increase of the integer. We only need to change the CACHED value once after - // previous value is hit, renew the value and increase the ID. The complexity now is O(N * M * LOG(M)). - return new BitmapIndexSeeker() - { - private int currIndex = 0; - private String currVal = null; - private String lastVal = null; - - @Override - public IndexedInts seek(String value) - { - if (dimSet == null || dimSet.size() == 0) { - return new EmptyIndexedInts(); - } - if (lastVal != null) { - if (GenericIndexed.STRING_STRATEGY.compare(value, lastVal) <= 0) { - throw new ISE( - "Value[%s] is less than the last value[%s] I have, cannot be.", - value, lastVal - ); - } - return new EmptyIndexedInts(); - } - if (currVal == null) { - currVal = dimSet.get(currIndex); - } - int compareResult = GenericIndexed.STRING_STRATEGY.compare(currVal, value); - if (compareResult == 0) { - IndexedInts ret = new BitmapCompressedIndexedInts(bitmaps.getBitmap(currIndex)); - ++currIndex; - if (currIndex == dimSet.size()) { - lastVal = value; - } else { - currVal = dimSet.get(currIndex); - } - return ret; - } else if (compareResult < 0) { - throw new ISE( - "Skipped currValue[%s], currIndex[%,d]; incoming value[%s]", - currVal, currIndex, value - ); - } else { - return new EmptyIndexedInts(); - } - } - }; + if (dictId >= 0) { + return new BitmapCompressedIndexedInts(bitmaps.getBitmap(dictId)); + } else { + return EmptyIndexedInts.EMPTY_INDEXED_INTS; + } } @Override diff --git a/processing/src/main/java/io/druid/segment/RowboatFilteringIndexAdapter.java b/processing/src/main/java/io/druid/segment/RowboatFilteringIndexAdapter.java index 892ca62fb4f5..ec77ab78ecc8 100644 --- a/processing/src/main/java/io/druid/segment/RowboatFilteringIndexAdapter.java +++ b/processing/src/main/java/io/druid/segment/RowboatFilteringIndexAdapter.java @@ -21,7 +21,6 @@ import com.google.common.base.Predicate; import com.google.common.collect.Iterables; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedInts; @@ -95,9 +94,9 @@ public ColumnCapabilities getCapabilities(String column) } @Override - public BitmapIndexSeeker getBitmapIndexSeeker(String dimension) + public IndexedInts getBitmapIndex(String dimension, int dictId) { - return baseAdapter.getBitmapIndexSeeker(dimension); + return baseAdapter.getBitmapIndex(dimension, dictId); } @Override diff --git a/processing/src/main/java/io/druid/segment/column/BitmapIndexSeeker.java b/processing/src/main/java/io/druid/segment/column/BitmapIndexSeeker.java deleted file mode 100644 index 12be528d8039..000000000000 --- a/processing/src/main/java/io/druid/segment/column/BitmapIndexSeeker.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. (Metamarkets) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package io.druid.segment.column; - -import io.druid.segment.data.IndexedInts; - -/** - * Only support access in order - */ -public interface BitmapIndexSeeker -{ - public IndexedInts seek(String value); -} diff --git a/processing/src/main/java/io/druid/segment/column/EmptyBitmapIndexSeeker.java b/processing/src/main/java/io/druid/segment/column/EmptyBitmapIndexSeeker.java deleted file mode 100644 index 1900d25ac05b..000000000000 --- a/processing/src/main/java/io/druid/segment/column/EmptyBitmapIndexSeeker.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. (Metamarkets) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package io.druid.segment.column; - -import io.druid.segment.data.EmptyIndexedInts; -import io.druid.segment.data.IndexedInts; - -public class EmptyBitmapIndexSeeker implements BitmapIndexSeeker -{ - - @Override - public IndexedInts seek(String value) - { - return new EmptyIndexedInts(); - } - -} diff --git a/processing/src/main/java/io/druid/segment/data/EmptyIndexedInts.java b/processing/src/main/java/io/druid/segment/data/EmptyIndexedInts.java index e9ab31172bd6..e6e7d6a68c95 100644 --- a/processing/src/main/java/io/druid/segment/data/EmptyIndexedInts.java +++ b/processing/src/main/java/io/druid/segment/data/EmptyIndexedInts.java @@ -28,7 +28,11 @@ */ public class EmptyIndexedInts implements IndexedInts { - public static EmptyIndexedInts instance = new EmptyIndexedInts(); + public static final EmptyIndexedInts EMPTY_INDEXED_INTS = new EmptyIndexedInts(); + + private EmptyIndexedInts() + { + } @Override public int size() diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java index 02406bd9b22e..814252cb9e24 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java @@ -26,16 +26,12 @@ import com.google.common.collect.Sets; import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.common.ISE; import com.metamx.common.logger.Logger; import io.druid.segment.IndexableAdapter; import io.druid.segment.Metadata; import io.druid.segment.Rowboat; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.column.ColumnCapabilities; -import io.druid.segment.column.EmptyBitmapIndexSeeker; import io.druid.segment.data.EmptyIndexedInts; -import io.druid.segment.data.GenericIndexed; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedIterable; @@ -276,13 +272,13 @@ public IndexedInts getBitmapIndex(String dimension, String value) Map dimInverted = invertedIndexes.get(dimension); if (dimInverted == null) { - return new EmptyIndexedInts(); + return EmptyIndexedInts.EMPTY_INDEXED_INTS; } final MutableBitmap bitmapIndex = dimInverted.get(value); if (bitmapIndex == null) { - return new EmptyIndexedInts(); + return EmptyIndexedInts.EMPTY_INDEXED_INTS; } return new BitmapIndexedInts(bitmapIndex); @@ -301,35 +297,17 @@ public ColumnCapabilities getCapabilities(String column) } @Override - public BitmapIndexSeeker getBitmapIndexSeeker(String dimension) + public IndexedInts getBitmapIndex(String dimension, int dictId) { - final Map dimInverted = invertedIndexes.get(dimension); - if (dimInverted == null) { - return new EmptyBitmapIndexSeeker(); + if (dictId >= 0) { + final Indexed dimValues = getDimValueLookup(dimension); + //NullValueConverterDimDim will convert empty to null, we need convert it back to the actual values, + //because getBitmapIndex relies on the actual values stored in DimDim. + String value = Strings.nullToEmpty(dimValues.get(dictId)); + return getBitmapIndex(dimension, value); + } else { + return EmptyIndexedInts.EMPTY_INDEXED_INTS; } - - return new BitmapIndexSeeker() - { - private String lastVal = null; - - @Override - public IndexedInts seek(String value) - { - if (value != null && GenericIndexed.STRING_STRATEGY.compare(value, lastVal) <= 0) { - throw new ISE( - "Value[%s] is less than the last value[%s] I have, cannot be.", - value, lastVal - ); - } - value = Strings.nullToEmpty(value); - lastVal = value; - final MutableBitmap bitmapIndex = dimInverted.get(value); - if (bitmapIndex == null) { - return new EmptyIndexedInts(); - } - return new BitmapIndexedInts(bitmapIndex); - } - }; } private boolean hasNullValue(String[] dimValues) diff --git a/processing/src/test/java/io/druid/segment/IndexMergerTest.java b/processing/src/test/java/io/druid/segment/IndexMergerTest.java index a9dee9a84780..b32051760c87 100644 --- a/processing/src/test/java/io/druid/segment/IndexMergerTest.java +++ b/processing/src/test/java/io/druid/segment/IndexMergerTest.java @@ -26,8 +26,10 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import com.google.common.primitives.Ints; import com.metamx.collections.bitmap.RoaringBitmapFactory; import com.metamx.common.IAE; +import com.metamx.common.ISE; import io.druid.data.input.MapBasedInputRow; import io.druid.data.input.impl.DimensionsSpec; import io.druid.granularity.QueryGranularity; @@ -60,6 +62,8 @@ import java.io.File; import java.io.IOException; import java.lang.reflect.Field; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -1656,4 +1660,27 @@ private AggregatorFactory[] getCombiningAggregators(AggregatorFactory[] aggregat } return combiningAggregators; } + + @Test + public void testDictIdSeeker() throws Exception + { + IntBuffer dimConversions = ByteBuffer.allocateDirect(3 * Ints.BYTES).asIntBuffer(); + dimConversions.put(0); + dimConversions.put(2); + dimConversions.put(4); + IndexMerger.DictIdSeeker dictIdSeeker = new IndexMerger.DictIdSeeker((IntBuffer) dimConversions.asReadOnlyBuffer().rewind()); + Assert.assertEquals(0, dictIdSeeker.seek(0)); + Assert.assertEquals(-1, dictIdSeeker.seek(1)); + Assert.assertEquals(1, dictIdSeeker.seek(2)); + try { + dictIdSeeker.seek(5); + Assert.fail("Only support access in order"); + } + catch (ISE ise) { + Assert.assertTrue("Only support access in order", true); + } + Assert.assertEquals(-1, dictIdSeeker.seek(3)); + Assert.assertEquals(2, dictIdSeeker.seek(4)); + Assert.assertEquals(-1, dictIdSeeker.seek(5)); + } } diff --git a/processing/src/test/java/io/druid/segment/QueryableIndexIndexableAdapterTest.java b/processing/src/test/java/io/druid/segment/QueryableIndexIndexableAdapterTest.java index 5e298a568ee5..0d3e98181a25 100644 --- a/processing/src/test/java/io/druid/segment/QueryableIndexIndexableAdapterTest.java +++ b/processing/src/test/java/io/druid/segment/QueryableIndexIndexableAdapterTest.java @@ -19,14 +19,11 @@ package io.druid.segment; -import com.metamx.common.ISE; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.data.CompressedObjectStrategy; import io.druid.segment.data.ConciseBitmapSerdeFactory; import io.druid.segment.data.IncrementalIndexTest; import io.druid.segment.data.IndexedInts; import io.druid.segment.incremental.IncrementalIndex; -import io.druid.segment.incremental.IncrementalIndexAdapter; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -34,7 +31,8 @@ import java.io.File; -public class QueryableIndexIndexableAdapterTest { +public class QueryableIndexIndexableAdapterTest +{ private final static IndexMerger INDEX_MERGER = TestHelper.getTestIndexMerger(); private final static IndexIO INDEX_IO = TestHelper.getTestIndexIO(); private static final IndexSpec INDEX_SPEC = IndexMergerTest.makeIndexSpec( @@ -47,21 +45,15 @@ public class QueryableIndexIndexableAdapterTest { public final TemporaryFolder temporaryFolder = new TemporaryFolder(); @Rule public final CloserRule closer = new CloserRule(false); - + @Test - public void testGetBitmapIndexSeeker() throws Exception + public void testGetBitmapIndex() throws Exception { final long timestamp = System.currentTimeMillis(); IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null); IncrementalIndexTest.populateIndex(timestamp, toPersist); final File tempDir = temporaryFolder.newFolder(); - final IndexableAdapter incrementalAdapter = new IncrementalIndexAdapter( - toPersist.getInterval(), - toPersist, - INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory() - ); - QueryableIndex index = closer.closeLater( INDEX_IO.loadIndex( INDEX_MERGER.persist( @@ -73,24 +65,12 @@ public void testGetBitmapIndexSeeker() throws Exception ); IndexableAdapter adapter = new QueryableIndexIndexableAdapter(index); - BitmapIndexSeeker bitmapIndexSeeker = adapter.getBitmapIndexSeeker("dim1"); - IndexedInts indexedIntsNull = bitmapIndexSeeker.seek(null); - Assert.assertEquals(0, indexedIntsNull.size()); - IndexedInts indexedInts0 = bitmapIndexSeeker.seek("0"); - Assert.assertEquals(0, indexedInts0.size()); - IndexedInts indexedInts1 = bitmapIndexSeeker.seek("1"); - Assert.assertEquals(1, indexedInts1.size()); - try { - bitmapIndexSeeker.seek("4"); - Assert.assertFalse("Only support access in order", true); - } catch(ISE ise) { - Assert.assertTrue("Only support access in order", true); + String dimension = "dim1"; + //null is added to all dimensions with value + IndexedInts indexedInts = adapter.getBitmapIndex(dimension, 0); + for (int i = 0; i < adapter.getDimValueLookup(dimension).size(); i++) { + indexedInts = adapter.getBitmapIndex(dimension, i); + Assert.assertEquals(1, indexedInts.size()); } - IndexedInts indexedInts2 = bitmapIndexSeeker.seek("2"); - Assert.assertEquals(0, indexedInts2.size()); - IndexedInts indexedInts3 = bitmapIndexSeeker.seek("3"); - Assert.assertEquals(1, indexedInts3.size()); - IndexedInts indexedInts4 = bitmapIndexSeeker.seek("4"); - Assert.assertEquals(0, indexedInts4.size()); } } diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexAdapterTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexAdapterTest.java index ed9bfdb11d1e..710d852aeef4 100644 --- a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexAdapterTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexAdapterTest.java @@ -22,7 +22,6 @@ import io.druid.segment.IndexSpec; import io.druid.segment.IndexableAdapter; import io.druid.segment.Rowboat; -import io.druid.segment.column.BitmapIndexSeeker; import io.druid.segment.data.CompressedObjectStrategy; import io.druid.segment.data.ConciseBitmapSerdeFactory; import io.druid.segment.data.IncrementalIndexTest; @@ -31,8 +30,6 @@ import org.junit.Assert; import org.junit.Test; -import com.metamx.common.ISE; - import java.util.ArrayList; import java.util.List; @@ -45,7 +42,7 @@ public class IncrementalIndexAdapterTest ); @Test - public void testGetBitmapIndexSeeker() throws Exception + public void testGetBitmapIndex() throws Exception { final long timestamp = System.currentTimeMillis(); IncrementalIndex incrementalIndex = IncrementalIndexTest.createIndex(null); @@ -55,24 +52,11 @@ public void testGetBitmapIndexSeeker() throws Exception incrementalIndex, INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory() ); - BitmapIndexSeeker bitmapIndexSeeker = adapter.getBitmapIndexSeeker("dim1"); - IndexedInts indexedInts0 = bitmapIndexSeeker.seek("0"); - Assert.assertEquals(0, indexedInts0.size()); - IndexedInts indexedInts1 = bitmapIndexSeeker.seek("1"); - Assert.assertEquals(1, indexedInts1.size()); - try { - bitmapIndexSeeker.seek("01"); - Assert.assertFalse("Only support access in order", true); - } - catch (ISE ise) { - Assert.assertTrue("Only support access in order", true); + String dimension = "dim1"; + for (int i = 0; i < adapter.getDimValueLookup(dimension).size(); i++) { + IndexedInts indexedInts = adapter.getBitmapIndex(dimension, i); + Assert.assertEquals(1, indexedInts.size()); } - IndexedInts indexedInts2 = bitmapIndexSeeker.seek("2"); - Assert.assertEquals(0, indexedInts2.size()); - IndexedInts indexedInts3 = bitmapIndexSeeker.seek("3"); - Assert.assertEquals(1, indexedInts3.size()); - IndexedInts indexedInts4 = bitmapIndexSeeker.seek("4"); - Assert.assertEquals(0, indexedInts4.size()); } @Test