Segment metadata analysis: support string columns that have no bitmap index.

analyzeStringColumn no longer returns the "string_no_bitmap" error. With a bitmap index it
computes size, cardinality and min/max from the index; without one it falls back to the column
dictionary for cardinality and min/max and leaves the size at 0. TestIndex gains no-bitmap test
indexes and builds its shared indexes through memoized suppliers; SegmentMetadataQueryTest gains
a `bitmaps` parameter.

Review amendments relative to the submitted patch (added lines only):
  * SegmentAnalyzer: the fallback column is closed via try-with-resources, and the per-value
    size product is widened to long before multiplying.
  * SegmentAnalyzer, TestIndex: type arguments are spelled out on DictionaryEncodedColumn and
    Supplier so the added code type-checks.
  * SegmentMetadataQueryTest: local preferedSize1 renamed to preferredSize1.
The last SegmentAnalyzer hunk header and the large TestIndex hunk header were recounted; the
`index` lines still carry the blob ids of the submitted patch.

diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
index 8db528560e40..15acc4ce7abc 100644
--- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
+++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
@@ -42,6 +42,7 @@
 import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.ComplexColumn;
+import org.apache.druid.segment.column.DictionaryEncodedColumn;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.IndexedInts;
 import org.apache.druid.segment.serde.ComplexMetricSerde;
@@ -194,30 +195,44 @@ private ColumnAnalysis analyzeStringColumn(
       final ColumnHolder columnHolder
   )
   {
-    long size = 0;
-
     Comparable min = null;
     Comparable max = null;
-
-    if (!capabilities.hasBitmapIndexes()) {
-      return ColumnAnalysis.error("string_no_bitmap");
-    }
-
-    final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
-    final int cardinality = bitmapIndex.getCardinality();
-
-    if (analyzingSize()) {
-      for (int i = 0; i < cardinality; ++i) {
-        String value = bitmapIndex.getValue(i);
-        if (value != null) {
-          size += StringUtils.estimatedBinaryLengthAsUTF8(value) * bitmapIndex.getBitmap(bitmapIndex.getIndex(value)).size();
+    long size = 0;
+    final int cardinality;
+    if (capabilities.hasBitmapIndexes()) {
+      final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
+      cardinality = bitmapIndex.getCardinality();
+
+      if (analyzingSize()) {
+        for (int i = 0; i < cardinality; ++i) {
+          String value = bitmapIndex.getValue(i);
+          if (value != null) {
+            // Multiply as long so a large (value length x row count) product cannot wrap around in int arithmetic.
+            size += (long) StringUtils.estimatedBinaryLengthAsUTF8(value)
+                    * bitmapIndex.getBitmap(bitmapIndex.getIndex(value)).size();
+          }
         }
       }
-    }
 
-    if (analyzingMinMax() && cardinality > 0) {
-      min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
-      max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
+      if (analyzingMinMax() && cardinality > 0) {
+        min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
+        max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
+      }
+    } else if (capabilities.isDictionaryEncoded()) {
+      // Fallback if no bitmap index: take cardinality and min/max from the column's dictionary; size stays 0.
+      // The column is obtained only for this lookup, so it is closed as soon as the lookup is done.
+      try (DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn()) {
+        cardinality = theColumn.getCardinality();
+        if (analyzingMinMax() && cardinality > 0) {
+          min = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(0));
+          max = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(cardinality - 1));
+        }
+      }
+      catch (java.io.IOException e) {
+        throw new RuntimeException(e);
+      }
+    } else {
+      cardinality = 0;
     }
 
     return new ColumnAnalysis(
diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java
index 31e70d376644..5c613acf658a 100644
--- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java
+++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java
@@ -83,10 +83,16 @@ public class SegmentMetadataQueryTest
   public static QueryRunner makeMMappedQueryRunner(
       SegmentId segmentId,
       boolean rollup,
+      boolean bitmaps,
       QueryRunnerFactory factory
   )
   {
-    QueryableIndex index = rollup ? TestIndex.getMMappedTestIndex() : TestIndex.getNoRollupMMappedTestIndex();
+    QueryableIndex index;
+    if (bitmaps) {
+      index = rollup ? TestIndex.getMMappedTestIndex() : TestIndex.getNoRollupMMappedTestIndex();
+    } else {
+      index = TestIndex.getNoBitmapMMappedTestIndex();
+    }
     return QueryRunnerTestHelper.makeQueryRunner(
         factory,
         segmentId,
@@ -99,10 +105,16 @@ public static QueryRunner makeMMappedQueryRunner(
   public static QueryRunner makeIncrementalIndexQueryRunner(
       SegmentId segmentId,
       boolean rollup,
+      boolean bitmaps,
       QueryRunnerFactory factory
   )
   {
-    IncrementalIndex index = rollup ? TestIndex.getIncrementalTestIndex() : TestIndex.getNoRollupIncrementalTestIndex();
+    IncrementalIndex index;
+    if (bitmaps) {
+      index = rollup ? TestIndex.getIncrementalTestIndex() : TestIndex.getNoRollupIncrementalTestIndex();
+    } else {
+      index = TestIndex.getNoBitmapIncrementalTestIndex();
+    }
     return QueryRunnerTestHelper.makeQueryRunner(
         factory,
         segmentId,
@@ -121,17 +133,19 @@ public static QueryRunner makeIncrementalIndexQueryRunner(
   private final SegmentMetadataQuery testQuery;
   private final SegmentAnalysis expectedSegmentAnalysis1;
   private final SegmentAnalysis expectedSegmentAnalysis2;
+  private final boolean bitmaps;
 
-  @Parameterized.Parameters(name = "mmap1 = {0}, mmap2 = {1}, rollup1 = {2}, rollup2 = {3}, differentIds = {4}")
+  @Parameterized.Parameters(name = "mmap1 = {0}, mmap2 = {1}, rollup1 = {2}, rollup2 = {3}, differentIds = {4}, bitmaps={5}")
   public static Collection constructorFeeder()
   {
     return ImmutableList.of(
-        new Object[]{true, true, true, true, false},
-        new Object[]{true, false, true, false, false},
-        new Object[]{false, true, true, false, false},
-        new Object[]{false, false, false, false, false},
-        new Object[]{false, false, true, true, false},
-        new Object[]{false, false, false, true, true}
+        new Object[]{true, true, true, true, false, true},
+        new Object[]{true, false, true, false, false, true},
+        new Object[]{false, true, true, false, false, true},
+        new Object[]{false, false, false, false, false, true},
+        new Object[]{false, false, true, true, false, true},
+        new Object[]{false, false, false, true, true, true},
+        new Object[]{true, true, false, false, false, false}
     );
   }
 
@@ -140,22 +154,24 @@ public SegmentMetadataQueryTest(
       boolean mmap2,
       boolean rollup1,
       boolean rollup2,
-      boolean differentIds
+      boolean differentIds,
+      boolean bitmaps
   )
   {
     final SegmentId id1 = SegmentId.dummy(differentIds ? "testSegment1" : "testSegment");
     final SegmentId id2 = SegmentId.dummy(differentIds ? "testSegment2" : "testSegment");
     this.runner1 = mmap1
-                   ? makeMMappedQueryRunner(id1, rollup1, FACTORY)
-                   : makeIncrementalIndexQueryRunner(id1, rollup1, FACTORY);
+                   ? makeMMappedQueryRunner(id1, rollup1, bitmaps, FACTORY)
+                   : makeIncrementalIndexQueryRunner(id1, rollup1, bitmaps, FACTORY);
     this.runner2 = mmap2
-                   ? makeMMappedQueryRunner(id2, rollup2, FACTORY)
-                   : makeIncrementalIndexQueryRunner(id2, rollup2, FACTORY);
+                   ? makeMMappedQueryRunner(id2, rollup2, bitmaps, FACTORY)
+                   : makeIncrementalIndexQueryRunner(id2, rollup2, bitmaps, FACTORY);
     this.mmap1 = mmap1;
     this.mmap2 = mmap2;
     this.rollup1 = rollup1;
     this.rollup2 = rollup2;
     this.differentIds = differentIds;
+    this.bitmaps = bitmaps;
     testQuery = Druids.newSegmentMetadataQueryBuilder()
                       .dataSource("testing")
                       .intervals("2013/2014")
@@ -169,6 +185,16 @@ public SegmentMetadataQueryTest(
                       .merge(true)
                       .build();
 
+    int preferredSize1 = 0;
+    int placementSize2 = 0;
+    int overallSize1 = 119691;
+    int overallSize2 = 119691;
+    if (bitmaps) {
+      preferredSize1 = mmap1 ? 10881 : 10764;
+      placementSize2 = mmap2 ? 10881 : 0;
+      overallSize1 = mmap1 ? 167493 : 168188;
+      overallSize2 = mmap2 ? 167493 : 168188;
+    }
     expectedSegmentAnalysis1 = new SegmentAnalysis(
         id1.toString(),
         ImmutableList.of(Intervals.of("2011-01-12T00:00:00.000Z/2011-04-15T00:00:00.001Z")),
@@ -187,7 +213,7 @@ public SegmentMetadataQueryTest(
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
                 false,
-                mmap1 ? 10881 : 10764,
+                preferredSize1,
                 1,
                 "preferred",
                 "preferred",
@@ -203,7 +229,7 @@ public SegmentMetadataQueryTest(
                 null,
                 null
             )
-        ), mmap1 ? 167493 : 168188,
+        ), overallSize1,
         1209,
         null,
         null,
@@ -228,7 +254,7 @@ public SegmentMetadataQueryTest(
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
                 false,
-                mmap2 ? 10881 : 0,
+                placementSize2,
                 1,
                 null,
                 null,
@@ -245,7 +271,7 @@ public SegmentMetadataQueryTest(
                 null
             )
         // null_column will be included only for incremental index, which makes a little bigger result than expected
-        ), mmap2 ? 167493 : 168188,
+        ), overallSize2,
         1209,
         null,
         null,
@@ -470,10 +496,16 @@ public void testSegmentMetadataQueryWithComplexColumnMerge()
   @Test
   public void testSegmentMetadataQueryWithDefaultAnalysisMerge()
   {
+    int size1 = 0;
+    int size2 = 0;
+    if (bitmaps) {
+      size1 = mmap1 ? 10881 : 10764;
+      size2 = mmap2 ? 10881 : 10764;
+    }
     ColumnAnalysis analysis = new ColumnAnalysis(
         ValueType.STRING.toString(),
         false,
-        (mmap1 ? 10881 : 10764) + (mmap2 ? 10881 : 10764),
+        size1 + size2,
         1,
         "preferred",
         "preferred",
@@ -485,10 +517,16 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge()
   @Test
   public void testSegmentMetadataQueryWithDefaultAnalysisMerge2()
   {
+    int size1 = 0;
+    int size2 = 0;
+    if (bitmaps) {
+      size1 = mmap1 ? 6882 : 6808;
+      size2 = mmap2 ? 6882 : 6808;
+    }
     ColumnAnalysis analysis = new ColumnAnalysis(
         ValueType.STRING.toString(),
         false,
-        (mmap1 ? 6882 : 6808) + (mmap2 ? 6882 : 6808),
+        size1 + size2,
         3,
         "spot",
         "upfront",
@@ -500,10 +538,16 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge2()
   @Test
   public void testSegmentMetadataQueryWithDefaultAnalysisMerge3()
   {
+    int size1 = 0;
+    int size2 = 0;
+    if (bitmaps) {
+      size1 = mmap1 ? 9765 : 9660;
+      size2 = mmap2 ? 9765 : 9660;
+    }
     ColumnAnalysis analysis = new ColumnAnalysis(
         ValueType.STRING.toString(),
         false,
-        (mmap1 ? 9765 : 9660) + (mmap2 ? 9765 : 9660),
+        size1 + size2,
         9,
         "automotive",
         "travel",
diff --git a/processing/src/test/java/org/apache/druid/segment/TestIndex.java b/processing/src/test/java/org/apache/druid/segment/TestIndex.java
index 72b52873386f..87d8abf11586 100644
--- a/processing/src/test/java/org/apache/druid/segment/TestIndex.java
+++ b/processing/src/test/java/org/apache/druid/segment/TestIndex.java
@@ -20,6 +20,7 @@
 package org.apache.druid.segment;
 
 import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
 import com.google.common.base.Throwables;
 import com.google.common.io.CharSource;
 import com.google.common.io.LineProcessor;
@@ -111,12 +112,31 @@ public class TestIndex
       new StringDimensionSchema("null_column")
   );
 
+  public static final List DIMENSION_SCHEMAS_NO_BITMAP = Arrays.asList(
+      new StringDimensionSchema("market", null, false),
+      new StringDimensionSchema("quality", null, false),
+      new LongDimensionSchema("qualityLong"),
+      new FloatDimensionSchema("qualityFloat"),
+      new DoubleDimensionSchema("qualityDouble"),
+      new StringDimensionSchema("qualityNumericString", null, false),
+      new StringDimensionSchema("placement", null, false),
+      new StringDimensionSchema("placementish", null, false),
+      new StringDimensionSchema("partial_null_column", null, false),
+      new StringDimensionSchema("null_column", null, false)
+  );
+
   public static final DimensionsSpec DIMENSIONS_SPEC = new DimensionsSpec(
       DIMENSION_SCHEMAS,
       null,
       null
   );
 
+  public static final DimensionsSpec DIMENSIONS_SPEC_NO_BITMAPS = new DimensionsSpec(
+      DIMENSION_SCHEMAS_NO_BITMAP,
+      null,
+      null
+  );
+
   public static final String[] DOUBLE_METRICS = new String[]{"index", "indexMin", "indexMaxPlusTen"};
   public static final String[] FLOAT_METRICS = new String[]{"indexFloat", "indexMinFloat", "indexMaxFloat"};
   private static final Logger log = new Logger(TestIndex.class);
@@ -147,107 +167,96 @@ public class TestIndex
     }
   }
 
-  private static IncrementalIndex realtimeIndex = null;
-  private static IncrementalIndex noRollupRealtimeIndex = null;
-  private static QueryableIndex mmappedIndex = null;
-  private static QueryableIndex noRollupMmappedIndex = null;
-  private static QueryableIndex mergedRealtime = null;
+  // Each shared test index is built on first access and then cached by its memoizing supplier.
+  private static Supplier<IncrementalIndex> realtimeIndex = Suppliers.memoize(
+      () -> makeRealtimeIndex("druid.sample.numeric.tsv")
+  );
+  private static Supplier<IncrementalIndex> noRollupRealtimeIndex = Suppliers.memoize(
+      () -> makeRealtimeIndex("druid.sample.numeric.tsv", false)
+  );
+  private static Supplier<IncrementalIndex> noBitmapRealtimeIndex = Suppliers.memoize(
+      () -> makeRealtimeIndex("druid.sample.numeric.tsv", false, false)
+  );
+  private static Supplier<QueryableIndex> mmappedIndex = Suppliers.memoize(
+      () -> persistRealtimeAndLoadMMapped(realtimeIndex.get())
+  );
+  private static Supplier<QueryableIndex> noRollupMmappedIndex = Suppliers.memoize(
+      () -> persistRealtimeAndLoadMMapped(noRollupRealtimeIndex.get())
+  );
+  private static Supplier<QueryableIndex> noBitmapMmappedIndex = Suppliers.memoize(
+      () -> persistRealtimeAndLoadMMapped(noBitmapRealtimeIndex.get())
+  );
+  private static Supplier<QueryableIndex> mergedRealtime = Suppliers.memoize(() -> {
+    try {
+      IncrementalIndex top = makeRealtimeIndex("druid.sample.numeric.tsv.top");
+      IncrementalIndex bottom = makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
+
+      File tmpFile = File.createTempFile("yay", "who");
+      tmpFile.delete();
+
+      File topFile = new File(tmpFile, "top");
+      File bottomFile = new File(tmpFile, "bottom");
+      File mergedFile = new File(tmpFile, "merged");
+
+      topFile.mkdirs();
+      topFile.deleteOnExit();
+      bottomFile.mkdirs();
+      bottomFile.deleteOnExit();
+      mergedFile.mkdirs();
+      mergedFile.deleteOnExit();
+
+      INDEX_MERGER.persist(top, DATA_INTERVAL, topFile, indexSpec, null);
+      INDEX_MERGER.persist(bottom, DATA_INTERVAL, bottomFile, indexSpec, null);
+
+      return INDEX_IO.loadIndex(
+          INDEX_MERGER.mergeQueryableIndex(
+              Arrays.asList(INDEX_IO.loadIndex(topFile), INDEX_IO.loadIndex(bottomFile)),
+              true,
+              METRIC_AGGS,
+              mergedFile,
+              indexSpec,
+              null
+          )
+      );
+    }
+    catch (IOException e) {
+      throw Throwables.propagate(e);
+    }
+  });
 
   public static IncrementalIndex getIncrementalTestIndex()
   {
-    synchronized (log) {
-      if (realtimeIndex != null) {
-        return realtimeIndex;
-      }
-    }
-
-    return realtimeIndex = makeRealtimeIndex("druid.sample.numeric.tsv");
+    return realtimeIndex.get();
   }
 
   public static IncrementalIndex getNoRollupIncrementalTestIndex()
   {
-    synchronized (log) {
-      if (noRollupRealtimeIndex != null) {
-        return noRollupRealtimeIndex;
-      }
-    }
+    return noRollupRealtimeIndex.get();
+  }
 
-    return noRollupRealtimeIndex = makeRealtimeIndex("druid.sample.numeric.tsv", false);
+  public static IncrementalIndex getNoBitmapIncrementalTestIndex()
+  {
+    return noBitmapRealtimeIndex.get();
   }
 
   public static QueryableIndex getMMappedTestIndex()
   {
-    synchronized (log) {
-      if (mmappedIndex != null) {
-        return mmappedIndex;
-      }
-    }
-
-    IncrementalIndex incrementalIndex = getIncrementalTestIndex();
-    mmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
-
-    return mmappedIndex;
+    return mmappedIndex.get();
   }
 
   public static QueryableIndex getNoRollupMMappedTestIndex()
   {
-    synchronized (log) {
-      if (noRollupMmappedIndex != null) {
-        return noRollupMmappedIndex;
-      }
-    }
-
-    IncrementalIndex incrementalIndex = getNoRollupIncrementalTestIndex();
-    noRollupMmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
+    return noRollupMmappedIndex.get();
+  }
 
-    return noRollupMmappedIndex;
+  public static QueryableIndex getNoBitmapMMappedTestIndex()
+  {
+    return noBitmapMmappedIndex.get();
   }
 
   public static QueryableIndex mergedRealtimeIndex()
   {
-    synchronized (log) {
-      if (mergedRealtime != null) {
-        return mergedRealtime;
-      }
-
-      try {
-        IncrementalIndex top = makeRealtimeIndex("druid.sample.numeric.tsv.top");
-        IncrementalIndex bottom = makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
-
-        File tmpFile = File.createTempFile("yay", "who");
-        tmpFile.delete();
-
-        File topFile = new File(tmpFile, "top");
-        File bottomFile = new File(tmpFile, "bottom");
-        File mergedFile = new File(tmpFile, "merged");
-
-        topFile.mkdirs();
-        topFile.deleteOnExit();
-        bottomFile.mkdirs();
-        bottomFile.deleteOnExit();
-        mergedFile.mkdirs();
-        mergedFile.deleteOnExit();
-
-        INDEX_MERGER.persist(top, DATA_INTERVAL, topFile, indexSpec, null);
-        INDEX_MERGER.persist(bottom, DATA_INTERVAL, bottomFile, indexSpec, null);
-
-        mergedRealtime = INDEX_IO.loadIndex(
-            INDEX_MERGER.mergeQueryableIndex(
-                Arrays.asList(INDEX_IO.loadIndex(topFile), INDEX_IO.loadIndex(bottomFile)),
-                true,
-                METRIC_AGGS,
-                mergedFile,
-                indexSpec,
-                null
-            )
-        );
-
-        return mergedRealtime;
-      }
-      catch (IOException e) {
-        throw Throwables.propagate(e);
-      }
-    }
+    return mergedRealtime.get();
   }
 
   public static IncrementalIndex makeRealtimeIndex(final String resourceFilename)
@@ -256,6 +264,11 @@ public static IncrementalIndex makeRealtimeIndex(final String resourceFilename)
   }
 
   public static IncrementalIndex makeRealtimeIndex(final String resourceFilename, boolean rollup)
+  {
+    return makeRealtimeIndex(resourceFilename, rollup, true);
+  }
+
+  public static IncrementalIndex makeRealtimeIndex(final String resourceFilename, boolean rollup, boolean bitmap)
   {
     final URL resource = TestIndex.class.getClassLoader().getResource(resourceFilename);
     if (resource == null) {
@@ -263,20 +276,20 @@ public static IncrementalIndex makeRealtimeIndex(final String resourceFilename,
     }
     log.info("Realtime loading index file[%s]", resource);
     CharSource stream = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
-    return makeRealtimeIndex(stream, rollup);
+    return makeRealtimeIndex(stream, rollup, bitmap);
   }
 
   public static IncrementalIndex makeRealtimeIndex(final CharSource source)
   {
-    return makeRealtimeIndex(source, true);
+    return makeRealtimeIndex(source, true, true);
   }
 
-  public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup)
+  public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup, boolean bitmap)
   {
     final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
         .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
         .withTimestampSpec(new TimestampSpec("ds", "auto", null))
-        .withDimensionsSpec(DIMENSIONS_SPEC)
+        .withDimensionsSpec(bitmap ? DIMENSIONS_SPEC : DIMENSIONS_SPEC_NO_BITMAPS)
         .withVirtualColumns(VIRTUAL_COLUMNS)
         .withMetrics(METRIC_AGGS)
         .withRollup(rollup)