Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,14 @@

import org.apache.druid.collections.bitmap.WrappedImmutableRoaringBitmap;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.segment.BitmapOffset;
import org.apache.druid.segment.SimpleAscendingOffset;
import org.apache.druid.segment.data.ColumnarLongs;
import org.apache.druid.segment.data.ColumnarLongsSerializer;
import org.apache.druid.segment.data.CompressedColumnarLongsSupplier;
import org.apache.druid.segment.data.CompressionFactory;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.data.Offset;
import org.apache.druid.segment.vector.BitmapVectorOffset;
import org.apache.druid.segment.vector.NoFilterVectorOffset;
import org.apache.druid.segment.vector.VectorOffset;
Expand All @@ -34,14 +37,14 @@
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;
import org.roaringbitmap.buffer.MutableRoaringBitmap;

import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
Expand All @@ -51,6 +54,8 @@ public class BaseColumnarLongsBenchmark
{
static final int VECTOR_SIZE = 512;

Map<String, ColumnarLongs> decoders = new HashMap<>();
Map<String, Integer> encodedSize = new HashMap<>();
/**
* Name of the long encoding strategy. For longs, this is a composite of both byte level block compression and
* encoding of values within the block.
Expand All @@ -68,42 +73,158 @@ public class BaseColumnarLongsBenchmark
long minValue;
long maxValue;

@Nullable
BitSet filter;

Offset offset;
VectorOffset vectorOffset;

void setupFilters(int rows, double filteredRowCountPercentage)

/**
 * Sequentially reads every row selected by {@link #offset} from the decoder registered for the
 * current {@link #encoding}, feeding each decoded long into the {@link Blackhole} so the JIT
 * cannot elide the reads. Also records the encoded size for {@link EncodingSizeProfiler}.
 */
void scan(Blackhole blackhole)
{
EncodingSizeProfiler.encodedSize = encodedSize.get(encoding);
final ColumnarLongs decoder = decoders.get(encoding);
// walk every selected row position and consume its value
for (; offset.withinBounds(); offset.increment()) {
blackhole.consume(decoder.get(offset.getOffset()));
}
// rewind so the next benchmark invocation starts from the first selected row
offset.reset();
blackhole.consume(offset);
}

/**
 * Vector-at-a-time read of every row selected by {@link #vectorOffset}, consuming each decoded
 * value through the {@link Blackhole}. Contiguous batches use the (start, size) read path;
 * non-contiguous batches read through an explicit offsets array.
 *
 * <p>NOTE(review): this closes the shared decoder at the end of each invocation, as the original
 * code did — confirm the harness re-opens/re-registers it between invocations.
 */
void scanVectorized(Blackhole blackhole)
{
EncodingSizeProfiler.encodedSize = encodedSize.get(encoding);
final ColumnarLongs decoder = decoders.get(encoding);
final long[] buffer = new long[VECTOR_SIZE];
while (!vectorOffset.isDone()) {
final int batchSize = vectorOffset.getCurrentVectorSize();
if (vectorOffset.isContiguous()) {
decoder.get(buffer, vectorOffset.getStartOffset(), batchSize);
} else {
decoder.get(buffer, vectorOffset.getOffsets(), batchSize);
}
for (int i = 0; i < batchSize; i++) {
blackhole.consume(buffer[i]);
}
vectorOffset.advance();
}
blackhole.consume(buffer);
blackhole.consume(vectorOffset);
vectorOffset.reset();
decoder.close();
}

// NOTE(review): this span is a merged diff view, not valid Java as-is. Lines from the REMOVED
// implementation (the BitSet/bitmap sampling loop and the trailing BitmapVectorOffset
// assignment) are interleaved with the NEW switch-based dispatch on filterDistribution.
// Reconcile against the actual post-merge file before editing; the intended new behavior is:
// pick offset/vectorOffset according to the named distribution when a subset of rows is
// selected, otherwise fall through to a plain ascending / no-filter offset pair.
void setupFilters(int rows, double filteredRowCountPercentage, String filterDistribution)
{
// todo: filter set distributions to simulate different select patterns?
// (because benchmarks don't take long enough already..)
filter = null;
final int filteredRowCount = (int) Math.floor(rows * filteredRowCountPercentage);


if (filteredRowCount < rows) {
// setup bitset filter
// NOTE(review): the block from here through the bitmap.add(...) line appears to be the
// OLD (removed) random-sampling implementation, superseded by setupRandomFilter below.
filter = new BitSet();
MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
for (int i = 0; i < filteredRowCount; i++) {
int rowToAccess = rand.nextInt(rows);
// Skip already selected rows if any
while (filter.get(rowToAccess)) {
rowToAccess = rand.nextInt(rows);
}
filter.set(rowToAccess);
bitmap.add(rowToAccess);
// NEW code: dispatch on the requested selection pattern
switch (filterDistribution) {
case "random":
setupRandomFilter(rows, filteredRowCount);
break;
case "contiguous-start":
offset = new SimpleAscendingOffset(rows);
vectorOffset = new NoFilterVectorOffset(VECTOR_SIZE, 0, filteredRowCount);
break;
case "contiguous-end":
offset = new SimpleAscendingOffset(rows);
vectorOffset = new NoFilterVectorOffset(VECTOR_SIZE, rows - filteredRowCount, rows);
break;
case "contiguous-bitmap-start":
setupContiguousBitmapFilter(rows, filteredRowCount, 0);
break;
case "contiguous-bitmap-end":
setupContiguousBitmapFilter(rows, filteredRowCount, rows - filteredRowCount);
break;
case "chunky-1000":
setupChunkyFilter(rows, filteredRowCount, 1000);
break;
case "chunky-10000":
setupChunkyFilter(rows, filteredRowCount, 10000);
break;
default:
throw new IllegalArgumentException("unknown filter distribution");
}
// NOTE(review): the BitmapVectorOffset assignment below appears to be OLD (removed) code;
// each setup*Filter helper now assigns vectorOffset itself.
vectorOffset = new BitmapVectorOffset(
VECTOR_SIZE,
new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()),
0,
rows
);
} else {
// all rows selected: no filtering needed, scan everything in order
offset = new SimpleAscendingOffset(rows);
vectorOffset = new NoFilterVectorOffset(VECTOR_SIZE, 0, rows);
}
}

/**
 * Builds bitmap-backed offsets selecting {@code filteredRowCount} distinct rows uniformly at
 * random from {@code [0, rows)}, wiring both the row-at-a-time {@link #offset} and the
 * vectorized {@link #vectorOffset} from the same bitmap.
 */
private void setupRandomFilter(int rows, int filteredRowCount)
{
final MutableRoaringBitmap selection = new MutableRoaringBitmap();
for (int selected = 0; selected < filteredRowCount; selected++) {
int candidate = rand.nextInt(rows);
// re-draw until we land on a row not yet selected
while (selection.contains(candidate)) {
candidate = rand.nextInt(rows);
}
selection.add(candidate);
}
offset = BitmapOffset.of(
new WrappedImmutableRoaringBitmap(selection.toImmutableRoaringBitmap()),
false,
rows
);
vectorOffset = new BitmapVectorOffset(
VECTOR_SIZE,
new WrappedImmutableRoaringBitmap(selection.toImmutableRoaringBitmap()),
0,
rows
);
}

/**
 * Builds bitmap-backed offsets selecting {@code filterRowCount} contiguous rows starting at
 * {@code startOffset}, i.e. the half-open range [startOffset, startOffset + filterRowCount).
 *
 * <p>Bug fix: the loop previously ran {@code for (int i = startOffset; i < filterRowCount; i++)},
 * which is only correct when {@code startOffset == 0}. For the "contiguous-bitmap-end" case
 * ({@code startOffset = rows - filterRowCount}) it selected no rows at all whenever
 * {@code startOffset >= filterRowCount}, so the benchmark scanned an empty bitmap. The upper
 * bound is now {@code startOffset + filterRowCount}.
 */
private void setupContiguousBitmapFilter(int rows, int filterRowCount, int startOffset)
{
MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
for (int i = startOffset; i < startOffset + filterRowCount; i++) {
bitmap.add(i);
}
offset = BitmapOffset.of(
new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()),
false,
rows
);
vectorOffset = new BitmapVectorOffset(
VECTOR_SIZE,
new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()),
startOffset,
rows
);
}

/**
 * Builds bitmap-backed offsets selecting {@code filteredRowCount} rows as randomly placed
 * contiguous chunks of up to {@code chunkSize} rows each.
 *
 * <p>Bug fix: the inner loop previously checked {@code bitmap.contains(numAdded)} — testing the
 * loop counter (a small index in [0, chunkSize)) instead of the row actually being added. That
 * meant chunks could silently overwrite rows selected by earlier chunks, and worse: once any row
 * in {@code [0, chunkSize)} had been selected, every subsequent chunk broke immediately with
 * {@code numAdded == 0}, so {@code count} never advanced and the outer loop spun forever. The
 * check is now against {@code chunkOffset + numAdded}, matching the original comment's intent.
 */
private void setupChunkyFilter(int rows, int filteredRowCount, int chunkSize)
{
// NOTE(review): assumes chunkSize < rows — rand.nextInt(rows - chunkSize) throws otherwise
MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
for (int count = 0; count < filteredRowCount; ) {
int chunkOffset = rand.nextInt(rows - chunkSize);
// re-roll if the chunk start lands on an already selected row
while (bitmap.contains(chunkOffset)) {
chunkOffset = rand.nextInt(rows - chunkSize);
}
int numAdded = 0;
for (; numAdded < chunkSize && count + numAdded < filteredRowCount; numAdded++) {
// stop when this chunk runs into a contiguous section selected by an earlier chunk
if (bitmap.contains(chunkOffset + numAdded)) {
break;
}
bitmap.add(chunkOffset + numAdded);
}
count += numAdded;
}
offset = BitmapOffset.of(
new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()),
false,
rows
);
vectorOffset = new BitmapVectorOffset(
VECTOR_SIZE,
new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()),
0,
rows
);
}

static int encodeToFile(long[] vals, String encoding, FileChannel output)throws IOException
{
SegmentWriteOutMedium writeOutMedium = new OnHeapMemorySegmentWriteOutMedium();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,41 +43,47 @@
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
@Warmup(iterations = 5)
@Measurement(iterations = 10)
public class ColumnarLongsSelectRowsFromGeneratorBenchmark extends BaseColumnarLongsFromGeneratorBenchmark
{
private Map<String, ColumnarLongs> decoders;
private Map<String, Integer> encodedSize;

/**
* Number of rows to read, the test will randomly set positions in a simulated offset of the specified density in
* {@link #setupFilters(int, double)}
* {@link #setupFilters(int, double, String)}
*/
@Param({
"0.1",
"0.25",
"0.5",
"0.6",
"0.75",
"0.8",
"0.9",
"0.95",
"1.0"
})
private double filteredRowCountPercentage;

@Param({
"random",
"contiguous-start",
"contiguous-end",
"contiguous-bitmap-start",
"contiguous-bitmap-end",
"chunky-1000",
"chunky-10000"
})
private String filterDistribution;

@Setup
public void setup() throws IOException
{
decoders = new HashMap<>();
encodedSize = new HashMap<>();

setupFromFile(encoding);
setupFilters(rows, filteredRowCountPercentage);
setupFilters(rows, filteredRowCountPercentage, filterDistribution);

// uncomment this block to run sanity check to ensure all specified encodings produce the same set of results
//CHECKSTYLE.OFF: Regexp
Expand Down Expand Up @@ -117,42 +123,15 @@ private void setupFromFile(String encoding) throws IOException
@OutputTimeUnit(TimeUnit.MICROSECONDS)
// NOTE(review): merged diff view — the inline body below (EncodingSizeProfiler assignment
// through the filter loop) is the OLD removed implementation; the NEW implementation is the
// single delegation to BaseColumnarLongsBenchmark.scan(blackhole). As printed, this span is not
// valid Java; reconcile against the real post-merge file.
public void selectRows(Blackhole blackhole)
{
EncodingSizeProfiler.encodedSize = encodedSize.get(encoding);
ColumnarLongs encoder = decoders.get(encoding);
if (filter == null) {
for (int i = 0; i < rows; i++) {
blackhole.consume(encoder.get(i));
}
} else {
for (int i = filter.nextSetBit(0); i >= 0; i = filter.nextSetBit(i + 1)) {
blackhole.consume(encoder.get(i));
}
}
scan(blackhole);
}

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
// NOTE(review): merged diff view — the inline vectorized loop below is the OLD removed
// implementation; the NEW implementation is the single delegation to
// BaseColumnarLongsBenchmark.scanVectorized(blackhole). As printed, this span is not valid
// Java; reconcile against the real post-merge file.
public void selectRowsVectorized(Blackhole blackhole)
{
EncodingSizeProfiler.encodedSize = encodedSize.get(encoding);
ColumnarLongs columnDecoder = decoders.get(encoding);
long[] vector = new long[VECTOR_SIZE];
while (!vectorOffset.isDone()) {
if (vectorOffset.isContiguous()) {
columnDecoder.get(vector, vectorOffset.getStartOffset(), vectorOffset.getCurrentVectorSize());
} else {
columnDecoder.get(vector, vectorOffset.getOffsets(), vectorOffset.getCurrentVectorSize());
}
for (int i = 0; i < vectorOffset.getCurrentVectorSize(); i++) {
blackhole.consume(vector[i]);
}
vectorOffset.advance();
}
blackhole.consume(vector);
blackhole.consume(vectorOffset);
vectorOffset.reset();
columnDecoder.close();
scanVectorized(blackhole);
}

public static void main(String[] args) throws RunnerException
Expand Down
Loading