From 4456e06c5fc4810856205b9cc35497b3c546b9da Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 12 Mar 2021 18:25:10 -0800 Subject: [PATCH 01/11] Vectorize LongDeserializers. Also, add many more tests. --- .../segment/data/CompressionFactory.java | 21 +- .../segment/data/DeltaLongEncodingReader.java | 22 + .../segment/data/LongsLongEncodingReader.java | 40 +- .../segment/data/TableLongEncodingReader.java | 22 + .../druid/segment/data/VSizeLongSerde.java | 223 +++++++--- .../segment/data/VSizeLongSerdeTest.java | 391 ++++++++++++++---- 6 files changed, 529 insertions(+), 190 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java index 7bf647a2711f..0b45cdb8d5cf 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java @@ -283,26 +283,9 @@ public interface LongEncodingReader long read(int index); - default void read(long[] out, int outPosition, int startIndex, int length) - { - for (int i = 0; i < length; i++) { - out[outPosition + i] = read(startIndex + i); - } - } - - default int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) - { - for (int i = 0; i < length; i++) { - int index = indexes[outPosition + i] - indexOffset; - if (index >= limit) { - return i; - } - - out[outPosition + i] = read(index); - } + void read(long[] out, int outPosition, int startIndex, int length); - return length; - } + int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit); LongEncodingReader duplicate(); } diff --git a/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java index c76e919c119a..cc1e25197bbb 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java @@ -65,6 +65,28 @@ public long read(int index) return base + deserializer.get(index); } + @Override + public void read(long[] out, int outPosition, int startIndex, int length) + { + deserializer.get(out, outPosition, startIndex, length); + + for (int i = 0; i < length; i++) { + out[outPosition + i] += base; + } + } + + @Override + public int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + { + final int len = deserializer.get(out, outPosition, indexes, length, indexOffset, limit); + + for (int i = 0; i < len; i++) { + out[outPosition + i] += base; + } + + return len; + } + @Override public CompressionFactory.LongEncodingReader duplicate() { diff --git a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java index aaf2c0ef2025..7fd1aef58b42 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java @@ -19,52 +19,56 @@ package org.apache.druid.segment.data; +import org.apache.datasketches.memory.Memory; + import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.nio.LongBuffer; public class LongsLongEncodingReader implements CompressionFactory.LongEncodingReader { - private LongBuffer buffer; + private Memory buffer; public LongsLongEncodingReader(ByteBuffer fromBuffer, ByteOrder order) { - this.buffer = fromBuffer.asReadOnlyBuffer().order(order).asLongBuffer(); - } - - private LongsLongEncodingReader(LongBuffer buffer) - { - this.buffer = buffer; + this.buffer = Memory.wrap(fromBuffer.slice(), order); } @Override public void setBuffer(ByteBuffer buffer) { - this.buffer = buffer.asLongBuffer(); + 
this.buffer = Memory.wrap(buffer.slice(), buffer.order()); } @Override public long read(int index) { - return buffer.get(buffer.position() + index); + return buffer.getLong((long) index << 3); } @Override public void read(final long[] out, final int outPosition, final int startIndex, final int length) { - final int oldPosition = buffer.position(); - try { - buffer.position(oldPosition + startIndex); - buffer.get(out, outPosition, length); - } - finally { - buffer.position(oldPosition); + buffer.getLongArray((long) startIndex << 3, out, outPosition, length); + } + + @Override + public int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = buffer.getLong((long) index << 3); } + + return length; } @Override public CompressionFactory.LongEncodingReader duplicate() { - return new LongsLongEncodingReader(buffer.duplicate()); + return this; } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java index 0a20e7b82932..7035be3dc4f5 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java @@ -71,6 +71,28 @@ public long read(int index) return table[(int) deserializer.get(index)]; } + @Override + public void read(long[] out, int outPosition, int startIndex, int length) + { + deserializer.get(out, outPosition, startIndex, length); + + for (int i = 0; i < length; i++) { + out[outPosition + i] = table[(int) out[outPosition + i]]; + } + } + + @Override + public int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + { + final int len = deserializer.get(out, outPosition, indexes, length, 
indexOffset, limit); + + for (int i = 0; i < len; i++) { + out[outPosition + i] = table[(int) out[outPosition + i]]; + } + + return len; + } + @Override public CompressionFactory.LongEncodingReader duplicate() { diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index c0c3c08bbefe..01837dd089ba 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -19,10 +19,10 @@ package org.apache.druid.segment.data; +import org.apache.datasketches.memory.Memory; import org.apache.druid.java.util.common.IAE; import javax.annotation.Nullable; - import java.io.Closeable; import java.io.IOException; import java.io.OutputStream; @@ -416,92 +416,137 @@ public void close() throws IOException public interface LongDeserializer { long get(int index); + + default void get(long[] out, int outPosition, int startIndex, int length) + { + for (int i = 0; i < length; i++) { + out[outPosition + i] = get(startIndex + i); + } + } + + default int get(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = get(index); + } + + return length; + } } private static final class Size1Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size1Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { int shift = 7 - (index & 7); - return (buffer.get(offset + (index >> 3)) >> shift) & 1; + return (buffer.getByte((index >> 3)) >> 
shift) & 1; } } private static final class Size2Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size2Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { int shift = 6 - ((index & 3) << 1); - return (buffer.get(offset + (index >> 2)) >> shift) & 3; + return (buffer.getByte((index >> 2)) >> shift) & 3; } } private static final class Size4Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size4Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { int shift = ((index + 1) & 1) << 2; - return (buffer.get(offset + (index >> 1)) >> shift) & 0xF; + return (buffer.getByte((index >> 1)) >> shift) & 0xF; } } private static final class Size8Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size8Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.get(offset + index) & 0xFF; + return buffer.getByte(index) & 0xFF; + } + + @Override + public void get(long[] out, int outPosition, int startIndex, int length) + { + long pos = startIndex; + for (int i = 0; i < length; i++, pos += 1) { + out[outPosition + i] = buffer.getByte(pos) & 0xFF; + } + } + + @Override + public int get(long[] out, int outPosition, int[] indexes, int length, int indexOffset, 
int limit) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = buffer.getByte(index) & 0xFF; + } + + return length; } } private static final class Size12Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size12Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override @@ -509,37 +554,61 @@ public long get(int index) { int shift = ((index + 1) & 1) << 2; int offset = (index * 3) >> 1; - return (buffer.getShort(this.offset + offset) >> shift) & 0xFFF; + return (buffer.getShort(offset) >> shift) & 0xFFF; } } private static final class Size16Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size16Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.getShort(offset + (index << 1)) & 0xFFFF; + return buffer.getShort((long) index << 1) & 0xFFFF; + } + + @Override + public void get(long[] out, int outPosition, int startIndex, int length) + { + long pos = (long) startIndex << 1; + for (int i = 0; i < length; i++, pos += Short.BYTES) { + out[outPosition + i] = buffer.getShort(pos) & 0xFFFF; + } + } + + @Override + public int get(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = buffer.getShort((long) index << 1) & 0xFFFF; + } + + return length; } 
} private static final class Size20Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size20Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override @@ -547,116 +616,136 @@ public long get(int index) { int shift = (((index + 1) & 1) << 2) + 8; int offset = (index * 5) >> 1; - return (buffer.getInt(this.offset + offset) >> shift) & 0xFFFFF; + return (buffer.getInt(offset) >> shift) & 0xFFFFF; } } private static final class Size24Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size24Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.getInt(offset + index * 3) >>> 8; + return buffer.getInt(index * 3L) >>> 8; } } private static final class Size32Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size32Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.getInt(offset + (index << 2)) & 0xFFFFFFFFL; + return buffer.getInt(((long) index << 2)) & 0xFFFFFFFFL; } } private static final class Size40Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size40Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + 
dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.getLong(offset + index * 5) >>> 24; + return buffer.getLong(index * 5L) >>> 24; } } private static final class Size48Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size48Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.getLong(offset + index * 6) >>> 16; + return buffer.getLong(index * 6L) >>> 16; } } private static final class Size56Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size56Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.getLong(offset + index * 7) >>> 8; + return buffer.getLong(index * 7L) >>> 8; } } private static final class Size64Des implements LongDeserializer { - final ByteBuffer buffer; - final int offset; + final Memory buffer; public Size64Des(ByteBuffer buffer, int bufferOffset) { - this.buffer = buffer; - this.offset = bufferOffset; + final ByteBuffer dup = buffer.duplicate(); + dup.position(bufferOffset); + this.buffer = Memory.wrap(dup.slice(), buffer.order()); } @Override public long get(int index) { - return buffer.getLong(offset + (index << 3)); + return buffer.getLong((long) index << 3); + } + + @Override + public void get(long[] out, int outPosition, int startIndex, int length) + { + buffer.getLongArray((long) startIndex << 3, out, outPosition, length); } - } + @Override + public int get(long[] out, 
int outPosition, int[] indexes, int length, int indexOffset, int limit) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = buffer.getLong((long) index << 3); + } + + return length; + } + } } diff --git a/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java index 879077f6091e..739a30228d18 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java @@ -20,91 +20,161 @@ package org.apache.druid.segment.data; +import com.google.common.primitives.Ints; +import org.apache.druid.java.util.common.StringUtils; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collection; +import java.util.stream.Collectors; +@RunWith(Enclosed.class) public class VSizeLongSerdeTest { - private ByteBuffer buffer; - private ByteArrayOutputStream outStream; - private ByteBuffer outBuffer; - private final long[] values0 = {0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1}; - private final long[] values1 = {0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1}; - private final long[] values2 = {12, 5, 2, 9, 3, 2, 5, 1, 0, 6, 13, 10, 15}; - private final long[] values3 = {1, 1, 1, 1, 1, 11, 11, 11, 11}; - private final long[] values4 = {200, 200, 200, 401, 200, 301, 200, 200, 200, 404, 200, 200, 200, 200}; - private final long[] values5 = {123, 632, 12, 39, 536, 0, 1023, 52, 777, 526, 214, 562, 823, 346}; - private final long[] values6 = {1000000, 1000001, 1000002, 1000003, 1000004, 1000005, 1000006, 1000007, 1000008}; 
- - @Before - public void setUp() + @RunWith(Parameterized.class) + public static class EveryLittleBitTest { - outStream = new ByteArrayOutputStream(); - outBuffer = ByteBuffer.allocate(500000); - } + private final int numBits; - @Test - public void testGetBitsForMax() - { - Assert.assertEquals(1, VSizeLongSerde.getBitsForMax(1)); - Assert.assertEquals(1, VSizeLongSerde.getBitsForMax(2)); - Assert.assertEquals(2, VSizeLongSerde.getBitsForMax(3)); - Assert.assertEquals(4, VSizeLongSerde.getBitsForMax(16)); - Assert.assertEquals(8, VSizeLongSerde.getBitsForMax(200)); - Assert.assertEquals(12, VSizeLongSerde.getBitsForMax(999)); - Assert.assertEquals(24, VSizeLongSerde.getBitsForMax(12345678)); - Assert.assertEquals(32, VSizeLongSerde.getBitsForMax(Integer.MAX_VALUE)); - Assert.assertEquals(64, VSizeLongSerde.getBitsForMax(Long.MAX_VALUE)); - } + public EveryLittleBitTest(int numBits) + { + this.numBits = numBits; + } - @Test - public void testSerdeValues() throws IOException - { - for (int i : VSizeLongSerde.SUPPORTED_SIZES) { - testSerde(i, values0); - if (i >= 1) { - testSerde(i, values1); - } - if (i >= 4) { - testSerde(i, values2); - testSerde(i, values3); - } - if (i >= 9) { - testSerde(i, values4); - } - if (i >= 10) { - testSerde(i, values5); + @Parameterized.Parameters(name = "numBits={0}") + public static Collection data() + { + return Arrays.stream(VSizeLongSerde.SUPPORTED_SIZES) + .mapToObj(value -> new Object[]{value}) + .collect(Collectors.toList()); + } + + @Test + public void testEveryPowerOfTwo() throws IOException + { + // Test every long that has a single bit set. + + final int numLongs = Math.min(64, numBits); + final long[] longs = new long[numLongs]; + + for (int bit = 0; bit < numLongs; bit++) { + longs[bit] = 1L << bit; } - if (i >= 20) { - testSerde(i, values6); + + testSerde(numBits, longs); + } + + @Test + public void testEveryPowerOfTwoMinusOne() throws IOException + { + // Test every long with runs of low bits set. 
+ + final int numLongs = Math.min(64, numBits + 1); + final long[] longs = new long[numLongs]; + + for (int bit = 0; bit < numLongs; bit++) { + longs[bit] = (1L << bit) - 1; } + + testSerde(numBits, longs); } } - @Test - public void testSerdeLoop() throws IOException + public static class SpecificValuesTest { - for (int i : VSizeLongSerde.SUPPORTED_SIZES) { - if (i >= 8) { - testSerdeIncLoop(i, 0, 256); + private final long[] values0 = {0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1}; + private final long[] values1 = {0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1}; + private final long[] values2 = {12, 5, 2, 9, 3, 2, 5, 1, 0, 6, 13, 10, 15}; + private final long[] values3 = {1, 1, 1, 1, 1, 11, 11, 11, 11}; + private final long[] values4 = {200, 200, 200, 401, 200, 301, 200, 200, 200, 404, 200, 200, 200, 200}; + private final long[] values5 = {123, 632, 12, 39, 536, 0, 1023, 52, 777, 526, 214, 562, 823, 346}; + private final long[] values6 = {1000000, 1000001, 1000002, 1000003, 1000004, 1000005, 1000006, 1000007, 1000008}; + + @Test + public void testGetBitsForMax() + { + Assert.assertEquals(1, VSizeLongSerde.getBitsForMax(1)); + Assert.assertEquals(1, VSizeLongSerde.getBitsForMax(2)); + Assert.assertEquals(2, VSizeLongSerde.getBitsForMax(3)); + Assert.assertEquals(4, VSizeLongSerde.getBitsForMax(16)); + Assert.assertEquals(8, VSizeLongSerde.getBitsForMax(200)); + Assert.assertEquals(12, VSizeLongSerde.getBitsForMax(999)); + Assert.assertEquals(24, VSizeLongSerde.getBitsForMax(12345678)); + Assert.assertEquals(32, VSizeLongSerde.getBitsForMax(Integer.MAX_VALUE)); + Assert.assertEquals(64, VSizeLongSerde.getBitsForMax(Long.MAX_VALUE)); + } + + @Test + public void testSerdeValues() throws IOException + { + for (int i : VSizeLongSerde.SUPPORTED_SIZES) { + testSerde(i, values0); + if (i >= 1) { + testSerde(i, values1); + } + if (i >= 4) { + testSerde(i, values2); + testSerde(i, values3); + } + if (i >= 9) { + testSerde(i, values4); + } + if (i >= 10) { + testSerde(i, values5); + } + if (i >= 
20) { + testSerde(i, values6); + } } - if (i >= 16) { - testSerdeIncLoop(i, 0, 50000); + } + + @Test + public void testSerdeLoop() throws IOException + { + final long[] zeroTo256 = generateSequentialLongs(0, 256); + final long[] zeroTo50000 = generateSequentialLongs(0, 50000); + + for (int i : VSizeLongSerde.SUPPORTED_SIZES) { + if (i >= 8) { + testSerde(i, zeroTo256); + } + if (i >= 16) { + testSerde(i, zeroTo50000); + } + } + } + + private long[] generateSequentialLongs(final long start, final long end) + { + final long[] values = new long[Ints.checkedCast(end - start)]; + + for (int i = 0; i < values.length; i++) { + values[i] = start + i; } + + return values; } } - public void testSerde(int longSize, long[] values) throws IOException + public static void testSerde(int numBits, long[] values) throws IOException { - outBuffer.rewind(); - outStream.reset(); - VSizeLongSerde.LongSerializer streamSer = VSizeLongSerde.getSerializer(longSize, outStream); - VSizeLongSerde.LongSerializer bufferSer = VSizeLongSerde.getSerializer(longSize, outBuffer, 0); + final int bufferOffset = 1; + final ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + outStream.write(0xAF); // Dummy byte so the real stuff starts at bufferOffset + + final ByteBuffer buffer = + ByteBuffer.allocate(VSizeLongSerde.getSerializedSize(numBits, values.length) + bufferOffset); + buffer.rewind(); + buffer.put(0, (byte) 0xAF); // Dummy byte again. 
+ VSizeLongSerde.LongSerializer streamSer = VSizeLongSerde.getSerializer(numBits, outStream); + VSizeLongSerde.LongSerializer bufferSer = VSizeLongSerde.getSerializer(numBits, buffer, bufferOffset); for (long value : values) { streamSer.write(value); bufferSer.write(value); @@ -112,40 +182,189 @@ public void testSerde(int longSize, long[] values) throws IOException streamSer.close(); bufferSer.close(); - buffer = ByteBuffer.wrap(outStream.toByteArray()); - Assert.assertEquals(VSizeLongSerde.getSerializedSize(longSize, values.length), buffer.capacity()); - Assert.assertEquals(VSizeLongSerde.getSerializedSize(longSize, values.length), outBuffer.position()); - VSizeLongSerde.LongDeserializer streamDes = VSizeLongSerde.getDeserializer(longSize, buffer, 0); - VSizeLongSerde.LongDeserializer bufferDes = VSizeLongSerde.getDeserializer(longSize, outBuffer, 0); + // Verify serialized sizes. + final ByteBuffer bufferFromStream = ByteBuffer.wrap(outStream.toByteArray()); + Assert.assertEquals( + StringUtils.format("Serialized size (stream, numBits = %d)", numBits), + VSizeLongSerde.getSerializedSize(numBits, values.length), + bufferFromStream.capacity() - bufferOffset + ); + Assert.assertEquals( + StringUtils.format("Serialized size (buffer, numBits = %d)", numBits), + VSizeLongSerde.getSerializedSize(numBits, values.length), + buffer.position() - bufferOffset + ); + + // Verify the actual serialized contents. + Assert.assertArrayEquals( + StringUtils.format("Stream and buffer serialized images are equal (numBits = %d)", numBits), + bufferFromStream.array(), + buffer.array() + ); + + // Verify deserialization. We know the two serialized buffers are equal, so from this point on, just use one. 
+ VSizeLongSerde.LongDeserializer deserializer = VSizeLongSerde.getDeserializer(numBits, buffer, bufferOffset); + + testGetSingleRow(deserializer, numBits, values); + testContiguousGetSingleRow(deserializer, numBits, values); + testContiguousGetWholeRegion(deserializer, numBits, values); + testNoncontiguousGetSingleRow(deserializer, numBits, values); + testNoncontiguousGetEveryOtherValue(deserializer, numBits, values); + testNoncontiguousGetEveryOtherValueWithLimit(deserializer, numBits, values); + } + + private static void testGetSingleRow( + final VSizeLongSerde.LongDeserializer deserializer, + final int numBits, + final long[] values + ) + { + for (int i = 0; i < values.length; i++) { + Assert.assertEquals( + StringUtils.format("Deserializer (testGetSingleRow, numBits = %d, position = %d)", numBits, i), + values[i], + deserializer.get(i) + ); + } + } + + private static void testContiguousGetSingleRow( + final VSizeLongSerde.LongDeserializer deserializer, + final int numBits, + final long[] values + ) + { + final int outPosition = 1; + final long[] out = new long[values.length + outPosition]; + for (int i = 0; i < values.length; i++) { - Assert.assertEquals(values[i], streamDes.get(i)); - Assert.assertEquals(values[i], bufferDes.get(i)); + Arrays.fill(out, -1); + deserializer.get(out, outPosition, i, 1); + + Assert.assertEquals( + StringUtils.format("Deserializer (testContiguousGetSingleRow, numBits = %d, position = %d)", numBits, i), + values[i], + out[outPosition] + ); } } - public void testSerdeIncLoop(int longSize, long start, long end) throws IOException + private static void testContiguousGetWholeRegion( + final VSizeLongSerde.LongDeserializer deserializer, + final int numBits, + final long[] values + ) + { + final int outPosition = 1; + final long[] out = new long[values.length + outPosition]; + Arrays.fill(out, -1); + deserializer.get(out, outPosition, 0, values.length); + + Assert.assertArrayEquals( + StringUtils.format("Deserializer 
(testContiguousGetWholeRegion, numBits = %d)", numBits), + values, + Arrays.stream(out).skip(outPosition).toArray() + ); + } + + private static void testNoncontiguousGetSingleRow( + final VSizeLongSerde.LongDeserializer deserializer, + final int numBits, + final long[] values + ) { - outBuffer.rewind(); - outStream.reset(); - VSizeLongSerde.LongSerializer streamSer = VSizeLongSerde.getSerializer(longSize, outStream); - VSizeLongSerde.LongSerializer bufferSer = VSizeLongSerde.getSerializer(longSize, outBuffer, 0); - for (long i = start; i < end; i++) { - streamSer.write(i); - bufferSer.write(i); + final int indexOffset = 1; + final int outPosition = 1; + final long[] out = new long[values.length + outPosition]; + final int[] indexes = new int[values.length + outPosition]; + + for (int i = 0; i < values.length; i++) { + Arrays.fill(out, -1); + Arrays.fill(indexes, -1); + indexes[outPosition] = i + indexOffset; + + deserializer.get(out, outPosition, indexes, 1, indexOffset, values.length); + + Assert.assertEquals( + StringUtils.format("Deserializer (testNoncontiguousGetSingleRow, numBits = %d, position = %d)", numBits, i), + values[i], + out[outPosition] + ); } - streamSer.close(); - bufferSer.close(); + } - buffer = ByteBuffer.wrap(outStream.toByteArray()); - Assert.assertEquals(VSizeLongSerde.getSerializedSize(longSize, (int) (end - start)), buffer.capacity()); - Assert.assertEquals(VSizeLongSerde.getSerializedSize(longSize, (int) (end - start)), outBuffer.position()); - VSizeLongSerde.LongDeserializer streamDes = VSizeLongSerde.getDeserializer(longSize, buffer, 0); - VSizeLongSerde.LongDeserializer bufferDes = VSizeLongSerde.getDeserializer(longSize, outBuffer, 0); - for (int i = 0; i < end - start; i++) { - Assert.assertEquals(start + i, streamDes.get(i)); - Assert.assertEquals(start + i, bufferDes.get(i)); + private static void testNoncontiguousGetEveryOtherValue( + final VSizeLongSerde.LongDeserializer deserializer, + final int numBits, + final long[] values + ) 
+ { + final int indexOffset = 1; + final int outPosition = 1; + final long[] out = new long[values.length + outPosition]; + final long[] expectedOut = new long[values.length + outPosition]; + final int[] indexes = new int[values.length + outPosition]; + + Arrays.fill(out, -1); + Arrays.fill(expectedOut, -1); + Arrays.fill(indexes, -1); + + int cnt = 0; + for (int i = 0; i < values.length; i++) { + if (i % 2 == 0) { + indexes[outPosition + i / 2] = i + indexOffset; + expectedOut[outPosition + i / 2] = values[i]; + cnt++; + } } + + deserializer.get(out, outPosition, indexes, cnt, indexOffset, values.length); + + Assert.assertArrayEquals( + StringUtils.format("Deserializer (testNoncontiguousGetEveryOtherValue, numBits = %d)", numBits), + expectedOut, + out + ); } + private static void testNoncontiguousGetEveryOtherValueWithLimit( + final VSizeLongSerde.LongDeserializer deserializer, + final int numBits, + final long[] values + ) + { + final int indexOffset = 1; + final int outPosition = 1; + final long[] out = new long[values.length + outPosition]; + final long[] expectedOut = new long[values.length + outPosition]; + final int[] indexes = new int[values.length + outPosition]; + final int limit = values.length - 2; // Don't do the last value + + Arrays.fill(out, -1); + Arrays.fill(expectedOut, -1); + Arrays.fill(indexes, -1); + int cnt = 0; + for (int i = 0; i < values.length; i++) { + if (i % 2 == 0) { + indexes[outPosition + i / 2] = i + indexOffset; + + if (i < limit) { + expectedOut[outPosition + i / 2] = values[i]; + } + + cnt++; + } + } + + final int ret = deserializer.get(out, outPosition, indexes, cnt, indexOffset, limit); + + Assert.assertArrayEquals( + StringUtils.format("Deserializer (testNoncontiguousGetEveryOtherValue, numBits = %d)", numBits), + expectedOut, + out + ); + + Assert.assertEquals(Math.max(0, cnt - 1), ret); + } } From f94da5956ec3e8ad218943e8e7a65b472cff3b3b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 13 Mar 2021 00:17:20 -0800 
Subject: [PATCH 02/11] more faster --- .../BaseColumnarLongsBenchmark.java | 204 ++++++ ...seColumnarLongsFromGeneratorBenchmark.java | 425 +++++++++++ ...aseColumnarLongsFromSegmentsBenchmark.java | 163 +++++ ...LongsEncodeDataFromGeneratorBenchmark.java | 85 +++ ...arLongsEncodeDataFromSegmentBenchmark.java | 86 +++ ...LongsSelectRowsFromGeneratorBenchmark.java | 183 +++++ ...arLongsSelectRowsFromSegmentBenchmark.java | 164 +++++ .../CompressedColumnarIntsBenchmark.java | 2 +- ...ressedVSizeColumnarMultiIntsBenchmark.java | 2 +- .../compression/EncodingSizeProfiler.java | 62 ++ .../FloatCompressionBenchmark.java | 2 +- ...loatCompressionBenchmarkFileGenerator.java | 2 +- .../LongCompressionBenchmark.java | 49 +- ...LongCompressionBenchmarkFileGenerator.java | 2 +- .../VSizeSerdeBenchmark.java | 2 +- .../segment/data/DeltaLongEncodingReader.java | 14 +- .../segment/data/TableLongEncodingReader.java | 14 +- .../druid/segment/data/VSizeLongSerde.java | 683 +++++++++++++++--- .../generator/ColumnValueGenerator.java | 9 +- .../data/CompressedLongsSerdeTest.java | 6 + .../segment/data/VSizeLongSerdeTest.java | 11 +- 21 files changed, 2028 insertions(+), 142 deletions(-) create mode 100644 benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java create mode 100644 benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java create mode 100644 benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java create mode 100644 benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java create mode 100644 benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java create mode 100644 benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java create mode 100644 
benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java rename benchmarks/src/test/java/org/apache/druid/benchmark/{ => compression}/CompressedColumnarIntsBenchmark.java (99%) rename benchmarks/src/test/java/org/apache/druid/benchmark/{ => compression}/CompressedVSizeColumnarMultiIntsBenchmark.java (99%) create mode 100644 benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java rename benchmarks/src/test/java/org/apache/druid/benchmark/{ => compression}/FloatCompressionBenchmark.java (98%) rename benchmarks/src/test/java/org/apache/druid/benchmark/{ => compression}/FloatCompressionBenchmarkFileGenerator.java (99%) rename benchmarks/src/test/java/org/apache/druid/benchmark/{ => compression}/LongCompressionBenchmark.java (74%) rename benchmarks/src/test/java/org/apache/druid/benchmark/{ => compression}/LongCompressionBenchmarkFileGenerator.java (99%) rename benchmarks/src/test/java/org/apache/druid/benchmark/{ => compression}/VSizeSerdeBenchmark.java (99%) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java new file mode 100644 index 000000000000..598395ffb622 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.benchmark.compression; + +import org.apache.druid.collections.bitmap.WrappedImmutableRoaringBitmap; +import org.apache.druid.segment.data.ColumnarLongs; +import org.apache.druid.segment.data.ColumnarLongsSerializer; +import org.apache.druid.segment.data.CompressedColumnarLongsSupplier; +import org.apache.druid.segment.data.CompressionFactory; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.vector.BitmapVectorOffset; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.VectorOffset; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.util.BitSet; +import java.util.List; +import java.util.Map; +import java.util.Random; + +@State(Scope.Benchmark) +public class BaseColumnarLongsBenchmark +{ + static final int VECTOR_SIZE = 512; + + @Param({ + "lz4-longs", + "lz4-auto" + }) + String encoding; + + Random rand = new Random(0); + + long[] vals; + + long minValue; + long maxValue; + + @Nullable + BitSet filter; + + VectorOffset vectorOffset; + + void setupFilters(int rows, double 
filteredRowCountPercentage) + { + // todo: filter set distributions to simulate different select patterns? + // (because benchmarks don't take long enough already..) + filter = null; + final int filteredRowCount = (int) Math.floor(rows * filteredRowCountPercentage); + + if (filteredRowCount < rows) { + // setup bitset filter + filter = new BitSet(); + MutableRoaringBitmap bitmap = new MutableRoaringBitmap(); + for (int i = 0; i < filteredRowCount; i++) { + int rowToAccess = rand.nextInt(rows); + // Skip already selected rows if any + while (filter.get(rowToAccess)) { + rowToAccess = rand.nextInt(rows); + } + filter.set(rowToAccess); + bitmap.add(rowToAccess); + } + vectorOffset = new BitmapVectorOffset(VECTOR_SIZE, new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()), 0, rows); + } else { + vectorOffset = new NoFilterVectorOffset(VECTOR_SIZE, 0, rows); + } + } + + static int encodeToFile(long[] vals, String encoding, FileChannel output)throws IOException + { + SegmentWriteOutMedium writeOutMedium = new OnHeapMemorySegmentWriteOutMedium(); + + ColumnarLongsSerializer serializer; + switch (encoding) { + case "lz4-longs": + serializer = CompressionFactory.getLongSerializer( + encoding, + writeOutMedium, + "lz4-longs", + ByteOrder.LITTLE_ENDIAN, + CompressionFactory.LongEncodingStrategy.LONGS, + CompressionStrategy.LZ4 + ); + break; + case "lz4-auto": + serializer = CompressionFactory.getLongSerializer( + encoding, + writeOutMedium, + "lz4-auto", + ByteOrder.LITTLE_ENDIAN, + CompressionFactory.LongEncodingStrategy.AUTO, + CompressionStrategy.LZ4 + ); + break; + case "none-longs": + serializer = CompressionFactory.getLongSerializer( + encoding, + writeOutMedium, + "none-longs", + ByteOrder.LITTLE_ENDIAN, + CompressionFactory.LongEncodingStrategy.LONGS, + CompressionStrategy.NONE + ); + break; + case "none-auto": + serializer = CompressionFactory.getLongSerializer( + encoding, + writeOutMedium, + "none-auto", + ByteOrder.LITTLE_ENDIAN, + 
CompressionFactory.LongEncodingStrategy.AUTO, + CompressionStrategy.NONE + ); + break; + default: + throw new RuntimeException("unknown encoding"); + } + + serializer.open(); + for (long val : vals) { + serializer.add(val); + } + serializer.writeTo(output, null); + return (int) serializer.getSerializedSize(); + } + + static ColumnarLongs createColumnarLongs(String encoding, ByteBuffer buffer) + { + switch (encoding) { + case "lz4-longs": + case "lz4-auto": + case "none-auto": + case "none-longs": + return CompressedColumnarLongsSupplier.fromByteBuffer(buffer, ByteOrder.LITTLE_ENDIAN).get(); + } + + throw new IllegalArgumentException("unknown encoding"); + } + + + // for debugging: validate that all encoders read the same values + static void checkSanity(Map encoders, List encodings, int rows) + throws Exception + { + for (int i = 0; i < rows; i++) { + checkRowSanity(encoders, encodings, i); + } + } + + static void checkRowSanity(Map encoders, List encodings, int row) + throws Exception + { + if (encodings.size() > 1) { + for (int i = 0; i < encodings.size() - 1; i++) { + String currentKey = encodings.get(i); + String nextKey = encodings.get(i + 1); + ColumnarLongs current = encoders.get(currentKey); + ColumnarLongs next = encoders.get(nextKey); + long vCurrent = current.get(row); + long vNext = next.get(row); + if (vCurrent != vNext) { + throw new Exception("values do not match at row " + + row + + " - " + + currentKey + + ":" + + vCurrent + + " " + + nextKey + + ":" + + vNext); + } + } + } + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java new file mode 100644 index 000000000000..b46c30cfa13f --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.benchmark.compression; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.generator.ColumnValueGenerator; +import org.apache.druid.segment.generator.GeneratorColumnSchema; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.List; + +@State(Scope.Benchmark) +public class BaseColumnarLongsFromGeneratorBenchmark extends BaseColumnarLongsBenchmark +{ + static int SEED = 1; + + @Param({ + "0.0", +// "0.5", +// "0.95" + }) + double zeroProbability; + + @Param({"5000000"}) + int rows; + + @Param({ +// "enumerated-0-1", +// "enumerated-full", +// "normal", +// "sequential-1000", +// "sequential-unique", + "uniform-1", + "uniform-2", +// "uniform-3", + "uniform-4", + "uniform-8", + "uniform-12", + "uniform-16", + "uniform-20", + "uniform-24", + "uinform-32", + "uniform-40", 
+ "uniform-48", + "uniform-56", + "uniform-64", +// "zipf-low-100", +// "zipf-low-100000", +// "zipf-low-32-bit", +// "zipf-high-100", +// "zipf-high-100000", +// "zipf-high-32-bit" + }) + String distribution; + + void initializeValues() throws IOException + { + vals = new long[rows]; + final String filename = getGeneratorValueFilename(distribution, rows, zeroProbability); + File dir = getTmpDir(); + File dataFile = new File(dir, filename); + + if (dataFile.exists()) { + System.out.println("Data files already exist, re-using"); + try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) { + int lineNum = 0; + String line; + while ((line = br.readLine()) != null) { + vals[lineNum] = Long.parseLong(line); + if (vals[lineNum] < minValue) { + minValue = vals[lineNum]; + } + if (vals[lineNum] > maxValue) { + maxValue = vals[lineNum]; + } + lineNum++; + } + } + } else { + try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) { + ColumnValueGenerator valueGenerator = makeGenerator(distribution, rows, zeroProbability); + + for (int i = 0; i < rows; i++) { + long value; + Object rowValue = valueGenerator.generateRowValue(); + value = rowValue != null ? 
(long) rowValue : 0; + vals[i] = value; + if (vals[i] < minValue) { + minValue = vals[i]; + } + if (vals[i] > maxValue) { + maxValue = vals[i]; + } + writer.write(vals[i] + "\n"); + } + } + } + } + + static ColumnValueGenerator makeGenerator( + String distribution, + int rows, + double zeroProbability + ) + { + List enumerated; + List probability; + switch (distribution) { + case "enumerated-0-1": + enumerated = ImmutableList.of(0, 1); + probability = ImmutableList.of(0.6, 0.4); + return GeneratorColumnSchema.makeEnumerated( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + enumerated, + probability + ).makeGenerator(SEED); + case "enumerated-full": + enumerated = ImmutableList.of( + 0, + 1, + Long.MAX_VALUE - 1, + Long.MIN_VALUE + 1, + Long.MIN_VALUE / 2, + Long.MAX_VALUE / 2 + ); + probability = ImmutableList.of(0.4, 0.2, 0.1, 0.1, 0.1, 0.1); + return GeneratorColumnSchema.makeEnumerated( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + enumerated, + probability + ).makeGenerator(SEED); + case "normal": + return GeneratorColumnSchema.makeNormal( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 1.0, + (double) Integer.MAX_VALUE, + true + ).makeGenerator(SEED); + case "sequential-1000": + return GeneratorColumnSchema.makeSequential( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + Integer.MAX_VALUE - 1001, + Integer.MAX_VALUE - 1 + ).makeGenerator(SEED); + case "sequential-unique": + return GeneratorColumnSchema.makeSequential( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + rows + ).makeGenerator(SEED); + case "uniform-1": + return GeneratorColumnSchema.makeDiscreteUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 1 + ).makeGenerator(SEED); + case "uniform-2": + return GeneratorColumnSchema.makeDiscreteUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 4 + ).makeGenerator(SEED); + case "uniform-3": + 
return GeneratorColumnSchema.makeDiscreteUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 1000000, + 1000008 + ).makeGenerator(SEED); + case "uniform-4": + return GeneratorColumnSchema.makeDiscreteUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 1 << 4 + ).makeGenerator(SEED); + case "uniform-8": + return GeneratorColumnSchema.makeDiscreteUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 1 << 8 + ).makeGenerator(SEED); + case "uniform-12": + return GeneratorColumnSchema.makeDiscreteUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 1 << 12 + ).makeGenerator(SEED); + case "uniform-16": + return GeneratorColumnSchema.makeDiscreteUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 1 << 16 + ).makeGenerator(SEED); + case "uniform-20": + return GeneratorColumnSchema.makeContinuousUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 1 << 20 + ).makeGenerator(SEED); + case "uniform-24": + return GeneratorColumnSchema.makeContinuousUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + (1 << 24) - 1 + ).makeGenerator(SEED); + case "uinform-32": + return GeneratorColumnSchema.makeContinuousUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + Integer.MAX_VALUE - 1 + ).makeGenerator(SEED); + case "uniform-40": + return GeneratorColumnSchema.makeContinuousUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0L, + (1L << 40) - 1 + ).makeGenerator(SEED); + case "uniform-48": + return GeneratorColumnSchema.makeContinuousUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + (1L << 48) - 1 + ).makeGenerator(SEED); + case "uniform-56": + return GeneratorColumnSchema.makeContinuousUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + (1L << 56 - 1) + 
).makeGenerator(SEED); + case "uniform-64": + return GeneratorColumnSchema.makeContinuousUniform( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + Long.MAX_VALUE - 1 + ).makeGenerator(SEED); + case "zipf-low-100": + return GeneratorColumnSchema.makeLazyZipf( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 100, + 1d + ).makeGenerator(SEED); + case "zipf-low-100000": + return GeneratorColumnSchema.makeLazyZipf( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + -50000, + 50000, + 1d + ).makeGenerator(SEED); + case "zipf-low-32-bit": + return GeneratorColumnSchema.makeLazyZipf( + distribution, + ValueType.LONG, + true, + 1, + 0d, + 0, + Integer.MAX_VALUE, + 1d + ).makeGenerator(SEED); + case "zipf-high-100": + return GeneratorColumnSchema.makeLazyZipf( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + 0, + 100, + 3d + ).makeGenerator(SEED); + case "zipf-high-100000": + return GeneratorColumnSchema.makeLazyZipf( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + -50000, + 50000, + 3d + ).makeGenerator(SEED); + case "zipf-high-32-bit": + return GeneratorColumnSchema.makeLazyZipf( + distribution, + ValueType.LONG, + true, + 1, + 0d, + 0, + Integer.MAX_VALUE, + 3d + ).makeGenerator(SEED); + } + throw new IllegalArgumentException("unknown distribution"); + } + + static String getGeneratorValueFilename(String distribution, int rows, double nullProbability) + { + return StringUtils.format("values-%s-%s-%s.bin", distribution, rows, nullProbability); + } + + static String getGeneratorEncodedFilename(String encoding, String distribution, int rows, double nullProbability) + { + return StringUtils.format("%s-%s-%s-%s.bin", encoding, distribution, rows, nullProbability); + } + + static File getTmpDir() + { + final String dirPath = "tmp/encoding/longs/"; + File dir = new File(dirPath); + dir.mkdirs(); + return dir; + } +} diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java new file mode 100644 index 000000000000..0fa04f15a715 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.compression; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.api.client.util.Lists; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.LongsColumn; +import org.apache.druid.segment.column.ValueType; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Set; + +@State(Scope.Benchmark) +public class BaseColumnarLongsFromSegmentsBenchmark extends BaseColumnarLongsBenchmark +{ + //CHECKSTYLE.OFF: Regexp + // twitter-ticker + @Param({ + "__time", + "followers", + "friends", + "max_followers", + "max_retweets", + "max_statuses", + "retweets", + "statuses", + "tweets" + }) + String columnName; + + @Param({"3259585"}) + int rows; + + + @Param({"tmp/segments/twitter-ticker-1/"}) + String segmentPath; + + @Param({"twitter-ticker"}) + String segmentName; + + + //CHECKSTYLE.ON: Regexp + + private static IndexIO INDEX_IO; + public static ObjectMapper JSON_MAPPER; + + void initializeValues() throws IOException + { + initializeSegmentValueIntermediaryFile(); + File dir = getTmpDir(); + File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); + + ArrayList values = Lists.newArrayList(); + try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) { + String line; + while ((line = 
br.readLine()) != null) { + long value = Long.parseLong(line); + if (value < minValue) { + minValue = value; + } + if (value > maxValue) { + maxValue = value; + } + values.add(value); + rows++; + } + } + + vals = values.stream().mapToLong(i -> i).toArray(); + } + + + String getColumnDataFileName(String segmentName, String columnName) + { + return StringUtils.format("%s-longs-%s.txt", segmentName, columnName); + } + + String getColumnEncodedFileName(String encoding, String segmentName, String columnName) + { + return StringUtils.format("%s-%s-longs-%s.bin", encoding, segmentName, columnName); + } + + File getTmpDir() + { + final String dirPath = StringUtils.format("tmp/encoding/%s", segmentName); + File dir = new File(dirPath); + dir.mkdirs(); + return dir; + } + + /** + * writes column values to text file, 1 per line + * + * @throws IOException + */ + void initializeSegmentValueIntermediaryFile() throws IOException + { + File dir = getTmpDir(); + File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); + + if (!dataFile.exists()) { + JSON_MAPPER = new DefaultObjectMapper(); + INDEX_IO = new IndexIO( + JSON_MAPPER, + () -> 0 + ); + try (final QueryableIndex index = INDEX_IO.loadIndex(new File(segmentPath))) { + final Set columnNames = Sets.newLinkedHashSet(); + columnNames.add(ColumnHolder.TIME_COLUMN_NAME); + Iterables.addAll(columnNames, index.getColumnNames()); + final ColumnHolder column = index.getColumnHolder(columnName); + final ColumnCapabilities capabilities = column.getCapabilities(); + final ValueType columnType = capabilities.getType(); + try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) { + if (columnType != ValueType.LONG) { + throw new RuntimeException("Invalid column type, expected 'Long'"); + } + LongsColumn theColumn = (LongsColumn) column.getColumn(); + + + for (int i = 0; i < theColumn.length(); i++) { + long value = theColumn.getLongSingleValueRow(i); + writer.write(value + "\n"); + } 
+ } + } + } + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java new file mode 100644 index 000000000000..e27a5edfeb37 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.compression; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.results.format.ResultFormatType; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 1) +@Measurement(iterations = 2) +public class ColumnarLongsEncodeDataFromGeneratorBenchmark extends BaseColumnarLongsFromGeneratorBenchmark +{ + @Setup + public void setup() throws Exception + { + initializeValues(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void encodeColumn(Blackhole blackhole) throws IOException + { + File dir = getTmpDir(); + File columnDataFile = new File(dir, getGeneratorEncodedFilename(encoding, distribution, rows, zeroProbability)); + columnDataFile.delete(); + FileChannel output = + FileChannel.open(columnDataFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE); + + int size = encodeToFile(vals, encoding, output); + EncodingSizeProfiler.encodedSize = size; + blackhole.consume(size); + output.close(); + } + + public static void main(String[] args) throws RunnerException + { + Options opt = new OptionsBuilder() + 
.include(ColumnarLongsEncodeDataFromGeneratorBenchmark.class.getSimpleName()) + .addProfiler(EncodingSizeProfiler.class) + .resultFormat(ResultFormatType.CSV) + .result("column-longs-encode-speed.csv") + .build(); + + new Runner(opt).run(); + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java new file mode 100644 index 000000000000..f87156d757f8 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.compression; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.results.format.ResultFormatType; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 1) +@Measurement(iterations = 1) +public class ColumnarLongsEncodeDataFromSegmentBenchmark extends BaseColumnarLongsFromSegmentsBenchmark +{ + @Setup + public void setup() throws Exception + { + initializeValues(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void encodeColumn(Blackhole blackhole) throws IOException + { + File dir = getTmpDir(); + File columnDataFile = new File(dir, getColumnEncodedFileName(encoding, segmentName, columnName)); + columnDataFile.delete(); + FileChannel output = + FileChannel.open(columnDataFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE); + + int size = BaseColumnarLongsBenchmark.encodeToFile(vals, encoding, output); + EncodingSizeProfiler.encodedSize = size; + blackhole.consume(size); + output.close(); + } + + public static void main(String[] args) throws RunnerException + { + System.out.println("main happened"); + Options opt = 
new OptionsBuilder() + .include(ColumnarLongsEncodeDataFromSegmentBenchmark.class.getSimpleName()) + .addProfiler(EncodingSizeProfiler.class) + .resultFormat(ResultFormatType.CSV) + .result("column-longs-encode-speed-segments.csv") + .build(); + + new Runner(opt).run(); + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java new file mode 100644 index 000000000000..70553df58bd5 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.benchmark.compression; + +import com.google.common.collect.Maps; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.segment.data.ColumnarLongs; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.results.format.ResultFormatType; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 3) +@Measurement(iterations = 5) +public class ColumnarLongsSelectRowsFromGeneratorBenchmark extends BaseColumnarLongsFromGeneratorBenchmark +{ + private Map decoders; + private Map encodedSize; + + // Number of rows to read, the test will read random rows +// @Param({"0.1", "0.05", "0.95", "1.0"}) + @Param({ +// "0.1", +// "0.5", +// "0.95", + "1.0" + }) + private double filteredRowCountPercentage; + + @Setup + public void setup() throws Exception + { + decoders = Maps.newHashMap(); + encodedSize = Maps.newHashMap(); + + setupFromFile(encoding); + setupFilters(rows, filteredRowCountPercentage); + + // uncomment me to load multiple encoded files for sanity check + //CHECKSTYLE.OFF: Regexp +// ImmutableList all = ImmutableList.of("lz4-longs", 
"lz4-auto"); +// for (String _enc : all) { +// if (!_enc.equalsIgnoreCase(encoding)) { +// setupFromFile(_enc); +// } +// } +// +// checkSanity(decoders, all, rows); + //CHECKSTYLE.ON: Regexp + } + + @TearDown + public void teardown() throws Exception + { + for (ColumnarLongs longs : decoders.values()) { + longs.close(); + } + } + + private void setupFromFile(String encoding) throws IOException + { + File dir = getTmpDir(); + File compFile = new File(dir, getGeneratorEncodedFilename(encoding, distribution, rows, zeroProbability)); + ByteBuffer buffer = FileUtils.map(compFile).get(); + + int size = (int) compFile.length(); + encodedSize.put(encoding, size); + ColumnarLongs data = createColumnarLongs(encoding, buffer); + decoders.put(encoding, data); + } + +// @Benchmark +// @BenchmarkMode(Mode.AverageTime) +// @OutputTimeUnit(TimeUnit.MICROSECONDS) +// public void selectRows(Blackhole blackhole) +// { +// EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); +// ColumnarLongs encoder = decoders.get(encoding); +// if (filter == null) { +// for (int i = 0; i < rows; i++) { +// blackhole.consume(encoder.get(i)); +// } +// } else { +// for (int i = filter.nextSetBit(0); i >= 0; i = filter.nextSetBit(i + 1)) { +// blackhole.consume(encoder.get(i)); +// } +// } +// } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void selectRowsVectorized(Blackhole blackhole) + { + EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); + ColumnarLongs columnDecoder = decoders.get(encoding); + long[] vector = new long[VECTOR_SIZE]; + while (!vectorOffset.isDone()) { + if (vectorOffset.isContiguous()) { + columnDecoder.get(vector, vectorOffset.getStartOffset(), vectorOffset.getCurrentVectorSize()); + } else { + columnDecoder.get(vector, vectorOffset.getOffsets(), vectorOffset.getCurrentVectorSize()); + } + for (int i = 0 ; i < vectorOffset.getCurrentVectorSize(); i++) { + blackhole.consume(vector[i]); + } + 
vectorOffset.advance(); + } + blackhole.consume(vector); + blackhole.consume(vectorOffset); + vectorOffset.reset(); + columnDecoder.close(); + } + +// @Benchmark +// @BenchmarkMode(Mode.AverageTime) +// @OutputTimeUnit(TimeUnit.MICROSECONDS) +// public void readVectorizedSequential(Blackhole bh) +// { +// long[] vector = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; +// EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); +// ColumnarLongs columnDecoder = decoders.get(encoding); +// int count = columnDecoder.size(); +// for (int i = 0; i < count; i++) { +// if (i % vector.length == 0) { +// columnDecoder.get(vector, i, Math.min(vector.length, count - i)); +// } +// bh.consume(vector[i % vector.length]); +// } +// columnDecoder.close(); +// } + + public static void main(String[] args) throws RunnerException + { + Options opt = new OptionsBuilder() + .include(ColumnarLongsSelectRowsFromGeneratorBenchmark.class.getSimpleName()) + .addProfiler(EncodingSizeProfiler.class) + .resultFormat(ResultFormatType.CSV) + .result("column-longs-select-speed.csv") + .build(); + + new Runner(opt).run(); + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java new file mode 100644 index 000000000000..00c84fe21f2a --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.benchmark.compression; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import com.google.common.io.Files; +import org.apache.druid.segment.data.ColumnarLongs; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.results.format.ResultFormatType; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 1) +@Measurement(iterations = 1) +public class ColumnarLongsSelectRowsFromSegmentBenchmark extends BaseColumnarLongsFromSegmentsBenchmark +{ + private Map decoders; + private Map encodedSize; + + // Number of rows to read, the test will read random rows +// 
@Param({"0.01", "0.1", "0.33", "0.66", "0.95", "1.0"}) + @Param({"1.0"}) + private double filteredRowCountPercentage; + + @Setup + public void setup() throws Exception + { + decoders = Maps.newHashMap(); + encodedSize = Maps.newHashMap(); + setupFilters(rows, filteredRowCountPercentage); + + setupFromFile(encoding); + + + // uncomment me to load some encoding files to cross reference values for sanity check + //CHECKSTYLE.OFF: Regexp + List all = ImmutableList.of("lz4-longs", "lz4-auto"); + for (String _enc : all) { + if (!_enc.equals(encoding)) { + setupFromFile(_enc); + } + } + + checkSanity(decoders, all, rows); + } + + @TearDown + public void teardown() + { + for (ColumnarLongs longs : decoders.values()) { + longs.close(); + } + } + + private void setupFromFile(String encoding) throws IOException + { + File dir = getTmpDir(); + File compFile = new File(dir, getColumnEncodedFileName(encoding, segmentName, columnName)); + ByteBuffer buffer = Files.map(compFile); + + int size = (int) compFile.length(); + encodedSize.put(encoding, size); + ColumnarLongs data = BaseColumnarLongsBenchmark.createColumnarLongs(encoding, buffer); + decoders.put(encoding, data); + } + +// @Benchmark +// @BenchmarkMode(Mode.AverageTime) +// @OutputTimeUnit(TimeUnit.MICROSECONDS) +// public void selectRows(Blackhole blackhole) +// { +// EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); +// ColumnarLongs encoder = decoders.get(encoding); +// if (filter == null) { +// for (int i = 0; i < rows; i++) { +// blackhole.consume(encoder.get(i)); +// } +// } else { +// for (int i = filter.nextSetBit(0); i >= 0; i = filter.nextSetBit(i + 1)) { +// blackhole.consume(encoder.get(i)); +// } +// } +// } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void selectRowsVectorized(Blackhole blackhole) + { + EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); + ColumnarLongs columnDecoder = decoders.get(encoding); + long[] vector = new 
long[VECTOR_SIZE]; + while (!vectorOffset.isDone()) { + if (vectorOffset.isContiguous()) { + columnDecoder.get(vector, vectorOffset.getStartOffset(), vectorOffset.getCurrentVectorSize()); + } else { + columnDecoder.get(vector, vectorOffset.getOffsets(), vectorOffset.getCurrentVectorSize()); + } + for (int i = 0 ; i < vectorOffset.getCurrentVectorSize(); i++) { + blackhole.consume(vector[i]); + } + vectorOffset.advance(); + } + blackhole.consume(vector); + blackhole.consume(vectorOffset); + vectorOffset.reset(); + columnDecoder.close(); + } + + + public static void main(String[] args) throws RunnerException + { + System.out.println("main happened"); + Options opt = new OptionsBuilder() + .include(ColumnarLongsSelectRowsFromSegmentBenchmark.class.getSimpleName()) + .addProfiler(EncodingSizeProfiler.class) + .resultFormat(ResultFormatType.CSV) + .result("column-longs-select-speed-segments.csv") + .build(); + + new Runner(opt).run(); + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/CompressedColumnarIntsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/CompressedColumnarIntsBenchmark.java similarity index 99% rename from benchmarks/src/test/java/org/apache/druid/benchmark/CompressedColumnarIntsBenchmark.java rename to benchmarks/src/test/java/org/apache/druid/benchmark/compression/CompressedColumnarIntsBenchmark.java index 5e283ae55d9c..bbcd2fc13950 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/CompressedColumnarIntsBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/CompressedColumnarIntsBenchmark.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.benchmark; +package org.apache.druid.benchmark.compression; import it.unimi.dsi.fastutil.ints.IntArrayList; import org.apache.druid.common.config.NullHandling; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/CompressedVSizeColumnarMultiIntsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/CompressedVSizeColumnarMultiIntsBenchmark.java similarity index 99% rename from benchmarks/src/test/java/org/apache/druid/benchmark/CompressedVSizeColumnarMultiIntsBenchmark.java rename to benchmarks/src/test/java/org/apache/druid/benchmark/compression/CompressedVSizeColumnarMultiIntsBenchmark.java index bbf5b537c839..5eb88195f92e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/CompressedVSizeColumnarMultiIntsBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/CompressedVSizeColumnarMultiIntsBenchmark.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.benchmark; +package org.apache.druid.benchmark.compression; import com.google.common.base.Function; import com.google.common.collect.Iterables; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java new file mode 100644 index 000000000000..5a8baa587eac --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.benchmark.compression; + +import org.openjdk.jmh.infra.BenchmarkParams; +import org.openjdk.jmh.infra.IterationParams; +import org.openjdk.jmh.profile.InternalProfiler; +import org.openjdk.jmh.results.AggregationPolicy; +import org.openjdk.jmh.results.IterationResult; +import org.openjdk.jmh.results.Result; +import org.openjdk.jmh.results.ScalarResult; + +import java.util.Collection; +import java.util.Collections; + +public class EncodingSizeProfiler implements InternalProfiler +{ + public static int encodedSize; + + @Override + public void beforeIteration( + BenchmarkParams benchmarkParams, + IterationParams iterationParams + ) + { + } + + @Override + public Collection afterIteration( + BenchmarkParams benchmarkParams, + IterationParams iterationParams, + IterationResult result + ) + { + return Collections.singletonList( + new ScalarResult("encoded size", encodedSize, "bytes", AggregationPolicy.MAX) + ); + } + + @Override + public String getDescription() + { + return "super janky encoding size result collector"; + } +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/FloatCompressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/FloatCompressionBenchmark.java similarity index 98% rename from benchmarks/src/test/java/org/apache/druid/benchmark/FloatCompressionBenchmark.java rename to benchmarks/src/test/java/org/apache/druid/benchmark/compression/FloatCompressionBenchmark.java index 1663c0eba373..a8063ab667a4 100644 --- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/FloatCompressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/FloatCompressionBenchmark.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.benchmark; +package org.apache.druid.benchmark.compression; import com.google.common.base.Supplier; import org.apache.druid.common.config.NullHandling; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/FloatCompressionBenchmarkFileGenerator.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/FloatCompressionBenchmarkFileGenerator.java similarity index 99% rename from benchmarks/src/test/java/org/apache/druid/benchmark/FloatCompressionBenchmarkFileGenerator.java rename to benchmarks/src/test/java/org/apache/druid/benchmark/compression/FloatCompressionBenchmarkFileGenerator.java index 424f2977104c..82709a6c4b3b 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/FloatCompressionBenchmarkFileGenerator.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/FloatCompressionBenchmarkFileGenerator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.benchmark; +package org.apache.druid.benchmark.compression; import com.google.common.collect.ImmutableList; import org.apache.druid.common.config.NullHandling; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/LongCompressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmark.java similarity index 74% rename from benchmarks/src/test/java/org/apache/druid/benchmark/LongCompressionBenchmark.java rename to benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmark.java index 7bd057311a4d..88242948c5d9 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/LongCompressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmark.java @@ -17,12 +17,13 @@ * under the License. */ -package org.apache.druid.benchmark; +package org.apache.druid.benchmark.compression; import com.google.common.base.Supplier; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.MappedByteBufferHandler; +import org.apache.druid.segment.QueryableIndexStorageAdapter; import org.apache.druid.segment.data.ColumnarLongs; import org.apache.druid.segment.data.CompressedColumnarLongsSupplier; import org.openjdk.jmh.annotations.Benchmark; @@ -42,7 +43,6 @@ import java.io.File; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; /** @@ -50,8 +50,8 @@ */ @State(Scope.Benchmark) @Fork(value = 1) -@Warmup(iterations = 10) -@Measurement(iterations = 25) +@Warmup(iterations = 3) +@Measurement(iterations = 5) @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) public class LongCompressionBenchmark @@ -69,7 +69,7 @@ public class LongCompressionBenchmark @Param({"auto", "longs"}) private static String format; - @Param({"lz4", 
"none"}) + @Param({"lz4"}) private static String strategy; private Supplier supplier; @@ -92,26 +92,41 @@ public void tearDown() bufferHandler.close(); } +// @Benchmark +// public void readContinuous(Blackhole bh) +// { +// ColumnarLongs columnarLongs = supplier.get(); +// int count = columnarLongs.size(); +// for (int i = 0; i < count; i++) { +// bh.consume(columnarLongs.get(i)); +// } +// columnarLongs.close(); +// } + @Benchmark - public void readContinuous(Blackhole bh) + public void readVectorizedSequential(Blackhole bh) { + long[] vector = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; ColumnarLongs columnarLongs = supplier.get(); int count = columnarLongs.size(); for (int i = 0; i < count; i++) { - bh.consume(columnarLongs.get(i)); + if (i % vector.length == 0) { + columnarLongs.get(vector, i, Math.min(vector.length, count - i)); + } + bh.consume(vector[i % vector.length]); } columnarLongs.close(); } - @Benchmark - public void readSkipping(Blackhole bh) - { - ColumnarLongs columnarLongs = supplier.get(); - int count = columnarLongs.size(); - for (int i = 0; i < count; i += ThreadLocalRandom.current().nextInt(2000)) { - bh.consume(columnarLongs.get(i)); - } - columnarLongs.close(); - } +// @Benchmark +// public void readSkipping(Blackhole bh) +// { +// ColumnarLongs columnarLongs = supplier.get(); +// int count = columnarLongs.size(); +// for (int i = 0; i < count; i += ThreadLocalRandom.current().nextInt(2000)) { +// bh.consume(columnarLongs.get(i)); +// } +// columnarLongs.close(); +// } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/LongCompressionBenchmarkFileGenerator.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmarkFileGenerator.java similarity index 99% rename from benchmarks/src/test/java/org/apache/druid/benchmark/LongCompressionBenchmarkFileGenerator.java rename to benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmarkFileGenerator.java 
index b9bca954de45..55d5f6b82bbb 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/LongCompressionBenchmarkFileGenerator.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmarkFileGenerator.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.benchmark; +package org.apache.druid.benchmark.compression; import com.google.common.collect.ImmutableList; import org.apache.druid.common.config.NullHandling; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/VSizeSerdeBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/VSizeSerdeBenchmark.java similarity index 99% rename from benchmarks/src/test/java/org/apache/druid/benchmark/VSizeSerdeBenchmark.java rename to benchmarks/src/test/java/org/apache/druid/benchmark/compression/VSizeSerdeBenchmark.java index 1738046205e1..995925e2d69f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/VSizeSerdeBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/VSizeSerdeBenchmark.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.benchmark; +package org.apache.druid.benchmark.compression; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.FileUtils; diff --git a/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java index cc1e25197bbb..435aa2ddfd1a 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java @@ -68,23 +68,13 @@ public long read(int index) @Override public void read(long[] out, int outPosition, int startIndex, int length) { - deserializer.get(out, outPosition, startIndex, length); - - for (int i = 0; i < length; i++) { - out[outPosition + i] += base; - } + deserializer.getDelta(out, outPosition, startIndex, length, base); } @Override public int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) { - final int len = deserializer.get(out, outPosition, indexes, length, indexOffset, limit); - - for (int i = 0; i < len; i++) { - out[outPosition + i] += base; - } - - return len; + return deserializer.getDelta(out, outPosition, indexes, length, indexOffset, limit, base); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java index 7035be3dc4f5..6a5e17b1080b 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java @@ -74,23 +74,13 @@ public long read(int index) @Override public void read(long[] out, int outPosition, int startIndex, int length) { - deserializer.get(out, outPosition, startIndex, length); - - for (int i = 0; i < length; i++) { - out[outPosition + i] = 
table[(int) out[outPosition + i]]; - } + deserializer.getTable(out, outPosition, startIndex, length, table); } @Override public int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) { - final int len = deserializer.get(out, outPosition, indexes, length, indexOffset, limit); - - for (int i = 0; i < len; i++) { - out[outPosition + i] = table[(int) out[outPosition + i]]; - } - - return len; + return deserializer.getTable(out, outPosition, indexes, length, indexOffset, limit, table); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index 01837dd089ba..500c9af22c5d 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -19,8 +19,8 @@ package org.apache.druid.segment.data; -import org.apache.datasketches.memory.Memory; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.UOE; import javax.annotation.Nullable; import java.io.Closeable; @@ -330,7 +330,7 @@ public void write(long value) throws IOException curByte = (byte) value; first = false; } else { - curByte = (byte) ((curByte << 4) | ((value >> (numBytes << 3)) & 0xF)); + curByte = (byte) ((curByte << 4) | ((value >>> (numBytes << 3)) & 0xF)); buffer.put(curByte); first = true; } @@ -417,14 +417,14 @@ public interface LongDeserializer { long get(int index); - default void get(long[] out, int outPosition, int startIndex, int length) + default void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) { for (int i = 0; i < length; i++) { - out[outPosition + i] = get(startIndex + i); + out[outPosition + i] = delta + get(startIndex + i); } } - default int get(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + default int getDelta(long[] out, int 
outPosition, int[] indexes, int length, int indexOffset, int limit, long delta) { for (int i = 0; i < length; i++) { int index = indexes[outPosition + i] - indexOffset; @@ -432,7 +432,26 @@ default int get(long[] out, int outPosition, int[] indexes, int length, int inde return i; } - out[outPosition + i] = get(index); + out[outPosition + i] = delta + get(index); + } + + return length; + } + + default void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) + { + throw new UOE("Table decoding not supported for %s", this.getClass().getSimpleName()); + } + + default int getTable(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long[] table) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = table[(int) get(index)]; } return length; @@ -441,89 +460,322 @@ default int get(long[] out, int outPosition, int[] indexes, int length, int inde private static final class Size1Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size1Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { int shift = 7 - (index & 7); - return (buffer.getByte((index >> 3)) >> shift) & 1; + return (buffer.get(offset + (index >> 3)) >> shift) & 1; + } + + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int index = startIndex; + int i = 0; + + // byte align + while ((index & 0x7) != 0 && i < length) { + out[outPosition + i++] = delta + get(index++); + } + for ( ; i + Byte.SIZE < length; index += Byte.SIZE) { + final byte unpack = buffer.get(offset + (index >> 3)); + out[outPosition + i++] = delta + 
((unpack >> 7) & 1); + out[outPosition + i++] = delta + ((unpack >> 6) & 1); + out[outPosition + i++] = delta + ((unpack >> 5) & 1); + out[outPosition + i++] = delta + ((unpack >> 4) & 1); + out[outPosition + i++] = delta + ((unpack >> 3) & 1); + out[outPosition + i++] = delta + ((unpack >> 2) & 1); + out[outPosition + i++] = delta + ((unpack >> 1) & 1); + out[outPosition + i++] = delta + (unpack & 1); + } + while (i < length) { + out[outPosition + i++] = delta + get(index++); + } + } + + @Override + public void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) + { + int index = startIndex; + int i = 0; + + // byte align + while ((index & 0x7) != 0 && i < length) { + out[outPosition + i++] = table[(int) get(index++)]; + } + for ( ; i + Byte.SIZE < length; index += Byte.SIZE) { + final byte unpack = buffer.get(offset + (index >> 3)); + out[outPosition + i++] = table[(unpack >> 7) & 1]; + out[outPosition + i++] = table[(unpack >> 6) & 1]; + out[outPosition + i++] = table[(unpack >> 5) & 1]; + out[outPosition + i++] = table[(unpack >> 4) & 1]; + out[outPosition + i++] = table[(unpack >> 3) & 1]; + out[outPosition + i++] = table[(unpack >> 2) & 1]; + out[outPosition + i++] = table[(unpack >> 1) & 1]; + out[outPosition + i++] = table[unpack & 1]; + } + while (i < length) { + out[outPosition + i++] = table[(int) get(index++)]; + } } } private static final class Size2Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size2Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { int shift = 6 - ((index & 3) << 1); - return (buffer.getByte((index >> 2)) >> shift) & 3; + return (buffer.get(offset + (index >> 2)) >> shift) & 3; + } + + @Override + public void getDelta(long[] out, int 
outPosition, int startIndex, int length, long delta) + { + int index = startIndex; + int i = 0; + + // byte align + while ((index & 0x3) != 0 && i < length) { + out[outPosition + i++] = delta + get(index++); + } +// for ( ; i + 4 < length; index += 4) { +// final byte unpack = buffer.get(offset + (index >> 2)); +// out[outPosition + i++] = delta + (unpack >> 6) & 3; +// out[outPosition + i++] = delta + (unpack >> 4) & 3; +// out[outPosition + i++] = delta + (unpack >> 2) & 3; +// out[outPosition + i++] = delta + unpack & 3; +// } + for ( ; i + 8 < length; index += 8) { + final short unpack = buffer.getShort(offset + (index >> 2)); + out[outPosition + i++] = delta + ((unpack >> 14) & 3); + out[outPosition + i++] = delta + ((unpack >> 12) & 3); + out[outPosition + i++] = delta + ((unpack >> 10) & 3); + out[outPosition + i++] = delta + ((unpack >> 8) & 3); + out[outPosition + i++] = delta + ((unpack >> 6) & 3); + out[outPosition + i++] = delta + ((unpack >> 4) & 3); + out[outPosition + i++] = delta + ((unpack >> 2) & 3); + out[outPosition + i++] = delta + (unpack & 3); + } + while (i < length) { + out[outPosition + i++] = delta + get(index++); + } + } + + @Override + public void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) + { + int index = startIndex; + int i = 0; + + // byte align + while ((index & 0x3) != 0 && i < length) { + out[outPosition + i++] = table[(int) get(index++)]; + } +// for ( ; i + 4 < length; index += 4) { +// final byte unpack = buffer.get(offset + (index >> 2)); +// out[outPosition + i++] = table[(unpack >> 6) & 3]; +// out[outPosition + i++] = table[(unpack >> 4) & 3]; +// out[outPosition + i++] = table[(unpack >> 2) & 3]; +// out[outPosition + i++] = table[unpack & 3]; +// } + for ( ; i + 8 < length; index += 8) { + final short unpack = buffer.getShort(offset + (index >> 2)); + out[outPosition + i++] = table[(unpack >> 14) & 3]; + out[outPosition + i++] = table[(unpack >> 12) & 3]; + out[outPosition + i++] = table[(unpack >> 
10) & 3]; + out[outPosition + i++] = table[(unpack >> 8) & 3]; + out[outPosition + i++] = table[(unpack >> 6) & 3]; + out[outPosition + i++] = table[(unpack >> 4) & 3]; + out[outPosition + i++] = table[(unpack >> 2) & 3]; + out[outPosition + i++] = table[unpack & 3]; + } + while (i < length) { + out[outPosition + i++] = table[(int) get(index++)]; + } } } private static final class Size4Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size4Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { int shift = ((index + 1) & 1) << 2; - return (buffer.getByte((index >> 1)) >> shift) & 0xF; + return (buffer.get(offset + (index >> 1)) >> shift) & 0xF; + } + + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int index = startIndex; + int i = 0; + + // byte align + while ((index & 0x1) != 0 && i < length) { + out[outPosition + i++] = delta + (get(index++) & 0xF); + } +// for ( ; i + 2 < length; index += 2) { +// final byte unpack = buffer.get(offset + (index >> 1)); +// out[outPosition + i++] = delta + (unpack >> 4) & 0xF; +// out[outPosition + i++] = delta + unpack & 0xF; +// } + for ( ; i + 8 < length; index += 8) { + final int unpack = buffer.getInt(offset + (index >> 1)); + out[outPosition + i++] = delta + ((unpack >> 28) & 0xF); + out[outPosition + i++] = delta + ((unpack >> 24) & 0xF); + out[outPosition + i++] = delta + ((unpack >> 20) & 0xF); + out[outPosition + i++] = delta + ((unpack >> 16) & 0xF); + out[outPosition + i++] = delta + ((unpack >> 12) & 0xF); + out[outPosition + i++] = delta + ((unpack >> 8) & 0xF); + out[outPosition + i++] = delta + ((unpack >> 4) & 0xF); + out[outPosition + i++] = delta + (unpack & 0xF); + } + while (i < length) { + 
out[outPosition + i++] = delta + get(index++); + } + } + + @Override + public void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) + { + int index = startIndex; + int i = 0; + + // byte align + while ((index & 0x1) != 0 && i < length) { + out[outPosition + i++] = table[(int) get(index++)]; + } +// for ( ; i + 2 < length; index += 2) { +// final byte unpack = buffer.get(offset + (index >> 1)); +// out[outPosition + i++] = table[(unpack >> 4) & 0xF]; +// out[outPosition + i++] = table[unpack & 0xF]; +// } + for ( ; i + 8 < length; index += 8) { + final int unpack = buffer.getInt(offset + (index >> 1)); + out[outPosition + i++] = table[(unpack >> 28) & 0xF]; + out[outPosition + i++] = table[(unpack >> 24) & 0xF]; + out[outPosition + i++] = table[(unpack >> 20) & 0xF]; + out[outPosition + i++] = table[(unpack >> 16) & 0xF]; + out[outPosition + i++] = table[(unpack >> 12) & 0xF]; + out[outPosition + i++] = table[(unpack >> 8) & 0xF]; + out[outPosition + i++] = table[(unpack >> 4) & 0xF]; + out[outPosition + i++] = table[unpack & 0xF]; + } + while (i < length) { + out[outPosition + i++] = table[(int) get(index++)]; + } } } private static final class Size8Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size8Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getByte(index) & 0xFF; + return buffer.get(offset + index) & 0xFF; + } + + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + for (int i = 0, indexOffset = startIndex; i < length; i++, indexOffset++) { + out[outPosition + i] = delta + (buffer.get(offset + indexOffset) & 0xFF); + } +// int i = 0; +// for (int indexOffset = startIndex; i + 8 
< length; indexOffset += 8) { +// final long unpack = buffer.getLong(indexOffset); +// out[outPosition + i++] = delta + ((unpack >>> 56) & 0xFF); +// out[outPosition + i++] = delta + ((unpack >>> 48) & 0xFF); +// out[outPosition + i++] = delta + ((unpack >>> 40) & 0xFF); +// out[outPosition + i++] = delta + ((unpack >>> 32) & 0xFF); +// out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFF); +// out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFF); +// out[outPosition + i++] = delta + ((unpack >>> 8) & 0xFF); +// out[outPosition + i++] = delta + (unpack & 0xFF); +// } +// while (i < length) { +// out[outPosition + i] = delta + (int) get(startIndex + i); +// i++; +// } + } + + @Override + public int getDelta(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long base) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = base + (buffer.get(offset + index) & 0xFF); + } + + return length; } @Override - public void get(long[] out, int outPosition, int startIndex, int length) + public void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) { - long pos = startIndex; - for (int i = 0; i < length; i++, pos += 1) { - out[outPosition + i] = buffer.getByte(pos) & 0xFF; + for (int i = 0, indexOffset = startIndex; i < length; i++, indexOffset++) { + out[outPosition + i] = table[buffer.get(offset + indexOffset) & 0xFF]; } +// int i = 0; +// for (int indexOffset = startIndex; i + 8 < length; indexOffset += 8) { +// out[outPosition + i++] = table[buffer.getByte(indexOffset) & 0xFF]; +// out[outPosition + i++] = table[buffer.getByte(indexOffset + 1) & 0xFF]; +// out[outPosition + i++] = table[buffer.getByte(indexOffset + 2) & 0xFF]; +// out[outPosition + i++] = table[buffer.getByte(indexOffset + 3) & 0xFF]; +// out[outPosition + i++] = table[buffer.getByte(indexOffset + 4) & 0xFF]; +// out[outPosition + 
i++] = table[buffer.getByte(indexOffset + 5) & 0xFF]; +// out[outPosition + i++] = table[buffer.getByte(indexOffset + 6) & 0xFF]; +// out[outPosition + i++] = table[buffer.getByte(indexOffset + 7) & 0xFF]; +// } +// while (i < length) { +// out[outPosition + i] = table[(int) get(startIndex + i)]; +// i++; +// } } @Override - public int get(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + public int getTable(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long[] table) { for (int i = 0; i < length; i++) { int index = indexes[outPosition + i] - indexOffset; @@ -531,7 +783,7 @@ public int get(long[] out, int outPosition, int[] indexes, int length, int index return i; } - out[outPosition + i] = buffer.getByte(index) & 0xFF; + out[outPosition + i] = table[buffer.get(offset + index) & 0xFF]; } return length; @@ -540,52 +792,106 @@ public int get(long[] out, int outPosition, int[] indexes, int length, int index private static final class Size12Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size12Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { int shift = ((index + 1) & 1) << 2; - int offset = (index * 3) >> 1; - return (buffer.getShort(offset) >> shift) & 0xFFF; + int indexOffset = (index * 3) >> 1; + return (buffer.getShort(offset + indexOffset) >> shift) & 0xFFF; + } + + + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int i = 0; + int index = startIndex; + // every other value is byte aligned + if ((index & 0x1) != 0) { + out[outPosition + i++] = get(index++); + } + final int unpackSize = Long.BYTES + Integer.BYTES; + for (int indexOffset = (index * 3) >> 
1; i + 8 < length; indexOffset += unpackSize) { + final long unpack = buffer.getLong(offset + indexOffset); + final int unpack2 = buffer.getInt(offset + indexOffset + Long.BYTES); + out[outPosition + i++] = delta + ((unpack >> 52) & 0xFFF); + out[outPosition + i++] = delta + ((unpack >> 40) & 0xFFF); + out[outPosition + i++] = delta + ((unpack >> 28) & 0xFFF); + out[outPosition + i++] = delta + ((unpack >> 16) & 0xFFF); + out[outPosition + i++] = delta + ((unpack >> 4) & 0xFFF); + out[outPosition + i++] = delta + (((unpack & 0xF) << 8) | ((unpack2 >> 24) & 0xFF)); + out[outPosition + i++] = delta + ((unpack2 >> 12) & 0xFFF); + out[outPosition + i++] = delta + (unpack2 & 0xFFF); + } + while (i < length) { + out[outPosition + i] = delta + (int) get(startIndex + i); + i++; + } } } private static final class Size16Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size16Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getShort((long) index << 1) & 0xFFFF; + return buffer.getShort(offset + (index << 1)) & 0xFFFF; } @Override - public void get(long[] out, int outPosition, int startIndex, int length) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) { - long pos = (long) startIndex << 1; - for (int i = 0; i < length; i++, pos += Short.BYTES) { - out[outPosition + i] = buffer.getShort(pos) & 0xFFFF; + for (int i = 0, indexOffset = (startIndex << 1); i < length; i++, indexOffset += Short.BYTES) { + out[outPosition + i] = delta + buffer.getShort(offset + indexOffset) & 0xFFFF; } +// int i = 0; +// final int unpackSize = 8 * Short.BYTES; +// for (int indexOffset = startIndex << 1; i + 8 < length; indexOffset += unpackSize) { +//// final long 
unpack = buffer.getLong(indexOffset); +//// final long unpack2 = buffer.getLong(indexOffset + Long.BYTES); +//// out[outPosition + i++] = delta + ((unpack >> 48) & 0xFFFF); +//// out[outPosition + i++] = delta + ((unpack >> 32) & 0xFFFF); +//// out[outPosition + i++] = delta + ((unpack >> 16) & 0xFFFF); +//// out[outPosition + i++] = delta + (unpack & 0xFFFF); +//// out[outPosition + i++] = delta + ((unpack2 >> 48) & 0xFFFF); +//// out[outPosition + i++] = delta + ((unpack2 >> 32) & 0xFFFF); +//// out[outPosition + i++] = delta + ((unpack2 >> 16) & 0xFFFF); +//// out[outPosition + i++] = delta + (unpack2 & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset) & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 2) & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 4) & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 6) & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 8) & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 10) & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 12) & 0xFFFF); +// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 14) & 0xFFFF); +// } +// while (i < length) { +// out[outPosition + i] = delta + (int) get(startIndex + i); +// i++; +// } } @Override - public int get(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + public int getDelta(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long base) { for (int i = 0; i < length; i++) { int index = indexes[outPosition + i] - indexOffset; @@ -593,7 +899,30 @@ public int get(long[] out, int outPosition, int[] indexes, int length, int index return i; } - out[outPosition + i] = buffer.getShort((long) index << 1) & 0xFFFF; + out[outPosition + i] = base + buffer.getShort(offset + (index << 1)) & 0xFFFF; + } + + return length; + + } 
+ @Override + public void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) + { + for (int i = 0, indexOffset = (startIndex << 1); i < length; i++, indexOffset += Short.BYTES) { + out[outPosition + i] = table[buffer.getShort(offset + indexOffset) & 0xFFFF]; + } + } + + @Override + public int getTable(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long[] table) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = table[buffer.getShort(offset + (index << 1)) & 0xFFFF]; } return length; @@ -602,139 +931,315 @@ public int get(long[] out, int outPosition, int[] indexes, int length, int index private static final class Size20Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size20Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { int shift = (((index + 1) & 1) << 2) + 8; - int offset = (index * 5) >> 1; - return (buffer.getInt(offset) >> shift) & 0xFFFFF; + int indexOffset = (index * 5) >> 1; + return (buffer.getInt(offset + indexOffset) >> shift) & 0xFFFFF; + } + + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int i = 0; + int index = startIndex; + // every other value is byte aligned + if ((index & 0x1) != 0) { + out[outPosition + i++] = get(index++); + } + final int unpackSize = Long.BYTES + Long.BYTES + Integer.BYTES; + for (int indexOffset = (index * 5) >> 1; i + 8 < length; indexOffset += unpackSize) { + final long unpack = buffer.getLong(offset + indexOffset); + final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); + final int unpack3 = 
buffer.getInt(offset + indexOffset + Long.BYTES + Long.BYTES); + out[outPosition + i++] = delta + ((unpack >>> 44) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack >>> 4) & 0xFFFFF); + out[outPosition + i++] = delta + (((unpack & 0xF) << 16) | ((unpack2 >>> 48) & 0xFFFF)); + out[outPosition + i++] = delta + ((unpack2 >>> 28) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack2 >>> 8) & 0xFFFFF); + out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 12) | ((unpack3 >>> 20) & 0xFFF)); + out[outPosition + i++] = delta + (unpack3 & 0xFFFFF); + } + while (i < length) { + out[outPosition + i] = delta + (int) get(startIndex + i); + i++; + } } } private static final class Size24Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size24Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getInt(index * 3L) >>> 8; + return buffer.getInt(offset + (index * 3)) >>> 8; + } + + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int i = 0; + final int unpackSize = 3 * Long.BYTES; + for (int indexOffset = startIndex * 3; i + 8 < length; indexOffset += unpackSize) { + final long unpack = buffer.getLong(offset + indexOffset); + final long unpack2 = buffer.getLong(offset +indexOffset + Long.BYTES); + final long unpack3 = buffer.getLong(offset + indexOffset + Long.BYTES + Long.BYTES); + out[outPosition + i++] = delta + ((unpack >>> 40) & 0xFFFFFF); + out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFFFFFF); + out[outPosition + i++] = delta + (((unpack & 0xFFFF) << 8) | ((unpack2 >>> 56) & 0xFF)); + out[outPosition + i++] = delta + ((unpack2 >>> 32) & 0xFFFFFF); 
+ out[outPosition + i++] = delta + ((unpack2 >>> 8) & 0xFFFFFF); + out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 16) | ((unpack3 >>> 48) & 0xFFFF)); + out[outPosition + i++] = delta + ((unpack3 >>> 24) & 0xFFFFFF); + out[outPosition + i++] = delta + (unpack3 & 0xFFFFFF); + } + while (i < length) { + out[outPosition + i] = delta + (int) get(startIndex + i); + i++; + } } } private static final class Size32Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size32Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getInt(((long) index << 2)) & 0xFFFFFFFFL; + return buffer.getInt((offset + (index << 2))) & 0xFFFFFFFFL; + } + + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + for (int i = 0, indexOffset = (startIndex << 2); i < length; i++, indexOffset += Integer.BYTES) { + out[outPosition + i] = delta + buffer.getInt(offset + indexOffset) & 0xFFFFFFFFL; + } +// int i = 0; +// final int unpackSize = 8 * Integer.BYTES; +// for (int indexOffset = startIndex << 2; i + 8 < length; indexOffset += unpackSize) { +// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset) & 0xFFFFFFFFL); +// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 4) & 0xFFFFFFFFL); +// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 8) & 0xFFFFFFFFL); +// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 12) & 0xFFFFFFFFL); +// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 16) & 0xFFFFFFFFL); +// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 20) & 0xFFFFFFFFL); +// out[outPosition + i++] = delta + (buffer.getInt(offset 
+ indexOffset + 24) & 0xFFFFFFFFL); +// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 28) & 0xFFFFFFFFL); +// } +// while (i < length) { +// out[outPosition + i] = delta + (int) get(startIndex + i); +// i++; +// } } } private static final class Size40Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size40Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getLong(index * 5L) >>> 24; + return buffer.getLong(offset + (index * 5)) >>> 24; } + +// @Override +// public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) +// { +// int i = 0; +// final int unpackSize = 5 * Long.BYTES; +// for (int indexOffset = startIndex * 5; i + 8 < length; indexOffset += unpackSize) { +// final long unpack = buffer.getLong(offset + indexOffset); +// final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); +// final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); +// final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); +// final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); +// out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFFFFFFFFFFL); +// out[outPosition + i++] = delta + (((unpack & 0xFFFFFFL) << 16) | ((unpack2 >>> 48) & 0xFFFFL)); +// out[outPosition + i++] = delta + ((unpack2 >>> 8) & 0xFFFFFFFFFFL); +// out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 32) | ((unpack3 >>> 32) & 0xFFFFFFFFL)); +// out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFFFL) << 32) | ((unpack4 >>> 56 ) & 0xFF)); +// out[outPosition + i++] = delta + ((unpack4 >>> 16) & 0xFFFFFFFFFFL); +// out[outPosition + i++] = delta + (((unpack4 & 0xFFFF) << 24) | 
((unpack5 >>> 40) & 0xFFFFFF)); +// out[outPosition + i++] = delta + (unpack5 & 0xFFFFFFFFFFL); +// } +// while (i < length) { +// out[outPosition + i] = delta + (int) get(startIndex + i); +// i++; +// } +// } } private static final class Size48Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size48Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getLong(index * 6L) >>> 16; + return buffer.getLong(offset + (index * 6)) >>> 16; } + +// @Override +// public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) +// { +// int i = 0; +// final int unpackSize = 6 * Long.BYTES; +// for (int indexOffset = startIndex * 6; i + 8 < length; indexOffset += unpackSize) { +// final long unpack = buffer.getLong(offset + indexOffset); +// final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); +// final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); +// final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); +// final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); +// final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); +// out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFFFFFFFFFFFFL); +// out[outPosition + i++] = delta + (((unpack & 0xFFFFL) << 32) | ((unpack2 >>> 32) & 0xFFFFFFFFL)); +// out[outPosition + i++] = delta + (((unpack2 & 0xFFFFFFFFL) << 32) | ((unpack3 >>> 48) & 0xFFFFL)); +// out[outPosition + i++] = delta + (unpack3 & 0xFFFFFFFFFFFFL); +// out[outPosition + i++] = delta + ((unpack4 >>> 16) & 0xFFFFFFFFFFFFL); +// out[outPosition + i++] = delta + (((unpack4 & 0xFFFFL) << 32) | ((unpack5 >>> 32) & 0xFFFFFFFFL)); +// 
out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFL) << 32) | ((unpack6 >>> 48) & 0xFFFFL)); +// out[outPosition + i++] = delta + (unpack6 & 0xFFFFFFFFFFFFL); +// } +// while (i < length) { +// out[outPosition + i] = delta + (int) get(startIndex + i); +// i++; +// } +// } } private static final class Size56Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size56Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getLong(index * 7L) >>> 8; + return buffer.getLong(offset + (index * 7)) >>> 8; } + +// @Override +// public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) +// { +// int i = 0; +// final int unpackSize = 7 * Long.BYTES; +// for (int indexOffset = startIndex * 7; i + 8 < length; indexOffset += unpackSize) { +// final long unpack = buffer.getLong(offset + indexOffset); +// final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); +// final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); +// final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); +// final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); +// final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); +// final long unpack7 = buffer.getLong(offset + indexOffset + (6 * Long.BYTES)); +// out[outPosition + i++] = delta + ((unpack >>> 8) & 0xFFFFFFFFFFFFFFL); +// out[outPosition + i++] = delta + (((unpack & 0xFFL) << 48) | ((unpack2 >>> 16) & 0xFFFFFFFFFFFFL)); +// out[outPosition + i++] = delta + (((unpack2 & 0xFFFFL) << 40) | ((unpack3 >>> 24) & 0xFFFFFFFFFFL)); +// out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFL) << 32) | ((unpack4 >>> 32) & 0xFFFFFFFFL)); 
+// out[outPosition + i++] = delta + (((unpack4 & 0xFFFFFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); +// out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); +// out[outPosition + i++] = delta + (((unpack6 & 0xFFFFFFFFFFFFL) << 8) | ((unpack7 >>> 56) & 0xFFL)); +// out[outPosition + i++] = delta + (unpack7 & 0xFFFFFFFFFFFFFFL); +// } +// while (i < length) { +// out[outPosition + i] = delta + (int) get(startIndex + i); +// i++; +// } +// } } private static final class Size64Des implements LongDeserializer { - final Memory buffer; + final ByteBuffer buffer; + final int offset; public Size64Des(ByteBuffer buffer, int bufferOffset) { - final ByteBuffer dup = buffer.duplicate(); - dup.position(bufferOffset); - this.buffer = Memory.wrap(dup.slice(), buffer.order()); + this.buffer = buffer; + this.offset = bufferOffset; } @Override public long get(int index) { - return buffer.getLong((long) index << 3); + return buffer.getLong(offset + (index << 3)); } @Override - public void get(long[] out, int outPosition, int startIndex, int length) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) { - buffer.getLongArray((long) startIndex << 3, out, outPosition, length); + for (int i = 0, indexOffset = (startIndex << 3); i < length; i++, indexOffset += Long.BYTES) { + out[outPosition + i] = delta + buffer.getLong(offset + indexOffset); + } +// int i = 0; +// final int unpackSize = 8 * Long.BYTES; +// for (int indexOffset = (startIndex << 3); i + 8 < length; indexOffset += unpackSize) { +// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset); +// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 8); +// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 16); +// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 24); +// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 32); +// out[outPosition 
+ i++] = delta + buffer.getLong(offset + indexOffset + 40); +// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 48); +// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 56); +// } +// while (i < length) { +// out[outPosition + i] = delta + (int) get(startIndex + i); +// i++; +// } } @Override - public int get(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + public int getDelta(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long base) { for (int i = 0; i < length; i++) { int index = indexes[outPosition + i] - indexOffset; @@ -742,7 +1247,7 @@ public int get(long[] out, int outPosition, int[] indexes, int length, int index return i; } - out[outPosition + i] = buffer.getLong((long) index << 3); + out[outPosition + i] = base + buffer.getLong(offset + (index << 3)); } return length; diff --git a/processing/src/main/java/org/apache/druid/segment/generator/ColumnValueGenerator.java b/processing/src/main/java/org/apache/druid/segment/generator/ColumnValueGenerator.java index fbfc1a9f3c15..1fc09f57be6a 100644 --- a/processing/src/main/java/org/apache/druid/segment/generator/ColumnValueGenerator.java +++ b/processing/src/main/java/org/apache/druid/segment/generator/ColumnValueGenerator.java @@ -33,8 +33,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Random; +import java.util.function.Supplier; -public class ColumnValueGenerator +public class ColumnValueGenerator implements Supplier { private final GeneratorColumnSchema schema; private final long seed; @@ -224,4 +225,10 @@ private void initDistribution() ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed); } } + + @Override + public Object get() + { + return generateRowValue(); + } } diff --git a/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java 
index 675c49420cb0..f0effe78a2d7 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java @@ -224,12 +224,18 @@ private void assertIndexMatchesVals(ColumnarLongs indexed, long[] vals) Assert.assertEquals(vals.length, indexed.size()); // sequential access + long[] vector = new long[256]; int[] indices = new int[vals.length]; for (int i = 0; i < indexed.size(); ++i) { + if (i % 256 == 0) { + indexed.get(vector, i, Math.min(256, indexed.size() - i)); + } Assert.assertEquals(vals[i], indexed.get(i)); + Assert.assertEquals(vals[i], vector[i % 256]); indices[i] = i; } + // random access, limited to 1000 elements for large lists (every element would take too long) IntArrays.shuffle(indices, ThreadLocalRandom.current()); final int limit = Math.min(indexed.size(), 1000); diff --git a/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java index 739a30228d18..6168769efd17 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/VSizeLongSerdeTest.java @@ -238,8 +238,9 @@ private static void testContiguousGetSingleRow( final long[] out = new long[values.length + outPosition]; for (int i = 0; i < values.length; i++) { + Arrays.fill(out, -1); - deserializer.get(out, outPosition, i, 1); + deserializer.getDelta(out, outPosition, i, 1, 0); Assert.assertEquals( StringUtils.format("Deserializer (testContiguousGetSingleRow, numBits = %d, position = %d)", numBits, i), @@ -258,7 +259,7 @@ private static void testContiguousGetWholeRegion( final int outPosition = 1; final long[] out = new long[values.length + outPosition]; Arrays.fill(out, -1); - deserializer.get(out, outPosition, 0, values.length); + deserializer.getDelta(out, outPosition, 0, values.length, 0); 
Assert.assertArrayEquals( StringUtils.format("Deserializer (testContiguousGetWholeRegion, numBits = %d)", numBits), @@ -283,7 +284,7 @@ private static void testNoncontiguousGetSingleRow( Arrays.fill(indexes, -1); indexes[outPosition] = i + indexOffset; - deserializer.get(out, outPosition, indexes, 1, indexOffset, values.length); + deserializer.getDelta(out, outPosition, indexes, 1, indexOffset, values.length, 0); Assert.assertEquals( StringUtils.format("Deserializer (testNoncontiguousGetSingleRow, numBits = %d, position = %d)", numBits, i), @@ -318,7 +319,7 @@ private static void testNoncontiguousGetEveryOtherValue( } } - deserializer.get(out, outPosition, indexes, cnt, indexOffset, values.length); + deserializer.getDelta(out, outPosition, indexes, cnt, indexOffset, values.length, 0); Assert.assertArrayEquals( StringUtils.format("Deserializer (testNoncontiguousGetEveryOtherValue, numBits = %d)", numBits), @@ -357,7 +358,7 @@ private static void testNoncontiguousGetEveryOtherValueWithLimit( } } - final int ret = deserializer.get(out, outPosition, indexes, cnt, indexOffset, limit); + final int ret = deserializer.getDelta(out, outPosition, indexes, cnt, indexOffset, limit, 0); Assert.assertArrayEquals( StringUtils.format("Deserializer (testNoncontiguousGetEveryOtherValue, numBits = %d)", numBits), From c2eb80fac2aabda68f6b07075ae417ba9240facc Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 16 Mar 2021 02:29:15 -0700 Subject: [PATCH 03/11] more more faster --- .../BaseColumnarLongsBenchmark.java | 27 +- ...seColumnarLongsFromGeneratorBenchmark.java | 108 +++---- ...aseColumnarLongsFromSegmentsBenchmark.java | 114 ++----- ...LongsEncodeDataFromGeneratorBenchmark.java | 52 +++- ...arLongsEncodeDataFromSegmentBenchmark.java | 81 ++++- ...LongsSelectRowsFromGeneratorBenchmark.java | 68 ++-- ...arLongsSelectRowsFromSegmentBenchmark.java | 64 ++-- .../druid/segment/data/VSizeLongSerde.java | 293 ++++++------------ 8 files changed, 351 insertions(+), 456 deletions(-) 
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java index 598395ffb622..dabcf07ad719 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java @@ -20,6 +20,7 @@ package org.apache.druid.benchmark.compression; import org.apache.druid.collections.bitmap.WrappedImmutableRoaringBitmap; +import org.apache.druid.java.util.common.RE; import org.apache.druid.segment.data.ColumnarLongs; import org.apache.druid.segment.data.ColumnarLongsSerializer; import org.apache.druid.segment.data.CompressedColumnarLongsSupplier; @@ -50,6 +51,10 @@ public class BaseColumnarLongsBenchmark { static final int VECTOR_SIZE = 512; + /** + * Name of the long encoding strategy. For longs, this is a composite of both byte level block compression and + * encoding of values within the block. 
+ */ @Param({ "lz4-longs", "lz4-auto" @@ -166,7 +171,8 @@ static ColumnarLongs createColumnarLongs(String encoding, ByteBuffer buffer) } - // for debugging: validate that all encoders read the same values + // for testing encodings: validate that all encoders read the same values + // noinspection unused static void checkSanity(Map encoders, List encodings, int rows) throws Exception { @@ -176,7 +182,6 @@ static void checkSanity(Map encoders, List encodi } static void checkRowSanity(Map encoders, List encodings, int row) - throws Exception { if (encodings.size() > 1) { for (int i = 0; i < encodings.size() - 1; i++) { @@ -187,16 +192,14 @@ static void checkRowSanity(Map encoders, List enc long vCurrent = current.get(row); long vNext = next.get(row); if (vCurrent != vNext) { - throw new Exception("values do not match at row " - + row - + " - " - + currentKey - + ":" - + vCurrent - + " " - + nextKey - + ":" - + vNext); + throw new RE( + "values do not match at row %s - %s:%s %s:%s", + row, + currentKey, + vCurrent, + nextKey, + vNext + ); } } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java index b46c30cfa13f..5e1c85e534b2 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromGeneratorBenchmark.java @@ -28,12 +28,7 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; -import java.io.BufferedReader; import java.io.File; -import java.io.IOException; -import java.io.Writer; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; import java.util.List; @State(Scope.Benchmark) @@ -41,25 +36,37 @@ public class BaseColumnarLongsFromGeneratorBenchmark extends BaseColumnarLongsBe { static 
int SEED = 1; + /** + * Controls the probability that any generated value will be a zero, to simulate sparely populated columns + */ @Param({ "0.0", -// "0.5", -// "0.95" + "0.25", + "0.5", + "0.75", + "0.95" }) double zeroProbability; + /** + * Number of rows generated for the value distribution + */ @Param({"5000000"}) int rows; + /** + * Value distributions to simulate various patterns of long column + */ @Param({ -// "enumerated-0-1", -// "enumerated-full", -// "normal", -// "sequential-1000", -// "sequential-unique", + "enumerated-0-1", + "enumerated-full", + "normal-1-32", + "normal-40-1000", + "sequential-1000", + "sequential-unique", "uniform-1", "uniform-2", -// "uniform-3", + "uniform-3", "uniform-4", "uniform-8", "uniform-12", @@ -71,58 +78,15 @@ public class BaseColumnarLongsFromGeneratorBenchmark extends BaseColumnarLongsBe "uniform-48", "uniform-56", "uniform-64", -// "zipf-low-100", -// "zipf-low-100000", -// "zipf-low-32-bit", -// "zipf-high-100", -// "zipf-high-100000", -// "zipf-high-32-bit" + "zipf-low-100", + "zipf-low-100000", + "zipf-low-32-bit", + "zipf-high-100", + "zipf-high-100000", + "zipf-high-32-bit" }) String distribution; - void initializeValues() throws IOException - { - vals = new long[rows]; - final String filename = getGeneratorValueFilename(distribution, rows, zeroProbability); - File dir = getTmpDir(); - File dataFile = new File(dir, filename); - - if (dataFile.exists()) { - System.out.println("Data files already exist, re-using"); - try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) { - int lineNum = 0; - String line; - while ((line = br.readLine()) != null) { - vals[lineNum] = Long.parseLong(line); - if (vals[lineNum] < minValue) { - minValue = vals[lineNum]; - } - if (vals[lineNum] > maxValue) { - maxValue = vals[lineNum]; - } - lineNum++; - } - } - } else { - try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) { - ColumnValueGenerator 
valueGenerator = makeGenerator(distribution, rows, zeroProbability); - - for (int i = 0; i < rows; i++) { - long value; - Object rowValue = valueGenerator.generateRowValue(); - value = rowValue != null ? (long) rowValue : 0; - vals[i] = value; - if (vals[i] < minValue) { - minValue = vals[i]; - } - if (vals[i] > maxValue) { - maxValue = vals[i]; - } - writer.write(vals[i] + "\n"); - } - } - } - } static ColumnValueGenerator makeGenerator( String distribution, @@ -164,7 +128,7 @@ static ColumnValueGenerator makeGenerator( enumerated, probability ).makeGenerator(SEED); - case "normal": + case "normal-1-32": return GeneratorColumnSchema.makeNormal( distribution, ValueType.LONG, @@ -172,7 +136,18 @@ static ColumnValueGenerator makeGenerator( 1, zeroProbability, 1.0, - (double) Integer.MAX_VALUE, + (double) (1L << 32), + true + ).makeGenerator(SEED); + case "normal-40-1000": + return GeneratorColumnSchema.makeNormal( + distribution, + ValueType.LONG, + true, + 1, + zeroProbability, + (double) (1L << 40), + 1000.0, true ).makeGenerator(SEED); case "sequential-1000": @@ -405,11 +380,6 @@ static ColumnValueGenerator makeGenerator( throw new IllegalArgumentException("unknown distribution"); } - static String getGeneratorValueFilename(String distribution, int rows, double nullProbability) - { - return StringUtils.format("values-%s-%s-%s.bin", distribution, rows, nullProbability); - } - static String getGeneratorEncodedFilename(String encoding, String distribution, int rows, double nullProbability) { return StringUtils.format("%s-%s-%s-%s.bin", encoding, distribution, rows, nullProbability); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java index 0fa04f15a715..97305b9b312c 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java +++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java @@ -19,36 +19,19 @@ package org.apache.druid.benchmark.compression; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.api.client.util.Lists; -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; -import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.column.LongsColumn; -import org.apache.druid.segment.column.ValueType; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; -import java.io.BufferedReader; import java.io.File; -import java.io.IOException; -import java.io.Writer; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Set; @State(Scope.Benchmark) public class BaseColumnarLongsFromSegmentsBenchmark extends BaseColumnarLongsBenchmark { - //CHECKSTYLE.OFF: Regexp - // twitter-ticker + /** + * Long columns to read from the segment file specified by {@link #segmentPath} + */ @Param({ "__time", "followers", @@ -62,53 +45,31 @@ public class BaseColumnarLongsFromSegmentsBenchmark extends BaseColumnarLongsBen }) String columnName; + /** + * Number of rows in the segment. This should actually match the number of rows specified in {@link #segmentPath}. If + * it is smaller than only this many rows will be read, if larger then the benchmark will explode trying to read more + * data than exists rows. + * + * This is a hassle, but ensures that the row count ends up in the output measurements. + */ @Param({"3259585"}) int rows; + /** + * Path to a segment file to read long columns from. 
This shouldn't really be used as a parameter, but is nice to + * be included in the output measurements. + */ @Param({"tmp/segments/twitter-ticker-1/"}) String segmentPath; + /** + * Friendly name of the segment. Like {@link #segmentPath}, this shouldn't really be used as a parameter, but is also + * nice to be included in the output measurements. + */ @Param({"twitter-ticker"}) String segmentName; - - //CHECKSTYLE.ON: Regexp - - private static IndexIO INDEX_IO; - public static ObjectMapper JSON_MAPPER; - - void initializeValues() throws IOException - { - initializeSegmentValueIntermediaryFile(); - File dir = getTmpDir(); - File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); - - ArrayList values = Lists.newArrayList(); - try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) { - String line; - while ((line = br.readLine()) != null) { - long value = Long.parseLong(line); - if (value < minValue) { - minValue = value; - } - if (value > maxValue) { - maxValue = value; - } - values.add(value); - rows++; - } - } - - vals = values.stream().mapToLong(i -> i).toArray(); - } - - - String getColumnDataFileName(String segmentName, String columnName) - { - return StringUtils.format("%s-longs-%s.txt", segmentName, columnName); - } - String getColumnEncodedFileName(String encoding, String segmentName, String columnName) { return StringUtils.format("%s-%s-longs-%s.bin", encoding, segmentName, columnName); @@ -121,43 +82,4 @@ File getTmpDir() dir.mkdirs(); return dir; } - - /** - * writes column values to text file, 1 per line - * - * @throws IOException - */ - void initializeSegmentValueIntermediaryFile() throws IOException - { - File dir = getTmpDir(); - File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); - - if (!dataFile.exists()) { - JSON_MAPPER = new DefaultObjectMapper(); - INDEX_IO = new IndexIO( - JSON_MAPPER, - () -> 0 - ); - try (final QueryableIndex index = INDEX_IO.loadIndex(new 
File(segmentPath))) { - final Set columnNames = Sets.newLinkedHashSet(); - columnNames.add(ColumnHolder.TIME_COLUMN_NAME); - Iterables.addAll(columnNames, index.getColumnNames()); - final ColumnHolder column = index.getColumnHolder(columnName); - final ColumnCapabilities capabilities = column.getCapabilities(); - final ValueType columnType = capabilities.getType(); - try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) { - if (columnType != ValueType.LONG) { - throw new RuntimeException("Invalid column type, expected 'Long'"); - } - LongsColumn theColumn = (LongsColumn) column.getColumn(); - - - for (int i = 0; i < theColumn.length(); i++) { - long value = theColumn.getLongSingleValueRow(i); - writer.write(value + "\n"); - } - } - } - } - } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java index e27a5edfeb37..f02d25d06f55 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromGeneratorBenchmark.java @@ -19,6 +19,8 @@ package org.apache.druid.benchmark.compression; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.generator.ColumnValueGenerator; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -36,9 +38,13 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.Writer; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import 
java.nio.file.StandardOpenOption; import java.util.concurrent.TimeUnit; @@ -51,7 +57,46 @@ public class ColumnarLongsEncodeDataFromGeneratorBenchmark extends BaseColumnarL @Setup public void setup() throws Exception { - initializeValues(); + vals = new long[rows]; + final String filename = getGeneratorValueFilename(distribution, rows, zeroProbability); + File dir = getTmpDir(); + File dataFile = new File(dir, filename); + + if (dataFile.exists()) { + System.out.println("Data files already exist, re-using"); + try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) { + int lineNum = 0; + String line; + while ((line = br.readLine()) != null) { + vals[lineNum] = Long.parseLong(line); + if (vals[lineNum] < minValue) { + minValue = vals[lineNum]; + } + if (vals[lineNum] > maxValue) { + maxValue = vals[lineNum]; + } + lineNum++; + } + } + } else { + try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) { + ColumnValueGenerator valueGenerator = makeGenerator(distribution, rows, zeroProbability); + + for (int i = 0; i < rows; i++) { + long value; + Object rowValue = valueGenerator.generateRowValue(); + value = rowValue != null ? 
(long) rowValue : 0; + vals[i] = value; + if (vals[i] < minValue) { + minValue = vals[i]; + } + if (vals[i] > maxValue) { + maxValue = vals[i]; + } + writer.write(vals[i] + "\n"); + } + } + } } @Benchmark @@ -82,4 +127,9 @@ public static void main(String[] args) throws RunnerException new Runner(opt).run(); } + + private static String getGeneratorValueFilename(String distribution, int rows, double nullProbability) + { + return StringUtils.format("values-%s-%s-%s.bin", distribution, rows, nullProbability); + } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java index f87156d757f8..b8c3f8ab563e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java @@ -19,6 +19,17 @@ package org.apache.druid.benchmark.compression; +import com.google.api.client.util.Lists; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.LongsColumn; +import org.apache.druid.segment.column.ValueType; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -36,10 +47,16 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import 
java.io.Writer; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Set; import java.util.concurrent.TimeUnit; @State(Scope.Benchmark) @@ -51,7 +68,27 @@ public class ColumnarLongsEncodeDataFromSegmentBenchmark extends BaseColumnarLon @Setup public void setup() throws Exception { - initializeValues(); + initializeSegmentValueIntermediaryFile(); + File dir = getTmpDir(); + File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); + + ArrayList values = Lists.newArrayList(); + try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) { + String line; + while ((line = br.readLine()) != null) { + long value = Long.parseLong(line); + if (value < minValue) { + minValue = value; + } + if (value > maxValue) { + maxValue = value; + } + values.add(value); + rows++; + } + } + + vals = values.stream().mapToLong(i -> i).toArray(); } @Benchmark @@ -71,6 +108,48 @@ public void encodeColumn(Blackhole blackhole) throws IOException output.close(); } + /** + * writes column values to an intermediary text file, 1 per line, encoders read from this file as input to write + * encoded column files. 
+ */ + private void initializeSegmentValueIntermediaryFile() throws IOException + { + File dir = getTmpDir(); + File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); + + if (!dataFile.exists()) { + IndexIO INDEX_IO = new IndexIO( + new DefaultObjectMapper(), + () -> 0 + ); + try (final QueryableIndex index = INDEX_IO.loadIndex(new File(segmentPath))) { + final Set columnNames = Sets.newLinkedHashSet(); + columnNames.add(ColumnHolder.TIME_COLUMN_NAME); + Iterables.addAll(columnNames, index.getColumnNames()); + final ColumnHolder column = index.getColumnHolder(columnName); + final ColumnCapabilities capabilities = column.getCapabilities(); + final ValueType columnType = capabilities.getType(); + try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) { + if (columnType != ValueType.LONG) { + throw new RuntimeException("Invalid column type, expected 'Long'"); + } + LongsColumn theColumn = (LongsColumn) column.getColumn(); + + + for (int i = 0; i < theColumn.length(); i++) { + long value = theColumn.getLongSingleValueRow(i); + writer.write(value + "\n"); + } + } + } + } + } + + private String getColumnDataFileName(String segmentName, String columnName) + { + return StringUtils.format("%s-longs-%s.txt", segmentName, columnName); + } + public static void main(String[] args) throws RunnerException { System.out.println("main happened"); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java index 70553df58bd5..ae6cd553d004 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java @@ -56,12 +56,16 @@ public class 
ColumnarLongsSelectRowsFromGeneratorBenchmark extends BaseColumnarL private Map decoders; private Map encodedSize; - // Number of rows to read, the test will read random rows -// @Param({"0.1", "0.05", "0.95", "1.0"}) + /** + * Number of rows to read, the test will randomly set positions in a simulated offset of the specified density in + * {@link #setupFilters(int, double)} + */ @Param({ -// "0.1", -// "0.5", -// "0.95", + "0.1", + "0.25", + "0.5", + "0.75", + "0.95", "1.0" }) private double filteredRowCountPercentage; @@ -75,7 +79,7 @@ public void setup() throws Exception setupFromFile(encoding); setupFilters(rows, filteredRowCountPercentage); - // uncomment me to load multiple encoded files for sanity check + // uncomment this block to run sanity check to ensure all specified encodings produce the same set of results //CHECKSTYLE.OFF: Regexp // ImmutableList all = ImmutableList.of("lz4-longs", "lz4-auto"); // for (String _enc : all) { @@ -108,23 +112,23 @@ private void setupFromFile(String encoding) throws IOException decoders.put(encoding, data); } -// @Benchmark -// @BenchmarkMode(Mode.AverageTime) -// @OutputTimeUnit(TimeUnit.MICROSECONDS) -// public void selectRows(Blackhole blackhole) -// { -// EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); -// ColumnarLongs encoder = decoders.get(encoding); -// if (filter == null) { -// for (int i = 0; i < rows; i++) { -// blackhole.consume(encoder.get(i)); -// } -// } else { -// for (int i = filter.nextSetBit(0); i >= 0; i = filter.nextSetBit(i + 1)) { -// blackhole.consume(encoder.get(i)); -// } -// } -// } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void selectRows(Blackhole blackhole) + { + EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); + ColumnarLongs encoder = decoders.get(encoding); + if (filter == null) { + for (int i = 0; i < rows; i++) { + blackhole.consume(encoder.get(i)); + } + } else { + for (int i = filter.nextSetBit(0); i >= 
0; i = filter.nextSetBit(i + 1)) { + blackhole.consume(encoder.get(i)); + } + } + } @Benchmark @BenchmarkMode(Mode.AverageTime) @@ -151,24 +155,6 @@ public void selectRowsVectorized(Blackhole blackhole) columnDecoder.close(); } -// @Benchmark -// @BenchmarkMode(Mode.AverageTime) -// @OutputTimeUnit(TimeUnit.MICROSECONDS) -// public void readVectorizedSequential(Blackhole bh) -// { -// long[] vector = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; -// EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); -// ColumnarLongs columnDecoder = decoders.get(encoding); -// int count = columnDecoder.size(); -// for (int i = 0; i < count; i++) { -// if (i % vector.length == 0) { -// columnDecoder.get(vector, i, Math.min(vector.length, count - i)); -// } -// bh.consume(vector[i % vector.length]); -// } -// columnDecoder.close(); -// } - public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java index 00c84fe21f2a..76c66e6ee34f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java @@ -19,7 +19,6 @@ package org.apache.druid.benchmark.compression; -import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import com.google.common.io.Files; import org.apache.druid.segment.data.ColumnarLongs; @@ -45,7 +44,6 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -58,9 +56,11 @@ public class ColumnarLongsSelectRowsFromSegmentBenchmark extends BaseColumnarLon 
private Map decoders; private Map encodedSize; - // Number of rows to read, the test will read random rows -// @Param({"0.01", "0.1", "0.33", "0.66", "0.95", "1.0"}) - @Param({"1.0"}) + /** + * Number of rows to read, the test will randomly set positions in a simulated offset of the specified density in + * {@link #setupFilters(int, double)} + */ + @Param({"0.01", "0.1", "0.33", "0.66", "0.95", "1.0"}) private double filteredRowCountPercentage; @Setup @@ -73,16 +73,17 @@ public void setup() throws Exception setupFromFile(encoding); - // uncomment me to load some encoding files to cross reference values for sanity check + // uncomment this block to run sanity check to ensure all specified encodings produce the same set of results //CHECKSTYLE.OFF: Regexp - List all = ImmutableList.of("lz4-longs", "lz4-auto"); - for (String _enc : all) { - if (!_enc.equals(encoding)) { - setupFromFile(_enc); - } - } - - checkSanity(decoders, all, rows); +// List all = ImmutableList.of("lz4-longs", "lz4-auto"); +// for (String _enc : all) { +// if (!_enc.equals(encoding)) { +// setupFromFile(_enc); +// } +// } +// +// checkSanity(decoders, all, rows); + //CHECKSTYLE.ON: Regexp } @TearDown @@ -105,23 +106,23 @@ private void setupFromFile(String encoding) throws IOException decoders.put(encoding, data); } -// @Benchmark -// @BenchmarkMode(Mode.AverageTime) -// @OutputTimeUnit(TimeUnit.MICROSECONDS) -// public void selectRows(Blackhole blackhole) -// { -// EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); -// ColumnarLongs encoder = decoders.get(encoding); -// if (filter == null) { -// for (int i = 0; i < rows; i++) { -// blackhole.consume(encoder.get(i)); -// } -// } else { -// for (int i = filter.nextSetBit(0); i >= 0; i = filter.nextSetBit(i + 1)) { -// blackhole.consume(encoder.get(i)); -// } -// } -// } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void selectRows(Blackhole blackhole) + { + 
EncodingSizeProfiler.encodedSize = encodedSize.get(encoding); + ColumnarLongs encoder = decoders.get(encoding); + if (filter == null) { + for (int i = 0; i < rows; i++) { + blackhole.consume(encoder.get(i)); + } + } else { + for (int i = filter.nextSetBit(0); i >= 0; i = filter.nextSetBit(i + 1)) { + blackhole.consume(encoder.get(i)); + } + } + } @Benchmark @BenchmarkMode(Mode.AverageTime) @@ -151,7 +152,6 @@ public void selectRowsVectorized(Blackhole blackhole) public static void main(String[] args) throws RunnerException { - System.out.println("main happened"); Options opt = new OptionsBuilder() .include(ColumnarLongsSelectRowsFromSegmentBenchmark.class.getSimpleName()) .addProfiler(EncodingSizeProfiler.class) diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index 500c9af22c5d..722f82c09f82 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -557,13 +557,6 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo while ((index & 0x3) != 0 && i < length) { out[outPosition + i++] = delta + get(index++); } -// for ( ; i + 4 < length; index += 4) { -// final byte unpack = buffer.get(offset + (index >> 2)); -// out[outPosition + i++] = delta + (unpack >> 6) & 3; -// out[outPosition + i++] = delta + (unpack >> 4) & 3; -// out[outPosition + i++] = delta + (unpack >> 2) & 3; -// out[outPosition + i++] = delta + unpack & 3; -// } for ( ; i + 8 < length; index += 8) { final short unpack = buffer.getShort(offset + (index >> 2)); out[outPosition + i++] = delta + (unpack >> 14) & 3; @@ -590,13 +583,6 @@ public void getTable(long[] out, int outPosition, int startIndex, int length, lo while ((index & 0x3) != 0 && i < length) { out[outPosition + i++] = table[(int) get(index++)]; } -// for ( ; i + 4 < length; index += 4) { 
-// final byte unpack = buffer.get(offset + (index >> 2)); -// out[outPosition + i++] = table[(unpack >> 6) & 3]; -// out[outPosition + i++] = table[(unpack >> 4) & 3]; -// out[outPosition + i++] = table[(unpack >> 2) & 3]; -// out[outPosition + i++] = table[unpack & 3]; -// } for ( ; i + 8 < length; index += 8) { final short unpack = buffer.getShort(offset + (index >> 2)); out[outPosition + i++] = table[(unpack >> 14) & 3]; @@ -642,11 +628,6 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo while ((index & 0x1) != 0 && i < length) { out[outPosition + i++] = delta + get(index++) & 0xF; } -// for ( ; i + 2 < length; index += 2) { -// final byte unpack = buffer.get(offset + (index >> 1)); -// out[outPosition + i++] = delta + (unpack >> 4) & 0xF; -// out[outPosition + i++] = delta + unpack & 0xF; -// } for ( ; i + 8 < length; index += 8) { final int unpack = buffer.getInt(offset + (index >> 1)); out[outPosition + i++] = delta + (unpack >> 28) & 0xF; @@ -673,11 +654,6 @@ public void getTable(long[] out, int outPosition, int startIndex, int length, lo while ((index & 0x1) != 0 && i < length) { out[outPosition + i++] = table[(int) get(index++)]; } -// for ( ; i + 2 < length; index += 2) { -// final byte unpack = buffer.get(offset + (index >> 1)); -// out[outPosition + i++] = table[(unpack >> 4) & 0xF]; -// out[outPosition + i++] = table[unpack & 0xF]; -// } for ( ; i + 8 < length; index += 8) { final int unpack = buffer.getInt(offset + (index >> 1)); out[outPosition + i++] = table[(unpack >> 28) & 0xF]; @@ -718,22 +694,6 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo for (int i = 0, indexOffset = startIndex; i < length; i++, indexOffset++) { out[outPosition + i] = delta + buffer.get(offset + indexOffset) & 0xFF; } -// int i = 0; -// for (int indexOffset = startIndex; i + 8 < length; indexOffset += 8) { -// final long unpack = buffer.getLong(indexOffset); -// out[outPosition + i++] = delta + ((unpack 
>>> 56) & 0xFF); -// out[outPosition + i++] = delta + ((unpack >>> 48) & 0xFF); -// out[outPosition + i++] = delta + ((unpack >>> 40) & 0xFF); -// out[outPosition + i++] = delta + ((unpack >>> 32) & 0xFF); -// out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFF); -// out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFF); -// out[outPosition + i++] = delta + ((unpack >>> 8) & 0xFF); -// out[outPosition + i++] = delta + (unpack & 0xFF); -// } -// while (i < length) { -// out[outPosition + i] = delta + (int) get(startIndex + i); -// i++; -// } } @Override @@ -757,21 +717,6 @@ public void getTable(long[] out, int outPosition, int startIndex, int length, lo for (int i = 0, indexOffset = startIndex; i < length; i++, indexOffset++) { out[outPosition + i] = table[buffer.get(offset + indexOffset) & 0xFF]; } -// int i = 0; -// for (int indexOffset = startIndex; i + 8 < length; indexOffset += 8) { -// out[outPosition + i++] = table[buffer.getByte(indexOffset) & 0xFF]; -// out[outPosition + i++] = table[buffer.getByte(indexOffset + 1) & 0xFF]; -// out[outPosition + i++] = table[buffer.getByte(indexOffset + 2) & 0xFF]; -// out[outPosition + i++] = table[buffer.getByte(indexOffset + 3) & 0xFF]; -// out[outPosition + i++] = table[buffer.getByte(indexOffset + 4) & 0xFF]; -// out[outPosition + i++] = table[buffer.getByte(indexOffset + 5) & 0xFF]; -// out[outPosition + i++] = table[buffer.getByte(indexOffset + 6) & 0xFF]; -// out[outPosition + i++] = table[buffer.getByte(indexOffset + 7) & 0xFF]; -// } -// while (i < length) { -// out[outPosition + i] = table[(int) get(startIndex + i)]; -// i++; -// } } @Override @@ -828,7 +773,7 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo out[outPosition + i++] = delta + ((unpack >> 28) & 0xFFF); out[outPosition + i++] = delta + ((unpack >> 16) & 0xFFF); out[outPosition + i++] = delta + ((unpack >> 4) & 0xFFF); - out[outPosition + i++] = delta + (((unpack & 0xF) << 8) | ((unpack2 >> 24) & 0xFF)); + 
out[outPosition + i++] = delta + (((unpack & 0xF) << 8) | ((unpack2 >>> 24) & 0xFF)); out[outPosition + i++] = delta + ((unpack2 >> 12) & 0xFFF); out[outPosition + i++] = delta + (unpack2 & 0xFFF); } @@ -862,32 +807,6 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo for (int i = 0, indexOffset = (startIndex << 1); i < length; i++, indexOffset += Short.BYTES) { out[outPosition + i] = delta + buffer.getShort(offset + indexOffset) & 0xFFFF; } -// int i = 0; -// final int unpackSize = 8 * Short.BYTES; -// for (int indexOffset = startIndex << 1; i + 8 < length; indexOffset += unpackSize) { -//// final long unpack = buffer.getLong(indexOffset); -//// final long unpack2 = buffer.getLong(indexOffset + Long.BYTES); -//// out[outPosition + i++] = delta + ((unpack >> 48) & 0xFFFF); -//// out[outPosition + i++] = delta + ((unpack >> 32) & 0xFFFF); -//// out[outPosition + i++] = delta + ((unpack >> 16) & 0xFFFF); -//// out[outPosition + i++] = delta + (unpack & 0xFFFF); -//// out[outPosition + i++] = delta + ((unpack2 >> 48) & 0xFFFF); -//// out[outPosition + i++] = delta + ((unpack2 >> 32) & 0xFFFF); -//// out[outPosition + i++] = delta + ((unpack2 >> 16) & 0xFFFF); -//// out[outPosition + i++] = delta + (unpack2 & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset) & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 2) & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 4) & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 6) & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 8) & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 10) & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 12) & 0xFFFF); -// out[outPosition + i++] = delta + (buffer.getShort(indexOffset + 14) & 0xFFFF); -// } -// while (i < length) { -// out[outPosition + i] = delta + (int) 
get(startIndex + i); -// i++; -// } } @Override @@ -962,12 +881,12 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final long unpack = buffer.getLong(offset + indexOffset); final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); final int unpack3 = buffer.getInt(offset + indexOffset + Long.BYTES + Long.BYTES); - out[outPosition + i++] = delta + ((unpack >>> 44) & 0xFFFFF); - out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFFFFF); - out[outPosition + i++] = delta + ((unpack >>> 4) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack >> 44) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack >> 24) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack >> 4) & 0xFFFFF); out[outPosition + i++] = delta + (((unpack & 0xF) << 16) | ((unpack2 >>> 48) & 0xFFFF)); - out[outPosition + i++] = delta + ((unpack2 >>> 28) & 0xFFFFF); - out[outPosition + i++] = delta + ((unpack2 >>> 8) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack2 >> 28) & 0xFFFFF); + out[outPosition + i++] = delta + ((unpack2 >> 8) & 0xFFFFF); out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 12) | ((unpack3 >>> 20) & 0xFFF)); out[outPosition + i++] = delta + (unpack3 & 0xFFFFF); } @@ -1004,13 +923,13 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final long unpack = buffer.getLong(offset + indexOffset); final long unpack2 = buffer.getLong(offset +indexOffset + Long.BYTES); final long unpack3 = buffer.getLong(offset + indexOffset + Long.BYTES + Long.BYTES); - out[outPosition + i++] = delta + ((unpack >>> 40) & 0xFFFFFF); - out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFFFFFF); + out[outPosition + i++] = delta + ((unpack >> 40) & 0xFFFFFF); + out[outPosition + i++] = delta + ((unpack >> 16) & 0xFFFFFF); out[outPosition + i++] = delta + (((unpack & 0xFFFF) << 8) | ((unpack2 >>> 56) & 0xFF)); - out[outPosition + i++] = delta + ((unpack2 >>> 32) & 0xFFFFFF); - out[outPosition + i++] = delta + 
((unpack2 >>> 8) & 0xFFFFFF); + out[outPosition + i++] = delta + ((unpack2 >> 32) & 0xFFFFFF); + out[outPosition + i++] = delta + ((unpack2 >> 8) & 0xFFFFFF); out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 16) | ((unpack3 >>> 48) & 0xFFFF)); - out[outPosition + i++] = delta + ((unpack3 >>> 24) & 0xFFFFFF); + out[outPosition + i++] = delta + ((unpack3 >> 24) & 0xFFFFFF); out[outPosition + i++] = delta + (unpack3 & 0xFFFFFF); } while (i < length) { @@ -1043,22 +962,6 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo for (int i = 0, indexOffset = (startIndex << 2); i < length; i++, indexOffset += Integer.BYTES) { out[outPosition + i] = delta + buffer.getInt(offset + indexOffset) & 0xFFFFFFFFL; } -// int i = 0; -// final int unpackSize = 8 * Integer.BYTES; -// for (int indexOffset = startIndex << 2; i + 8 < length; indexOffset += unpackSize) { -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset) & 0xFFFFFFFFL); -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 4) & 0xFFFFFFFFL); -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 8) & 0xFFFFFFFFL); -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 12) & 0xFFFFFFFFL); -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 16) & 0xFFFFFFFFL); -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 20) & 0xFFFFFFFFL); -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 24) & 0xFFFFFFFFL); -// out[outPosition + i++] = delta + (buffer.getInt(offset + indexOffset + 28) & 0xFFFFFFFFL); -// } -// while (i < length) { -// out[outPosition + i] = delta + (int) get(startIndex + i); -// i++; -// } } } @@ -1079,31 +982,31 @@ public long get(int index) return buffer.getLong(offset + (index * 5)) >>> 24; } -// @Override -// public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) -// { -// int i = 0; -// 
final int unpackSize = 5 * Long.BYTES; -// for (int indexOffset = startIndex * 5; i + 8 < length; indexOffset += unpackSize) { -// final long unpack = buffer.getLong(offset + indexOffset); -// final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); -// final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); -// final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); -// final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); -// out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFFFFFFFFFFL); -// out[outPosition + i++] = delta + (((unpack & 0xFFFFFFL) << 16) | ((unpack2 >>> 48) & 0xFFFFL)); -// out[outPosition + i++] = delta + ((unpack2 >>> 8) & 0xFFFFFFFFFFL); -// out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 32) | ((unpack3 >>> 32) & 0xFFFFFFFFL)); -// out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFFFL) << 32) | ((unpack4 >>> 56 ) & 0xFF)); -// out[outPosition + i++] = delta + ((unpack4 >>> 16) & 0xFFFFFFFFFFL); -// out[outPosition + i++] = delta + (((unpack4 & 0xFFFF) << 24) | ((unpack5 >>> 40) & 0xFFFFFF)); -// out[outPosition + i++] = delta + (unpack5 & 0xFFFFFFFFFFL); -// } -// while (i < length) { -// out[outPosition + i] = delta + (int) get(startIndex + i); -// i++; -// } -// } + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int i = 0; + final int unpackSize = 5 * Long.BYTES; + for (int indexOffset = startIndex * 5; i + 8 < length; indexOffset += unpackSize) { + final long unpack = buffer.getLong(offset + indexOffset); + final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); + final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); + final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); + final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); + out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFFFFFFFFFFL); + 
out[outPosition + i++] = delta + (((unpack & 0xFFFFFFL) << 16) | ((unpack2 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = delta + ((unpack2 >>> 8) & 0xFFFFFFFFFFL); + out[outPosition + i++] = delta + (((unpack2 & 0xFFL) << 32) | ((unpack3 >>> 32) & 0xFFFFFFFFL)); + out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFFFL) << 8) | ((unpack4 >>> 56 ) & 0xFFL)); + out[outPosition + i++] = delta + ((unpack4 >>> 16) & 0xFFFFFFFFFFL); + out[outPosition + i++] = delta + (((unpack4 & 0xFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); + out[outPosition + i++] = delta + (unpack5 & 0xFFFFFFFFFFL); + } + while (i < length) { + out[outPosition + i] = delta + get(startIndex + i); + i++; + } + } } private static final class Size48Des implements LongDeserializer @@ -1123,32 +1026,32 @@ public long get(int index) return buffer.getLong(offset + (index * 6)) >>> 16; } -// @Override -// public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) -// { -// int i = 0; -// final int unpackSize = 6 * Long.BYTES; -// for (int indexOffset = startIndex * 6; i + 8 < length; indexOffset += unpackSize) { -// final long unpack = buffer.getLong(offset + indexOffset); -// final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); -// final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); -// final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); -// final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); -// final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); -// out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFFFFFFFFFFFFL); -// out[outPosition + i++] = delta + (((unpack & 0xFFFFL) << 32) | ((unpack2 >>> 32) & 0xFFFFFFFFL)); -// out[outPosition + i++] = delta + (((unpack2 & 0xFFFFFFFFL) << 32) | ((unpack3 >>> 48) & 0xFFFFL)); -// out[outPosition + i++] = delta + (unpack3 & 0xFFFFFFFFFFFFL); -// out[outPosition + i++] = delta + ((unpack4 >>> 16) & 
0xFFFFFFFFFFFFL); -// out[outPosition + i++] = delta + (((unpack4 & 0xFFFFL) << 32) | ((unpack5 >>> 32) & 0xFFFFFFFFL)); -// out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFL) << 32) | ((unpack6 >>> 48) & 0xFFFFL)); -// out[outPosition + i++] = delta + (unpack6 & 0xFFFFFFFFFFFFL); -// } -// while (i < length) { -// out[outPosition + i] = delta + (int) get(startIndex + i); -// i++; -// } -// } + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int i = 0; + final int unpackSize = 6 * Long.BYTES; + for (int indexOffset = startIndex * 6; i + 8 < length; indexOffset += unpackSize) { + final long unpack = buffer.getLong(offset + indexOffset); + final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); + final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); + final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); + final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); + final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); + out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFFFFFFFFFFFFL); + out[outPosition + i++] = delta + (((unpack & 0xFFFFL) << 32) | ((unpack2 >>> 32) & 0xFFFFFFFFL)); + out[outPosition + i++] = delta + (((unpack2 & 0xFFFFFFFFL) << 16) | ((unpack3 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = delta + (unpack3 & 0xFFFFFFFFFFFFL); + out[outPosition + i++] = delta + ((unpack4 >>> 16) & 0xFFFFFFFFFFFFL); + out[outPosition + i++] = delta + (((unpack4 & 0xFFFFL) << 32) | ((unpack5 >>> 32) & 0xFFFFFFFFL)); + out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = delta + (unpack6 & 0xFFFFFFFFFFFFL); + } + while (i < length) { + out[outPosition + i] = delta + get(startIndex + i); + i++; + } + } } private static final class Size56Des implements LongDeserializer @@ -1168,33 +1071,33 @@ public long get(int index) 
return buffer.getLong(offset + (index * 7)) >>> 8; } -// @Override -// public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) -// { -// int i = 0; -// final int unpackSize = 7 * Long.BYTES; -// for (int indexOffset = startIndex * 7; i + 8 < length; indexOffset += unpackSize) { -// final long unpack = buffer.getLong(offset + indexOffset); -// final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); -// final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); -// final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); -// final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); -// final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); -// final long unpack7 = buffer.getLong(offset + indexOffset + (6 * Long.BYTES)); -// out[outPosition + i++] = delta + ((unpack >>> 8) & 0xFFFFFFFFFFFFFFL); -// out[outPosition + i++] = delta + (((unpack & 0xFFL) << 48) | ((unpack2 >>> 16) & 0xFFFFFFFFFFFFL)); -// out[outPosition + i++] = delta + (((unpack2 & 0xFFFFL) << 40) | ((unpack3 >>> 24) & 0xFFFFFFFFFFL)); -// out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFL) << 32) | ((unpack4 >>> 32) & 0xFFFFFFFFL)); -// out[outPosition + i++] = delta + (((unpack4 & 0xFFFFFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); -// out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); -// out[outPosition + i++] = delta + (((unpack6 & 0xFFFFFFFFFFFFL) << 8) | ((unpack7 >>> 56) & 0xFFL)); -// out[outPosition + i++] = delta + (unpack7 & 0xFFFFFFFFFFFFFFL); -// } -// while (i < length) { -// out[outPosition + i] = delta + (int) get(startIndex + i); -// i++; -// } -// } + @Override + public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + { + int i = 0; + final int unpackSize = 7 * Long.BYTES; + for (int indexOffset = startIndex * 7; i + 8 < length; indexOffset += unpackSize) { + 
final long unpack = buffer.getLong(offset + indexOffset); + final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); + final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); + final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); + final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); + final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); + final long unpack7 = buffer.getLong(offset + indexOffset + (6 * Long.BYTES)); + out[outPosition + i++] = delta + ((unpack >>> 8) & 0xFFFFFFFFFFFFFFL); + out[outPosition + i++] = delta + (((unpack & 0xFFL) << 48) | ((unpack2 >>> 16) & 0xFFFFFFFFFFFFL)); + out[outPosition + i++] = delta + (((unpack2 & 0xFFFFL) << 40) | ((unpack3 >>> 24) & 0xFFFFFFFFFFL)); + out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFL) << 32) | ((unpack4 >>> 32) & 0xFFFFFFFFL)); + out[outPosition + i++] = delta + (((unpack4 & 0xFFFFFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); + out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = delta + (((unpack6 & 0xFFFFFFFFFFFFL) << 8) | ((unpack7 >>> 56) & 0xFFL)); + out[outPosition + i++] = delta + (unpack7 & 0xFFFFFFFFFFFFFFL); + } + while (i < length) { + out[outPosition + i] = delta + get(startIndex + i); + i++; + } + } } private static final class Size64Des implements LongDeserializer @@ -1220,22 +1123,6 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo for (int i = 0, indexOffset = (startIndex << 3); i < length; i++, indexOffset += Long.BYTES) { out[outPosition + i] = delta + buffer.getLong(offset + indexOffset); } -// int i = 0; -// final int unpackSize = 8 * Long.BYTES; -// for (int indexOffset = (startIndex << 3); i + 8 < length; indexOffset += unpackSize) { -// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset); -// out[outPosition + i++] = delta + 
buffer.getLong(offset + indexOffset + 8); -// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 16); -// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 24); -// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 32); -// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 40); -// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 48); -// out[outPosition + i++] = delta + buffer.getLong(offset + indexOffset + 56); -// } -// while (i < length) { -// out[outPosition + i] = delta + (int) get(startIndex + i); -// i++; -// } } @Override @@ -1246,10 +1133,8 @@ public int getDelta(long[] out, int outPosition, int[] indexes, int length, int if (index >= limit) { return i; } - out[outPosition + i] = base + buffer.getLong(offset + (index << 3)); } - return length; } } From 880a0895c849dcb475b53374d9f2527df19a1162 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 16 Mar 2021 20:19:42 -0700 Subject: [PATCH 04/11] more cleanup --- ...aseColumnarLongsFromSegmentsBenchmark.java | 3 + .../compression/EncodingSizeProfiler.java | 14 + .../compression/LongCompressionBenchmark.java | 46 +-- .../druid/segment/data/VSizeLongSerde.java | 270 +++++++++--------- 4 files changed, 179 insertions(+), 154 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java index 97305b9b312c..0c0fd2df6001 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsFromSegmentsBenchmark.java @@ -59,6 +59,9 @@ public class BaseColumnarLongsFromSegmentsBenchmark extends BaseColumnarLongsBen /** * Path to a segment file to read long columns from. 
This shouldn't really be used as a parameter, but is nice to * be included in the output measurements. + * + * This is a BYO (bring-your-own) segment: this file probably doesn't exist for you, so replace it and the other + * parameters with the segment to test. */ @Param({"tmp/segments/twitter-ticker-1/"}) String segmentPath; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java index 5a8baa587eac..e241de1da1a0 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/EncodingSizeProfiler.java @@ -20,6 +20,7 @@ package org.apache.druid.benchmark.compression; import org.openjdk.jmh.infra.BenchmarkParams; +import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.infra.IterationParams; import org.openjdk.jmh.profile.InternalProfiler; import org.openjdk.jmh.results.AggregationPolicy; @@ -30,6 +31,19 @@ import java.util.Collection; import java.util.Collections; +/** + * Crude jmh 'profiler' that allows calling benchmark methods to set this static value in a benchmark run and, if + * this profiler is added to the run, have this additional measurement show up in the results. + * + * This allows 2 measurements to be collected for the result set: the timing of the test and the size in bytes set here.
+ * + * @see ColumnarLongsSelectRowsFromGeneratorBenchmark#selectRows(Blackhole) + * @see ColumnarLongsSelectRowsFromGeneratorBenchmark#selectRowsVectorized(Blackhole) + * @see ColumnarLongsEncodeDataFromGeneratorBenchmark#encodeColumn(Blackhole) + * @see ColumnarLongsSelectRowsFromSegmentBenchmark#selectRows(Blackhole) + * @see ColumnarLongsSelectRowsFromSegmentBenchmark#selectRowsVectorized(Blackhole) + * @see ColumnarLongsEncodeDataFromSegmentBenchmark#encodeColumn(Blackhole) + */ public class EncodingSizeProfiler implements InternalProfiler { public static int encodedSize; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmark.java index 88242948c5d9..e059c8a7ea75 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/LongCompressionBenchmark.java @@ -43,6 +43,7 @@ import java.io.File; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; /** @@ -92,19 +93,30 @@ public void tearDown() bufferHandler.close(); } -// @Benchmark -// public void readContinuous(Blackhole bh) -// { -// ColumnarLongs columnarLongs = supplier.get(); -// int count = columnarLongs.size(); -// for (int i = 0; i < count; i++) { -// bh.consume(columnarLongs.get(i)); -// } -// columnarLongs.close(); -// } + @Benchmark + public void readContinuous(Blackhole bh) + { + ColumnarLongs columnarLongs = supplier.get(); + int count = columnarLongs.size(); + for (int i = 0; i < count; i++) { + bh.consume(columnarLongs.get(i)); + } + columnarLongs.close(); + } + + @Benchmark + public void readSkipping(Blackhole bh) + { + ColumnarLongs columnarLongs = supplier.get(); + int count = columnarLongs.size(); + for (int i = 0; i < count; i += 
ThreadLocalRandom.current().nextInt(2000)) { + bh.consume(columnarLongs.get(i)); + } + columnarLongs.close(); + } @Benchmark - public void readVectorizedSequential(Blackhole bh) + public void readVectorizedContinuous(Blackhole bh) { long[] vector = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; ColumnarLongs columnarLongs = supplier.get(); @@ -117,16 +129,4 @@ public void readVectorizedSequential(Blackhole bh) } columnarLongs.close(); } - -// @Benchmark -// public void readSkipping(Blackhole bh) -// { -// ColumnarLongs columnarLongs = supplier.get(); -// int count = columnarLongs.size(); -// for (int i = 0; i < count; i += ThreadLocalRandom.current().nextInt(2000)) { -// bh.consume(columnarLongs.get(i)); -// } -// columnarLongs.close(); -// } - } diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index 722f82c09f82..8fc584b6fb6b 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -20,7 +20,6 @@ package org.apache.druid.segment.data; import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.UOE; import javax.annotation.Nullable; import java.io.Closeable; @@ -413,18 +412,32 @@ public void close() throws IOException } } + /** + * Unpack bitpacked long values from an underlying contiguous memory block + */ public interface LongDeserializer { + /** + * Unpack long value at the specified row index + */ long get(int index); - default void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + /** + * Unpack a contiguous vector of long values at the specified start index of length and adjust them by the supplied + * delta base value. 
+ */ + default void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { for (int i = 0; i < length; i++) { - out[outPosition + i] = delta + get(startIndex + i); + out[outPosition + i] = base + get(startIndex + i); } } - default int getDelta(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long delta) + /** + * Unpack a non-contiguous vector of long values at the specified indexes and adjust them by the supplied delta base + * value. + */ + default int getDelta(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long base) { for (int i = 0; i < length; i++) { int index = indexes[outPosition + i] - indexOffset; @@ -432,18 +445,36 @@ default int getDelta(long[] out, int outPosition, int[] indexes, int length, int return i; } - out[outPosition + i] = delta + get(index); + out[outPosition + i] = base + get(index); } return length; } + /** + * Unpack a contiguous vector of long values at the specified start index of length and look up and replace stored + * values based on their index in the supplied value lookup 'table' + */ default void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) { - throw new UOE("Table decoding not supported for %s", this.getClass().getSimpleName()); + for (int i = 0; i < length; i++) { + out[outPosition + i] = table[(int) get(startIndex + i)]; + } } - default int getTable(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long[] table) + /** + * Unpack a non-contiguous vector of long values at the specified indexes and look up and replace stored values based + * on their index in the supplied value lookup 'table' + */ + default int getTable( + long[] out, + int outPosition, + int[] indexes, + int length, + int indexOffset, + int limit, + long[] table + ) { for (int i = 0; i < length; i++) { int index = indexes[outPosition + i] - indexOffset; @@ -477,28 +508,28 @@ public long get(int index) }
@Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int index = startIndex; int i = 0; // byte align while ((index & 0x7) != 0 && i < length) { - out[outPosition + i++] = delta + get(index++); + out[outPosition + i++] = base + get(index++); } for ( ; i + Byte.SIZE < length; index += Byte.SIZE) { final byte unpack = buffer.get(offset + (index >> 3)); - out[outPosition + i++] = delta + (unpack >> 7) & 1; - out[outPosition + i++] = delta + (unpack >> 6) & 1; - out[outPosition + i++] = delta + (unpack >> 5) & 1; - out[outPosition + i++] = delta + (unpack >> 4) & 1; - out[outPosition + i++] = delta + (unpack >> 3) & 1; - out[outPosition + i++] = delta + (unpack >> 2) & 1; - out[outPosition + i++] = delta + (unpack >> 1) & 1; - out[outPosition + i++] = delta + unpack & 1; + out[outPosition + i++] = base + (unpack >> 7) & 1; + out[outPosition + i++] = base + (unpack >> 6) & 1; + out[outPosition + i++] = base + (unpack >> 5) & 1; + out[outPosition + i++] = base + (unpack >> 4) & 1; + out[outPosition + i++] = base + (unpack >> 3) & 1; + out[outPosition + i++] = base + (unpack >> 2) & 1; + out[outPosition + i++] = base + (unpack >> 1) & 1; + out[outPosition + i++] = base + unpack & 1; } while (i < length) { - out[outPosition + i++] = delta + get(index++); + out[outPosition + i++] = base + get(index++); } } @@ -548,28 +579,28 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int index = startIndex; int i = 0; // byte align while ((index & 0x3) != 0 && i < length) { - out[outPosition + i++] = delta + get(index++); + out[outPosition + i++] = base + get(index++); } for ( ; i + 8 < length; index += 8) { final short unpack = buffer.getShort(offset + (index >> 2)); - 
out[outPosition + i++] = delta + (unpack >> 14) & 3; - out[outPosition + i++] = delta + (unpack >> 12) & 3; - out[outPosition + i++] = delta + (unpack >> 10) & 3; - out[outPosition + i++] = delta + (unpack >> 8) & 3; - out[outPosition + i++] = delta + (unpack >> 6) & 3; - out[outPosition + i++] = delta + (unpack >> 4) & 3; - out[outPosition + i++] = delta + (unpack >> 2) & 3; - out[outPosition + i++] = delta + unpack & 3; + out[outPosition + i++] = base + (unpack >> 14) & 3; + out[outPosition + i++] = base + (unpack >> 12) & 3; + out[outPosition + i++] = base + (unpack >> 10) & 3; + out[outPosition + i++] = base + (unpack >> 8) & 3; + out[outPosition + i++] = base + (unpack >> 6) & 3; + out[outPosition + i++] = base + (unpack >> 4) & 3; + out[outPosition + i++] = base + (unpack >> 2) & 3; + out[outPosition + i++] = base + unpack & 3; } while (i < length) { - out[outPosition + i++] = delta + get(index++); + out[outPosition + i++] = base + get(index++); } } @@ -619,28 +650,28 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int index = startIndex; int i = 0; // byte align while ((index & 0x1) != 0 && i < length) { - out[outPosition + i++] = delta + get(index++) & 0xF; + out[outPosition + i++] = base + get(index++) & 0xF; } for ( ; i + 8 < length; index += 8) { final int unpack = buffer.getInt(offset + (index >> 1)); - out[outPosition + i++] = delta + (unpack >> 28) & 0xF; - out[outPosition + i++] = delta + (unpack >> 24) & 0xF; - out[outPosition + i++] = delta + (unpack >> 20) & 0xF; - out[outPosition + i++] = delta + (unpack >> 16) & 0xF; - out[outPosition + i++] = delta + (unpack >> 12) & 0xF; - out[outPosition + i++] = delta + (unpack >> 8) & 0xF; - out[outPosition + i++] = delta + (unpack >> 4) & 0xF; - out[outPosition + i++] = delta + unpack & 0xF; + out[outPosition + i++] = base + 
(unpack >> 28) & 0xF; + out[outPosition + i++] = base + (unpack >> 24) & 0xF; + out[outPosition + i++] = base + (unpack >> 20) & 0xF; + out[outPosition + i++] = base + (unpack >> 16) & 0xF; + out[outPosition + i++] = base + (unpack >> 12) & 0xF; + out[outPosition + i++] = base + (unpack >> 8) & 0xF; + out[outPosition + i++] = base + (unpack >> 4) & 0xF; + out[outPosition + i++] = base + unpack & 0xF; } while (i < length) { - out[outPosition + i++] = delta + get(index++); + out[outPosition + i++] = base + get(index++); } } @@ -689,10 +720,10 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { for (int i = 0, indexOffset = startIndex; i < length; i++, indexOffset++) { - out[outPosition + i] = delta + buffer.get(offset + indexOffset) & 0xFF; + out[outPosition + i] = base + buffer.get(offset + indexOffset) & 0xFF; } } @@ -756,7 +787,7 @@ public long get(int index) @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int i = 0; int index = startIndex; @@ -768,17 +799,17 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo for (int indexOffset = (index * 3) >> 1; i + 8 < length; indexOffset += unpackSize) { final long unpack = buffer.getLong(offset + indexOffset); final int unpack2 = buffer.getInt(offset + indexOffset + Long.BYTES); - out[outPosition + i++] = delta + ((unpack >> 52) & 0xFFF); - out[outPosition + i++] = delta + ((unpack >> 40) & 0xFFF); - out[outPosition + i++] = delta + ((unpack >> 28) & 0xFFF); - out[outPosition + i++] = delta + ((unpack >> 16) & 0xFFF); - out[outPosition + i++] = delta + ((unpack >> 4) & 0xFFF); - out[outPosition + i++] = delta + (((unpack & 0xF) << 8) | ((unpack2 >>> 24) & 0xFF)); - 
out[outPosition + i++] = delta + ((unpack2 >> 12) & 0xFFF); - out[outPosition + i++] = delta + (unpack2 & 0xFFF); + out[outPosition + i++] = base + ((unpack >> 52) & 0xFFF); + out[outPosition + i++] = base + ((unpack >> 40) & 0xFFF); + out[outPosition + i++] = base + ((unpack >> 28) & 0xFFF); + out[outPosition + i++] = base + ((unpack >> 16) & 0xFFF); + out[outPosition + i++] = base + ((unpack >> 4) & 0xFFF); + out[outPosition + i++] = base + (((unpack & 0xF) << 8) | ((unpack2 >>> 24) & 0xFF)); + out[outPosition + i++] = base + ((unpack2 >> 12) & 0xFFF); + out[outPosition + i++] = base + (unpack2 & 0xFFF); } while (i < length) { - out[outPosition + i] = delta + (int) get(startIndex + i); + out[outPosition + i] = base + (int) get(startIndex + i); i++; } } @@ -802,10 +833,10 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { for (int i = 0, indexOffset = (startIndex << 1); i < length; i++, indexOffset += Short.BYTES) { - out[outPosition + i] = delta + buffer.getShort(offset + indexOffset) & 0xFFFF; + out[outPosition + i] = base + buffer.getShort(offset + indexOffset) & 0xFFFF; } } @@ -821,29 +852,6 @@ public int getDelta(long[] out, int outPosition, int[] indexes, int length, int out[outPosition + i] = base + buffer.getShort(offset + (index << 1)) & 0xFFFF; } - return length; - - } - @Override - public void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) - { - for (int i = 0, indexOffset = (startIndex << 1); i < length; i++, indexOffset += Short.BYTES) { - out[outPosition + i] = table[buffer.getShort(offset + indexOffset) & 0xFFFF]; - } - } - - @Override - public int getTable(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit, long[] table) - { - for (int i = 0; i < length; i++) { - int index = indexes[outPosition + i] - 
indexOffset; - if (index >= limit) { - return i; - } - - out[outPosition + i] = table[buffer.getShort(offset + (index << 1)) & 0xFFFF]; - } - return length; } } @@ -868,7 +876,7 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int i = 0; int index = startIndex; @@ -881,17 +889,17 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final long unpack = buffer.getLong(offset + indexOffset); final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); final int unpack3 = buffer.getInt(offset + indexOffset + Long.BYTES + Long.BYTES); - out[outPosition + i++] = delta + ((unpack >> 44) & 0xFFFFF); - out[outPosition + i++] = delta + ((unpack >> 24) & 0xFFFFF); - out[outPosition + i++] = delta + ((unpack >> 4) & 0xFFFFF); - out[outPosition + i++] = delta + (((unpack & 0xF) << 16) | ((unpack2 >>> 48) & 0xFFFF)); - out[outPosition + i++] = delta + ((unpack2 >> 28) & 0xFFFFF); - out[outPosition + i++] = delta + ((unpack2 >> 8) & 0xFFFFF); - out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 12) | ((unpack3 >>> 20) & 0xFFF)); - out[outPosition + i++] = delta + (unpack3 & 0xFFFFF); + out[outPosition + i++] = base + ((unpack >> 44) & 0xFFFFF); + out[outPosition + i++] = base + ((unpack >> 24) & 0xFFFFF); + out[outPosition + i++] = base + ((unpack >> 4) & 0xFFFFF); + out[outPosition + i++] = base + (((unpack & 0xF) << 16) | ((unpack2 >>> 48) & 0xFFFF)); + out[outPosition + i++] = base + ((unpack2 >> 28) & 0xFFFFF); + out[outPosition + i++] = base + ((unpack2 >> 8) & 0xFFFFF); + out[outPosition + i++] = base + (((unpack2 & 0xFF) << 12) | ((unpack3 >>> 20) & 0xFFF)); + out[outPosition + i++] = base + (unpack3 & 0xFFFFF); } while (i < length) { - out[outPosition + i] = delta + (int) get(startIndex + i); + out[outPosition + i] = base + (int) get(startIndex + i); 
i++; } } @@ -915,7 +923,7 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int i = 0; final int unpackSize = 3 * Long.BYTES; @@ -923,17 +931,17 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final long unpack = buffer.getLong(offset + indexOffset); final long unpack2 = buffer.getLong(offset +indexOffset + Long.BYTES); final long unpack3 = buffer.getLong(offset + indexOffset + Long.BYTES + Long.BYTES); - out[outPosition + i++] = delta + ((unpack >> 40) & 0xFFFFFF); - out[outPosition + i++] = delta + ((unpack >> 16) & 0xFFFFFF); - out[outPosition + i++] = delta + (((unpack & 0xFFFF) << 8) | ((unpack2 >>> 56) & 0xFF)); - out[outPosition + i++] = delta + ((unpack2 >> 32) & 0xFFFFFF); - out[outPosition + i++] = delta + ((unpack2 >> 8) & 0xFFFFFF); - out[outPosition + i++] = delta + (((unpack2 & 0xFF) << 16) | ((unpack3 >>> 48) & 0xFFFF)); - out[outPosition + i++] = delta + ((unpack3 >> 24) & 0xFFFFFF); - out[outPosition + i++] = delta + (unpack3 & 0xFFFFFF); + out[outPosition + i++] = base + ((unpack >> 40) & 0xFFFFFF); + out[outPosition + i++] = base + ((unpack >> 16) & 0xFFFFFF); + out[outPosition + i++] = base + (((unpack & 0xFFFF) << 8) | ((unpack2 >>> 56) & 0xFF)); + out[outPosition + i++] = base + ((unpack2 >> 32) & 0xFFFFFF); + out[outPosition + i++] = base + ((unpack2 >> 8) & 0xFFFFFF); + out[outPosition + i++] = base + (((unpack2 & 0xFF) << 16) | ((unpack3 >>> 48) & 0xFFFF)); + out[outPosition + i++] = base + ((unpack3 >> 24) & 0xFFFFFF); + out[outPosition + i++] = base + (unpack3 & 0xFFFFFF); } while (i < length) { - out[outPosition + i] = delta + (int) get(startIndex + i); + out[outPosition + i] = base + (int) get(startIndex + i); i++; } } @@ -957,10 +965,10 @@ public long get(int index) } @Override - public void getDelta(long[] out, int 
outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { for (int i = 0, indexOffset = (startIndex << 2); i < length; i++, indexOffset += Integer.BYTES) { - out[outPosition + i] = delta + buffer.getInt(offset + indexOffset) & 0xFFFFFFFFL; + out[outPosition + i] = base + (buffer.getInt(offset + indexOffset) & 0xFFFFFFFFL); } } } @@ -983,7 +991,7 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int i = 0; final int unpackSize = 5 * Long.BYTES; @@ -993,17 +1001,17 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final long unpack3 = buffer.getLong(offset + indexOffset + (2 * Long.BYTES)); final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); - out[outPosition + i++] = delta + ((unpack >>> 24) & 0xFFFFFFFFFFL); - out[outPosition + i++] = delta + (((unpack & 0xFFFFFFL) << 16) | ((unpack2 >>> 48) & 0xFFFFL)); - out[outPosition + i++] = delta + ((unpack2 >>> 8) & 0xFFFFFFFFFFL); - out[outPosition + i++] = delta + (((unpack2 & 0xFFL) << 32) | ((unpack3 >>> 32) & 0xFFFFFFFFL)); - out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFFFL) << 8) | ((unpack4 >>> 56 ) & 0xFFL)); - out[outPosition + i++] = delta + ((unpack4 >>> 16) & 0xFFFFFFFFFFL); - out[outPosition + i++] = delta + (((unpack4 & 0xFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); - out[outPosition + i++] = delta + (unpack5 & 0xFFFFFFFFFFL); + out[outPosition + i++] = base + ((unpack >>> 24) & 0xFFFFFFFFFFL); + out[outPosition + i++] = base + (((unpack & 0xFFFFFFL) << 16) | ((unpack2 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = base + ((unpack2 >>> 8) & 0xFFFFFFFFFFL); + out[outPosition + i++] = base + (((unpack2 &
0xFFL) << 32) | ((unpack3 >>> 32) & 0xFFFFFFFFL)); + out[outPosition + i++] = base + (((unpack3 & 0xFFFFFFFFL) << 8) | ((unpack4 >>> 56 ) & 0xFFL)); + out[outPosition + i++] = base + ((unpack4 >>> 16) & 0xFFFFFFFFFFL); + out[outPosition + i++] = base + (((unpack4 & 0xFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); + out[outPosition + i++] = base + (unpack5 & 0xFFFFFFFFFFL); } while (i < length) { - out[outPosition + i] = delta + get(startIndex + i); + out[outPosition + i] = base + get(startIndex + i); i++; } } @@ -1027,7 +1035,7 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int i = 0; final int unpackSize = 6 * Long.BYTES; @@ -1038,17 +1046,17 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final long unpack4 = buffer.getLong(offset + indexOffset + (3 * Long.BYTES)); final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); - out[outPosition + i++] = delta + ((unpack >>> 16) & 0xFFFFFFFFFFFFL); - out[outPosition + i++] = delta + (((unpack & 0xFFFFL) << 32) | ((unpack2 >>> 32) & 0xFFFFFFFFL)); - out[outPosition + i++] = delta + (((unpack2 & 0xFFFFFFFFL) << 16) | ((unpack3 >>> 48) & 0xFFFFL)); - out[outPosition + i++] = delta + (unpack3 & 0xFFFFFFFFFFFFL); - out[outPosition + i++] = delta + ((unpack4 >>> 16) & 0xFFFFFFFFFFFFL); - out[outPosition + i++] = delta + (((unpack4 & 0xFFFFL) << 32) | ((unpack5 >>> 32) & 0xFFFFFFFFL)); - out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); - out[outPosition + i++] = delta + (unpack6 & 0xFFFFFFFFFFFFL); + out[outPosition + i++] = base + ((unpack >>> 16) & 0xFFFFFFFFFFFFL); + out[outPosition + i++] = base + (((unpack & 0xFFFFL) << 32) | ((unpack2 >>> 32) & 0xFFFFFFFFL)); + 
out[outPosition + i++] = base + (((unpack2 & 0xFFFFFFFFL) << 16) | ((unpack3 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = base + (unpack3 & 0xFFFFFFFFFFFFL); + out[outPosition + i++] = base + ((unpack4 >>> 16) & 0xFFFFFFFFFFFFL); + out[outPosition + i++] = base + (((unpack4 & 0xFFFFL) << 32) | ((unpack5 >>> 32) & 0xFFFFFFFFL)); + out[outPosition + i++] = base + (((unpack5 & 0xFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = base + (unpack6 & 0xFFFFFFFFFFFFL); } while (i < length) { - out[outPosition + i] = delta + get(startIndex + i); + out[outPosition + i] = base + get(startIndex + i); i++; } } @@ -1072,7 +1080,7 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { int i = 0; final int unpackSize = 7 * Long.BYTES; @@ -1084,17 +1092,17 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final long unpack5 = buffer.getLong(offset + indexOffset + (4 * Long.BYTES)); final long unpack6 = buffer.getLong(offset + indexOffset + (5 * Long.BYTES)); final long unpack7 = buffer.getLong(offset + indexOffset + (6 * Long.BYTES)); - out[outPosition + i++] = delta + ((unpack >>> 8) & 0xFFFFFFFFFFFFFFL); - out[outPosition + i++] = delta + (((unpack & 0xFFL) << 48) | ((unpack2 >>> 16) & 0xFFFFFFFFFFFFL)); - out[outPosition + i++] = delta + (((unpack2 & 0xFFFFL) << 40) | ((unpack3 >>> 24) & 0xFFFFFFFFFFL)); - out[outPosition + i++] = delta + (((unpack3 & 0xFFFFFFL) << 32) | ((unpack4 >>> 32) & 0xFFFFFFFFL)); - out[outPosition + i++] = delta + (((unpack4 & 0xFFFFFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); - out[outPosition + i++] = delta + (((unpack5 & 0xFFFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); - out[outPosition + i++] = delta + (((unpack6 & 0xFFFFFFFFFFFFL) << 8) | ((unpack7 >>> 56) & 0xFFL)); - out[outPosition + i++] = delta + 
(unpack7 & 0xFFFFFFFFFFFFFFL); + out[outPosition + i++] = base + ((unpack >>> 8) & 0xFFFFFFFFFFFFFFL); + out[outPosition + i++] = base + (((unpack & 0xFFL) << 48) | ((unpack2 >>> 16) & 0xFFFFFFFFFFFFL)); + out[outPosition + i++] = base + (((unpack2 & 0xFFFFL) << 40) | ((unpack3 >>> 24) & 0xFFFFFFFFFFL)); + out[outPosition + i++] = base + (((unpack3 & 0xFFFFFFL) << 32) | ((unpack4 >>> 32) & 0xFFFFFFFFL)); + out[outPosition + i++] = base + (((unpack4 & 0xFFFFFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); + out[outPosition + i++] = base + (((unpack5 & 0xFFFFFFFFFFL) << 16) | ((unpack6 >>> 48) & 0xFFFFL)); + out[outPosition + i++] = base + (((unpack6 & 0xFFFFFFFFFFFFL) << 8) | ((unpack7 >>> 56) & 0xFFL)); + out[outPosition + i++] = base + (unpack7 & 0xFFFFFFFFFFFFFFL); } while (i < length) { - out[outPosition + i] = delta + get(startIndex + i); + out[outPosition + i] = base + get(startIndex + i); i++; } } @@ -1118,10 +1126,10 @@ public long get(int index) } @Override - public void getDelta(long[] out, int outPosition, int startIndex, int length, long delta) + public void getDelta(long[] out, int outPosition, int startIndex, int length, long base) { for (int i = 0, indexOffset = (startIndex << 3); i < length; i++, indexOffset += Long.BYTES) { - out[outPosition + i] = delta + buffer.getLong(offset + indexOffset); + out[outPosition + i] = base + buffer.getLong(offset + indexOffset); } } From 04652b858814f4f6122846af5cc3da33a4561152 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 16 Mar 2021 21:38:01 -0700 Subject: [PATCH 05/11] fixes --- .../benchmark/compression/BaseColumnarLongsBenchmark.java | 8 ++++++-- .../ColumnarLongsEncodeDataFromSegmentBenchmark.java | 2 +- .../ColumnarLongsSelectRowsFromGeneratorBenchmark.java | 4 ++-- .../org/apache/druid/segment/data/VSizeLongSerde.java | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java 
b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java index dabcf07ad719..9093171ff9e9 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/BaseColumnarLongsBenchmark.java @@ -93,7 +93,12 @@ void setupFilters(int rows, double filteredRowCountPercentage) filter.set(rowToAccess); bitmap.add(rowToAccess); } - vectorOffset = new BitmapVectorOffset(VECTOR_SIZE, new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()), 0, rows); + vectorOffset = new BitmapVectorOffset( + VECTOR_SIZE, + new WrappedImmutableRoaringBitmap(bitmap.toImmutableRoaringBitmap()), + 0, + rows + ); } else { vectorOffset = new NoFilterVectorOffset(VECTOR_SIZE, 0, rows); } @@ -174,7 +179,6 @@ static ColumnarLongs createColumnarLongs(String encoding, ByteBuffer buffer) // for testing encodings: validate that all encoders read the same values // noinspection unused static void checkSanity(Map encoders, List encodings, int rows) - throws Exception { for (int i = 0; i < rows; i++) { checkRowSanity(encoders, encodings, i); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java index b8c3f8ab563e..b4dee7a30825 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java @@ -19,8 +19,8 @@ package org.apache.druid.benchmark.compression; -import com.google.api.client.util.Lists; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.druid.jackson.DefaultObjectMapper; import 
org.apache.druid.java.util.common.StringUtils; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java index ae6cd553d004..d6c5cd17b238 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java @@ -71,7 +71,7 @@ public class ColumnarLongsSelectRowsFromGeneratorBenchmark extends BaseColumnarL private double filteredRowCountPercentage; @Setup - public void setup() throws Exception + public void setup() throws IOException { decoders = Maps.newHashMap(); encodedSize = Maps.newHashMap(); @@ -93,7 +93,7 @@ public void setup() throws Exception } @TearDown - public void teardown() throws Exception + public void teardown() { for (ColumnarLongs longs : decoders.values()) { longs.close(); diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index 8fc584b6fb6b..58356ddb5a9d 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -929,7 +929,7 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo final int unpackSize = 3 * Long.BYTES; for (int indexOffset = startIndex * 3; i + 8 < length; indexOffset += unpackSize) { final long unpack = buffer.getLong(offset + indexOffset); - final long unpack2 = buffer.getLong(offset +indexOffset + Long.BYTES); + final long unpack2 = buffer.getLong(offset + indexOffset + Long.BYTES); final long unpack3 = buffer.getLong(offset + indexOffset + Long.BYTES + Long.BYTES); out[outPosition + i++] = base + ((unpack >> 40) & 
0xFFFFFF); out[outPosition + i++] = base + ((unpack >> 16) & 0xFFFFFF); @@ -1005,7 +1005,7 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo out[outPosition + i++] = base + (((unpack & 0xFFFFFFL) << 16) | ((unpack2 >>> 48) & 0xFFFFL)); out[outPosition + i++] = base + ((unpack2 >>> 8) & 0xFFFFFFFFFFL); out[outPosition + i++] = base + (((unpack2 & 0xFFL) << 32) | ((unpack3 >>> 32) & 0xFFFFFFFFL)); - out[outPosition + i++] = base + (((unpack3 & 0xFFFFFFFFL) << 8) | ((unpack4 >>> 56 ) & 0xFFL)); + out[outPosition + i++] = base + (((unpack3 & 0xFFFFFFFFL) << 8) | ((unpack4 >>> 56) & 0xFFL)); out[outPosition + i++] = base + ((unpack4 >>> 16) & 0xFFFFFFFFFFL); out[outPosition + i++] = base + (((unpack4 & 0xFFFFL) << 24) | ((unpack5 >>> 40) & 0xFFFFFFL)); out[outPosition + i++] = base + (unpack5 & 0xFFFFFFFFFFL); From ba2ca1dd71d4a8b6d6def82b8f4a74401aa8460d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 16 Mar 2021 21:41:43 -0700 Subject: [PATCH 06/11] forbidden --- .../ColumnarLongsEncodeDataFromSegmentBenchmark.java | 4 ++-- .../ColumnarLongsSelectRowsFromGeneratorBenchmark.java | 6 +++--- .../ColumnarLongsSelectRowsFromSegmentBenchmark.java | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java index b4dee7a30825..22e99ca872b9 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java @@ -21,7 +21,6 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.apache.druid.jackson.DefaultObjectMapper; import 
org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.IndexIO; @@ -56,6 +55,7 @@ import java.nio.file.Files; import java.nio.file.StandardOpenOption; import java.util.ArrayList; +import java.util.LinkedHashSet; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -123,7 +123,7 @@ private void initializeSegmentValueIntermediaryFile() throws IOException () -> 0 ); try (final QueryableIndex index = INDEX_IO.loadIndex(new File(segmentPath))) { - final Set columnNames = Sets.newLinkedHashSet(); + final Set columnNames = new LinkedHashSet<>(); columnNames.add(ColumnHolder.TIME_COLUMN_NAME); Iterables.addAll(columnNames, index.getColumnNames()); final ColumnHolder column = index.getColumnHolder(columnName); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java index d6c5cd17b238..cde514ba84b8 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java @@ -19,7 +19,6 @@ package org.apache.druid.benchmark.compression; -import com.google.common.collect.Maps; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.segment.data.ColumnarLongs; import org.openjdk.jmh.annotations.Benchmark; @@ -44,6 +43,7 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -73,8 +73,8 @@ public class ColumnarLongsSelectRowsFromGeneratorBenchmark extends BaseColumnarL @Setup public void setup() throws IOException { - decoders = Maps.newHashMap(); - encodedSize = Maps.newHashMap(); + decoders = new HashMap<>(); + encodedSize = new HashMap<>(); 
setupFromFile(encoding); setupFilters(rows, filteredRowCountPercentage); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java index 76c66e6ee34f..544a1eed94c5 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java @@ -19,7 +19,6 @@ package org.apache.druid.benchmark.compression; -import com.google.common.collect.Maps; import com.google.common.io.Files; import org.apache.druid.segment.data.ColumnarLongs; import org.openjdk.jmh.annotations.Benchmark; @@ -44,6 +43,7 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -66,8 +66,8 @@ public class ColumnarLongsSelectRowsFromSegmentBenchmark extends BaseColumnarLon @Setup public void setup() throws Exception { - decoders = Maps.newHashMap(); - encodedSize = Maps.newHashMap(); + decoders = new HashMap<>(); + encodedSize = new HashMap<>(); setupFilters(rows, filteredRowCountPercentage); setupFromFile(encoding); From 3d330a85f1274df961f58782a30fb9f8f7485838 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 16 Mar 2021 23:50:10 -0700 Subject: [PATCH 07/11] benchmark style --- .../ColumnarLongsEncodeDataFromSegmentBenchmark.java | 8 ++++---- .../ColumnarLongsSelectRowsFromGeneratorBenchmark.java | 2 +- .../ColumnarLongsSelectRowsFromSegmentBenchmark.java | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java 
b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java index 22e99ca872b9..aa2f119754bd 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java @@ -20,7 +20,6 @@ package org.apache.druid.benchmark.compression; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.IndexIO; @@ -56,6 +55,7 @@ import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.LinkedHashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -72,7 +72,7 @@ public void setup() throws Exception File dir = getTmpDir(); File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); - ArrayList values = Lists.newArrayList(); + List values = new ArrayList<>(); try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) { String line; while ((line = br.readLine()) != null) { @@ -118,11 +118,11 @@ private void initializeSegmentValueIntermediaryFile() throws IOException File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName)); if (!dataFile.exists()) { - IndexIO INDEX_IO = new IndexIO( + final IndexIO indexIO = new IndexIO( new DefaultObjectMapper(), () -> 0 ); - try (final QueryableIndex index = INDEX_IO.loadIndex(new File(segmentPath))) { + try (final QueryableIndex index = indexIO.loadIndex(new File(segmentPath))) { final Set columnNames = new LinkedHashSet<>(); columnNames.add(ColumnHolder.TIME_COLUMN_NAME); Iterables.addAll(columnNames, index.getColumnNames()); diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java index cde514ba84b8..d25ded778926 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromGeneratorBenchmark.java @@ -144,7 +144,7 @@ public void selectRowsVectorized(Blackhole blackhole) } else { columnDecoder.get(vector, vectorOffset.getOffsets(), vectorOffset.getCurrentVectorSize()); } - for (int i = 0 ; i < vectorOffset.getCurrentVectorSize(); i++) { + for (int i = 0; i < vectorOffset.getCurrentVectorSize(); i++) { blackhole.consume(vector[i]); } vectorOffset.advance(); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java index 544a1eed94c5..0fadb6249e79 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsSelectRowsFromSegmentBenchmark.java @@ -138,7 +138,7 @@ public void selectRowsVectorized(Blackhole blackhole) } else { columnDecoder.get(vector, vectorOffset.getOffsets(), vectorOffset.getCurrentVectorSize()); } - for (int i = 0 ; i < vectorOffset.getCurrentVectorSize(); i++) { + for (int i = 0; i < vectorOffset.getCurrentVectorSize(); i++) { blackhole.consume(vector[i]); } vectorOffset.advance(); From ff81b6db24474f6c93eb95a7243b34a9ae5b298a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 17 Mar 2021 03:24:39 -0700 Subject: [PATCH 08/11] idk why --- .../java/org/apache/druid/segment/data/VSizeLongSerde.java | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index 58356ddb5a9d..d67b8adf3ccd 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -809,7 +809,7 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo out[outPosition + i++] = base + (unpack2 & 0xFFF); } while (i < length) { - out[outPosition + i] = base + (int) get(startIndex + i); + out[outPosition + i] = base + get(startIndex + i); i++; } } @@ -899,7 +899,7 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo out[outPosition + i++] = base + (unpack3 & 0xFFFFF); } while (i < length) { - out[outPosition + i] = base + (int) get(startIndex + i); + out[outPosition + i] = base + get(startIndex + i); i++; } } @@ -941,7 +941,7 @@ public void getDelta(long[] out, int outPosition, int startIndex, int length, lo out[outPosition + i++] = base + (unpack3 & 0xFFFFFF); } while (i < length) { - out[outPosition + i] = base + (int) get(startIndex + i); + out[outPosition + i] = base + get(startIndex + i); i++; } } From 23309e07abf542d873d8a30016605ed79c81750d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 17 Mar 2021 21:16:15 -0700 Subject: [PATCH 09/11] adjust --- .../apache/druid/segment/data/VSizeLongSerde.java | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index d67b8adf3ccd..a72ff09062ef 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -20,6 +20,7 @@ package 
org.apache.druid.segment.data; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.UOE; import javax.annotation.Nullable; import java.io.Closeable; @@ -329,7 +330,7 @@ public void write(long value) throws IOException curByte = (byte) value; first = false; } else { - curByte = (byte) ((curByte << 4) | ((value >>> (numBytes << 3)) & 0xF)); + curByte = (byte) ((curByte << 4) | ((value >> (numBytes << 3)) & 0xF)); buffer.put(curByte); first = true; } @@ -426,12 +427,7 @@ public interface LongDeserializer * Unpack a contiguous vector of long values at the specified start index of length and adjust them by the supplied * delta base value. */ - default void getDelta(long[] out, int outPosition, int startIndex, int length, long base) - { - for (int i = 0; i < length; i++) { - out[outPosition + i] = base + get(startIndex + i); - } - } + void getDelta(long[] out, int outPosition, int startIndex, int length, long base); /** * Unpack a non-contiguous vector of long values at the specified indexes and adjust them by the supplied delta base @@ -457,9 +453,7 @@ default int getDelta(long[] out, int outPosition, int[] indexes, int length, int */ default void getTable(long[] out, int outPosition, int startIndex, int length, long[] table) { - for (int i = 0; i < length; i++) { - out[outPosition + i] = table[(int) get(startIndex + i)]; - } + throw new UOE("Table decoding not supported for %s", this.getClass().getSimpleName()); } /** From c3e3fb79dbf29d11ce7a14c1993947c3679e9503 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 18 Mar 2021 17:51:57 -0700 Subject: [PATCH 10/11] add preconditions for value >= 0 for writers --- .../java/org/apache/druid/segment/data/VSizeLongSerde.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index a72ff09062ef..372c9328ce40 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -19,6 +19,7 @@ package org.apache.druid.segment.data; +import com.google.common.base.Preconditions; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.UOE; @@ -213,6 +214,7 @@ public Size1Ser(ByteBuffer buffer, int offset) @Override public void write(long value) throws IOException { + Preconditions.checkArgument(value >= 0); if (count == 8) { buffer.put(curByte); count = 0; @@ -267,6 +269,7 @@ public Size2Ser(ByteBuffer buffer, int offset) @Override public void write(long value) throws IOException { + Preconditions.checkArgument(value >= 0); if (count == 8) { buffer.put(curByte); count = 0; @@ -324,6 +327,7 @@ public Mult4Ser(ByteBuffer buffer, int offset, int numBytes) @Override public void write(long value) throws IOException { + Preconditions.checkArgument(value >= 0); int shift = 0; if (first) { shift = 4; @@ -388,6 +392,7 @@ public Mult8Ser(ByteBuffer buffer, int offset, int numBytes) @Override public void write(long value) throws IOException { + Preconditions.checkArgument(value >= 0); for (int i = numBytes - 1; i >= 0; i--) { buffer.put((byte) (value >>> (i * 8))); if (output != null) { From 84f1bcc757f787b2d2bd4af6e81f6894548fb78e Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 18 Mar 2021 21:06:13 -0700 Subject: [PATCH 11/11] add 64 bit exception --- .../java/org/apache/druid/segment/data/VSizeLongSerde.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java index 372c9328ce40..ff0316d0f153 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VSizeLongSerde.java @@ -392,7 +392,10 @@ public 
Mult8Ser(ByteBuffer buffer, int offset, int numBytes) @Override public void write(long value) throws IOException { - Preconditions.checkArgument(value >= 0); + if (numBytes != 8) { + // if the value is not stored in a full long, ensure it is zero or positive + Preconditions.checkArgument(value >= 0); + } for (int i = numBytes - 1; i >= 0; i--) { buffer.put((byte) (value >>> (i * 8))); if (output != null) {