diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoWriterTools.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoWriterTools.java index c0f049b17..778c957ab 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoWriterTools.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoWriterTools.java @@ -1010,12 +1010,7 @@ public static int sizeOfVarInt32(final int value) { * @return the number of bytes for encoded value */ public static int sizeOfVarInt64(final long value) { - if (value >= 0) { - return sizeOfUnsignedVarInt64(value); - } else { - // Must sign-extend. - return MAX_VARINT_SIZE; - } + return sizeOfUnsignedVarInt64(value); } /** @@ -1025,11 +1020,7 @@ public static int sizeOfVarInt64(final long value) { * @return the number of bytes for encoded value */ public static int sizeOfUnsignedVarInt32(final int value) { - if ((value & (~0 << 7)) == 0) return 1; - if ((value & (~0 << 14)) == 0) return 2; - if ((value & (~0 << 21)) == 0) return 3; - if ((value & (~0 << 28)) == 0) return 4; - return 5; + return sizeOfUnsignedVarInt64(value); } /** @@ -1039,23 +1030,10 @@ public static int sizeOfUnsignedVarInt32(final int value) { * @return the number of bytes for encoded value */ static int sizeOfUnsignedVarInt64(long value) { - // handle two popular special cases up front ... + // handle popular special case up front if ((value & (~0L << 7)) == 0L) return 1; - if (value < 0L) return 10; - // ... leaving us with 8 remaining, which we can divide and conquer - int n = 2; - if ((value & (~0L << 35)) != 0L) { - n += 4; - value >>>= 28; - } - if ((value & (~0L << 21)) != 0L) { - n += 2; - value >>>= 14; - } - if ((value & (~0L << 14)) != 0L) { - n += 1; - } - return n; + final int clz = Long.numberOfLeadingZeros(value); + return ((Long.SIZE * 9 + (1 << 6)) - (clz * 9)) >>> 6; } /** diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarInt32SizeOfBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarInt32SizeOfBench.java new file mode 100644 index 000000000..86d498ddc --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarInt32SizeOfBench.java @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint; + +import com.hedera.pbj.runtime.ProtoWriterTools; +import java.io.IOException; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.CompilerControl; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +@SuppressWarnings("unused") +@State(Scope.Benchmark) +@Fork(1) +@Warmup(iterations = 3, time = 2) +@Measurement(iterations = 4, time = 2) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Mode.AverageTime) +public class VarInt32SizeOfBench { + public static final int NUM_OF_VALUES = 1024; + + @Param({"1", "2", "4"}) + public int numOfBytes; + + private int[] numbers; + + @Setup + public void setupNumbers() { + Random random = new Random(9387498731984L); + numbers = new int[NUM_OF_VALUES]; + final int minValue = numOfBytes == 1 ? 0 : 1 << ((numOfBytes - 1) * 7); + final int maxValue = (1 << (numOfBytes * 7)) - 1; + for (int i = 0; i < NUM_OF_VALUES; i++) { + this.numbers[i] = random.nextInt(minValue, maxValue); + } + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void pbjProtoTools(Blackhole blackhole) { + for (int i = 0; i < NUM_OF_VALUES; i++) blackhole.consume(ProtoWriterTools.sizeOfUnsignedVarInt32(numbers[i])); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void kafka(Blackhole blackhole) { + for (int i = 0; i < NUM_OF_VALUES; i++) blackhole.consume(kafkaSizeOfUnsignedVarint(numbers[i])); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void google(Blackhole blackhole) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) + blackhole.consume(com.google.protobuf.CodedOutputStream.computeInt32SizeNoTag(numbers[i])); + } + + public static int kafkaSizeOfUnsignedVarint(int value) { + // Protocol buffers varint encoding is variable length, with a minimum of 1 byte + // (for zero). The values themselves are not important. What's important here is + // any leading zero bits are dropped from output. We can use this leading zero + // count w/ fast intrinsic to calc the output length directly. + // Test cases verify this matches the output for loop logic exactly. + // return (38 - leadingZeros) / 7 + leadingZeros / 32; + // The above formula provides the implementation, but the Java encoding is suboptimal + // when we have a narrow range of integers, so we can do better manually + int leadingZeros = Integer.numberOfLeadingZeros(value); + int leadingZerosBelow38DividedBy7 = ((38 - leadingZeros) * 0b10010010010010011) >>> 19; + return leadingZerosBelow38DividedBy7 + (leadingZeros >>> 5); + } + + public static void main(String[] args) throws Exception { + Options opt = new OptionsBuilder() + .include(VarInt32SizeOfBench.class.getSimpleName()) + .build(); + + new Runner(opt).run(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarInt64SizeOfBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarInt64SizeOfBench.java new file mode 100644 index 000000000..abf618238 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarInt64SizeOfBench.java @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint; + +import com.hedera.pbj.runtime.ProtoWriterTools; +import java.io.IOException; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.CompilerControl; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +@SuppressWarnings("unused") +@State(Scope.Benchmark) +@Fork(1) +// @Warmup(iterations = 4, time = 2) +// @Measurement(iterations = 5, time = 2) +@Warmup(iterations = 3, time = 2) +@Measurement(iterations = 4, time = 2) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Mode.AverageTime) +public class VarInt64SizeOfBench { + public static final int NUM_OF_VALUES = 1024; + /** + * Number of bytes to read at a time (1, 2, 4, or 8). So create inputs with 1 byte siz,e, 2 byte size, 4 byte size, + * and 8 byte size. + */ + @Param({"1", "2", "4", "8"}) + public int numOfBytes; + + private long[] numbers; + + @Setup + public void setupNumbers() { + Random random = new Random(9387498731984L); + numbers = new long[NUM_OF_VALUES]; + final long minValue = numOfBytes == 1 ? 0L : 1L << ((numOfBytes - 1) * 7); + final long maxValue = (1L << (numOfBytes * 7)) - 1; + // System.out.println("Generating "+NUM_OF_VALUES+" random numbers between "+min + for (int i = 0; i < NUM_OF_VALUES; i++) { + this.numbers[i] = random.nextLong(minValue, maxValue); + } + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void pbjProtoTools(Blackhole blackhole) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) blackhole.consume(ProtoWriterTools.sizeOfVarInt64(numbers[i])); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void kafka(Blackhole blackhole) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) blackhole.consume(kafkaSizeOfVarlong(numbers[i])); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void google(Blackhole blackhole) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) + blackhole.consume(com.google.protobuf.CodedOutputStream.computeInt64SizeNoTag(numbers[i])); + } + + public static int kafkaSizeOfVarlong(long value) { + long v = (value << 1) ^ (value >> 63); + int leadingZeros = Long.numberOfLeadingZeros(v); + int leadingZerosBelow70DividedBy7 = ((70 - leadingZeros) * 0b10010010010010011) >>> 19; + return leadingZerosBelow70DividedBy7 + (leadingZeros >>> 6); + } + + public static int kafkaSizeOfUnsignedVarint(int value) { + // Protocol buffers varint encoding is variable length, with a minimum of 1 byte + // (for zero). The values themselves are not important. What's important here is + // any leading zero bits are dropped from output. We can use this leading zero + // count w/ fast intrinsic to calc the output length directly. + // Test cases verify this matches the output for loop logic exactly. + // return (38 - leadingZeros) / 7 + leadingZeros / 32; + // The above formula provides the implementation, but the Java encoding is suboptimal + // when we have a narrow range of integers, so we can do better manually + int leadingZeros = Integer.numberOfLeadingZeros(value); + int leadingZerosBelow38DividedBy7 = ((38 - leadingZeros) * 0b10010010010010011) >>> 19; + return leadingZerosBelow38DividedBy7 + (leadingZeros >>> 5); + } + + public static void main(String[] args) throws Exception { + Options opt = new OptionsBuilder() + .include(VarInt64SizeOfBench.class.getSimpleName()) + .build(); + + new Runner(opt).run(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntReaderBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntReaderBench.java new file mode 100644 index 000000000..592ebdc58 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntReaderBench.java @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint; + +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.hedera.pbj.integration.NonSynchronizedByteArrayInputStream; +import com.hedera.pbj.runtime.MalformedProtobufException; +import com.hedera.pbj.runtime.io.UnsafeUtils; +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.pbj.runtime.io.stream.ReadableStreamingData; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.*; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +@SuppressWarnings("unused") +@State(Scope.Benchmark) +@Fork(1) +@Warmup(iterations = 4, time = 2) +@Measurement(iterations = 5, time = 2) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Mode.AverageTime) +public class VarIntReaderBench { + private static final int NUM_OF_VALUES = 1201; + + ByteBuffer buffer = ByteBuffer.allocate(256 * 1024); + final ByteBuffer bufferDirect = ByteBuffer.allocateDirect(256 * 1024); + final BufferedData dataBuffer = BufferedData.wrap(buffer); + final BufferedData dataBufferDirect = BufferedData.wrap(bufferDirect); + + Bytes bytes = Bytes.EMPTY; + + InputStream bais = null; + ReadableStreamingData rsd = null; + + InputStream baisNonSync = null; + ReadableStreamingData rsdNonSync = null; + + private final int[] offsets = new int[NUM_OF_VALUES]; + /** + * Number of bytes to read at a time (1, 2, 4, or 8). So create inputs with 1 byte siz,e, 2 byte size, 4 byte size, + * and 8 byte size. + */ + @Param({"1", "2", "4", "8"}) + public int numOfBytes; + + public VarIntReaderBench() { + try { + CodedOutputStream cout = CodedOutputStream.newInstance(buffer); + Random random = new Random(9387498731984L); + int pos = 0; + offsets[pos++] = 0; + for (int i = 0; i < 600; i++) { + cout.writeUInt64NoTag(random.nextLong(0, 128)); + offsets[pos++] = cout.getTotalBytesWritten(); + } + for (int i = 0; i < 150; i++) { + cout.writeUInt64NoTag(random.nextLong(128, 256)); + offsets[pos++] = cout.getTotalBytesWritten(); + } + for (int i = 0; i < 150; i++) { + cout.writeUInt64NoTag(random.nextLong(256, Integer.MAX_VALUE)); + offsets[pos++] = cout.getTotalBytesWritten(); + } + for (int i = 0; i < 150; i++) { + cout.writeUInt64NoTag(random.nextLong(Integer.MIN_VALUE, Integer.MAX_VALUE)); + offsets[pos++] = cout.getTotalBytesWritten(); + } + for (int i = 0; i < 150; i++) { + cout.writeUInt64NoTag(random.nextLong(0, Long.MAX_VALUE)); + offsets[pos++] = cout.getTotalBytesWritten(); + } + cout.flush(); + // copy to direct buffer + buffer.flip(); + bufferDirect.put(buffer); + byte[] bts = new byte[buffer.limit()]; + for (int i = 0; i < buffer.limit(); i++) { + bts[i] = buffer.get(i); + } + bytes = Bytes.wrap(bts); + bais = new ByteArrayInputStream(bts.clone()); + rsd = new ReadableStreamingData(bais); + baisNonSync = new NonSynchronizedByteArrayInputStream(bts.clone()); + rsdNonSync = new ReadableStreamingData(baisNonSync); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void dataBufferRead(Blackhole blackhole) throws IOException { + dataBuffer.reset(); + for (int i = 0; i < 1200; i++) { + blackhole.consume(dataBuffer.readVarLong(false)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void dataBufferGet(Blackhole blackhole) throws IOException { + dataBuffer.reset(); + int offset = 0; + for (int i = 0; i < 1200; i++) { + blackhole.consume(dataBuffer.getVarLong(offsets[offset++], false)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void dataBufferDirectRead(Blackhole blackhole) throws IOException { + dataBufferDirect.reset(); + for (int i = 0; i < 1200; i++) { + blackhole.consume(dataBufferDirect.readVarLong(false)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void dataBytesGet(Blackhole blackhole) throws IOException { + int offset = 0; + for (int i = 0; i < 1200; i++) { + blackhole.consume(bytes.getVarLong(offsets[offset++], false)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void dataSyncInputStreamRead(Blackhole blackhole) throws IOException { + bais.reset(); + for (int i = 0; i < 1200; i++) { + blackhole.consume(rsd.readVarLong(false)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void dataNonSyncInputStreamRead(Blackhole blackhole) throws IOException { + baisNonSync.reset(); + for (int i = 0; i < 1200; i++) { + blackhole.consume(rsdNonSync.readVarLong(false)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void richardGet(Blackhole blackhole) throws MalformedProtobufException { + int offset = 0; + buffer.clear(); + for (int i = 0; i < 1200; i++) { + blackhole.consume(getVarLongRichard(offsets[offset++], buffer)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void googleRead(Blackhole blackhole) throws IOException { + buffer.clear(); + final CodedInputStream codedInputStream = CodedInputStream.newInstance(buffer); + for (int i = 0; i < 1200; i++) { + blackhole.consume(codedInputStream.readRawVarint64()); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void googleDirecRead(Blackhole blackhole) throws IOException { + bufferDirect.clear(); + final CodedInputStream codedInputStream = CodedInputStream.newInstance(bufferDirect); + for (int i = 0; i < 1200; i++) { + blackhole.consume(codedInputStream.readRawVarint64()); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void googleSlowPathRead(Blackhole blackhole) throws MalformedProtobufException { + buffer.clear(); + for (int i = 0; i < 1200; i++) { + blackhole.consume(readRawVarint64SlowPath(buffer)); + } + } + + @Benchmark + @OperationsPerInvocation(1200) + public void googleSlowPathDirectRead(Blackhole blackhole) throws MalformedProtobufException { + bufferDirect.clear(); + for (int i = 0; i < 1200; i++) { + blackhole.consume(readRawVarint64SlowPath(bufferDirect)); + } + } + + private static long readRawVarint64SlowPath(ByteBuffer buf) throws MalformedProtobufException { + long result = 0; + for (int shift = 0; shift < 64; shift += 7) { + final byte b = buf.get(); + result |= (long) (b & 0x7F) << shift; + if ((b & 0x80) == 0) { + return result; + } + } + throw new MalformedProtobufException("Malformed varInt"); + } + + private static final int VARINT_CONTINUATION_MASK = 0b1000_0000; + private static final int VARINT_DATA_MASK = 0b0111_1111; + private static final int NUM_BITS_PER_VARINT_BYTE = 7; + + public static long getVarLongRichard(int offset, ByteBuffer buf) throws MalformedProtobufException { + // Protobuf encodes smaller integers with fewer bytes than larger integers. It takes a full byte + // to encode 7 bits of information. So, if all 64 bits of a long are in use (for example, if the + // leading bit is 1, or even all bits are 1) then it will take 10 bytes to transmit what would + // have otherwise been 8 bytes of data! + // + // Thus, at most, reading a varint should involve reading 10 bytes of data. + // + // The leading bit of each byte is a continuation bit. If set, another byte will follow. + // If we read 10 bytes in sequence with a continuation bit set, then we have a malformed + // byte stream. + // The bytes come least to most significant 7 bits. So the first byte we read represents + // the lowest 7 bytes, then the next byte is the next highest 7 bytes, etc. + + // The final value. + long value = 0; + // The amount to shift the bits we read by before AND with the value + int shift = -NUM_BITS_PER_VARINT_BYTE; + + // This method works with heap byte buffers only + final byte[] arr = buf.array(); + final int arrOffset = buf.arrayOffset() + offset; + + int i = 0; + for (; i < 10; i++) { + // Use UnsafeUtil instead of arr[arrOffset + i] to avoid array range checks + byte b = UnsafeUtils.getArrayByteNoChecks(arr, arrOffset + i); + value |= (long) (b & 0x7F) << (shift += NUM_BITS_PER_VARINT_BYTE); + + if (b >= 0) { + return value; + } + } + // If we read 10 in a row all with the leading continuation bit set, then throw a malformed + // protobuf exception + throw new MalformedProtobufException("Malformed var int"); + } + + public static void main(String[] args) throws Exception { + Options opt = new OptionsBuilder() + .include(VarIntReaderBench.class.getSimpleName()) + .build(); + + new Runner(opt).run(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntWriterBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntWriterBench.java new file mode 100644 index 000000000..fac11c3ce --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntWriterBench.java @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint; + +import com.hedera.pbj.integration.jmh.varint.writers.GoogleCodedByteArray; +import com.hedera.pbj.integration.jmh.varint.writers.GoogleCodedByteBufferDirect; +import com.hedera.pbj.integration.jmh.varint.writers.GoogleCodedOutputStream; +import com.hedera.pbj.integration.jmh.varint.writers.KafkaByteBuffer; +import com.hedera.pbj.integration.jmh.varint.writers.PbjBufferedData; +import com.hedera.pbj.integration.jmh.varint.writers.PbjBufferedDataDirect; +import com.hedera.pbj.integration.jmh.varint.writers.PbjWritableStreamingData; +import com.hedera.pbj.integration.jmh.varint.writers.RichardStartinByteArray; +import java.io.IOException; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.CompilerControl; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +@SuppressWarnings("unused") +@State(Scope.Benchmark) +@Fork(1) +@Warmup(iterations = 4, time = 2) +@Measurement(iterations = 5, time = 2) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Mode.AverageTime) +public class VarIntWriterBench { + public static final int NUM_OF_VALUES = 4096 * 4; + /** + * Number of bytes to read at a time (1, 2, 4, or 8). So create inputs with 1 byte siz,e, 2 byte size, 4 byte size, + * and 8 byte size. + */ + // @Param({"1", "2", "3", "4", "8"}) + @Param({"4"}) + public int numOfBytes; + + private long[] numbers; + + @Setup + public void setupNumbers() { + Random random = new Random(9387498731984L); + numbers = new long[NUM_OF_VALUES]; + final long minValue = numOfBytes == 1 ? 0L : 1L << ((numOfBytes - 1) * 7); + final long maxValue = (1L << (numOfBytes * 7)) - 1; + // System.out.println("Generating "+NUM_OF_VALUES+" random numbers between "+min + for (int i = 0; i < NUM_OF_VALUES; i++) { + this.numbers[i] = random.nextLong(minValue, maxValue); + } + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void googleCodedOutputStream(GoogleCodedOutputStream state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void googleCodedByteArray(GoogleCodedByteArray state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void googleCodedByteBufferDirect(GoogleCodedByteBufferDirect state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void pbjWritableStreamingData(PbjWritableStreamingData state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void pbjBufferedData(PbjBufferedData state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void pbjBufferedDataDirect(PbjBufferedDataDirect state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void richardStartinByteArray(RichardStartinByteArray state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void kafkaByteBuffer(KafkaByteBuffer state) throws IOException { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + public static void main(String[] args) throws Exception { + Options opt = new OptionsBuilder() + .include(VarIntWriterBench.class.getSimpleName()) + .build(); + + new Runner(opt).run(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedByteArray.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedByteArray.java new file mode 100644 index 000000000..a36cbc326 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedByteArray.java @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.google.protobuf.CodedOutputStream; +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import java.io.IOException; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class GoogleCodedByteArray { + private byte[] byteArray; + private CodedOutputStream output; + + @Setup(Level.Trial) + public void setup() { + byteArray = new byte[8 * VarIntWriterBench.NUM_OF_VALUES]; + output = CodedOutputStream.newInstance(byteArray); + } + + public void writeVarint(long value) throws IOException { + output.writeUInt64NoTag(value); + } + + public void endLoop() { + output = CodedOutputStream.newInstance(byteArray); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedByteBufferDirect.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedByteBufferDirect.java new file mode 100644 index 000000000..97d639f23 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedByteBufferDirect.java @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.google.protobuf.CodedOutputStream; +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import java.io.IOException; +import java.nio.ByteBuffer; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class GoogleCodedByteBufferDirect { + private ByteBuffer byteBuffer; + private CodedOutputStream output; + + @Setup(Level.Trial) + public void setup() { + byteBuffer = ByteBuffer.allocateDirect(8 * VarIntWriterBench.NUM_OF_VALUES); + output = CodedOutputStream.newInstance(byteBuffer); + } + + public void writeVarint(long value) throws IOException { + output.writeUInt64NoTag(value); + } + + public void endLoop() { + byteBuffer.clear(); + output = CodedOutputStream.newInstance(byteBuffer); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedOutputStream.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedOutputStream.java new file mode 100644 index 000000000..561a6ff4d --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedOutputStream.java @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.google.protobuf.CodedOutputStream; +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class GoogleCodedOutputStream { + private ByteArrayOutputStream byteArrayOutputStream; + private CodedOutputStream output; + + @Setup(Level.Trial) + public void setup() { + byteArrayOutputStream = new ByteArrayOutputStream(8 * VarIntWriterBench.NUM_OF_VALUES); + output = CodedOutputStream.newInstance(byteArrayOutputStream); + } + + public void writeVarint(long value) throws IOException { + output.writeUInt64NoTag(value); + } + + public void endLoop() { + byteArrayOutputStream.reset(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/KafkaByteBuffer.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/KafkaByteBuffer.java new file mode 100644 index 000000000..fec433f9e --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/KafkaByteBuffer.java @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import java.nio.ByteBuffer; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +/** + * A varint writer based on the code from Kafka project + * ByteUtils.java + */ +@State(Scope.Benchmark) +public class KafkaByteBuffer { + private static final int[] VAR_INT_LENGTHS = new int[65]; + + static { + for (int i = 0; i <= 64; ++i) { + VAR_INT_LENGTHS[i] = ((63 - i) / 7); + } + } + + private ByteBuffer buffer; + + @Setup(Level.Trial) + public void setup() { + buffer = ByteBuffer.allocate(8 * VarIntWriterBench.NUM_OF_VALUES); + } + + @SuppressWarnings("fallthrough") + public void writeVarint(long v) { + while ((v & 0xffffffffffffff80L) != 0L) { + byte b = (byte) ((v & 0x7f) | 0x80); + buffer.put(b); + v >>>= 7; + } + buffer.put((byte) v); + } + + public void endLoop() { + buffer.clear(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjBufferedData.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjBufferedData.java new file mode 100644 index 000000000..509367907 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjBufferedData.java @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import java.io.IOException; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class PbjBufferedData { + private BufferedData output; + + @Setup(Level.Trial) + public void setup() { + output = BufferedData.allocate(8 * VarIntWriterBench.NUM_OF_VALUES); + } + + public void writeVarint(long value) throws IOException { + output.writeVarLong(value, false); + } + + public void endLoop() { + output.reset(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjBufferedDataDirect.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjBufferedDataDirect.java new file mode 100644 index 000000000..567cfcbd9 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjBufferedDataDirect.java @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import java.io.IOException; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class PbjBufferedDataDirect { + private BufferedData output; + + @Setup(Level.Trial) + public void setup() { + output = BufferedData.allocateOffHeap(8 * VarIntWriterBench.NUM_OF_VALUES); + } + + public void writeVarint(long value) throws IOException { + output.writeVarLong(value, false); + } + + public void endLoop() { + output.reset(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjWritableStreamingData.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjWritableStreamingData.java new file mode 100644 index 000000000..aefca0777 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjWritableStreamingData.java @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import com.hedera.pbj.runtime.io.stream.WritableStreamingData; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class PbjWritableStreamingData { + private ByteArrayOutputStream byteArrayOutputStream; + private WritableStreamingData output; + + @Setup(Level.Trial) + public void setup() { + byteArrayOutputStream = new ByteArrayOutputStream(8 * VarIntWriterBench.NUM_OF_VALUES); + output = new WritableStreamingData(byteArrayOutputStream); + } + + public void writeVarint(long value) throws IOException { + output.writeVarLong(value, false); + } + + public void endLoop() { + byteArrayOutputStream.reset(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/RichardStartinByteArray.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/RichardStartinByteArray.java new file mode 100644 index 000000000..90423f9bf --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/RichardStartinByteArray.java @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import java.io.IOException; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +/** + * A varint writer based on the code from Richard Startin's post + * Precompute varint lengths, 64 bit values + */ +@State(Scope.Benchmark) +public class RichardStartinByteArray { + private static final int[] VAR_INT_LENGTHS = new int[65]; + + static { + for (int i = 0; i <= 64; ++i) { + VAR_INT_LENGTHS[i] = ((63 - i) / 7); + } + } + + private byte[] buffer; + private int position = 0; + + @Setup(Level.Trial) + public void setup() { + buffer = new byte[8 * VarIntWriterBench.NUM_OF_VALUES]; + } + + @SuppressWarnings("fallthrough") + public void writeVarint(long value) throws IOException { + int length = VAR_INT_LENGTHS[Long.numberOfLeadingZeros(value)]; + buffer[position + length] = (byte) (value >>> (length * 7)); + switch (length - 1) { + case 8: + buffer[position + 8] = (byte) ((value >>> 56) | 0x80); + // Deliberate fallthrough + case 7: + buffer[position + 7] = (byte) ((value >>> 49) | 0x80); + // Deliberate fallthrough + case 6: + buffer[position + 6] = (byte) ((value >>> 42) | 0x80); + // Deliberate fallthrough + case 5: + buffer[position + 5] = (byte) ((value >>> 35) | 0x80); + // Deliberate fallthrough + case 4: + buffer[position + 4] = (byte) ((value >>> 28) | 0x80); + // Deliberate fallthrough + case 3: + buffer[position + 3] = (byte) ((value >>> 21) | 0x80); + // Deliberate fallthrough + case 2: + buffer[position + 2] = (byte) ((value >>> 14) | 0x80); + // Deliberate fallthrough + case 1: + buffer[position + 1] = (byte) ((value >>> 7) | 0x80); + // Deliberate fallthrough + case 0: + buffer[position] = (byte) (value | 0x80); + } + } + + public void endLoop() { + position = 0; + } +}