diff --git a/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt index bfeea7a0c4..78a6ca9004 100644 --- a/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt @@ -457,7 +457,7 @@ class IonRawTextWriter_1_1 internal constructor( output.appendAscii(" ") } - override fun stepInTaglessElementList(macroId: Int, macroName: String?) { + override fun stepInTaglessElementList(macroId: Int, macroName: String?, lengthPrefixed: Boolean) { stepInList(usingLengthPrefix = false) // Arg here doesn't actually matter. writeMacroEncodingTag(macroName ?: macroId.toString()) output.appendAscii(" ") @@ -470,7 +470,7 @@ class IonRawTextWriter_1_1 internal constructor( output.appendAscii(" ") } - override fun stepInTaglessElementSExp(macroId: Int, macroName: String?) { + override fun stepInTaglessElementSExp(macroId: Int, macroName: String?, lengthPrefixed: Boolean) { stepInSExp(usingLengthPrefix = false) // Arg here doesn't actually matter. writeMacroEncodingTag(macroName ?: macroId.toString()) output.appendAscii(" ") @@ -482,11 +482,6 @@ class IonRawTextWriter_1_1 internal constructor( currentContainer = EExpression } - override fun writeTaglessInt(implicitOpcode: Int, value: Int) { - // TODO: Consider checking the opcode - writeInt(value.toLong()) - } - override fun writeTaglessInt(implicitOpcode: Int, value: Long) { writeInt(value) } diff --git a/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java b/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java index a52ccd7a35..278d3b3a27 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java +++ b/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java @@ -52,7 +52,7 @@ public T next() { /** * @param initialCapacity the initial capacity of the underlying collection. 
- * @param elementFactory the factory used to create a new element on {@link #push()} when the queue has + * @param elementFactory the factory used to create a new element on {@link #push(Recycler)} when the queue has * not previously grown to the new depth. */ public _Private_RecyclingQueue(int initialCapacity, ElementFactory elementFactory) { @@ -73,7 +73,7 @@ public T get(int index) { /** * Pushes an element onto the top of the queue, instantiating a new element only if the queue has not * previously grown to the new depth. - * @return the element at the top of the queue after the push. This element must be initialized by the caller. + * @return the index of the element at the top of the queue after the push. This element must be initialized by the caller. */ public int push(Recycler recycler) { currentIndex++; @@ -87,6 +87,23 @@ public int push(Recycler recycler) { return currentIndex; } + /** + * Pushes an element onto the top of the queue, instantiating a new element only if the queue has not + * previously grown to the new depth. + * @param recycler applied to the element at the new top, whether it was just created or is being reused, + * before that element is returned. + * @return the element at the top of the queue after the push. + */ + public T pushAndGet(Recycler recycler) { + currentIndex++; + if (currentIndex >= elements.size()) { + top = elementFactory.newElement(); + elements.add(top); + } else { + top = elements.get(currentIndex); + } + recycler.recycle(top); + return top; + } + /** * Reclaim the current element. */ @@ -119,4 +136,4 @@ public void clear() { public int size() { return currentIndex + 1; } -} \ No newline at end of file +} diff --git a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java b/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java deleted file mode 100644 index a543a54b47..0000000000 --- a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java +++ /dev/null @@ -1,559 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 -package com.amazon.ion.impl.bin; - -import com.amazon.ion.Decimal; -import com.amazon.ion.IonType; -import com.amazon.ion.Timestamp; -import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder; -import com.amazon.ion.impl.bin.utf8.Utf8StringEncoderPool; - -import java.math.BigDecimal; -import java.math.BigInteger; - -import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*; -import static java.lang.Double.doubleToRawLongBits; -import static java.lang.Float.floatToIntBits; - -/** - * Provides functions for writing various Ion values to a WriteBuffer. - * - * This class can be subsumed by IonRawBinaryWriter_1_1, when it is created. - */ -public class IonEncoder_1_1 { - - /** - * Writes an Ion Null value to the given WriteBuffer. - * @return the number of bytes written - */ - public static int writeNullValue(WriteBuffer buffer, final IonType ionType) { - if (ionType == IonType.NULL) { - buffer.writeByte(OpCodes.NULL_UNTYPED); - return 1; - } - - buffer.writeByte(OpCodes.NULL_TYPED); - switch (ionType) { - case BOOL: - buffer.writeByte((byte) 0x00); - break; - case INT: - buffer.writeByte((byte) 0x01); - break; - case FLOAT: - buffer.writeByte((byte) 0x02); - break; - case DECIMAL: - buffer.writeByte((byte) 0x03); - break; - case TIMESTAMP: - buffer.writeByte((byte) 0x04); - break; - case STRING: - buffer.writeByte((byte) 0x05); - break; - case SYMBOL: - buffer.writeByte((byte) 0x06); - break; - case BLOB: - buffer.writeByte((byte) 0x07); - break; - case CLOB: - buffer.writeByte((byte) 0x08); - break; - case LIST: - buffer.writeByte((byte) 0x09); - break; - case SEXP: - buffer.writeByte((byte) 0x0A); - break; - case STRUCT: - buffer.writeByte((byte) 0x0B); - break; - case DATAGRAM: - throw new IllegalArgumentException("Cannot write a null datagram"); - } - return 2; - } - - /** - * Writes an Ion Bool value to the given WriteBuffer. 
- * @return the number of bytes written - */ - public static int writeBoolValue(WriteBuffer buffer, final boolean value) { - if (value) { - buffer.writeByte(OpCodes.BOOLEAN_TRUE); - } else { - buffer.writeByte(OpCodes.BOOLEAN_FALSE); - } - return 1; - } - - /** - * Writes an Ion Integer value to the given WriteBuffer. - * @return the number of bytes written - */ - public static int writeIntValue(WriteBuffer buffer, final long value) { - if (value == 0) { - buffer.writeByte(OpCodes.INTEGER_ZERO_LENGTH); - return 1; - } - int length = WriteBuffer.fixedIntLength(value); - buffer.writeByte((byte) (OpCodes.INTEGER_ZERO_LENGTH + length)); - buffer.writeFixedInt(value); - return 1 + length; - } - - private static final BigInteger BIG_INT_LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); - private static final BigInteger BIG_INT_LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); - - /** - * Writes an Ion Integer value to the given WriteBuffer. - * @return the number of bytes written - */ - public static int writeIntValue(WriteBuffer buffer, final BigInteger value) { - if (value == null) { - return writeNullValue(buffer, IonType.INT); - } - if (value.compareTo(BIG_INT_LONG_MIN_VALUE) >= 0 && value.compareTo(BIG_INT_LONG_MAX_VALUE) <= 0) { - return writeIntValue(buffer, value.longValue()); - } - buffer.writeByte(OpCodes.VARIABLE_LENGTH_INTEGER); - byte[] intBytes = value.toByteArray(); - int totalBytes = 1 + intBytes.length + buffer.writeFlexUInt(intBytes.length); - for (int i = intBytes.length; i > 0; i--) { - buffer.writeByte(intBytes[i-1]); - } - return totalBytes; - } - - /** - * Writes a float to the given WriteBuffer using the Ion 1.1 encoding for Ion Floats. 
- * @return the number of bytes written - */ - public static int writeFloat(WriteBuffer buffer, final float value) { - // TODO: Optimization to write a 16 bit float for non-finite and possibly other values - if (value == 0.0) { - buffer.writeByte(OpCodes.FLOAT_ZERO_LENGTH); - return 1; - } else { - buffer.writeByte(OpCodes.FLOAT_32); - buffer.writeUInt32(floatToIntBits(value)); - return 5; - } - } - - /** - * Writes a double to the given WriteBuffer using the Ion 1.1 encoding for Ion Floats. - * @return the number of bytes written - */ - public static int writeFloat(WriteBuffer buffer, final double value) { - // TODO: Optimization to write a 16 bit float for non-finite and possibly other values - if (value == 0.0) { - buffer.writeByte(OpCodes.FLOAT_ZERO_LENGTH); - return 1; - } else if (!Double.isFinite(value) || value == (float) value) { - buffer.writeByte(OpCodes.FLOAT_32); - buffer.writeUInt32(floatToIntBits((float) value)); - return 5; - } else { - buffer.writeByte(OpCodes.FLOAT_64); - buffer.writeUInt64(doubleToRawLongBits(value)); - return 9; - } - } - - public static int writeDecimalValue(WriteBuffer buffer, final BigDecimal value) { - if (value == null) { - return writeNullValue(buffer, IonType.DECIMAL); - } - - int exponent = -value.scale(); - - if (BigDecimal.ZERO.compareTo(value) == 0 && !Decimal.isNegativeZero(value)) { - if (exponent == 0) { - buffer.writeByte(OpCodes.DECIMAL_ZERO_LENGTH); - return 1; - } else { - // A decimal with a coefficient of +0 is encoded using opcode 6F. - // The opcode is followed by a FlexInt representing the exponent. 
- buffer.writeByte(OpCodes.POSITIVE_ZERO_DECIMAL); - return 1 + buffer.writeFlexInt(exponent); - } - } - - BigInteger coefficient = value.unscaledValue(); - int numCoefficientBytes = WriteBuffer.flexIntLength(coefficient); - - int numExponentBytes = 0; - if (exponent != 0) { - numExponentBytes = WriteBuffer.fixedIntLength(exponent); - } - - int opCodeAndLengthBytes = 1; - if (numExponentBytes + numCoefficientBytes < 15) { - int opCode = OpCodes.DECIMAL_ZERO_LENGTH + numExponentBytes + numCoefficientBytes; - buffer.writeByte((byte) opCode); - } else { - // Decimal values that require more than 14 bytes can be encoded using the variable-length decimal opcode: 0xF6. - buffer.writeByte(OpCodes.VARIABLE_LENGTH_DECIMAL); - opCodeAndLengthBytes += buffer.writeFlexUInt(numExponentBytes + numCoefficientBytes); - } - buffer.writeFlexInt(coefficient); - if (exponent != 0) { - buffer.writeFixedInt(exponent); - } - - return opCodeAndLengthBytes + numCoefficientBytes + numExponentBytes; - } - - /** - * Writes a Timestamp to the given WriteBuffer using the Ion 1.1 encoding for Ion Timestamps. - * @return the number of bytes written - */ - public static int writeTimestampValue(WriteBuffer buffer, Timestamp value) { - if (value == null) { - return writeNullValue(buffer, IonType.TIMESTAMP); - } - // Timestamps may be encoded using the short form if they meet certain conditions. - // Condition 1: The year is between 1970 and 2097. - if (value.getYear() < 1970 || value.getYear() > 2097) { - return writeLongFormTimestampValue(buffer, value); - } - - // If the precision is year, month, or day, we can skip the remaining checks. - if (!value.getPrecision().includes(Timestamp.Precision.MINUTE)) { - return writeShortFormTimestampValue(buffer, value); - } - - // Condition 2: The fractional seconds are a common precision. 
- if (value.getZFractionalSecond() != null) { - int secondsScale = value.getZFractionalSecond().scale(); - if (secondsScale != 0 && secondsScale != 3 && secondsScale != 6 && secondsScale != 9) { - return writeLongFormTimestampValue(buffer, value); - } - } - // Condition 3: The local offset is either UTC, unknown, or falls between -14:00 to +14:00 and is divisible by 15 minutes. - Integer offset = value.getLocalOffset(); - if (offset != null && (offset < -14 * 60 || offset > 14 * 60 || offset % 15 != 0)) { - return writeLongFormTimestampValue(buffer, value); - } - return writeShortFormTimestampValue(buffer, value); - } - - /** - * Writes a short-form timestamp. - * Value cannot be null. - * If calling from outside this class, use writeTimestampValue instead. - */ - private static int writeShortFormTimestampValue(WriteBuffer buffer, Timestamp value) { - long bits = (value.getYear() - 1970L); - if (value.getPrecision() == Timestamp.Precision.YEAR) { - buffer.writeByte(OpCodes.TIMESTAMP_YEAR_PRECISION); - buffer.writeFixedIntOrUInt(bits, 1); - return 2; - } - - bits |= ((long) value.getMonth()) << S_TIMESTAMP_MONTH_BIT_OFFSET; - if (value.getPrecision() == Timestamp.Precision.MONTH) { - buffer.writeByte(OpCodes.TIMESTAMP_MONTH_PRECISION); - buffer.writeFixedIntOrUInt(bits, 2); - return 3; - } - - bits |= ((long) value.getDay()) << S_TIMESTAMP_DAY_BIT_OFFSET; - if (value.getPrecision() == Timestamp.Precision.DAY) { - buffer.writeByte(OpCodes.TIMESTAMP_DAY_PRECISION); - buffer.writeFixedIntOrUInt(bits, 2); - return 3; - } - - bits |= ((long) value.getHour()) << S_TIMESTAMP_HOUR_BIT_OFFSET; - bits |= ((long) value.getMinute()) << S_TIMESTAMP_MINUTE_BIT_OFFSET; - if (value.getLocalOffset() == null || value.getLocalOffset() == 0) { - if (value.getLocalOffset() != null) { - bits |= S_U_TIMESTAMP_UTC_FLAG; - } - - if (value.getPrecision() == Timestamp.Precision.MINUTE) { - buffer.writeByte(OpCodes.TIMESTAMP_MINUTE_PRECISION); - buffer.writeFixedIntOrUInt(bits, 4); - return 5; 
- } - - bits |= ((long) value.getSecond()) << S_U_TIMESTAMP_SECOND_BIT_OFFSET; - - int secondsScale = 0; - if (value.getZFractionalSecond() != null) { - secondsScale = value.getZFractionalSecond().scale(); - } - if (secondsScale != 0) { - long fractionalSeconds = value.getZFractionalSecond().unscaledValue().longValue(); - bits |= fractionalSeconds << S_U_TIMESTAMP_FRACTION_BIT_OFFSET; - } - switch (secondsScale) { - case 0: - buffer.writeByte(OpCodes.TIMESTAMP_SECOND_PRECISION); - buffer.writeFixedIntOrUInt(bits, 5); - return 6; - case 3: - buffer.writeByte(OpCodes.TIMESTAMP_MILLIS_PRECISION); - buffer.writeFixedIntOrUInt(bits, 6); - return 7; - case 6: - buffer.writeByte(OpCodes.TIMESTAMP_MICROS_PRECISION); - buffer.writeFixedIntOrUInt(bits, 7); - return 8; - case 9: - buffer.writeByte(OpCodes.TIMESTAMP_NANOS_PRECISION); - buffer.writeFixedIntOrUInt(bits, 8); - return 9; - default: - throw new IllegalStateException("This is unreachable!"); - } - } else { - long localOffset = (value.getLocalOffset().longValue() / 15) + (14 * 4); - bits |= (localOffset & LEAST_SIGNIFICANT_7_BITS) << S_O_TIMESTAMP_OFFSET_BIT_OFFSET; - - if (value.getPrecision() == Timestamp.Precision.MINUTE) { - buffer.writeByte(OpCodes.TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET); - buffer.writeFixedIntOrUInt(bits, 5); - return 6; - } - - bits |= ((long) value.getSecond()) << S_O_TIMESTAMP_SECOND_BIT_OFFSET; - - // The fractional seconds bits will be put into a separate long because we need nine bytes total - // if there are nanoseconds (which is too much for one long) and the boundary between the seconds - // and fractional seconds subfields conveniently aligns with a byte boundary. 
- long fractionBits = 0; - int secondsScale = 0; - if (value.getZFractionalSecond() != null) { - secondsScale = value.getZFractionalSecond().scale(); - } - if (secondsScale != 0) { - fractionBits = value.getZFractionalSecond().unscaledValue().longValue(); - } - switch (secondsScale) { - case 0: - buffer.writeByte(OpCodes.TIMESTAMP_SECOND_PRECISION_WITH_OFFSET); - buffer.writeFixedIntOrUInt(bits, 5); - return 6; - case 3: - buffer.writeByte(OpCodes.TIMESTAMP_MILLIS_PRECISION_WITH_OFFSET); - buffer.writeFixedIntOrUInt(bits, 5); - buffer.writeFixedIntOrUInt(fractionBits, 2); - return 8; - case 6: - buffer.writeByte(OpCodes.TIMESTAMP_MICROS_PRECISION_WITH_OFFSET); - buffer.writeFixedIntOrUInt(bits, 5); - buffer.writeFixedIntOrUInt(fractionBits, 3); - return 9; - case 9: - buffer.writeByte(OpCodes.TIMESTAMP_NANOS_PRECISION_WITH_OFFSET); - buffer.writeFixedIntOrUInt(bits, 5); - buffer.writeFixedIntOrUInt(fractionBits, 4); - return 10; - default: - throw new IllegalStateException("This is unreachable!"); - } - } - } - - /** - * Writes a long-form timestamp. - * Value may not be null. - * Only visible for testing. If calling from outside this class, use writeTimestampValue instead. 
- */ - static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) { - buffer.writeByte(OpCodes.VARIABLE_LENGTH_TIMESTAMP); - - long bits = value.getYear(); - if (value.getPrecision() == Timestamp.Precision.YEAR) { - buffer.writeFlexUInt(2); - buffer.writeFixedIntOrUInt(bits, 2); - return 4; // OpCode + FlexUInt + 2 bytes data - } - - bits |= ((long) value.getMonth()) << L_TIMESTAMP_MONTH_BIT_OFFSET; - if (value.getPrecision() == Timestamp.Precision.MONTH) { - buffer.writeFlexUInt(3); - buffer.writeFixedIntOrUInt(bits, 3); - return 5; // OpCode + FlexUInt + 3 bytes data - } - - bits |= ((long) value.getDay()) << L_TIMESTAMP_DAY_BIT_OFFSET; - if (value.getPrecision() == Timestamp.Precision.DAY) { - buffer.writeFlexUInt(3); - buffer.writeFixedIntOrUInt(bits, 3); - return 5; // OpCode + FlexUInt + 3 bytes data - } - - bits |= ((long) value.getHour()) << L_TIMESTAMP_HOUR_BIT_OFFSET; - bits |= ((long) value.getMinute()) << L_TIMESTAMP_MINUTE_BIT_OFFSET; - long localOffsetValue = L_TIMESTAMP_UNKNOWN_OFFSET_VALUE; - if (value.getLocalOffset() != null) { - localOffsetValue = value.getLocalOffset() + (24 * 60); - } - bits |= localOffsetValue << L_TIMESTAMP_OFFSET_BIT_OFFSET; - - if (value.getPrecision() == Timestamp.Precision.MINUTE) { - buffer.writeFlexUInt(6); - buffer.writeFixedIntOrUInt(bits, 6); - return 8; // OpCode + FlexUInt + 6 bytes data - } - - - bits |= ((long) value.getSecond()) << L_TIMESTAMP_SECOND_BIT_OFFSET; - int secondsScale = 0; - if (value.getZFractionalSecond() != null) { - secondsScale = value.getZFractionalSecond().scale(); - } - if (secondsScale == 0) { - buffer.writeFlexUInt(7); - buffer.writeFixedIntOrUInt(bits, 7); - return 9; // OpCode + FlexUInt + 7 bytes data - } - - BigDecimal fractionalSeconds = value.getZFractionalSecond(); - BigInteger coefficient = fractionalSeconds.unscaledValue(); - long exponent = fractionalSeconds.scale(); - int numCoefficientBytes = WriteBuffer.flexUIntLength(coefficient); - int numExponentBytes = 
WriteBuffer.fixedUIntLength(exponent); - // Years-seconds data (7 bytes) + fraction coefficient + fraction exponent - int dataLength = 7 + numCoefficientBytes + numExponentBytes; - - buffer.writeFlexUInt(dataLength); - buffer.writeFixedIntOrUInt(bits, 7); - buffer.writeFlexUInt(coefficient); - buffer.writeFixedUInt(exponent); - - // OpCode + FlexUInt length + dataLength - return 1 + WriteBuffer.flexUIntLength(dataLength) + dataLength; - } - - /** - * Writes a String to the given WriteBuffer using the Ion 1.1 encoding for Ion Strings. - * @return the number of bytes written - */ - public static int writeStringValue(WriteBuffer buffer, String value) { - return writeInlineText(buffer, value, IonType.STRING, OpCodes.STRING_ZERO_LENGTH, OpCodes.VARIABLE_LENGTH_STRING); - } - - /** - * Writes an inline Symbol to the given WriteBuffer using the Ion 1.1 encoding for Ion Symbols. - * @return the number of bytes written - */ - public static int writeSymbolValue(WriteBuffer buffer, String value) { - return writeInlineText(buffer, value, IonType.SYMBOL, OpCodes.INLINE_SYMBOL_ZERO_LENGTH, OpCodes.VARIABLE_LENGTH_INLINE_SYMBOL); - } - - private static int writeInlineText(WriteBuffer buffer, String value, IonType type, byte zeroLengthOpCode, byte variableLengthOpCode) { - if (value == null) { - return writeNullValue(buffer, type); - } - - // TODO: When merging into the Ion 1.1 raw writer, keep a single instance of the Utf8StringEncoder - // instead of fetching one on every call. 
- Utf8StringEncoder.Result encoderResult = Utf8StringEncoderPool.getInstance().getOrCreate().encode(value); - - byte[] utf8Buffer = encoderResult.getBuffer(); - int numValueBytes = encoderResult.getEncodedLength(); - int numLengthBytes = 0; - - if (numValueBytes <= 0xF) { - buffer.writeByte((byte)(zeroLengthOpCode | numValueBytes)); - } else { - buffer.writeByte(variableLengthOpCode); - numLengthBytes = buffer.writeFlexUInt(numValueBytes); - } - buffer.writeBytes(utf8Buffer, 0, numValueBytes); - return 1 + numLengthBytes + numValueBytes; - } - - /** - * Writes an interned Symbol's address to the given WriteBuffer using the Ion 1.1 encoding for Ion Symbols. - * @return the number of bytes written - * - * TODO: Do we need to support Symbol Addresses greater than Long.MAX_VALUE? - */ - public static int writeSymbolValue(WriteBuffer buffer, long value) { - if (value < 0) { - throw new IllegalArgumentException("Symbol Address cannot be negative; was: " + value); - } else if (value < FIRST_2_BYTE_SYMBOL_ADDRESS) { - buffer.writeByte(OpCodes.SYMBOL_ADDRESS_1_BYTE); - buffer.writeFixedUInt(value); - return 2; - } else if (value < FIRST_MANY_BYTE_SYMBOL_ADDRESS) { - buffer.writeByte(OpCodes.SYMBOL_ADDRESS_2_BYTES); - buffer.writeFixedIntOrUInt(value - FIRST_2_BYTE_SYMBOL_ADDRESS, 2); - return 3; - } else { - buffer.writeByte(OpCodes.SYMBOL_ADDRESS_MANY_BYTES); - int addressBytes = buffer.writeFlexUInt(value - FIRST_MANY_BYTE_SYMBOL_ADDRESS); - return 1 + addressBytes; - } - } - - /** - * Writes a Blob to the given WriteBuffer using the Ion 1.1 encoding for Ion Blobs. 
- * @return the number of bytes written - */ - public static int writeBlobValue(WriteBuffer buffer, byte[] value) { - if (value == null) { - return writeNullValue(buffer, IonType.BLOB); - } - - buffer.writeByte(OpCodes.VARIABLE_LENGTH_BLOB); - int numLengthBytes = buffer.writeFlexUInt(value.length); - buffer.writeBytes(value); - return 1 + numLengthBytes + value.length; - } - - /** - * Writes a Clob to the given WriteBuffer using the Ion 1.1 encoding for Ion Clobs. - * @return the number of bytes written - */ - public static int writeClobValue(WriteBuffer buffer, byte[] value) { - if (value == null) { - return writeNullValue(buffer, IonType.CLOB); - } - - buffer.writeByte(OpCodes.VARIABLE_LENGTH_CLOB); - int numLengthBytes = buffer.writeFlexUInt(value.length); - buffer.writeBytes(value); - return 1 + numLengthBytes + value.length; - } - - // TODO: Implement FlexSym Annotations - - /** - * Writes annotations using the given symbol addresses. - */ - public static int writeAnnotations(WriteBuffer buffer, long[] annotations) { - if (annotations == null || annotations.length == 0) { - return 0; - } - if (annotations.length == 1) { - buffer.writeByte(OpCodes.ANNOTATIONS_1_SYMBOL_ADDRESS); - int numAddressBytes = buffer.writeFlexUInt(annotations[0]); - return 1 + numAddressBytes; - } else if (annotations.length == 2) { - buffer.writeByte(OpCodes.ANNOTATIONS_2_SYMBOL_ADDRESS); - int numAddressBytes = buffer.writeFlexUInt(annotations[0]); - numAddressBytes += buffer.writeFlexUInt(annotations[1]); - return 1 + numAddressBytes; - } else { - int numAddressBytes = 0; - for (long ann : annotations) { - numAddressBytes += WriteBuffer.flexUIntLength(ann); - } - buffer.writeByte(OpCodes.ANNOTATIONS_MANY_SYMBOL_ADDRESS); - int numLengthBytes = buffer.writeFlexUInt(numAddressBytes); - for (long ann : annotations) { - buffer.writeFlexUInt(ann); - } - return 1 + numLengthBytes + numAddressBytes; - } - } -} diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java 
b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java index e2073126c0..e42110079d 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java @@ -278,39 +278,6 @@ public String toString() } } - private static class PatchPoint - { - /** position of the data being patched out. */ - public long oldPosition; - /** length of the data being patched out.*/ - public int oldLength; - /** size of the container data or annotations.*/ - public long length; - public PatchPoint() - { - oldPosition = -1; - oldLength = -1; - length = -1; - } - - @Override - public String toString() - { - return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + ")"; - } - - public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) { - this.oldPosition = oldPosition; - this.oldLength = oldLength; - this.length = length; - return this; - } - - public PatchPoint clear() { - return initialize(-1, -1, -1); - } - } - /*package*/ enum StreamCloseMode { NO_CLOSE, diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt new file mode 100644 index 0000000000..29713f0733 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt @@ -0,0 +1,909 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.Decimal +import com.amazon.ion.IonException +import com.amazon.ion.IonType +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.impl._Private_RecyclingQueue +import com.amazon.ion.impl._Private_RecyclingStack +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.DELIMITED_LIST +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.DELIMITED_SEXP +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.DELIMITED_STRUCT_FS +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.DELIMITED_STRUCT_SID +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.DELIMITED_STRUCT_SID_TO_FS +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.DIRECTIVE +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.EEXP +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.PREFIXED_EEXP +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.PREFIXED_LIST +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.PREFIXED_SEXP +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.PREFIXED_STRUCT_FS +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.PREFIXED_STRUCT_SID +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.PREFIXED_STRUCT_SID_TO_FS +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.PREFIXED_TAGLESS_EEXP +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.TAGLESS_EEXP +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.TE_LIST +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.TE_LIST_W_LENGTH_PREFIXED_MACRO +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.TE_SEXP +import 
com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.TE_SEXP_W_LENGTH_PREFIXED_MACRO +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.TOP +import com.amazon.ion.impl.bin.WriteBuffer.fixedIntLength +import com.amazon.ion.impl.bin.WriteBuffer.flexIntLength +import com.amazon.ion.impl.bin.WriteBuffer.flexUIntLength +import com.amazon.ion.impl.bin.utf8.Utf8StringEncoderPool +import com.amazon.ion.ion_1_1.IonRawWriter_1_1 +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings +import java.io.OutputStream +import java.lang.Double.doubleToRawLongBits +import java.lang.Float.floatToIntBits +import java.math.BigDecimal +import java.math.BigInteger +import java.util.function.Consumer + +/** + * [IonRawWriter_1_1] implementation that encodes into [buffer] and copies the encoded bytes to [out] when [flush] is called. + * NOTE(review): `lengthPrefixPreallocation` is presumably the number of bytes reserved for each length prefix; confirm against its usages. + */ +class IonRawBinaryWriter_1_1 internal constructor( + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") + private val out: OutputStream, + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") + private val buffer: WriteBuffer, + private val lengthPrefixPreallocation: Int, +) : IonRawWriter_1_1 { + + /** + * Throws an [IonException] whose message is produced by [lazyMessage] when [condition] is false. + * The message lambda is only invoked on failure. + */ + private inline fun confirm(condition: Boolean, lazyMessage: () -> String) { + if (!condition) { + throw IonException(lazyMessage()) + } + } + + /** + * Writes [byte], narrowed with `toByte()`, to this buffer as a single byte. + * @return 1, the number of bytes written. + */ + private fun WriteBuffer.writeByte(byte: Int): Int { + writeByte(byte.toByte()) + return 1 + } + + /** + * Types of encoding containers. + */ + private object ContainerType { + const val TOP = -1 + const val DIRECTIVE = 0 + const val EEXP = 1 + const val PREFIXED_EEXP = 2 + const val TAGLESS_EEXP = 3 + const val PREFIXED_TAGLESS_EEXP = 4 + + // NOTE: All data model containers are deliberately grouped together to make it easier to check for zero-length containers. 
+ + const val TE_LIST = 5 + const val TE_SEXP = 6 + const val TE_LIST_W_LENGTH_PREFIXED_MACRO = 7 + const val TE_SEXP_W_LENGTH_PREFIXED_MACRO = 8 + const val PREFIXED_LIST = 9 + const val PREFIXED_SEXP = 10 + const val DELIMITED_LIST = 11 + const val DELIMITED_SEXP = 12 + + // NOTE: All struct encodings are deliberately at the end so that we can check if it's a struct by just seeing if + // currentContainer.type >= DELIMITED_STRUCT_SID + + const val DELIMITED_STRUCT_SID = 13 + /** Represents a struct that started out in SID mode and has switched to FlexSym */ + const val DELIMITED_STRUCT_SID_TO_FS = 14 + const val DELIMITED_STRUCT_FS = 15 + + const val PREFIXED_STRUCT_SID = 16 + /** Represents a struct that started out in SID mode and has switched to FlexSym */ + const val PREFIXED_STRUCT_SID_TO_FS = 17 + const val PREFIXED_STRUCT_FS = 18 + } + + /** + * Mutable bookkeeping record for one container that is being written. + * Instances are created by the `containerStack` factory and re-initialised with [reset] when pushed. + */ + private class ContainerInfo( + /** The type of container, represented by one of the constants in [ContainerType]. */ + @JvmField var type: Int = -1, + /** The position, in the output, of the _opcode_ of this container. */ + @JvmField var position: Long = -1, + /** Where the length prefix should be written, relative to the start of this container. */ + @JvmField var metadataOffset: Int = 1, + /** The number of bytes for everything following the length-prefix (if applicable) in this container. */ + @JvmField var length: Long = 0, + // TODO: Test if performance is better with an Object Reference or an index into the PatchPoint queue. + @JvmField var patchPoint: PatchPoint? = null, + /** + * The number of elements in the expression group or arguments to the macro. + * This is updated when _finishing_ writing a value or expression group. + */ + @JvmField var numChildren: Int = 0, + ) { + /** + * Clears this [ContainerInfo] of old data and initializes it with the given new data. 
+ */ + fun reset(type: Int, position: Long, metadataOffset: Int = 1) { + this.type = type + this.position = position + this.metadataOffset = metadataOffset + length = 0 + patchPoint = null + numChildren = 0 + } + } + + companion object { + /** + * Create a new instance for the given OutputStream with the given block size and length preallocation. + */ + @JvmStatic + fun from(out: OutputStream, blockSize: Int, preallocation: Int): IonRawBinaryWriter_1_1 { + return IonRawBinaryWriter_1_1(out, WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(blockSize)) {}, preallocation) + } + + /** The four bytes written by [writeIVM]: E0 01 01 EA. */ + private val IVM_BYTES = byteArrayOf(0xE0.toByte(), 1, 1, 0xEA.toByte()) + } + + /** UTF-8 encoder obtained once from [Utf8StringEncoderPool] and reused by the text `writeAnnotations` overloads. */ + private val utf8StringEncoder = Utf8StringEncoderPool.getInstance().getOrCreate() + + /** Set by [close]; while true, [flush] and [close] return without doing anything. */ + private var closed = false + + /** Length patches that [flush] splices into the output; cleared at the end of every flush. */ + private val patchPoints = _Private_RecyclingQueue(512) { PatchPoint() } + /** Stack of containers being written; [depth] is its size minus one because the first entry stands for the top level. */ + private val containerStack = _Private_RecyclingStack(8) { ContainerInfo() } + + /** The container currently being written into; initialised to an entry of type -1, i.e. [ContainerType.TOP]. */ + private var currentContainer: ContainerInfo = containerStack.push { it.reset(-1, 0L) } + + /** + * Writes everything buffered so far to [out], then resets [buffer] and clears [patchPoints]. + * + * Does nothing if this writer is already closed. Throws [IonException] (through [confirm]) unless [depth] is zero. + * When there are no patch points the buffer is written out as-is. Otherwise it is written in slices: for each + * patch point with a non-negative `length`, the bytes up to `oldPosition` are copied, `length` is written as a + * FlexUInt, and the `oldLength` bytes starting at `oldPosition` are skipped. Patch points with a negative + * `length` are ignored. + * + * NOTE(review): the failure message names `finish()` although this method is `flush()`; confirm which name is intended. + */ + override fun flush() { + if (closed) return + confirm(depth() == 0) { "Cannot call finish() while in a container" } + + if (patchPoints.isEmpty) { + // nothing to patch--write 'em out! + buffer.writeTo(out) + } else { + var bufferPosition: Long = 0 + + // Patch length values are long, so they always fit in 10 bytes or fewer. 
+ val flexUIntScratch = ByteArray(10) + + val iterator = patchPoints.iterate() + + while (iterator.hasNext()) { + val patch = iterator.next() + if (patch.length < 0) { + continue + } + // write up to the thing to be patched + val bufferLength = patch.oldPosition - bufferPosition + buffer.writeTo(out, bufferPosition, bufferLength) + + // write out the patch + // TODO: See if there's a measurable performance benefit if we write directly to the output stream vs using the flexUIntScratch + val numBytes = flexUIntLength(patch.length) + PrimitiveEncoder.writeFlexIntOrUIntInto(flexUIntScratch, 0, patch.length, numBytes) + out.write(flexUIntScratch, 0, numBytes) + + // skip over the pre-allocated field + bufferPosition = patch.oldPosition + bufferPosition += patch.oldLength.toLong() + } + buffer.writeTo(out, bufferPosition, buffer.position() - bufferPosition) + } + + buffer.reset() + patchPoints.clear() + + // TODO: Stream flush mode + } + + /** + * Flushes any remaining output, closes [buffer], and marks this writer closed. Calling it again is a no-op. + * NOTE(review): [out] is not closed here; presumably its lifecycle belongs to the caller. Confirm. + */ + override fun close() { + if (closed) return + flush() + buffer.close() + closed = true + } + + /** Number of containers the writer is currently inside; the top-level entry of [containerStack] is not counted. */ + override fun depth(): Int = containerStack.size() - 1 // "Top" doesn't count when counting depth. + + /** + * True when the current container's type is at or above `DELIMITED_STRUCT_SID`; [ContainerType] numbers every + * struct variant from that constant upwards. + */ + override fun isInStruct(): Boolean = currentContainer.type >= DELIMITED_STRUCT_SID + + /** + * Writes [IVM_BYTES] to [buffer]. + * @throws IonException if the current container is not [TOP]. + */ + override fun writeIVM() { + confirm(currentContainer.type == TOP) { "IVM can only be written at the top level of an Ion stream." 
/**
 * Writes every symbol ID in [annotations] as its own annotation: one `OpCode.ANNOTATION_SID` byte
 * followed by the ID encoded as a FlexUInt. All bytes written are added to the current container's length.
 */
override fun writeAnnotations(annotations: IntArray) {
    val out = buffer
    // Seed the count with one opcode byte per annotation; the FlexUInt sizes are accumulated in the loop.
    var bytesWritten = annotations.size
    for (sid in annotations) {
        out.writeByte(OpCode.ANNOTATION_SID)
        bytesWritten += out.writeFlexUInt(sid)
    }
    currentContainer.length += bytesWritten
}
/**
 * Writes a field name given as a symbol ID and adds the number of bytes written to the current container's length.
 *
 * SID-mode structs get the ID as a FlexUInt; FlexSym-mode structs (including those that were switched from
 * SID mode) get it as a FlexInt.
 *
 * @throws IonException if the current container is not one of the struct types.
 */
override fun writeFieldName(sid: Int) {
    val container = currentContainer
    val encodedSize = when (container.type) {
        PREFIXED_STRUCT_SID,
        DELIMITED_STRUCT_SID -> buffer.writeFlexUInt(sid.toLong())
        PREFIXED_STRUCT_FS,
        DELIMITED_STRUCT_FS,
        PREFIXED_STRUCT_SID_TO_FS,
        DELIMITED_STRUCT_SID_TO_FS -> buffer.writeFlexInt(sid.toLong())
        // Raw writer is not required to check this, but the container type is already being inspected here.
        else -> throw IonException("Can only write a field name inside of a struct.")
    }
    container.length += encodedSize
}
/**
 * Writes [value] as a tagged int.
 *
 * Zero is written as the single `OpCode.INT_0` byte. Any other value is written as an opcode of
 * `OpCode.INT_0 + n` followed by `n` bytes of fixed-width integer, where `n` is `fixedIntLength(value)`.
 */
override fun writeInt(value: Long) = writeScalar {
    if (value == 0L) {
        buffer.writeByte(OpCode.INT_0)
        return@writeScalar 1
    }
    val payloadSize = fixedIntLength(value)
    buffer.writeByte(OpCode.INT_0 + payloadSize)
    buffer.writeFixedIntOrUInt(value, payloadSize)
    // One opcode byte plus the payload.
    payloadSize + 1
}
/**
 * Writes [value] as a tagged float.
 *
 * Positive zero is written as the single `OpCode.FLOAT_0` byte; every other value (including negative zero)
 * is written as `OpCode.FLOAT_32` followed by the 4-byte bit pattern of the float.
 */
override fun writeFloat(value: Float) = writeScalar {
    // TODO: Consider adding a check for some half-precision values that we can use.
    // Compare bit patterns instead of using `value == 0.0f`: IEEE 754 equality also holds for -0.0f, which would
    // send negative zero down the zero-length path and lose its sign. Only +0.0f has an all-zero bit pattern.
    if (floatToIntBits(value) == 0) {
        buffer.writeByte(OpCode.FLOAT_0)
        1
    } else {
        buffer.writeByte(OpCode.FLOAT_32)
        buffer.writeFixedIntOrUInt(floatToIntBits(value).toLong(), 4)
        5
    }
}
/**
 * Writes a symbol value with inline UTF-8 text.
 *
 * Text shorter than 16 encoded bytes uses the opcode `OpCode.SYMBOL_LENGTH_0 + size`; longer text uses
 * `OpCode.VARIABLE_LENGTH_SYMBOL` followed by the size as a FlexUInt. The encoded text follows in both cases.
 */
override fun writeSymbol(text: CharSequence) = writeScalar {
    val utf8 = utf8StringEncoder.encode(text.toString())
    val utf8Size = utf8.encodedLength
    // Write the opcode (and the explicit length, if one is needed) and remember how many bytes that took.
    val headerSize = if (utf8Size < 16) {
        buffer.writeByte((OpCode.SYMBOL_LENGTH_0 + utf8Size).toByte())
        1
    } else {
        buffer.writeByte(OpCode.VARIABLE_LENGTH_SYMBOL.toByte())
        1 + buffer.writeFlexUInt(utf8Size)
    }
    buffer.writeBytes(utf8.buffer, 0, utf8Size)
    headerSize + utf8Size
}
/**
 * Writes the slice `value[start, start + length)` as a blob: the `OpCode.VARIABLE_LENGTH_BLOB` byte,
 * the slice length as a FlexUInt, and then the slice bytes.
 *
 * @param value the array that contains the blob data.
 * @param start index of the first byte of [value] to write.
 * @param length number of bytes to write, beginning at [start].
 */
override fun writeBlob(value: ByteArray, start: Int, length: Int) = writeScalar {
    buffer.writeByte(OpCode.VARIABLE_LENGTH_BLOB)
    // Use the requested slice, not the whole array, for both the length prefix and the payload.
    val numLengthBytes = buffer.writeFlexUInt(length)
    buffer.writeBytes(value, start, length)
    1 + numLengthBytes + length
}

/**
 * Writes the slice `value[start, start + length)` as a clob: the `OpCode.VARIABLE_LENGTH_CLOB` byte,
 * the slice length as a FlexUInt, and then the slice bytes.
 *
 * @param value the array that contains the clob data.
 * @param start index of the first byte of [value] to write.
 * @param length number of bytes to write, beginning at [start].
 */
override fun writeClob(value: ByteArray, start: Int, length: Int) = writeScalar {
    buffer.writeByte(OpCode.VARIABLE_LENGTH_CLOB)
    // Use the requested slice, not the whole array, for both the length prefix and the payload.
    val numLengthBytes = buffer.writeFlexUInt(length)
    buffer.writeBytes(value, start, length)
    1 + numLengthBytes + length
}
+ if (usingLengthPrefix) { + currentContainer = containerStack.push { it.reset(PREFIXED_STRUCT_SID, buffer.position()) } + buffer.writeByte(OpCode.VARIABLE_LENGTH_STRUCT_SID_MODE) + buffer.reserve(lengthPrefixPreallocation) + } else { + currentContainer = containerStack.push { it.reset(DELIMITED_STRUCT_SID, buffer.position()) } + buffer.writeByte(OpCode.DELIMITED_STRUCT_SID_MODE) + } + } + + override fun stepInEExp(name: CharSequence) { + throw UnsupportedOperationException("Binary writer requires macros to be invoked by their ID.") + } + + override fun stepInEExp(id: Int, usingLengthPrefix: Boolean) { + // Length-prefixed e-expression format: + // F4 + // Non-length-prefixed e-expression format: + //
+ + if (usingLengthPrefix) { + currentContainer = containerStack.push { it.reset(PREFIXED_EEXP, buffer.position()) } + buffer.writeByte(OpCode.LENGTH_PREFIXED_MACRO_INVOCATION) + currentContainer.metadataOffset += buffer.writeFlexUInt(id) + buffer.reserve(lengthPrefixPreallocation) + } else { + currentContainer = containerStack.push { it.reset(EEXP, buffer.position()) } + currentContainer.metadataOffset = writeEExpMacroIdWithoutLengthPrefix(id) + } + } + + private fun writeEExpMacroIdWithoutLengthPrefix(id: Int): Int { + return if (id < OpCode.EXTENSIBLE_MACRO_ADDRESS_0) { + buffer.writeByte(id.toByte()) + 1 + } else { + val biasedId = id - OpCode.EXTENSIBLE_MACRO_ADDRESS_0 + val opcode = OpCode.EXTENSIBLE_MACRO_ADDRESS_0 or (biasedId and 0x7) + buffer.writeByte(opcode.toByte()) + 1 + buffer.writeFlexUInt(biasedId ushr 3) + } + } + + override fun writeAbsentArgument() { + buffer.writeByte(OpCode.NO_ARGUMENT) + currentContainer.length++ + } + + override fun stepOut() { + val currentContainer = currentContainer + // The length of the current container. By the end of this method, the total must include + // any opcodes, length prefixes, or other data that is not counted in ContainerInfo.length + var thisContainerTotalLength: Long = currentContainer.length + + // If we have a data-model container with no child values, we can replace it with the prefixed, zero-length opcode. 
+ if (currentContainer.numChildren == 0 && currentContainer.type >= TE_LIST) { + val zeroLengthOpcode = when (currentContainer.type) { + TE_LIST, PREFIXED_LIST, DELIMITED_LIST -> OpCode.LIST_LENGTH_0 + TE_SEXP, PREFIXED_SEXP, DELIMITED_SEXP -> OpCode.SEXP_LENGTH_0 + else -> OpCode.STRUCT_LENGTH_0 + } + thisContainerTotalLength++ // For the opcode + buffer.truncate(currentContainer.position + 1) + buffer.writeUInt8At(currentContainer.position, zeroLengthOpcode.toLong()) + } else when (currentContainer.type) { + TE_LIST, + TE_SEXP, + TE_LIST_W_LENGTH_PREFIXED_MACRO, + TE_SEXP_W_LENGTH_PREFIXED_MACRO -> { + // Add one byte to account for the op code + thisContainerTotalLength += currentContainer.metadataOffset + writeCurrentContainerLength(lengthPrefixPreallocation, currentContainer.numChildren.toLong()) + } + PREFIXED_LIST, + PREFIXED_SEXP -> { + thisContainerTotalLength++ + val contentLength = currentContainer.length + if (contentLength <= 0xF) { + // Clean up any unused space that was pre-allocated. + buffer.shiftBytesLeft(currentContainer.length.toInt(), lengthPrefixPreallocation) + val zeroLengthOpCode = if (currentContainer.type == PREFIXED_LIST) OpCode.LIST_LENGTH_0 else OpCode.SEXP_LENGTH_0 + buffer.writeUInt8At(currentContainer.position, zeroLengthOpCode + contentLength) + } else { + thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation) + } + } + DELIMITED_LIST, + DELIMITED_SEXP -> { + thisContainerTotalLength += 2 // For the start and end delimiters + buffer.writeByte(OpCode.DELIMITED_CONTAINER_END) + } + DELIMITED_STRUCT_SID_TO_FS, + DELIMITED_STRUCT_SID, + DELIMITED_STRUCT_FS -> { + // Need a sacrificial field name before the closing delimiter. We'll use $0. + // This works regardless of whether we're in FlexSym or SID mode. 
+ buffer.writeByte(PrimitiveEncoder.FLEX_ZERO) + thisContainerTotalLength += 3 // For the start opcode, throwaway field name, and end marker + buffer.writeByte(OpCode.DELIMITED_CONTAINER_END) + } + PREFIXED_STRUCT_SID, + PREFIXED_STRUCT_SID_TO_FS -> { + // Add one byte to account for the op code + thisContainerTotalLength++ + val contentLength = currentContainer.length + if (contentLength <= 0xF) { + // Clean up any unused space that was pre-allocated. + buffer.shiftBytesLeft(currentContainer.length.toInt(), lengthPrefixPreallocation) + val zeroLengthOpCode = OpCode.STRUCT_LENGTH_0 + buffer.writeUInt8At(currentContainer.position, zeroLengthOpCode + contentLength) + } else { + thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation) + } + } + PREFIXED_STRUCT_FS -> { + thisContainerTotalLength += 1 + writeCurrentContainerLength(lengthPrefixPreallocation) + } + TAGLESS_EEXP -> { + // Nothing to do here because there's no opcode, length, or end delimiter. + } + EEXP -> { + // Add this to account for the opcode/address + thisContainerTotalLength += currentContainer.metadataOffset + } + PREFIXED_EEXP, + PREFIXED_TAGLESS_EEXP -> { + // NOTE: For the (non-tagless) prefixed case, we could check if the length is 0 to see if we can go back + // and rewrite this as a non-length-prefixed e-exp, but we won't because that's easier done in the managed + // writer, which already has knowledge of the macro signature. + + // Add to account for the opcode, address, and length prefix + thisContainerTotalLength += currentContainer.metadataOffset + writeCurrentContainerLength(lengthPrefixPreallocation) + } + DIRECTIVE -> { + thisContainerTotalLength += 2 // For the start and end delimiters + buffer.writeByte(OpCode.DELIMITED_CONTAINER_END) + } + else -> throw IonException("Nothing to step out of.") + } + + containerStack.pop() // This is the container we just exited. We don't need to do anything more with it. 
/**
 * Writes the length of the current container and returns the number of bytes needed to do so.
 * Transparently handles PatchPoints as necessary.
 *
 * The length prefix is located at the container's start position plus its `metadataOffset`.
 *
 * @param numPreAllocatedLengthPrefixBytes the number of bytes that were pre-allocated for the length prefix of the
 *                                         current container.
 * @param lengthToWrite the value to encode as the length prefix; defaults to the current container's length.
 * @return the number of bytes the FlexUInt encoding of [lengthToWrite] requires. This is returned whether the
 *         length was written in place or deferred to a patch point.
 */
private fun writeCurrentContainerLength(numPreAllocatedLengthPrefixBytes: Int, lengthToWrite: Long = currentContainer.length): Int {
    val lengthPosition = currentContainer.position + currentContainer.metadataOffset
    val lengthPrefixBytesRequired = flexUIntLength(lengthToWrite)
    // TODO(perf): Patch Points are required when there is less space pre-allocated than is required. However, this
    //             also uses patch points even when there is _more_ space allocated than required. Check whether it's
    //             faster to shift bytes around in order to close/cover the excess space that was pre-allocated for
    //             the length.
    if (lengthPrefixBytesRequired == numPreAllocatedLengthPrefixBytes) {
        // The pre-allocated space is exactly the size required, so write the length in place.
        buffer.writeFlexIntOrUIntAt(lengthPosition, lengthToWrite, lengthPrefixBytesRequired)
    } else {
        addPatchPointsToStack()
        // All ContainerInfos are in the stack, so we know that its patchPoint is non-null.
        currentContainer.patchPoint!!.apply {
            // Record which bytes to replace (the pre-allocated slot) and the length value to emit in their place.
            oldPosition = lengthPosition
            oldLength = numPreAllocatedLengthPrefixBytes
            length = lengthToWrite
        }
    }
    return lengthPrefixBytesRequired
}
However, the additional branching + // may negate the effect of any reduction in allocations. + + // If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already + // have a patch point. No container can be smaller than the contents, so all outer layers also require patches. + // Instead of allocating iterator, we share one iterator instance within the scope of the container stack and + // reset the cursor every time we track back to the ancestors. + val stackIterator: ListIterator = containerStack.iterator() + // Walk down the stack until we find an ancestor which already has a patch point + while (stackIterator.hasNext() && stackIterator.next().patchPoint == null) { + // Logic happens in the loop condition. + } + + // The iterator cursor is now positioned on an ancestor container that has a patch point + // Ascend back up the stack, fixing the ancestors which need a patch point assigned before us + while (stackIterator.hasPrevious()) { + val ancestor = stackIterator.previous() + if (ancestor.patchPoint == null) { + ancestor.patchPoint = patchPoints.pushAndGet { it.clear() } + } + } + } + + override fun writeTaggedPlaceholder() { + writeScalar { + buffer.writeByte(OpCode.TAGGED_PLACEHOLDER) + 1 + } + } + + override fun writeTaggedPlaceholderWithDefault(default: Consumer) { + buffer.writeByte(OpCode.TAGGED_PLACEHOLDER_WITH_DEFAULT) + currentContainer.length++ + default.accept(this) + } + + override fun writeTaglessPlaceholder(taglessEncodingOpcode: Int) = writeScalar { + buffer.write2Bytes(OpCode.TAGLESS_PLACEHOLDER.toByte(), taglessEncodingOpcode.toByte()) + 2 + } + + override fun stepInDirective(directiveOpcode: Int) { + currentContainer = containerStack.push { it.reset(DIRECTIVE, buffer.position()) } + buffer.writeByte(directiveOpcode) + } + + override fun stepInTaglessElementList(taglessEncodingOpcode: Int) { + currentContainer = containerStack.push { it.reset(TE_LIST, buffer.position()) } + 
buffer.write2Bytes(OpCode.TAGLESS_ELEMENT_LIST.toByte(), taglessEncodingOpcode.toByte()) + currentContainer.metadataOffset++ + buffer.reserve(lengthPrefixPreallocation) + } + + override fun stepInTaglessElementList(macroId: Int, macroName: String?, lengthPrefixed: Boolean) { + val start = buffer.position() + buffer.writeByte(OpCode.TAGLESS_ELEMENT_LIST) + if (lengthPrefixed) { + currentContainer = containerStack.push { it.reset(TE_LIST_W_LENGTH_PREFIXED_MACRO, start) } + buffer.writeByte(OpCode.LENGTH_PREFIXED_MACRO_INVOCATION) + currentContainer.metadataOffset += 1 + buffer.writeFlexUInt(macroId) + } else { + currentContainer = containerStack.push { it.reset(TE_LIST, start) } + currentContainer.metadataOffset += writeEExpMacroIdWithoutLengthPrefix(macroId) + } + buffer.reserve(lengthPrefixPreallocation) + } + + override fun stepInTaglessElementSExp(taglessEncodingOpcode: Int) { + currentContainer = containerStack.push { it.reset(TE_SEXP, buffer.position()) } + buffer.write2Bytes(OpCode.TAGLESS_ELEMENT_SEXP.toByte(), taglessEncodingOpcode.toByte()) + currentContainer.metadataOffset++ + buffer.reserve(lengthPrefixPreallocation) + } + + override fun stepInTaglessElementSExp(macroId: Int, macroName: String?, lengthPrefixed: Boolean) { + val start = buffer.position() + buffer.writeByte(OpCode.TAGLESS_ELEMENT_SEXP) + if (lengthPrefixed) { + currentContainer = containerStack.push { it.reset(TE_SEXP_W_LENGTH_PREFIXED_MACRO, start) } + buffer.writeByte(OpCode.LENGTH_PREFIXED_MACRO_INVOCATION) + currentContainer.metadataOffset += 1 + buffer.writeFlexUInt(macroId) + } else { + currentContainer = containerStack.push { it.reset(TE_SEXP, start) } + currentContainer.metadataOffset += writeEExpMacroIdWithoutLengthPrefix(macroId) + } + buffer.reserve(lengthPrefixPreallocation) + } + + override fun stepInTaglessEExp() { + when (currentContainer.type) { + TE_LIST_W_LENGTH_PREFIXED_MACRO, + TE_SEXP_W_LENGTH_PREFIXED_MACRO -> { + currentContainer = containerStack.push { 
it.reset(PREFIXED_TAGLESS_EEXP, buffer.position()) } + buffer.reserve(lengthPrefixPreallocation) + currentContainer.metadataOffset = 0 + } + TE_LIST, + TE_SEXP -> currentContainer = containerStack.push { it.reset(TAGLESS_EEXP, buffer.position()) } + else -> throw IonException("Cannot step into a tagless e-expression here unless in a tagless-element sequence.") + } + } + + override fun writeTaglessInt(implicitOpcode: Int, value: Long) { + val currentContainer = currentContainer + when (implicitOpcode) { + OpCode.INT_8, + OpCode.INT_16, + OpCode.INT_32, + OpCode.INT_64 -> { + val length = implicitOpcode - OpCode.INT_0 + buffer.writeFixedIntOrUInt(value, length) + currentContainer.length += length + } + OpCode.TE_UINT_8, + OpCode.TE_UINT_16, + OpCode.TE_UINT_32, + OpCode.TE_UINT_64 -> { + val length = implicitOpcode - 0xE0 + buffer.writeFixedIntOrUInt(value, length) + currentContainer.length += length + } + OpCode.TE_FLEX_INT -> currentContainer.length += buffer.writeFlexInt(value) + OpCode.TE_FLEX_UINT -> currentContainer.length += buffer.writeFlexUInt(value) + else -> throw IonException("Not a valid tagless int opcode: $implicitOpcode") + } + currentContainer.numChildren++ + } + + override fun writeTaglessInt(implicitOpcode: Int, value: BigInteger) { + val currentContainer = currentContainer + when (implicitOpcode) { + OpCode.INT_8, + OpCode.INT_16, + OpCode.INT_32, + OpCode.INT_64 -> { + val length = implicitOpcode - OpCode.INT_0 + buffer.writeFixedIntOrUInt(value.toLong(), length) + currentContainer.length += length + } + OpCode.TE_UINT_8, + OpCode.TE_UINT_16, + OpCode.TE_UINT_32, + OpCode.TE_UINT_64 -> { + val length = implicitOpcode - 0xE0 + buffer.writeFixedIntOrUInt(value.toLong(), length) + currentContainer.length += length + } + OpCode.TE_FLEX_INT -> currentContainer.length += buffer.writeFlexInt(value) + OpCode.TE_FLEX_UINT -> currentContainer.length += buffer.writeFlexUInt(value) + else -> throw IonException("Not a valid tagless int opcode: 
/**
 * Writes [value] without an opcode, using the fixed width selected by [implicitOpcode], then adds the
 * bytes written to the current container's length and counts the value as one child.
 *
 * `OpCode.FLOAT_32` writes the 4-byte float bit pattern; `OpCode.FLOAT_64` widens the value to a double
 * and writes its 8-byte bit pattern. `OpCode.FLOAT_16` is not implemented yet.
 *
 * @throws IonException if [implicitOpcode] is not a float opcode handled here.
 */
override fun writeTaglessFloat(implicitOpcode: Int, value: Float) {
    val encodedSize = when (implicitOpcode) {
        OpCode.FLOAT_16 -> TODO()
        OpCode.FLOAT_32 -> {
            buffer.writeFixedIntOrUInt(floatToIntBits(value).toLong(), 4)
            4
        }
        OpCode.FLOAT_64 -> {
            buffer.writeFixedIntOrUInt(doubleToRawLongBits(value.toDouble()), 8)
            8
        }
        else -> throw IonException("Not a valid tagless float opcode: $implicitOpcode")
    }
    val container = currentContainer
    container.length += encodedSize
    container.numChildren++
}
/**
 * Writes [text] as inline FlexSym text: a FlexInt holding `-1 - size`, followed by the UTF-8 bytes,
 * where `size` is the encoded length of the text.
 *
 * @return the total number of bytes written (FlexInt header plus text bytes).
 */
private fun WriteBuffer.writeFlexSymText(text: CharSequence): Int {
    val utf8 = utf8StringEncoder.encode(text.toString())
    val utf8Size = utf8.encodedLength
    // The header is the negated (size + 1), so empty text is written as -1.
    val headerSize = writeFlexInt(-(utf8Size.toLong() + 1))
    writeBytes(utf8.buffer, 0, utf8Size)
    return headerSize + utf8Size
}
- // 0x59 Reserved - public static final byte FLOAT_ZERO_LENGTH = 0x5A; - public static final byte FLOAT_16 = 0x5B; - public static final byte FLOAT_32 = 0x5C; - public static final byte FLOAT_64 = 0x5D; - public static final byte BOOLEAN_TRUE = 0x5E; - public static final byte BOOLEAN_FALSE = 0x5F; - - public static final byte DECIMAL_ZERO_LENGTH = 0x60; - // 0x61-0x6E are additional lengths of decimals. - public static final byte POSITIVE_ZERO_DECIMAL = 0x6F; - - public static final byte TIMESTAMP_YEAR_PRECISION = 0x70; - public static final byte TIMESTAMP_MONTH_PRECISION = 0x71; - public static final byte TIMESTAMP_DAY_PRECISION = 0x72; - public static final byte TIMESTAMP_MINUTE_PRECISION = 0x73; - public static final byte TIMESTAMP_SECOND_PRECISION = 0x74; - public static final byte TIMESTAMP_MILLIS_PRECISION = 0x75; - public static final byte TIMESTAMP_MICROS_PRECISION = 0x76; - public static final byte TIMESTAMP_NANOS_PRECISION = 0x77; - public static final byte TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET = 0x78; - public static final byte TIMESTAMP_SECOND_PRECISION_WITH_OFFSET = 0x79; - public static final byte TIMESTAMP_MILLIS_PRECISION_WITH_OFFSET = 0x7A; - public static final byte TIMESTAMP_MICROS_PRECISION_WITH_OFFSET = 0x7B; - public static final byte TIMESTAMP_NANOS_PRECISION_WITH_OFFSET = 0x7C; - // 0x7D-0x7F Reserved - - public static final byte STRING_ZERO_LENGTH = (byte) 0x80; - - public static final byte INLINE_SYMBOL_ZERO_LENGTH = (byte) 0x90; - - public static final byte SYMBOL_ADDRESS_1_BYTE = (byte) 0xE1; - public static final byte SYMBOL_ADDRESS_2_BYTES = (byte) 0xE2; - public static final byte SYMBOL_ADDRESS_MANY_BYTES = (byte) 0xE3; - public static final byte ANNOTATIONS_1_SYMBOL_ADDRESS = (byte) 0xE4; - public static final byte ANNOTATIONS_2_SYMBOL_ADDRESS = (byte) 0xE5; - public static final byte ANNOTATIONS_MANY_SYMBOL_ADDRESS = (byte) 0xE6; - public static final byte ANNOTATIONS_1_FLEX_SYM = (byte) 0xE7; - public static final byte 
/**
 * Represents a slice of bytes that need to be overwritten by a variable length, unsigned integer that is too large
 * to fit into the specified slice.
 *
 * <p>Instances are mutable so that they can be reused: {@link #initialize(long, int, long)} sets all fields and
 * {@link #clear()} resets them to -1.
 */
public class PatchPoint {
    /**
     * Position of the data being patched out.
     */
    public long oldPosition;
    /**
     * Length of the data being patched out.
     */
    public int oldLength;
    /**
     * Size of the container data or annotations.
     */
    public long length;

    /**
     * Creates a patch point with every field set to -1, the same state {@link #clear()} produces.
     */
    public PatchPoint() {
        oldPosition = -1;
        oldLength = -1;
        length = -1;
    }

    /**
     * Returns a debug representation of the form {@code (PP old::(<oldPosition> <oldLength>) patch::(<length>))}.
     */
    @Override
    public String toString() {
        // The trailing "))" closes both the patch::( group and the outer (PP group.
        return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + "))";
    }

    /**
     * Sets all fields of this patch point.
     *
     * @param oldPosition position of the data being patched out.
     * @param oldLength length of the data being patched out.
     * @param length size of the container data or annotations.
     * @return this instance.
     */
    public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) {
        this.oldPosition = oldPosition;
        this.oldLength = oldLength;
        this.length = length;
        return this;
    }

    /**
     * Resets every field to -1.
     *
     * @return this instance.
     */
    public PatchPoint clear() {
        return initialize(-1, -1, -1);
    }
}
+ if (!value.precision.includes(Timestamp.Precision.MINUTE)) { + buffer.writeByte(shortOpcode.toByte()) + return 1 + writeTaglessTimestampValue(buffer, shortOpcode, value) + } + + // Condition 2: The fractional seconds are a common precision. + if (value.zFractionalSecond != null) { + val secondsScale = value.zFractionalSecond.scale() + if (secondsScale != 0 && secondsScale != 3 && secondsScale != 6 && secondsScale != 9) { + buffer.writeByte(OpCode.VARIABLE_LENGTH_TIMESTAMP.toByte()) + val bodyLength = writeLongFormTimestampBody(buffer, value) + return 1 + bodyLength + } else { + shortOpcode += secondsScale / 3 + } + } + + // Condition 3: The local offset is either UTC, unknown, or falls between -14:00 to +14:00 and is divisible by 15 minutes. + val offset = value.localOffset + if (offset == null || offset == 0) { + buffer.writeByte(shortOpcode.toByte()) + return 1 + writeTaglessTimestampValue(buffer, shortOpcode, value) + } else if (offset >= -14 * 60 && offset <= 14 * 60 && offset % 15 == 0) { + shortOpcode += OpCode.TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET - OpCode.TIMESTAMP_MINUTE_PRECISION + buffer.writeByte(shortOpcode.toByte()) + return 1 + writeShortTimestampWithOffsetBody(buffer, shortOpcode, value) + } else { + buffer.writeByte(OpCode.VARIABLE_LENGTH_TIMESTAMP.toByte()) + val bodyLength = writeLongFormTimestampBody(buffer, value) + return 1 + bodyLength + } + } + + @JvmStatic + fun writeTaglessTimestampValue(buffer: WriteBuffer, implicitOpcode: Int, value: Timestamp): Int { + // Rather than have a lot of early-escape branching points, we'll just fill the bits with all the + // timestamp fields and then have one branch at the end to write the correct number of bits. + // This also keeps the code a little shorter. + + // TODO(perf) revisit this to see if this is slower than branching earlier. 
+ + var bits = (value.year - 1970L) + bits = bits or ((value.month.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_MONTH_BIT_OFFSET) + bits = bits or ((value.day.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_DAY_BIT_OFFSET) + bits = bits or ((value.hour.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_HOUR_BIT_OFFSET) + bits = bits or ((value.minute.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_MINUTE_BIT_OFFSET) + if (value.localOffset != null) { + bits = bits or Ion_1_1_Constants.S_U_TIMESTAMP_UTC_FLAG.toLong() + } + bits = bits or ((value.second.toLong()) shl Ion_1_1_Constants.S_U_TIMESTAMP_SECOND_BIT_OFFSET) + + val size = when (implicitOpcode) { + OpCode.TIMESTAMP_YEAR_PRECISION -> { + // Chop off the month and day bits, if there are any. + bits = bits and ((1L shl Ion_1_1_Constants.S_TIMESTAMP_MONTH_BIT_OFFSET) - 1) + 1 + } + OpCode.TIMESTAMP_MONTH_PRECISION -> { + // Chop off the day bits, if there are any. + bits = bits and ((1L shl Ion_1_1_Constants.S_TIMESTAMP_DAY_BIT_OFFSET) - 1) + 2 + } + OpCode.TIMESTAMP_DAY_PRECISION -> 2 + OpCode.TIMESTAMP_MINUTE_PRECISION -> 4 + OpCode.TIMESTAMP_SECOND_PRECISION -> 5 + OpCode.TIMESTAMP_MILLIS_PRECISION -> { + val fractionalSeconds = value.zFractionalSecond.unscaledValue().toLong() + bits = bits or (fractionalSeconds shl Ion_1_1_Constants.S_U_TIMESTAMP_FRACTION_BIT_OFFSET) + 6 + } + OpCode.TIMESTAMP_MICROS_PRECISION -> { + val fractionalSeconds = value.zFractionalSecond.unscaledValue().toLong() + bits = bits or (fractionalSeconds shl Ion_1_1_Constants.S_U_TIMESTAMP_FRACTION_BIT_OFFSET) + 7 + } + OpCode.TIMESTAMP_NANOS_PRECISION -> { + val fractionalSeconds = value.zFractionalSecond.unscaledValue().toLong() + bits = bits or (fractionalSeconds shl Ion_1_1_Constants.S_U_TIMESTAMP_FRACTION_BIT_OFFSET) + 8 + } + else -> throw IllegalStateException("This is unreachable!") + } + buffer.writeFixedIntOrUInt(bits, size) + return size + } + + @JvmStatic + private fun writeShortTimestampWithOffsetBody(buffer: WriteBuffer, implicitOpcode: 
Int, value: Timestamp): Int { + // Rather than have a lot of early-escape branching points, we'll just fill the bits with all the + // timestamp fields and then have one branch at the end to write the correct number of bits. + // This also keeps the code a little shorter. + + // TODO(perf) revisit this to see if this is slower than branching earlier. + + var bits = (value.year - 1970L) + bits = bits or ((value.month.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_MONTH_BIT_OFFSET) + bits = bits or ((value.day.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_DAY_BIT_OFFSET) + bits = bits or ((value.hour.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_HOUR_BIT_OFFSET) + bits = bits or ((value.minute.toLong()) shl Ion_1_1_Constants.S_TIMESTAMP_MINUTE_BIT_OFFSET) + val localOffset = (value.localOffset.toLong() / 15) + (14 * 4) + bits = bits or ((localOffset and Ion_1_1_Constants.LEAST_SIGNIFICANT_7_BITS) shl Ion_1_1_Constants.S_O_TIMESTAMP_OFFSET_BIT_OFFSET) + bits = bits or ((value.second.toLong()) shl Ion_1_1_Constants.S_O_TIMESTAMP_SECOND_BIT_OFFSET) + + buffer.writeFixedIntOrUInt(bits, 5) + + // The fractional seconds bits will be put into a separate long because we need nine bytes total + // if there are nanoseconds (which is too much for one long) and the boundary between the seconds + // and fractional seconds subfields conveniently aligns with a byte boundary. 
+ var fractionBits = 0L + + val size = when (implicitOpcode) { + OpCode.TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET, + OpCode.TIMESTAMP_SECOND_PRECISION_WITH_OFFSET -> { + 5 + } + OpCode.TIMESTAMP_MILLIS_PRECISION_WITH_OFFSET -> { + fractionBits = value.zFractionalSecond.unscaledValue().toLong() + buffer.writeFixedIntOrUInt(fractionBits, 2) + 7 + } + OpCode.TIMESTAMP_MICROS_PRECISION_WITH_OFFSET -> { + fractionBits = value.zFractionalSecond.unscaledValue().toLong() + buffer.writeFixedIntOrUInt(fractionBits, 3) + 8 + } + OpCode.TIMESTAMP_NANOS_PRECISION_WITH_OFFSET -> { + fractionBits = value.zFractionalSecond.unscaledValue().toLong() + buffer.writeFixedIntOrUInt(fractionBits, 4) + 9 + } + else -> throw IllegalStateException("This is unreachable!") + } + return size + } + + /** + * Writes a long-form timestamp. + * Value may not be null. + * Only visible for testing. If calling from outside this class, use writeTimestampValue instead. + */ + @JvmStatic + internal fun writeLongFormTimestampBody(buffer: WriteBuffer, value: Timestamp): Int { + var bits = value.year.toLong() + if (value.precision == Timestamp.Precision.YEAR) { + buffer.writeFlexUInt(2) + buffer.writeFixedIntOrUInt(bits, 2) + return 3 // FlexUInt + 2 bytes data + } + + bits = bits or ((value.month.toLong()) shl Ion_1_1_Constants.L_TIMESTAMP_MONTH_BIT_OFFSET) + if (value.precision == Timestamp.Precision.MONTH) { + buffer.writeFlexUInt(3) + buffer.writeFixedIntOrUInt(bits, 3) + return 4 // FlexUInt + 3 bytes data + } + + bits = bits or ((value.day.toLong()) shl Ion_1_1_Constants.L_TIMESTAMP_DAY_BIT_OFFSET) + if (value.precision == Timestamp.Precision.DAY) { + buffer.writeFlexUInt(3) + buffer.writeFixedIntOrUInt(bits, 3) + return 4 // FlexUInt + 3 bytes data + } + + bits = bits or ((value.hour.toLong()) shl Ion_1_1_Constants.L_TIMESTAMP_HOUR_BIT_OFFSET) + bits = bits or ((value.minute.toLong()) shl Ion_1_1_Constants.L_TIMESTAMP_MINUTE_BIT_OFFSET) + var localOffsetValue = 
Ion_1_1_Constants.L_TIMESTAMP_UNKNOWN_OFFSET_VALUE.toLong() + if (value.localOffset != null) { + localOffsetValue = (value.localOffset + (24 * 60)).toLong() + } + bits = bits or (localOffsetValue shl Ion_1_1_Constants.L_TIMESTAMP_OFFSET_BIT_OFFSET) + + if (value.precision == Timestamp.Precision.MINUTE) { + buffer.writeFlexUInt(6) + buffer.writeFixedIntOrUInt(bits, 6) + return 7 // FlexUInt + 6 bytes data + } + + bits = bits or ((value.second.toLong()) shl Ion_1_1_Constants.L_TIMESTAMP_SECOND_BIT_OFFSET) + var secondsScale = 0 + if (value.zFractionalSecond != null) { + secondsScale = value.zFractionalSecond.scale() + } + if (secondsScale == 0) { + buffer.writeFlexUInt(7) + buffer.writeFixedIntOrUInt(bits, 7) + return 8 // FlexUInt + 7 bytes data + } + + val fractionalSeconds = value.zFractionalSecond + val coefficient = fractionalSeconds.unscaledValue() + val exponent = fractionalSeconds.scale().toLong() + val numCoefficientBytes = PrimitiveEncoder.flexUIntLength(coefficient) + val numExponentBytes = WriteBuffer.fixedUIntLength(exponent) + // Years-seconds data (7 bytes) + fraction coefficient + fraction exponent + val dataLength = 7 + numCoefficientBytes + numExponentBytes + + val lengthOfLength = buffer.writeFlexUInt(dataLength) + buffer.writeFixedIntOrUInt(bits, 7) + buffer.writeFlexUInt(coefficient) + buffer.writeFixedUInt(exponent) + + return lengthOfLength + dataLength + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java index 2955147be7..7d5980937b 100644 --- a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java +++ b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. 
- * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import java.io.Closeable; @@ -32,10 +19,19 @@ private Block current; private int index; private Runnable endOfBlockCallBack; + private byte[] scratch = new byte[32]; public WriteBuffer(final BlockAllocator allocator, Runnable endOfBlockCallBack) { + if (allocator.getBlockSize() < 10) { + // This restriction means that we should never have to write a FlexInt or FlexUInt across a block boundary + // because blocks are always big enough to hold a long written as a FlexInt or FlexUInt. + // If we're near the end of a block, we'll just start a new block a little early to avoid having to write + // across the boundary. + throw new IllegalArgumentException("WriteBuffer requires an allocator with a block size of at least 10."); + } + this.allocator = allocator; this.blocks = new ArrayList(); @@ -90,12 +86,49 @@ public void truncate(final long position) { final int index = index(position); final int offset = offset(position); - final Block block = blocks.get(index); - this.index = index; - block.limit = offset; - current = block; + while (this.index != index) { + blocks.remove(this.index--); + } + current = blocks.get(index); + current.limit = offset; } + /** + * Moves forward without writing any data. + * + * There is no guarantee as to what values the reserved bytes will have. + * Only use this method if you will overwrite the bytes later with valid data, or if you have already written data + * to these bytes.
+ * + * Returns the position of the first reserved byte. + */ + public long reserve(int numBytes) { + long startOfReservedBytes = position(); + // It would also fit in the current block if numBytes == current.remaining(), but then we would have to + // increment `index` and check whether to allocate a new block. So, we'll optimize the early return for the most + // common situation, and lump the == case into the slower path. + if (numBytes < current.remaining()) { + current.limit += numBytes; + return startOfReservedBytes; + } + + while (numBytes > 0) { + int numBytesInThisBlock = Math.min(current.remaining(), numBytes); + current.limit += numBytesInThisBlock; + numBytes -= numBytesInThisBlock; + + if (current.remaining() == 0) { + if (index == blocks.size() - 1) { + allocateNewBlock(); + } + index++; + current = blocks.get(index); + } + } + return startOfReservedBytes; + } + + /** Returns the amount of capacity left in the current block. */ public int remaining() { @@ -125,6 +158,26 @@ public void writeByte(final byte octet) block.limit++; } + /** Writes two octets to the buffer, expanding if necessary. 
*/ + public void write2Bytes(final byte octet0, final byte octet1) + { + if (remaining() < 2) + { + if (index == blocks.size() - 1) + { + allocateNewBlock(); + } + index++; + current = blocks.get(index); + } + final Block block = current; + final byte[] data = block.data; + int limit = block.limit; + data[limit++] = octet0; + data[limit++] = octet1; + block.limit = limit; + } + // slow in the sense that we do all kind of block boundary checking private void writeBytesSlow(final byte[] bytes, int off, int len) { @@ -1303,6 +1356,18 @@ public static int flexUIntLength(final long value) { return (numMagnitudeBitsRequired - 1) / 7 + 1; } + /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ + public int writeFlexUInt(final int value) { + if (value < 0) { + throw new IllegalArgumentException("Attempted to write a FlexUInt for " + value); + } + int numBytes = PrimitiveEncoder.flexUIntLength(value); + // writeFlexIntOrUIntAt does not advance index or limit, so we reserve the bytes, and then write out the number + long position = reserve(numBytes); + writeFlexIntOrUIntAt(position, value, numBytes); + return numBytes; + } + /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ public int writeFlexUInt(final long value) { if (value < 0) { @@ -1312,6 +1377,31 @@ public int writeFlexUInt(final long value) { return writeFlexIntOrUInt(value, numBytes); } + /** + * Writes a FlexInt or FlexUInt to this WriteBuffer at the specified position. + * + * Because the flex int and flex uint encodings are so similar, we can use this method to write either one as long + * as we provide the correct number of bytes needed to encode the value. + * + * If the allocator's block size is ever less than 10 bytes, this may throw an IndexOutOfBoundsException. 
+ */ + public void writeFlexIntOrUIntAt(final long position, final long value, final int numBytes) { + int index = index(position); + Block block = blocks.get(index); + int dataOffset = offset(position); + if (dataOffset + numBytes < block.capacity()) { + PrimitiveEncoder.writeFlexIntOrUIntInto(block.data, dataOffset, value, numBytes); + } else { + PrimitiveEncoder.writeFlexIntOrUIntInto(scratch, 0, value, numBytes); + if (index == blocks.size() - 1) { + allocateNewBlock(); + } + for (int i = 0; i < numBytes; i++) { + writeUInt8At(position + i, scratch[i]); + } + } + } + /** * Because the flex int and flex uint encodings are so similar, we can use this method to write either one as long * as we provide the correct number of bytes needed to encode the value. @@ -1424,14 +1514,8 @@ private int writeFlexIntOrUIntForBigInteger(final BigInteger value, final int nu /** Get the length of FixedInt for the provided value. */ public static int fixedIntLength(final long value) { - int numMagnitudeBitsRequired; - if (value < 0) { - int numLeadingOnes = Long.numberOfLeadingZeros(~value); - numMagnitudeBitsRequired = 64 - numLeadingOnes; - } else { - int numLeadingZeros = Long.numberOfLeadingZeros(value); - numMagnitudeBitsRequired = 64 - numLeadingZeros; - } + int numLeadingSignBits = java.lang.Long.numberOfLeadingZeros((value >> 63) ^ value); + int numMagnitudeBitsRequired = 64 - numLeadingSignBits; return numMagnitudeBitsRequired / 8 + 1; } @@ -1491,6 +1575,7 @@ public int writeFixedIntOrUInt(final long value, final int numBytes) { * either one as long as we provide the correct number of bytes needed to encode the value. */ private int _writeFixedIntOrUInt(final long value, final int numBytes) { + // TODO(perf): Test whether it's faster to have nested `if` or to have a single `switch`. 
writeByte((byte) value); if (numBytes > 1) { writeByte((byte) (value >> 8)); @@ -1516,6 +1601,18 @@ private int _writeFixedIntOrUInt(final long value, final int numBytes) { return numBytes; } + /** + * Writes a FixedInt or FixedUInt for an arbitrarily large integer that is represented + * as a byte array in which the most significant byte is the first in the array, and the least + * significant byte is the last in the array. + */ + public int writeFixedIntOrUInt(final byte[] value) { + for (int i = value.length - 1; i >= 0; i--) { + writeByte(value[i]); + } + return value.length; + } + /** Write the entire buffer to output stream. */ public void writeTo(final OutputStream out) throws IOException { diff --git a/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt b/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt index 6b86eb4026..2cfbee6be0 100644 --- a/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt @@ -223,7 +223,7 @@ interface IonRawWriter_1_1 { * If [macroName] is non-null, and the implementation supports invoking macros by name, then the implementation * MUST write the macro name rather than the macro id. */ - fun stepInTaglessElementList(macroId: Int, macroName: String?) + fun stepInTaglessElementList(macroId: Int, macroName: String?, lengthPrefixed: Boolean) /** * Starts a tagless-element s-exp, using the given opcode for its child elements. @@ -238,7 +238,7 @@ interface IonRawWriter_1_1 { * If [macroName] is non-null, and the implementation supports invoking macros by name, then the implementation * MUST write the macro name rather than the macro id. */ - fun stepInTaglessElementSExp(macroId: Int, macroName: String?) + fun stepInTaglessElementSExp(macroId: Int, macroName: String?, lengthPrefixed: Boolean) /** * Steps into a tagless E-Expression. 
@@ -247,14 +247,6 @@ interface IonRawWriter_1_1 { */ fun stepInTaglessEExp() - /** - * Writes an integer value without writing the opcode, using the [implicitOpcode] to determine the correct - * encoding for the value payload. - * - * @throws com.amazon.ion.IonException If the [implicitOpcode] is not a valid opcode for a tagless int value. - */ - fun writeTaglessInt(implicitOpcode: Int, value: Int) - /** * Writes an integer value without writing the opcode, using the [implicitOpcode] to determine the correct * encoding for the value payload. diff --git a/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt index 2d5139d79b..98d7c113e3 100644 --- a/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt +++ b/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt @@ -791,11 +791,11 @@ class IonRawTextWriterTest_1_1 { stepOut() } assertWriterOutputEquals("[{:foo} ]") { - stepInTaglessElementList(1, "foo") + stepInTaglessElementList(1, "foo", false) stepOut() } assertWriterOutputEquals("[{:foo} ]", builderConfigurator = { withPrettyPrinting() }) { - stepInTaglessElementList(1, "foo") + stepInTaglessElementList(1, "foo", false) stepOut() } } @@ -826,7 +826,7 @@ class IonRawTextWriterTest_1_1 { @Test fun `write a tagless element list with macro-shape`() { assertWriterOutputEquals("[{:foo} (1 2),(3 4)]") { - stepInTaglessElementList(1, "foo") + stepInTaglessElementList(1, "foo", false) stepInTaglessEExp() writeInt(1) writeInt(2) @@ -850,11 +850,11 @@ class IonRawTextWriterTest_1_1 { stepOut() } assertWriterOutputEquals("({:foo} )") { - stepInTaglessElementSExp(1, "foo") + stepInTaglessElementSExp(1, "foo", false) stepOut() } assertWriterOutputEquals("({:foo} )", builderConfigurator = { withPrettyPrinting() }) { - stepInTaglessElementSExp(1, "foo") + stepInTaglessElementSExp(1, "foo", false) stepOut() } } @@ -883,7 +883,7 @@ class IonRawTextWriterTest_1_1 { @Test fun `write a tagless 
element sexp with macro-shape`() { assertWriterOutputEquals("({:123} (1 2) (3 4))") { - stepInTaglessElementSExp(123, null) + stepInTaglessElementSExp(123, null, false) stepInTaglessEExp() writeInt(1) writeInt(2) diff --git a/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java b/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java deleted file mode 100644 index 0e316447e7..0000000000 --- a/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java +++ /dev/null @@ -1,723 +0,0 @@ -package com.amazon.ion.impl.bin; - -import com.amazon.ion.BitUtils; -import com.amazon.ion.Decimal; -import com.amazon.ion.IonType; -import com.amazon.ion.Timestamp; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.converter.ArgumentConversionException; -import org.junit.jupiter.params.converter.ConvertWith; -import org.junit.jupiter.params.converter.TypedArgumentConverter; -import org.junit.jupiter.params.provider.CsvSource; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.math.BigInteger; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.function.BiFunction; - -public class IonEncoder_1_1Test { - - private static BlockAllocator ALLOCATOR = BlockAllocatorProviders.basicProvider().vendAllocator(11); - private WriteBuffer buf; - - @BeforeEach - public void setup() { - buf = new WriteBuffer(ALLOCATOR, () -> {}); - } - - private byte[] bytes() { - final ByteArrayOutputStream out = new ByteArrayOutputStream(); - try { - buf.writeTo(out); - } catch (final IOException e) { - throw new IllegalStateException(e); - } - return out.toByteArray(); - } - - /** - * Checks that the function writes the expected bytes and returns the expected count of written bytes for the - * given input value. 
The expected bytes should be a string of space-separated hexadecimal pairs. - */ - private void assertWritingValue(String expectedBytes, T value, BiFunction writeOperation) { - int numBytes = writeOperation.apply(buf, value); - Assertions.assertEquals(expectedBytes, byteArrayToHex(bytes())); - Assertions.assertEquals(byteLengthFromHexString(expectedBytes), numBytes); - } - - /** - * Checks that the function writes the expected bytes and returns the expected count of written bytes for the - * given input value. The expected bytes should be a string of space-separated hexadecimal pairs. - */ - private void assertWritingValue(byte[] expectedBytes, T value, BiFunction writeOperation) { - int numBytes = writeOperation.apply(buf, value); - Assertions.assertEquals(expectedBytes, bytes()); - Assertions.assertEquals(expectedBytes.length, numBytes); - } - - /** - * Checks that the function writes the expected bytes and returns the expected count of written bytes for the - * given input value. The expectedBytes should be a string of space-separated binary octets. 
- */ - private void assertWritingValueWithBinary(String expectedBytes, T value, BiFunction writeOperation) { - int numBytes = writeOperation.apply(buf, value); - Assertions.assertEquals(expectedBytes, byteArrayToBitString(bytes())); - Assertions.assertEquals(byteLengthFromBitString(expectedBytes), numBytes); - } - - @ParameterizedTest - @CsvSource({ - " NULL, EA", - " BOOL, EB 00", - " INT, EB 01", - " FLOAT, EB 02", - " DECIMAL, EB 03", - "TIMESTAMP, EB 04", - " STRING, EB 05", - " SYMBOL, EB 06", - " BLOB, EB 07", - " CLOB, EB 08", - " LIST, EB 09", - " SEXP, EB 0A", - " STRUCT, EB 0B", - }) - public void testWriteNullValue(IonType value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeNullValue); - } - - @Test - public void testWriteNullValueForDatagram() { - Assertions.assertThrows(IllegalArgumentException.class, () -> IonEncoder_1_1.writeNullValue(buf, IonType.DATAGRAM)); - } - - @ParameterizedTest - @CsvSource({ - "true, 5E", - "false, 5F", - }) - public void testWriteBooleanValue(boolean value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeBoolValue); - } - - @ParameterizedTest - @CsvSource({ - " 0, 50", - " 1, 51 01", - " 17, 51 11", - " 127, 51 7F", - " 128, 52 80 00", - " 5555, 52 B3 15", - " 32767, 52 FF 7F", - " 32768, 53 00 80 00", - " 292037, 53 C5 74 04", - " 321672342, 54 96 54 2C 13", - " 64121672342, 55 96 12 F3 ED 0E", - " 1274120283167, 56 1F A4 7C A7 28 01", - " 851274120283167, 57 1F C4 8B B3 3A 06 03", - " 72624976668147840, 58 80 40 20 10 08 04 02 01", - " 9223372036854775807, 58 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE - " -1, 51 FF", - " -2, 51 FE", - " -14, 51 F2", - " -128, 51 80", - " -129, 52 7F FF", - " -944, 52 50 FC", - " -32768, 52 00 80", - " -32769, 53 FF 7F FF", - " -8388608, 53 00 00 80", - " -8388609, 54 FF FF 7F FF", - " -72624976668147841, 58 7F BF DF EF F7 FB FD FE", - "-9223372036854775808, 58 00 00 00 00 00 00 00 80", // Long.MIN_VALUE - 
}) - public void testWriteIntegerValue(long value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeIntValue); - } - - @ParameterizedTest - @CsvSource({ - " 0, 50", - " 1, 51 01", - " 17, 51 11", - " 127, 51 7F", - " 128, 52 80 00", - " 5555, 52 B3 15", - " 32767, 52 FF 7F", - " 32768, 53 00 80 00", - " 292037, 53 C5 74 04", - " 321672342, 54 96 54 2C 13", - " 64121672342, 55 96 12 F3 ED 0E", - " 1274120283167, 56 1F A4 7C A7 28 01", - " 851274120283167, 57 1F C4 8B B3 3A 06 03", - " 72624976668147840, 58 80 40 20 10 08 04 02 01", - " 9223372036854775807, 58 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE - " 9223372036854775808, F5 13 00 00 00 00 00 00 00 80 00", - "999999999999999999999999999999, F5 1B FF FF FF 3F EA ED 74 46 D0 9C 2C 9F 0C", - " -1, 51 FF", - " -2, 51 FE", - " -14, 51 F2", - " -128, 51 80", - " -129, 52 7F FF", - " -944, 52 50 FC", - " -32768, 52 00 80", - " -32769, 53 FF 7F FF", - " -8388608, 53 00 00 80", - " -8388609, 54 FF FF 7F FF", - " -72624976668147841, 58 7F BF DF EF F7 FB FD FE", - " -9223372036854775808, 58 00 00 00 00 00 00 00 80", // Long.MIN_VALUE - " -9223372036854775809, F5 13 FF FF FF FF FF FF FF 7F FF", - "-99999999999999999999999999999, F5 1B 01 00 00 60 35 E8 8D 92 51 F0 E1 BC FE", - }) - public void testWriteIntegerValueForBigInteger(BigInteger value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeIntValue); - } - - @Test - public void testWriteIntegerValueForNullBigInteger() { - int numBytes = IonEncoder_1_1.writeIntValue(buf, null); - Assertions.assertEquals("EB 01", byteArrayToHex(bytes())); - Assertions.assertEquals(2, numBytes); - } - - @ParameterizedTest - @CsvSource({ - " 0.0, 5A", - " 1.0, 5C 3F 80 00 00", - " 1.5, 5C 3F C0 00 00", - " 3.1415927, 5C 40 49 0F DB", - " 4.00537109375, 5C 40 80 2C 00", - " 423542.09375, 5C 48 CE CE C3", - " 3.40282347E+38, 5C 7F 7F FF FF", // Float.MAX_VALUE - " -1.0, 5C BF 80 00 00", - " -1.5, 5C BF C0 00 00", - 
" -3.1415927, 5C C0 49 0F DB", - " -4.00537109375, 5C C0 80 2C 00", - " -423542.09375, 5C C8 CE CE C3", - "-3.40282347E+38, 5C FF 7F FF FF", // Float.MIN_VALUE - " NaN, 5C 7F C0 00 00", - " Infinity, 5C 7F 80 00 00", - " -Infinity, 5C FF 80 00 00", - }) - public void testWriteFloatValue(float value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeFloat); - } - - @ParameterizedTest - @CsvSource({ - " 0.0, 5A", - " 1.0, 5C 3F 80 00 00", - " 1.5, 5C 3F C0 00 00", - " 3.141592653589793, 5D 40 09 21 FB 54 44 2D 18", - " 4.00537109375, 5C 40 80 2C 00", - " 4.11111111111, 5D 40 10 71 C7 1C 71 C2 39", - " 423542.09375, 5C 48 CE CE C3", - " 8236423542.09375, 5D 41 FE AE DD 97 61 80 00", - " 1.79769313486231570e+308, 5D 7F EF FF FF FF FF FF FF", // Double.MAX_VALUE - " -1.0, 5C BF 80 00 00", - " -1.5, 5C BF C0 00 00", - " -3.141592653589793, 5D C0 09 21 FB 54 44 2D 18", - " -4.00537109375, 5C C0 80 2C 00", - " -4.11111111111, 5D C0 10 71 C7 1C 71 C2 39", - " -423542.09375, 5C C8 CE CE C3", - " -8236423542.09375, 5D C1 FE AE DD 97 61 80 00", - "-1.79769313486231570e+308, 5D FF EF FF FF FF FF FF FF", // Double.MIN_VALUE - " NaN, 5C 7F C0 00 00", - " Infinity, 5C 7F 80 00 00", - " -Infinity, 5C FF 80 00 00", - }) - public void testWriteFloatValueForDouble(double value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeFloat); - } - - @ParameterizedTest - @CsvSource({ - " 0., 60", - " 0e1, 6F 03", - " 0e63, 6F 7F", - " 0e99, 6F 8E 01", - " 0.0, 6F FF", - " 0.00, 6F FD", - " 0.000, 6F FB", - " 0e-64, 6F 81", - " 0e-99, 6F 76 FE", - " -0., 61 01", - " -0e1, 62 01 01", - " -0e3, 62 01 03", - " -0e127, 62 01 7F", - " -0e199, 63 01 C7 00", - " -0e-1, 62 01 FF", - " -0e-2, 62 01 FE", - " -0e-3, 62 01 FD", - " -0e-127, 62 01 81", - " -0e-199, 63 01 39 FF", - " 0.01, 62 03 FE", - " 0.1, 62 03 FF", - " 1, 61 03", - " 1e1, 62 03 01", - " 1e2, 62 03 02", - " 1e127, 62 03 7F", - " 1e128, 63 03 80 00", - " 1e65536, 
64 03 00 00 01", - " 2, 61 05", - " 7, 61 0F", - " 14, 61 1D", - " 1.0, 62 15 FF", - " 1.00, 63 92 01 FE", - " 1.27, 63 FE 01 FE", - " 3.142, 63 1A 31 FD", - " 3.14159, 64 7C 59 26 FB", - " 3.141593, 65 98 FD FE 02 FA", - " 3.141592653, 66 B0 C9 1C 68 17 F7", - " 3.14159265359, 67 E0 93 7D 56 49 12 F5", - " 3.1415926535897932, 69 80 4C 43 76 65 9E 9C 6F F0", - " 3.1415926535897932384626434, 6E 00 50 E0 DC F7 CC D6 08 48 99 92 3F 03 E7", - "3.141592653589793238462643383, F6 1F 00 E0 2D 8F A4 21 D0 E7 46 C0 87 AA 89 02 E5", - }) - public void testWriteDecimalValue(@ConvertWith(StringToDecimal.class) Decimal value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeDecimalValue); - } - - @Test - public void testWriteDecimalValueForNull() { - int numBytes = IonEncoder_1_1.writeDecimalValue(buf, null); - Assertions.assertEquals("EB 03", byteArrayToHex(bytes())); - Assertions.assertEquals(2, numBytes); - } - - // Because timestamp subfields are smeared across bytes, it's easier to reason about them in 1s and 0s - // instead of hex digits - @ParameterizedTest - @CsvSource({ - // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff - "2023-10-15T01:00Z, 01110011 00110101 01111101 00000001 00001000", - "2023-10-15T01:59Z, 01110011 00110101 01111101 01100001 00001111", - "2023-10-15T11:22Z, 01110011 00110101 01111101 11001011 00001010", - "2023-10-15T23:00Z, 01110011 00110101 01111101 00010111 00001000", - "2023-10-15T23:59Z, 01110011 00110101 01111101 01110111 00001111", - "2023-10-15T11:22:00Z, 01110100 00110101 01111101 11001011 00001010 00000000", - "2023-10-15T11:22:33Z, 01110100 00110101 01111101 11001011 00011010 00000010", - "2023-10-15T11:22:59Z, 01110100 00110101 01111101 11001011 10111010 00000011", - "2023-10-15T11:22:33.000Z, 01110101 00110101 01111101 11001011 00011010 00000010 00000000", - "2023-10-15T11:22:33.444Z, 01110101 00110101 01111101 11001011 00011010 11110010 00000110", - 
"2023-10-15T11:22:33.999Z, 01110101 00110101 01111101 11001011 00011010 10011110 00001111", - "2023-10-15T11:22:33.000000Z, 01110110 00110101 01111101 11001011 00011010 00000010 00000000 00000000", - "2023-10-15T11:22:33.444555Z, 01110110 00110101 01111101 11001011 00011010 00101110 00100010 00011011", - "2023-10-15T11:22:33.999999Z, 01110110 00110101 01111101 11001011 00011010 11111110 00001000 00111101", - "2023-10-15T11:22:33.000000000Z, 01110111 00110101 01111101 11001011 00011010 00000010 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555666Z, 01110111 00110101 01111101 11001011 00011010 01001010 10000110 11111101 01101001", - "2023-10-15T11:22:33.999999999Z, 01110111 00110101 01111101 11001011 00011010 11111110 00100111 01101011 11101110", - }) - public void testWriteTimestampValueWithUtcShortForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { - assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue); - } - - - @ParameterizedTest - @CsvSource({ - // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff - "1970T, 01110000 00000000", - "2023T, 01110000 00110101", - "2097T, 01110000 01111111", - "2023-01T, 01110001 10110101 00000000", - "2023-10T, 01110001 00110101 00000101", - "2023-12T, 01110001 00110101 00000110", - "2023-10-01T, 01110010 00110101 00001101", - "2023-10-15T, 01110010 00110101 01111101", - "2023-10-31T, 01110010 00110101 11111101", - "2023-10-15T01:00-00:00, 01110011 00110101 01111101 00000001 00000000", - "2023-10-15T01:59-00:00, 01110011 00110101 01111101 01100001 00000111", - "2023-10-15T11:22-00:00, 01110011 00110101 01111101 11001011 00000010", - "2023-10-15T23:00-00:00, 01110011 00110101 01111101 00010111 00000000", - "2023-10-15T23:59-00:00, 01110011 00110101 01111101 01110111 00000111", - "2023-10-15T11:22:00-00:00, 01110100 00110101 01111101 11001011 00000010 00000000", - "2023-10-15T11:22:33-00:00, 01110100 00110101 01111101 11001011 
00010010 00000010", - "2023-10-15T11:22:59-00:00, 01110100 00110101 01111101 11001011 10110010 00000011", - "2023-10-15T11:22:33.000-00:00, 01110101 00110101 01111101 11001011 00010010 00000010 00000000", - "2023-10-15T11:22:33.444-00:00, 01110101 00110101 01111101 11001011 00010010 11110010 00000110", - "2023-10-15T11:22:33.999-00:00, 01110101 00110101 01111101 11001011 00010010 10011110 00001111", - "2023-10-15T11:22:33.000000-00:00, 01110110 00110101 01111101 11001011 00010010 00000010 00000000 00000000", - "2023-10-15T11:22:33.444555-00:00, 01110110 00110101 01111101 11001011 00010010 00101110 00100010 00011011", - "2023-10-15T11:22:33.999999-00:00, 01110110 00110101 01111101 11001011 00010010 11111110 00001000 00111101", - "2023-10-15T11:22:33.000000000-00:00, 01110111 00110101 01111101 11001011 00010010 00000010 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555666-00:00, 01110111 00110101 01111101 11001011 00010010 01001010 10000110 11111101 01101001", - "2023-10-15T11:22:33.999999999-00:00, 01110111 00110101 01111101 11001011 00010010 11111110 00100111 01101011 11101110", - }) - public void testWriteTimestampValueWithUnknownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { - assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue); - } - - @ParameterizedTest - @CsvSource({ - // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ooooommm ssssssoo ffffffff ffffffff ffffffff ..ffffff - "2023-10-15T01:00-14:00, 01111000 00110101 01111101 00000001 00000000 00000000", - "2023-10-15T01:00+14:00, 01111000 00110101 01111101 00000001 10000000 00000011", - "2023-10-15T01:00-01:15, 01111000 00110101 01111101 00000001 10011000 00000001", - "2023-10-15T01:00+01:15, 01111000 00110101 01111101 00000001 11101000 00000001", - "2023-10-15T01:59+01:15, 01111000 00110101 01111101 01100001 11101111 00000001", - "2023-10-15T11:22+01:15, 01111000 00110101 01111101 11001011 11101010 00000001", - 
"2023-10-15T23:00+01:15, 01111000 00110101 01111101 00010111 11101000 00000001", - "2023-10-15T23:59+01:15, 01111000 00110101 01111101 01110111 11101111 00000001", - "2023-10-15T11:22:00+01:15, 01111001 00110101 01111101 11001011 11101010 00000001", - "2023-10-15T11:22:33+01:15, 01111001 00110101 01111101 11001011 11101010 10000101", - "2023-10-15T11:22:59+01:15, 01111001 00110101 01111101 11001011 11101010 11101101", - "2023-10-15T11:22:33.000+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 00000000 00000000", - "2023-10-15T11:22:33.444+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 10111100 00000001", - "2023-10-15T11:22:33.999+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 11100111 00000011", - "2023-10-15T11:22:33.000000+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 10001011 11001000 00000110", - "2023-10-15T11:22:33.999999+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 00111111 01000010 00001111", - "2023-10-15T11:22:33.000000000+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555666+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 10010010 01100001 01111111 00011010", - "2023-10-15T11:22:33.999999999+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 11111111 11001001 10011010 00111011", - - }) - public void testWriteTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { - assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue); - } - - @ParameterizedTest - @CsvSource({ - // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Coefficient+ Scale - "0001T, 11110111 00000101 00000001 00000000", - "1947T, 11110111 00000101 10011011 00000111", - "9999T, 11110111 
00000101 00001111 00100111", - "1947-01T, 11110111 00000111 10011011 01000111 00000000", - "1947-12T, 11110111 00000111 10011011 00000111 00000011", - "1947-01-01T, 11110111 00000111 10011011 01000111 00000100", - "1947-12-23T, 11110111 00000111 10011011 00000111 01011111", - "1947-12-31T, 11110111 00000111 10011011 00000111 01111111", - "1947-12-23T00:00Z, 11110111 00001101 10011011 00000111 01011111 00000000 10000000 00010110", - "1947-12-23T23:59Z, 11110111 00001101 10011011 00000111 11011111 10111011 10000011 00010110", - "1947-12-23T23:59:00Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", - "1947-12-23T23:59:59Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 11010110 00001110", - "1947-12-23T23:59:00.0Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000001", - "1947-12-23T23:59:00.00Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000010", - "1947-12-23T23:59:00.000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000011", - "1947-12-23T23:59:00.0000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000100", - "1947-12-23T23:59:00.00000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000101", - "1947-12-23T23:59:00.000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000110", - "1947-12-23T23:59:00.0000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000111", - "1947-12-23T23:59:00.00000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00001000", - "1947-12-23T23:59:00.9Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010011 00000001", - "1947-12-23T23:59:00.99Z, 11110111 00010011 10011011 00000111 11011111 
10111011 10000011 00010110 00000000 11000111 00000010", - "1947-12-23T23:59:00.999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10011110 00001111 00000011", - "1947-12-23T23:59:00.9999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00111110 10011100 00000100", - "1947-12-23T23:59:00.99999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00110100 00001100 00000101", - "1947-12-23T23:59:00.999999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00010001 01111010 00000110", - "1947-12-23T23:59:00.9999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 01100111 10001001 00001001 00000111", - "1947-12-23T23:59:00.99999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 00001111 01011110 01011111 00001000", - - "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 10001101", - - "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + - "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + - "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 01101000 00000001", - - "1947-12-23T23:59:00.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999Z, " + - "11110111 10010111 10011011 00000111 11011111 10111011 10000011 
00010110 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 " + - "11111100 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + - "11111111 10010100 10001001 01111001 01101100 11001110 01111000 11110010 01000000 01111101 10100110 11000111 10101000 01000110 01011001 01110001 01001101 " + - "00100000 11110101 01101110 01111010 00001100 00001001 11101111 01111111 11110011 00011110 00010100 11010111 01101000 01110111 10101100 01101100 10001110 " + - "00110010 10110111 10000010 11110010 00110110 01101000 11110010 10100111 10001101", - - - // Offsets - "2048-01-01T01:01-23:59, 11110111 00001101 00000000 01001000 10000100 00010000 00000100 00000000", - "2048-01-01T01:01-00:02, 11110111 00001101 00000000 01001000 10000100 00010000 01111000 00010110", - "2048-01-01T01:01-00:01, 11110111 00001101 00000000 01001000 10000100 00010000 01111100 00010110", - "2048-01-01T01:01-00:00, 11110111 00001101 00000000 01001000 10000100 00010000 11111100 00111111", - "2048-01-01T01:01+00:00, 11110111 00001101 00000000 01001000 10000100 00010000 10000000 00010110", - "2048-01-01T01:01+00:01, 11110111 00001101 00000000 01001000 10000100 00010000 10000100 00010110", - "2048-01-01T01:01+00:02, 11110111 00001101 00000000 01001000 10000100 00010000 10001000 00010110", - "2048-01-01T01:01+23:59, 11110111 00001101 00000000 01001000 10000100 00010000 11111100 00101100", - }) - public void testWriteTimestampValueLongForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { - assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeLongFormTimestampValue); - } - - @ParameterizedTest - @CsvSource({ - // Long form because it's out of the year range - "0001T, 11110111 00000101 00000001 00000000", - "9999T, 11110111 00000101 00001111 00100111", - - // Long form because the offset is too high/low - "2048-01-01T01:01+14:15, 11110111 00001101 
00000000 01001000 10000100 00010000 11011100 00100011", - "2048-01-01T01:01-14:15, 11110111 00001101 00000000 01001000 10000100 00010000 00100100 00001001", - - // Long form because the offset is not a multiple of 15 - "2048-01-01T01:01+00:01, 11110111 00001101 00000000 01001000 10000100 00010000 10000100 00010110", - - // Long form because the fractional seconds are millis, micros, or nanos - "2023-12-31T23:59:00.0Z, 11110111 00010011 11100111 00000111 11111111 10111011 10000011 00010110 00000000 00000001 00000001", - }) - public void testWriteTimestampDelegatesCorrectlyToLongForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { - assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue); - } - - @Test - public void testWriteTimestampValueForNullTimestamp() { - int numBytes = IonEncoder_1_1.writeTimestampValue(buf, null); - Assertions.assertEquals("EB 04", byteArrayToHex(bytes())); - Assertions.assertEquals(2, numBytes); - } - - @ParameterizedTest - @CsvSource({ - "'', 80", - "'a', 81 61", - "'ab', 82 61 62", - "'abc', 83 61 62 63", - "'fourteen bytes', 8E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", - "'this has sixteen', F8 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E", - "'variable length encoding', F8 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", - }) - public void testWriteStringValue(String value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeStringValue); - } - - @Test - public void testWriteStringValueForNull() { - int numBytes = IonEncoder_1_1.writeStringValue(buf, null); - Assertions.assertEquals("EB 05", byteArrayToHex(bytes())); - Assertions.assertEquals(2, numBytes); - } - - @ParameterizedTest - @CsvSource({ - "'', 90", - "'a', 91 61", - "'ab', 92 61 62", - "'abc', 93 61 62 63", - "'fourteen bytes', 9E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", - "'this has sixteen', F9 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 
65 6E", - "'variable length encoding', F9 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", - }) - public void testWriteSymbolValue(String value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeSymbolValue); - } - - @ParameterizedTest - @CsvSource({ - "0, E1 00", - "1, E1 01", - "255, E1 FF", - "256, E2 00 00", - "257, E2 01 00", - "512, E2 00 01", - "513, E2 01 01", - "65535, E2 FF FE", - "65791, E2 FF FF", - "65792, E3 01", - "65793, E3 03", - "65919, E3 FF", - "65920, E3 02 02", - "9223372036854775807, E3 00 FF FD FD FF FF FF FF FF" - }) - public void testWriteSymbolValue(long value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeSymbolValue); - } - - @Test - public void testWriteSymbolValueForNull() { - int numBytes = IonEncoder_1_1.writeSymbolValue(buf, null); - Assertions.assertEquals("EB 06", byteArrayToHex(bytes())); - Assertions.assertEquals(2, numBytes); - } - - @ParameterizedTest - @CsvSource({ - "'', FE 01", // - "20, FE 03 20", - "49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " + - "FE 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" - }) - public void testWriteBlobValue(@ConvertWith(HexStringToByteArray.class) byte[] value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeBlobValue); - } - - @Test - public void testWriteBlobValueForNull() { - int numBytes = IonEncoder_1_1.writeBlobValue(buf, null); - Assertions.assertEquals("EB 07", byteArrayToHex(bytes())); - Assertions.assertEquals(2, numBytes); - } - - @ParameterizedTest - @CsvSource({ - "'', FF 01", - "20, FF 03 20", - "49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " + - "FF 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" - }) - public void testWriteClobValue(@ConvertWith(HexStringToByteArray.class) byte[] value, String expectedBytes) { - 
assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeClobValue); - } - - @Test - public void testWriteClobValueForNull() { - int numBytes = IonEncoder_1_1.writeClobValue(buf, null); - Assertions.assertEquals("EB 08", byteArrayToHex(bytes())); - Assertions.assertEquals(2, numBytes); - } - - @ParameterizedTest - @CsvSource({ - " '', ''", // Empty array of annotations - " $0, E4 01", - " $10, E4 15", - " $256, E4 02 04", - " $10 $11, E5 15 17", - " $256 $257, E5 02 04 06 04", - " $10 $11 $12, E6 07 15 17 19", - "$256 $257 $258, E6 0D 02 04 06 04 0A 04", - }) - public void testWriteAnnotations(@ConvertWith(SymbolIdsToLongArray.class) long[] value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeAnnotations); - } - - @Test - public void testWriteAnnotationsForNull() { - int numBytes = IonEncoder_1_1.writeAnnotations(buf, null); - Assertions.assertEquals("", byteArrayToHex(bytes())); - Assertions.assertEquals(0, numBytes); - } - - /** - * Utility method to make it easier to write test cases that assert specific sequences of bytes. - */ - private static String byteArrayToHex(byte[] bytes) { - StringBuilder sb = new StringBuilder(); - for (byte b : bytes) { - sb.append(String.format("%02X ", b)); - } - return sb.toString().trim(); - } - - /** - * Determines the number of bytes needed to represent a series of hexadecimal digits. - */ - private static int byteLengthFromHexString(String hexString) { - return (hexString.replaceAll("[^\\dA-F]", "").length()) / 2; - } - - /** - * Converts a byte array to a string of bits, such as "00110110 10001001". - * The purpose of this method is to make it easier to read and write test assertions. 
- */ - private static String byteArrayToBitString(byte[] bytes) { - StringBuilder s = new StringBuilder(); - for (byte aByte : bytes) { - for (int bit = 7; bit >= 0; bit--) { - if (((0x01 << bit) & aByte) != 0) { - s.append("1"); - } else { - s.append("0"); - } - } - s.append(" "); - } - return s.toString().trim(); - } - - /** - * Determines the number of bytes needed to represent a series of hexadecimal digits. - */ - private static int byteLengthFromBitString(String bitString) { - return (bitString.replaceAll("[^01]", "").length()) / 8; - } - - /** - * Converts a String to a Timestamp for a @Parameterized test - */ - static class StringToTimestamp extends TypedArgumentConverter { - protected StringToTimestamp() { - super(String.class, Timestamp.class); - } - - @Override - protected Timestamp convert(String source) throws ArgumentConversionException { - if (source == null) return null; - return Timestamp.valueOf(source); - } - } - - /** - * Converts a String to a Decimal for a @Parameterized test - */ - static class StringToDecimal extends TypedArgumentConverter { - protected StringToDecimal() { - super(String.class, Decimal.class); - } - - @Override - protected Decimal convert(String source) throws ArgumentConversionException { - if (source == null) return null; - return Decimal.valueOf(source); - } - } - - /** - * Converts a Hex String to a Byte Array for a @Parameterized test - */ - static class HexStringToByteArray extends TypedArgumentConverter { - - private static final CharsetEncoder ASCII_ENCODER = StandardCharsets.US_ASCII.newEncoder(); - - protected HexStringToByteArray() { - super(String.class, byte[].class); - } - - @Override - protected byte[] convert(String source) throws ArgumentConversionException { - if (source == null) return null; - if (source.trim().isEmpty()) return new byte[0]; - String[] octets = source.split(" "); - byte[] result = new byte[octets.length]; - for (int i = 0; i < octets.length; i++) { - if (octets[i].length() == 1) { - char c 
= octets[i].charAt(0); - if (!ASCII_ENCODER.canEncode(c)) { - throw new IllegalArgumentException("Cannot convert non-ascii character: " + c); - } - result[i] = (byte) c; - } else { - result[i] = (byte) Integer.parseInt(octets[i], 16); - } - } - return result; - } - } - - /** - * Converts a String of symbol ids to a long[] for a @Parameterized test - */ - static class SymbolIdsToLongArray extends TypedArgumentConverter { - protected SymbolIdsToLongArray() { - super(String.class, long[].class); - } - - @Override - protected long[] convert(String source) throws ArgumentConversionException { - if (source == null) return null; - int size = (int) source.chars().filter(i -> i == '$').count(); - String[] sids = source.split("\\$"); - long[] result = new long[size]; - int i = 0; - for (String sid : sids) { - if (sid.isEmpty()) continue; - result[i] = Long.parseLong(sid.trim()); - i++; - } - return result; - } - } -} diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java index bfa1e196d2..f79bc05fc6 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import com.amazon.ion.IonDatagram; @@ -282,10 +269,14 @@ public void testAutoFlush_67K() throws Exception{ @Test public void testAutoFlush_twiceBlockSize() throws IOException { - IonReader reader = system().newReader(singleTopLevelValue_13B.toByteArray()); + ByteArrayOutputStream expected = new ByteArrayOutputStream(); + IonWriter writer = IonBinaryWriterBuilder.standard().build(expected); + writer.writeString("abcdefghijklmnopqrstuvwxyz"); // Write a 28-byte (typeId + length + content) IonString. + writer.close(); + IonReader reader = system().newReader(expected.toByteArray()); ByteArrayOutputStream actual = new ByteArrayOutputStream(); - // Set the actual writer block size as 5 bytes. The test data is a 13-byte IonString "taco_burrito". - IonBinaryWriterBuilder builder = IonBinaryWriterBuilder.standard().withAutoFlushEnabled(autoFlushMode.isEnabled()).withBlockSize(5); + // Set the actual writer block size as 10 bytes. The test data is a 28-byte IonString "abcdefghijklmnopqrstuvwxyz". + IonBinaryWriterBuilder builder = IonBinaryWriterBuilder.standard().withAutoFlushEnabled(autoFlushMode.isEnabled()).withBlockSize(10); IonWriter actualWriter = builder.build(actual); while (reader.next() != null) { actualWriter.writeValue(reader); @@ -293,9 +284,9 @@ public void testAutoFlush_twiceBlockSize() throws IOException { actualWriter.close(); if (lstAppendMode.isEnabled() && autoFlushMode.isEnabled()) { // When auto-flush is enabled, no flush is expected since this is a single top-level value and should continue encoding until this value is completed. 
- assertArrayEquals(actual.toByteArray(), singleTopLevelValue_13B.toByteArray()); + assertArrayEquals(expected.toByteArray(), actual.toByteArray()); } - assertEquivalentDataModel(actual, singleTopLevelValue_13B); + assertEquivalentDataModel(actual, expected); } @Test diff --git a/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt new file mode 100644 index 0000000000..95f99c8069 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt @@ -0,0 +1,1537 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.Decimal +import com.amazon.ion.IonException +import com.amazon.ion.IonType +import com.amazon.ion.TextToBinaryUtils.cleanCommentedHexBytes +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.ion_1_1.IonRawWriter_1_1 +import com.amazon.ion.ion_1_1.TaglessScalarType +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.ValueSource +import java.io.ByteArrayOutputStream +import java.math.BigDecimal +import java.math.BigInteger + +class IonRawBinaryWriterTest_1_1 { + + private fun ionWriter( + baos: ByteArrayOutputStream = ByteArrayOutputStream() + ) = IonRawBinaryWriter_1_1( + out = baos, + buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32)) {}, + lengthPrefixPreallocation = 1, + ) + + private inline fun writeAsHexString(autoClose: Boolean = true, block: IonRawBinaryWriter_1_1.() -> Unit): String { + val baos = ByteArrayOutputStream() + val rawWriter = ionWriter(baos) + block.invoke(rawWriter) 
+ if (autoClose) rawWriter.close() + @OptIn(ExperimentalStdlibApi::class) + return baos.toByteArray().joinToString(" ") { it.toHexString(HexFormat.UpperCase) } + } + + /** + * @param hexBytes a string containing white-space delimited pairs of hex digits representing the expected output. + * The string may contain multiple lines. Anything after a `|` character on a line is ignored, so + * you can use `|` to add comments. + */ + @OptIn(ExperimentalStdlibApi::class) + private inline fun assertWriterOutputEquals(hexBytes: String, autoClose: Boolean = true, block: IonRawBinaryWriter_1_1.() -> Unit) { + val cleanedHexBytes = hexBytes.cleanCommentedHexBytes() + assertEquals(hexBytes.cleanCommentedHexBytes(), writeAsHexString(autoClose, block)) + + // Also check to see that the correct number of bytes are being reported to an enclosing container + val expectedLength = if (cleanedHexBytes.isBlank()) 0 else cleanedHexBytes.split(' ').size + val actualByteString = writeAsHexString(autoClose) { + try { + stepInList(usingLengthPrefix = true) + block() + stepOut() + } catch (t: Throwable) { + // It's illegal to wrap `block()` in a list, so we'll just skip this check. + return + } + } + if (expectedLength > 0xF) { + // Rather than try to parse the flexuint in the output, we'll just compare them as flexuint hex strings + // If this fails, it could be confusing. It's possible that if the length is underreported as being less + // than 16, then the "actualLengthBytes" could be an empty string. 
+ val flexUIntLen = PrimitiveEncoder.flexUIntLength(expectedLength.toLong()) + val flexUIntBytes = ByteArray(flexUIntLen) + PrimitiveEncoder.writeFlexIntOrUIntInto(flexUIntBytes, 0, expectedLength.toLong(), flexUIntLen) + val byteString = flexUIntBytes.joinToString(" ") { it.toHexString(HexFormat.UpperCase) } + val actualLengthBytes = actualByteString.drop(3).dropLast(expectedLength * 3) + assertEquals(byteString, actualLengthBytes) + } else { + // Take the length from the opcode and compare with the length we calculated + val actualLen = "${actualByteString[1]}".toInt(radix = 0x10) // Fun fact! Every radix is 10 unless you write it in another base. + assertEquals(expectedLength, actualLen) + } + } + + private inline fun assertWriterThrows(block: IonRawBinaryWriter_1_1.() -> Unit) { + val baos = ByteArrayOutputStream() + val rawWriter = IonRawBinaryWriter_1_1( + out = baos, + buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32)) {}, + lengthPrefixPreallocation = 1, + ) + assertThrows { + block.invoke(rawWriter) + } + } + + @Test + fun `calling close while in a container should throw IonException`() { + assertWriterThrows { + stepInList(usingLengthPrefix = true) + close() + } + } + + @Test + fun `calling finish while in a container should throw IonException`() { + assertWriterThrows { + stepInList(usingLengthPrefix = false) + flush() + } + } + + @Test + fun `calling stepOut while not in a container should throw IonException`() { + assertWriterThrows { + stepOut() + } + } + + @Test + fun `calling writeIVM when in a container should throw IonException`() { + assertWriterThrows { + stepInList(usingLengthPrefix = true) + writeIVM() + } + } + + @Test + fun `calling finish should cause the buffered data to be written to the output stream`() { + val actual = writeAsHexString(autoClose = false) { + writeIVM() + flush() + } + // Just checking that data is written, not asserting the content. 
+ assertTrue(actual.isNotBlank()) + } + + @Test + fun `after calling finish, it should still be possible to write more data`() { + val actual = writeAsHexString { + flush() + writeIVM() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close should cause the buffered data to be written to the output stream`() { + val actual = writeAsHexString(autoClose = false) { + writeIVM() + close() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close or finish multiple times should not throw any exceptions`() { + val actual = writeAsHexString { + writeIVM() + flush() + close() + flush() + close() + flush() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `write the IVM`() { + assertWriterOutputEquals("E0 01 01 EA") { + writeIVM() + } + } + + @Test + fun `write nothing`() { + assertWriterOutputEquals("") { + } + } + + @Test + fun `write a null`() { + assertWriterOutputEquals("8E 8E") { + writeNull() + writeNull(IonType.NULL) + } + } + + @ParameterizedTest + @CsvSource( + "BOOL, 8F 01", + "INT, 8F 02", + "FLOAT, 8F 03", + "DECIMAL, 8F 04", + "TIMESTAMP, 8F 05", + "SYMBOL, 8F 06", + "STRING, 8F 07", + "CLOB, 8F 08", + "BLOB, 8F 09", + "LIST, 8F 0A", + "SEXP, 8F 0B", + "STRUCT, 8F 0C", + ) + fun `write a null with a specific type`(ionType: IonType, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { writeNull(ionType) } + } + + @ParameterizedTest + @CsvSource("true, 6E", "false, 6F") + fun `write a boolean`(value: Boolean, hexBytes: String) { + assertWriterOutputEquals(hexBytes) { + writeBool(value) + } + } + + @Test + fun `write a delimited list`() { + assertWriterOutputEquals("F0 6E 6F EF") { + stepInList(usingLengthPrefix = false) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a 
prefixed list`() { + assertWriterOutputEquals("B2 6E 6F") { + stepInList(usingLengthPrefix = true) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a variable-length prefixed list`() { + assertWriterOutputEquals("FA 21 ${" 6E".repeat(16)}") { + stepInList(usingLengthPrefix = true) + repeat(16) { writeBool(true) } + stepOut() + flush() + } + } + + @Test + fun `write a prefixed list that is so long it requires patch points`() { + assertWriterOutputEquals("FA 02 02 ${" 6E".repeat(128)}") { + stepInList(usingLengthPrefix = true) + repeat(128) { writeBool(true) } + stepOut() + } + } + + @Test + fun `write multiple nested prefixed lists`() { + assertWriterOutputEquals("B4 B3 B2 B1 B0") { + repeat(5) { stepInList(usingLengthPrefix = true) } + repeat(5) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited lists`() { + assertWriterOutputEquals("F0 F0 F0 B0 EF EF EF") { + repeat(4) { stepInList(usingLengthPrefix = false) } + repeat(4) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited and prefixed lists`() { + assertWriterOutputEquals("F0 B9 F0 B6 F0 B3 F0 B0 EF EF EF EF") { + repeat(4) { + stepInList(usingLengthPrefix = false) + stepInList(usingLengthPrefix = true) + } + repeat(8) { stepOut() } + } + } + + @Test + fun `write a delimited sexp`() { + assertWriterOutputEquals("F1 6E 6F EF") { + stepInSExp(usingLengthPrefix = false) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a prefixed sexp`() { + assertWriterOutputEquals("C2 6E 6F") { + stepInSExp(usingLengthPrefix = true) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a variable-length prefixed sexp`() { + assertWriterOutputEquals("FB 21 ${" 6E".repeat(16)}") { + stepInSExp(usingLengthPrefix = true) + repeat(16) { writeBool(true) } + stepOut() + flush() + } + } + + @Test + fun `write a prefixed sexp that is so long it requires patch points`() { + assertWriterOutputEquals("FB 02 02 ${" 
6E".repeat(128)}") { + stepInSExp(usingLengthPrefix = true) + repeat(128) { writeBool(true) } + stepOut() + } + } + + @Test + fun `write multiple nested prefixed sexps`() { + assertWriterOutputEquals("C4 C3 C2 C1 C0") { + repeat(5) { stepInSExp(usingLengthPrefix = true) } + repeat(5) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited sexps`() { + assertWriterOutputEquals("F1 F1 F1 C0 EF EF EF") { + repeat(4) { stepInSExp(usingLengthPrefix = false) } + repeat(4) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited and prefixed sexps`() { + assertWriterOutputEquals("F1 C9 F1 C6 F1 C3 F1 C0 EF EF EF EF") { + repeat(4) { + stepInSExp(usingLengthPrefix = false) + stepInSExp(usingLengthPrefix = true) + } + repeat(8) { stepOut() } + } + } + + @Test + fun `write a prefixed struct`() { + assertWriterOutputEquals( + """ + D4 | Struct Length = 4 + 17 | SID 11 + 6E | true + 19 | SID 12 + 6F | false + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(11) + writeBool(true) + writeFieldName(12) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a variable length prefixed struct`() { + assertWriterOutputEquals( + """ + FC | Variable Length SID Struct + 21 | Length = 16 + ${"17 6E ".repeat(8)} + """ + ) { + stepInStruct(usingLengthPrefix = true) + repeat(8) { + writeFieldName(11) + writeBool(true) + } + stepOut() + } + } + + @Test + fun `write a struct so long it requires patch points`() { + assertWriterOutputEquals( + """ + FC | Variable Length SID Struct + 02 02 | Length = 128 + ${"17 6E ".repeat(64)} + """ + ) { + stepInStruct(usingLengthPrefix = true) + repeat(64) { + writeFieldName(11) + writeBool(true) + } + stepOut() + } + } + + @Test + fun `write multiple nested prefixed structs`() { + assertWriterOutputEquals( + """ + D8 | Struct Length = 8 + 17 | SID 11 + D6 | Struct Length = 6 + 17 | SID 11 + D4 | Struct Length = 4 + 17 | SID 11 + D2 | Struct Length = 2 + 17 | SID 11 + D0 | Struct Length = 0 + """ + ) { + 
stepInStruct(usingLengthPrefix = true) + repeat(4) { + writeFieldName(11) + stepInStruct(usingLengthPrefix = true) + } + repeat(5) { + stepOut() + } + } + } + + @Test + fun `write multiple nested delimited structs`() { + assertWriterOutputEquals( + """ + F2 | Begin delimited struct + 17 | FlexSym SID 11 + F2 | Begin delimited struct + 17 F2 17 F2 17 | etc. + D0 | empty struct + 01 EF | End delimited struct + 01 EF 01 EF 01 EF | etc. + """ + ) { + stepInStruct(usingLengthPrefix = false) + repeat(4) { + writeFieldName(11) + stepInStruct(usingLengthPrefix = false) + } + repeat(5) { + stepOut() + } + } + } + + @Test + fun `write delimited struct`() { + assertWriterOutputEquals( + """ + F2 | Begin delimited struct + 17 | SID 11 + 6E | true + 01 EE | switch to flex sym mode + F9 66 6F 6F | FlexSym 'foo' + 6E | true + 02 01 | FlexSym SID 64 + 6E | true + 01 EF | End delimited struct + """ + ) { + stepInStruct(usingLengthPrefix = false) + writeFieldName(11) + writeBool(true) + writeFieldName("foo") + writeBool(true) + writeFieldName(64) + writeBool(true) + stepOut() + } + } + + @Test + fun `write empty struct`() { + assertWriterOutputEquals("D0 D0") { + stepInStruct(usingLengthPrefix = false) + stepOut() + stepInStruct(usingLengthPrefix = true) + stepOut() + } + } + + @Test + fun `write prefixed struct with a single flex sym field`() { + assertWriterOutputEquals( + """ + D7 | Variable length Struct, L=7 + 01 EE | switch to FlexSym encoding + F9 66 6F 6F | FlexSym 'foo' + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName("foo") + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct with multiple fields and flex syms`() { + assertWriterOutputEquals( + """ + FC | Variable length Struct + 23 | Length = 16 + 01 EE | switch to FlexSym encoding + F9 66 6F 6F | FlexSym 'foo' + 6E | true + F9 62 61 72 | FlexSym 'bar' + 6E | true + F9 62 61 7A | FlexSym 'baz' + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + 
writeFieldName("foo") + writeBool(true) + writeFieldName("bar") + writeBool(true) + writeFieldName("baz") + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct that starts with sids and switches partway through to use flex syms`() { + assertWriterOutputEquals( + """ + DC | Struct, Length = 11 + 81 | SID 64 + 6E | true + 01 EE | switch to FlexSym encoding + F9 66 6F 6F | FlexSym 'foo' + 6E | true + 02 01 | FlexSym SID 64 + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(64) + writeBool(true) + writeFieldName("foo") + writeBool(true) + writeFieldName(64) + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct with sid 0`() { + assertWriterOutputEquals( + """ + D2 | Variable length Struct + 01 | FlexSym SID 0 + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(0) + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct with sid 0 after another value`() { + assertWriterOutputEquals( + """ + D8 | Variable length struct (length=8) + 03 | SID 1 + 6E | true + 01 | SID 0 + 6E | true + 05 | SID 2 + 6E | true + 01 | SID 0 + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(1) + writeBool(true) + writeFieldName(0) + writeBool(true) + writeFieldName(2) + writeBool(true) + writeFieldName(0) + writeBool(true) + stepOut() + } + } + + @Test + fun `calling writeFieldName outside of a struct should throw an exception`() { + assertWriterThrows { writeFieldName(12) } + assertWriterThrows { writeFieldName("foo") } + } + + @Test + fun `writeAnnotations with empty int array should write no annotations`() { + assertWriterOutputEquals("6E") { + writeAnnotations(intArrayOf()) + writeBool(true) + } + } + + @Test + fun `write one sid annotation`() { + val expectedBytes = "58 07 6E" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + 
writeAnnotations(intArrayOf()) + writeAnnotations(arrayOf()) + writeBool(true) + } + } + + @Test + fun `write multiple sid annotations`() { + val expectedBytes = "58 07 58 09 58 02 04 6E" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(4) + writeAnnotations(256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4)) + writeAnnotations(256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4, 256)) + writeBool(true) + } + } + + @Test + fun `write sid 0 annotation`() { + assertWriterOutputEquals("58 01 6E") { + writeAnnotations(0) + writeBool(true) + } + } + + @Test + fun `write one inline annotation`() { + val expectedBytes = "59 07 66 6F 6F 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeBool(false) + } + } + + @Test + fun `write two inline annotations`() { + val expectedBytes = "59 07 66 6F 6F 59 07 62 61 72 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations("bar") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar")) + writeBool(false) + } + } + + @Test + fun `write three inline annotations`() { + val expectedBytes = "59 07 66 6F 6F 59 07 62 61 72 59 07 62 61 7A 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations("bar") + writeAnnotations("baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar")) + writeAnnotations("baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar", "baz")) + writeBool(false) + } + } + + @Test + fun `write two mixed sid and inline annotations`() { + val expectedBytes = "58 15 59 07 66 6F 6F 6E" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations("foo") + writeBool(true) + } + } + + @Test + fun 
`write annotations that are long enough to need a patch point`() { + val opCode = "59" + val length = "3E 02" + val text = "41 6D 61 7A 6F 6E 20 49 6F 6E 20 69 73 20 61 20 72 69 63 68 6C 79 2D 74 79 70 65 64 2C 20 73 65 " + + "6C 66 2D 64 65 73 63 72 69 62 69 6E 67 2C 20 68 69 65 72 61 72 63 68 69 63 61 6C 20 64 61 74 61 20 " + + "73 65 72 69 61 6C 69 7A 61 74 69 6F 6E 20 66 6F 72 6D 61 74 20 6F 66 66 65 72 69 6E 67 20 69 6E 74 " + + "65 72 63 68 61 6E 67 65 61 62 6C 65 20 62 69 6E 61 72 79 20 61 6E 64 20 74 65 78 74 20 72 65 70 72 " + + "65 73 65 6E 74 61 74 69 6F 6E 73 2E" + val falseOpCode = "6F" + assertWriterOutputEquals("$opCode $length $text $falseOpCode") { + writeAnnotations( + "Amazon Ion is a richly-typed, self-describing, hierarchical data serialization " + + "format offering interchangeable binary and text representations." + ) + writeBool(false) + } + } + + @Test + fun `write int`() { + assertWriterOutputEquals( + """ + 61 01 + 61 0A + 62 C7 CF + F5 13 D2 0A 1F EB 8C A9 54 AB 00 + """ + ) { + writeInt(1) + writeInt(BigInteger.TEN) + writeInt(-12345) + writeInt(BigInteger("12345678901234567890")) + } + } + + @Test + fun `write float`() { + assertWriterOutputEquals( + """ + 6A + 6A + 6C C3 F5 48 40 + 6D 1F 85 EB 51 B8 1E 09 40 + """ + ) { + writeFloat(0.0f) + writeFloat(0.0) + writeFloat(3.14f) + writeFloat(3.14) + } + } + + @ParameterizedTest + @CsvSource( + " 0., 70", + " 0e1, 71 03", + " 0e63, 71 7F", + " 0e64, 72 02 01", + " 0e99, 72 8E 01", + " 0.0, 71 FF", + " 0.00, 71 FD", + " 0.000, 71 FB", + " 0e-64, 71 81", + " 0e-65, 72 FE FE", + " 0e-99, 72 76 FE", + " -0., 72 01 00", + " -0e1, 72 03 00", + " -0e3, 72 07 00", + " -0e63, 72 7F 00", + " -0e64, 73 02 01 00", + " -0e127, 73 FE 01 00", + " -0e199, 73 1E 03 00", + " -0e-1, 72 FF 00", + " -0e-2, 72 FD 00", + " -0e-3, 72 FB 00", + " -0e-64, 72 81 00", + " -0e-65, 73 FE FE 00", + " -0e-199, 73 E6 FC 00", + " 0.01, 72 FD 01", + " 0.1, 72 FF 01", + " 1, 72 01 01", + " 1e1, 72 03 01", + " 1e2, 72 05 
01", + " 1e63, 72 7F 01", + " 1e64, 73 02 01 01", + " 1e65536, 74 04 00 08 01", + " 2, 72 01 02", + " 7, 72 01 07", + " 14, 72 01 0E", + " 1.0, 72 FF 0A", + " 1.00, 72 FD 64", + " 1.27, 72 FD 7F", + " 3.142, 73 FB 46 0C", + " 3.14159, 74 F7 2F CB 04", + " 3.141593, 74 F5 D9 EF 2F", + " 3.141592653, 76 EF 4D E6 40 BB 00", + " 3.14159265359, 76 EB 4F F6 59 25 49", + " 3.1415926535897932, 78 E1 4C 43 76 65 9E 9C 6F", + " 3.141592653589793238, 79 DD D6 49 32 A2 DF 2D 99 2B", + " 3.14159265358979323846, 7A D9 C6 D7 A4 5B 5B EB D5 07 11", + " 3.141592653589793238462, 7B D7 BE 6D 70 94 91 31 5B 4E AA 00", + " 3.141592653589793238462643, 7C D1 B3 B0 2C D7 AB A0 39 14 42 99 02", + " 3.14159265358979323846264343, 7D CD 17 06 75 0D 20 C3 82 E6 CF DD 03 01", + " 3.14159265358979323846264338328, 7E C7 98 B7 1F 91 34 35 CA 6E 1C 74 1A F7 03", + " 3.14159265358979323846264338327950, 7F C1 8E 29 E5 E3 56 D5 DF C5 10 8F 55 3F 7D 0F", + " 3.14159265358979323846264338327950288, F6 21 BB D0 53 2A 37 6A 5B 59 F2 84 D9 36 66 3F 81 3C", + " 3.1415926535897932384626433832795028841971, F6 25 B1 F3 E5 23 F6 6C F2 1C 99 4B 7C A8 71 57 5D B7 52 5C", + ) + fun `write decimal`(decimalValue: String, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { + writeDecimal(Decimal.valueOf(decimalValue)) + } + } + + @Test + fun `write timestamp`() { + assertWriterOutputEquals( + """ + 87 35 46 AF 7C 55 47 70 2D + F7 05 4B 08 + """ + ) { + writeTimestamp(Timestamp.valueOf("2023-12-08T15:37:23.190583253Z")) + writeTimestamp(Timestamp.valueOf("2123T")) + } + } + + @Test + fun `write symbol`() { + assertWriterOutputEquals( + """ + 50 01 + 51 01 + 52 01 + 55 01 + 57 01 + 50 03 + 51 1E 18 + A3 66 6F 6F + """ + ) { + writeSymbol(0) + writeSymbol(1) + writeSymbol(2) + writeSymbol(5) + writeSymbol(7) + writeSymbol(8) + writeSymbol(12345) + writeSymbol("foo") + } + // Longer symbol text + assertWriterOutputEquals( + """ + F9 | Var-length Symbol text + 3F | Length = 31 + 69 6E 74 65 72 63 68 61 6E 
67 65 61 + 62 6C 65 20 62 69 6E 61 72 79 20 61 + 6E 64 20 74 65 78 74 + """ + ) { + writeSymbol("interchangeable binary and text") + } + } + + @Test + fun `attempting to write a negative SID should throw exception`() { + assertWriterThrows { + writeSymbol(-1) + } + } + + @Test + fun `write string`() { + assertWriterOutputEquals("93 66 6F 6F") { + writeString("foo") + } + assertWriterOutputEquals( + """ + F8 | Var-length String + 3F | Length = 31 + 69 6E 74 65 72 63 68 61 6E 67 65 61 + 62 6C 65 20 62 69 6E 61 72 79 20 61 + 6E 64 20 74 65 78 74 + """ + ) { + writeString("interchangeable binary and text") + } + } + + @Test + fun `write blob`() { + assertWriterOutputEquals("FE 07 01 02 03") { + writeBlob(byteArrayOf(1, 2, 3), 0, 3) + } + } + + @Test + fun `write clob`() { + assertWriterOutputEquals("FF 07 04 05 06") { + writeClob(byteArrayOf(4, 5, 6), 0, 3) + } + } + + @ParameterizedTest + @CsvSource( + // one-byte macro address opcodes + " 0, 00", + " 1, 01", + " 64, 40", + " 71, 47", + // Extended macro addresses + " 72, 48 01", + " 73, 49 01", + " 79, 4F 01", + " 80, 48 03", + " 87, 4F 03", + " 88, 48 05", + " 319, 4F 3D", + " 320, 48 3F", + " 1095, 4F FF ", + " 1096, 48 02 02", + " 1211, 4B 3A 02", + " 4159, 4F FA 07", + " 4160, 48 FE 07", + " 4161, 49 FE 07", + " 69695, 4F FA 87", + " 69696, 48 FE 87", + " 131143, 4F FE FF", + " 131144, 48 04 00 02", + " 1052735, 4F F4 0F 10", + " 16777287, 4F FC FF FF", + " 16777288, 48 08 00 00 02", + "${Int.MAX_VALUE}, 4F 68 FF FF FF" + ) + fun `write an e-expression with no args`(id: Int, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { + stepInEExp(id, usingLengthPrefix = false) + stepOut() + } + } + + @ParameterizedTest + @CsvSource( + // Macro Id; Op Address Length=0 + " 0, F4 01 01", + " 64, F4 81 01", + " 65, F4 83 01", + " 127, F4 FF 01", + " 128, F4 02 02 01", + " 729, F4 66 0B 01", + " 16383, F4 FE FF 01", + " 16384, F4 04 00 02 01", + " 1052736, F4 04 82 80 01", + " 2097151, F4 FC FF FF 01", + " 
2097152, F4 08 00 00 02 01", + "${Int.MAX_VALUE}, F4 F0 FF FF FF 0F 01", + ) + fun `write a length-prefixed e-expression with no args`(id: Int, expectedBytes: String) { + // This test ensures that the macro address is written correctly + assertWriterOutputEquals(expectedBytes) { + stepInEExp(id, usingLengthPrefix = true) + stepOut() + } + } + + @Test + fun `write a length-prefixed e-expression with many args`() { + // This test ensures that the macro length is written correctly + assertWriterOutputEquals("F4 03 15 60 60 60 60 60 60 60 60 60 60") { + stepInEExp(1, usingLengthPrefix = true) + repeat(10) { writeInt(0L) } + stepOut() + } + } + + @Test + fun `write nested e-expressions`() { + // E-Expressions don't have length prefixes, so we're putting them inside lists + // so that we can check that the length gets propagated correctly to the parent + assertWriterOutputEquals( + """ + BA | List Length 10 + 1F | Macro 31 + B8 | List Length 8 + 40 | Macro 64 + B6 | List Length 6 + 4B 03 | Macro 83 + B3 | List Length 3 + 48 FE 07 | Macro 4160 + """ + ) { + stepInList(usingLengthPrefix = true) + stepInEExp(31, usingLengthPrefix = false) + stepInList(usingLengthPrefix = true) + stepInEExp(64, usingLengthPrefix = false) + stepInList(usingLengthPrefix = true) + stepInEExp(83, usingLengthPrefix = false) + stepInList(usingLengthPrefix = true) + stepInEExp(4160, usingLengthPrefix = false) + repeat(8) { stepOut() } + } + } + + @Test + fun `write an e-expression in the value position of a struct`() { + assertWriterOutputEquals( + """ + D2 | Struct length 2 + 03 | SID 1 + 01 | Macro 1 + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(1) + stepInEExp(1, usingLengthPrefix = false) + stepOut() + stepOut() + } + } + + @Test + fun `write an e-expression with an absent arg`() { + assertWriterOutputEquals("01 E0") { + stepInEExp(1, usingLengthPrefix = false) + writeAbsentArgument() + stepOut() + } + // This test ensures that the "absent arg" length is accounted 
correctly + assertWriterOutputEquals("F4 03 03 E0") { + stepInEExp(1, usingLengthPrefix = true) + writeAbsentArgument() + stepOut() + } + } + + @Test + fun `calling stepInEExp(String) should throw NotImplementedError`() { + assertThrows { + writeAsHexString { + stepInEExp("foo") + } + } + } + + @ParameterizedTest + @CsvSource( + // SID + " 0, 01", + " 4, 09", + " 246, DA 03", + // Text + " a, FD 61", + " abc, F9 61 62 63", + " '', FF", + ) + fun `write a tagless symbol`(value: String, expectedBytes: String) { + // If it's an int, write as SID, else write as text + val writeTheValue: IonRawBinaryWriter_1_1.() -> Unit = value.toIntOrNull() + ?.let { { writeTaglessSymbol(TaglessScalarType.SYMBOL.getOpcode(), it) } } + ?: { writeTaglessSymbol(TaglessScalarType.SYMBOL.getOpcode(), value) } + // Write the value as single expression + assertWriterOutputEquals("02 $expectedBytes") { + stepInEExp(0x02, usingLengthPrefix = false) + writeTheValue() + stepOut() + } + } + + @Test + fun `write a tagless symbol in a length-prefixed e-expression`() { + assertWriterOutputEquals( + """ + F4 | Length prefixed e-expression + 05 | Macro Address 2 + 0B | Length = 5 + 09 | FlexSym $4 + F9 61 62 63 | FlexSym 'abc' + """ + ) { + stepInEExp(0x02, usingLengthPrefix = true) + writeTaglessSymbol(TaglessScalarType.SYMBOL.getOpcode(), 4) + writeTaglessSymbol(TaglessScalarType.SYMBOL.getOpcode(), "abc") + stepOut() + } + } + + @ParameterizedTest + @ValueSource( + ints = [ + OpCode.DIRECTIVE_SET_SYMBOLS, + OpCode.DIRECTIVE_ADD_SYMBOLS, + OpCode.DIRECTIVE_SET_MACROS, + OpCode.DIRECTIVE_ADD_MACROS, + OpCode.DIRECTIVE_USE, + OpCode.DIRECTIVE_MODULE, + OpCode.DIRECTIVE_IMPORT, + OpCode.DIRECTIVE_ENCODING, + ] + ) + fun `write a directive`(directiveOpcode: Int) { + val dir = directiveOpcode.toString(radix = 16) + assertWriterOutputEquals("$dir 60 61 01 EF $dir EF") { + stepInDirective(directiveOpcode) + writeInt(0) + writeInt(1) + stepOut() + stepInDirective(directiveOpcode) + stepOut() + } + } + + 
@Test + fun `write a tagged placeholder`() { + assertWriterOutputEquals("E9") { + writeTaggedPlaceholder() + } + } + + @Test + fun `write a tagged placeholder with default value`() { + assertWriterOutputEquals("EA 60") { + writeTaggedPlaceholderWithDefault { it.writeInt(0) } + } + assertWriterOutputEquals("EA 58 01 60") { + writeTaggedPlaceholderWithDefault { + it.writeAnnotations(0) + it.writeInt(0) + } + } + } + + @ParameterizedTest + @CsvSource( + "INT, EB 60", + "INT_8, EB 61", + "INT_16, EB 62", + "INT_32, EB 64", + "INT_64, EB 68", + "UINT, EB E0", + "UINT_8, EB E1", + "UINT_16, EB E2", + "UINT_32, EB E4", + "UINT_64, EB E8", + "FLOAT_16, EB 6B", + "FLOAT_32, EB 6C", + "FLOAT_64, EB 6D", + "SMALL_DECIMAL, EB 70", + "TIMESTAMP_DAY, EB 82", + "TIMESTAMP_MIN, EB 83", + "TIMESTAMP_S, EB 84", + "TIMESTAMP_MS, EB 85", + "TIMESTAMP_US, EB 86", + "TIMESTAMP_NS, EB 87", + "SYMBOL, EB EE", + ) + fun `write a tagless placeholder`(type: TaglessScalarType, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { writeTaglessPlaceholder(type.getOpcode()) } + } + + @ParameterizedTest + @CsvSource( + // Value, Type, Expected Bytes + " 1, INT_8, 01", + " 2, INT_16, 02 00", + " 3, INT_32, 03 00 00 00", + " 4, INT_64, 04 00 00 00 00 00 00 00", + " 5, INT, 0B", + " 50, INT, 65", + " 500, INT, D2 07", + " 5000, INT, 22 4E", + " 50000, INT, 84 1A 06", + " 6, UINT_8, 06", + " 7, UINT_16, 07 00", + " 8, UINT_32, 08 00 00 00", + " 9, UINT_64, 09 00 00 00 00 00 00 00", + " 10, UINT, 15", + " 100, UINT, C9", + " 1000, UINT, A2 0F", + " 10000, UINT, 42 9C", + " 100000, UINT, 04 35 0C", + ) + fun `write tagless integers`(value: BigInteger, encoding: TaglessScalarType, expectedBytes: String) { + // This writes the ints completely out of context so that we can test them apart from a macro or container. 
+ assertWriterOutputEquals(expectedBytes) { writeTaglessInt(encoding.getOpcode(), value) } + assertWriterOutputEquals(expectedBytes) { writeTaglessInt(encoding.getOpcode(), value.toLong()) } + } + + @ParameterizedTest + @CsvSource( + // Value, Type, Expected Bytes + "2025-01-01T, TIMESTAMP_DAY, B7 08", + "2025-01-01T01:02Z, TIMESTAMP_MIN, B7 08 41 08", + "2025-01-01T01:02:03Z, TIMESTAMP_S, B7 08 41 38 00", + "2025-01-01T01:02:03.004Z, TIMESTAMP_MS, B7 08 41 38 10 00", + "2025-01-01T01:02:03.004005Z, TIMESTAMP_US, B7 08 41 38 94 3E 00", + "2025-01-01T01:02:03.004005006Z, TIMESTAMP_NS, B7 08 41 38 38 72 F4 00", + ) + fun `write tagless timestamps`(tsText: String, encoding: TaglessScalarType, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { + writeTaglessTimestamp(encoding.getOpcode(), Timestamp.valueOf(tsText)) + } + } + + @ParameterizedTest + @CsvSource( + // TODO: Implement writing Float 16 + // "1.0, 00 3C", + // "2.0, 00 40", + "3.0, 00 00 40 40", + "4.0, 00 00 80 40", + "5.0, 00 00 00 00 00 00 14 40", + "6.0, 00 00 00 00 00 00 18 40", + ) + fun `write tagless float`(value: Double, expectedBytes: String) { + val opcode = when (expectedBytes.length) { + 5 -> OpCode.FLOAT_16 + 11 -> OpCode.FLOAT_32 + 23 -> OpCode.FLOAT_64 + else -> TODO("Unreachable: ${expectedBytes.length}") + } + assertWriterOutputEquals(expectedBytes) { writeTaglessFloat(opcode, value) } + assertWriterOutputEquals(expectedBytes) { writeTaglessFloat(opcode, value.toFloat()) } + } + + @ParameterizedTest + @CsvSource( + " 1.5, 1F FF", + " 7.29, 66 0B FE", + "1.2345, CC 81 01 FC", + ) + fun `write tagless small decimal`(value: BigDecimal, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { + writeTaglessDecimal(OpCode.TE_SMALL_DECIMAL, value) + } + } + + @Test + fun `write tagless element list with scalar`() { + assertWriterOutputEquals( + """ + 5B 62 | List + 07 | Length = 3 (children) + 01 00 + 02 00 + 03 00 + """ + ) { + val taglessOp = 
TaglessScalarType.INT_16.getOpcode() + stepInTaglessElementList(taglessOp) + writeTaglessInt(taglessOp, 1) + writeTaglessInt(taglessOp, 2) + writeTaglessInt(taglessOp, 3) + stepOut() + } + } + + @Test + fun `write tagless element list with macro-shape`() { + assertWriterOutputEquals( + """ + 5B 01 | List, macro 1 + 05 | Length = 2 (children) + E0 + 61 05 + E0 + 61 06 + """ + ) { + stepInTaglessElementList(1, "foo", false) + stepInTaglessEExp() + writeAbsentArgument() + writeInt(5) + stepOut() + stepInTaglessEExp() + writeAbsentArgument() + writeInt(6) + stepOut() + stepOut() + } + } + + @Test + fun `write tagless element list with length-prefixed macro-shape`() { + assertWriterOutputEquals( + """ + 5B F4 03 | List, length-prefixed macro 1 + 05 | Length = 2 (children) + 07 + E0 + 61 05 + 07 + E0 + 61 06 + """ + ) { + stepInTaglessElementList(1, "foo", true) + stepInTaglessEExp() + writeAbsentArgument() + writeInt(5) + stepOut() + stepInTaglessEExp() + writeAbsentArgument() + writeInt(6) + stepOut() + stepOut() + } + } + + @Test + fun `write empty tagless element list`() { + assertWriterOutputEquals("B0 B0") { + val taglessOp = TaglessScalarType.INT_16.getOpcode() + stepInTaglessElementList(taglessOp) + stepOut() + stepInTaglessElementList(1, "foo", false) + stepOut() + } + } + + @Test + fun `write tagless element sexp with scalar`() { + assertWriterOutputEquals( + """ + 5C 62 | Sexp + 07 | Length = 3 (children) + 01 00 + 02 00 + 03 00 + """ + ) { + val taglessOp = TaglessScalarType.INT_16.getOpcode() + stepInTaglessElementSExp(taglessOp) + writeTaglessInt(taglessOp, 1) + writeTaglessInt(taglessOp, 2) + writeTaglessInt(taglessOp, 3) + stepOut() + } + } + + @Test + fun `write tagless element sexp with macro-shape`() { + assertWriterOutputEquals( + """ + 5C 01 | Sexp, macro 1 + 05 | Length = 2 (children) + E0 + 61 05 + E0 + 61 06 + """ + ) { + stepInTaglessElementSExp(1, "foo", false) + stepInTaglessEExp() + writeAbsentArgument() + writeInt(5) + stepOut() + 
stepInTaglessEExp() + writeAbsentArgument() + writeInt(6) + stepOut() + stepOut() + } + } + + @Test + fun `write tagless element sexp with length-prefixed macro-shape`() { + assertWriterOutputEquals( + """ + 5C F4 03 | Sexp, length-prefixed macro 1 + 05 | Length = 2 (children) + 07 + E0 + 61 05 + 07 + E0 + 61 06 + """ + ) { + stepInTaglessElementSExp(1, "foo", true) + stepInTaglessEExp() + writeAbsentArgument() + writeInt(5) + stepOut() + stepInTaglessEExp() + writeAbsentArgument() + writeInt(6) + stepOut() + stepOut() + } + } + + @Test + fun `write empty tagless element sexp`() { + assertWriterOutputEquals("C0 C0") { + val taglessOp = TaglessScalarType.INT_16.getOpcode() + stepInTaglessElementSExp(taglessOp) + stepOut() + stepInTaglessElementSExp(1, "foo", false) + stepOut() + } + } + + /** + * Writes this Ion, taken from https://amazon-ion.github.io/ion-docs/ + * ``` + * { + * name: "Fido", + * age: years::4, + * birthday: 2012-03-01T, + * toys: [ball, rope], + * weight: pounds::41.2, + * buzz: {{VG8gaW5maW5pdHkuLi4gYW5kIGJleW9uZCE=}}, + * } + * ``` + */ + @Test + fun `write something complex`() { + assertWriterOutputEquals( + """ + E0 01 01 EA | IVM + E1 | (:$ ion set_symbols + 94 6E 61 6D 65 | "name", + 93 61 67 65 | "age", + 95 79 65 61 72 73 | "years", + 98 62 69 72 74 68 64 61 79 | "birthday", + 94 74 6F 79 73 | "toys", + 94 62 61 6C 6C | "ball", + 96 77 65 69 67 68 74 | "weight", + 94 62 75 7A 7A | "buzz", + EF | ) + FC 85 | { // length=66 + 15 94 46 69 64 6F | $10: "Fido", + 17 58 19 61 04 | $11: $12::4, + 1B 82 AA 09 | $13: 2012-03-01T + 1D B7 | $14: [ // length=7 + 57 03 | $15, + A4 72 6F 70 65 | rope + | ], + 21 | $16: + 59 0D 70 6F 75 6E 64 73 | pounds:: + 73 FF 9C 01 | 41.2 + 23 FE 35 | $17: {{ // length=26 + 54 6F 20 69 6E 66 69 6E 69 | VG8gaW5maW5p + 74 79 2E 2E 2E 20 61 6E 64 | dHkuLi4gYW5k + 20 62 65 79 6F 6E 64 21 | IGJleW9uZCE= + | }} + | } + """ + ) { + writeIVM() + stepInDirective(OpCode.DIRECTIVE_SET_SYMBOLS) + writeString("name") + 
writeString("age") + writeString("years") + writeString("birthday") + writeString("toys") + writeString("ball") + writeString("weight") + writeString("buzz") + stepOut() + writeStruct { + writeFieldName(10) + writeString("Fido") + writeFieldName(11) + writeAnnotations(12) + writeInt(4) + writeFieldName(13) + writeTimestamp(Timestamp.valueOf("2012-03-01T")) + writeFieldName(14) + writeList { + writeSymbol(15) + writeSymbol("rope") + } + writeFieldName(16) + writeAnnotations("pounds") + writeDecimal(BigDecimal.valueOf(41.2)) + writeFieldName(17) + writeBlob( + byteArrayOf( + 84, 111, 32, 105, 110, 102, 105, 110, 105, + 116, 121, 46, 46, 46, 32, 97, 110, 100, + 32, 98, 101, 121, 111, 110, 100, 33 + ) + ) + } + } + } + + /** + * Helper function that steps into a struct, applies the contents of [block] to + * the writer, and then steps out of the struct. + * Using this function makes it easy for the indentation of the writer code to + * match the indentation of the equivalent pretty-printed Ion. + */ + private inline fun IonRawWriter_1_1.writeStruct(block: IonRawWriter_1_1.() -> Unit) { + stepInStruct(usingLengthPrefix = true) + block() + stepOut() + } + + /** + * Helper function that steps into a list, applies the contents of [block] to + * the writer, and then steps out of the list. + * Using this function makes it easy for the indentation of the writer code to + * match the indentation of the equivalent pretty-printed Ion. + */ + private inline fun IonRawWriter_1_1.writeList(block: IonRawWriter_1_1.() -> Unit) { + stepInList(usingLengthPrefix = true) + block() + stepOut() + } + + /** + * Helper function that steps into a sexp, applies the contents of [block] to + * the writer, and then steps out of the sexp. + * Using this function makes it easy for the indentation of the writer code to + * match the indentation of the equivalent pretty-printed Ion. 
+ */ + private inline fun IonRawWriter_1_1.writeSExp(block: IonRawWriter_1_1.() -> Unit) { + stepInSExp(usingLengthPrefix = true) + block() + stepOut() + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/TimestampEncoder_1_1Test.kt b/src/test/java/com/amazon/ion/impl/bin/TimestampEncoder_1_1Test.kt new file mode 100644 index 0000000000..7d4504fc34 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/TimestampEncoder_1_1Test.kt @@ -0,0 +1,319 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.Timestamp +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.converter.ArgumentConversionException +import org.junit.jupiter.params.converter.ConvertWith +import org.junit.jupiter.params.converter.TypedArgumentConverter +import org.junit.jupiter.params.provider.CsvSource +import java.io.ByteArrayOutputStream +import java.io.IOException +import java.util.function.BiFunction + +class TimestampEncoder_1_1Test { + private val ALLOCATOR: BlockAllocator = BlockAllocatorProviders.basicProvider().vendAllocator(11) + + private val buf = WriteBuffer(ALLOCATOR) {} + + private fun bytes(): ByteArray { + val out = ByteArrayOutputStream() + try { + buf.writeTo(out) + } catch (e: IOException) { + throw IllegalStateException(e) + } + return out.toByteArray() + } + + /** + * Checks that the function writes the expected bytes and returns the expected count of written bytes for the + * given input value. The expected bytes should be a string of space-separated hexadecimal pairs. 
+ */ + private fun assertWritingValue( + expectedBytes: String, + value: T, + writeOperation: BiFunction + ) { + val numBytes = writeOperation.apply(buf, value) + Assertions.assertEquals(expectedBytes, byteArrayToHex(bytes())) + Assertions.assertEquals(byteLengthFromHexString(expectedBytes), numBytes) + } + + /** + * Checks that the function writes the expected bytes and returns the expected count of written bytes for the + * given input value. The expected bytes should be given as a byte array. + */ + private fun assertWritingValue( + expectedBytes: ByteArray, + value: T, + writeOperation: BiFunction + ) { + val numBytes = writeOperation.apply(buf, value) + Assertions.assertEquals(expectedBytes, bytes()) + Assertions.assertEquals(expectedBytes.size, numBytes) + } + + /** + * Checks that the function writes the expected bytes and returns the expected count of written bytes for the + * given input value. The expectedBytes should be a string of space-separated binary octets. 
+ */ + private fun assertWritingValueWithBinary( + expectedBytes: String, + writeOperation: () -> Int + ) { + val numBytes = writeOperation() + Assertions.assertEquals(expectedBytes, byteArrayToBitString(bytes())) + Assertions.assertEquals(byteLengthFromBitString(expectedBytes), numBytes) + } + + // Because timestamp subfields are smeared across bytes, it's easier to reason about them in 1s and 0s + // instead of hex digits + @ParameterizedTest + @CsvSource( + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff + "2023-10-15T01:00Z, 10000011 00110101 01111101 00000001 00001000", + "2023-10-15T01:59Z, 10000011 00110101 01111101 01100001 00001111", + "2023-10-15T11:22Z, 10000011 00110101 01111101 11001011 00001010", + "2023-10-15T23:00Z, 10000011 00110101 01111101 00010111 00001000", + "2023-10-15T23:59Z, 10000011 00110101 01111101 01110111 00001111", + "2023-10-15T11:22:00Z, 10000100 00110101 01111101 11001011 00001010 00000000", + "2023-10-15T11:22:33Z, 10000100 00110101 01111101 11001011 00011010 00000010", + "2023-10-15T11:22:59Z, 10000100 00110101 01111101 11001011 10111010 00000011", + "2023-10-15T11:22:33.000Z, 10000101 00110101 01111101 11001011 00011010 00000010 00000000", + "2023-10-15T11:22:33.444Z, 10000101 00110101 01111101 11001011 00011010 11110010 00000110", + "2023-10-15T11:22:33.999Z, 10000101 00110101 01111101 11001011 00011010 10011110 00001111", + "2023-10-15T11:22:33.000000Z, 10000110 00110101 01111101 11001011 00011010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555Z, 10000110 00110101 01111101 11001011 00011010 00101110 00100010 00011011", + "2023-10-15T11:22:33.999999Z, 10000110 00110101 01111101 11001011 00011010 11111110 00001000 00111101", + "2023-10-15T11:22:33.000000000Z, 10000111 00110101 01111101 11001011 00011010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666Z, 10000111 00110101 01111101 11001011 00011010 01001010 10000110 11111101 01101001", + 
"2023-10-15T11:22:33.999999999Z, 10000111 00110101 01111101 11001011 00011010 11111110 00100111 01101011 11101110" + ) + fun testWriteTimestampValueWithUtcShortForm( + @ConvertWith(StringToTimestamp::class) value: Timestamp, + expectedBytes: String + ) { + assertWritingValueWithBinary(expectedBytes) { + TimestampEncoder_1_1.writeTimestampValue(buf, value) + } + } + + @ParameterizedTest + @CsvSource( + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff + "1970T, 10000000 00000000", + "2023T, 10000000 00110101", + "2097T, 10000000 01111111", + "2023-01T, 10000001 10110101 00000000", + "2023-10T, 10000001 00110101 00000101", + "2023-12T, 10000001 00110101 00000110", + "2023-10-01T, 10000010 00110101 00001101", + "2023-10-15T, 10000010 00110101 01111101", + "2023-10-31T, 10000010 00110101 11111101", + "2023-10-15T01:00-00:00, 10000011 00110101 01111101 00000001 00000000", + "2023-10-15T01:59-00:00, 10000011 00110101 01111101 01100001 00000111", + "2023-10-15T11:22-00:00, 10000011 00110101 01111101 11001011 00000010", + "2023-10-15T23:00-00:00, 10000011 00110101 01111101 00010111 00000000", + "2023-10-15T23:59-00:00, 10000011 00110101 01111101 01110111 00000111", + "2023-10-15T11:22:00-00:00, 10000100 00110101 01111101 11001011 00000010 00000000", + "2023-10-15T11:22:33-00:00, 10000100 00110101 01111101 11001011 00010010 00000010", + "2023-10-15T11:22:59-00:00, 10000100 00110101 01111101 11001011 10110010 00000011", + "2023-10-15T11:22:33.000-00:00, 10000101 00110101 01111101 11001011 00010010 00000010 00000000", + "2023-10-15T11:22:33.444-00:00, 10000101 00110101 01111101 11001011 00010010 11110010 00000110", + "2023-10-15T11:22:33.999-00:00, 10000101 00110101 01111101 11001011 00010010 10011110 00001111", + "2023-10-15T11:22:33.000000-00:00, 10000110 00110101 01111101 11001011 00010010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555-00:00, 10000110 00110101 01111101 11001011 00010010 00101110 00100010 00011011", + 
"2023-10-15T11:22:33.999999-00:00, 10000110 00110101 01111101 11001011 00010010 11111110 00001000 00111101", + "2023-10-15T11:22:33.000000000-00:00, 10000111 00110101 01111101 11001011 00010010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666-00:00, 10000111 00110101 01111101 11001011 00010010 01001010 10000110 11111101 01101001", + "2023-10-15T11:22:33.999999999-00:00, 10000111 00110101 01111101 11001011 00010010 11111110 00100111 01101011 11101110" + ) + fun testWriteTimestampValueWithUnknownOffsetShortForm( + @ConvertWith(StringToTimestamp::class) value: Timestamp, + expectedBytes: String + ) { + assertWritingValueWithBinary(expectedBytes) { + TimestampEncoder_1_1.writeTimestampValue(buf, value) + } + } + + @ParameterizedTest + @CsvSource( + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ooooommm ssssssoo ffffffff ffffffff ffffffff ..ffffff + "2023-10-15T01:00-14:00, 10001000 00110101 01111101 00000001 00000000 00000000", + "2023-10-15T01:00+14:00, 10001000 00110101 01111101 00000001 10000000 00000011", + "2023-10-15T01:00-01:15, 10001000 00110101 01111101 00000001 10011000 00000001", + "2023-10-15T01:00+01:15, 10001000 00110101 01111101 00000001 11101000 00000001", + "2023-10-15T01:59+01:15, 10001000 00110101 01111101 01100001 11101111 00000001", + "2023-10-15T11:22+01:15, 10001000 00110101 01111101 11001011 11101010 00000001", + "2023-10-15T23:00+01:15, 10001000 00110101 01111101 00010111 11101000 00000001", + "2023-10-15T23:59+01:15, 10001000 00110101 01111101 01110111 11101111 00000001", + "2023-10-15T11:22:00+01:15, 10001001 00110101 01111101 11001011 11101010 00000001", + "2023-10-15T11:22:33+01:15, 10001001 00110101 01111101 11001011 11101010 10000101", + "2023-10-15T11:22:59+01:15, 10001001 00110101 01111101 11001011 11101010 11101101", + "2023-10-15T11:22:33.000+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 00000000 00000000", + "2023-10-15T11:22:33.444+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 10111100 
00000001", + "2023-10-15T11:22:33.999+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 11100111 00000011", + "2023-10-15T11:22:33.000000+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 10001011 11001000 00000110", + "2023-10-15T11:22:33.999999+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 00111111 01000010 00001111", + "2023-10-15T11:22:33.000000000+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 10010010 01100001 01111111 00011010", + "2023-10-15T11:22:33.999999999+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 11111111 11001001 10011010 00111011" + + ) + fun testWriteTimestampValueWithKnownOffsetShortForm( + @ConvertWith(StringToTimestamp::class) value: Timestamp, + expectedBytes: String + ) { + assertWritingValueWithBinary(expectedBytes) { + TimestampEncoder_1_1.writeTimestampValue(buf, value) + } + } + + @ParameterizedTest + @CsvSource( + // Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Coefficient+ Scale + "0001T, 00000101 00000001 00000000", + "1947T, 00000101 10011011 00000111", + "9999T, 00000101 00001111 00100111", + "1947-01T, 00000111 10011011 01000111 00000000", + "1947-12T, 00000111 10011011 00000111 00000011", + "1947-01-01T, 00000111 10011011 01000111 00000100", + "1947-12-23T, 00000111 10011011 00000111 01011111", + "1947-12-31T, 00000111 10011011 00000111 01111111", + "1947-12-23T00:00Z, 00001101 10011011 00000111 01011111 00000000 10000000 00010110", + "1947-12-23T23:59Z, 00001101 10011011 00000111 11011111 10111011 10000011 00010110", + "1947-12-23T23:59:00Z, 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", + "1947-12-23T23:59:59Z, 00001111 10011011 00000111 11011111 10111011 10000011 
11010110 00001110", + "1947-12-23T23:59:00.0Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000001", + "1947-12-23T23:59:00.00Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000010", + "1947-12-23T23:59:00.000Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000011", + "1947-12-23T23:59:00.0000Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000100", + "1947-12-23T23:59:00.00000Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000101", + "1947-12-23T23:59:00.000000Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000110", + "1947-12-23T23:59:00.0000000Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000111", + "1947-12-23T23:59:00.00000000Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00001000", + "1947-12-23T23:59:00.9Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010011 00000001", + "1947-12-23T23:59:00.99Z, 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11000111 00000010", + "1947-12-23T23:59:00.999Z, 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10011110 00001111 00000011", + "1947-12-23T23:59:00.9999Z, 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00111110 10011100 00000100", + "1947-12-23T23:59:00.99999Z, 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00110100 00001100 00000101", + "1947-12-23T23:59:00.999999Z, 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00010001 01111010 00000110", + "1947-12-23T23:59:00.9999999Z, 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 01100111 10001001 00001001 00000111", + "1947-12-23T23:59:00.99999999Z, 00011001 10011011 00000111 
11011111 10111011 10000011 00010110 00000000 11111000 00001111 01011110 01011111 00001000", + "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + + "00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 10001101", + ( + "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + + "00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 01101000 00000001" + ), + ( + "1947-12-23T23:59:00.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999Z, " + + "10010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 " + + "11111100 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + + "11111111 10010100 10001001 01111001 01101100 11001110 01111000 11110010 01000000 01111101 10100110 11000111 10101000 01000110 01011001 01110001 01001101 " + + "00100000 11110101 01101110 01111010 00001100 00001001 11101111 01111111 11110011 00011110 00010100 11010111 01101000 01110111 10101100 01101100 10001110 " + + "00110010 10110111 10000010 11110010 00110110 01101000 11110010 10100111 10001101" + ), // Offsets + + "2048-01-01T01:01-23:59, 00001101 00000000 01001000 10000100 00010000 00000100 00000000", + "2048-01-01T01:01-00:02, 00001101 00000000 01001000 10000100 00010000 01111000 00010110", + "2048-01-01T01:01-00:01, 
00001101 00000000 01001000 10000100 00010000 01111100 00010110", + "2048-01-01T01:01-00:00, 00001101 00000000 01001000 10000100 00010000 11111100 00111111", + "2048-01-01T01:01+00:00, 00001101 00000000 01001000 10000100 00010000 10000000 00010110", + "2048-01-01T01:01+00:01, 00001101 00000000 01001000 10000100 00010000 10000100 00010110", + "2048-01-01T01:01+00:02, 00001101 00000000 01001000 10000100 00010000 10001000 00010110", + "2048-01-01T01:01+23:59, 00001101 00000000 01001000 10000100 00010000 11111100 00101100" + ) + fun testWriteTimestampValueLongForm( + @ConvertWith(StringToTimestamp::class) value: Timestamp, + expectedBytes: String + ) { + assertWritingValueWithBinary(expectedBytes) { + TimestampEncoder_1_1.writeLongFormTimestampBody(buf, value) + } + } + + @ParameterizedTest + @CsvSource( + // Long form because it's out of the year range + "0001T, 11110111 00000101 00000001 00000000", + "9999T, 11110111 00000101 00001111 00100111", // Long form because the offset is too high/low + "2048-01-01T01:01+14:15, 11110111 00001101 00000000 01001000 10000100 00010000 11011100 00100011", + "2048-01-01T01:01-14:15, 11110111 00001101 00000000 01001000 10000100 00010000 00100100 00001001", // Long form because the offset is not a multiple of 15 + + "2048-01-01T01:01+00:01, 11110111 00001101 00000000 01001000 10000100 00010000 10000100 00010110", // Long form because the fractional seconds are millis, micros, or nanos + + "2023-12-31T23:59:00.0Z, 11110111 00010011 11100111 00000111 11111111 10111011 10000011 00010110 00000000 00000001 00000001" + ) + fun testWriteTimestampDelegatesCorrectlyToLongForm( + @ConvertWith(StringToTimestamp::class) value: Timestamp, + expectedBytes: String + ) { + assertWritingValueWithBinary(expectedBytes) { + TimestampEncoder_1_1.writeTimestampValue(buf, value) + } + } + + /** + * Converts a String to a Timestamp for a @Parameterized test + */ + internal class StringToTimestamp protected constructor() : + 
TypedArgumentConverter(String::class.java, Timestamp::class.java) { + @Throws(ArgumentConversionException::class) + override fun convert(source: String?): Timestamp? { + if (source == null) return null + return Timestamp.valueOf(source) + } + } + + /** + * Utility method to make it easier to write test cases that assert specific sequences of bytes. + */ + private fun byteArrayToHex(bytes: ByteArray): String { + val sb = StringBuilder() + for (b in bytes) { + sb.append(String.format("%02X ", b)) + } + return sb.toString().trim { it <= ' ' } + } + + /** + * Determines the number of bytes needed to represent a series of hexadecimal digits. + */ + private fun byteLengthFromHexString(hexString: String): Int { + return (hexString.replace("[^\\dA-F]".toRegex(), "").length) / 2 + } + + /** + * Converts a byte array to a string of bits, such as "00110110 10001001". + * The purpose of this method is to make it easier to read and write test assertions. + */ + private fun byteArrayToBitString(bytes: ByteArray): String { + val s = StringBuilder() + for (aByte in bytes) { + for (bit in 7 downTo 0) { + if (((0x01 shl bit) and aByte.toInt()) != 0) { + s.append("1") + } else { + s.append("0") + } + } + s.append(" ") + } + return s.toString().trim { it <= ' ' } + } + + /** + * Determines the number of bytes needed to represent a series of binary digits (bits). 
+ */ + private fun byteLengthFromBitString(bitString: String): Int { + return (bitString.replace("[^01]".toRegex(), "").length) / 8 + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java index e71c4fb7c4..6eb5f1e44b 100644 --- a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java +++ b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java @@ -18,16 +18,18 @@ import static com.amazon.ion.TestUtils.hexDump; import static com.amazon.ion.impl.bin.WriteBuffer.varUIntLength; import static com.amazon.ion.impl.bin.WriteBuffer.writeVarUIntTo; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.concurrent.atomic.AtomicBoolean; @@ -79,11 +81,17 @@ private void assertBuffer(final byte[] expected) final byte[] actual = bytes(); assertArrayEquals( - "Bytes don't match!\nEXPECTED:\n" + hexDump(expected) + "\nACTUAL:\n" + hexDump(actual) + "\n", - expected, actual + expected, actual, + "Bytes don't match!\nEXPECTED:\n" + hexDump(expected) + "\nACTUAL:\n" + hexDump(actual) + "\n" ); } + @Test + public void testConstructorThrowsWhenBlockSizeTooSmall() { + BlockAllocator ba = BlockAllocatorProviders.basicProvider().vendAllocator(9); + 
assertThrows(IllegalArgumentException.class, () -> new WriteBuffer(ba, () -> {})); + } + @Test public void testInt8Positive() { @@ -944,9 +952,26 @@ public void testBytes() throws IOException @Test public void testTruncate() throws IOException { - buf.writeBytes("ARGLEFOOBARGLEDOO".getBytes("UTF-8")); + buf.writeBytes("ARGLEFOOBARGLEDOO".getBytes(StandardCharsets.UTF_8)); + buf.writeBytes("ARGLE".getBytes(StandardCharsets.UTF_8)); + buf.truncate(3); + // Check that the expected bytes are present + assertBuffer("ARG".getBytes(StandardCharsets.UTF_8)); + // ...and check that we can resume writing without any issues + buf.writeBytes("LEFOOBARGLEDOO".getBytes(StandardCharsets.UTF_8)); + assertBuffer("ARGLEFOOBARGLEDOO".getBytes(StandardCharsets.UTF_8)); + } + + @Test + public void testTruncateAcrossBlocks() throws IOException + { + buf.writeBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.UTF_8)); buf.truncate(3); - assertBuffer("ARG".getBytes("UTF-8")); + // Check that the expected bytes are present + assertBuffer("ABC".getBytes(StandardCharsets.UTF_8)); + // ...and check that we can resume writing without any issues + buf.writeBytes("DEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.UTF_8)); + assertBuffer("ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.UTF_8)); } @Test @@ -1121,6 +1146,32 @@ public void shiftBytesLeftWithLengthZeroAcrossBlocks() { assertBuffer("0123456789".getBytes()); } + @Test + public void reserveShouldSkipTheRequestedNumberOfBytes() { + buf.reserve(5); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. 
+ assertBuffer("\0\0\0\0\0A".getBytes()); + } + + @Test + public void reserveShouldSkipTheRequestedNumberOfBytesAcrossOneBlock() { + assertEquals(11, ALLOCATOR.getBlockSize()); + buf.reserve(15); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. + assertBuffer("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0A".getBytes()); + } + + @Test + public void reserveShouldSkipTheRequestedNumberOfBytesAcrossManyBlock() { + assertEquals(11, ALLOCATOR.getBlockSize()); + buf.reserve(40); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. + assertBuffer("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0A".getBytes()); + } + /** * Test if the method 'writeVarUIntTo' writes the expected bytes to the output stream. * @throws Exception if there is an error occurred while writing data to the output stream. 
@@ -1386,6 +1437,25 @@ public void testWriteFlexInt(long value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexIntAcrossBlocks() { + long value = Long.MIN_VALUE; + String expectedNumberBits = "00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @ParameterizedTest @CsvSource({ " 0, 00000001", @@ -1591,8 +1661,8 @@ public void testWriteFlexUIntForNegativeBigInteger() { public void testWriteFixedInt(long value, String expectedBits) { int numBytes = buf.writeFixedInt(value); String actualBits = byteArrayToBitString(bytes()); - Assertions.assertEquals(expectedBits, actualBits); - Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); + assertEquals(expectedBits, actualBits); + assertEquals((expectedBits.length() + 1)/9, numBytes); } @ParameterizedTest