From ad7c0f982cd4e324818a63bc4bce70cbf2b93cfa Mon Sep 17 00:00:00 2001 From: Joshua Barr Date: Wed, 2 Aug 2023 11:41:40 -0700 Subject: [PATCH 1/3] Only initiate PatchPoint when needed. --- badGeneratedData.10n | Bin 0 -> 6 bytes .../ion/impl/IonReaderBinaryIncremental.java | 43 ++++----- .../ion/impl/_Private_RecyclingQueue.java | 13 ++- .../ion/impl/_Private_RecyclingStack.java | 86 +++++++++++++++++- .../ion/impl/bin/IonRawBinaryWriter.java | 76 +++++++++------- 5 files changed, 155 insertions(+), 63 deletions(-) create mode 100644 badGeneratedData.10n diff --git a/badGeneratedData.10n b/badGeneratedData.10n new file mode 100644 index 0000000000000000000000000000000000000000..83d663e75697dc89e9668cab85dd6106988969d9 GIT binary patch literal 6 NcmaFB$ndI=1po-|0zm)( literal 0 HcmV?d00001 diff --git a/src/com/amazon/ion/impl/IonReaderBinaryIncremental.java b/src/com/amazon/ion/impl/IonReaderBinaryIncremental.java index f5487b7dbc..90e1d7c840 100644 --- a/src/com/amazon/ion/impl/IonReaderBinaryIncremental.java +++ b/src/com/amazon/ion/impl/IonReaderBinaryIncremental.java @@ -34,6 +34,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; /** *

@@ -101,6 +102,12 @@ private static class ContainerInfo { * The byte position of the end of the container. */ private int endPosition; + + private ContainerInfo initialize(final IonType type, final int endPosition) { + this.type = type; + this.endPosition = endPosition; + return this; + } } /** @@ -109,16 +116,6 @@ private static class ContainerInfo { private static final IonBufferConfiguration STANDARD_BUFFER_CONFIGURATION = IonBufferConfiguration.Builder.standard().build(); - // Constructs ContainerInfo instances. - private static final _Private_RecyclingStack.ElementFactory CONTAINER_INFO_FACTORY = - new _Private_RecyclingStack.ElementFactory() { - - @Override - public ContainerInfo newElement() { - return new ContainerInfo(); - } - }; - // Symbol IDs for symbols contained in the system symbol table. private static class SystemSymbolIDs { @@ -361,12 +358,13 @@ private static int logBase2(int value) { } lookahead = new IonReaderLookaheadBuffer(configuration, inputStream); buffer = (ResizingPipedInputStream) lookahead.getPipe(); - containerStack = new _Private_RecyclingStack( - CONTAINER_STACK_INITIAL_CAPACITY, - CONTAINER_INFO_FACTORY + containerStack = new _Private_RecyclingStack<>( + CONTAINER_STACK_INITIAL_CAPACITY, + ContainerInfo::new ); + annotationSids = new IntList(ANNOTATIONS_LIST_INITIAL_CAPACITY); - symbols = new ArrayList(SYMBOLS_LIST_INITIAL_CAPACITY); + symbols = new ArrayList<>(SYMBOLS_LIST_INITIAL_CAPACITY); scalarConverter = new _Private_ScalarConversions.ValueVariant(); resetImports(); } @@ -512,10 +510,7 @@ public boolean equals(Object o) { // NOTE: once ImportLocation is available via the SymbolToken interface, it should be compared here // when text is null. SymbolToken other = (SymbolToken) o; - if(getText() == null || other.getText() == null) { - return getText() == other.getText(); - } - return getText().equals(other.getText()); + return Objects.equals(getText(), other.getText()); } @Override @@ -567,8 +562,8 @@ private class LocalSymbolTableSnapshot implements SymbolTable, SymbolTableAsStru maxId = importsMaxId + numberOfLocalSymbols; // Map with initial size the number of symbols and load factor 1, meaning it must be full before growing. // It is not expected to grow. - listView = new ArrayList(symbols.subList(0, numberOfLocalSymbols)); - mapView = new HashMap((int) Math.ceil(numberOfLocalSymbols / 0.75), 0.75f); + listView = new ArrayList<>(symbols.subList(0, numberOfLocalSymbols)); + mapView = new HashMap<>((int) Math.ceil(numberOfLocalSymbols / 0.75), 0.75f); for (int i = 0; i < numberOfLocalSymbols; i++) { String symbol = listView.get(i); if (symbol != null) { @@ -853,7 +848,7 @@ private String getSymbol(int sid) { private SymbolToken getSymbolToken(int sid) { int symbolTableSize = maxSymbolId() + 1; if (symbolTokensById == null) { - symbolTokensById = new ArrayList(symbolTableSize); + symbolTokensById = new ArrayList<>(symbolTableSize); } if (symbolTokensById.size() < symbolTableSize) { for (int i = symbolTokensById.size(); i < symbolTableSize; i++) { @@ -908,7 +903,7 @@ private void readSymbolTable(IonReaderLookaheadBuffer.Marker marker) { peekIndex = currentValueEndPosition; } else if (typeID.type == IonType.LIST) { resetImports(); - newImports = new ArrayList(3); + newImports = new ArrayList<>(3); newImports.add(getSystemSymbolTable()); stepIn(); IonType type = next(); @@ -1204,9 +1199,7 @@ public void stepIn() { } // Note: the IonReader interface dictates that stepping into a null container has the same behavior as // an empty container. - ContainerInfo containerInfo = containerStack.push(); - containerInfo.type = valueType; - containerInfo.endPosition = valueEndPosition; + containerStack.push(c -> c.initialize(valueType, valueEndPosition)); valueType = null; valueTypeID = null; valueEndPosition = -1; diff --git a/src/com/amazon/ion/impl/_Private_RecyclingQueue.java b/src/com/amazon/ion/impl/_Private_RecyclingQueue.java index 5f015cd518..a52ccd7a35 100644 --- a/src/com/amazon/ion/impl/_Private_RecyclingQueue.java +++ b/src/com/amazon/ion/impl/_Private_RecyclingQueue.java @@ -20,6 +20,14 @@ public interface ElementFactory { T newElement(); } + @FunctionalInterface + public interface Recycler { + /** + * Re-initialize an element + */ + void recycle(T t); + } + /** * Iterator for the queue. */ @@ -67,7 +75,7 @@ public T get(int index) { * previously grown to the new depth. * @return the element at the top of the queue after the push. This element must be initialized by the caller. */ - public T push() { + public int push(Recycler recycler) { currentIndex++; if (currentIndex >= elements.size()) { top = elementFactory.newElement(); @@ -75,7 +83,8 @@ public T push() { } else { top = elements.get(currentIndex); } - return top; + recycler.recycle(top); + return currentIndex; } /** diff --git a/src/com/amazon/ion/impl/_Private_RecyclingStack.java b/src/com/amazon/ion/impl/_Private_RecyclingStack.java index a98e09d410..0c1f4e7a26 100644 --- a/src/com/amazon/ion/impl/_Private_RecyclingStack.java +++ b/src/com/amazon/ion/impl/_Private_RecyclingStack.java @@ -2,13 +2,20 @@ import java.util.ArrayList; import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; /** * A stack whose elements are recycled. This can be useful when the stack needs to grow and shrink * frequently and has a predictable maximum depth. * @param the type of elements stored. */ -public final class _Private_RecyclingStack { +public final class _Private_RecyclingStack implements Iterable { + + @Override + public ListIterator iterator() { + return new $Iterator(); + } /** * Factory for new stack elements. @@ -22,6 +29,14 @@ public interface ElementFactory { T newElement(); } + @FunctionalInterface + public interface Recycler { + /** + * Re-initialize an element + */ + void recycle(T t); + } + private final List elements; private final ElementFactory elementFactory; private int currentIndex; @@ -29,11 +44,11 @@ public interface ElementFactory { /** * @param initialCapacity the initial capacity of the underlying collection. - * @param elementFactory the factory used to create a new element on {@link #push()} when the stack has + * @param elementFactory the factory used to create a new element on {@link #push(Recycler)} when the stack has * not previously grown to the new depth. */ public _Private_RecyclingStack(int initialCapacity, ElementFactory elementFactory) { - elements = new ArrayList(initialCapacity); + elements = new ArrayList<>(initialCapacity); this.elementFactory = elementFactory; currentIndex = -1; top = null; @@ -44,7 +59,7 @@ public _Private_RecyclingStack(int initialCapacity, ElementFactory elementFac * previously grown to the new depth. * @return the element at the top of the stack after the push. This element must be initialized by the caller. */ - public T push() { + public T push(Recycler recycler) { currentIndex++; if (currentIndex >= elements.size()) { top = elementFactory.newElement(); @@ -52,6 +67,7 @@ public T push() { } else { top = elements.get(currentIndex); } + recycler.recycle(top); return top; } @@ -92,4 +108,66 @@ public boolean isEmpty() { public int size() { return currentIndex + 1; } + + private class $Iterator implements ListIterator { + private int cursor; + + $Iterator() { + cursor = _Private_RecyclingStack.this.currentIndex; + } + + @Override + public boolean hasNext() { + return cursor >= 0; + } + + @Override + public T next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + // post-decrement because "next" is where the cursor is + return _Private_RecyclingStack.this.elements.get(cursor--); + } + + @Override + public boolean hasPrevious() { + return cursor + 1 <= _Private_RecyclingStack.this.currentIndex; + } + + @Override + public T previous() { + if (!hasPrevious()) { + throw new NoSuchElementException(); + } + // pre-increment: "next" is where the cursor is, so "previous" is upward in stack + return _Private_RecyclingStack.this.elements.get(++cursor); + } + + @Override + public int nextIndex() { + return cursor; + } + + @Override + public int previousIndex() { + return cursor + 1; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public void set(T t) { + throw new UnsupportedOperationException(); + } + + @Override + public void add(T t) { + throw new UnsupportedOperationException(); + } + } + } diff --git a/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java b/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java index b85ea74fcb..50a7942c5a 100644 --- a/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java +++ b/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java @@ -55,10 +55,8 @@ import java.io.OutputStream; import java.math.BigDecimal; import java.math.BigInteger; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; +import java.util.ListIterator; /** * Low-level binary {@link IonWriter} that understands encoding concerns but doesn't operate with any sense of symbol table management. @@ -258,37 +256,42 @@ private class ContainerInfo /** The size of the current value. */ public long length; /** - * The index of the patch point placeholder. + * The index of the patch point if present, -1 otherwise. */ - public int placeholderPatchIndex; + public int patchIndex; public ContainerInfo() { type = null; position = -1; length = -1; - placeholderPatchIndex = -1; + patchIndex = -1; } public void appendPatch(final long oldPosition, final int oldLength, final long length) { - patchPoints.get(placeholderPatchIndex).initialize(oldPosition, oldLength, length); + if (patchIndex == -1) { + // We have no assigned patch point, we need to make our own + patchIndex = patchPoints.push(p -> p.initialize(oldPosition, oldLength, length)); + } else { + // We have an assigned patch point already, but we need to overwrite it with the correct data + patchPoints.get(patchIndex).initialize(oldPosition, oldLength, length); + } } - public void initialize(final ContainerType type, final long offset) { + public ContainerInfo initialize(final ContainerType type, final long offset) { this.type = type; this.position = offset; this.length = 0; - if (type != ContainerType.VALUE) { - placeholderPatchIndex = patchPoints.size(); - patchPoints.push().initialize(-1, -1, -1); - } + this.patchIndex = -1; + + return this; } @Override public String toString() { - return "(CI " + type + " pos:" + position + " len:" + length + ")"; + return "(CI " + type + " pos:" + position + " len:" + length + " patch:"+patchIndex+")"; } } @@ -313,10 +316,15 @@ public String toString() return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + ")"; } - public void initialize(final long oldPosition, final int oldLength, final long length) { + public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) { this.oldPosition = oldPosition; this.oldLength = oldLength; this.length = length; + return this; + } + + public PatchPoint clear() { + return initialize(-1, -1, -1); } } @@ -543,23 +551,29 @@ private void updateLength(long length) private void pushContainer(final ContainerType type) { // XXX we push before writing the type of container - containers.push().initialize(type, buffer.position() + 1); + containers.push(c -> c.initialize(type, buffer.position() + 1)); } - private void addPatchPoint(final long position, final int oldLength, final long value, final ContainerInfo container) + private void addPatchPoint(final ContainerInfo container, final long position, final int oldLength, final long value) { + // If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already + // have a patch point. No container can be smaller than the contents, so all outer layers also require patches. + ListIterator stackIterator = containers.iterator(); + // Walk down the stack until we find an ancestor which already has a patch point + while (stackIterator.hasNext() && stackIterator.next().patchIndex == -1); + + // The iterator cursor is now positioned on an ancestor container that has a patch point + // Ascend back up the stack, fixing the ancestors which need a patch point assigned before us + while (stackIterator.hasPrevious()) { + ContainerInfo ancestor = stackIterator.previous(); + if (ancestor.patchIndex == -1) { + ancestor.patchIndex = patchPoints.push(PatchPoint::clear); + } + } + // record the size of the length data. final int patchLength = WriteBuffer.varUIntLength(value); - if (container == null) - { - // not nested, just append to the root list - patchPoints.push().initialize(position, oldLength, value); - } - else - { - // nested, apply it to the current container - container.appendPatch(position, oldLength, value); - } + container.appendPatch(position, oldLength, value); updateLength(patchLength - oldLength); } @@ -613,7 +627,6 @@ private ContainerInfo popContainer() // We've reclaimed some number of bytes; adjust the container length as appropriate. length -= numberOfBytesToShiftBy; - reclaimPlaceholderPatchPoint(currentContainer.placeholderPatchIndex); } else if (currentContainer.length <= preallocationMode.contentMaxLength) { @@ -621,14 +634,13 @@ else if (currentContainer.length <= preallocationMode.contentMaxLength) // fit in the preallocated length bytes that were added to the buffer when the container was started. // Update those bytes with the VarUInt encoding of the length value. preallocationMode.patchLength(buffer, positionOfFirstLengthByte, length); - reclaimPlaceholderPatchPoint(currentContainer.placeholderPatchIndex); } else { // The container's encoded body is too long to fit in the length bytes that were preallocated. - // Write the VarUInt encoding of the length in a secondary buffer and make a note to include that - // when we go to flush the primary buffer to the output stream. - addPatchPoint(positionOfFirstLengthByte, preallocationMode.numberOfLengthBytes(), length, currentContainer); + // Write the length in the patch point list, making a note to include the VarUInt encoding of the length + // at the right point when we go to flush the primary buffer to the output stream. + addPatchPoint(currentContainer, positionOfFirstLengthByte, preallocationMode.numberOfLengthBytes(), length); } } @@ -1074,7 +1086,7 @@ private void patchSingleByteTypedOptimisticValue(final byte type, final Containe { // side patch buffer.writeUInt8At(info.position - 1, type | 0xE); - addPatchPoint(info.position, 0, info.length, null); + addPatchPoint(info, info.position, 0, info.length); } } From 05acf07394937aed6aae5130e58f311646b6c193 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Mon, 18 Sep 2023 14:27:01 -0700 Subject: [PATCH 2/3] Reuse iterator to avoid unnecessary allocation. --- badGeneratedData.10n | Bin 6 -> 0 bytes .../amazon/ion/impl/_Private_RecyclingStack.java | 14 ++++++++------ .../amazon/ion/impl/bin/IonRawBinaryWriter.java | 6 ++++-- 3 files changed, 12 insertions(+), 8 deletions(-) delete mode 100644 badGeneratedData.10n diff --git a/badGeneratedData.10n b/badGeneratedData.10n deleted file mode 100644 index 83d663e75697dc89e9668cab85dd6106988969d9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6 NcmaFB$ndI=1po-|0zm)( diff --git a/src/com/amazon/ion/impl/_Private_RecyclingStack.java b/src/com/amazon/ion/impl/_Private_RecyclingStack.java index 0c1f4e7a26..c2e2d4385e 100644 --- a/src/com/amazon/ion/impl/_Private_RecyclingStack.java +++ b/src/com/amazon/ion/impl/_Private_RecyclingStack.java @@ -11,10 +11,16 @@ * @param the type of elements stored. */ public final class _Private_RecyclingStack implements Iterable { - + private $Iterator stackIterator; @Override public ListIterator iterator() { - return new $Iterator(); + stackIterator = new $Iterator(); + return stackIterator; + } + + // Reset the cursor of iterator. + public void resetIterator() { + stackIterator.cursor = _Private_RecyclingStack.this.currentIndex; } /** @@ -112,10 +118,6 @@ public int size() { private class $Iterator implements ListIterator { private int cursor; - $Iterator() { - cursor = _Private_RecyclingStack.this.currentIndex; - } - @Override public boolean hasNext() { return cursor >= 0; diff --git a/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java b/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java index 50a7942c5a..4c5c6f9d89 100644 --- a/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java +++ b/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java @@ -359,6 +359,7 @@ public PatchPoint clear() { private final IntList currentAnnotationSids; // XXX this is for managed detection of TLV that is a LST--this is easier to track here than at the managed level private boolean hasTopLevelSymbolTableAnnotation; + private ListIterator stackIterator; private boolean closed; @@ -399,7 +400,7 @@ public ContainerInfo newElement() { this.currentFieldSid = SID_UNASSIGNED; this.currentAnnotationSids = new IntList(); this.hasTopLevelSymbolTableAnnotation = false; - + this.stackIterator = containers.iterator(); this.closed = false; } @@ -558,7 +559,8 @@ private void addPatchPoint(final ContainerInfo container, final long position, f { // If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already // have a patch point. No container can be smaller than the contents, so all outer layers also require patches. - ListIterator stackIterator = containers.iterator(); + // Instead of allocating iterator, we share one iterator instance within the scope of the container stack and reset the cursor every time we track back to the ancestors. + containers.resetIterator(); // Walk down the stack until we find an ancestor which already has a patch point while (stackIterator.hasNext() && stackIterator.next().patchIndex == -1); From 96ef72d51fbf587a117f3163b6c3035d4a90e16d Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Tue, 19 Sep 2023 11:42:07 -0700 Subject: [PATCH 3/3] Conditionally allocate iterator. --- src/com/amazon/ion/impl/_Private_RecyclingStack.java | 11 +++++------ src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java | 4 +--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/com/amazon/ion/impl/_Private_RecyclingStack.java b/src/com/amazon/ion/impl/_Private_RecyclingStack.java index c2e2d4385e..7f925cd2c3 100644 --- a/src/com/amazon/ion/impl/_Private_RecyclingStack.java +++ b/src/com/amazon/ion/impl/_Private_RecyclingStack.java @@ -14,15 +14,14 @@ public final class _Private_RecyclingStack implements Iterable { private $Iterator stackIterator; @Override public ListIterator iterator() { - stackIterator = new $Iterator(); + if (stackIterator != null) { + stackIterator.cursor = _Private_RecyclingStack.this.currentIndex; + } else { + stackIterator = new $Iterator(); + } return stackIterator; } - // Reset the cursor of iterator. - public void resetIterator() { - stackIterator.cursor = _Private_RecyclingStack.this.currentIndex; - } - /** * Factory for new stack elements. * @param the type of element. diff --git a/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java b/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java index 4c5c6f9d89..88e4cca7e3 100644 --- a/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java +++ b/src/com/amazon/ion/impl/bin/IonRawBinaryWriter.java @@ -359,7 +359,6 @@ public PatchPoint clear() { private final IntList currentAnnotationSids; // XXX this is for managed detection of TLV that is a LST--this is easier to track here than at the managed level private boolean hasTopLevelSymbolTableAnnotation; - private ListIterator stackIterator; private boolean closed; @@ -400,7 +399,6 @@ public ContainerInfo newElement() { this.currentFieldSid = SID_UNASSIGNED; this.currentAnnotationSids = new IntList(); this.hasTopLevelSymbolTableAnnotation = false; - this.stackIterator = containers.iterator(); this.closed = false; } @@ -560,7 +558,7 @@ private void addPatchPoint(final ContainerInfo container, final long position, f // If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already // have a patch point. No container can be smaller than the contents, so all outer layers also require patches. // Instead of allocating iterator, we share one iterator instance within the scope of the container stack and reset the cursor every time we track back to the ancestors. - containers.resetIterator(); + ListIterator stackIterator = containers.iterator(); // Walk down the stack until we find an ancestor which already has a patch point while (stackIterator.hasNext() && stackIterator.next().patchIndex == -1);