Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
Expand All @@ -36,6 +37,7 @@
import static com.amazon.ion.SystemSymbols.NAME_SID;
import static com.amazon.ion.SystemSymbols.SYMBOLS_SID;
import static com.amazon.ion.SystemSymbols.VERSION_SID;
import static com.amazon.ion.impl._Private_Utils.safeEquals;

/**
* An IonCursor capable of application-level parsing of binary Ion streams.
Expand Down Expand Up @@ -82,6 +84,12 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina
// symbol table is encountered in the stream.
private SymbolTable cachedReadOnlySymbolTable = null;

// The cached SymbolTable that was determined to be a superset of the reader's current symbol table during a call
// to 'isSymbolTableSubsetOf'. This is set to null whenever the reader encounters a new symbol table. Therefore,
// when non-null, determining whether the reader's symbol table is a subset of a given table is as simple as
// checking whether that table is the same as 'lastSupersetSymbolTable'.
private SymbolTable lastSupersetSymbolTable = null;

// The reusable annotation iterator.
private final AnnotationSequenceIterator annotationIterator = new AnnotationSequenceIterator();

Expand Down Expand Up @@ -206,6 +214,110 @@ private SymbolTable getSystemSymbolTable() {
return SharedSymbolTable.getSystemSymbolTable(getIonMajorVersion());
}

/**
 * Compares an array of imported symbol tables against a list of imported symbol tables, by reference.
 * The array variant is expected to begin with a system symbol table at index 0 while the list variant
 * is expected to omit it, so array element {@code i} is matched against list element {@code i - 1}.
 *
 * @param arr the imports array; only indices below {@code arrayLength} are examined.
 * @param arrayLength the number of leading elements of {@code arr} to consider, including index 0.
 * @param list the imports list to compare against.
 * @return true if the list has exactly {@code arrayLength - 1} elements and each one is the same
 *         reference as its counterpart in the array; otherwise, false.
 */
boolean compareSymbolTableImportsArrayToList(SymbolTable[] arr, int arrayLength, List<SymbolTable> list) {
    // Index 0 of the array has no counterpart in the list, hence the offset of one.
    int expectedListSize = arrayLength - 1;
    if (list.size() != expectedListSize) {
        return false;
    }
    for (int listIndex = 0; listIndex < expectedListSize; listIndex++) {
        // TODO amazon-ion/ion-java/issues/18 Imports are currently compared by reference, which is
        // overly strict: imports with different references but the same symbols should also pass.
        // The reference check is cheaper, though, and is compatible with how common Catalog
        // implementations handle shared tables.
        if (list.get(listIndex) != arr[listIndex + 1]) {
            return false;
        }
    }
    return true;
}

/**
 * Compares the first {@code arrayLength} elements of an array of symbol text against the elements of a
 * collection, in the collection's iteration order, using {@code safeEquals}.
 * <p>
 * Preconditions (not checked here): {@code arrayLength <= arr.length}, and the collection yields at
 * least {@code arrayLength} elements. Callers are responsible for establishing both.
 *
 * @param arr the array of symbol text.
 * @param arrayLength the number of leading elements of {@code arr} to compare.
 * @param collection the collection whose leading elements are compared against the array.
 * @return true if every compared pair is equal; false at the first mismatch.
 */
boolean compareSymbolsArrayToCollection(String[] arr, int arrayLength, Collection<String> collection) {
    Iterator<String> others = collection.iterator();
    int index = 0;
    while (index < arrayLength) {
        String symbol = arr[index];
        if (!safeEquals(symbol, others.next())) {
            return false;
        }
        index++;
    }
    return true;
}
Comment on lines +234 to +243
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would i < arrayLength && collectionIterator.hasNext() be too expensive? How about a check if (collection.size() < arrayLength) return false;? I assume the precondition is actually that the collection contains at least arrayLength elements, not arr.length elements? Isn't another precondition here that arrayLength <= arr.length?

Suggested change
boolean compareSymbolsArrayToCollection(String[] arr, int arrayLength, Collection<String> collection) {
// Precondition: the collection contains at least as many elements as the array.
Iterator<String> collectionIterator = collection.iterator();
for (int i = 0; i < arrayLength; i++) {
if (!safeEquals(arr[i], collectionIterator.next())) {
return false;
}
}
return true;
}
boolean compareSymbolsArrayToCollection(String[] arr, int arrayLength, Collection<String> collection) {
if (arr.length < arrayLength || collection.size() < arrayLength) return false;
Iterator<String> collectionIterator = collection.iterator();
for (int i = 0; i < arrayLength; i++) {
if (!safeEquals(arr[i], collectionIterator.next())) {
return false;
}
}
return true;
}

This suggestion ought to cover all these, please straighten me out if I've got it wrong :)


/**
* Determines whether the symbol table active at the reader's current position is a subset of another symbol table,
* meaning that every symbol in the reader's symbol table is present and has the same symbol ID in the other
* table.
* @param other another symbol table.
* @return true if the reader's symbol table is a subset of the other table; otherwise, false.
*/
boolean isSymbolTableSubsetOf(SymbolTable other)
{
if (lastSupersetSymbolTable != null) {
// lastSupersetSymbolTable is reset when the reader's symbol table changes, so we know the reader's symbol
// table is the same as it was when last compared. This is an optimization that avoids the more expensive
// comparisons when this method is called repetitively within the same symbol table contexts. This
// commonly happens during repetitive calls to IonWriter.writeValue(IonReader), which is used directly
// by users and by the looping wrapper IonWriter.writeValues(IonReader).
return other == lastSupersetSymbolTable && other.getMaxId() == lastSupersetSymbolTable.getMaxId();
}

int numberOfLocalSymbols = localSymbolMaxOffset + 1;
int maxId = imports.getMaxId() + numberOfLocalSymbols;

// Note: the first imported table is always the system symbol table.
boolean isSystemSymbolTable = numberOfLocalSymbols == 0 && imports.getImportedTablesNoCopy().length == 1;
boolean otherHasPrivateAttributes = other instanceof _Private_LocalSymbolTable;
_Private_LocalSymbolTable otherLocal = otherHasPrivateAttributes ? (_Private_LocalSymbolTable) other : null;
if (isSystemSymbolTable) {
if (other.isSystemTable() && maxId == other.getMaxId()) {
// Both represent the same system table.
lastSupersetSymbolTable = other;
return true;
}
// The other symbol table might not literally be the system symbol table, but if it's a local symbol table
// with zero local symbols and zero imports, that counts.
if (otherHasPrivateAttributes && otherLocal.getNumberOfLocalSymbols() == 0 && otherLocal.getImportedTablesAsList().isEmpty()) {
lastSupersetSymbolTable = other;
return true;
}
return false;
}
if (!otherHasPrivateAttributes) {
// The reader's symbol table is not a system symbol table, but the other is. Other cannot be a superset.
return false;
}
if (maxId > otherLocal.getMaxId()) return false;

// NOTE: the following uses of _Private_LocalSymbolTable utilize knowledge of the implementation used by
// the binary writer, which has the only known use case for this method. Specifically, we call the interface
// method variants that return lists instead of arrays because we know this matches the binary writer's symbol
// table's internal representation and therefore does not require copying. If this method ends up being used
// for other symbol table implementations, which is unlikely, we should add logic to choose the most efficient
// variant to call for the particular implementation (such as by adding something like a `boolean usesArrays()`
// method to the interface).

SymbolTable[] readerImports = imports.getImportedTablesNoCopy();
if (!compareSymbolTableImportsArrayToList(readerImports, readerImports.length, otherLocal.getImportedTablesAsList())) {
return false;
}

// Superset extends subset if subset doesn't have any declared symbols.
if (numberOfLocalSymbols == 0) {
lastSupersetSymbolTable = other;
return true;
}

// Superset must have same/more declared (local) symbols than subset.
if (numberOfLocalSymbols > otherLocal.getNumberOfLocalSymbols()) return false;

Collection<String> otherSymbols = otherLocal.getLocalSymbolsNoCopy();
if (!compareSymbolsArrayToCollection(symbols, numberOfLocalSymbols, otherSymbols)) {
return false;
}
Comment on lines +309 to +315
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I see that one of the checks I added in my suggestion lives outside the method. Is it correct to push it down? It looks to me like it should be.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to leave it as-is. otherLocal.getNumberOfLocalSymbols() is subtly different than otherSymbols.size() (allowing for the no-copy collection to overwrite without clearing if that's what it wants to do). I'm also comfortable leaving out some of the normal safety checks given that these methods are internal and we can rely on other internal constraints, such as the one that requires our arrays/collections to be at least as large as the related "getNumberOf.." methods say they are.


lastSupersetSymbolTable = other;
return true;
}

/**
* Read-only snapshot of the local symbol table at the reader's current position.
*/
Expand Down Expand Up @@ -431,6 +543,21 @@ public _Private_LocalSymbolTable makeCopy() {
public SymbolTable[] getImportedTablesNoCopy() {
return importedTables.getImportedTablesNoCopy();
}

/**
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always; the message directs callers to
 *         {@code getImportedTablesNoCopy()}.
 */
@Override
public List<SymbolTable> getImportedTablesAsList() {
throw new UnsupportedOperationException("Call getImportedTablesNoCopy() instead.");
}

/**
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always; the message suggests adding an array-returning
 *         no-copy variant if this functionality is ever needed.
 */
@Override
public List<String> getLocalSymbolsNoCopy() {
throw new UnsupportedOperationException("If this is needed, add a no-copy variant that returns an array.");
}

/**
 * @return the length of {@code idToText}, reported as the number of local symbols.
 */
@Override
public int getNumberOfLocalSymbols() {
return idToText.length;
}
}

/**
Expand All @@ -442,6 +569,7 @@ private void resetSymbolTable() {
Arrays.fill(symbols, 0, localSymbolMaxOffset + 1, null);
localSymbolMaxOffset = -1;
cachedReadOnlySymbolTable = null;
lastSupersetSymbolTable = null;
}

/**
Expand Down Expand Up @@ -474,6 +602,7 @@ protected void restoreSymbolTable(SymbolTable symbolTable) {
}
localSymbolMaxOffset = snapshot.maxId - firstLocalSymbolId;
System.arraycopy(snapshot.idToText, 0, symbols, 0, snapshot.idToText.length);
lastSupersetSymbolTable = null;
} else {
// Note: this will only happen when `symbolTable` is the system symbol table.
resetSymbolTable();
Expand Down Expand Up @@ -626,6 +755,9 @@ private void finishReadingSymbolTableStruct() {
}
localSymbolMaxOffset += newSymbols.size();
}
// Note: last superset table is reset even if new symbols were simply appended because there's no
// guarantee those symbols are reflected in the superset table.
lastSupersetSymbolTable = null;
state = State.READING_VALUE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.amazon.ion.SymbolTable;
import com.amazon.ion.system.IonReaderBuilder;

import java.io.IOException;
import java.io.InputStream;

/**
Expand Down Expand Up @@ -44,7 +45,7 @@
* stream's values risk exceeding the available memory, then continuable reading must not be used.
* </p>
*/
final class IonReaderContinuableTopLevelBinary extends IonReaderContinuableApplicationBinary implements IonReader, _Private_ReaderWriter {
final class IonReaderContinuableTopLevelBinary extends IonReaderContinuableApplicationBinary implements IonReader, _Private_ReaderWriter, _Private_ByteTransferReader {

// True if continuable reading is disabled.
private final boolean isNonContinuable;
Expand Down Expand Up @@ -315,6 +316,20 @@ public void hoist(Span span) {
}
}

/**
 * Attempts to hand the current value's bytes from this reader's buffer directly to the given sink.
 * The transfer is declined when the value has annotations, when {@code isByteBacked()} is false, or
 * when {@code isInStruct()} is true.
 *
 * @param writer the sink that receives the bytes.
 * @return true if the bytes were written to the sink; false if the transfer was declined.
 * @throws IOException if thrown by the sink while writing.
 */
@Override
public boolean transferCurrentValue(_Private_ByteTransferSink writer) throws IOException {
    boolean canTransfer = !hasAnnotations && isByteBacked() && !isInStruct();
    if (canTransfer) {
        // The copied range begins at the value's pre-header index and runs to the value's end index.
        int start = (int) valuePreHeaderIndex;
        int length = (int) (valueMarker.endIndex - valuePreHeaderIndex);
        writer.writeBytes(buffer, start, length);
    }
    return canTransfer;
}

/**
 * Reports whether this reader's symbol table is compatible with the given table by delegating to
 * {@code isSymbolTableSubsetOf}.
 *
 * @param symbolTable the symbol table to compare against.
 * @return the result of {@code isSymbolTableSubsetOf(symbolTable)}.
 */
@Override
public boolean isSymbolTableCompatible(SymbolTable symbolTable) {
return isSymbolTableSubsetOf(symbolTable);
}

@Override
public <T> T asFacet(Class<T> facetType) {
if (facetType == SpanProvider.class) {
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/com/amazon/ion/impl/LocalSymbolTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import com.amazon.ion.util.IonTextUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -610,6 +611,21 @@ public SymbolTable[] getImportedTablesNoCopy()
return myImportsList.getImportedTablesNoCopy();
}

/**
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always; the message directs callers to
 *         {@code getImportedTablesNoCopy()}.
 */
@Override
public List<SymbolTable> getImportedTablesAsList() {
throw new UnsupportedOperationException("Call getImportedTablesNoCopy() instead.");
}

/**
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always; the message suggests adding an array-returning
 *         no-copy variant if this functionality is ever needed.
 */
@Override
public Collection<String> getLocalSymbolsNoCopy() {
throw new UnsupportedOperationException("If this is needed, add a no-copy variant that returns an array.");
}

/**
 * @return the value of {@code mySymbolsCount}, reported as the number of local symbols.
 */
@Override
public int getNumberOfLocalSymbols() {
return mySymbolsCount;
}

public void writeTo(IonWriter writer) throws IOException
{
IonReader reader = new SymbolTableReader(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
package com.amazon.ion.impl;

import com.amazon.ion.IonReader;
import com.amazon.ion.SymbolTable;

import java.io.IOException;

/**
Expand All @@ -24,6 +26,20 @@
*/
public interface _Private_ByteTransferReader
{
public void transferCurrentValue(_Private_ByteTransferSink writer)
/**
* Copies the raw bytes representing the current value, excluding any field name or annotations, if possible.
* @param writer the sink for the bytes
* @return true if the byte transfer occurred; false if it was not possible.
* @throws IOException if thrown by the sink during transfer.
*/
public boolean transferCurrentValue(_Private_ByteTransferSink writer)
throws IOException;

/**
* Determines whether the reader's symbol table is compatible with (i.e., a subset of) the given symbol table. When
* true, values can be transferred from the reader to the writer verbatim.
* @param symbolTable the symbol table active in the writer.
* @return true if the reader's symbol table is compatible; otherwise, false.
*/
public boolean isSymbolTableCompatible(SymbolTable symbolTable);
}
26 changes: 25 additions & 1 deletion src/main/java/com/amazon/ion/impl/_Private_LocalSymbolTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import com.amazon.ion.SymbolTable;

interface _Private_LocalSymbolTable extends SymbolTable {
import java.util.Collection;
import java.util.List;

public interface _Private_LocalSymbolTable extends SymbolTable {

/**
* @return a mutable copy of the symbol table.
Expand All @@ -22,4 +25,25 @@ interface _Private_LocalSymbolTable extends SymbolTable {
* @see SymbolTable#getImportedTables()
*/
SymbolTable[] getImportedTablesNoCopy();

/**
* Returns the imported symbol tables as a List without making a copy (if possible).
* Like {@link #getImportedTables()}, the list does not include the system symbol table.
*
* @return the imported symbol tables. Does not include the system symbol table.
*
* @see SymbolTable#getImportedTables()
*/
List<SymbolTable> getImportedTablesAsList();

/**
* Returns a collection containing the local symbols, without making a copy.
* @return the local symbols.
*/
Collection<String> getLocalSymbolsNoCopy();

/**
* @return the number of local symbols, which do not include the imported or system symbols.
*/
int getNumberOfLocalSymbols();
}
23 changes: 9 additions & 14 deletions src/main/java/com/amazon/ion/impl/bin/AbstractIonWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,11 @@
}

/** True when this writer was constructed with WriteValueOptimization.COPY_OPTIMIZED. */
private final _Private_SymtabExtendsCache symtabExtendsCache;
private final boolean isStreamCopyOptimized;

/*package*/ AbstractIonWriter(final WriteValueOptimization optimization)
{
this.symtabExtendsCache = optimization == WriteValueOptimization.COPY_OPTIMIZED
? new _Private_SymtabExtendsCache() : null;
this.isStreamCopyOptimized = optimization == WriteValueOptimization.COPY_OPTIMIZED;
}

public final void writeValue(final IonValue value) throws IOException
Expand All @@ -54,18 +53,14 @@ public final void writeValue(final IonReader reader) throws IOException
{
final IonType type = reader.getType();

if (isStreamCopyOptimized())
if (isStreamCopyOptimized() && reader instanceof _Private_ByteTransferReader)
{
final _Private_ByteTransferReader transferReader =
reader.asFacet(_Private_ByteTransferReader.class);

if (transferReader != null
&& (_Private_Utils.isNonSymbolScalar(type)
|| symtabExtendsCache.symtabsCompat(getSymbolTable(), reader.getSymbolTable())))
_Private_ByteTransferReader byteTransferReader = (_Private_ByteTransferReader) reader;
if (_Private_Utils.isNonSymbolScalar(type) || byteTransferReader.isSymbolTableCompatible(getSymbolTable()))
{
// we have something we can pipe over
transferReader.transferCurrentValue(this);
return;
if (byteTransferReader.transferCurrentValue(this)) {
return;
}
}
}

Expand Down Expand Up @@ -223,7 +218,7 @@ public final void writeTimestampUTC(final Date value) throws IOException

public final boolean isStreamCopyOptimized()
{
return symtabExtendsCache != null;
return isStreamCopyOptimized;
}

@SuppressWarnings("deprecation")
Expand Down
Loading