Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
614 changes: 607 additions & 7 deletions src/main/java/com/amazon/ion/bytecode/BytecodeIonReader.kt

Large diffs are not rendered by default.

145 changes: 145 additions & 0 deletions src/main/java/com/amazon/ion/bytecode/EncodingContextManager.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.bytecode

import com.amazon.ion.bytecode.util.BytecodeBuffer
import com.amazon.ion.bytecode.util.ConstantPool
import com.amazon.ion.ion_1_1.MacroImpl
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings

/**
 * Holds the effective macro table, the effective symbol table and the constant pool, and declares the entry points
 * used to read encoding directives from a [BytecodeIonReader].
 *
 * TODO:
 *  - Write more documentation.
 *  - Implement stubbed out methods.
 *
 * Notes:
 *
 * It is never safe to remove or modify any existing data in the effective tables. It is safe to append data to those
 * tables for an `add_symbols`, `add_macros`, or `use` directive (as long as the active encoding modules are just `$ion` and `_`).
 */
internal class EncodingContextManager {

    companion object {
        /** Entries that the effective symbol table is seeded with (see [reset]); the first entry is `null`. */
        val SYSTEM_SYMBOLS = arrayOf(
            null,
            "\$ion",
            "\$ion_1_0",
            "\$ion_symbol_table",
            "name",
            "version",
            "imports",
            "symbols",
            "max_id",
            "\$ion_shared_symbol_table",
        )
    }

    // Together these form the effective macro table and the effective symbol table.
    private var macroBytecode = BytecodeBuffer()
    private var macroOffsets = BytecodeBuffer()
    private var macroNames = ConstantPool()
    private var symbols: MutableList<String?> = SYSTEM_SYMBOLS.toMutableList()

    // TODO: Do we need the constant pool here?
    private var constants = ConstantPool()

    /** Symbols, macros and macro names that belong to one module. */
    private class Module(
        val symbols: Array<String>,
        val macros: Array<MacroImpl>,
        val macroNames: Array<String?>
    )

    // Only modules _other_ than the system module and default module are tracked in these two collections.
    private val additionalAvailableModules = mutableMapOf<String, Module>()
    private var additionalActiveModules = mutableListOf<Module>()

    /**
     * Returns the bytecode of the effective macro table.
     * NOTE(review): presumably this is the live backing array rather than a copy -- confirm in [BytecodeBuffer].
     */
    @SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
    fun getEffectiveMacroTableBytecode(): IntArray {
        return macroBytecode.unsafeGetArray()
    }

    /**
     * Returns the offsets of the effective macro table.
     * NOTE(review): presumably this is the live backing array rather than a copy -- confirm in [BytecodeBuffer].
     */
    @SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
    fun getEffectiveMacroTableOffsets(): IntArray {
        return macroOffsets.unsafeGetArray()
    }

    /** Returns the effective symbol table as a newly allocated array. */
    fun getEffectiveSymbolTable(): Array<String?> {
        return symbols.toTypedArray()
    }

    /**
     * Returns the contents of the constant pool.
     * NOTE(review): presumably this is the live backing array rather than a copy -- confirm in [ConstantPool].
     */
    @SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
    fun getEffectiveConstantPool(): Array<Any?> {
        return constants.unsafeGetArray()
    }

    /** Called when encountering an IVM; restores the state this manager has right after construction. */
    fun reset() {
        // Forget every module other than the system and default modules.
        additionalActiveModules.clear()
        additionalAvailableModules.clear()

        // Empty the effective macro table.
        macroBytecode.clear()
        macroOffsets.clear()
        macroNames.clear()

        // Bring the effective symbol table back to just the system symbols.
        symbols.clear()
        symbols.addAll(SYSTEM_SYMBOLS)

        constants.clear()
    }

    /**
     * Reads a `set_symbols` directive. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readSetSymbolsDirective(reader: BytecodeIonReader) {
        TODO()
    }

    /**
     * Reads an `add_symbols` directive. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readAddSymbols(reader: BytecodeIonReader) {
        TODO()
    }

    /**
     * Reads a `set_macros` directive. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readSetMacrosDirective(reader: BytecodeIonReader) {
        TODO()
    }

    /**
     * Reads an `add_macros` directive. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readAddMacrosDirective(reader: BytecodeIonReader) {
        TODO()
    }

    /**
     * Reads a `use` directive. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readUseDirective(reader: BytecodeIonReader) {
        TODO("Shared symbol tables and shared modules not supported yet.")
    }

    /**
     * Reads a module definition directive. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readModuleDirective(reader: BytecodeIonReader) {
        TODO("Module definitions not supported yet.")
    }

    /**
     * Reads an import directive. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readImportDirective(reader: BytecodeIonReader) {
        TODO("Shared symbol tables and shared modules not supported yet.")
    }

    /**
     * Reads an encoding directive, whose content should be a list of module names, as symbols. Not implemented yet.
     *
     * On entry the [reader] should be inside the directive but not yet on its first value. On return the [reader]
     * is at the end of the directive and has not been stepped out.
     */
    fun readEncodingDirective(reader: BytecodeIonReader) {
        TODO()
    }
}
46 changes: 46 additions & 0 deletions src/main/java/com/amazon/ion/bytecode/ScalarConversionHelper.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.bytecode

import com.amazon.ion.Decimal
import com.amazon.ion.impl._Private_ScalarConversions
import com.amazon.ion.impl._Private_ScalarConversions.ValueVariant
import java.math.BigInteger

/**
 * Thin wrapper over [_Private_ScalarConversions] that keeps the conversion boilerplate out of [BytecodeIonReader].
 *
 * The extra layer probably costs something, but it is only meant for the non-ideal paths, i.e. performing **lossy**
 * conversions on scalar values.
 *
 * A single [ValueVariant] and a single [PreparedConverter] are reused for every conversion, so the object returned
 * by any `from` overload is only meaningful until the next `from` call on the same helper.
 */
internal class ScalarConversionHelper {
    private val scalarConverter = ValueVariant()
    private val preparedConverter = ThisPreparedConverter()

    /**
     * Clears the shared variant, lets [addValueFn] store the source value in it, marks [startType] as the
     * authoritative type, and hands back the shared [PreparedConverter].
     */
    private inline fun initConversion(startType: Int, addValueFn: ValueVariant.() -> Unit): PreparedConverter {
        scalarConverter.clear()
        addValueFn(scalarConverter)
        // The authoritative type is assigned only after the value has been stored.
        scalarConverter.authoritativeType = startType
        return preparedConverter
    }

    /** Starts a conversion from an `Int`. */
    fun from(value: Int): PreparedConverter =
        initConversion(_Private_ScalarConversions.AS_TYPE.int_value) { addValue(value) }

    /** Starts a conversion from a `Long`. */
    fun from(value: Long): PreparedConverter =
        initConversion(_Private_ScalarConversions.AS_TYPE.long_value) { addValue(value) }

    /** Starts a conversion from a [BigInteger]. */
    fun from(value: BigInteger?): PreparedConverter =
        initConversion(_Private_ScalarConversions.AS_TYPE.bigInteger_value) { addValue(value) }

    /** Starts a conversion from a `Double`. */
    fun from(value: Double): PreparedConverter =
        initConversion(_Private_ScalarConversions.AS_TYPE.double_value) { addValue(value) }

    /** Starts a conversion from a [Decimal]. */
    fun from(value: Decimal?): PreparedConverter =
        initConversion(_Private_ScalarConversions.AS_TYPE.decimal_value) { addValue(value) }

    /**
     * Second half of a conversion: casts the value stored in [converter] to the requested target type.
     */
    sealed class PreparedConverter(private val converter: ValueVariant) {

        /** Casts the stored value to [toType] and returns the variant so the caller can read the result. */
        private fun doConversion(toType: Int): ValueVariant {
            val conversionFnId = converter.get_conversion_fnid(toType)
            converter.cast(conversionFnId)
            return converter
        }

        fun intoInt(): Int {
            return doConversion(_Private_ScalarConversions.AS_TYPE.int_value).int
        }

        fun intoLong(): Long {
            return doConversion(_Private_ScalarConversions.AS_TYPE.long_value).long
        }

        fun intoBigInteger(): BigInteger {
            return doConversion(_Private_ScalarConversions.AS_TYPE.bigInteger_value).bigInteger
        }

        fun intoDecimal(): Decimal {
            return doConversion(_Private_ScalarConversions.AS_TYPE.decimal_value).decimal
        }

        fun intoDouble(): Double {
            return doConversion(_Private_ScalarConversions.AS_TYPE.double_value).double
        }
    }

    /** The only [PreparedConverter] implementation; bound to the enclosing helper's shared [ValueVariant]. */
    private inner class ThisPreparedConverter : PreparedConverter(this@ScalarConversionHelper.scalarConverter)
}
10 changes: 10 additions & 0 deletions src/main/java/com/amazon/ion/bytecode/ir/Debugger.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.bytecode.ir

import com.amazon.ion.bytecode.ir.Debugger.invoke
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
import java.util.function.Consumer

Expand Down Expand Up @@ -31,6 +32,15 @@ import java.util.function.Consumer
)
internal object Debugger {

/**
 * Renders one bytecode instruction as text: the matching [InstructionInfo] name followed by its formatted data,
 * or `UNKNOWN` followed by the instruction in hexadecimal when no match is found.
 */
@OptIn(ExperimentalStdlibApi::class)
fun renderSingleInstruction(instruction: Int): String {
    val operation = Instructions.toOperation(instruction)
    // `singleOrNull` yields null both when nothing matches and when more than one entry matches.
    val info = InstructionInfo.entries.singleOrNull { candidate -> candidate.operation == operation }
    if (info == null) {
        return "UNKNOWN ${instruction.toHexString()}"
    }

    val renderedData = info.dataType.formatter(Instructions.getData(instruction))
    return "${info.name} $renderedData"
}

/**
* Helper function to render bytecode as an `Array<String>` to make it easier to read in the IntelliJ debugger.
*/
Expand Down
25 changes: 25 additions & 0 deletions src/main/java/com/amazon/ion/bytecode/ir/OperationKind.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.bytecode.ir

import com.amazon.ion.IonType

/**
* Constants defining the different categories of operations for the bytecode instruction set.
*
Expand Down Expand Up @@ -98,4 +100,27 @@ internal object OperationKind {
else -> throw IllegalArgumentException("Not a valid instruction kind: $instructionKind")
}
}

/**
 * Maps an operation kind onto the [IonType] of the value it represents.
 *
 * @param operationKind the operation kind to look up
 * @return the matching [IonType], or `null` when [operationKind] is not one of the value kinds listed below
 */
@JvmStatic
fun ionTypeOf(operationKind: Int): IonType? = when (operationKind) {
    NULL -> IonType.NULL
    BOOL -> IonType.BOOL
    INT -> IonType.INT
    FLOAT -> IonType.FLOAT
    DECIMAL -> IonType.DECIMAL
    TIMESTAMP -> IonType.TIMESTAMP
    STRING -> IonType.STRING
    SYMBOL -> IonType.SYMBOL
    CLOB -> IonType.CLOB
    BLOB -> IonType.BLOB
    LIST -> IonType.LIST
    SEXP -> IonType.SEXP
    STRUCT -> IonType.STRUCT
    else -> null
}
}
18 changes: 15 additions & 3 deletions src/main/java/com/amazon/ion/bytecode/ir/instruction_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ This instruction indicates that there is a String value, and the text of the str
| NULL_BLOB | `0x57` | `01010` | `111` | `00` | - | - | |
| LIST_START | `0x58` | `01011` | `000` | `11` | bytecode_length (u22) | - | Length must include the END_CONTAINER instruction |
| NULL_LIST | `0x5F` | `01011` | `111` | `00` | - | - | |
| SEXP_START | `0x60` | `01100` | `000` | `11` | bytecode_length (u22) | - | Length must include the END_CONTAINER instruction |
| SEXP_START | `0x60` | `01100` | `000` | `11` | bytecode_length (u22) | - | Length must include the END_CONTAINER instruction |
| NULL_SEXP | `0x67` | `01100` | `111` | `00` | - | - | |
| STRUCT_START | `0x68` | `01101` | `000` | `11` | bytecode_length (u22) | - | Length must include the END_CONTAINER instruction |
| STRUCT_START | `0x68` | `01101` | `000` | `11` | bytecode_length (u22) | - | Length must include the END_CONTAINER instruction |
| NULL_STRUCT | `0x6F` | `01101` | `111` | `00` | - | - | |
| ANNOTATION_CP | `0x70` | `01110` | `000` | `00` | cp_index (u22) | - | Non-null [String] in constant pool |
| ANNOTATION_REF | `0x71` | `01110` | `001` | `01` | ref_length (u22) | offset (u32) | Reference to UTF-8 bytes |
Expand All @@ -90,7 +90,7 @@ This instruction indicates that there is a String value, and the text of the str
| FIELD_NAME_REF | `0x79` | `01111` | `001` | `01` | ref_length (u22) | offset (u32) | Reference to UTF-8 bytes |
| FIELD_NAME_SID | `0x7A` | `01111` | `010` | `00` | sid (u22) | - | |
| IVM | `0x80` | `10000` | `000` | `00` | version (u8, u8) | - | version is packed as u8 major, u8 minor |
| DIRECTIVE_SET_SYMBOLS | `0x88` | `10001` | `000` | `00` | - | - | Must have END_CONTAINER instruction to delimit end of directive |
| DIRECTIVE_SET_SYMBOLS | `0x88` | `10001` | `000` | `00` | - | - | Must have END_CONTAINER instruction to delimit end of directive |
| DIRECTIVE_ADD_SYMBOLS | `0x89` | `10001` | `001` | `00` | - | - | Must have END_CONTAINER instruction to delimit end of directive |
| DIRECTIVE_SET_MACROS | `0x8A` | `10001` | `010` | `00` | - | - | Must have END_CONTAINER instruction to delimit end of directive |
| DIRECTIVE_ADD_MACROS | `0x8B` | `10001` | `011` | `00` | - | - | Must have END_CONTAINER instruction to delimit end of directive |
Expand Down Expand Up @@ -127,6 +127,18 @@ Possible TODOs:
from inside a lengthy NOP. Comments that are longer than u22 max value could be encoded using multiple comment
instructions. The span should include the comment-delimiting characters.

## Integers

* All eagerly-read integer values MUST be encoded using the instruction with the smallest integer size in which the integer value can fit.
* The length value for an `INT_REF` instruction must include the sign bit.

## Exposing unevaluated macro invocations

* Generators may be configured to expose some or all macro invocations.
* Generators should expose macro invocations using the `INVOKE` instruction.
* The `INVOKE` instruction is followed by one argument for each parameter in the signature (making any "no argument" expressions explicit).
* The arguments are followed by the `END_CONTAINER` instruction. (TODO: Should we have a distinct `END_INVOKE` instruction?)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it makes sense to have a separate END_INVOKE if you already have a separate END_TEMPLATE, but I'm pretty indifferent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, but I've left a TODO because I think we can defer this until we are exposing un-evaluated macro invocations.


## Directive Content

### `SET_SYMBOLS`, `ADD_SYMBOLS`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ interface AppendableConstantPoolView {
/** Adds a value to the constant pool, returning the index assigned to the value. */
fun add(value: Any?): Int
/** Retrieves a value from the constant pool. */
fun get(i: Int): Any?
operator fun get(i: Int): Any?

val size: Int
}
14 changes: 13 additions & 1 deletion src/main/java/com/amazon/ion/bytecode/util/BytecodeBuffer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,22 @@ internal class BytecodeBuffer private constructor(
* @param length the number of bytecode instructions to copy
*/
fun addSlice(values: BytecodeBuffer, startInclusive: Int, length: Int) {
    // Hand the other buffer's array to the IntArray overload, which performs the actual copy.
    val sourceArray: IntArray = values.data
    addSlice(sourceArray, startInclusive, length)
}

/**
 * Appends a slice of bytecode instructions from an [IntArray] to this buffer.
 * The buffer will automatically grow if necessary to accommodate the new instructions.
 *
 * @param values the source `IntArray` to copy from
 * @param startInclusive the starting index in the source array (inclusive)
 * @param length the number of bytecode instructions to copy
 */
fun addSlice(values: IntArray, startInclusive: Int, length: Int) {
    val thisNumberOfValues = this.numberOfValues
    val newNumberOfValues = thisNumberOfValues + length
    // Write into the array returned by ensureCapacity (presumably a reallocated one when growth was needed).
    val data = ensureCapacity(newNumberOfValues)
    // `values` is already the source array, so it is copied directly; an IntArray has no `.data` member.
    System.arraycopy(values, startInclusive, data, thisNumberOfValues, length)
    // The count is updated only after the copy has succeeded.
    this.numberOfValues = newNumberOfValues
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/amazon/ion/bytecode/util/ConstantPool.kt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ internal class ConstantPool private constructor(
/**
* Returns the `i`th value in the constant pool.
*/
override fun get(i: Int): Any? {
override operator fun get(i: Int): Any? {
if (i < 0 || i >= numberOfValues) {
throw IndexOutOfBoundsException("Invalid index $i requested from IntList with $numberOfValues values.")
}
Expand Down
Loading
Loading