From 2a61c7f5f9a781f2c0442e72306ba5661e53d192 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Tue, 24 May 2022 17:07:52 -0700 Subject: [PATCH 01/15] Adds package to process ion schema file. --- .../ion/benchmark/schema/ReparsedType.java | 114 ++++++++++++++++++ .../schema/constraints/ByteLength.java | 23 ++++ .../schema/constraints/CodepointLength.java | 23 ++++ .../schema/constraints/ContainerLength.java | 23 ++++ .../schema/constraints/Precision.java | 23 ++++ .../constraints/QuantifiableConstraints.java | 25 ++++ .../benchmark/schema/constraints/Range.java | 83 +++++++++++++ .../benchmark/schema/constraints/Regex.java | 32 +++++ .../constraints/ReparsedConstraint.java | 5 + .../benchmark/schema/constraints/Scale.java | 23 ++++ .../constraints/TimestampPrecision.java | 43 +++++++ .../schema/constraints/ValidValues.java | 60 +++++++++ 12 files changed, 477 insertions(+) create mode 100644 src/com/amazon/ion/benchmark/schema/ReparsedType.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/Precision.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/Range.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/Regex.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/Scale.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java create mode 100644 src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java diff --git a/src/com/amazon/ion/benchmark/schema/ReparsedType.java 
b/src/com/amazon/ion/benchmark/schema/ReparsedType.java new file mode 100644 index 0000000..8835ae7 --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/ReparsedType.java @@ -0,0 +1,114 @@ +package com.amazon.ion.benchmark.schema; + +import com.amazon.ion.IonStruct; +import com.amazon.ion.IonType; +import com.amazon.ion.IonValue; +import com.amazon.ion.benchmark.schema.constraints.*; +import com.amazon.ionschema.Type; + +import java.util.HashMap; +import java.util.Map; + +// Parsing the type definition in ISL file into ReparsedType format which allows getting constraints information directly. +public class ReparsedType { + public final Type type; + private static final String KEYWORD_TIMESTAMP_PRECISION = "timestamp_precision"; + private static final String KEYWORD_TYPE = "type"; + private static final String KEYWORD_CODE_POINT_LENGTH = "codepoint_length"; + private static final String KEYWORD_REGEX = "regex"; + private static final String KEYWORD_CONTAINER_LENGTH = "container_length"; + private static final String KEYWORD_BYTE_LENGTH = "byte_length"; + private static final String KEYWORD_SCALE = "scale"; + private static final String KEYWORD_PRECISION = "precision"; + private static final String KEYWORD_VALID_VALUES = "valid_values"; + private static final String KEYWORD_NAME = "name"; + // Using map to avoid processing the multiple repeat constraints situation. + Map constraintMap; + + /** + * Initializing the newly created ReparsedType object. + * @param type represents type definition of ISL file. + */ + public ReparsedType(Type type) { + this.type = type; + constraintMap = new HashMap<>(); + getIsl().forEach(this::handleField); + } + + /** + * Get the name of type definition. + * @return the name of type definition. + */ + public String getName() { + return type.getName(); + } + + /** + * Handling the fields which are not used for specifying generated data. + * @param field represents the field contained by the type definition. 
+ */ + private void handleField(IonValue field) { + switch (field.getFieldName()) { + case KEYWORD_NAME: + case KEYWORD_TYPE: + return; + default: + constraintMap.put(field.getFieldName(), toConstraint(field)); + } + } + + /** + * Redefining the getIsl method to convert type definition to IonStruct format. + * @return an IonStruct which contains constraints in type definition. + */ + public IonStruct getIsl() { + return (IonStruct) type.getIsl(); + } + + /** + * Get the value of constraint 'type' in IonType format. + * @return the value of 'type' in IonType format, or null when the type definition has no 'type' field. + */ + public IonType getIonType() { + return getIsl().get(KEYWORD_TYPE) == null ? null : IonType.valueOf(getIsl().get(KEYWORD_TYPE).toString().toUpperCase()); + } + + /** + * Get the constraintMap. + * The keys in constraintMap represent constraint name, and the values represents the ReparsedConstraint. + * @return constraintMap. + */ + public Map getConstraintMap() { + return constraintMap; + } + + //TODO: Constraints come in two flavors- container and scalar? + /** + * This method helps to categorize constraints based on the data type that they represent. + * @param field represents the field contained in type definition. + * @return ReparsedConstraints which are processed based on the provided constraint 'type'. + */ + private static ReparsedConstraint toConstraint(IonValue field) { + switch (field.getFieldName()) { + //TODO: Add cases of constraints 'annotation' and 'occurs'. 
+ case KEYWORD_BYTE_LENGTH: + return ByteLength.of(field); + case KEYWORD_PRECISION: + return Precision.of(field); + case KEYWORD_SCALE: + return Scale.of(field); + case KEYWORD_CODE_POINT_LENGTH: + return CodepointLength.of(field); + case KEYWORD_CONTAINER_LENGTH: + return ContainerLength.of(field); + case KEYWORD_VALID_VALUES: + return ValidValues.of(field); + case KEYWORD_REGEX: + return Regex.of(field); + case KEYWORD_TIMESTAMP_PRECISION: + return TimestampPrecision.of(field); + default: + throw new IllegalArgumentException("This field is not understood: " + field); + } + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java b/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java new file mode 100644 index 0000000..0115b1b --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java @@ -0,0 +1,23 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonValue; + +public class ByteLength extends QuantifiableConstraints{ + + /** + * Initializing the ByteLength object. + * @param value represents constraint field 'byte_length'. + */ + public ByteLength(IonValue value) { + super(value); + } + + /** + * Parsing constraint field into ByteLength. + * @param field represents the value of constraint 'byte_length'. + * @return the newly created ByteLength object. + */ + public static ByteLength of(IonValue field) { + return new ByteLength(field); + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java b/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java new file mode 100644 index 0000000..0db22f4 --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java @@ -0,0 +1,23 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonValue; + +public class CodepointLength extends QuantifiableConstraints { + + /** + * Initializing the CodepointLength object. 
+ * @param value represents constraint field 'codepoint_length'. + */ + public CodepointLength(IonValue value) { + super(value); + } + + /** + * Parsing constraint field into CodepointLength. + * @param field represents the value of constraint 'codepoint_length'. + * @return the newly created CodepointLength object. + */ + public static CodepointLength of(IonValue field) { + return new CodepointLength(field); + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java b/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java new file mode 100644 index 0000000..7672398 --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java @@ -0,0 +1,23 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonValue; + +public class ContainerLength extends QuantifiableConstraints{ + + /** + * Initializing the ContainerLength object. + * @param value represents constraint field 'container_length'. + */ + public ContainerLength(IonValue value) { + super(value); + } + + /** + * Parsing constraint field into ContainerLength. + * @param field represents the value of constraint 'container_length'. + * @return newly created ContainerLength object. + */ + public static ContainerLength of(IonValue field) { + return new ContainerLength(field); + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Precision.java b/src/com/amazon/ion/benchmark/schema/constraints/Precision.java new file mode 100644 index 0000000..2fac3ea --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/Precision.java @@ -0,0 +1,23 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonValue; + +public class Precision extends QuantifiableConstraints{ + + /** + * Initializing the Precision object. + * @param value represents constraint field 'precision'. + */ + public Precision(IonValue value) { + super(value); + } + + /** + * Parsing constraint field into Precision. 
+ * @param field represents the value of constraint 'precision'. + * @return the newly created Precision object. + */ + public static Precision of(IonValue field) { + return new Precision(field); + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java b/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java new file mode 100644 index 0000000..190e607 --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java @@ -0,0 +1,25 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonValue; + +// This class is used for processing the constraints [codepoint_length | byte_length | precision | scale | container_length]. +// These constraints have two formats of value [<INT> | <RANGE<INT>>]. +public abstract class QuantifiableConstraints extends ReparsedConstraint { + Range range; + + /** + * Initializing the newly created QuantifiableConstraint object. + * @param value represents one of [codepoint_length | byte_length | precision | scale | container_length] field value. + */ + public QuantifiableConstraints(IonValue value) { + range = Range.of(value); + } + + /** + * Getting the constraint value as a Range; Range.of wraps a non-list value into a two-element 'range' list holding that value twice. + * @return object Range which represents constraint value. 
+ */ + public Range getRange() { + return range; + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Range.java b/src/com/amazon/ion/benchmark/schema/constraints/Range.java new file mode 100644 index 0000000..e94fa47 --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/Range.java @@ -0,0 +1,83 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonList; +import com.amazon.ion.IonSequence; +import com.amazon.ion.IonTimestamp; +import com.amazon.ion.IonType; +import com.amazon.ion.IonValue; + +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Random; + +// Processing the constraint value which contains 'range' annotation. +public class Range { + private static final String KEYWORD_RANGE = "range"; + IonSequence sequence; + + /** + * Initializing the newly created Range object. + * @param sequence represents the range value in IonSequence format. + */ + public Range(IonSequence sequence) { + this.sequence = sequence; + } + + /** + * Getting the lower bound value (element 0 of the sequence) from range. + * @param klass represent the Class object of different data types. + * @param <T> represents different ion data types. + * @return parameterized type data which extends IonValue. + */ + public <T extends IonValue> T lowerBound(Class<T> klass) { + return klass.cast(sequence.get(0)); + } + + /** + * Getting the upper bound value (element 1 of the sequence) from range. + * @param klass represent the Class object of different data types. + * @param <T> represents different ion data types. + * @return parameterized type data which extends IonValue. + */ + public <T extends IonValue> T upperBound(Class<T> klass) { + return klass.cast(sequence.get(1)); + } + + /** + * Parsing the provided IonValue into Range. + * @param value represents the value of provided constraint. + * @return an object of Range. 
+ */ + public static Range of(IonValue value) { + IonSequence sequence; + if (!(value instanceof IonList)) { + sequence = value.getSystem().newList(value.clone(), value.clone()); + sequence.addTypeAnnotation(KEYWORD_RANGE); + } else { + sequence = (IonSequence) value; + } + return new Range(sequence); + } + + /** + * Checking whether the value contains annotation 'range'. + * @param value represents the constraint value. + * @return the result in the boolean format. + */ + public static boolean isRange(IonValue value) { + return Arrays.stream(value.getTypeAnnotations()).anyMatch(KEYWORD_RANGE::equals); + } + + /** + * Getting a random quantifiable value within the range. This method will be used when the range value is in '>' format. + * @return a BigDecimal which is within the provided range. This value would be cast into different data types as needed. + */ + public BigDecimal getRandomQuantifiableValueFromRange() { + Random random = new Random(); + IonValue lowerBound = sequence.get(0); + IonValue upperBound = sequence.get(1); + BigDecimal lowerBoundBigDecimal = lowerBound.getType().equals(IonType.TIMESTAMP) ? ((IonTimestamp)lowerBound).getDecimalMillis() : new BigDecimal(lowerBound.toString()); + BigDecimal upperBoundBigDecimal = upperBound.getType().equals(IonType.TIMESTAMP) ? 
((IonTimestamp)upperBound).getDecimalMillis() : new BigDecimal(upperBound.toString()); + return lowerBoundBigDecimal.add(new BigDecimal(random.nextDouble()).multiply(upperBoundBigDecimal.subtract(lowerBoundBigDecimal))); + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Regex.java b/src/com/amazon/ion/benchmark/schema/constraints/Regex.java new file mode 100644 index 0000000..9935b60 --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/Regex.java @@ -0,0 +1,32 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonValue; + +public class Regex extends ReparsedConstraint{ + String pattern; + + /** + * Initializing the newly created object from the decoded text of the 'regex' value (not its quoted, escaped Ion text form). + * @param pattern represents the value of constraint 'regex'. + */ + public Regex(IonValue pattern) { + this.pattern = ((com.amazon.ion.IonText) pattern).stringValue(); + } + + /** + * Getting the 'regex' value. + * @return a String to represent the value of 'regex'. + */ + public String getPattern() { + return this.pattern; + } + + /** + * Parsing constraint field into Regex. + * @param value represents the value of constraint 'regex'. + * @return newly created Regex object. 
+ */ + public static Regex of(IonValue value) { + return new Regex(value); + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java b/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java new file mode 100644 index 0000000..ed8a37a --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java @@ -0,0 +1,5 @@ +package com.amazon.ion.benchmark.schema.constraints; + +public abstract class ReparsedConstraint { + +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Scale.java b/src/com/amazon/ion/benchmark/schema/constraints/Scale.java new file mode 100644 index 0000000..917d2da --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/Scale.java @@ -0,0 +1,23 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonValue; + +public class Scale extends QuantifiableConstraints{ + + /** + * Initializing the Scale object. + * @param value represents constraint field 'scale'. + */ + public Scale(IonValue value) { + super(value); + } + + /** + * Parsing constraint field into Scale. + * @param field represents the value of constraint 'scale'. + * @return the newly created Scale object. + */ + public static Scale of(IonValue field) { + return new Scale(field); + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java b/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java new file mode 100644 index 0000000..525e419 --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java @@ -0,0 +1,43 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.*; +import com.amazon.ion.benchmark.IonSchemaUtilities; + +import java.util.Random; + +public class TimestampPrecision extends QuantifiableConstraints{ + + /** + * Initializing the newly created TimestampPrecision object. + * @param value represent the value of constraint 'timestamp_precision'. 
+ */ + public TimestampPrecision(IonValue value) { + super(value); + } + + /** + * Parsing the constraint 'timestamp_precision' into TimestampPrecision. + * @param field represent the value of constraint 'timestamp_precision'. + * @return the object of TimestampPrecision. + */ + public static TimestampPrecision of(IonValue field) { + return new TimestampPrecision(field); + } + + /** + * Getting the timestamp precision randomly from the provided timestamp precision range, both bounds inclusive. + * @param range represents the range of timestamp precision. + * @return randomly generated Timestamp.Precision. + */ + public static Timestamp.Precision getRandomTimestampPrecision(Range range) { + Random random = new Random(); + IonSequence constraintSequence = range.sequence; + Timestamp.Precision[] precisions = Timestamp.Precision.values(); + String lowerBound = constraintSequence.get(0).toString(); + String upperBound = constraintSequence.get(1).toString(); + int lowerBoundOrdinal = lowerBound.equals(IonSchemaUtilities.KEYWORD_MIN) ? 0 : Timestamp.Precision.valueOf(lowerBound.toUpperCase()).ordinal(); + int upperBoundOrdinal = upperBound.equals(IonSchemaUtilities.KEYWORD_MAX) ? precisions.length - 1 : Timestamp.Precision.valueOf(upperBound.toUpperCase()).ordinal(); + int randomIndex = random.nextInt(upperBoundOrdinal - lowerBoundOrdinal + 1) + lowerBoundOrdinal; + return precisions[randomIndex]; + } +} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java b/src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java new file mode 100644 index 0000000..7b2fc7b --- /dev/null +++ b/src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java @@ -0,0 +1,60 @@ +package com.amazon.ion.benchmark.schema.constraints; + +import com.amazon.ion.IonList; +import com.amazon.ion.IonValue; + +// This class is used for parsing constraint 'valid_values' and providing the utilities of processing the value of constraint. +// valid_values: [ ... 
] + // valid_values: <RANGE<NUMBER>> + // valid_values: <RANGE<TIMESTAMP>> +public class ValidValues extends ReparsedConstraint { + // TODO: Handling min and max value + final private IonList validValues; + final private Range range; + final private boolean isRange; + + /** + * Initializing the newly created ValidValues object; only one of validValues / range is kept, the other is set to null. + * @param validValues represents the value of constraint 'valid_values'. + * @param isRange is a boolean value to represent the format of 'valid_values'. + */ + public ValidValues(IonList validValues, boolean isRange) { + this.validValues = isRange ? null : validValues; + this.range = isRange ? Range.of(validValues) : null; + this.isRange = isRange; + } + + /** + * Getting the value of constraint 'valid_values' in IonList format. + * @return an IonList which represents the value of constraint 'valid_values', or null when the constraint is a range. + */ + public IonList getValidValues() { + return validValues; + } + + /** + * Checking whether constraint 'valid_values' contains range. + * @return a boolean value to represent whether 'valid_values' contains range. + */ + public boolean isRange() { + return isRange; + } + + /** + * Getting the range value of constraint 'valid_values' if its format is one of [<RANGE<NUMBER>> | <RANGE<TIMESTAMP>>] + * @return a Range object, or null when the constraint is not a range. + */ + public Range getRange() { + return range; + } + + /** + * Parsing constraint field into ValidValues format. + * @param field represents constraint field 'valid_values'. + * @return the newly created ValidValues object. + */ + public static ValidValues of(IonValue field) { + boolean isRange = Range.isRange(field); + return new ValidValues((IonList) field, isRange); + } +} From 38334bfdc548426495b5e5d94297cd4c16336081 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Tue, 24 May 2022 23:31:33 -0700 Subject: [PATCH 02/15] Changes the way of processing ion schema file. 
--- .../ion/benchmark/GeneratorOptions.java | 7 +-- .../ion/benchmark/IonSchemaUtilities.java | 13 ++++-- .../ion/benchmark/ReadGeneralConstraints.java | 46 +++++++------------ 3 files changed, 30 insertions(+), 36 deletions(-) diff --git a/src/com/amazon/ion/benchmark/GeneratorOptions.java b/src/com/amazon/ion/benchmark/GeneratorOptions.java index decb18b..4a291ac 100644 --- a/src/com/amazon/ion/benchmark/GeneratorOptions.java +++ b/src/com/amazon/ion/benchmark/GeneratorOptions.java @@ -1,5 +1,6 @@ package com.amazon.ion.benchmark; +import com.amazon.ionschema.Schema; import java.util.List; import java.util.Map; @@ -19,8 +20,8 @@ public static void executeGenerator(Map optionsMap) throws Excep String format = ((List) optionsMap.get("--format")).get(0); String path = optionsMap.get("").toString(); String inputFilePath = optionsMap.get("--input-ion-schema").toString(); - // Check whether the input schema file is valid. - IonSchemaUtilities.checkValidationOfSchema(inputFilePath); - ReadGeneralConstraints.readIonSchemaAndGenerate(size, inputFilePath, format, path); + // Check whether the input schema file is valid and get the loaded schema. 
+ Schema schema = IonSchemaUtilities.checkValidationOfSchema(inputFilePath); + ReadGeneralConstraints.readIonSchemaAndGenerate(size, schema, format, path); } } diff --git a/src/com/amazon/ion/benchmark/IonSchemaUtilities.java b/src/com/amazon/ion/benchmark/IonSchemaUtilities.java index bd41825..4cdd185 100644 --- a/src/com/amazon/ion/benchmark/IonSchemaUtilities.java +++ b/src/com/amazon/ion/benchmark/IonSchemaUtilities.java @@ -15,6 +15,7 @@ import com.amazon.ionschema.InvalidSchemaException; import com.amazon.ionschema.IonSchemaSystem; import com.amazon.ionschema.IonSchemaSystemBuilder; +import com.amazon.ionschema.Schema; import java.io.IOException; import java.util.Arrays; @@ -54,16 +55,20 @@ public class IonSchemaUtilities { /** * Check the validation of input ion schema file and will throw InvalidSchemaException message when an invalid schema definition is encountered. * @param inputFile represents the file path of the ion schema file. - * @throws Exception if an error occur when creating FileInputStream. + * @return schema loaded from input ISL file. */ - public static void checkValidationOfSchema(String inputFile) throws Exception { + public static Schema checkValidationOfSchema(String inputFile) { + // Build ion schema system from input ISL file. IonSchemaSystem ISS = buildIonSchemaSystem(inputFile); + // Get the name of ISL file as schema ID. String schemaID = inputFile.substring(inputFile.lastIndexOf('/') + 1); + // If the input ISL file is not validated by ion schema kotlin, it will throw an error. + // If the input ISL file is valid, the loaded schema will be returned. 
try { - ISS.loadSchema(schemaID); + return ISS.loadSchema(schemaID); } catch (InvalidSchemaException e) { System.out.println(e.getMessage()); - throw new Exception("The provided ion schema file is not valid"); + throw new IllegalStateException("The provided ion schema file is not valid"); } } diff --git a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java index 6df9011..175fd96 100644 --- a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java +++ b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java @@ -1,52 +1,40 @@ package com.amazon.ion.benchmark; -import com.amazon.ion.IonDatagram; import com.amazon.ion.IonLoader; -import com.amazon.ion.IonReader; -import com.amazon.ion.IonStruct; import com.amazon.ion.IonSystem; -import com.amazon.ion.IonType; -import com.amazon.ion.IonValue; import com.amazon.ion.IonWriter; -import com.amazon.ion.system.IonReaderBuilder; +import com.amazon.ion.benchmark.schema.ReparsedType; import com.amazon.ion.system.IonSystemBuilder; +import com.amazon.ionschema.Schema; +import com.amazon.ionschema.Type; -import java.io.BufferedInputStream; import java.io.File; -import java.io.FileInputStream; /** - * Parse Ion Schema file and get the general constraints in the file then pass the constraints to the Ion data generator. + * Parse Ion Schema file and extract the type definition as ReparsedType object then pass the re-parsed type definition to the Ion data generator. */ public class ReadGeneralConstraints { public static final IonSystem SYSTEM = IonSystemBuilder.standard().build(); public static final IonLoader LOADER = SYSTEM.newLoader(); /** - * Get general constraints of Ion Schema and call the relevant generator method based on the type. + * Parsing schema type definition to ReparsedType and passing the re-parsed value to data generating process. * @param size is the size of the output file. - * @param path is the path of the Ion Schema file. 
+ * @param schema an Ion Schema loaded by ion-schema-kotlin. * @param format is the format of the generated file, select from set (ion_text | ion_binary). * @param outputFile is the path of the generated file. - * @throws Exception if errors occur when reading and writing data. + * @throws Exception if errors occur when writing data. */ - public static void readIonSchemaAndGenerate(int size, String path, String format, String outputFile) throws Exception { - try (IonReader reader = IonReaderBuilder.standard().build(new BufferedInputStream(new FileInputStream(path)))) { - IonDatagram schema = LOADER.load(reader); - for (int i = 0; i < schema.size(); i++) { - IonValue schemaValue = schema.get(i); - // Assume there's only one constraint between schema_header and schema_footer, if more constraints added, here is the point where developers should start. - if (schemaValue.getType().equals(IonType.STRUCT) && schemaValue.getTypeAnnotations()[0].equals(IonSchemaUtilities.KEYWORD_TYPE)) { - IonStruct constraintStruct = (IonStruct) schemaValue; - //Construct the writer and pass the constraints to the following writing data to files process. - File file = new File(outputFile); - try (IonWriter writer = WriteRandomIonValues.formatWriter(format, file)) { - WriteRandomIonValues.writeRequestedSizeFile(size, writer, file, constraintStruct); - } - // Print the successfully generated data notification which includes the file path information. - WriteRandomIonValues.printInfo(outputFile); - } - } + public static void readIonSchemaAndGenerate(int size, Schema schema, String format, String outputFile) throws Exception { + // Assume there's only one constraint between schema_header and schema_footer. + // If more constraints added, here is the point where developers should start. 
+ Type schemaType = schema.getTypes().next(); + ReparsedType parsedTypeDefinition = new ReparsedType(schemaType); + File file = new File(outputFile); + try (IonWriter writer = WriteRandomIonValues.formatWriter(format, file)) { + WriteRandomIonValues.writeRequestedSizeFile(size, writer, file, parsedTypeDefinition); } + // Print the successfully generated data notification which includes the file path information. + WriteRandomIonValues.printInfo(outputFile); } } From 1d1ff41b10da9e44ff6f03e5b9f6f4bda935804a Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Wed, 25 May 2022 12:18:34 -0700 Subject: [PATCH 03/15] Updates float generating process. --- .../ion/benchmark/WriteRandomIonValues.java | 87 +++++++++++-------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index 0e2d624..5c1886f 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -23,6 +23,9 @@ import com.amazon.ion.IonValue; import com.amazon.ion.IonWriter; import com.amazon.ion.Timestamp; +import com.amazon.ion.benchmark.schema.ReparsedType; +import com.amazon.ion.benchmark.schema.constraints.ReparsedConstraint; +import com.amazon.ion.benchmark.schema.constraints.ValidValues; import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonReaderBuilder; import com.amazon.ion.system.IonSystemBuilder; @@ -40,7 +43,9 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; @@ -271,22 +276,22 @@ public static void writeRandomSymbolValues(int size, String format, String path) * @param size specifies the size in bytes of the generated file. * @param writer writer is IonWriter. 
* @param file the generated file which contains specified Ion data. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. - * @throws IOException if an error occur when writing generated data. + * @param parsedTypeDefinition is parsed from ion schema file as IonStruct format, it contains the top-level constraints. + * @throws Exception if an error occur when writing generated data. */ - public static void writeRequestedSizeFile(int size, IonWriter writer, File file, IonStruct constraintStruct) throws Exception { + public static void writeRequestedSizeFile(int size, IonWriter writer, File file, ReparsedType parsedTypeDefinition) throws Exception { int currentSize = 0; int count = 0; // Determine how many values should be written before the writer.flush(), and this process aims to reduce the execution time of writer.flush(). while (currentSize <= 0.05 * size) { - WriteRandomIonValues.writeDataToFile(writer, constraintStruct); + WriteRandomIonValues.writeDataToFile(writer, parsedTypeDefinition); count += 1; writer.flush(); currentSize = (int) file.length(); } while (currentSize <= size) { for (int i = 0; i < count; i++) { - WriteRandomIonValues.writeDataToFile(writer, constraintStruct); + WriteRandomIonValues.writeDataToFile(writer, parsedTypeDefinition); } writer.flush(); currentSize = (int) file.length(); @@ -294,27 +299,30 @@ public static void writeRequestedSizeFile(int size, IonWriter writer, File file, } /** - * This method will be reused by different data generator - * @param writer writer is IonWriter. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. - * @throws IOException if an error occur during the data writing process. + * Generating data which is conformed with provided constraints and writing it to the output file. + * @param writer is IonWriter. 
+ * @param parsedTypeDefinition is parsed from ion schema file as IonStruct format, it contains the top-level constraints. + * @throws Exception if an error occur during the data writing process. */ - private static void writeDataToFile(IonWriter writer, IonStruct constraintStruct) throws Exception { - IonType type = IonType.valueOf(constraintStruct.get(IonSchemaUtilities.KEYWORD_TYPE).toString().toUpperCase()); - IonValue value = null; - // Check whether the 'valid_values' constraints provided in the top level constraint struct. - // If a list of 'valid_values' provided, the generated value should be selected randomly from the provided 'valid_values' list every iteration. - // Constraint 'valid_values' has three formats. >, > and [ ... ]. This step only check the format [ ... ]. - // If the annotation 'range' has been detected, it will be processed in the constructing data steps. - if (constraintStruct != null) { - value = IonSchemaUtilities.parseValidValues(constraintStruct); - } - if (value != null && IonSchemaUtilities.getConstraintValueAsRange(constraintStruct, IonSchemaUtilities.KEYWORD_VALID_VALUES) == null) { - value.writeTo(writer); + private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDefinition) throws Exception { + // The first step is to check whether parsedTypeDefinition contains 'valid_values'. The reason we prioritize checking + // 'valid_values' is that the constraint 'type' might not be contained in the type definition, in that case we cannot trigger + // the following data constructing process. + // Assume if 'valid_values' provided in ISL file, constraint 'type' is optional, else constraint 'type' is required. 
+ Map constraintMap = parsedTypeDefinition.getConstraintMap(); + Map constraintMapClone = new HashMap<>(); + constraintMapClone.putAll(constraintMap); + ValidValues validValues = (ValidValues) constraintMap.get("valid_values"); + if (validValues != null && !validValues.isRange()) { + IonValue validValue = getRandomValueFromList(validValues.getValidValues()); + validValue.writeTo(writer); + } else if (parsedTypeDefinition.getIonType() == null) { + throw new IllegalStateException("Constraint 'type' is required."); } else { + IonType type = parsedTypeDefinition.getIonType(); switch (type) { case FLOAT: - writer.writeFloat(WriteRandomIonValues.constructFloat(constraintStruct)); + writer.writeFloat(WriteRandomIonValues.constructFloat(constraintMapClone)); break; case SYMBOL: writer.writeSymbol(WriteRandomIonValues.constructString(constraintStruct)); @@ -349,6 +357,17 @@ private static void writeDataToFile(IonWriter writer, IonStruct constraintStruct } } + /** + * Get a random IonValue from IonList. + * @param values represents IonList. + * @return the randomly chosen IonValue. + */ + public static IonValue getRandomValueFromList(IonList values) { + Random random = new Random(); + int randomIndex = random.nextInt(values.size()); + return values.get(randomIndex); + } + /** * Construct string which is conformed with the constraints provided in ISL. * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. @@ -410,22 +429,22 @@ private static String constructStringFromCodepointLength(int codePointsLengthBou /** * Construct the float which is conformed with the constraints provided in ISL. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. + * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, + * and the value is constraint value in ReparsedConstraint format. * @return the constructed double value. 
- * @throws Exception if error occurs when getting the constraints value. */ - public static double constructFloat(IonStruct constraintStruct) throws Exception { - double randomDouble; - IonList range = IonSchemaUtilities.getConstraintValueAsRange(constraintStruct, IonSchemaUtilities.KEYWORD_VALID_VALUES); - if (range != null) { - // Extract the value of 'valid_values:range:: [lowerBound, upperBound]' and convert IonValue to double. - double lowerBound = Double.valueOf(range.get(0).toString()); - double upperBound = Double.valueOf(range.get(1).toString()); - randomDouble = ThreadLocalRandom.current().nextDouble(lowerBound, upperBound); + public static Double constructFloat(Map constraintMapClone) { + // In the process of generating IonFloat, there is no type-specified constraints. For this step we + // only consider the general constraint 'valid_values'. + ValidValues validValues = (ValidValues) constraintMapClone.remove("valid_values"); + if (!constraintMapClone.isEmpty()) { + throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); + } + if (validValues != null) { + return validValues.getRange().getRandomQuantifiableValueFromRange().doubleValue(); } else { - randomDouble = ThreadLocalRandom.current().nextDouble(); + return ThreadLocalRandom.current().nextDouble(); } - return randomDouble; } /** From de35e99dabe5dfcf2c507a337c86fa330be2f2a9 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Wed, 25 May 2022 12:36:18 -0700 Subject: [PATCH 04/15] Updates symbol and string generating process. 
--- .../ion/benchmark/WriteRandomIonValues.java | 64 +++++++++++-------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index 5c1886f..384d147 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -24,6 +24,8 @@ import com.amazon.ion.IonWriter; import com.amazon.ion.Timestamp; import com.amazon.ion.benchmark.schema.ReparsedType; +import com.amazon.ion.benchmark.schema.constraints.CodepointLength; +import com.amazon.ion.benchmark.schema.constraints.Regex; import com.amazon.ion.benchmark.schema.constraints.ReparsedConstraint; import com.amazon.ion.benchmark.schema.constraints.ValidValues; import com.amazon.ion.system.IonBinaryWriterBuilder; @@ -43,10 +45,13 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.concurrent.ThreadLocalRandom; /** @@ -61,6 +66,7 @@ class WriteRandomIonValues { final static private int DEFAULT_PRECISION = 20; final static private int DEFAULT_SCALE_LOWER_BOUND = -20; final static private int DEFAULT_SCALE_UPPER_BOUND = 20; + final static private Set VALID_STRING_SYMBOL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("regex", "codepoint_length"))); /** * Build up the writer based on the provided format (ion_text|ion_binary) @@ -325,13 +331,13 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef writer.writeFloat(WriteRandomIonValues.constructFloat(constraintMapClone)); break; case SYMBOL: - writer.writeSymbol(WriteRandomIonValues.constructString(constraintStruct)); + writer.writeSymbol(WriteRandomIonValues.constructString(constraintMapClone)); break; case INT: 
writer.writeInt(WriteRandomIonValues.constructInt(constraintStruct)); break; case STRING: - writer.writeString(WriteRandomIonValues.constructString(constraintStruct)); + writer.writeString(WriteRandomIonValues.constructString(constraintMapClone)); break; case DECIMAL: writer.writeDecimal(WriteRandomIonValues.constructDecimal(constraintStruct)); @@ -339,12 +345,13 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef case TIMESTAMP: writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(constraintStruct)); break; - case STRUCT: - WriteRandomIonValues.constructAndWriteIonStruct(constraintStruct, writer); - break; - case LIST: - WriteRandomIonValues.constructAndWriteIonList(writer, constraintStruct); - break; +// Temporally comment the struct and list generating process. +// case STRUCT: +// WriteRandomIonValues.constructAndWriteIonStruct(constraintStruct, writer); +// break; +// case LIST: +// WriteRandomIonValues.constructAndWriteIonList(writer, constraintStruct); +// break; case BLOB: writer.writeBlob(WriteRandomIonValues.constructLobs(constraintStruct)); break; @@ -370,31 +377,32 @@ public static IonValue getRandomValueFromList(IonList values) { /** * Construct string which is conformed with the constraints provided in ISL. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. + * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, + * and the value is constraint value in ReparsedConstraint format. * @return constructed string. - * @throws Exception if error occurs when parsing the constraints. 
*/ - public static String constructString(IonStruct constraintStruct) throws Exception { + public static String constructString(Map constraintMapClone) { Random random = new Random(); - String constructedString; - String regexPattern = IonSchemaUtilities.parseTextConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_REGEX); - Integer codePointsLengthBound = IonSchemaUtilities.parseConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_CODE_POINT_LENGTH); - // For now, if there are potentially-conflicting constraints detected, an exception statement will be thrown. - // For more information: https://github.com/amzn/ion-java-benchmark-cli/issues/33 - if (regexPattern != null && codePointsLengthBound != null) { - throw new IllegalStateException("This constraints combination can not be processed in Ion Data Generator."); - } else if (regexPattern == null && codePointsLengthBound != null) { - // Construct string with the specified Unicode codepoints length. - constructedString = constructStringFromCodepointLength(codePointsLengthBound); - } else if (regexPattern != null && codePointsLengthBound == null) { - RgxGen rgxGen = new RgxGen(regexPattern); - constructedString = rgxGen.generate(); + Regex regex = (Regex) constraintMapClone.remove("regex"); + CodepointLength codepoint_length = (CodepointLength) constraintMapClone.remove("codepoint_length"); + + if (!constraintMapClone.isEmpty()) { + throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); + } + if (regex != null && codepoint_length != null) { + throw new IllegalStateException ("Can only handle one of : " + VALID_STRING_SYMBOL_CONSTRAINTS); + } else if (regex != null) { + String pattern = regex.getPattern(); + RgxGen rgxGen = new RgxGen(pattern); + return rgxGen.generate(); + } else if (codepoint_length != null) { + int length = codepoint_length.getRange().getRandomQuantifiableValueFromRange().intValue(); + return constructStringFromCodepointLength(length); } else { - 
// Preset the Unicode codepoints length as average number 20; - codePointsLengthBound = random.nextInt(20); - constructedString = constructStringFromCodepointLength(codePointsLengthBound); + // If there is no constraints provided, a randomly constructed string with + // preset Unicode codepoints length will be generated. + return constructStringFromCodepointLength(random.nextInt(20)); } - return constructedString; } /** From 3aeabc8a2430bac205de0f7ff7023168389e7c09 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Wed, 25 May 2022 12:48:34 -0700 Subject: [PATCH 05/15] Updates blob and clob generating process. --- .../ion/benchmark/WriteRandomIonValues.java | 233 +++++++++--------- 1 file changed, 119 insertions(+), 114 deletions(-) diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index 384d147..ae4c860 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -24,6 +24,7 @@ import com.amazon.ion.IonWriter; import com.amazon.ion.Timestamp; import com.amazon.ion.benchmark.schema.ReparsedType; +import com.amazon.ion.benchmark.schema.constraints.ByteLength; import com.amazon.ion.benchmark.schema.constraints.CodepointLength; import com.amazon.ion.benchmark.schema.constraints.Regex; import com.amazon.ion.benchmark.schema.constraints.ReparsedConstraint; @@ -353,10 +354,10 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef // WriteRandomIonValues.constructAndWriteIonList(writer, constraintStruct); // break; case BLOB: - writer.writeBlob(WriteRandomIonValues.constructLobs(constraintStruct)); + writer.writeBlob(WriteRandomIonValues.constructLobs(constraintMapClone)); break; case CLOB: - writer.writeClob(WriteRandomIonValues.constructLobs(constraintStruct)); + writer.writeClob(WriteRandomIonValues.constructLobs(constraintMapClone)); break; default: throw new IllegalStateException(type + " is not 
supported."); @@ -569,14 +570,18 @@ public static Timestamp constructTimestamp(IonStruct constraintStruct) throws Ex /** * Construct clob/blob which is conformed with the constraints provided in ISL. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. - * @throws Exception if an error occurs when parsing constraints. + * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, + * and the value is in ReparsedConstraint format. */ - public static byte[] constructLobs(IonStruct constraintStruct) throws Exception { + public static byte[] constructLobs( Map constraintMapClone) { int byte_length; Random random = new Random(); - if (constraintStruct != null) { - byte_length = IonSchemaUtilities.parseConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_BYTE_LENGTH); + ByteLength byteLength = (ByteLength) constraintMapClone.remove("byte_length"); + if (!constraintMapClone.isEmpty()) { + throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); + } + if (byteLength != null) { + byte_length = byteLength.getRange().getRandomQuantifiableValueFromRange().intValue(); } else { byte_length = random.nextInt(512); } @@ -591,46 +596,46 @@ public static byte[] constructLobs(IonStruct constraintStruct) throws Exception * @param writer writes Ion struct data. * @throws IOException if errors occur when writing data. 
*/ - public static void constructAndWriteIonStruct(IonStruct constraintStruct, IonWriter writer) throws Exception { - Random random = new Random(); - IonList annotations = IonSchemaUtilities.getAnnotation(constraintStruct); - IonStruct fields = (IonStruct) constraintStruct.get(IonSchemaUtilities.KEYWORD_FIELDS); - try (IonReader reader = IonReaderBuilder.standard().build(fields)) { - reader.next(); - reader.stepIn(); - for (int i = 0; i < annotations.size(); i++) { - writer.addTypeAnnotation(annotations.get(i).toString()); - } - writer.stepIn(IonType.STRUCT); - while (reader.next() != null) { - String fieldName = reader.getFieldName(); - IonValue struct = SYSTEM.newValue(reader); - IonStruct value = (IonStruct) struct; - // If the value of "occurs" is optional, the integer represents this value is 1 or 0. - int occurTime = IonSchemaUtilities.parseConstraints(value, IonSchemaUtilities.KEYWORD_OCCURS); - if (occurTime == 0) { - continue; - } - writer.setFieldName(fieldName); - IonType type = IonType.valueOf(value.get(IonSchemaUtilities.KEYWORD_TYPE).toString().toUpperCase()); - switch (type) { - // If more types of Ion data are available, the logic should be added below. 
- case STRING: - writer.writeString(WriteRandomIonValues.constructString(value)); - break; - case TIMESTAMP: - writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(value)); - break; - case LIST: - WriteRandomIonValues.constructAndWriteIonList(writer, value); - break; - default: - throw new IllegalStateException(type + " is not supported when generating Ion Struct based on Ion Schema."); - } - } - writer.stepOut(); - } - } +// public static void constructAndWriteIonStruct(IonStruct constraintStruct, IonWriter writer) throws Exception { +// Random random = new Random(); +// IonList annotations = IonSchemaUtilities.getAnnotation(constraintStruct); +// IonStruct fields = (IonStruct) constraintStruct.get(IonSchemaUtilities.KEYWORD_FIELDS); +// try (IonReader reader = IonReaderBuilder.standard().build(fields)) { +// reader.next(); +// reader.stepIn(); +// for (int i = 0; i < annotations.size(); i++) { +// writer.addTypeAnnotation(annotations.get(i).toString()); +// } +// writer.stepIn(IonType.STRUCT); +// while (reader.next() != null) { +// String fieldName = reader.getFieldName(); +// IonValue struct = SYSTEM.newValue(reader); +// IonStruct value = (IonStruct) struct; +// // If the value of "occurs" is optional, the integer represents this value is 1 or 0. +// int occurTime = IonSchemaUtilities.parseConstraints(value, IonSchemaUtilities.KEYWORD_OCCURS); +// if (occurTime == 0) { +// continue; +// } +// writer.setFieldName(fieldName); +// IonType type = IonType.valueOf(value.get(IonSchemaUtilities.KEYWORD_TYPE).toString().toUpperCase()); +// switch (type) { +// // If more types of Ion data are available, the logic should be added below. 
+// case STRING: +// writer.writeString(WriteRandomIonValues.constructString(value)); +// break; +// case TIMESTAMP: +// writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(value)); +// break; +// case LIST: +// WriteRandomIonValues.constructAndWriteIonList(writer, value); +// break; +// default: +// throw new IllegalStateException(type + " is not supported when generating Ion Struct based on Ion Schema."); +// } +// } +// writer.stepOut(); +// } +// } /** * Construct Ion List based on the constraints provided by Ion Schema. @@ -638,58 +643,58 @@ public static void constructAndWriteIonStruct(IonStruct constraintStruct, IonWri * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. * @throws Exception if errors occur when reading or writing data. */ - public static void constructAndWriteIonList(IonWriter writer, IonStruct constraintStruct) throws Exception { - // When there's only one required element in Ion List and the length of generated Ion List is not specified, we set the default length as a integer smaller than 20. - Integer containerLength = IonSchemaUtilities.parseConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_CONTAINER_LENGTH); - IonList annotations = IonSchemaUtilities.getAnnotation(constraintStruct); - int occurrences; - try (IonReader reader = IonReaderBuilder.standard().build(constraintStruct)) { - reader.next(); - reader.stepIn(); - while (reader.next() != null) { - if (annotations != null) { - for (int i = 0; i < annotations.size(); i++) { - writer.addTypeAnnotation(annotations.get(i).toString()); - } - } - writer.stepIn(IonType.LIST); - // If constraint name is 'element', only one type of Ion Data is specified. 
- if (constraintStruct.get(IonSchemaUtilities.KEYWORD_ELEMENT) != null && containerLength != null) { - IonType type = IonType.valueOf(constraintStruct.get(IonSchemaUtilities.KEYWORD_ELEMENT).toString().toUpperCase()); - for (int i = 0; i < containerLength; i++) { - occurrences = 1; - WriteRandomIonValues.constructScalarTypeData(type, writer, occurrences, constraintStruct); - } - break; - } else if (constraintStruct.get(IonSchemaUtilities.KEYWORD_ORDERED_ELEMENTS) != null) { - IonList orderedElement = (IonList) constraintStruct.get(IonSchemaUtilities.KEYWORD_ORDERED_ELEMENTS); - for (int index = 0; index < orderedElement.size(); index++) { - IonType elementType = orderedElement.get(index).getType(); - IonType valueType; - switch (elementType) { - case SYMBOL: - occurrences = 1; - valueType = IonType.valueOf(orderedElement.get(index).toString().toUpperCase()); - WriteRandomIonValues.constructScalarTypeData(valueType, writer, occurrences, NO_CONSTRAINT_STRUCT); - break; - case STRUCT: - IonStruct constraintsStruct = (IonStruct) orderedElement.get(index); - occurrences = IonSchemaUtilities.parseConstraints(constraintsStruct, IonSchemaUtilities.KEYWORD_OCCURS); - if(occurrences == 0) { - break; - } - valueType = IonType.valueOf(constraintsStruct.get(IonSchemaUtilities.KEYWORD_TYPE).toString().toUpperCase()); - WriteRandomIonValues.constructScalarTypeData(valueType, writer, occurrences, constraintsStruct); - break; - } - } - writer.stepOut(); - return; - } - } - writer.stepOut(); - } - } +// public static void constructAndWriteIonList(IonWriter writer, IonStruct constraintStruct) throws Exception { +// // When there's only one required element in Ion List and the length of generated Ion List is not specified, we set the default length as a integer smaller than 20. 
+// Integer containerLength = IonSchemaUtilities.parseConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_CONTAINER_LENGTH); +// IonList annotations = IonSchemaUtilities.getAnnotation(constraintStruct); +// int occurrences; +// try (IonReader reader = IonReaderBuilder.standard().build(constraintStruct)) { +// reader.next(); +// reader.stepIn(); +// while (reader.next() != null) { +// if (annotations != null) { +// for (int i = 0; i < annotations.size(); i++) { +// writer.addTypeAnnotation(annotations.get(i).toString()); +// } +// } +// writer.stepIn(IonType.LIST); +// // If constraint name is 'element', only one type of Ion Data is specified. +// if (constraintStruct.get(IonSchemaUtilities.KEYWORD_ELEMENT) != null && containerLength != null) { +// IonType type = IonType.valueOf(constraintStruct.get(IonSchemaUtilities.KEYWORD_ELEMENT).toString().toUpperCase()); +// for (int i = 0; i < containerLength; i++) { +// occurrences = 1; +// WriteRandomIonValues.constructScalarTypeData(type, writer, occurrences, constraintStruct); +// } +// break; +// } else if (constraintStruct.get(IonSchemaUtilities.KEYWORD_ORDERED_ELEMENTS) != null) { +// IonList orderedElement = (IonList) constraintStruct.get(IonSchemaUtilities.KEYWORD_ORDERED_ELEMENTS); +// for (int index = 0; index < orderedElement.size(); index++) { +// IonType elementType = orderedElement.get(index).getType(); +// IonType valueType; +// switch (elementType) { +// case SYMBOL: +// occurrences = 1; +// valueType = IonType.valueOf(orderedElement.get(index).toString().toUpperCase()); +// WriteRandomIonValues.constructScalarTypeData(valueType, writer, occurrences, NO_CONSTRAINT_STRUCT); +// break; +// case STRUCT: +// IonStruct constraintsStruct = (IonStruct) orderedElement.get(index); +// occurrences = IonSchemaUtilities.parseConstraints(constraintsStruct, IonSchemaUtilities.KEYWORD_OCCURS); +// if(occurrences == 0) { +// break; +// } +// valueType = 
IonType.valueOf(constraintsStruct.get(IonSchemaUtilities.KEYWORD_TYPE).toString().toUpperCase()); +// WriteRandomIonValues.constructScalarTypeData(valueType, writer, occurrences, constraintsStruct); +// break; +// } +// } +// writer.stepOut(); +// return; +// } +// } +// writer.stepOut(); +// } +// } /** * Construct scalar type Ion data based on the occurrence time. This method is mainly reused during the process of generating Ion List which will specify the occurrence time. @@ -698,19 +703,19 @@ public static void constructAndWriteIonList(IonWriter writer, IonStruct constrai * @param occurTime is the occurrence time of the element in Ion List. * @throws IOException if errors occur when writing data. */ - public static void constructScalarTypeData(IonType valueType, IonWriter writer, int occurTime, IonStruct constraintStruct) throws Exception { - for (int i = 0; i < occurTime; i++) { - switch (valueType) { - // If more scalar types of Ion data are supported, this is the point to add more cases. - case STRING: - writer.writeString(WriteRandomIonValues.constructString(constraintStruct)); - break; - case INT: - writer.writeInt(WriteRandomIonValues.constructInt(constraintStruct)); - break; - default: - throw new IllegalStateException(valueType + " is not supported when generating Ion List based on Ion Schema."); - } - } - } +// public static void constructScalarTypeData(IonType valueType, IonWriter writer, int occurTime, IonStruct constraintStruct) throws Exception { +// for (int i = 0; i < occurTime; i++) { +// switch (valueType) { +// // If more scalar types of Ion data are supported, this is the point to add more cases. 
+// case STRING: +// writer.writeString(WriteRandomIonValues.constructString(constraintStruct)); +// break; +// case INT: +// writer.writeInt(WriteRandomIonValues.constructInt(constraintStruct)); +// break; +// default: +// throw new IllegalStateException(valueType + " is not supported when generating Ion List based on Ion Schema."); +// } +// } +// } } From 1eb60ff56286a4927eadeee94419493fe706f6e4 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Wed, 25 May 2022 13:00:09 -0700 Subject: [PATCH 06/15] Updates int generating process. --- .../ion/benchmark/WriteRandomIonValues.java | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index ae4c860..a368042 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -335,7 +335,7 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef writer.writeSymbol(WriteRandomIonValues.constructString(constraintMapClone)); break; case INT: - writer.writeInt(WriteRandomIonValues.constructInt(constraintStruct)); + writer.writeInt(WriteRandomIonValues.constructInt(constraintMapClone)); break; case STRING: writer.writeString(WriteRandomIonValues.constructString(constraintMapClone)); @@ -486,22 +486,24 @@ public static BigDecimal constructDecimal(IonStruct constraintStruct) throws Exc /** * Generate random integers which is conformed with the constraints provided in ISL. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. - * @return constructed integers - * @throws Exception if error occurs when parsing the constraint 'valid_values'. + * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, + * and the value is in ReparsedConstraint format. + * @return constructed int. 
*/ - public static long constructInt(IonStruct constraintStruct) throws Exception { - long longValue; - IonList range = IonSchemaUtilities.getConstraintValueAsRange(constraintStruct, IonSchemaUtilities.KEYWORD_VALID_VALUES); - if (range != null) { - // Convert IonValue to long - long lowerBound = Long.valueOf(range.get(0).toString()); - long upperBound = Long.valueOf(range.get(1).toString()); - longValue = ThreadLocalRandom.current().nextLong(lowerBound, upperBound); + public static long constructInt(Map constraintMapClone) { + // In the process of generating IonInt, there is no type-specified constraints. For this step we + // only consider the general constraints 'valid_values'. + ValidValues validValues = (ValidValues) constraintMapClone.remove("valid_values"); + if (!constraintMapClone.isEmpty()) { + throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); + } + if (validValues != null) { + // The generated data is conformed with the provided 'valid_values' range. + return validValues.getRange().getRandomQuantifiableValueFromRange().longValue(); } else { - longValue = ThreadLocalRandom.current().nextLong(); + // If there is no constraint provided, the generator will construct a random value. + return ThreadLocalRandom.current().nextLong(); } - return longValue; } /** From 5e0619cb06eabf313896a4f60fbffe24892a069c Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Wed, 25 May 2022 14:47:54 -0700 Subject: [PATCH 07/15] Updates decimal generating process. 
--- .../ion/benchmark/WriteRandomIonValues.java | 53 ++++++++++++------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index a368042..95ba3d0 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -26,8 +26,10 @@ import com.amazon.ion.benchmark.schema.ReparsedType; import com.amazon.ion.benchmark.schema.constraints.ByteLength; import com.amazon.ion.benchmark.schema.constraints.CodepointLength; +import com.amazon.ion.benchmark.schema.constraints.Precision; import com.amazon.ion.benchmark.schema.constraints.Regex; import com.amazon.ion.benchmark.schema.constraints.ReparsedConstraint; +import com.amazon.ion.benchmark.schema.constraints.Scale; import com.amazon.ion.benchmark.schema.constraints.ValidValues; import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonReaderBuilder; @@ -68,6 +70,7 @@ class WriteRandomIonValues { final static private int DEFAULT_SCALE_LOWER_BOUND = -20; final static private int DEFAULT_SCALE_UPPER_BOUND = 20; final static private Set VALID_STRING_SYMBOL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("regex", "codepoint_length"))); + final static private Set VALID_DECIMAL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("precision", "scale"))); /** * Build up the writer based on the provided format (ion_text|ion_binary) @@ -341,7 +344,7 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef writer.writeString(WriteRandomIonValues.constructString(constraintMapClone)); break; case DECIMAL: - writer.writeDecimal(WriteRandomIonValues.constructDecimal(constraintStruct)); + writer.writeDecimal(WriteRandomIonValues.constructDecimal(constraintMapClone)); break; case TIMESTAMP: 
writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(constraintStruct)); @@ -458,30 +461,42 @@ public static Double constructFloat(Map constraintMa /** * Construct the decimal which is conformed with the constraints provided in ISL. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. + * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, + * and the value is constraint value in ReparsedConstraint format. * @return the constructed decimal. - * @throws Exception if error occurs when parsing the constraints. */ - public static BigDecimal constructDecimal(IonStruct constraintStruct) throws Exception { + public static BigDecimal constructDecimal(Map constraintMapClone) { Random random = new Random(); - // precision represents the minimum/maximum range indicating the number of digits in the unscaled value of a decimal. The minimum precision must be greater than or equal to 1. - Integer precision = IonSchemaUtilities.parseConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_PRECISION); - if (precision == null) { - precision = random.nextInt(DEFAULT_PRECISION); - } - // scale represents the minimum/maximum range indicating the number of digits to the right of the decimal point. The minimum scale must be greater than or equal to 0. - Integer scale = IonSchemaUtilities.parseConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_SCALE); - if (scale == null) { - scale = random.nextInt(DEFAULT_SCALE_UPPER_BOUND - DEFAULT_SCALE_LOWER_BOUND + 1) + DEFAULT_SCALE_LOWER_BOUND; - } + // If there is no constraints provided, assign scale and precision with default values. 
+ int scaleValue = random.nextInt(DEFAULT_SCALE_UPPER_BOUND - DEFAULT_SCALE_LOWER_BOUND + 1) + DEFAULT_SCALE_LOWER_BOUND; + int precisionValue = random.nextInt(DEFAULT_PRECISION); + Scale scale = (Scale) constraintMapClone.remove("scale"); + Precision precision = (Precision) constraintMapClone.remove("precision"); + ValidValues validValues = (ValidValues) constraintMapClone.remove("valid_values"); StringBuilder rs = new StringBuilder(); rs.append(random.nextInt(9) + 1); - for (int digit = 1; digit < precision; digit++) { - rs.append(random.nextInt(10)); + if (!constraintMapClone.isEmpty()) { + throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); + } + if (validValues == null) { + if (scale != null) { + scaleValue = scale.getRange().getRandomQuantifiableValueFromRange().intValue(); + } + if (precision != null) { + precisionValue = precision.getRange().getRandomQuantifiableValueFromRange().intValue(); + } + for (int digit = 1; digit < precisionValue; digit++) { + rs.append(random.nextInt(10)); + } + BigInteger unscaledValue = new BigInteger(rs.toString()); + return new BigDecimal(unscaledValue, scaleValue); + } else { + if (scale != null || precision != null) { + throw new IllegalStateException("Cannot handle 'valid_values' and constraint from " + VALID_DECIMAL_CONSTRAINTS + "at the same time."); + } else { + return validValues.getRange().getRandomQuantifiableValueFromRange(); + } } - BigInteger unscaledValue = new BigInteger(rs.toString()); - BigDecimal bigDecimal = new BigDecimal(unscaledValue, scale); - return bigDecimal; } /** From 8ee73303611b126ca15050ab114c3af3cda9d2fc Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Thu, 26 May 2022 14:11:27 -0700 Subject: [PATCH 08/15] Updates timestamp generating process. 
--- .../ion/benchmark/WriteRandomIonValues.java | 114 +++++++++--------- .../ion/benchmark/schema/ReparsedType.java | 3 +- 2 files changed, 56 insertions(+), 61 deletions(-) diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index 95ba3d0..e35a582 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -17,8 +17,10 @@ import com.amazon.ion.IonList; import com.amazon.ion.IonReader; +import com.amazon.ion.IonSequence; import com.amazon.ion.IonStruct; import com.amazon.ion.IonSystem; +import com.amazon.ion.IonTimestamp; import com.amazon.ion.IonType; import com.amazon.ion.IonValue; import com.amazon.ion.IonWriter; @@ -27,9 +29,11 @@ import com.amazon.ion.benchmark.schema.constraints.ByteLength; import com.amazon.ion.benchmark.schema.constraints.CodepointLength; import com.amazon.ion.benchmark.schema.constraints.Precision; +import com.amazon.ion.benchmark.schema.constraints.Range; import com.amazon.ion.benchmark.schema.constraints.Regex; import com.amazon.ion.benchmark.schema.constraints.ReparsedConstraint; import com.amazon.ion.benchmark.schema.constraints.Scale; +import com.amazon.ion.benchmark.schema.constraints.TimestampPrecision; import com.amazon.ion.benchmark.schema.constraints.ValidValues; import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonReaderBuilder; @@ -71,7 +75,11 @@ class WriteRandomIonValues { final static private int DEFAULT_SCALE_UPPER_BOUND = 20; final static private Set VALID_STRING_SYMBOL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("regex", "codepoint_length"))); final static private Set VALID_DECIMAL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("precision", "scale"))); - + final static private Set VALID_TIMESTAMP_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("timestamp_offset", 
"timestamp_precision"))); + // 0001-01-01T00:00:00.0Z in millis. + final static private BigDecimal MINIMUM_TIMESTAMP_IN_MILLIS_DECIMAL = new BigDecimal(-62135769600000L); + // 10000T in millis, upper bound exclusive. + final static private BigDecimal MAXIMUM_TIMESTAMP_IN_MILLIS_DECIMAL = new BigDecimal(253402300800000L); /** * Build up the writer based on the provided format (ion_text|ion_binary) * @param format the option to decide which writer to be constructed. @@ -347,7 +355,7 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef writer.writeDecimal(WriteRandomIonValues.constructDecimal(constraintMapClone)); break; case TIMESTAMP: - writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(constraintStruct)); + writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(constraintMapClone)); break; // Temporally comment the struct and list generating process. // case STRUCT: @@ -503,7 +511,7 @@ public static BigDecimal constructDecimal(Map constr * Generate random integers which is conformed with the constraints provided in ISL. * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, * and the value is in ReparsedConstraint format. - * @return constructed int. + * @return the constructed int. */ public static long constructInt(Map constraintMapClone) { // In the process of generating IonInt, there is no type-specified constraints. For this step we @@ -523,72 +531,58 @@ public static long constructInt(Map constraintMapClo /** * Construct timestamp which is conformed with the constraints provided in ISL. - * @param constraintStruct is an IonStruct which contains the top-level constraints in Ion Schema. - * @return Constructed timestamp - * @throws Exception if error occurs when parsing the constraints. + * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, + * and the value is in ReparsedConstraint format. 
+ * @return the constructed timestamp. */ - public static Timestamp constructTimestamp(IonStruct constraintStruct) throws Exception { - Timestamp timestamp; + public static Timestamp constructTimestamp(Map constraintMapClone) { Random random = new Random(); - int randomIndex = IonSchemaUtilities.parseConstraints(constraintStruct, IonSchemaUtilities.KEYWORD_TIMESTAMP_PRECISION); - Timestamp.Precision precision = PRECISIONS[randomIndex]; - switch (precision) { - case YEAR: - timestamp = Timestamp.forYear(random.nextInt(9998) + 1); - break; - case MONTH: - timestamp = Timestamp.forMonth(random.nextInt(9998) + 1, random.nextInt(12) + 1); - break; - case DAY: - timestamp = Timestamp.forDay( - random.nextInt(9998) + 1, - random.nextInt(12) + 1, - random.nextInt(28) + 1 // Use max 28 for simplicity. Not including up to 31 is not going to affect the measurement. - ); - break; - case MINUTE: - timestamp = Timestamp.forMinute( - random.nextInt(9998) + 1, - random.nextInt(12) + 1, - random.nextInt(28) + 1, // Use max 28 for simplicity. Not including up to 31 is not going to affect the measurement. - random.nextInt(24), - random.nextInt(60), - localOffset(random) - ); - break; - case SECOND: - timestamp = Timestamp.forSecond( - random.nextInt(9998) + 1, - random.nextInt(12) + 1, - random.nextInt(28) + 1, // Use max 28 for simplicity. Not including up to 31 is not going to affect the measurement. - random.nextInt(24), - random.nextInt(60), - random.nextInt(60), - localOffset(random) - ); - break; - case FRACTION: - int scale = random.nextInt(20); - timestamp = Timestamp.forSecond( - random.nextInt(9998) + 1, - random.nextInt(12) + 1, - random.nextInt(28) + 1, // Use max 28 for simplicity. Not including up to 31 is not going to affect the measurement. - random.nextInt(24), - random.nextInt(60), - randomSecondWithFraction(random,scale), - localOffset(random) - ); - break; - default: - throw new IllegalStateException(); + // Preset the local offset. 
+ Integer localOffset = localOffset(random); + // Create a range which contains the default lower bound and upper bound values. + IonSequence sequence = SYSTEM.newList( SYSTEM.newDecimal(MINIMUM_TIMESTAMP_IN_MILLIS_DECIMAL), SYSTEM.newDecimal(MAXIMUM_TIMESTAMP_IN_MILLIS_DECIMAL)); + Range range = new Range(sequence); + // Preset the default precision. + Timestamp.Precision precision = PRECISIONS[random.nextInt(PRECISIONS.length)]; + TimestampPrecision timestampPrecision = (TimestampPrecision) constraintMapClone.remove("timestamp_precision"); + ValidValues validValues = (ValidValues) constraintMapClone.remove("valid_values"); + if (!constraintMapClone.isEmpty()) { + throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); + } + if (validValues == null) { + if (timestampPrecision != null) { + precision = TimestampPrecision.getRandomTimestampPrecision(timestampPrecision.getRange()); + } + } else { + if (timestampPrecision != null) { + throw new IllegalStateException("Cannot handle 'valid_values' and constraint from " + VALID_TIMESTAMP_CONSTRAINTS + "at the same time."); + } else { + range = validValues.getRange(); + IonTimestamp upperBound = range.upperBound(IonTimestamp.class); + localOffset = upperBound.getLocalOffset(); + precision = upperBound.timestampValue().getPrecision(); + } } - return timestamp; + // Generate a random millisecond within the provided range. + BigDecimal randomMillis = range.getRandomQuantifiableValueFromRange(); + // Generate timestamp based on the provided millisecond value and precision. 
+ Timestamp regeneratedTimestamp = Timestamp.forMillis(randomMillis, localOffset); + + int year = regeneratedTimestamp.getYear(); + int month = regeneratedTimestamp.getMonth(); + int day = regeneratedTimestamp.getDay(); + int minute = regeneratedTimestamp.getMinute(); + int hour = regeneratedTimestamp.getHour(); + int seconds = regeneratedTimestamp.getSecond(); + BigDecimal fracSecond = regeneratedTimestamp.getDecimalSecond().subtract(BigDecimal.valueOf(seconds)); + return Timestamp.createFromUtcFields(precision, year, month, day, hour, minute, seconds, fracSecond, localOffset); } /** * Construct clob/blob which is conformed with the constraints provided in ISL. * @param constraintMapClone collects the constraints from ISL file, the key represents the name of constraints, * and the value is in ReparsedConstraint format. + * @return the constructed bytes. */ public static byte[] constructLobs( Map constraintMapClone) { int byte_length; diff --git a/src/com/amazon/ion/benchmark/schema/ReparsedType.java b/src/com/amazon/ion/benchmark/schema/ReparsedType.java index 8835ae7..9c5fde5 100644 --- a/src/com/amazon/ion/benchmark/schema/ReparsedType.java +++ b/src/com/amazon/ion/benchmark/schema/ReparsedType.java @@ -82,7 +82,7 @@ public Map getConstraintMap() { return constraintMap; } - //TODO: Constraints come in two flavors- container and scalar? + //TODO: Constraints come in two flavors - container and scalar? /** * This method helps to categorize constraints based on the data type that they represent. * @param field represents the field contained in type definition. @@ -91,6 +91,7 @@ public Map getConstraintMap() { private static ReparsedConstraint toConstraint(IonValue field) { switch (field.getFieldName()) { //TODO: Add cases of constraints 'annotation' and 'occurs'. + //TODO: Add container type constraints: 'element', 'ordered_element', 'fields', these might cover some of the implemented constraints. 
case KEYWORD_BYTE_LENGTH: return ByteLength.of(field); case KEYWORD_PRECISION: From 5be4df508dc68d77ead29f66bf8bfd4e71cf20bc Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Thu, 26 May 2022 22:24:58 -0700 Subject: [PATCH 09/15] Temporarily comment some unit tests for generating constainer types of data. --- .../ion/benchmark/WriteRandomIonValues.java | 2 +- .../ion/benchmark/DataGeneratorTest.java | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index e35a582..82d2a8f 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -357,7 +357,7 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef case TIMESTAMP: writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(constraintMapClone)); break; -// Temporally comment the struct and list generating process. +// Temporarily comment the struct and list generating process. // case STRUCT: // WriteRandomIonValues.constructAndWriteIonStruct(constraintStruct, writer); // break; diff --git a/tst/com/amazon/ion/benchmark/DataGeneratorTest.java b/tst/com/amazon/ion/benchmark/DataGeneratorTest.java index 5b3a2d2..d6dba5b 100644 --- a/tst/com/amazon/ion/benchmark/DataGeneratorTest.java +++ b/tst/com/amazon/ion/benchmark/DataGeneratorTest.java @@ -157,28 +157,28 @@ public void testSizeOfGeneratedData() throws Exception { * Test if there's violation when generating Ion Struct based on Ion Schema. * @throws Exception if error occurs during the violation detecting process. 
*/ - @Test - public void testViolationOfIonStruct() throws Exception { - DataGeneratorTest.violationDetect(INPUT_ION_STRUCT_FILE_PATH); - } +// @Test +// public void testViolationOfIonStruct() throws Exception { +// DataGeneratorTest.violationDetect(INPUT_ION_STRUCT_FILE_PATH); +// } /** * Test if there's violation when generating Ion List based on Ion Schema. * @throws Exception if error occurs during the violation detecting process. */ - @Test - public void testViolationOfIonList() throws Exception { - DataGeneratorTest.violationDetect(INPUT_ION_LIST_FILE_PATH); - } +// @Test +// public void testViolationOfIonList() throws Exception { +// DataGeneratorTest.violationDetect(INPUT_ION_LIST_FILE_PATH); +// } /** * Test if there's violation when generating nested Ion Struct based on Ion Schema. * @throws Exception if error occurs during the violation detecting process. */ - @Test - public void testViolationOfNestedIonStruct() throws Exception { - DataGeneratorTest.violationDetect(INPUT_NESTED_ION_STRUCT_PATH); - } +// @Test +// public void testViolationOfNestedIonStruct() throws Exception { +// DataGeneratorTest.violationDetect(INPUT_NESTED_ION_STRUCT_PATH); +// } /** * Test if there's violation when generating Ion Timestamp based on Ion Schema. From 89842cd80022225b739b28425c7aff08eb6f186c Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Mon, 6 Jun 2022 09:24:57 -0700 Subject: [PATCH 10/15] Updates PR based on the suggestions from comments. 
--- src/com/amazon/ion/benchmark/GeneratorOptions.java | 2 +- src/com/amazon/ion/benchmark/IonSchemaUtilities.java | 8 +++++--- .../amazon/ion/benchmark/ReadGeneralConstraints.java | 2 +- src/com/amazon/ion/benchmark/WriteRandomIonValues.java | 10 +++------- src/com/amazon/ion/benchmark/schema/ReparsedType.java | 2 +- .../ion/benchmark/schema/constraints/ByteLength.java | 2 +- .../benchmark/schema/constraints/CodepointLength.java | 2 +- .../benchmark/schema/constraints/ContainerLength.java | 2 +- .../ion/benchmark/schema/constraints/Precision.java | 2 +- .../schema/constraints/QuantifiableConstraints.java | 2 +- .../amazon/ion/benchmark/schema/constraints/Range.java | 2 +- .../amazon/ion/benchmark/schema/constraints/Regex.java | 4 ++-- .../schema/constraints/ReparsedConstraint.java | 2 +- .../amazon/ion/benchmark/schema/constraints/Scale.java | 2 +- .../schema/constraints/TimestampPrecision.java | 2 +- .../ion/benchmark/schema/constraints/ValidValues.java | 2 +- 16 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/com/amazon/ion/benchmark/GeneratorOptions.java b/src/com/amazon/ion/benchmark/GeneratorOptions.java index 4a291ac..67f780f 100644 --- a/src/com/amazon/ion/benchmark/GeneratorOptions.java +++ b/src/com/amazon/ion/benchmark/GeneratorOptions.java @@ -21,7 +21,7 @@ public static void executeGenerator(Map optionsMap) throws Excep String path = optionsMap.get("").toString(); String inputFilePath = optionsMap.get("--input-ion-schema").toString(); // Check whether the input schema file is valid and get the loaded schema. 
- Schema schema = IonSchemaUtilities.checkValidationOfSchema(inputFilePath); + Schema schema = IonSchemaUtilities.loadSchemaDefinition(inputFilePath); ReadGeneralConstraints.readIonSchemaAndGenerate(size, schema, format, path); } } diff --git a/src/com/amazon/ion/benchmark/IonSchemaUtilities.java b/src/com/amazon/ion/benchmark/IonSchemaUtilities.java index 4cdd185..2f7358e 100644 --- a/src/com/amazon/ion/benchmark/IonSchemaUtilities.java +++ b/src/com/amazon/ion/benchmark/IonSchemaUtilities.java @@ -18,6 +18,7 @@ import com.amazon.ionschema.Schema; import java.io.IOException; +import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -53,15 +54,16 @@ public class IonSchemaUtilities { private static final IonLoader LOADER = SYSTEM.newLoader(); /** - * Check the validation of input ion schema file and will throw InvalidSchemaException message when an invalid schema definition is encountered. + * Load schema definition and check the validation of input ion schema file. + * If an invalid schema definition is encountered, this method will throw InvalidSchemaException message. * @param inputFile represents the file path of the ion schema file. * @return schema loaded from input ISL file. */ - public static Schema checkValidationOfSchema(String inputFile) { + public static Schema loadSchemaDefinition(String inputFile) { // Build ion schema system from input ISL file. IonSchemaSystem ISS = buildIonSchemaSystem(inputFile); // Get the name of ISL file as schema ID. - String schemaID = inputFile.substring(inputFile.lastIndexOf('/') + 1); + String schemaID = Paths.get(inputFile).toFile().getName(); // If the input ISL file is not validated by ion schema kotlin, it will throw an error. // If the input ISL file is valid, the loaded schema will be returned. 
try { diff --git a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java index 175fd96..b424752 100644 --- a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java +++ b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java @@ -26,7 +26,7 @@ public class ReadGeneralConstraints { * @throws Exception if errors occur when writing data. */ public static void readIonSchemaAndGenerate(int size, Schema schema, String format, String outputFile) throws Exception { - // Assume there's only one constraint between schema_header and schema_footer. + // Assume there's only one type definition between schema_header and schema_footer. // If more constraints added, here is the point where developers should start. Type schemaType = schema.getTypes().next(); ReparsedType parsedTypeDefinition = new ReparsedType(schemaType); diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java index 82d2a8f..236e9e4 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/WriteRandomIonValues.java @@ -76,10 +76,8 @@ class WriteRandomIonValues { final static private Set VALID_STRING_SYMBOL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("regex", "codepoint_length"))); final static private Set VALID_DECIMAL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("precision", "scale"))); final static private Set VALID_TIMESTAMP_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("timestamp_offset", "timestamp_precision"))); - // 0001-01-01T00:00:00.0Z in millis. - final static private BigDecimal MINIMUM_TIMESTAMP_IN_MILLIS_DECIMAL = new BigDecimal(-62135769600000L); - // 10000T in millis, upper bound exclusive. 
- final static private BigDecimal MAXIMUM_TIMESTAMP_IN_MILLIS_DECIMAL = new BigDecimal(253402300800000L); + // Create a range which contains the default lower bound and upper bound values. + final static private Range RANGE = new Range(SYSTEM.newList( SYSTEM.newDecimal(62135769600000L), SYSTEM.newDecimal(253402300800000L))); /** * Build up the writer based on the provided format (ion_text|ion_binary) * @param format the option to decide which writer to be constructed. @@ -537,11 +535,9 @@ public static long constructInt(Map constraintMapClo */ public static Timestamp constructTimestamp(Map constraintMapClone) { Random random = new Random(); + Range range = RANGE; // Preset the local offset. Integer localOffset = localOffset(random); - // Create a range which contains the default lower bound and upper bound values. - IonSequence sequence = SYSTEM.newList( SYSTEM.newDecimal(MINIMUM_TIMESTAMP_IN_MILLIS_DECIMAL), SYSTEM.newDecimal(MAXIMUM_TIMESTAMP_IN_MILLIS_DECIMAL)); - Range range = new Range(sequence); // Preset the default precision. Timestamp.Precision precision = PRECISIONS[random.nextInt(PRECISIONS.length)]; TimestampPrecision timestampPrecision = (TimestampPrecision) constraintMapClone.remove("timestamp_precision"); diff --git a/src/com/amazon/ion/benchmark/schema/ReparsedType.java b/src/com/amazon/ion/benchmark/schema/ReparsedType.java index 9c5fde5..e203eef 100644 --- a/src/com/amazon/ion/benchmark/schema/ReparsedType.java +++ b/src/com/amazon/ion/benchmark/schema/ReparsedType.java @@ -23,7 +23,7 @@ public class ReparsedType { private static final String KEYWORD_VALID_VALUES = "valid_values"; private static final String KEYWORD_NAME = "name"; // Using map to avoid processing the multiple repeat constraints situation. - Map constraintMap; + private final Map constraintMap; /** * Initializing the newly created ReparsedType object. 
diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java b/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java index 0115b1b..0a5f910 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java @@ -8,7 +8,7 @@ public class ByteLength extends QuantifiableConstraints{ * Initializing the ByteLength object. * @param value represents constraint field 'byte_length'. */ - public ByteLength(IonValue value) { + private ByteLength(IonValue value) { super(value); } diff --git a/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java b/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java index 0db22f4..6bd6970 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java @@ -8,7 +8,7 @@ public class CodepointLength extends QuantifiableConstraints { * Initializing the CodepointLength object. * @param value represents constraint field 'codepoint_length'. */ - public CodepointLength(IonValue value) { + private CodepointLength(IonValue value) { super(value); } diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java b/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java index 7672398..50ff7a0 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java @@ -8,7 +8,7 @@ public class ContainerLength extends QuantifiableConstraints{ * Initializing the ContainerLength object. * @param value represents constraint field 'container_length'. 
*/ - public ContainerLength(IonValue value) { + private ContainerLength(IonValue value) { super(value); } diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Precision.java b/src/com/amazon/ion/benchmark/schema/constraints/Precision.java index 2fac3ea..0bc6a16 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/Precision.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/Precision.java @@ -8,7 +8,7 @@ public class Precision extends QuantifiableConstraints{ * Initializing the Precision object. * @param value represents constraint field 'precision'. */ - public Precision(IonValue value) { + private Precision(IonValue value) { super(value); } diff --git a/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java b/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java index 190e607..12c0eab 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java @@ -4,7 +4,7 @@ // This class is used for processing the constraints [codepoint_length | byte_length | precision | scale | container_length]. // These constraints have two formats of value [ | >]. -public abstract class QuantifiableConstraints extends ReparsedConstraint { +public abstract class QuantifiableConstraints implements ReparsedConstraint { Range range; /** diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Range.java b/src/com/amazon/ion/benchmark/schema/constraints/Range.java index e94fa47..fd15096 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/Range.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/Range.java @@ -13,7 +13,7 @@ // Processing the constraint value which contains 'range' annotation. public class Range { private static final String KEYWORD_RANGE = "range"; - IonSequence sequence; + public final IonSequence sequence; /** * Initializing the newly created Range object. 
diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Regex.java b/src/com/amazon/ion/benchmark/schema/constraints/Regex.java index 9935b60..c2423fd 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/Regex.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/Regex.java @@ -2,8 +2,8 @@ import com.amazon.ion.IonValue; -public class Regex extends ReparsedConstraint{ - String pattern; +public class Regex implements ReparsedConstraint{ + private final String pattern; /** * Initializing the newly created object. diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java b/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java index ed8a37a..3e23313 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java @@ -1,5 +1,5 @@ package com.amazon.ion.benchmark.schema.constraints; -public abstract class ReparsedConstraint { +public interface ReparsedConstraint { } diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Scale.java b/src/com/amazon/ion/benchmark/schema/constraints/Scale.java index 917d2da..99354a1 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/Scale.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/Scale.java @@ -8,7 +8,7 @@ public class Scale extends QuantifiableConstraints{ * Initializing the Scale object. * @param value represents constraint field 'scale'. 
*/ - public Scale(IonValue value) { + private Scale(IonValue value) { super(value); } diff --git a/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java b/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java index 525e419..dea7089 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java @@ -11,7 +11,7 @@ public class TimestampPrecision extends QuantifiableConstraints{ * Initializing the newly created TimestampPrecision object. * @param value represent the value of constraint 'timestamp_precision'. */ - public TimestampPrecision(IonValue value) { + private TimestampPrecision(IonValue value) { super(value); } diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java b/src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java index 7b2fc7b..0f1e967 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/ValidValues.java @@ -7,7 +7,7 @@ // valid_values: [ ... ] // valid_values: > // valid_values: > -public class ValidValues extends ReparsedConstraint { +public class ValidValues implements ReparsedConstraint { // TODO: Handling min and max value final private IonList validValues; final private Range range; From b984845e4075d7cd1d2623a458349c2ff9a15573 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Mon, 6 Jun 2022 15:14:36 -0700 Subject: [PATCH 11/15] Separates data constructing from data writing process and updates the method of counting the size of written data. 
--- pom.xml | 5 + ...domIonValues.java => DataConstructor.java} | 167 ++---------------- .../ion/benchmark/GeneratorOptions.java | 2 +- .../ion/benchmark/ReadGeneralConstraints.java | 46 ++++- 4 files changed, 58 insertions(+), 162 deletions(-) rename src/com/amazon/ion/benchmark/{WriteRandomIonValues.java => DataConstructor.java} (79%) diff --git a/pom.xml b/pom.xml index 65f6bfe..f5d9869 100644 --- a/pom.xml +++ b/pom.xml @@ -109,6 +109,11 @@ rgxgen 1.3 + + com.google.guava + guava + 31.1-jre + diff --git a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java b/src/com/amazon/ion/benchmark/DataConstructor.java similarity index 79% rename from src/com/amazon/ion/benchmark/WriteRandomIonValues.java rename to src/com/amazon/ion/benchmark/DataConstructor.java index 236e9e4..4e29f05 100644 --- a/src/com/amazon/ion/benchmark/WriteRandomIonValues.java +++ b/src/com/amazon/ion/benchmark/DataConstructor.java @@ -17,7 +17,6 @@ import com.amazon.ion.IonList; import com.amazon.ion.IonReader; -import com.amazon.ion.IonSequence; import com.amazon.ion.IonStruct; import com.amazon.ion.IonSystem; import com.amazon.ion.IonTimestamp; @@ -38,7 +37,6 @@ import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonReaderBuilder; import com.amazon.ion.system.IonSystemBuilder; -import com.amazon.ion.system.IonTextWriterBuilder; import com.github.curiousoddman.rgxgen.RgxGen; import java.io.BufferedInputStream; @@ -64,7 +62,7 @@ /** * Generate specific scalar type of Ion data randomly, for some specific type, e.g. String, Decimal, Timestamp, users can put specifications on these types of Ion data. */ -class WriteRandomIonValues { +class DataConstructor { // The constant defined below are used as placeholder in the method WriteRandomIonValues.writeRequestedSizeFile. 
final static private IonSystem SYSTEM = IonSystemBuilder.standard().build(); final static private List DEFAULT_RANGE = Arrays.asList(0, 0x10FFFF); @@ -78,29 +76,6 @@ class WriteRandomIonValues { final static private Set VALID_TIMESTAMP_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("timestamp_offset", "timestamp_precision"))); // Create a range which contains the default lower bound and upper bound values. final static private Range RANGE = new Range(SYSTEM.newList( SYSTEM.newDecimal(62135769600000L), SYSTEM.newDecimal(253402300800000L))); - /** - * Build up the writer based on the provided format (ion_text|ion_binary) - * @param format the option to decide which writer to be constructed. - * @param file the generated file which contains specified Ion data. - * @return the writer which conforms with the required format. - * @throws Exception if an error occurs while creating a file output stream. - */ - public static IonWriter formatWriter(String format, File file) throws Exception { - IonWriter writer; - OutputStream out = new BufferedOutputStream(new FileOutputStream(file)); - Format formatName = Format.valueOf(format.toUpperCase()); - switch (formatName) { - case ION_BINARY: - writer = IonBinaryWriterBuilder.standard().withLocalSymbolTableAppendEnabled().build(out); - break; - case ION_TEXT: - writer = IonTextWriterBuilder.standard().build(out); - break; - default: - throw new IllegalStateException("Please input the format ion_text or ion_binary"); - } - return writer; - } /** * Use Ion-java parser to parse the data provided in the options which specify the range of data. @@ -216,111 +191,11 @@ private static BigDecimal randomSecondWithFraction(Random random, int scale) { } /** - * This method is not available now. 
- * @throws Exception - */ - private static void writeRandomAnnotatedFloats() throws Exception { - File file = new File("randomAnnotatedFloats.10n"); - List annotations = new ArrayList<>(500); - Random random = new Random(); - for (int i = 0; i < 500; i++) { - int length = random.nextInt(20); - StringBuilder sb = new StringBuilder(); - for (int j = 0; j < length; j++) { - int codePoint; - int type; - do { - codePoint = random.nextInt(Character.MAX_CODE_POINT); - type = Character.getType(codePoint); - } while (type == Character.PRIVATE_USE || type == Character.SURROGATE || type == Character.UNASSIGNED); - sb.appendCodePoint(codePoint); - } - annotations.add(sb.toString()); - } - try (OutputStream out = new BufferedOutputStream(new FileOutputStream(file)); - IonWriter writer = IonBinaryWriterBuilder.standard().build(out)) { - // Target about 100MB of data. Annotated floats will average around 14 bytes. - for (int i = 0; i < (100_000_000 / 14); i++) { - // 60% of values will have 1 annotation; 40% will have 2 or 3. - int numberOfAnnotations = random.nextInt(5) + 1; - if (numberOfAnnotations > 3) { - numberOfAnnotations = 1; - } - for (int j = 0; j < numberOfAnnotations; j++) { - writer.addTypeAnnotation(annotations.get(random.nextInt(500))); - } - writer.writeFloat(Double.longBitsToDouble(random.nextLong())); - } - } - } - - /** - * This method is not available now. Refactor required. - * @param size specifies the size in bytes of the generated file. - * @param format the format of output file (ion_binary | ion_text). - * @param path the destination of the generated file. - * @throws Exception if an error occurs when building up the writer. 
- */ - public static void writeRandomSymbolValues(int size, String format, String path) throws Exception { - File file = new File(path); - try (IonWriter writer = WriteRandomIonValues.formatWriter(format, file)) { - List symbols = new ArrayList<>(500); - Random random = new Random(); - for (int i = 0; i < 500; i++) { - int length = random.nextInt(20); - StringBuilder sb = new StringBuilder(); - for (int j = 0; j < length; j++) { - int codePoint; - int charactereType; - do { - codePoint = random.nextInt(Character.MAX_CODE_POINT); - charactereType = Character.getType(codePoint); - } while (charactereType == Character.PRIVATE_USE || charactereType == Character.SURROGATE || charactereType == Character.UNASSIGNED); - sb.appendCodePoint(codePoint); - } - symbols.add(sb.toString()); - } - for (int i = 0; i < size / 2; i++) { - writer.writeSymbol(symbols.get(random.nextInt(500))); - } - } - WriteRandomIonValues.printInfo(path); - } - - /** - * This method is used for generating requested size file by comparing the current file size and the target size. - * @param size specifies the size in bytes of the generated file. - * @param writer writer is IonWriter. - * @param file the generated file which contains specified Ion data. - * @param parsedTypeDefinition is parsed from ion schema file as IonStruct format, it contains the top-level constraints. - * @throws Exception if an error occur when writing generated data. - */ - public static void writeRequestedSizeFile(int size, IonWriter writer, File file, ReparsedType parsedTypeDefinition) throws Exception { - int currentSize = 0; - int count = 0; - // Determine how many values should be written before the writer.flush(), and this process aims to reduce the execution time of writer.flush(). 
- while (currentSize <= 0.05 * size) { - WriteRandomIonValues.writeDataToFile(writer, parsedTypeDefinition); - count += 1; - writer.flush(); - currentSize = (int) file.length(); - } - while (currentSize <= size) { - for (int i = 0; i < count; i++) { - WriteRandomIonValues.writeDataToFile(writer, parsedTypeDefinition); - } - writer.flush(); - currentSize = (int) file.length(); - } - } - - /** - * Generating data which is conformed with provided constraints and writing it to the output file. - * @param writer is IonWriter. + * Constructing data which is conformed with provided type definition. * @param parsedTypeDefinition is parsed from ion schema file as IonStruct format, it contains the top-level constraints. - * @throws Exception if an error occur during the data writing process. + * @return constructed ion data. */ - private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDefinition) throws Exception { + public static IonValue constructIonData(ReparsedType parsedTypeDefinition) { // The first step is to check whether parsedTypeDefinition contains 'valid_values'. The reason we prioritize checking // 'valid_values' is that the constraint 'type' might not be contained in the type definition, in that case we cannot trigger // the following data constructing process. 
@@ -330,44 +205,28 @@ private static void writeDataToFile(IonWriter writer, ReparsedType parsedTypeDef constraintMapClone.putAll(constraintMap); ValidValues validValues = (ValidValues) constraintMap.get("valid_values"); if (validValues != null && !validValues.isRange()) { - IonValue validValue = getRandomValueFromList(validValues.getValidValues()); - validValue.writeTo(writer); + return getRandomValueFromList(validValues.getValidValues()); } else if (parsedTypeDefinition.getIonType() == null) { throw new IllegalStateException("Constraint 'type' is required."); } else { IonType type = parsedTypeDefinition.getIonType(); switch (type) { case FLOAT: - writer.writeFloat(WriteRandomIonValues.constructFloat(constraintMapClone)); - break; + return SYSTEM.newFloat(constructFloat(constraintMapClone)); case SYMBOL: - writer.writeSymbol(WriteRandomIonValues.constructString(constraintMapClone)); - break; + return SYSTEM.newSymbol(constructString(constraintMapClone)); case INT: - writer.writeInt(WriteRandomIonValues.constructInt(constraintMapClone)); - break; + return SYSTEM.newInt(constructInt(constraintMapClone)); case STRING: - writer.writeString(WriteRandomIonValues.constructString(constraintMapClone)); - break; + return SYSTEM.newString(constructString(constraintMapClone)); case DECIMAL: - writer.writeDecimal(WriteRandomIonValues.constructDecimal(constraintMapClone)); - break; + return SYSTEM.newDecimal(constructDecimal(constraintMapClone)); case TIMESTAMP: - writer.writeTimestamp(WriteRandomIonValues.constructTimestamp(constraintMapClone)); - break; -// Temporarily comment the struct and list generating process. 
-// case STRUCT: -// WriteRandomIonValues.constructAndWriteIonStruct(constraintStruct, writer); -// break; -// case LIST: -// WriteRandomIonValues.constructAndWriteIonList(writer, constraintStruct); -// break; + return SYSTEM.newTimestamp(constructTimestamp(constraintMapClone)); case BLOB: - writer.writeBlob(WriteRandomIonValues.constructLobs(constraintMapClone)); - break; + return SYSTEM.newBlob(constructLobs(constraintMapClone)); case CLOB: - writer.writeClob(WriteRandomIonValues.constructLobs(constraintMapClone)); - break; + return SYSTEM.newClob(constructLobs(constraintMapClone)); default: throw new IllegalStateException(type + " is not supported."); } diff --git a/src/com/amazon/ion/benchmark/GeneratorOptions.java b/src/com/amazon/ion/benchmark/GeneratorOptions.java index 67f780f..5e63f95 100644 --- a/src/com/amazon/ion/benchmark/GeneratorOptions.java +++ b/src/com/amazon/ion/benchmark/GeneratorOptions.java @@ -22,6 +22,6 @@ public static void executeGenerator(Map optionsMap) throws Excep String inputFilePath = optionsMap.get("--input-ion-schema").toString(); // Check whether the input schema file is valid and get the loaded schema. 
Schema schema = IonSchemaUtilities.loadSchemaDefinition(inputFilePath); - ReadGeneralConstraints.readIonSchemaAndGenerate(size, schema, format, path); + ReadGeneralConstraints.constructAndWriteIonData(size, schema, format, path); } } diff --git a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java index b424752..920beaa 100644 --- a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java +++ b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java @@ -2,13 +2,18 @@ import com.amazon.ion.IonLoader; import com.amazon.ion.IonSystem; +import com.amazon.ion.IonValue; import com.amazon.ion.IonWriter; import com.amazon.ion.benchmark.schema.ReparsedType; +import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonSystemBuilder; +import com.amazon.ion.system.IonTextWriterBuilder; import com.amazon.ionschema.Schema; import com.amazon.ionschema.Type; +import com.google.common.io.CountingOutputStream; -import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; /** * Parse Ion Schema file and extract the type definition as ReparsedType object then pass the re-parsed type definition to the Ion data generator. @@ -18,23 +23,50 @@ public class ReadGeneralConstraints { public static final IonLoader LOADER = SYSTEM.newLoader(); /** - * Parsing schema type definition to ReparsedType and passing the re-parsed value to data generating process. + * Getting the constructed data which is conformed with ISL and writing data to the output file. * @param size is the size of the output file. * @param schema an Ion Schema loaded by ion-schema-kotlin. * @param format is the format of the generated file, select from set (ion_text | ion_binary). * @param outputFile is the path of the generated file. * @throws Exception if errors occur when writing data. 
*/ - public static void readIonSchemaAndGenerate(int size, Schema schema, String format, String outputFile) throws Exception { + public static void constructAndWriteIonData(int size, Schema schema, String format, String outputFile) throws Exception { // Assume there's only one type definition between schema_header and schema_footer. // If more constraints added, here is the point where developers should start. Type schemaType = schema.getTypes().next(); ReparsedType parsedTypeDefinition = new ReparsedType(schemaType); - File file = new File(outputFile); - try (IonWriter writer = WriteRandomIonValues.formatWriter(format, file)) { - WriteRandomIonValues.writeRequestedSizeFile(size, writer, file, parsedTypeDefinition); + CountingOutputStream outputStreamCounter = new CountingOutputStream(new FileOutputStream(outputFile)); + long count = 0; + try (IonWriter writer = formatWriter(format, outputStreamCounter)) { + while (count <= size) { + IonValue constructedData = DataConstructor.constructIonData(parsedTypeDefinition); + constructedData.writeTo(writer); + count = outputStreamCounter.getCount(); + } } // Print the successfully generated data notification which includes the file path information. - WriteRandomIonValues.printInfo(outputFile); + DataConstructor.printInfo(outputFile); + } + + /** + * Construct the writer based on the provided format (ion_text|ion_binary). + * @param format decides which writer should be constructed. + * @param outputStream represents the bytes stream which will be written into the output file. + * @return the writer which conforms with the required format. 
+ */ + public static IonWriter formatWriter(String format, OutputStream outputStream) { + IonWriter writer; + Format formatName = Format.valueOf(format.toUpperCase()); + switch (formatName) { + case ION_BINARY: + writer = IonBinaryWriterBuilder.standard().withLocalSymbolTableAppendEnabled().build(outputStream); + break; + case ION_TEXT: + writer = IonTextWriterBuilder.standard().build(outputStream); + break; + default: + throw new IllegalStateException("Please input the format ion_text or ion_binary"); + } + return writer; } } From 870c7a90f470f25d744871329f2f67e96dffc59e Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Tue, 7 Jun 2022 13:21:59 -0700 Subject: [PATCH 12/15] Updates based on the most recent comments: Adds accessor method to Range. Removes subclasses of QuantifiableConstraints which don't include addtional methods or attributes. Updates varaible name from RANGE to DEFAULT_TIMESTAMP_IN_MILLIS_DECIMAL_RANGE. --- .../amazon/ion/benchmark/DataConstructor.java | 17 ++++++-------- .../ion/benchmark/schema/ReparsedType.java | 6 +---- .../schema/constraints/ByteLength.java | 23 ------------------- .../schema/constraints/CodepointLength.java | 23 ------------------- .../schema/constraints/ContainerLength.java | 23 ------------------- .../schema/constraints/Precision.java | 23 ------------------- .../constraints/QuantifiableConstraints.java | 15 +++++++++--- .../benchmark/schema/constraints/Range.java | 10 +++++++- .../benchmark/schema/constraints/Scale.java | 23 ------------------- .../constraints/TimestampPrecision.java | 2 +- 10 files changed, 30 insertions(+), 135 deletions(-) delete mode 100644 src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java delete mode 100644 src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java delete mode 100644 src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java delete mode 100644 src/com/amazon/ion/benchmark/schema/constraints/Precision.java delete mode 100644 
src/com/amazon/ion/benchmark/schema/constraints/Scale.java diff --git a/src/com/amazon/ion/benchmark/DataConstructor.java b/src/com/amazon/ion/benchmark/DataConstructor.java index 4e29f05..20ceae2 100644 --- a/src/com/amazon/ion/benchmark/DataConstructor.java +++ b/src/com/amazon/ion/benchmark/DataConstructor.java @@ -25,13 +25,10 @@ import com.amazon.ion.IonWriter; import com.amazon.ion.Timestamp; import com.amazon.ion.benchmark.schema.ReparsedType; -import com.amazon.ion.benchmark.schema.constraints.ByteLength; -import com.amazon.ion.benchmark.schema.constraints.CodepointLength; -import com.amazon.ion.benchmark.schema.constraints.Precision; +import com.amazon.ion.benchmark.schema.constraints.QuantifiableConstraints; import com.amazon.ion.benchmark.schema.constraints.Range; import com.amazon.ion.benchmark.schema.constraints.Regex; import com.amazon.ion.benchmark.schema.constraints.ReparsedConstraint; -import com.amazon.ion.benchmark.schema.constraints.Scale; import com.amazon.ion.benchmark.schema.constraints.TimestampPrecision; import com.amazon.ion.benchmark.schema.constraints.ValidValues; import com.amazon.ion.system.IonBinaryWriterBuilder; @@ -75,7 +72,7 @@ class DataConstructor { final static private Set VALID_DECIMAL_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("precision", "scale"))); final static private Set VALID_TIMESTAMP_CONSTRAINTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("timestamp_offset", "timestamp_precision"))); // Create a range which contains the default lower bound and upper bound values. - final static private Range RANGE = new Range(SYSTEM.newList( SYSTEM.newDecimal(62135769600000L), SYSTEM.newDecimal(253402300800000L))); + final static private Range DEFAULT_TIMESTAMP_IN_MILLIS_DECIMAL_RANGE = new Range(SYSTEM.newList( SYSTEM.newDecimal(62135769600000L), SYSTEM.newDecimal(253402300800000L))); /** * Use Ion-java parser to parse the data provided in the options which specify the range of data. 
@@ -253,7 +250,7 @@ public static IonValue getRandomValueFromList(IonList values) { public static String constructString(Map constraintMapClone) { Random random = new Random(); Regex regex = (Regex) constraintMapClone.remove("regex"); - CodepointLength codepoint_length = (CodepointLength) constraintMapClone.remove("codepoint_length"); + QuantifiableConstraints codepoint_length = (QuantifiableConstraints) constraintMapClone.remove("codepoint_length"); if (!constraintMapClone.isEmpty()) { throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); @@ -335,8 +332,8 @@ public static BigDecimal constructDecimal(Map constr // If there is no constraints provided, assign scale and precision with default values. int scaleValue = random.nextInt(DEFAULT_SCALE_UPPER_BOUND - DEFAULT_SCALE_LOWER_BOUND + 1) + DEFAULT_SCALE_LOWER_BOUND; int precisionValue = random.nextInt(DEFAULT_PRECISION); - Scale scale = (Scale) constraintMapClone.remove("scale"); - Precision precision = (Precision) constraintMapClone.remove("precision"); + QuantifiableConstraints scale = (QuantifiableConstraints) constraintMapClone.remove("scale"); + QuantifiableConstraints precision = (QuantifiableConstraints) constraintMapClone.remove("precision"); ValidValues validValues = (ValidValues) constraintMapClone.remove("valid_values"); StringBuilder rs = new StringBuilder(); rs.append(random.nextInt(9) + 1); @@ -394,7 +391,7 @@ public static long constructInt(Map constraintMapClo */ public static Timestamp constructTimestamp(Map constraintMapClone) { Random random = new Random(); - Range range = RANGE; + Range range = DEFAULT_TIMESTAMP_IN_MILLIS_DECIMAL_RANGE; // Preset the local offset. Integer localOffset = localOffset(random); // Preset the default precision. 
@@ -442,7 +439,7 @@ public static Timestamp constructTimestamp(Map const public static byte[] constructLobs( Map constraintMapClone) { int byte_length; Random random = new Random(); - ByteLength byteLength = (ByteLength) constraintMapClone.remove("byte_length"); + QuantifiableConstraints byteLength = (QuantifiableConstraints) constraintMapClone.remove("byte_length"); if (!constraintMapClone.isEmpty()) { throw new IllegalStateException ("Found unhandled constraints : " + constraintMapClone.values()); } diff --git a/src/com/amazon/ion/benchmark/schema/ReparsedType.java b/src/com/amazon/ion/benchmark/schema/ReparsedType.java index e203eef..9bdc8af 100644 --- a/src/com/amazon/ion/benchmark/schema/ReparsedType.java +++ b/src/com/amazon/ion/benchmark/schema/ReparsedType.java @@ -93,15 +93,11 @@ private static ReparsedConstraint toConstraint(IonValue field) { //TODO: Add cases of constraints 'annotation' and 'occurs'. //TODO: Add container type constraints: 'element', 'ordered_element', 'fields', these might cover some of the implemented constraints. case KEYWORD_BYTE_LENGTH: - return ByteLength.of(field); case KEYWORD_PRECISION: - return Precision.of(field); case KEYWORD_SCALE: - return Scale.of(field); case KEYWORD_CODE_POINT_LENGTH: - return CodepointLength.of(field); case KEYWORD_CONTAINER_LENGTH: - return ContainerLength.of(field); + return QuantifiableConstraints.of(field); case KEYWORD_VALID_VALUES: return ValidValues.of(field); case KEYWORD_REGEX: diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java b/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java deleted file mode 100644 index 0a5f910..0000000 --- a/src/com/amazon/ion/benchmark/schema/constraints/ByteLength.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.amazon.ion.benchmark.schema.constraints; - -import com.amazon.ion.IonValue; - -public class ByteLength extends QuantifiableConstraints{ - - /** - * Initializing the ByteLength object. 
- * @param value represents constraint field 'byte_length'. - */ - private ByteLength(IonValue value) { - super(value); - } - - /** - * Parsing constraint field into ByteLength. - * @param field represents the value of constraint 'byte_length'. - * @return the newly created ByteLength object. - */ - public static ByteLength of(IonValue field) { - return new ByteLength(field); - } -} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java b/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java deleted file mode 100644 index 6bd6970..0000000 --- a/src/com/amazon/ion/benchmark/schema/constraints/CodepointLength.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.amazon.ion.benchmark.schema.constraints; - -import com.amazon.ion.IonValue; - -public class CodepointLength extends QuantifiableConstraints { - - /** - * Initializing the CodepointLength object. - * @param value represents constraint field 'codepoint_length'. - */ - private CodepointLength(IonValue value) { - super(value); - } - - /** - * Parsing constraint field into CodepointLength. - * @param field represents the value of constraint 'codepoint_length'. - * @return the newly created CodepointLength object. - */ - public static CodepointLength of(IonValue field) { - return new CodepointLength(field); - } -} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java b/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java deleted file mode 100644 index 50ff7a0..0000000 --- a/src/com/amazon/ion/benchmark/schema/constraints/ContainerLength.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.amazon.ion.benchmark.schema.constraints; - -import com.amazon.ion.IonValue; - -public class ContainerLength extends QuantifiableConstraints{ - - /** - * Initializing the ContainerLength object. - * @param value represents constraint field 'container_length'. 
- */ - private ContainerLength(IonValue value) { - super(value); - } - - /** - * Parsing constraint field into ContainerLength. - * @param field represents the value of constraint 'container_length'. - * @return newly created ContainerLength object. - */ - public static ContainerLength of(IonValue field) { - return new ContainerLength(field); - } -} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Precision.java b/src/com/amazon/ion/benchmark/schema/constraints/Precision.java deleted file mode 100644 index 0bc6a16..0000000 --- a/src/com/amazon/ion/benchmark/schema/constraints/Precision.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.amazon.ion.benchmark.schema.constraints; - -import com.amazon.ion.IonValue; - -public class Precision extends QuantifiableConstraints{ - - /** - * Initializing the Precision object. - * @param value represents constraint field 'precision'. - */ - private Precision(IonValue value) { - super(value); - } - - /** - * Parsing constraint field into Precision. - * @param field represents the value of constraint 'precision'. - * @return the newly created Precision object. - */ - public static Precision of(IonValue field) { - return new Precision(field); - } -} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java b/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java index 12c0eab..7f9b1a7 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/QuantifiableConstraints.java @@ -4,15 +4,24 @@ // This class is used for processing the constraints [codepoint_length | byte_length | precision | scale | container_length]. // These constraints have two formats of value [ | >]. 
-public abstract class QuantifiableConstraints implements ReparsedConstraint { - Range range; +public class QuantifiableConstraints implements ReparsedConstraint { + private final Range range; /** * Initializing the newly created QuantifiableConstraint object. * @param value represents one of [codepoint_length | byte_length | precision | scale | container_length] field value. */ public QuantifiableConstraints(IonValue value) { - range = Range.of(value); + this.range = Range.of(value); + } + + /** + * Parsing constraint field into QuantifiableConstraints. + * @param field represents the value of constraint. + * @return newly created QuantifiableConstraints object. + */ + public static QuantifiableConstraints of(IonValue field) { + return new QuantifiableConstraints(field); } /** diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Range.java b/src/com/amazon/ion/benchmark/schema/constraints/Range.java index fd15096..525848c 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/Range.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/Range.java @@ -13,7 +13,7 @@ // Processing the constraint value which contains 'range' annotation. public class Range { private static final String KEYWORD_RANGE = "range"; - public final IonSequence sequence; + private final IonSequence sequence; /** * Initializing the newly created Range object. @@ -23,6 +23,14 @@ public Range(IonSequence sequence) { this.sequence = sequence; } + /** + * Helping to access the private variable sequence. + * @return IonSequence which represents the range value. + */ + public IonSequence getSequence() { + return this.sequence; + } + /** * Getting the lower bound value from range. * @param klass represent the Class object of different data types. 
diff --git a/src/com/amazon/ion/benchmark/schema/constraints/Scale.java b/src/com/amazon/ion/benchmark/schema/constraints/Scale.java deleted file mode 100644 index 99354a1..0000000 --- a/src/com/amazon/ion/benchmark/schema/constraints/Scale.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.amazon.ion.benchmark.schema.constraints; - -import com.amazon.ion.IonValue; - -public class Scale extends QuantifiableConstraints{ - - /** - * Initializing the Scale object. - * @param value represents constraint field 'scale'. - */ - private Scale(IonValue value) { - super(value); - } - - /** - * Parsing constraint field into Scale. - * @param field represents the value of constraint 'scale'. - * @return the newly created Scale object. - */ - public static Scale of(IonValue field) { - return new Scale(field); - } -} diff --git a/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java b/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java index dea7089..43add21 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/TimestampPrecision.java @@ -31,7 +31,7 @@ public static TimestampPrecision of(IonValue field) { */ public static Timestamp.Precision getRandomTimestampPrecision(Range range) { Random random = new Random(); - IonSequence constraintSequence = range.sequence; + IonSequence constraintSequence = range.getSequence(); Timestamp.Precision[] precisions = Timestamp.Precision.values(); String lowerBound = constraintSequence.get(0).toString(); String upperBound = constraintSequence.get(1).toString(); From bca28f92762a6feb86e0750d06b3a5a6903a160f Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Mon, 13 Jun 2022 12:53:45 -0700 Subject: [PATCH 13/15] Remove the guava dependency and add comment for ReparsedConstraint. 
--- pom.xml | 5 --- .../ion/benchmark/CountingOutputStream.java | 33 +++++++++++++++++++ .../ion/benchmark/ReadGeneralConstraints.java | 1 - .../constraints/ReparsedConstraint.java | 6 +++- 4 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 src/com/amazon/ion/benchmark/CountingOutputStream.java diff --git a/pom.xml b/pom.xml index f5d9869..65f6bfe 100644 --- a/pom.xml +++ b/pom.xml @@ -109,11 +109,6 @@ rgxgen 1.3 - - com.google.guava - guava - 31.1-jre - diff --git a/src/com/amazon/ion/benchmark/CountingOutputStream.java b/src/com/amazon/ion/benchmark/CountingOutputStream.java new file mode 100644 index 0000000..3d1009e --- /dev/null +++ b/src/com/amazon/ion/benchmark/CountingOutputStream.java @@ -0,0 +1,33 @@ +package com.amazon.ion.benchmark; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public class CountingOutputStream extends FilterOutputStream { + private long count; + + /** + * Creates an output stream filter built on top of the specified + * underlying output stream. + * + * @param out the underlying output stream to be assigned to + * the field this.out for later use, or + * null if this instance is to be + * created without an underlying stream. + */ + public CountingOutputStream(OutputStream out) { + super(out); + } + + /** Returns the number of bytes written. 
*/ + public long getCount() { + return count; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + this.count += len; + } +} diff --git a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java index 920beaa..80b83f9 100644 --- a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java +++ b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java @@ -10,7 +10,6 @@ import com.amazon.ion.system.IonTextWriterBuilder; import com.amazon.ionschema.Schema; import com.amazon.ionschema.Type; -import com.google.common.io.CountingOutputStream; import java.io.FileOutputStream; import java.io.OutputStream; diff --git a/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java b/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java index 3e23313..ef08444 100644 --- a/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java +++ b/src/com/amazon/ion/benchmark/schema/constraints/ReparsedConstraint.java @@ -1,5 +1,9 @@ package com.amazon.ion.benchmark.schema.constraints; - +/* +This interface is the abstraction of all constraints. It will be implemented by different constraint classes which have different domain knowledge. +After parsing the type definition, all constraints will be packed into a HashMap. +The ReparsedConstraint will be cast into specific constraint based on which instance it represents. +*/ public interface ReparsedConstraint { } From 5b591195e23d77fd12c1ddfc05f3d728ec8958eb Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Mon, 13 Jun 2022 16:19:03 -0700 Subject: [PATCH 14/15] Updates based on comments: Adds writer.flush() while counting the written data size. Override write(int b) method in CountingOutputStream. 
--- .../ion/benchmark/CountingOutputStream.java | 6 ++++++ .../ion/benchmark/ReadGeneralConstraints.java | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/com/amazon/ion/benchmark/CountingOutputStream.java b/src/com/amazon/ion/benchmark/CountingOutputStream.java index 3d1009e..7f63b43 100644 --- a/src/com/amazon/ion/benchmark/CountingOutputStream.java +++ b/src/com/amazon/ion/benchmark/CountingOutputStream.java @@ -30,4 +30,10 @@ public void write(byte[] b, int off, int len) throws IOException { out.write(b, off, len); this.count += len; } + + @Override + public void write(int b) throws IOException { + out.write(b); + this.count++; + } } diff --git a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java index 80b83f9..e8a9f83 100644 --- a/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java +++ b/src/com/amazon/ion/benchmark/ReadGeneralConstraints.java @@ -35,12 +35,24 @@ public static void constructAndWriteIonData(int size, Schema schema, String form Type schemaType = schema.getTypes().next(); ReparsedType parsedTypeDefinition = new ReparsedType(schemaType); CountingOutputStream outputStreamCounter = new CountingOutputStream(new FileOutputStream(outputFile)); - long count = 0; try (IonWriter writer = formatWriter(format, outputStreamCounter)) { - while (count <= size) { + int count = 0; + long currentSize = 0; + // Determine how many values should be written before the writer.flush(), and this process aims to reduce the execution time of writer.flush(). 
+ while (currentSize <= 0.05 * size) { IonValue constructedData = DataConstructor.constructIonData(parsedTypeDefinition); constructedData.writeTo(writer); - count = outputStreamCounter.getCount(); + count ++; + writer.flush(); + currentSize = outputStreamCounter.getCount(); + } + while (currentSize <= size) { + for (int i = 0; i < count; i++) { + IonValue constructedData = DataConstructor.constructIonData(parsedTypeDefinition); + constructedData.writeTo(writer); + } + writer.flush(); + currentSize = outputStreamCounter.getCount(); } } // Print the successfully generated data notification which includes the file path information. From b9e259e846a4d3d960995beaed78a0991ba5ffc0 Mon Sep 17 00:00:00 2001 From: Linlin Sun Date: Sun, 19 Jun 2022 23:06:31 -0700 Subject: [PATCH 15/15] Adds comment to explain the case of 'open content'. --- src/com/amazon/ion/benchmark/schema/ReparsedType.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/com/amazon/ion/benchmark/schema/ReparsedType.java b/src/com/amazon/ion/benchmark/schema/ReparsedType.java index 9bdc8af..e58653d 100644 --- a/src/com/amazon/ion/benchmark/schema/ReparsedType.java +++ b/src/com/amazon/ion/benchmark/schema/ReparsedType.java @@ -105,6 +105,8 @@ private static ReparsedConstraint toConstraint(IonValue field) { case KEYWORD_TIMESTAMP_PRECISION: return TimestampPrecision.of(field); default: + // For now, Ion Data Generator doesn't support processing 'open' content. + // If the constraint 'content' included in the ISL , the data generator will throw an exception. throw new IllegalArgumentException("This field is not understood: " + field); } }