diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantArrayBuilder.java b/core/src/main/java/org/apache/iceberg/variants/VariantArrayBuilder.java new file mode 100644 index 000000000000..7be7938add61 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantArrayBuilder.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.DateTimeUtil; + +public class VariantArrayBuilder extends VariantBuilderBase { + private final List offsets; + + public VariantArrayBuilder(ByteBufferWrapper valueBuffer, Dictionary dict) { + super(valueBuffer, dict); + offsets = Lists.newArrayList(); + } + + public VariantObjectBuilder startObject() { + addOffset(); + return new VariantObjectBuilder(valueBuffer(), dict()); + } + + public VariantArrayBuilder startArray() { + addOffset(); + return new VariantArrayBuilder(valueBuffer(), dict()); + } + + public VariantArrayBuilder writeNull() { + addOffset(); + writeNullInternal(); + return this; + } + + public VariantArrayBuilder writeBoolean(boolean value) { + addOffset(); + writeBooleanInternal(value); + return this; + } + + public VariantArrayBuilder writeIntegral(long value) { + addOffset(); + writeIntegralInternal(value); + return this; + } + + public VariantArrayBuilder writeDouble(double value) { + addOffset(); + writeDoubleInternal(value); + return this; + } + + public VariantArrayBuilder writeDecimal(BigDecimal value) { + addOffset(); + writeDecimalInternal(value); + return this; + } + + public VariantArrayBuilder writeDate(LocalDate value) { + addOffset(); + writeDateInternal(DateTimeUtil.daysFromDate(value)); + return this; + } + + public VariantArrayBuilder writeTimestampTz(OffsetDateTime value) { + addOffset(); + writeTimestampTzInternal(DateTimeUtil.microsFromTimestamptz(value)); + return this; + } + + public VariantArrayBuilder writeTimestampNtz(LocalDateTime value) { + addOffset(); + writeTimestampNtzInternal(DateTimeUtil.microsFromTimestamp(value)); + return this; + } + + public VariantArrayBuilder writeFloat(float value) { + addOffset(); + writeFloatInternal(value); + return this; + } + + public VariantArrayBuilder writeBinary(byte[] value) { + addOffset(); + writeBinaryInternal(value); + return this; + } + + public VariantArrayBuilder writeString(String str) { + addOffset(); + writeStringInternal(str); + return this; + } + + private void addOffset() { + offsets.add(valueBuffer().pos() - startPos()); + } + + public void endArray() { + super.endArray(startPos(), offsets); + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantBuilder.java b/core/src/main/java/org/apache/iceberg/variants/VariantBuilder.java new file mode 100644 index 000000000000..0d3dfae0cb64 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantBuilder.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.core.exc.InputCoercionException; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.math.BigDecimal; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.JsonUtil; + +/** A builder class to build a primitive/array/object variant. */ +public class VariantBuilder extends VariantBuilderBase { + public VariantBuilder() { + super(new ByteBufferWrapper(), new Dictionary()); + } + + public VariantPrimitiveBuilder createPrimitive() { + return new VariantPrimitiveBuilder(valueBuffer(), dict()); + } + + public VariantObjectBuilder startObject() { + return new VariantObjectBuilder(valueBuffer(), dict()); + } + + public VariantArrayBuilder startArray() { + return new VariantArrayBuilder(valueBuffer(), dict()); + } + + /** + * Parses a JSON string and constructs a Variant object. + * + * @param json The JSON string to parse. + * @return The constructed Variant object. + */ + public static Variant parseJson(String json) { + Preconditions.checkArgument( + json != null && !json.isEmpty(), "Input JSON string cannot be null or empty."); + + try (JsonParser parser = JsonUtil.factory().createParser(json)) { + parser.nextToken(); + VariantBuilder builder = new VariantBuilder(); + builder.parseJson(parser); + return builder.build(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private void parseJson(JsonParser parser) throws IOException { + JsonToken token = parser.currentToken(); + + if (token == null) { + throw new JsonParseException(parser, "Unexpected null token"); + } + + switch (token) { + case START_OBJECT: + parseObject(parser); + break; + case START_ARRAY: + parseArray(parser); + break; + default: + parsePrimitive(parser); + } + } + + private void parseObject(JsonParser parser) throws IOException { + List fields = Lists.newArrayList(); + int startPos = valueBuffer().pos(); + + // Store object keys to dictionary of metadata + while (parser.nextToken() != JsonToken.END_OBJECT) { + String key = parser.currentName(); + parser.nextToken(); // Move to the value + + int id = dict().add(key); + fields.add(new VariantBuilderBase.FieldEntry(key, id, valueBuffer().pos() - startPos)); + parseJson(parser); + } + + endObject(startPos, fields); + } + + private void parseArray(JsonParser parser) throws IOException { + List offsets = Lists.newArrayList(); + int startPos = valueBuffer().pos(); + + while (parser.nextToken() != JsonToken.END_ARRAY) { + offsets.add(valueBuffer().pos() - startPos); + parseJson(parser); + } + + endArray(startPos, offsets); + } + + private void parsePrimitive(JsonParser parser) throws IOException { + JsonToken token = parser.currentToken(); + + switch (token) { + case VALUE_STRING: + writeStringInternal(parser.getText()); + break; + case VALUE_NUMBER_INT: + try { + writeIntegralInternal(parser.getLongValue()); + } catch (InputCoercionException ignored) { + writeFloatValue(parser); + } + break; + case VALUE_NUMBER_FLOAT: + writeFloatValue(parser); + break; + case VALUE_TRUE: + writeBooleanInternal(true); + break; + case VALUE_FALSE: + writeBooleanInternal(false); + break; + case VALUE_NULL: + writeNullInternal(); + break; + default: + throw new JsonParseException(parser, "Unexpected token " + token); + } + } + + /** + * This function attempts to write floating number in decimal format to store the exact value if + * it fits in the decimal for Variant; otherwise, write as a double value. + * + * @param parser instance of JSONParser with the current token to be floating number + */ + private void writeFloatValue(JsonParser parser) throws IOException { + String input = parser.getText(); + BigDecimal decimalValue = new BigDecimal(input); + + // Decimal values only support a scale in [0, 38] and a precision <= 38 + if (decimalValue.scale() >= 0 + && decimalValue.scale() <= MAX_DECIMAL16_PRECISION + && decimalValue.precision() <= MAX_DECIMAL16_PRECISION) { + writeDecimalInternal(decimalValue); + } else { + writeDoubleInternal(parser.getDoubleValue()); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantBuilderBase.java b/core/src/main/java/org/apache/iceberg/variants/VariantBuilderBase.java new file mode 100644 index 000000000000..a9ddb2256cae --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantBuilderBase.java @@ -0,0 +1,424 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; + +abstract class VariantBuilderBase { + private static final int MAX_DECIMAL4_PRECISION = 9; + private static final int MAX_DECIMAL8_PRECISION = 18; + protected static final int MAX_DECIMAL16_PRECISION = 38; + + private final ByteBufferWrapper valueBuffer; + private final Dictionary dict; + private int startPos; + + ByteBufferWrapper valueBuffer() { + return valueBuffer; + } + + Dictionary dict() { + return dict; + } + + int startPos() { + return startPos; + } + + VariantBuilderBase(ByteBufferWrapper valueBuffer, Dictionary dict) { + this.valueBuffer = valueBuffer; + this.dict = dict; + startPos = valueBuffer.pos; + } + + /** + * Builds the variant metadata from `dictionaryKeys` and returns the resulting Variant object. + * + * @return The constructed Variant object. + */ + public Variant build() { + int numKeys = dict.size(); + + // Calculate total size of dictionary strings + long numStringBytes = dict.totalBytes(); + + // Determine the number of bytes required for dictionary size and offset entry + int offsetSize = sizeOf(Math.max((int) numStringBytes, numKeys)); + + // metadata: header byte, dictionary size, offsets and string bytes + long metadataSize = 1 + offsetSize + (numKeys + 1) * offsetSize + numStringBytes; + + ByteBuffer metadataBuffer = + ByteBuffer.allocate((int) metadataSize).order(ByteOrder.LITTLE_ENDIAN); + + // Write header byte (version + offset size) + VariantUtil.writeByte( + metadataBuffer, VariantUtil.metadataHeader(Variants.VERSION, offsetSize), 0); + + // Write number of keys + VariantUtil.writeLittleEndianUnsigned(metadataBuffer, numKeys, 1, offsetSize); + + // Write offsets + int offset = 1 + offsetSize; + int dictOffset = 0; + for (byte[] key : dict.keys()) { + VariantUtil.writeLittleEndianUnsigned(metadataBuffer, dictOffset, offset, offsetSize); + dictOffset += key.length; + offset += offsetSize; + } + VariantUtil.writeLittleEndianUnsigned(metadataBuffer, numStringBytes, offset, offsetSize); + + // Write dictionary strings + offset += offsetSize; + for (byte[] key : dict.keys()) { + VariantUtil.writeBufferAbsolute(metadataBuffer, offset, ByteBuffer.wrap(key)); + offset += key.length; + } + + return new VariantImpl(metadataBuffer, valueBuffer.buffer); + } + + protected void writeNullInternal() { + valueBuffer.writePrimitive(Variants.PhysicalType.NULL, null); + } + + protected void writeBooleanInternal(boolean value) { + valueBuffer.writePrimitive( + value ? Variants.PhysicalType.BOOLEAN_TRUE : Variants.PhysicalType.BOOLEAN_FALSE, value); + } + + /** + * Writes an integral value to the variant builder, automatically choosing the smallest type + * (INT8, INT16, INT32, or INT64) to store the value efficiently. + * + * @param value The integral value to append. + */ + protected void writeIntegralInternal(long value) { + if (value == (byte) value) { + valueBuffer.writePrimitive(Variants.PhysicalType.INT8, (byte) value); + } else if (value == (short) value) { + valueBuffer.writePrimitive(Variants.PhysicalType.INT16, (short) value); + } else if (value == (int) value) { + valueBuffer.writePrimitive(Variants.PhysicalType.INT32, (int) value); + } else { + valueBuffer.writePrimitive(Variants.PhysicalType.INT64, value); + } + } + + protected void writeDoubleInternal(double value) { + valueBuffer.writePrimitive(Variants.PhysicalType.DOUBLE, value); + } + + /** + * Writes a decimal value to the variant builder, choosing the smallest decimal type (DECIMAL4, + * DECIMAL8, DECIMAL16) that fits its precision and scale. + */ + public void writeDecimalInternal(BigDecimal value) { + Preconditions.checkArgument( + value.precision() <= MAX_DECIMAL16_PRECISION, + "Unsupported Decimal precision: %s", + value.precision()); + + if (value.scale() <= MAX_DECIMAL4_PRECISION && value.precision() <= MAX_DECIMAL4_PRECISION) { + valueBuffer.writePrimitive(Variants.PhysicalType.DECIMAL4, value); + } else if (value.scale() <= MAX_DECIMAL8_PRECISION + && value.precision() <= MAX_DECIMAL8_PRECISION) { + valueBuffer.writePrimitive(Variants.PhysicalType.DECIMAL8, value); + } else { + valueBuffer.writePrimitive(Variants.PhysicalType.DECIMAL16, value); + } + } + + protected void writeDateInternal(int daysSinceEpoch) { + valueBuffer.writePrimitive(Variants.PhysicalType.DATE, daysSinceEpoch); + } + + /** Writes a timestamp with timezone (microseconds since epoch) to the variant builder. */ + protected void writeTimestampTzInternal(long microsSinceEpoch) { + valueBuffer.writePrimitive(Variants.PhysicalType.TIMESTAMPTZ, microsSinceEpoch); + } + + /** Writes a timestamp without timezone (microseconds since epoch) to the variant builder. */ + protected void writeTimestampNtzInternal(long microsSinceEpoch) { + valueBuffer.writePrimitive(Variants.PhysicalType.TIMESTAMPNTZ, microsSinceEpoch); + } + + protected void writeFloatInternal(float value) { + valueBuffer.writePrimitive(Variants.PhysicalType.FLOAT, value); + } + + protected void writeBinaryInternal(byte[] value) { + valueBuffer.writePrimitive(Variants.PhysicalType.BINARY, ByteBuffer.wrap(value)); + } + + protected void writeStringInternal(String value) { + valueBuffer.writePrimitive(Variants.PhysicalType.STRING, value); + } + + /** Choose the smallest number of bytes to store the given value. */ + protected static int sizeOf(int maxValue) { + if (maxValue <= 0xFF) { + return 1; + } else if (maxValue <= 0xFFFF) { + return 2; + } else if (maxValue <= 0xFFFFFF) { + return 3; + } + + return 4; + } + + /** + * Completes writing an object to the buffer. Object fields are already written, and this method + * inserts header including header byte, number of elements, field IDs, and field offsets. + * + * @param objStartPos The starting position of the object data in the buffer. + * @param fields The list of field entries (key, ID, offset). + */ + protected void endObject(int objStartPos, List fields) { + int numElements = fields.size(); + + // Sort fields by key and ensure no duplicate keys + Collections.sort(fields); + int maxId = numElements == 0 ? 0 : fields.get(0).id; + for (int i = 1; i < numElements; i++) { + maxId = Math.max(maxId, fields.get(i).id); + if (fields.get(i).key.equals(fields.get(i - 1).key)) { + throw new IllegalStateException("Duplicate key in Variant: " + fields.get(i).key); + } + } + + int dataSize = valueBuffer.pos - objStartPos; // Total byte size of the object values + boolean isLarge = numElements > 0xFF; // Determine whether to use large format + int sizeBytes = isLarge ? 4 : 1; // Number of bytes for the object size + int fieldIdSize = sizeOf(maxId); // Number of bytes for each field id + int fieldOffsetSize = sizeOf(dataSize); // Number of bytes for each field offset + int headerSize = + 1 + sizeBytes + numElements * fieldIdSize + (numElements + 1) * fieldOffsetSize; + + // Shift existing data to make room for header + valueBuffer.shift(objStartPos, headerSize); + + valueBuffer.insertByte( + VariantUtil.objectHeader(isLarge, fieldIdSize, fieldOffsetSize), + objStartPos); // Insert header byte + valueBuffer.insertLittleEndianUnsigned( + numElements, sizeBytes, objStartPos + 1); // Insert number of elements + + // Insert field IDs and offsets + int fieldIdStart = objStartPos + 1 + sizeBytes; + int fieldOffsetStart = fieldIdStart + numElements * fieldIdSize; + for (int i = 0; i < numElements; i++) { + valueBuffer.insertLittleEndianUnsigned( + fields.get(i).id, fieldIdSize, fieldIdStart + i * fieldIdSize); + valueBuffer.insertLittleEndianUnsigned( + fields.get(i).offset, fieldOffsetSize, fieldOffsetStart + i * fieldOffsetSize); + } + + // Insert the offset to the end of the data + valueBuffer.insertLittleEndianUnsigned( + dataSize, fieldOffsetSize, fieldOffsetStart + numElements * fieldOffsetSize); + } + + /** + * Completes writing an array to the buffer. Array values are already written, and this method + * inserts header including the header byte, number of elements, and field offsets. + * + * @param arrStartPos The starting position of the array values in the buffer. + * @param offsets The offsets for each array value. + */ + protected void endArray(int arrStartPos, List offsets) { + int dataSize = valueBuffer.pos - arrStartPos; // Total byte size of the array values + int numElements = offsets.size(); + + boolean isLarge = numElements > 0xFF; // Determine whether to use large format + int sizeBytes = isLarge ? 4 : 1; // Number of bytes for the array size + int fieldOffsetSize = sizeOf(dataSize); // Number of bytes of each field offset + int headerSize = 1 + sizeBytes + (numElements + 1) * fieldOffsetSize; // header size + int offsetStart = arrStartPos + 1 + sizeBytes; // Start position for offsets + + // Shift existing data to make room for header + valueBuffer.shift(arrStartPos, headerSize); + + valueBuffer.insertByte( + VariantUtil.arrayHeader(isLarge, fieldOffsetSize), arrStartPos); // Insert header byte + valueBuffer.insertLittleEndianUnsigned( + numElements, sizeBytes, arrStartPos + 1); // Insert number of elements + + // Insert field offsets + for (int i = 0; i < numElements; i++) { + valueBuffer.insertLittleEndianUnsigned( + offsets.get(i), fieldOffsetSize, offsetStart + i * fieldOffsetSize); + } + + // Insert the offset to the end of the data + valueBuffer.insertLittleEndianUnsigned( + dataSize, fieldOffsetSize, offsetStart + numElements * fieldOffsetSize); + } + + /** An auto-growing byte buffer that doubles its size whenever the capacity is exceeded. */ + protected static class ByteBufferWrapper { + private static final int INITIAL_CAPACITY = 128; // Starting capacity + private ByteBuffer buffer; + private int pos = 0; + + ByteBufferWrapper() { + this(INITIAL_CAPACITY); + } + + ByteBufferWrapper(int initialCapacity) { + if (initialCapacity <= 0) { + throw new IllegalArgumentException("Initial capacity must be positive"); + } + this.buffer = ByteBuffer.allocate(initialCapacity).order(ByteOrder.LITTLE_ENDIAN); + } + + /** + * Ensures the buffer has enough capacity to hold additional bytes. + * + * @param additional The number of additional bytes required. + */ + private void ensureCapacity(int additional) { + int required = pos + additional; + if (required > buffer.capacity()) { + int newCapacity = Integer.highestOneBit(required); + newCapacity = newCapacity < required ? newCapacity * 2 : newCapacity; // Double the capacity + + ByteBuffer newBuffer = + ByteBuffer.allocate(newCapacity) + .order(ByteOrder.LITTLE_ENDIAN) + .put(buffer.array(), 0, pos); + buffer = newBuffer; + } + } + + void writePrimitive(Variants.PhysicalType type, T value) { + PrimitiveWrapper wrapper = new PrimitiveWrapper(type, value); + ensureCapacity(pos + wrapper.sizeInBytes()); + wrapper.writeTo(buffer, pos); + pos += wrapper.sizeInBytes(); + } + + /** + * Move the bytes of buffer range [start, pos) by the provided offset position. This is used for + * writing array/object header. + */ + void shift(int start, int offset) { + Preconditions.checkArgument(offset > 0, "offset must be positive"); + Preconditions.checkArgument(pos >= start, "start must be no greater than pos"); + ensureCapacity(offset); + + if (pos > start) { + System.arraycopy(buffer.array(), start, buffer.array(), start + offset, pos - start); + } + + pos += offset; + } + + /** + * Insert a byte into the buffer of the provided position. Note: this assumes shift() has been + * called to leave space for insert. + */ + void insertByte(byte value, int insertPos) { + Preconditions.checkArgument(insertPos < pos, "insertPos must be smaller than pos"); + VariantUtil.writeByteAbsolute(buffer, value, insertPos); + } + + /** + * Insert a number into the buffer of the provided position. Note: this assumes shift() has been + * called to leave space for insert. + */ + void insertLittleEndianUnsigned(long value, int numBytes, int insertPos) { + Preconditions.checkArgument(insertPos < pos, "insertPos must be smaller than pos"); + if (numBytes < 1 || numBytes > 8) { + throw new IllegalArgumentException("numBytes must be between 1 and 8"); + } + + VariantUtil.writeLittleEndianUnsignedAbsolute(buffer, value, insertPos, numBytes); + } + + int pos() { + return pos; + } + } + + /** + * A Variant metadata dictionary implementation which assigns a monotonically increasing assigned + * id to newly added string + */ + protected static class Dictionary { + // Store the mapping from a string to a monotonically increasing assigned id + private final Map stringIds = Maps.newHashMap(); + // Store all the strings encoded with UTF8 in `dictionary` in the order of assigned ids. + private final List utf8Strings = Lists.newArrayList(); + + /** Return the assigned id if string exists; otherwise, assign the next id and return. */ + int add(String key) { + return stringIds.computeIfAbsent( + key, + k -> { + int newId = stringIds.size(); + utf8Strings.add(k.getBytes(StandardCharsets.UTF_8)); + return newId; + }); + } + + int size() { + return utf8Strings.size(); + } + + long totalBytes() { + return utf8Strings.stream().mapToLong(key -> key.length).sum(); + } + + List keys() { + return utf8Strings; + } + } + + /** + * Temporarily store the information of a field. We need to collect all fields in an JSON object, + * sort them by their keys, and build the variant object in sorted order. + */ + protected static final class FieldEntry implements Comparable { + private final String key; + private final int id; + private final int offset; + + FieldEntry(String key, int id, int offset) { + this.key = key; + this.id = id; + this.offset = offset; + } + + @Override + public int compareTo(FieldEntry other) { + return key.compareTo(other.key); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantImpl.java b/core/src/main/java/org/apache/iceberg/variants/VariantImpl.java new file mode 100644 index 000000000000..8672870d0916 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantImpl.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.nio.ByteBuffer; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public final class VariantImpl implements Variant { + // The mask to retrieve the version from first metadata byte + private static final byte VERSION_MASK = 0x0F; + + private final VariantMetadata metadata; + private final VariantValue value; + + public VariantImpl(ByteBuffer metadata, ByteBuffer value) { + Preconditions.checkArgument( + metadata != null && metadata.limit() > 0, "Metadata must not be null or empty."); + Preconditions.checkArgument( + value != null && value.limit() > 0, "Value must not be null or empty."); + + Preconditions.checkArgument( + (metadata.get(0) & VERSION_MASK) == Variants.VERSION, "Unsupported metadata version."); + + this.metadata = SerializedMetadata.from(metadata); + + int header = value.get(0); + Variants.BasicType basicType = VariantUtil.basicType(header); + switch (basicType) { + case PRIMITIVE: + this.value = SerializedPrimitive.from(value.array()); + break; + case ARRAY: + this.value = SerializedArray.from(this.metadata, value.array()); + break; + case OBJECT: + this.value = SerializedObject.from(this.metadata, value.array()); + break; + case SHORT_STRING: + this.value = SerializedShortString.from(value.array()); + break; + default: + throw new UnsupportedOperationException("Unsupported basic type: " + basicType); + } + } + + @Override + public VariantMetadata metadata() { + return metadata; + } + + @Override + public VariantValue value() { + return value; + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantObjectBuilder.java b/core/src/main/java/org/apache/iceberg/variants/VariantObjectBuilder.java new file mode 100644 index 000000000000..e5974cb4e33e --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantObjectBuilder.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.DateTimeUtil; + +public class VariantObjectBuilder extends VariantBuilderBase { + private final List fields; + + VariantObjectBuilder(ByteBufferWrapper valueBuffer, Dictionary dict) { + super(valueBuffer, dict); + fields = Lists.newArrayList(); + } + + public VariantObjectBuilder startObject(String key) { + writeKey(key); + return new VariantObjectBuilder(valueBuffer(), dict()); + } + + public VariantArrayBuilder startArray(String key) { + writeKey(key); + return new VariantArrayBuilder(valueBuffer(), dict()); + } + + private void writeKey(String key) { + int id = dict().add(key); + fields.add(new FieldEntry(key, id, valueBuffer().pos() - startPos())); + } + + public VariantObjectBuilder writeNull(String key) { + writeKey(key); + writeNullInternal(); + return this; + } + + public VariantObjectBuilder writeBoolean(String key, boolean value) { + writeKey(key); + writeBooleanInternal(value); + return this; + } + + public VariantObjectBuilder writeIntegral(String key, long value) { + writeKey(key); + writeIntegralInternal(value); + return this; + } + + public VariantObjectBuilder writeDouble(String key, double value) { + writeKey(key); + writeDoubleInternal(value); + return this; + } + + public VariantObjectBuilder writeDecimal(String key, BigDecimal value) { + writeKey(key); + writeDecimalInternal(value); + return this; + } + + public VariantObjectBuilder writeDate(String key, LocalDate value) { + writeKey(key); + writeDateInternal(DateTimeUtil.daysFromDate(value)); + return this; + } + + public VariantObjectBuilder writeTimestampTz(String key, OffsetDateTime value) { + writeKey(key); + writeTimestampTzInternal(DateTimeUtil.microsFromTimestamptz(value)); + return this; + } + + public VariantObjectBuilder writeTimestampNtz(String key, LocalDateTime value) { + writeKey(key); + writeTimestampNtzInternal(DateTimeUtil.microsFromTimestamp(value)); + return this; + } + + public VariantObjectBuilder writeFloat(String key, float value) { + writeKey(key); + writeFloatInternal(value); + return this; + } + + public VariantObjectBuilder writeBinary(String key, byte[] value) { + writeKey(key); + writeBinaryInternal(value); + return this; + } + + public VariantObjectBuilder writeString(String key, String value) { + writeKey(key); + writeStringInternal(value); + return this; + } + + public void endObject() { + super.endObject(startPos(), fields); + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantPrimitiveBuilder.java b/core/src/main/java/org/apache/iceberg/variants/VariantPrimitiveBuilder.java new file mode 100644 index 000000000000..367e790a5948 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantPrimitiveBuilder.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import org.apache.iceberg.util.DateTimeUtil; + +public class VariantPrimitiveBuilder extends VariantBuilderBase { + public VariantPrimitiveBuilder(ByteBufferWrapper valueBuffer, Dictionary dict) { + super(valueBuffer, dict); + } + + public VariantPrimitiveBuilder writeNull() { + writeNullInternal(); + return this; + } + + public VariantPrimitiveBuilder writeBoolean(boolean value) { + writeBooleanInternal(value); + return this; + } + + public VariantPrimitiveBuilder writeIntegral(long value) { + writeIntegralInternal(value); + return this; + } + + public VariantPrimitiveBuilder writeDouble(double value) { + writeDoubleInternal(value); + return this; + } + + public VariantPrimitiveBuilder writeDecimal(BigDecimal value) { + writeDecimalInternal(value); + return this; + } + + public VariantPrimitiveBuilder writeDate(LocalDate value) { + writeDateInternal(DateTimeUtil.daysFromDate(value)); + return this; + } + + public VariantPrimitiveBuilder writeTimestampTz(OffsetDateTime value) { + writeTimestampTzInternal(DateTimeUtil.microsFromTimestamptz(value)); + return this; + } + + public VariantPrimitiveBuilder writeTimestampNtz(LocalDateTime value) { + writeTimestampNtzInternal(DateTimeUtil.microsFromTimestamp(value)); + return this; + } + + public VariantPrimitiveBuilder writeFloat(float value) { + writeFloatInternal(value); + return this; + } + + public VariantPrimitiveBuilder writeBinary(byte[] value) { + writeBinaryInternal(value); + return this; + } + + public VariantPrimitiveBuilder writeString(String value) { + writeStringInternal(value); + return this; + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java index d6b78fe899e6..ecc8f3d919b9 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -44,15 +44,32 @@ static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer toCopy) return toCopy.remaining(); } + static void writeByteAbsolute(ByteBuffer buffer, int value, int offset) { + int originalPosition = buffer.position(); + buffer.put(offset, (byte) (value & 0xFF)); + buffer.position(originalPosition); + } + + static void writeLittleEndianUnsignedAbsolute( + ByteBuffer buffer, long value, int offset, int size) { + int originalPosition = buffer.position(); + buffer.position(0); + writeLittleEndianUnsigned(buffer, value, offset, size); + buffer.position(originalPosition); + } + static void writeByte(ByteBuffer buffer, int value, int offset) { buffer.put(buffer.position() + offset, (byte) (value & 0xFF)); } - static void writeLittleEndianUnsigned(ByteBuffer buffer, int value, int offset, int size) { + static void writeLittleEndianUnsigned(ByteBuffer buffer, long value, int offset, int size) { int base = buffer.position() + offset; switch (size) { + case 8: + buffer.putLong(base, value); + return; case 4: - buffer.putInt(base, value); + buffer.putInt(base, (int) (value & 0xFFFFFFFF)); return; case 3: buffer.putShort(base, (short) (value & 0xFFFF)); @@ -164,10 +181,18 @@ static int sizeOf(int maxValue) { } } + public static byte metadataHeader(int version, int offsetSize) { + return (byte) (version | ((offsetSize - 1) << 6)); + } + static byte primitiveHeader(int primitiveType) { return (byte) (primitiveType << Variants.Primitives.PRIMITIVE_TYPE_SHIFT); } + public static byte shortStrHeader(int size) { + return (byte) (size << 2 | 0b01); + } + static byte objectHeader(boolean isLarge, int fieldIdSize, int offsetSize) { return (byte) ((isLarge ? 0b1000000 : 0) | ((fieldIdSize - 1) << 4) | ((offsetSize - 1) << 2) | 0b10); diff --git a/core/src/main/java/org/apache/iceberg/variants/Variants.java b/core/src/main/java/org/apache/iceberg/variants/Variants.java index e10682fe544a..14b1d4b009a7 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variants.java +++ b/core/src/main/java/org/apache/iceberg/variants/Variants.java @@ -25,6 +25,9 @@ import org.apache.iceberg.util.DateTimeUtil; public class Variants { + // Variant version + public static final byte VERSION = 1; + private Variants() {} enum LogicalType { diff --git a/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderArray.java b/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderArray.java new file mode 100644 index 000000000000..1c8061b297ef --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderArray.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.util.List; +import net.minidev.json.JSONArray; +import org.apache.iceberg.util.DateTimeUtil; +import org.junit.jupiter.api.Test; + +public class TestVariantBuilderArray { + @Test + public void testSimpleArrayJson() throws IOException { + List input = List.of("Ford", "BMW", "Fiat"); + Variant variant = VariantBuilder.parseJson(JSONArray.toJSONString(input)); + VariantArray arr = variant.value().asArray(); + + assertThat(arr.type()).isEqualTo(Variants.PhysicalType.ARRAY); + for (int i = 0; i < arr.numElements(); i++) { + assertThat(arr.get(i).asPrimitive().get()).isEqualTo(input.get(i)); + } + } + + @Test + public void testArrayJson() { + String input = + "[{\n" + + " \"firstName\": \"John\"," + + " \"lastName\": \"Smith\"," + + " \"age\": 25,\n" + + " \"address\" : {\n" + + " \"streetAddress\": \"21 2nd Street\",\n" + + " \"city\": \"New York\",\n" + + " \"state\": \"NY\",\n" + + " \"postalCode\": \"10021\"\n" + + " },\n" + + " \"phoneNumber\": [\n" + + " {\"type\": \"home\", \"number\": \"212 555-1234\"},\n" + + " {\"type\": \"fax\", \"number\": \"646 555-4567\"}\n" + + " ]\n" + + " }]"; + validateVariant(VariantBuilder.parseJson(input)); + } + + @Test + public void testBuildSimpleArray() { + List input = List.of("Ford", "BMW", "Fiat"); + VariantArrayBuilder builder = new VariantBuilder().startArray(); + for (String str : input) { + builder.writeString(str); + } + builder.endArray(); + + Variant variant = builder.build(); + VariantArray arr = variant.value().asArray(); + + assertThat(arr.type()).isEqualTo(Variants.PhysicalType.ARRAY); + assertThat(arr.numElements()).isEqualTo(3); + for (int i = 0; i < arr.numElements(); i++) { + assertThat(arr.get(i).asPrimitive().get()).isEqualTo(input.get(i)); + } + } + + @Test + public void testBuildArray() { + VariantArrayBuilder builder = new VariantBuilder().startArray(); + builder + .writeNull() + .writeBoolean(true) + .writeBoolean(false) + .writeIntegral(34) + .writeIntegral(1234) + .writeIntegral(1234567890) + .writeIntegral(1234567890987654321L) + .writeDouble(1234e-2) + .writeDecimal(new BigDecimal("123456.789")) + .writeDecimal(new BigDecimal("123456789.987654321")) + .writeDecimal(new BigDecimal("12345678901234567890.987654321")) + .writeDate(LocalDate.parse("2017-08-18")) + .writeTimestampTz(OffsetDateTime.parse("2017-08-18T14:21:01.919+00:00")) + .writeTimestampNtz(LocalDateTime.parse("2017-08-18T14:21:01.919")) + .writeFloat(12.34f) + .writeBinary("iceberg".getBytes()) + .writeString("This test string is used to generate a primitive string type of variant") + .writeString("iceberg"); + builder.startArray().writeString("Ford").writeString("BMW").writeString("Fiat").endArray(); + + builder + .startObject() + .writeString("firstName", "John") + .writeString("lastName", "Smith") + .writeIntegral("age", 25) + .endObject(); + builder.endArray(); + + Variant variant = builder.build(); + VariantArray arr = variant.value().asArray(); + assertThat(arr.type()).isEqualTo(Variants.PhysicalType.ARRAY); + assertThat(arr.numElements()).isEqualTo(20); + assertThat(arr.get(0).asPrimitive().get()).isNull(); + assertThat(arr.get(1).asPrimitive().get()).isEqualTo(true); + assertThat(arr.get(2).asPrimitive().get()).isEqualTo(false); + assertThat(arr.get(3).asPrimitive().get()).isEqualTo((byte) 34); + assertThat(arr.get(4).asPrimitive().get()).isEqualTo((short) 1234); + assertThat(arr.get(5).asPrimitive().get()).isEqualTo(1234567890); + assertThat(arr.get(6).asPrimitive().get()).isEqualTo(1234567890987654321L); + assertThat(arr.get(7).asPrimitive().get()).isEqualTo(12.34); + assertThat(arr.get(8).asPrimitive().get()).isEqualTo(new BigDecimal("123456.789")); + assertThat(arr.get(9).asPrimitive().get()).isEqualTo(new BigDecimal("123456789.987654321")); + assertThat(arr.get(10).asPrimitive().get()) + .isEqualTo(new BigDecimal("12345678901234567890.987654321")); + assertThat(arr.get(11).asPrimitive().get()) + .isEqualTo(DateTimeUtil.daysFromDate(LocalDate.parse("2017-08-18"))); + assertThat(arr.get(12).asPrimitive().get()) + .isEqualTo( + DateTimeUtil.microsFromTimestamptz( + OffsetDateTime.parse("2017-08-18T14:21:01.919+00:00"))); + assertThat(arr.get(13).asPrimitive().get()) + .isEqualTo( + DateTimeUtil.microsFromTimestamp(LocalDateTime.parse("2017-08-18T14:21:01.919"))); + assertThat(arr.get(14).asPrimitive().get()).isEqualTo(12.34f); + assertThat(arr.get(15).asPrimitive().get()).isEqualTo(ByteBuffer.wrap("iceberg".getBytes())); + assertThat(arr.get(16).asPrimitive().get()) + .isEqualTo("This test string is used to generate a primitive string type of variant"); + assertThat(arr.get(17).asPrimitive().get()).isEqualTo("iceberg"); + assertThat(arr.get(18).type()).isEqualTo(Variants.PhysicalType.ARRAY); + + assertThat(arr.get(19).type()).isEqualTo(Variants.PhysicalType.OBJECT); + } + + private void validateVariant(Variant variant) { + VariantArray arr = variant.value().asArray(); + assertThat(arr.numElements()).isEqualTo(1); + + VariantObject object = arr.get(0).asObject(); + assertThat(object.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(object.numFields()).isEqualTo(5); + + assertThat(object.get("firstName").asPrimitive().get()).isEqualTo("John"); + assertThat(object.get("lastName").asPrimitive().get()).isEqualTo("Smith"); + assertThat(object.get("age").asPrimitive().get()).isEqualTo((byte) 25); + + VariantObject address = object.get("address").asObject(); + assertThat(address.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(address.numFields()).isEqualTo(4); + assertThat(address.get("streetAddress").asPrimitive().get()).isEqualTo("21 2nd Street"); + assertThat(address.get("city").asPrimitive().get()).isEqualTo("New York"); + assertThat(address.get("state").asPrimitive().get()).isEqualTo("NY"); + assertThat(address.get("postalCode").asPrimitive().get()).isEqualTo("10021"); + + VariantArray phoneNumbers = object.get("phoneNumber").asArray(); + assertThat(phoneNumbers.numElements()).isEqualTo(2); + VariantObject phoneNumber1 = phoneNumbers.get(0).asObject(); + assertThat(phoneNumber1.get("type").asPrimitive().get()).isEqualTo("home"); + assertThat(phoneNumber1.get("number").asPrimitive().get()).isEqualTo("212 555-1234"); + VariantObject phoneNumber2 = phoneNumbers.get(1).asObject(); + assertThat(phoneNumber2.get("type").asPrimitive().get()).isEqualTo("fax"); + assertThat(phoneNumber2.get("number").asPrimitive().get()).isEqualTo("646 555-4567"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderObject.java b/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderObject.java new file mode 100644 index 000000000000..7cad657d0993 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderObject.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.within; + +import java.io.IOException; +import java.math.BigDecimal; +import org.junit.jupiter.api.Test; + +public class TestVariantBuilderObject { + @Test + public void testEmptyObjectJson() throws IOException { + Variant variant = VariantBuilder.parseJson("{}"); + VariantObject object = variant.value().asObject(); + + assertThat(object.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(object.numFields()).isEqualTo(0); + } + + @Test + public void testNumbers() { + String input = + "{\n" + + " \"intVal1\": 1234," + + " \"intVal2\": 12345678901234567890," + + " \"floatVal1\": 1.234," + + " \"floatVal2\": 1.234e-10," + + " \"floatVal3\": 1.234e10," + + " \"floatVal4\": 12345678901234567890123456789012345678.90" + + " }"; + + Variant variant = VariantBuilder.parseJson(input); + VariantObject object = variant.value().asObject(); + + assertThat(object.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(object.get("intVal1").asPrimitive().get()).isEqualTo((short) 1234); + assertThat(object.get("intVal2").asPrimitive().get()) + .isEqualTo(new BigDecimal("12345678901234567890")); + assertThat(object.get("floatVal1").asPrimitive().get()).isEqualTo(new BigDecimal("1.234")); + assertThat(object.get("floatVal2").asPrimitive().get()).isEqualTo(new BigDecimal("1.234e-10")); + assertThat(object.get("floatVal3").asPrimitive().get()).isEqualTo(1.234e10); + assertThat((double) object.get("floatVal4").asPrimitive().get()) + .isCloseTo(12345678901234567890123456789012345678.90, within(0.000001)); + } + + @Test + public void testObjectJson() { + String input = + "{\n" + + " \"firstName\": \"John\"," + + " \"lastName\": \"Smith\"," + + " \"age\": 25,\n" + + " \"address\" : {\n" + + " \"streetAddress\": \"21 2nd Street\",\n" + + " \"city\": \"New York\",\n" + + " \"state\": \"NY\",\n" + + " \"postalCode\": \"10021\"\n" + + " },\n" + + " \"phoneNumber\": [\n" + + " {\"type\": \"home\", \"number\": \"212 555-1234\"},\n" + + " {\"type\": \"fax\", \"number\": \"646 555-4567\"}\n" + + " ]\n" + + " }"; + + validateVariant(VariantBuilder.parseJson(input)); + } + + @Test + public void testBuildObject() { + VariantObjectBuilder builder = + new VariantBuilder() + .startObject() + .writeString("firstName", "John") + .writeString("lastName", "Smith") + .writeIntegral("age", 25); + builder + .startObject("address") + .writeString("streetAddress", "21 2nd Street") + .writeString("city", "New York") + .writeString("state", "NY") + .writeString("postalCode", "10021") + .endObject(); + VariantArrayBuilder phoneNumberBuilder = builder.startArray("phoneNumber"); + phoneNumberBuilder + .startObject() + .writeString("type", "home") + .writeString("number", "212 555-1234") + .endObject(); + phoneNumberBuilder + .startObject() + .writeString("type", "fax") + .writeString("number", "646 555-4567") + .endObject(); + phoneNumberBuilder.endArray(); + builder.endObject(); + + validateVariant(builder.build()); + } + + private void validateVariant(Variant variant) { + VariantObject object = variant.value().asObject(); + + assertThat(object.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(object.numFields()).isEqualTo(5); + + assertThat(object.get("firstName").asPrimitive().get()).isEqualTo("John"); + assertThat(object.get("lastName").asPrimitive().get()).isEqualTo("Smith"); + assertThat(object.get("age").asPrimitive().get()).isEqualTo((byte) 25); + + VariantObject address = object.get("address").asObject(); + assertThat(address.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(address.numFields()).isEqualTo(4); + assertThat(address.get("streetAddress").asPrimitive().get()).isEqualTo("21 2nd Street"); + assertThat(address.get("city").asPrimitive().get()).isEqualTo("New York"); + assertThat(address.get("state").asPrimitive().get()).isEqualTo("NY"); + assertThat(address.get("postalCode").asPrimitive().get()).isEqualTo("10021"); + + VariantArray phoneNumbers = object.get("phoneNumber").asArray(); + assertThat(phoneNumbers.numElements()).isEqualTo(2); + VariantObject phoneNumber1 = phoneNumbers.get(0).asObject(); + assertThat(phoneNumber1.get("type").asPrimitive().get()).isEqualTo("home"); + assertThat(phoneNumber1.get("number").asPrimitive().get()).isEqualTo("212 555-1234"); + VariantObject phoneNumber2 = phoneNumbers.get(1).asObject(); + assertThat(phoneNumber2.get("type").asPrimitive().get()).isEqualTo("fax"); + assertThat(phoneNumber2.get("number").asPrimitive().get()).isEqualTo("646 555-4567"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderPrimitive.java b/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderPrimitive.java new file mode 100644 index 000000000000..ce045b9b58a2 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/variants/TestVariantBuilderPrimitive.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.util.stream.Stream; +import org.apache.iceberg.util.DateTimeUtil; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestVariantBuilderPrimitive { + private static Stream primitiveInputs() { + return Stream.of( + Arguments.of("null", Variants.PhysicalType.NULL, null), + Arguments.of("true", Variants.PhysicalType.BOOLEAN_TRUE, true), + Arguments.of("false", Variants.PhysicalType.BOOLEAN_FALSE, false), + Arguments.of("34", Variants.PhysicalType.INT8, (byte) 34), + Arguments.of("1234", Variants.PhysicalType.INT16, (short) 1234), + Arguments.of("1234567890", Variants.PhysicalType.INT32, 1234567890), + Arguments.of("1234567890987654321", Variants.PhysicalType.INT64, 1234567890987654321L), + Arguments.of("1234e-2", Variants.PhysicalType.DECIMAL4, new BigDecimal("12.34")), + Arguments.of("123456.789", Variants.PhysicalType.DECIMAL4, new BigDecimal("123456.789")), + Arguments.of( + "123456789.987654321", + Variants.PhysicalType.DECIMAL8, + new BigDecimal("123456789.987654321")), + Arguments.of( + "12345678901234567890.987654321", + Variants.PhysicalType.DECIMAL16, + new BigDecimal("12345678901234567890.987654321")), + Arguments.of( + "\"This test string is used to generate a primitive string type of variant\"", + Variants.PhysicalType.STRING, + "This test string is used to generate a primitive string type of variant")); + } + + @ParameterizedTest + @MethodSource("primitiveInputs") + public void testPrimitiveJson( + String input, Variants.PhysicalType expectedType, Object expectedValue) { + Variant variant = VariantBuilder.parseJson(input); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(expectedType); + assertThat(primitive.get()).isEqualTo(expectedValue); + } + + @Test + public void testShortStringJson() throws IOException { + Variant variant = VariantBuilder.parseJson("\"iceberg\""); + VariantPrimitive shortString = variant.value().asPrimitive(); + + assertThat(shortString.type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(shortString.get()).isEqualTo("iceberg"); + } + + @Test + public void testPrimitiveNull() { + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeNull(); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.NULL); + assertThat(primitive.get()).isEqualTo(null); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testPrimitiveBoolean(boolean value) { + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeBoolean(value); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()) + .isEqualTo( + value ? Variants.PhysicalType.BOOLEAN_TRUE : Variants.PhysicalType.BOOLEAN_FALSE); + assertThat(primitive.get()).isEqualTo(value); + } + + private static Stream testPrimitiveNumericInputs() { + return Stream.of( + Arguments.of(34, Variants.PhysicalType.INT8, (byte) 34), + Arguments.of(1234, Variants.PhysicalType.INT16, (short) 1234), + Arguments.of(1234567890, Variants.PhysicalType.INT32, 1234567890), + Arguments.of(1234567890987654321L, Variants.PhysicalType.INT64, 1234567890987654321L)); + } + + @ParameterizedTest + @MethodSource("testPrimitiveNumericInputs") + public void testPrimitiveNumeric(long value, Variants.PhysicalType type, Object expectedValue) { + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeIntegral(value); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(type); + assertThat(primitive.get()).isEqualTo(expectedValue); + } + + @Test + public void testPrimitiveDouble() { + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeDouble(1234e-2); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.DOUBLE); + assertThat(primitive.get()).isEqualTo(12.34); + } + + private static Stream testPrimitiveDecimalInputs() { + return Stream.of( + Arguments.of(new BigDecimal("123456.789"), Variants.PhysicalType.DECIMAL4), + Arguments.of(new BigDecimal("123456789.987654321"), Variants.PhysicalType.DECIMAL8), + Arguments.of( + new BigDecimal("12345678901234567890.987654321"), Variants.PhysicalType.DECIMAL16)); + } + + @ParameterizedTest + @MethodSource("testPrimitiveDecimalInputs") + public void testPrimitiveDecimal(BigDecimal value, Variants.PhysicalType type) { + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeDecimal(value); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(type); + assertThat(primitive.get()).isEqualTo(value); + } + + @Test + public void testPrimitiveDate() { + String dateString = "2017-08-18"; + LocalDate date = LocalDate.parse(dateString); + + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeDate(date); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.DATE); + assertThat(primitive.get()).isEqualTo(DateTimeUtil.daysFromDate(date)); + } + + @Test + public void testPrimitiveTimestampTZ() { + String tzString = "2017-08-18T14:21:01.919+00:00"; + OffsetDateTime ts = OffsetDateTime.parse(tzString); + + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeTimestampTz(ts); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.TIMESTAMPTZ); + assertThat(primitive.get()).isEqualTo(DateTimeUtil.microsFromTimestamptz(ts)); + } + + @Test + public void testPrimitiveTimestampNTZ() { + String ntzString = "2017-08-18T14:21:01.919"; + LocalDateTime ts = LocalDateTime.parse(ntzString); + + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeTimestampNtz(ts); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.TIMESTAMPNTZ); + assertThat(primitive.get()).isEqualTo(DateTimeUtil.microsFromTimestamp(ts)); + } + + @Test + public void testPrimitiveFloat() { + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeFloat(12.34f); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.FLOAT); + assertThat(primitive.get()).isEqualTo(12.34f); + } + + @Test + public void testPrimitiveBinary() { + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeBinary("iceberg".getBytes()); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.BINARY); + assertThat(primitive.get()).isEqualTo(ByteBuffer.wrap("iceberg".getBytes())); + } + + @Test + public void testPrimitiveString() { + String value = "This test string is used to generate a primitive string type of variant"; + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeString(value); + Variant variant = builder.build(); + VariantPrimitive primitive = variant.value().asPrimitive(); + + assertThat(primitive.type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(primitive.get()).isEqualTo(value); + } + + @Test + public void testPrimitiveShortString() { + String value = "iceberg"; + VariantPrimitiveBuilder builder = new VariantBuilder().createPrimitive(); + builder.writeString(value); + Variant variant = builder.build(); + VariantPrimitive shortString = variant.value().asPrimitive(); + + assertThat(shortString.type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(shortString.get()).isEqualTo("iceberg"); + } +}