diff --git a/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java b/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java new file mode 100644 index 0000000000..15aad17d84 --- /dev/null +++ b/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.profile.GCProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** Benchmarks for {@link UuidVector}. 
*/ +@State(Scope.Benchmark) +public class UuidVectorBenchmarks { + // checkstyle:off: MissingJavadocMethod + + private static final int VECTOR_LENGTH = 10_000; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private UuidVector vector; + + private UUID[] testUuids; + + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + vector = new UuidVector("vector", allocator); + vector.allocateNew(VECTOR_LENGTH); + + // Pre-generate UUIDs and store them so the get* benchmarks read real, non-null values + testUuids = new UUID[VECTOR_LENGTH]; + for (int i = 0; i < VECTOR_LENGTH; i++) { + testUuids[i] = new UUID(i, i * 2L); + vector.setSafe(i, testUuids[i]); + } + vector.setValueCount(VECTOR_LENGTH); + } + + @TearDown + public void tearDown() { + vector.close(); + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithUuidHolder() { + UuidHolder holder = new UuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + // Round-trip through the holder: read the value into it, then write it back + vector.get(i, holder); + vector.setSafe(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithNullableUuidHolder() { + NullableUuidHolder holder = new NullableUuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + // Every third slot is written as null; the others round-trip through the holder + holder.isSet = i % 3 == 0 ? 
0 : 1; + if (holder.isSet == 1) { + vector.get(i, holder); + } + vector.setSafe(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setUuidDirectly() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.setSafe(i, testUuids[i]); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithWriter() { + UuidWriterImpl writer = new UuidWriterImpl(vector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + if (i % 3 != 0) { + // Writes land at the writer's current position, so move it to slot i first + writer.setPosition(i); + writer.writeExtension(testUuids[i]); + } + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getWithUuidHolder() { + UuidHolder holder = new UuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.get(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getWithNullableUuidHolder() { + NullableUuidHolder holder = new NullableUuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.get(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getUuidDirectly() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + UUID uuid = vector.getObject(i); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = + new OptionsBuilder() + .include(UuidVectorBenchmarks.class.getSimpleName()) + .forks(1) + .addProfiler(GCProfiler.class) + .build(); + + new Runner(opt).run(); + } + // checkstyle:on: MissingJavadocMethod +} diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java index c662a6e064..26d07385a9 100644 --- a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java +++ 
b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector; +import static 
org.apache.arrow.memory.util.MemoryUtil.LITTLE_ENDIAN; import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH; import java.nio.ByteBuffer; @@ -23,7 +24,9 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.extension.UuidType; @@ -132,7 +135,8 @@ public int hashCode(int index) { @Override public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); + int start = this.getStartOffset(index); + return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + UUID_BYTE_WIDTH); } /** @@ -146,17 +150,24 @@ public int isSet(int index) { } /** - * Gets the UUID value at the given index as an ArrowBuf. + * Reads the UUID value at the given index into a UuidHolder. 
* - * @param index the index to retrieve - * @return a buffer slice containing the 16-byte UUID - * @throws IllegalStateException if the value at the index is null and null checking is enabled + * @param index the index to read from + * @param holder the holder to populate with the UUID data */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + public void get(int index, UuidHolder holder) { + Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector."); + final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); + final long start = (long) index * UUID_BYTE_WIDTH; + final long next = start + Long.BYTES; + // UUIDs are stored in big-endian byte order in Arrow buffers. + // ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems. + if (LITTLE_ENDIAN) { + holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(start)); + holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(next)); } else { - return getBufferSlicePostNullCheck(index); + holder.mostSigBits = dataBuffer.getLong(start); + holder.leastSigBits = dataBuffer.getLong(next); } } @@ -167,23 +178,33 @@ public ArrowBuf get(int index) throws IllegalStateException { * @param holder the holder to populate with the UUID data */ public void get(int index, NullableUuidHolder holder) { + Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector."); if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { holder.isSet = 0; } else { holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); + final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); + final long offset = (long) index * UUID_BYTE_WIDTH; + // UUIDs are stored in big-endian byte order in Arrow buffers. 
+ // ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems. + if (LITTLE_ENDIAN) { + holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(offset)); + holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(offset + Long.BYTES)); + } else { + holder.mostSigBits = dataBuffer.getLong(offset); + holder.leastSigBits = dataBuffer.getLong(offset + Long.BYTES); + } } } /** - * Reads the UUID value at the given index into a UuidHolder. + * Calculates the byte offset for a given index in the data buffer. * - * @param index the index to read from - * @param holder the holder to populate with the UUID data + * @param index the index of the UUID value + * @return the byte offset in the data buffer; throws ArithmeticException on int overflow */ - public void get(int index, UuidHolder holder) { - holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); + public final int getStartOffset(int index) { + return Math.multiplyExact(index, UUID_BYTE_WIDTH); } /** @@ -207,7 +228,7 @@ public void set(int index, UUID value) { * @param holder the holder containing the UUID data */ public void set(int index, UuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); + set(index, holder.getUuid()); } /** @@ -217,28 +238,11 @@ public void set(int index, UuidHolder holder) { * @param holder the holder containing the UUID data */ public void set(int index, NullableUuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); - } - - /** - * Sets the UUID value at the given index with explicit null flag. - * - * @param index the index to set - * @param isSet 1 if the value is set, 0 if null - * @param buffer the buffer containing the 16-byte UUID data - */ - public void set(int index, int isSet, ArrowBuf buffer) { - getUnderlyingVector().set(index, isSet, buffer); - } - - /** - * Sets the UUID value at the given index from an ArrowBuf. 
- * - * @param index the index to set - * @param value the buffer containing the 16-byte UUID data - */ - public void set(int index, ArrowBuf value) { - getUnderlyingVector().set(index, value); + if (holder.isSet == 0) { + getUnderlyingVector().setNull(index); + } else { + set(index, holder.getUuid()); + } } /** @@ -249,10 +253,12 @@ public void set(int index, ArrowBuf value) { * @param sourceOffset the offset in the source buffer where the UUID data starts */ public void set(int index, ArrowBuf source, int sourceOffset) { - // Copy bytes from source buffer to target vector data buffer - ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); - dataBuffer.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); - getUnderlyingVector().setIndexDefined(index); + Preconditions.checkNotNull(source, "Cannot set UUID vector, the source buffer is null."); + + BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index); + getUnderlyingVector() + .getDataBuffer() + .setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); } /** @@ -286,10 +292,10 @@ public void setSafe(int index, UUID value) { * @param holder the holder containing the UUID data, or null to set a null value */ public void setSafe(int index, NullableUuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { + if (holder == null || holder.isSet == 0) { getUnderlyingVector().setNull(index); + } else { + setSafe(index, holder.getUuid()); } } @@ -297,14 +303,23 @@ public void setSafe(int index, NullableUuidHolder holder) { * Sets the UUID value at the given index from a UuidHolder, expanding capacity if needed. 
* * @param index the index to set - * @param holder the holder containing the UUID data, or null to set a null value + * @param holder the holder containing the UUID data */ public void setSafe(int index, UuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { - getUnderlyingVector().setNull(index); - } + setSafe(index, holder.getUuid()); + } + + /** + * Sets the UUID value at the given index by copying from a source buffer, expanding capacity if + * needed. + * + * @param index the index to set + * @param buffer the source buffer to copy from + * @param start the offset in the source buffer where the UUID data starts + */ + public void setSafe(int index, ArrowBuf buffer, int start) { + getUnderlyingVector().handleSafe(index); + this.set(index, buffer, start); } /** @@ -400,15 +415,9 @@ public TransferPair getTransferPair(BufferAllocator allocator) { return getTransferPair(this.getField().getName(), allocator); } - private ArrowBuf getBufferSlicePostNullCheck(int index) { - return getUnderlyingVector() - .getDataBuffer() - .slice((long) index * UUID_BYTE_WIDTH, UUID_BYTE_WIDTH); - } - @Override public int getTypeWidth() { - return getUnderlyingVector().getTypeWidth(); + return UUID_BYTE_WIDTH; } /** {@link TransferPair} for {@link UuidVector}. */ diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java new file mode 100644 index 0000000000..cbf3fd5399 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.types.Types; + +/** + * Reader implementation for reading UUID values from a {@link NullableUuidHolder}. + * + *

This reader wraps a single UUID holder value and provides methods to read from it. Unlike + * {@link UuidReaderImpl} which reads from a vector, this reader operates on a holder instance. + * + * @see NullableUuidHolder + * @see UuidReaderImpl + */ +public class NullableUuidHolderReaderImpl extends AbstractFieldReader { + private final NullableUuidHolder holder; + + /** + * Constructs a reader for the given UUID holder. + * + * @param holder the UUID holder to read from + */ + public NullableUuidHolderReaderImpl(NullableUuidHolder holder) { + this.holder = holder; + } + + @Override + public int size() { + throw new UnsupportedOperationException( + "size() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not a collection. " + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public boolean next() { + throw new UnsupportedOperationException( + "next() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not an iterator. " + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public void setPosition(int index) { + throw new UnsupportedOperationException( + "setPosition() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not a vector. 
" + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public Types.MinorType getMinorType() { + return Types.MinorType.EXTENSIONTYPE; + } + + @Override + public boolean isSet() { + return holder.isSet == 1; + } + + @Override + public void read(ExtensionHolder h) { + if (h instanceof NullableUuidHolder) { + NullableUuidHolder nullableHolder = (NullableUuidHolder) h; + nullableHolder.mostSigBits = this.holder.mostSigBits; + nullableHolder.leastSigBits = this.holder.leastSigBits; + nullableHolder.isSet = this.holder.isSet; + } else if (h instanceof UuidHolder) { + UuidHolder uuidHolder = (UuidHolder) h; + uuidHolder.mostSigBits = this.holder.mostSigBits; + uuidHolder.leastSigBits = this.holder.leastSigBits; + } else { + throw new IllegalArgumentException( + "Unsupported holder type: " + + h.getClass().getName() + + ". " + + "Only NullableUuidHolder and UuidHolder are supported for UUID values. " + + "Provided holder type cannot be used to read UUID data."); + } + } + + @Override + public Object readObject() { + if (!isSet()) { + return null; + } + // Convert UUID longs to Java UUID object + return holder.getUuid(); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java index 8a78add11c..3f60ca9223 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -50,8 +50,15 @@ public void writeExtension(Object value) { vector.setSafe(getPosition(), (ArrowBuf) value); } else if (value instanceof java.util.UUID) { vector.setSafe(getPosition(), (java.util.UUID) value); + } else if (value instanceof ExtensionHolder) { + write((ExtensionHolder) value); } else { - throw new IllegalArgumentException("Unsupported value type for UUID: " + value.getClass()); + throw new IllegalArgumentException( + "Unsupported value type 
for UUID: " + + value.getClass().getName() + + ". " + + "Supported types are: byte[] (16 bytes), ArrowBuf (16 bytes), java.util.UUID, " + + "or ExtensionHolder. Convert your value to one of these types before writing."); } vector.setValueCount(getPosition() + 1); } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java index e5398d82cf..ee2d95385b 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java @@ -16,20 +16,35 @@ */ package org.apache.arrow.vector.holders; -import org.apache.arrow.memory.ArrowBuf; +import java.util.UUID; /** * Value holder for nullable UUID values. * *

The {@code isSet} field controls nullability: when {@code isSet = 1}, the holder contains a - * valid UUID in {@code buffer}; when {@code isSet = 0}, the holder represents a null value and - * {@code buffer} should not be accessed. + * valid UUID represented as two longs; when {@code isSet = 0}, the holder represents a null value + * and the long fields should not be accessed. * * @see UuidHolder * @see org.apache.arrow.vector.UuidVector * @see org.apache.arrow.vector.extension.UuidType */ public class NullableUuidHolder extends ExtensionHolder { - /** Buffer containing 16-byte UUID data. */ - public ArrowBuf buffer; + /** The most significant 64 bits of the UUID. */ + public long mostSigBits; + + /** The least significant 64 bits of the UUID. */ + public long leastSigBits; + + /** + * Converts the holder's two longs to a UUID object. + * + * @return the UUID represented by this holder, or null if isSet is 0 + */ + public UUID getUuid() { + if (this.isSet == 0) { + return null; + } + return new UUID(mostSigBits, leastSigBits); + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java index 484e05c24b..2204f13425 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java @@ -16,23 +16,35 @@ */ package org.apache.arrow.vector.holders; -import org.apache.arrow.memory.ArrowBuf; +import java.util.UUID; /** * Value holder for non-nullable UUID values. * - *

Contains a 16-byte UUID in {@code buffer} with {@code isSet} always 1. + *

Contains a 16-byte UUID represented as two longs with {@code isSet} always 1. * * @see NullableUuidHolder * @see org.apache.arrow.vector.UuidVector * @see org.apache.arrow.vector.extension.UuidType */ public class UuidHolder extends ExtensionHolder { - /** Buffer containing 16-byte UUID data. */ - public ArrowBuf buffer; + /** The most significant 64 bits of the UUID. */ + public long mostSigBits; + + /** The least significant 64 bits of the UUID. */ + public long leastSigBits; /** Constructs a UuidHolder with isSet = 1. */ public UuidHolder() { this.isSet = 1; } + + /** + * Converts the holder's two longs to a UUID object. + * + * @return the UUID represented by this holder + */ + public UUID getUuid() { + return new UUID(mostSigBits, leastSigBits); + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 41a95a8d11..6104f4e82b 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -49,7 +49,6 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1259,12 +1258,12 @@ public void testListVectorReaderForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } @@ -1300,12 +1299,12 @@ public void 
testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index df8f338f45..ba460a10a4 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -50,7 +50,6 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1304,12 +1303,12 @@ public void testMapVectorWithExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } @@ -1349,12 +1348,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); 
assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java index 9f7c65b82b..bbf5620f32 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java @@ -233,7 +233,8 @@ void testVectorByteArrayOperations() { // Verify the bytes match byte[] actualBytes = new byte[UuidType.UUID_BYTE_WIDTH]; - uuidVector.get(0).getBytes(0, actualBytes); + int offset = uuidVector.getStartOffset(0); + uuidVector.getDataBuffer().getBytes(offset, actualBytes); assertArrayEquals(uuidBytes, actualBytes); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java index 3d70238ece..01a4efd7d0 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java @@ -22,11 +22,11 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.nio.ByteBuffer; import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.NullableUuidHolderReaderImpl; import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.extension.UuidType; @@ -60,21 +60,14 @@ void testWriteToExtensionVector() throws Exception { try (UuidVector vector = new UuidVector("test", allocator); UuidWriterImpl writer = new UuidWriterImpl(vector)) { UUID uuid = 
UUID.randomUUID(); - ByteBuffer bb = ByteBuffer.allocate(UuidType.UUID_BYTE_WIDTH); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - // Allocate ArrowBuf for the holder - try (ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { - buf.setBytes(0, bb.array()); - - UuidHolder holder = new UuidHolder(); - holder.buffer = buf; + UuidHolder holder = new UuidHolder(); + holder.mostSigBits = uuid.getMostSignificantBits(); + holder.leastSigBits = uuid.getLeastSignificantBits(); - writer.write(holder); - UUID result = vector.getObject(0); - assertEquals(uuid, result); - } + writer.write(holder); + UUID result = vector.getObject(0); + assertEquals(uuid, result); } } @@ -135,8 +128,8 @@ void testWriteExtensionWithUnsupportedType() throws Exception { IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type")); - assertEquals( - "Unsupported value type for UUID: class java.lang.String", exception.getMessage()); + assertTrue( + exception.getMessage().contains("Unsupported value type for UUID: java.lang.String")); } } @@ -165,14 +158,12 @@ void testWriteExtensionMultipleValues() throws Exception { @Test void testWriteWithUuidHolder() throws Exception { try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector); - ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + UuidWriterImpl writer = new UuidWriterImpl(vector)) { UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - buf.setBytes(0, uuidBytes); UuidHolder holder = new UuidHolder(); - holder.buffer = buf; + holder.mostSigBits = uuid.getMostSignificantBits(); + holder.leastSigBits = uuid.getLeastSignificantBits(); holder.isSet = 1; writer.setPosition(0); @@ -187,14 +178,12 @@ void testWriteWithUuidHolder() throws Exception { @Test void testWriteWithNullableUuidHolder() throws Exception { try (UuidVector vector = 
new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector); - ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + UuidWriterImpl writer = new UuidWriterImpl(vector)) { UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - buf.setBytes(0, uuidBytes); NullableUuidHolder holder = new NullableUuidHolder(); - holder.buffer = buf; + holder.mostSigBits = uuid.getMostSignificantBits(); + holder.leastSigBits = uuid.getLeastSignificantBits(); holder.isSet = 1; writer.setPosition(0); @@ -236,7 +225,7 @@ void testReaderCopyAsValueExtensionVector() throws Exception { UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); UuidHolder holder = new UuidHolder(); reader2.read(0, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); assertEquals(uuid, actualUuid); } } @@ -254,7 +243,7 @@ void testReaderReadWithUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -273,7 +262,7 @@ void testReaderReadWithNullableUuidHolder() throws Exception { NullableUuidHolder holder = new NullableUuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -312,7 +301,7 @@ void testReaderReadWithArrayIndexUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(1, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = holder.getUuid(); assertEquals(uuid2, actualUuid); assertEquals(1, holder.isSet); } @@ -333,7 +322,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder1 = new 
NullableUuidHolder(); reader.read(0, holder1); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, 0)); + assertEquals(uuid1, holder1.getUuid()); assertEquals(1, holder1.isSet); NullableUuidHolder holder2 = new NullableUuidHolder(); @@ -342,7 +331,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder3 = new NullableUuidHolder(); reader.read(2, holder3); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, 0)); + assertEquals(uuid2, holder3.getUuid()); assertEquals(1, holder3.isSet); } } @@ -448,4 +437,272 @@ void testReaderGetField() throws Exception { assertEquals("test", reader.getField().getName()); } } + + @Test + void testHolderStartOffsetWithMultipleValues() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setSafe(1, uuid2); + vector.setSafe(2, uuid3); + vector.setValueCount(3); + + // Test UuidHolder with different indices + UuidHolder holder = new UuidHolder(); + vector.get(0, holder); + assertEquals(uuid1, holder.getUuid()); + + vector.get(1, holder); + assertEquals(uuid2, holder.getUuid()); + + vector.get(2, holder); + assertEquals(uuid3, holder.getUuid()); + } + } + + @Test + void testNullableHolderStartOffsetWithMultipleValues() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setNull(1); + vector.setSafe(2, uuid2); + vector.setValueCount(3); + + // Test NullableUuidHolder with different indices + NullableUuidHolder holder = new NullableUuidHolder(); + vector.get(0, holder); + assertEquals(1, holder.isSet); + assertEquals(uuid1, holder.getUuid()); + + vector.get(1, holder); + assertEquals(0, holder.isSet); + + vector.get(2, holder); + assertEquals(1, holder.isSet); 
+ assertEquals(uuid2, holder.getUuid()); + } + } + + @Test + void testSetFromHolderWithStartOffset() throws Exception { + try (UuidVector sourceVector = new UuidVector("source", allocator); + UuidVector targetVector = new UuidVector("target", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + sourceVector.setSafe(0, uuid1); + sourceVector.setSafe(1, uuid2); + sourceVector.setSafe(2, uuid3); + sourceVector.setValueCount(3); + + // Get holder from index 1 + UuidHolder holder = new UuidHolder(); + sourceVector.get(1, holder); + + // Set target vector using holder + targetVector.setSafe(0, holder); + targetVector.setValueCount(1); + + // Verify the value was copied correctly + assertEquals(uuid2, targetVector.getObject(0)); + } + } + + @Test + void testSetFromNullableHolderWithStartOffset() throws Exception { + try (UuidVector sourceVector = new UuidVector("source", allocator); + UuidVector targetVector = new UuidVector("target", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + sourceVector.setSafe(0, uuid1); + sourceVector.setNull(1); + sourceVector.setSafe(2, uuid2); + sourceVector.setValueCount(3); + + // Get holder from index 2 + NullableUuidHolder holder = new NullableUuidHolder(); + sourceVector.get(2, holder); + assertEquals(1, holder.isSet); + + // Set target vector using holder + targetVector.setSafe(0, holder); + targetVector.setValueCount(1); + + // Verify the value was copied correctly + assertEquals(uuid2, targetVector.getObject(0)); + + // Test with null holder + NullableUuidHolder nullHolder = new NullableUuidHolder(); + sourceVector.get(1, nullHolder); + assertEquals(0, nullHolder.isSet); + + targetVector.setSafe(1, nullHolder); + targetVector.setValueCount(2); + assertTrue(targetVector.isNull(1)); + } + } + + @Test + void testGetStartOffset() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + 
vector.allocateNew(10); + + // Test getStartOffset for various indices + assertEquals(0, vector.getStartOffset(0)); + assertEquals(16, vector.getStartOffset(1)); + assertEquals(32, vector.getStartOffset(2)); + assertEquals(48, vector.getStartOffset(3)); + assertEquals(160, vector.getStartOffset(10)); + } + } + + @Test + void testReaderWithStartOffsetMultipleReads() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setSafe(1, uuid2); + vector.setSafe(2, uuid3); + vector.setValueCount(3); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + UuidHolder holder = new UuidHolder(); + + // Read from different positions + reader.read(0, holder); + assertEquals(uuid1, holder.getUuid()); + + reader.read(1, holder); + assertEquals(uuid2, holder.getUuid()); + + reader.read(2, holder); + assertEquals(uuid3, holder.getUuid()); + } + } + + @Test + void testWriterWithExtensionHolder() throws Exception { + try (UuidVector sourceVector = new UuidVector("source", allocator); + UuidVector targetVector = new UuidVector("target", allocator)) { + UUID uuid = UUID.randomUUID(); + sourceVector.setSafe(0, uuid); + sourceVector.setValueCount(1); + + // Get holder from source + UuidHolder holder = new UuidHolder(); + sourceVector.get(0, holder); + + // Write using UuidWriterImpl with ExtensionHolder + UuidWriterImpl writer = new UuidWriterImpl(targetVector); + writer.setPosition(0); + writer.writeExtension(holder); + + assertEquals(uuid, targetVector.getObject(0)); + } + } + + @Test + void testNullableUuidHolderReaderImpl() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + // Get holder from vector + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(0, 
sourceHolder); + assertEquals(1, sourceHolder.isSet); + + // Create reader from holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + assertTrue(reader.isSet()); + assertEquals(uuid, reader.readObject()); + + // Read into another holder + NullableUuidHolder targetHolder = new NullableUuidHolder(); + reader.read(targetHolder); + assertEquals(1, targetHolder.isSet); + assertEquals(uuid, targetHolder.getUuid()); + } + } + + @Test + void testNullableUuidHolderReaderImplWithNull() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + vector.setNull(0); + vector.setValueCount(1); + + // Get null holder from vector + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(0, sourceHolder); + assertEquals(0, sourceHolder.isSet); + + // Create reader from null holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); + + // Read into another holder + NullableUuidHolder targetHolder = new NullableUuidHolder(); + reader.read(targetHolder); + assertEquals(0, targetHolder.isSet); + } + } + + @Test + void testNullableUuidHolderReaderImplReadIntoUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + // Get holder from vector + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(0, sourceHolder); + + // Create reader from holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + + // Read into UuidHolder (non-nullable) + UuidHolder targetHolder = new UuidHolder(); + reader.read(targetHolder); + assertEquals(uuid, targetHolder.getUuid()); + } + } + + @Test + void testNullableUuidHolderReaderImplWithNonZeroStart() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID 
uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + vector.setSafe(0, uuid1); + vector.setSafe(1, uuid2); + vector.setValueCount(2); + + // Get holder from index 1 + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(1, sourceHolder); + assertEquals(1, sourceHolder.isSet); + + // Create reader from holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + assertEquals(uuid2, reader.readObject()); + + // Read into another holder + NullableUuidHolder targetHolder = new NullableUuidHolder(); + reader.read(targetHolder); + assertEquals(uuid2, targetHolder.getUuid()); + } + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 871a3cc461..3b6c9b8932 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -106,7 +106,6 @@ import org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -2522,7 +2521,7 @@ public void extensionWriterReader() throws Exception { uuidReader.setPosition(0); UuidHolder uuidHolder = new UuidHolder(); uuidReader.read(uuidHolder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(uuidHolder.buffer, 0); + UUID actualUuid = uuidHolder.getUuid(); assertEquals(u1, actualUuid); assertTrue(uuidReader.isSet()); assertEquals(uuidReader.getMinorType(), MinorType.EXTENSIONTYPE);