From e35ed7c3585e8cf681c4e11aa9d91a7c7b041e55 Mon Sep 17 00:00:00 2001 From: jhrotko Date: Wed, 7 Jan 2026 18:04:50 +0000 Subject: [PATCH 1/5] GH-948: Use buffer indexing for UUID vector --- .../org/apache/arrow/vector/UuidVector.java | 111 +++---- .../impl/NullableUuidHolderReaderImpl.java | 124 +++++++ .../vector/complex/impl/UuidWriterImpl.java | 9 +- .../vector/holders/NullableUuidHolder.java | 3 + .../arrow/vector/holders/UuidHolder.java | 3 + .../apache/arrow/vector/TestListVector.java | 8 +- .../apache/arrow/vector/TestMapVector.java | 8 +- .../org/apache/arrow/vector/TestUuidType.java | 3 +- .../apache/arrow/vector/TestUuidVector.java | 314 +++++++++++++++++- 9 files changed, 507 insertions(+), 76 deletions(-) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java index c662a6e06..128e9cc66 100644 --- a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java @@ -23,10 +23,13 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.pojo.Field; @@ -132,7 +135,8 @@ public int hashCode(int index) { @Override public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); + int start = this.getStartOffset(index); + return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + UUID_BYTE_WIDTH); } /** @@ -146,17 +150,19 @@ public int isSet(int index) { } /** - * Gets the UUID value at the given index as an ArrowBuf. + * Reads the UUID value at the given index into a UuidHolder. * - * @param index the index to retrieve - * @return a buffer slice containing the 16-byte UUID - * @throws IllegalStateException if the value at the index is null and null checking is enabled + * @param index the index to read from + * @param holder the holder to populate with the UUID data */ - public ArrowBuf get(int index) throws IllegalStateException { + public void get(int index, UuidHolder holder) { + Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector."); if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + holder.isSet = 0; } else { - return getBufferSlicePostNullCheck(index); + holder.isSet = 1; + holder.buffer = getDataBuffer(); + holder.start = getStartOffset(index); } } @@ -167,23 +173,24 @@ public ArrowBuf get(int index) throws IllegalStateException { * @param holder the holder to populate with the UUID data */ public void get(int index, NullableUuidHolder holder) { + Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector."); if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { holder.isSet = 0; } else { holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); + holder.buffer = getDataBuffer(); + holder.start = getStartOffset(index); } } /** - * Reads the UUID value at the given index into a UuidHolder. + * Calculates the byte offset for a given index in the data buffer. * - * @param index the index to read from - * @param holder the holder to populate with the UUID data + * @param index the index of the UUID value + * @return the byte offset in the data buffer */ - public void get(int index, UuidHolder holder) { - holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); + public final int getStartOffset(int index) { + return index * UUID_BYTE_WIDTH; } /** @@ -207,7 +214,7 @@ public void set(int index, UUID value) { * @param holder the holder containing the UUID data */ public void set(int index, UuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); + this.set(index, holder.buffer, holder.start); } /** @@ -217,28 +224,11 @@ public void set(int index, UuidHolder holder) { * @param holder the holder containing the UUID data */ public void set(int index, NullableUuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); - } - - /** - * Sets the UUID value at the given index with explicit null flag. - * - * @param index the index to set - * @param isSet 1 if the value is set, 0 if null - * @param buffer the buffer containing the 16-byte UUID data - */ - public void set(int index, int isSet, ArrowBuf buffer) { - getUnderlyingVector().set(index, isSet, buffer); - } - - /** - * Sets the UUID value at the given index from an ArrowBuf. - * - * @param index the index to set - * @param value the buffer containing the 16-byte UUID data - */ - public void set(int index, ArrowBuf value) { - getUnderlyingVector().set(index, value); + if (holder.isSet == 0) { + getUnderlyingVector().setNull(index); + } else { + this.set(index, holder.buffer, holder.start); + } } /** @@ -249,10 +239,12 @@ public void set(int index, ArrowBuf value) { * @param sourceOffset the offset in the source buffer where the UUID data starts */ public void set(int index, ArrowBuf source, int sourceOffset) { - // Copy bytes from source buffer to target vector data buffer - ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); - dataBuffer.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); - getUnderlyingVector().setIndexDefined(index); + Preconditions.checkNotNull(source, "Cannot set UUID vector, the source buffer is null."); + + BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index); + getUnderlyingVector() + .getDataBuffer() + .setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); } /** @@ -286,10 +278,10 @@ public void setSafe(int index, UUID value) { * @param holder the holder containing the UUID data, or null to set a null value */ public void setSafe(int index, NullableUuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { + if (holder == null || holder.isSet == 0) { getUnderlyingVector().setNull(index); + } else { + this.setSafe(index, holder.buffer, holder.start); } } @@ -297,14 +289,23 @@ public void setSafe(int index, NullableUuidHolder holder) { * Sets the UUID value at the given index from a UuidHolder, expanding capacity if needed. * * @param index the index to set - * @param holder the holder containing the UUID data, or null to set a null value + * @param holder the holder containing the UUID data */ public void setSafe(int index, UuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { - getUnderlyingVector().setNull(index); - } + this.setSafe(index, holder.buffer, holder.start); + } + + /** + * Sets the UUID value at the given index by copying from a source buffer, expanding capacity if + * needed. + * + * @param index the index to set + * @param buffer the source buffer to copy from + * @param start the offset in the source buffer where the UUID data starts + */ + public void setSafe(int index, ArrowBuf buffer, int start) { + getUnderlyingVector().handleSafe(index); + this.set(index, buffer, start); } /** @@ -400,15 +401,9 @@ public TransferPair getTransferPair(BufferAllocator allocator) { return getTransferPair(this.getField().getName(), allocator); } - private ArrowBuf getBufferSlicePostNullCheck(int index) { - return getUnderlyingVector() - .getDataBuffer() - .slice((long) index * UUID_BYTE_WIDTH, UUID_BYTE_WIDTH); - } - @Override public int getTypeWidth() { - return getUnderlyingVector().getTypeWidth(); + return UUID_BYTE_WIDTH; } /** {@link TransferPair} for {@link UuidVector}. */ diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java new file mode 100644 index 000000000..099ac455e --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.util.UuidUtility; + +/** + * Reader implementation for reading UUID values from a {@link NullableUuidHolder}. + * + *

This reader wraps a single UUID holder value and provides methods to read from it. Unlike + * {@link UuidReaderImpl} which reads from a vector, this reader operates on a holder instance. + * + * @see NullableUuidHolder + * @see UuidReaderImpl + */ +public class NullableUuidHolderReaderImpl extends AbstractFieldReader { + private final NullableUuidHolder holder; + + /** + * Constructs a reader for the given UUID holder. + * + * @param holder the UUID holder to read from + */ + public NullableUuidHolderReaderImpl(NullableUuidHolder holder) { + this.holder = holder; + } + + @Override + public int size() { + throw new UnsupportedOperationException( + "size() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not a collection. " + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public boolean next() { + throw new UnsupportedOperationException( + "next() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not an iterator. " + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public void setPosition(int index) { + throw new UnsupportedOperationException( + "setPosition() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not a vector. " + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public Types.MinorType getMinorType() { + return Types.MinorType.EXTENSIONTYPE; + } + + @Override + public boolean isSet() { + return holder.isSet == 1; + } + + @Override + public void read(ExtensionHolder h) { + if (h instanceof NullableUuidHolder) { + NullableUuidHolder nullableHolder = (NullableUuidHolder) h; + nullableHolder.buffer = this.holder.buffer; + nullableHolder.isSet = this.holder.isSet; + nullableHolder.start = this.holder.start; + } else if (h instanceof UuidHolder) { + UuidHolder uuidHolder = (UuidHolder) h; + uuidHolder.buffer = this.holder.buffer; + uuidHolder.start = this.holder.start; + } else { + throw new IllegalArgumentException( + "Unsupported holder type: " + + h.getClass().getName() + + ". " + + "Only NullableUuidHolder and UuidHolder are supported for UUID values. " + + "Provided holder type cannot be used to read UUID data."); + } + } + + @Override + public Object readObject() { + if (!isSet()) { + return null; + } + // Convert UUID bytes to Java UUID object + try { + return UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + } catch (Exception e) { + throw new RuntimeException( + String.format( + "Failed to read UUID from buffer. Invalid Arrow buffer state: " + + "capacity=%d, readableBytes=%d, readerIndex=%d, writerIndex=%d, refCnt=%d. " + + "The buffer must contain exactly 16 bytes of valid UUID data.", + holder.buffer.capacity(), + holder.buffer.readableBytes(), + holder.buffer.readerIndex(), + holder.buffer.writerIndex(), + holder.buffer.refCnt()), + e); + } + } +} + diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java index 8a78add11..3f60ca922 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -50,8 +50,15 @@ public void writeExtension(Object value) { vector.setSafe(getPosition(), (ArrowBuf) value); } else if (value instanceof java.util.UUID) { vector.setSafe(getPosition(), (java.util.UUID) value); + } else if (value instanceof ExtensionHolder) { + write((ExtensionHolder) value); } else { - throw new IllegalArgumentException("Unsupported value type for UUID: " + value.getClass()); + throw new IllegalArgumentException( + "Unsupported value type for UUID: " + + value.getClass().getName() + + ". " + + "Supported types are: byte[] (16 bytes), ArrowBuf (16 bytes), or java.util.UUID. " + + "Convert your value to one of these types before writing."); } vector.setValueCount(getPosition() + 1); } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java index e5398d82c..ea93b5adf 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java @@ -32,4 +32,7 @@ public class NullableUuidHolder extends ExtensionHolder { /** Buffer containing 16-byte UUID data. */ public ArrowBuf buffer; + + /** Offset in the buffer where the UUID data starts. */ + public int start = 0; } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java index 484e05c24..8bcac90c3 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java @@ -31,6 +31,9 @@ public class UuidHolder extends ExtensionHolder { /** Buffer containing 16-byte UUID data. */ public ArrowBuf buffer; + /** Offset in the buffer where the UUID data starts. */ + public int start = 0; + /** Constructs a UuidHolder with isSet = 1. */ public UuidHolder() { this.isSet = 1; diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 41a95a8d1..73146b75e 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1259,12 +1259,12 @@ public void testListVectorReaderForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } @@ -1300,12 +1300,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index df8f338f4..7262229f2 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -1304,12 +1304,12 @@ public void testMapVectorWithExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } @@ -1349,12 +1349,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java index 9f7c65b82..bbf5620f3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java @@ -233,7 +233,8 @@ void testVectorByteArrayOperations() { // Verify the bytes match byte[] actualBytes = new byte[UuidType.UUID_BYTE_WIDTH]; - uuidVector.get(0).getBytes(0, actualBytes); + int offset = uuidVector.getStartOffset(0); + uuidVector.getDataBuffer().getBytes(offset, actualBytes); assertArrayEquals(uuidBytes, actualBytes); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java index 3d70238ec..f22b53583 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.NullableUuidHolderReaderImpl; import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.extension.UuidType; @@ -135,8 +136,8 @@ void testWriteExtensionWithUnsupportedType() throws Exception { IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type")); - assertEquals( - "Unsupported value type for UUID: class java.lang.String", exception.getMessage()); + assertTrue( + exception.getMessage().contains("Unsupported value type for UUID: java.lang.String")); } } @@ -236,7 +237,7 @@ void testReaderCopyAsValueExtensionVector() throws Exception { UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); UuidHolder holder = new UuidHolder(); reader2.read(0, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid, actualUuid); } } @@ -254,7 +255,7 @@ void testReaderReadWithUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -273,7 +274,7 @@ void testReaderReadWithNullableUuidHolder() throws Exception { NullableUuidHolder holder = new NullableUuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -312,7 +313,7 @@ void testReaderReadWithArrayIndexUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(1, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid2, actualUuid); assertEquals(1, holder.isSet); } @@ -333,7 +334,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder1 = new NullableUuidHolder(); reader.read(0, holder1); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, 0)); + assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start)); assertEquals(1, holder1.isSet); NullableUuidHolder holder2 = new NullableUuidHolder(); @@ -342,7 +343,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder3 = new NullableUuidHolder(); reader.read(2, holder3); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, 0)); + assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start)); assertEquals(1, holder3.isSet); } } @@ -448,4 +449,301 @@ void testReaderGetField() throws Exception { assertEquals("test", reader.getField().getName()); } } + + @Test + void testHolderStartOffsetWithMultipleValues() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setSafe(1, uuid2); + vector.setSafe(2, uuid3); + vector.setValueCount(3); + + // Test UuidHolder with different indices + UuidHolder holder1 = new UuidHolder(); + vector.get(0, holder1); + assertEquals(0, holder1.start); + assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start)); + + UuidHolder holder2 = new UuidHolder(); + vector.get(1, holder2); + assertEquals(16, holder2.start); // UUID_BYTE_WIDTH = 16 + assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder2.buffer, holder2.start)); + + UuidHolder holder3 = new UuidHolder(); + vector.get(2, holder3); + assertEquals(32, holder3.start); // 2 * UUID_BYTE_WIDTH = 32 + assertEquals(uuid3, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start)); + + // Verify all holders share the same buffer + assertEquals(holder1.buffer, holder2.buffer); + assertEquals(holder2.buffer, holder3.buffer); + } + } + + @Test + void testNullableHolderStartOffsetWithMultipleValues() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setNull(1); + vector.setSafe(2, uuid2); + vector.setValueCount(3); + + // Test NullableUuidHolder with different indices + NullableUuidHolder holder1 = new NullableUuidHolder(); + vector.get(0, holder1); + assertEquals(0, holder1.start); + assertEquals(1, holder1.isSet); + assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start)); + + NullableUuidHolder holder2 = new NullableUuidHolder(); + vector.get(1, holder2); + assertEquals(16, holder2.start); // UUID_BYTE_WIDTH = 16 + assertEquals(0, holder2.isSet); + + NullableUuidHolder holder3 = new NullableUuidHolder(); + vector.get(2, holder3); + assertEquals(32, holder3.start); // 2 * UUID_BYTE_WIDTH = 32 + assertEquals(1, holder3.isSet); + assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start)); + + // Verify all holders share the same buffer + assertEquals(holder1.buffer, holder2.buffer); + assertEquals(holder2.buffer, holder3.buffer); + } + } + + @Test + void testSetFromHolderWithStartOffset() throws Exception { + try (UuidVector sourceVector = new UuidVector("source", allocator); + UuidVector targetVector = new UuidVector("target", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + sourceVector.setSafe(0, uuid1); + sourceVector.setSafe(1, uuid2); + sourceVector.setSafe(2, uuid3); + sourceVector.setValueCount(3); + + // Get holder from index 1 (should have start = 16) + UuidHolder holder = new UuidHolder(); + sourceVector.get(1, holder); + assertEquals(16, holder.start); + + // Set target vector using holder with non-zero start offset + targetVector.setSafe(0, holder); + targetVector.setValueCount(1); + + // Verify the value was copied correctly + assertEquals(uuid2, targetVector.getObject(0)); + } + } + + @Test + void testSetFromNullableHolderWithStartOffset() throws Exception { + try (UuidVector sourceVector = new UuidVector("source", allocator); + UuidVector targetVector = new UuidVector("target", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + + sourceVector.setSafe(0, uuid1); + sourceVector.setNull(1); + sourceVector.setSafe(2, uuid2); + sourceVector.setValueCount(3); + + // Get holder from index 2 (should have start = 32) + NullableUuidHolder holder = new NullableUuidHolder(); + sourceVector.get(2, holder); + assertEquals(32, holder.start); + assertEquals(1, holder.isSet); + + // Set target vector using holder with non-zero start offset + targetVector.setSafe(0, holder); + targetVector.setValueCount(1); + + // Verify the value was copied correctly + assertEquals(uuid2, targetVector.getObject(0)); + + // Test with null holder + NullableUuidHolder nullHolder = new NullableUuidHolder(); + sourceVector.get(1, nullHolder); + assertEquals(16, nullHolder.start); + assertEquals(0, nullHolder.isSet); + + targetVector.setSafe(1, nullHolder); + targetVector.setValueCount(2); + assertTrue(targetVector.isNull(1)); + } + } + + @Test + void testGetStartOffset() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + vector.allocateNew(10); + + // Test getStartOffset for various indices + assertEquals(0, vector.getStartOffset(0)); + assertEquals(16, vector.getStartOffset(1)); + assertEquals(32, vector.getStartOffset(2)); + assertEquals(48, vector.getStartOffset(3)); + assertEquals(160, vector.getStartOffset(10)); + } + } + + @Test + void testReaderWithStartOffsetMultipleReads() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + + vector.setSafe(0, uuid1); + vector.setSafe(1, uuid2); + vector.setSafe(2, uuid3); + vector.setValueCount(3); + + UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); + UuidHolder holder = new UuidHolder(); + + // Read from different positions and verify start offset + reader.read(0, holder); + assertEquals(0, holder.start); + assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start)); + + reader.read(1, holder); + assertEquals(16, holder.start); + assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start)); + + reader.read(2, holder); + assertEquals(32, holder.start); + assertEquals(uuid3, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start)); + } + } + + @Test + void testWriterWithExtensionHolder() throws Exception { + try (UuidVector sourceVector = new UuidVector("source", allocator); + UuidVector targetVector = new UuidVector("target", allocator)) { + UUID uuid = UUID.randomUUID(); + sourceVector.setSafe(0, uuid); + sourceVector.setValueCount(1); + + // Get holder from source + UuidHolder holder = new UuidHolder(); + sourceVector.get(0, holder); + + // Write using UuidWriterImpl with ExtensionHolder + UuidWriterImpl writer = new UuidWriterImpl(targetVector); + writer.setPosition(0); + writer.writeExtension(holder); + + assertEquals(uuid, targetVector.getObject(0)); + } + } + + @Test + void testNullableUuidHolderReaderImpl() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + // Get holder from vector + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(0, sourceHolder); + assertEquals(1, sourceHolder.isSet); + assertEquals(0, sourceHolder.start); + + // Create reader from holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + assertTrue(reader.isSet()); + assertEquals(uuid, reader.readObject()); + + // Read into another holder + NullableUuidHolder targetHolder = new NullableUuidHolder(); + reader.read(targetHolder); + assertEquals(1, targetHolder.isSet); + assertEquals(0, targetHolder.start); + assertEquals(uuid, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start)); + } + } + + @Test + void testNullableUuidHolderReaderImplWithNull() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + vector.setNull(0); + vector.setValueCount(1); + + // Get null holder from vector + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(0, sourceHolder); + assertEquals(0, sourceHolder.isSet); + + // Create reader from null holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); + + // Read into another holder + NullableUuidHolder targetHolder = new NullableUuidHolder(); + reader.read(targetHolder); + assertEquals(0, targetHolder.isSet); + } + } + + @Test + void testNullableUuidHolderReaderImplReadIntoUuidHolder() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid = UUID.randomUUID(); + vector.setSafe(0, uuid); + vector.setValueCount(1); + + // Get holder from vector + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(0, sourceHolder); + + // Create reader from holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + + // Read into UuidHolder (non-nullable) + UuidHolder targetHolder = new UuidHolder(); + reader.read(targetHolder); + assertEquals(0, targetHolder.start); + assertEquals(uuid, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start)); + } + } + + @Test + void testNullableUuidHolderReaderImplWithNonZeroStart() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator)) { + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + vector.setSafe(0, uuid1); + vector.setSafe(1, uuid2); + vector.setValueCount(2); + + // Get holder from index 1 (start = 16) + NullableUuidHolder sourceHolder = new NullableUuidHolder(); + vector.get(1, sourceHolder); + assertEquals(1, sourceHolder.isSet); + assertEquals(16, sourceHolder.start); + + // Create reader from holder + NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); + assertEquals(uuid2, reader.readObject()); + + // Read into another holder and verify start is preserved + NullableUuidHolder targetHolder = new NullableUuidHolder(); + reader.read(targetHolder); + assertEquals(16, targetHolder.start); + assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start)); + } + } } From 421b5d4dc6de60cd763dabda08e281ae0c7f9573 Mon Sep 17 00:00:00 2001 From: jhrotko Date: Thu, 8 Jan 2026 18:44:58 +0000 Subject: [PATCH 2/5] Use MSB and LSB longs to represent UUID --- .../org/apache/arrow/vector/UuidVector.java | 23 ++-- .../impl/NullableUuidHolderReaderImpl.java | 28 ++--- .../vector/holders/NullableUuidHolder.java | 14 +-- .../arrow/vector/holders/UuidHolder.java | 12 +- .../apache/arrow/vector/TestListVector.java | 9 +- .../apache/arrow/vector/TestMapVector.java | 9 +- .../apache/arrow/vector/TestUuidVector.java | 109 ++++++------------ .../complex/writer/TestComplexWriter.java | 3 +- 8 files changed, 77 insertions(+), 130 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java index 128e9cc66..55b46fa3b 100644 --- a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java @@ -29,7 +29,6 @@ import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.pojo.Field; @@ -161,8 +160,9 @@ public void get(int index, UuidHolder holder) { holder.isSet = 0; } else { holder.isSet = 1; - holder.buffer = getDataBuffer(); - holder.start = getStartOffset(index); + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + holder.mostSigBits = bb.getLong(); + holder.leastSigBits = bb.getLong(); } } @@ -178,8 +178,9 @@ public void get(int index, NullableUuidHolder holder) { holder.isSet = 0; } else { holder.isSet = 1; - holder.buffer = getDataBuffer(); - holder.start = getStartOffset(index); + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + holder.mostSigBits = bb.getLong(); + holder.leastSigBits = bb.getLong(); } } @@ -214,7 +215,8 @@ public void set(int index, UUID value) { * @param holder the holder containing the UUID data */ public void set(int index, UuidHolder holder) { - this.set(index, holder.buffer, holder.start); + UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); + set(index, uuid); } /** @@ -227,7 +229,8 @@ public void set(int index, NullableUuidHolder holder) { if (holder.isSet == 0) { getUnderlyingVector().setNull(index); } else { - this.set(index, holder.buffer, holder.start); + UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); + set(index, uuid); } } @@ -281,7 +284,8 @@ public void setSafe(int index, NullableUuidHolder holder) { if (holder == null || holder.isSet == 0) { getUnderlyingVector().setNull(index); } else { - this.setSafe(index, holder.buffer, holder.start); + UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); + setSafe(index, uuid); } } @@ -292,7 +296,8 @@ public void setSafe(int index, NullableUuidHolder holder) { * @param holder the holder containing the UUID data */ public void setSafe(int index, UuidHolder holder) { - this.setSafe(index, holder.buffer, holder.start); + UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); + setSafe(index, uuid); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java index 099ac455e..51c3e3273 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java @@ -16,11 +16,11 @@ */ package org.apache.arrow.vector.complex.impl; +import java.util.UUID; import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.util.UuidUtility; /** * Reader implementation for reading UUID values from a {@link NullableUuidHolder}. @@ -81,13 +81,13 @@ public boolean isSet() { public void read(ExtensionHolder h) { if (h instanceof NullableUuidHolder) { NullableUuidHolder nullableHolder = (NullableUuidHolder) h; - nullableHolder.buffer = this.holder.buffer; + nullableHolder.mostSigBits = this.holder.mostSigBits; + nullableHolder.leastSigBits = this.holder.leastSigBits; nullableHolder.isSet = this.holder.isSet; - nullableHolder.start = this.holder.start; } else if (h instanceof UuidHolder) { UuidHolder uuidHolder = (UuidHolder) h; - uuidHolder.buffer = this.holder.buffer; - uuidHolder.start = this.holder.start; + uuidHolder.mostSigBits = this.holder.mostSigBits; + uuidHolder.leastSigBits = this.holder.leastSigBits; } else { throw new IllegalArgumentException( "Unsupported holder type: " @@ -103,22 +103,8 @@ public Object readObject() { if (!isSet()) { return null; } - // Convert UUID bytes to Java UUID object - try { - return UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); - } catch (Exception e) { - throw new RuntimeException( - String.format( - "Failed to read UUID from buffer. Invalid Arrow buffer state: " - + "capacity=%d, readableBytes=%d, readerIndex=%d, writerIndex=%d, refCnt=%d. " - + "The buffer must contain exactly 16 bytes of valid UUID data.", - holder.buffer.capacity(), - holder.buffer.readableBytes(), - holder.buffer.readerIndex(), - holder.buffer.writerIndex(), - holder.buffer.refCnt()), - e); - } + // Convert UUID longs to Java UUID object + return new UUID(holder.mostSigBits, holder.leastSigBits); } } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java index ea93b5adf..e01d68115 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java @@ -16,23 +16,21 @@ */ package org.apache.arrow.vector.holders; -import org.apache.arrow.memory.ArrowBuf; - /** * Value holder for nullable UUID values. * *

The {@code isSet} field controls nullability: when {@code isSet = 1}, the holder contains a - * valid UUID in {@code buffer}; when {@code isSet = 0}, the holder represents a null value and - * {@code buffer} should not be accessed. + * valid UUID represented as two longs; when {@code isSet = 0}, the holder represents a null value + * and the long fields should not be accessed. * * @see UuidHolder * @see org.apache.arrow.vector.UuidVector * @see org.apache.arrow.vector.extension.UuidType */ public class NullableUuidHolder extends ExtensionHolder { - /** Buffer containing 16-byte UUID data. */ - public ArrowBuf buffer; + /** The most significant 64 bits of the UUID. */ + public long mostSigBits; - /** Offset in the buffer where the UUID data starts. */ - public int start = 0; + /** The least significant 64 bits of the UUID. */ + public long leastSigBits; } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java index 8bcac90c3..77f70f738 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java @@ -16,23 +16,21 @@ */ package org.apache.arrow.vector.holders; -import org.apache.arrow.memory.ArrowBuf; - /** * Value holder for non-nullable UUID values. * - *

Contains a 16-byte UUID in {@code buffer} with {@code isSet} always 1. + *

Contains a 16-byte UUID represented as two longs with {@code isSet} always 1. * * @see NullableUuidHolder * @see org.apache.arrow.vector.UuidVector * @see org.apache.arrow.vector.extension.UuidType */ public class UuidHolder extends ExtensionHolder { - /** Buffer containing 16-byte UUID data. */ - public ArrowBuf buffer; + /** The most significant 64 bits of the UUID. */ + public long mostSigBits; - /** Offset in the buffer where the UUID data starts. */ - public int start = 0; + /** The least significant 64 bits of the UUID. */ + public long leastSigBits; /** Constructs a UuidHolder with isSet = 1. */ public UuidHolder() { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 73146b75e..6f139dee5 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -49,7 +49,6 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1259,12 +1258,12 @@ public void testListVectorReaderForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u2, actualUuid); } } @@ -1300,12 +1299,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 7262229f2..24868dbb7 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -50,7 +50,6 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1304,12 +1303,12 @@ public void testMapVectorWithExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u2, actualUuid); } } @@ -1349,12 +1348,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java index f22b53583..7c4408de1 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java @@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.nio.ByteBuffer; import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -61,21 +60,14 @@ void testWriteToExtensionVector() throws Exception { try (UuidVector vector = new UuidVector("test", allocator); UuidWriterImpl writer = new UuidWriterImpl(vector)) { UUID uuid = UUID.randomUUID(); - ByteBuffer bb = ByteBuffer.allocate(UuidType.UUID_BYTE_WIDTH); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - // Allocate ArrowBuf for the holder - try (ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { - buf.setBytes(0, bb.array()); - - UuidHolder holder = new UuidHolder(); - holder.buffer = buf; + UuidHolder holder = new UuidHolder(); + holder.mostSigBits = uuid.getMostSignificantBits(); + holder.leastSigBits = uuid.getLeastSignificantBits(); - writer.write(holder); - UUID result = vector.getObject(0); - assertEquals(uuid, result); - } + writer.write(holder); + UUID result = vector.getObject(0); + assertEquals(uuid, result); } } @@ -166,14 +158,12 @@ void testWriteExtensionMultipleValues() throws Exception { @Test void testWriteWithUuidHolder() throws Exception { try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector); - ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + UuidWriterImpl writer = new UuidWriterImpl(vector)) { UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - buf.setBytes(0, uuidBytes); UuidHolder holder = new UuidHolder(); - holder.buffer = buf; + holder.mostSigBits = uuid.getMostSignificantBits(); + holder.leastSigBits = uuid.getLeastSignificantBits(); holder.isSet = 1; writer.setPosition(0); @@ -188,14 +178,12 @@ void testWriteWithUuidHolder() throws Exception { @Test void testWriteWithNullableUuidHolder() throws Exception { try (UuidVector vector = new UuidVector("test", allocator); - UuidWriterImpl writer = new UuidWriterImpl(vector); - ArrowBuf buf = allocator.buffer(UuidType.UUID_BYTE_WIDTH)) { + UuidWriterImpl writer = new UuidWriterImpl(vector)) { UUID uuid = UUID.randomUUID(); - byte[] uuidBytes = UuidUtility.getBytesFromUUID(uuid); - buf.setBytes(0, uuidBytes); NullableUuidHolder holder = new NullableUuidHolder(); - holder.buffer = buf; + holder.mostSigBits = uuid.getMostSignificantBits(); + holder.leastSigBits = uuid.getLeastSignificantBits(); holder.isSet = 1; writer.setPosition(0); @@ -237,7 +225,7 @@ void testReaderCopyAsValueExtensionVector() throws Exception { UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); UuidHolder holder = new UuidHolder(); reader2.read(0, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(uuid, actualUuid); } } @@ -255,7 +243,7 @@ void testReaderReadWithUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -274,7 +262,7 @@ void testReaderReadWithNullableUuidHolder() throws Exception { NullableUuidHolder holder = new NullableUuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -313,7 +301,7 @@ void testReaderReadWithArrayIndexUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(1, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); assertEquals(uuid2, actualUuid); assertEquals(1, holder.isSet); } @@ -334,7 +322,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder1 = new NullableUuidHolder(); reader.read(0, holder1); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start)); + assertEquals(uuid1, new UUID(holder1.mostSigBits, holder1.leastSigBits)); assertEquals(1, holder1.isSet); NullableUuidHolder holder2 = new NullableUuidHolder(); @@ -343,7 +331,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder3 = new NullableUuidHolder(); reader.read(2, holder3); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start)); + assertEquals(uuid2, new UUID(holder3.mostSigBits, holder3.leastSigBits)); assertEquals(1, holder3.isSet); } } @@ -465,22 +453,15 @@ void testHolderStartOffsetWithMultipleValues() throws Exception { // Test UuidHolder with different indices UuidHolder holder1 = new UuidHolder(); vector.get(0, holder1); - assertEquals(0, holder1.start); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start)); + assertEquals(uuid1, new UUID(holder1.mostSigBits, holder1.leastSigBits)); UuidHolder holder2 = new UuidHolder(); vector.get(1, holder2); - assertEquals(16, holder2.start); // UUID_BYTE_WIDTH = 16 - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder2.buffer, holder2.start)); + assertEquals(uuid2, new UUID(holder2.mostSigBits, holder2.leastSigBits)); UuidHolder holder3 = new UuidHolder(); vector.get(2, holder3); - assertEquals(32, holder3.start); // 2 * UUID_BYTE_WIDTH = 32 - assertEquals(uuid3, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start)); - - // Verify all holders share the same buffer - assertEquals(holder1.buffer, holder2.buffer); - assertEquals(holder2.buffer, holder3.buffer); + assertEquals(uuid3, new UUID(holder3.mostSigBits, holder3.leastSigBits)); } } @@ -498,24 +479,17 @@ void testNullableHolderStartOffsetWithMultipleValues() throws Exception { // Test NullableUuidHolder with different indices NullableUuidHolder holder1 = new NullableUuidHolder(); vector.get(0, holder1); - assertEquals(0, holder1.start); assertEquals(1, holder1.isSet); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start)); + assertEquals(uuid1, new UUID(holder1.mostSigBits, holder1.leastSigBits)); NullableUuidHolder holder2 = new NullableUuidHolder(); vector.get(1, holder2); - assertEquals(16, holder2.start); // UUID_BYTE_WIDTH = 16 assertEquals(0, holder2.isSet); NullableUuidHolder holder3 = new NullableUuidHolder(); vector.get(2, holder3); - assertEquals(32, holder3.start); // 2 * UUID_BYTE_WIDTH = 32 assertEquals(1, holder3.isSet); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start)); - - // Verify all holders share the same buffer - assertEquals(holder1.buffer, holder2.buffer); - assertEquals(holder2.buffer, holder3.buffer); + assertEquals(uuid2, new UUID(holder3.mostSigBits, holder3.leastSigBits)); } } @@ -532,12 +506,11 @@ void testSetFromHolderWithStartOffset() throws Exception { sourceVector.setSafe(2, uuid3); sourceVector.setValueCount(3); - // Get holder from index 1 (should have start = 16) + // Get holder from index 1 UuidHolder holder = new UuidHolder(); sourceVector.get(1, holder); - assertEquals(16, holder.start); - // Set target vector using holder with non-zero start offset + // Set target vector using holder targetVector.setSafe(0, holder); targetVector.setValueCount(1); @@ -558,13 +531,12 @@ void testSetFromNullableHolderWithStartOffset() throws Exception { sourceVector.setSafe(2, uuid2); sourceVector.setValueCount(3); - // Get holder from index 2 (should have start = 32) + // Get holder from index 2 NullableUuidHolder holder = new NullableUuidHolder(); sourceVector.get(2, holder); - assertEquals(32, holder.start); assertEquals(1, holder.isSet); - // Set target vector using holder with non-zero start offset + // Set target vector using holder targetVector.setSafe(0, holder); targetVector.setValueCount(1); @@ -574,7 +546,6 @@ void testSetFromNullableHolderWithStartOffset() throws Exception { // Test with null holder NullableUuidHolder nullHolder = new NullableUuidHolder(); sourceVector.get(1, nullHolder); - assertEquals(16, nullHolder.start); assertEquals(0, nullHolder.isSet); targetVector.setSafe(1, nullHolder); @@ -612,18 +583,15 @@ void testReaderWithStartOffsetMultipleReads() throws Exception { UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); UuidHolder holder = new UuidHolder(); - // Read from different positions and verify start offset + // Read from different positions reader.read(0, holder); - assertEquals(0, holder.start); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start)); + assertEquals(uuid1, new UUID(holder.mostSigBits, holder.leastSigBits)); reader.read(1, holder); - assertEquals(16, holder.start); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start)); + assertEquals(uuid2, new UUID(holder.mostSigBits, holder.leastSigBits)); reader.read(2, holder); - assertEquals(32, holder.start); - assertEquals(uuid3, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start)); + assertEquals(uuid3, new UUID(holder.mostSigBits, holder.leastSigBits)); } } @@ -659,7 +627,6 @@ void testNullableUuidHolderReaderImpl() throws Exception { NullableUuidHolder sourceHolder = new NullableUuidHolder(); vector.get(0, sourceHolder); assertEquals(1, sourceHolder.isSet); - assertEquals(0, sourceHolder.start); // Create reader from holder NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); @@ -670,8 +637,7 @@ void testNullableUuidHolderReaderImpl() throws Exception { NullableUuidHolder targetHolder = new NullableUuidHolder(); reader.read(targetHolder); assertEquals(1, targetHolder.isSet); - assertEquals(0, targetHolder.start); - assertEquals(uuid, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start)); + assertEquals(uuid, new UUID(targetHolder.mostSigBits, targetHolder.leastSigBits)); } } @@ -715,8 +681,7 @@ void testNullableUuidHolderReaderImplReadIntoUuidHolder() throws Exception { // Read into UuidHolder (non-nullable) UuidHolder targetHolder = new UuidHolder(); reader.read(targetHolder); - assertEquals(0, targetHolder.start); - assertEquals(uuid, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start)); + assertEquals(uuid, new UUID(targetHolder.mostSigBits, targetHolder.leastSigBits)); } } @@ -729,21 +694,19 @@ void testNullableUuidHolderReaderImplWithNonZeroStart() throws Exception { vector.setSafe(1, uuid2); vector.setValueCount(2); - // Get holder from index 1 (start = 16) + // Get holder from index 1 NullableUuidHolder sourceHolder = new NullableUuidHolder(); vector.get(1, sourceHolder); assertEquals(1, sourceHolder.isSet); - assertEquals(16, sourceHolder.start); // Create reader from holder NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder); assertEquals(uuid2, reader.readObject()); - // Read into another holder and verify start is preserved + // Read into another holder NullableUuidHolder targetHolder = new NullableUuidHolder(); reader.read(targetHolder); - assertEquals(16, targetHolder.start); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start)); + assertEquals(uuid2, new UUID(targetHolder.mostSigBits, targetHolder.leastSigBits)); } } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 871a3cc46..16560fcb0 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -106,7 +106,6 @@ import org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -2522,7 +2521,7 @@ public void extensionWriterReader() throws Exception { uuidReader.setPosition(0); UuidHolder uuidHolder = new UuidHolder(); uuidReader.read(uuidHolder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(uuidHolder.buffer, 0); + UUID actualUuid = new UUID(uuidHolder.mostSigBits, uuidHolder.leastSigBits); assertEquals(u1, actualUuid); assertTrue(uuidReader.isSet()); assertEquals(uuidReader.getMinorType(), MinorType.EXTENSIONTYPE); From c9b09b0c2d6193b2e44be88b6bf40c103dcdd66a Mon Sep 17 00:00:00 2001 From: jhrotko Date: Fri, 9 Jan 2026 09:15:14 +0000 Subject: [PATCH 3/5] Add getUuid method in [Nullable]UuidHolder --- .../org/apache/arrow/vector/UuidVector.java | 12 ++-- .../impl/NullableUuidHolderReaderImpl.java | 2 +- .../vector/holders/NullableUuidHolder.java | 9 +++ .../arrow/vector/holders/UuidHolder.java | 6 ++ .../apache/arrow/vector/TestListVector.java | 8 +-- .../apache/arrow/vector/TestMapVector.java | 8 +-- .../apache/arrow/vector/TestUuidVector.java | 60 +++++++++---------- .../complex/writer/TestComplexWriter.java | 2 +- 8 files changed, 57 insertions(+), 50 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java index 55b46fa3b..f8e53022f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java @@ -215,8 +215,7 @@ public void set(int index, UUID value) { * @param holder the holder containing the UUID data */ public void set(int index, UuidHolder holder) { - UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); - set(index, uuid); + set(index, holder.getUuid()); } /** @@ -229,8 +228,7 @@ public void set(int index, NullableUuidHolder holder) { if (holder.isSet == 0) { getUnderlyingVector().setNull(index); } else { - UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); - set(index, uuid); + set(index, holder.getUuid()); } } @@ -284,8 +282,7 @@ public void setSafe(int index, NullableUuidHolder holder) { if (holder == null || holder.isSet == 0) { getUnderlyingVector().setNull(index); } else { - UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); - setSafe(index, uuid); + setSafe(index, holder.getUuid()); } } @@ -296,8 +293,7 @@ public void setSafe(int index, NullableUuidHolder holder) { * @param holder the holder containing the UUID data */ public void setSafe(int index, UuidHolder holder) { - UUID uuid = new UUID(holder.mostSigBits, holder.leastSigBits); - setSafe(index, uuid); + setSafe(index, holder.getUuid()); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java index 51c3e3273..84f8614e0 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java @@ -104,7 +104,7 @@ public Object readObject() { return null; } // Convert UUID longs to Java UUID object - return new UUID(holder.mostSigBits, holder.leastSigBits); + return holder.getUuid(); } } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java index e01d68115..d10cc4dfc 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java @@ -16,6 +16,8 @@ */ package org.apache.arrow.vector.holders; +import java.util.UUID; + /** * Value holder for nullable UUID values. * @@ -33,4 +35,11 @@ public class NullableUuidHolder extends ExtensionHolder { /** The least significant 64 bits of the UUID. */ public long leastSigBits; + + public UUID getUuid() { + if(this.isSet == 0) { + return null; + } + return new UUID(mostSigBits, leastSigBits); + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java index 77f70f738..d2d481a4e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java @@ -16,6 +16,8 @@ */ package org.apache.arrow.vector.holders; +import java.util.UUID; + /** * Value holder for non-nullable UUID values. * @@ -36,4 +38,8 @@ public class UuidHolder extends ExtensionHolder { public UuidHolder() { this.isSet = 1; } + + public UUID getUuid() { + return new UUID(mostSigBits, leastSigBits); + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 6f139dee5..6104f4e82 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1258,12 +1258,12 @@ public void testListVectorReaderForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } @@ -1299,12 +1299,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = reader.reader(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 24868dbb7..ba460a10a 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -1303,12 +1303,12 @@ public void testMapVectorWithExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } @@ -1348,12 +1348,12 @@ public void testCopyFromForExtensionType() throws Exception { FieldReader uuidReader = mapReader.value(); UuidHolder holder = new UuidHolder(); uuidReader.read(holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + actualUuid = holder.getUuid(); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java index 7c4408de1..01a4efd7d 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java @@ -225,7 +225,7 @@ void testReaderCopyAsValueExtensionVector() throws Exception { UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); UuidHolder holder = new UuidHolder(); reader2.read(0, holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(uuid, actualUuid); } } @@ -243,7 +243,7 @@ void testReaderReadWithUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -262,7 +262,7 @@ void testReaderReadWithNullableUuidHolder() throws Exception { NullableUuidHolder holder = new NullableUuidHolder(); reader.read(holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -301,7 +301,7 @@ void testReaderReadWithArrayIndexUuidHolder() throws Exception { UuidHolder holder = new UuidHolder(); reader.read(1, holder); - UUID actualUuid = new UUID(holder.mostSigBits, holder.leastSigBits); + UUID actualUuid = holder.getUuid(); assertEquals(uuid2, actualUuid); assertEquals(1, holder.isSet); } @@ -322,7 +322,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder1 = new NullableUuidHolder(); reader.read(0, holder1); - assertEquals(uuid1, new UUID(holder1.mostSigBits, holder1.leastSigBits)); + assertEquals(uuid1, holder1.getUuid()); assertEquals(1, holder1.isSet); NullableUuidHolder holder2 = new NullableUuidHolder(); @@ -331,7 +331,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder3 = new NullableUuidHolder(); reader.read(2, holder3); - assertEquals(uuid2, new UUID(holder3.mostSigBits, holder3.leastSigBits)); + assertEquals(uuid2, holder3.getUuid()); assertEquals(1, holder3.isSet); } } @@ -451,17 +451,15 @@ void testHolderStartOffsetWithMultipleValues() throws Exception { vector.setValueCount(3); // Test UuidHolder with different indices - UuidHolder holder1 = new UuidHolder(); - vector.get(0, holder1); - assertEquals(uuid1, new UUID(holder1.mostSigBits, holder1.leastSigBits)); + UuidHolder holder = new UuidHolder(); + vector.get(0, holder); + assertEquals(uuid1, holder.getUuid()); - UuidHolder holder2 = new UuidHolder(); - vector.get(1, holder2); - assertEquals(uuid2, new UUID(holder2.mostSigBits, holder2.leastSigBits)); + vector.get(1, holder); + assertEquals(uuid2, holder.getUuid()); - UuidHolder holder3 = new UuidHolder(); - vector.get(2, holder3); - assertEquals(uuid3, new UUID(holder3.mostSigBits, holder3.leastSigBits)); + vector.get(2, holder); + assertEquals(uuid3, holder.getUuid()); } } @@ -477,19 +475,17 @@ void testNullableHolderStartOffsetWithMultipleValues() throws Exception { vector.setValueCount(3); // Test NullableUuidHolder with different indices - NullableUuidHolder holder1 = new NullableUuidHolder(); - vector.get(0, holder1); - assertEquals(1, holder1.isSet); - assertEquals(uuid1, new UUID(holder1.mostSigBits, holder1.leastSigBits)); + NullableUuidHolder holder = new NullableUuidHolder(); + vector.get(0, holder); + assertEquals(1, holder.isSet); + assertEquals(uuid1, holder.getUuid()); - NullableUuidHolder holder2 = new NullableUuidHolder(); - vector.get(1, holder2); - assertEquals(0, holder2.isSet); + vector.get(1, holder); + assertEquals(0, holder.isSet); - NullableUuidHolder holder3 = new NullableUuidHolder(); - vector.get(2, holder3); - assertEquals(1, holder3.isSet); - assertEquals(uuid2, new UUID(holder3.mostSigBits, holder3.leastSigBits)); + vector.get(2, holder); + assertEquals(1, holder.isSet); + assertEquals(uuid2, holder.getUuid()); } } @@ -585,13 +581,13 @@ void testReaderWithStartOffsetMultipleReads() throws Exception { // Read from different positions reader.read(0, holder); - assertEquals(uuid1, new UUID(holder.mostSigBits, holder.leastSigBits)); + assertEquals(uuid1, holder.getUuid()); reader.read(1, holder); - assertEquals(uuid2, new UUID(holder.mostSigBits, holder.leastSigBits)); + assertEquals(uuid2, holder.getUuid()); reader.read(2, holder); - assertEquals(uuid3, new UUID(holder.mostSigBits, holder.leastSigBits)); + assertEquals(uuid3, holder.getUuid()); } } @@ -637,7 +633,7 @@ void testNullableUuidHolderReaderImpl() throws Exception { NullableUuidHolder targetHolder = new NullableUuidHolder(); reader.read(targetHolder); assertEquals(1, targetHolder.isSet); - assertEquals(uuid, new UUID(targetHolder.mostSigBits, targetHolder.leastSigBits)); + assertEquals(uuid, targetHolder.getUuid()); } } @@ -681,7 +677,7 @@ void testNullableUuidHolderReaderImplReadIntoUuidHolder() throws Exception { // Read into UuidHolder (non-nullable) UuidHolder targetHolder = new UuidHolder(); reader.read(targetHolder); - assertEquals(uuid, new UUID(targetHolder.mostSigBits, targetHolder.leastSigBits)); + assertEquals(uuid, targetHolder.getUuid()); } } @@ -706,7 +702,7 @@ void testNullableUuidHolderReaderImplWithNonZeroStart() throws Exception { // Read into another holder NullableUuidHolder targetHolder = new NullableUuidHolder(); reader.read(targetHolder); - assertEquals(uuid2, new UUID(targetHolder.mostSigBits, targetHolder.leastSigBits)); + assertEquals(uuid2, targetHolder.getUuid()); } } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 16560fcb0..3b6c9b893 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -2521,7 +2521,7 @@ public void extensionWriterReader() throws Exception { uuidReader.setPosition(0); UuidHolder uuidHolder = new UuidHolder(); uuidReader.read(uuidHolder); - UUID actualUuid = new UUID(uuidHolder.mostSigBits, uuidHolder.leastSigBits); + UUID actualUuid = uuidHolder.getUuid(); assertEquals(u1, actualUuid); assertTrue(uuidReader.isSet()); assertEquals(uuidReader.getMinorType(), MinorType.EXTENSIONTYPE); From 51cf225c6fad69e4b926a9e2c92dffd8e7cce2d1 Mon Sep 17 00:00:00 2001 From: jhrotko Date: Fri, 9 Jan 2026 09:37:36 +0000 Subject: [PATCH 4/5] use ArrowBuf.getLong --- .../org/apache/arrow/vector/UuidVector.java | 35 +++++++++++++------ .../impl/NullableUuidHolderReaderImpl.java | 2 -- .../vector/holders/NullableUuidHolder.java | 7 +++- .../arrow/vector/holders/UuidHolder.java | 5 +++ 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java index f8e53022f..26d07385a 100644 --- a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector; +import static org.apache.arrow.memory.util.MemoryUtil.LITTLE_ENDIAN; import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH; import java.nio.ByteBuffer; @@ -156,13 +157,17 @@ public int isSet(int index) { */ public void get(int index, UuidHolder holder) { Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector."); - if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { - holder.isSet = 0; + final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); + final long start = (long) index * UUID_BYTE_WIDTH; + final long next = start + Long.BYTES; + // UUIDs are stored in big-endian byte order in Arrow buffers. + // ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems. + if (LITTLE_ENDIAN) { + holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(start)); + holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(next)); } else { - holder.isSet = 1; - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - holder.mostSigBits = bb.getLong(); - holder.leastSigBits = bb.getLong(); + holder.mostSigBits = dataBuffer.getLong(start); + holder.leastSigBits = dataBuffer.getLong(next); } } @@ -178,9 +183,17 @@ public void get(int index, NullableUuidHolder holder) { holder.isSet = 0; } else { holder.isSet = 1; - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - holder.mostSigBits = bb.getLong(); - holder.leastSigBits = bb.getLong(); + final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); + final long offset = (long) index * UUID_BYTE_WIDTH; + // UUIDs are stored in big-endian byte order in Arrow buffers. + // ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems. + if (LITTLE_ENDIAN) { + holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(offset)); + holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(offset + Long.BYTES)); + } else { + holder.mostSigBits = dataBuffer.getLong(offset); + holder.leastSigBits = dataBuffer.getLong(offset + Long.BYTES); + } } } @@ -244,8 +257,8 @@ public void set(int index, ArrowBuf source, int sourceOffset) { BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index); getUnderlyingVector() - .getDataBuffer() - .setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); + .getDataBuffer() + .setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java index 84f8614e0..cbf3fd539 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java @@ -16,7 +16,6 @@ */ package org.apache.arrow.vector.complex.impl; -import java.util.UUID; import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.UuidHolder; @@ -107,4 +106,3 @@ public Object readObject() { return holder.getUuid(); } } - diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java index d10cc4dfc..ee2d95385 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java @@ -36,8 +36,13 @@ public class NullableUuidHolder extends ExtensionHolder { /** The least significant 64 bits of the UUID. */ public long leastSigBits; + /** + * Converts the holder's two longs to a UUID object. + * + * @return the UUID represented by this holder, or null if isSet is 0 + */ public UUID getUuid() { - if(this.isSet == 0) { + if (this.isSet == 0) { return null; } return new UUID(mostSigBits, leastSigBits); diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java index d2d481a4e..2204f1342 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java @@ -39,6 +39,11 @@ public UuidHolder() { this.isSet = 1; } + /** + * Converts the holder's two longs to a UUID object. + * + * @return the UUID represented by this holder + */ public UUID getUuid() { return new UUID(mostSigBits, leastSigBits); } From 18e3354dea0c8e0678ed642946f7bab4012fd8b9 Mon Sep 17 00:00:00 2001 From: jhrotko Date: Fri, 9 Jan 2026 13:27:32 +0000 Subject: [PATCH 5/5] Add Uuid vector benchmarks --- .../arrow/vector/UuidVectorBenchmarks.java | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java diff --git a/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java b/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java new file mode 100644 index 000000000..15aad17d8 --- /dev/null +++ b/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.profile.GCProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** Benchmarks for {@link UuidVector}. */ +@State(Scope.Benchmark) +public class UuidVectorBenchmarks { + // checkstyle:off: MissingJavadocMethod + + private static final int VECTOR_LENGTH = 10_000; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private UuidVector vector; + + private UUID[] testUuids; + + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + vector = new UuidVector("vector", allocator); + vector.allocateNew(VECTOR_LENGTH); + vector.setValueCount(VECTOR_LENGTH); + + // Pre-generate UUIDs for consistent benchmarking + testUuids = new UUID[VECTOR_LENGTH]; + for (int i = 0; i < VECTOR_LENGTH; i++) { + testUuids[i] = new UUID(i, i * 2L); + } + } + + @TearDown + public void tearDown() { + vector.close(); + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithUuidHolder() { + UuidHolder holder = new UuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + // Old version: get the holder populated with buffer reference, then set it back + vector.get(i, holder); + vector.setSafe(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithNullableUuidHolder() { + NullableUuidHolder holder = new NullableUuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + // Old version: get the holder populated with buffer reference, then set it back + holder.isSet = i % 3 == 0 ? 0 : 1; + if (holder.isSet == 1) { + vector.get(i, holder); + } + vector.setSafe(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setUuidDirectly() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.setSafe(i, testUuids[i]); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithWriter() { + UuidWriterImpl writer = new UuidWriterImpl(vector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + if (i % 3 != 0) { + writer.writeExtension(testUuids[i]); + } + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getWithUuidHolder() { + UuidHolder holder = new UuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.get(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getWithNullableUuidHolder() { + NullableUuidHolder holder = new NullableUuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.get(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getUuidDirectly() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + UUID uuid = vector.getObject(i); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = + new OptionsBuilder() + .include(UuidVectorBenchmarks.class.getSimpleName()) + .forks(1) + .addProfiler(GCProfiler.class) + .build(); + + new Runner(opt).run(); + } + // checkstyle:on: MissingJavadocMethod +}