diff --git a/java/vector/src/main/java/org/apache/arrow/vector/unsafe/Float8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/Float8Vector.java
new file mode 100644
index 00000000000..0d4a590096b
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/Float8Vector.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.unsafe;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.Float8Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Unsafe implementation of org.apache.arrow.vector.Float8Vector.
+ * Compared with org.apache.arrow.vector.Float8Vector, it avoids checks and directly operates on the direct memory
+ * for better performance, so callers are responsible for passing only in-range indices.
+ */
+public class Float8Vector extends org.apache.arrow.vector.Float8Vector {
+
+  /**
+   * The number of bits to shift an element index by to convert it to a byte offset (log2 of the 8-byte width).
+   */
+  public static final byte TYPE_LOG2_WIDTH = 3;
+
+  /**
+   * Instantiate a Float8Vector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param allocator allocator for memory management.
+   */
+  public Float8Vector(String name, BufferAllocator allocator) {
+    super(name, allocator);
+  }
+
+  /**
+   * Instantiate a Float8Vector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param fieldType type of Field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public Float8Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+    super(name, fieldType, allocator);
+  }
+
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value retrieval methods                        |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  /**
+   * Get the element at the given index from the vector, without checking the index or the validity bit.
+   *
+   * @param index position of element
+   * @return element at given index
+   */
+  @Override
+  public double get(int index) {
+    return Double.longBitsToDouble(PlatformDependent.getLong(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH)));
+  }
+
+  /**
+   * Get the element at the given index from the vector and set the state in holder.
+   * If the element at the given index is null, holder.isSet will be zero and holder.value is left untouched.
+   *
+   * @param index position of element
+   * @param holder holder that receives isSet and, for a non-null element, the value
+   */
+  public void get(int index, NullableFloat8Holder holder) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      holder.isSet = 0;
+      return;
+    }
+    holder.isSet = 1;
+    holder.value = this.get(index);
+  }
+
+  /**
+   * Same as {@link #get(int)}, except that the validity bit is checked first and the result is boxed.
+   *
+   * @param index position of element
+   * @return element at given index, or null if its validity bit is not set
+   */
+  public Double getObject(int index) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      return null;
+    } else {
+      return this.get(index);
+    }
+  }
+
+  /**
+   * Copy a cell value from a particular index in source vector to a particular
+   * position in this vector.
+   *
+   * @param fromIndex position to copy from in source vector
+   * @param thisIndex position to copy to in this vector
+   * @param from source vector (may be either a safe or an unsafe vector)
+   */
+  public void copyFrom(int fromIndex, int thisIndex, org.apache.arrow.vector.Float8Vector from) {
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, thisIndex,
+            UnsafeBitVectorHelper.isValidityBitSet(from.getValidityBuffer(), fromIndex));
+    // Since we are not sure if the from object is an unsafe object, we get its value through the
+    // underlying buffer address (byte offset = fromIndex << TYPE_LOG2_WIDTH). The value is stored with
+    // setValue() rather than set(): set() forces the validity bit to 1 and would un-null a copied null.
+    final double value = Double.longBitsToDouble(
+            PlatformDependent.getLong(from.getDataBufferAddress() + (fromIndex << TYPE_LOG2_WIDTH)));
+    this.setValue(thisIndex, value);
+  }
+
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value setter methods                           |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  private void setValue(int index, double value) {
+    PlatformDependent.putLong(
+            valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH), Double.doubleToRawLongBits(value));
+  }
+
+  /**
+   * Set the element at the given index to the given value and mark it as non-null.
+   *
+   * @param index position of element
+   * @param value value of element
+   */
+  public void set(int index, double value) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, value);
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   * If the value in holder is not indicated as set, the element
+   * at the given index will be null.
+   *
+   * @param index position of element
+   * @param holder nullable data holder for value of element
+   */
+  public void set(int index, NullableFloat8Holder holder) throws IllegalArgumentException {
+    if (holder.isSet < 0) {
+      throw new IllegalArgumentException();
+    } else if (holder.isSet > 0) {
+      UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+      setValue(index, holder.value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   *
+   * @param index position of element
+   * @param holder data holder for value of element
+   */
+  public void set(int index, Float8Holder holder) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, holder.value);
+  }
+
+  /**
+   * Set the element at the given index to null.
+   *
+   * @param index position of element
+   */
+  public void setNull(int index) {
+    handleSafe(index);
+    // not really needed to set the bit to 0 as long as
+    // the buffer always starts from 0.
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+  }
+
+  /**
+   * Store the given value at a particular position in the vector. isSet indicates
+   * whether the value is NULL or not.
+   *
+   * @param index position of the new value
+   * @param isSet 0 for NULL value, 1 otherwise
+   * @param value element value
+   */
+  public void set(int index, int isSet, double value) {
+    if (isSet > 0) {
+      set(index, value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Given a data buffer, get the value stored at a particular position
+   * in the vector. The byte offset is index shifted left by TYPE_LOG2_WIDTH; no bounds check is done.
+   *
+   * <p>This method should not be used externally.
+   *
+   * @param buffer data buffer
+   * @param index position of the element.
+   * @return value stored at the index.
+   */
+  public static double get(final ArrowBuf buffer, final int index) {
+    return Double.longBitsToDouble(PlatformDependent.getLong(buffer.memoryAddress() + (index << TYPE_LOG2_WIDTH)));
+  }
+
+  /**
+   * Get the validity bit of the element at the given index (no bounds check is performed).
+   *
+   * @param index position of element
+   * @return 1 if element at given index is not null, 0 otherwise
+   */
+  @Override
+  public int isSet(int index) {
+    return UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index);
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/unsafe/IntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/IntVector.java
new file mode 100644
index 00000000000..2b22adc874b
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/IntVector.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.unsafe;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.IntHolder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Unsafe implementation of org.apache.arrow.vector.IntVector.
+ * Compared with org.apache.arrow.vector.IntVector, it avoids checks and directly operates on the off-heap memory
+ * for better performance, so callers are responsible for passing only in-range indices.
+ */
+public class IntVector extends org.apache.arrow.vector.IntVector {
+
+  /**
+   * The number of bits to shift an element index by to convert it to a byte offset (log2 of the 4-byte width).
+   */
+  public static final byte TYPE_LOG2_WIDTH = 2;
+
+  /**
+   * Instantiate a IntVector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param allocator allocator for memory management.
+   */
+  public IntVector(String name, BufferAllocator allocator) {
+    super(name, allocator);
+  }
+
+  /**
+   * Instantiate a IntVector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param fieldType type of Field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public IntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+    super(name, fieldType, allocator);
+  }
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value retrieval methods                        |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  /**
+   * Get the element at the given index from the vector, without checking the index or the validity bit.
+   *
+   * @param index position of element
+   * @return element at given index
+   */
+  public int get(int index) throws IllegalStateException {
+    return PlatformDependent.getInt(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH));
+  }
+
+  /**
+   * Get the element at the given index from the vector and set the state in holder.
+   * If the element at the given index is null, holder.isSet will be zero and holder.value is left untouched.
+   *
+   * @param index position of element
+   * @param holder holder that receives isSet and, for a non-null element, the value
+   */
+  public void get(int index, NullableIntHolder holder) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      holder.isSet = 0;
+      return;
+    }
+    holder.isSet = 1;
+    holder.value = PlatformDependent.getInt(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH));
+  }
+
+  /**
+   * Same as {@link #get(int)}, except that the validity bit is checked first and the result is boxed.
+   *
+   * @param index position of element
+   * @return element at given index, or null if its validity bit is not set
+   */
+  public Integer getObject(int index) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      return null;
+    } else {
+      return get(index);
+    }
+  }
+
+  /**
+   * Copy a cell value from a particular index in source vector to a particular
+   * position in this vector.
+   *
+   * @param fromIndex position to copy from in source vector
+   * @param thisIndex position to copy to in this vector
+   * @param from source vector (may be either a safe or an unsafe vector)
+   */
+  public void copyFrom(int fromIndex, int thisIndex, org.apache.arrow.vector.IntVector from) {
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, thisIndex,
+            UnsafeBitVectorHelper.isValidityBitSet(from.getValidityBuffer(), fromIndex));
+    final int value = PlatformDependent.getInt(from.getDataBufferAddress() + (fromIndex << TYPE_LOG2_WIDTH));
+    this.setValue(thisIndex, value); // not set(): it would force the validity bit copied above back to 1
+  }
+
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value setter methods                           |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  private void setValue(int index, int value) {
+    PlatformDependent.putInt(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH), value);
+  }
+
+  /**
+   * Set the element at the given index to the given value and mark it as non-null.
+   *
+   * @param index position of element
+   * @param value value of element
+   */
+  public void set(int index, int value) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, value);
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   * If the value in holder is not indicated as set, the element
+   * at the given index will be null.
+   *
+   * @param index position of element
+   * @param holder nullable data holder for value of element
+   */
+  public void set(int index, NullableIntHolder holder) throws IllegalArgumentException {
+    if (holder.isSet < 0) {
+      throw new IllegalArgumentException();
+    } else if (holder.isSet > 0) {
+      UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+      setValue(index, holder.value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   *
+   * @param index position of element
+   * @param holder data holder for value of element
+   */
+  public void set(int index, IntHolder holder) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, holder.value);
+  }
+
+  /**
+   * Set the element at the given index to null.
+   *
+   * @param index position of element
+   */
+  public void setNull(int index) {
+    handleSafe(index);
+    // not really needed to set the bit to 0 as long as
+    // the buffer always starts from 0.
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+  }
+
+  /**
+   * Store the given value at a particular position in the vector. isSet indicates
+   * whether the value is NULL or not.
+   *
+   * @param index position of the new value
+   * @param isSet 0 for NULL value, 1 otherwise
+   * @param value element value
+   */
+  public void set(int index, int isSet, int value) {
+    if (isSet > 0) {
+      set(index, value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Get the validity bit of the element at the given index (no bounds check is performed).
+   *
+   * @param index position of element
+   * @return 1 if element at given index is not null, 0 otherwise
+   */
+  @Override
+  public int isSet(int index) {
+    return UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index);
+  }
+
+  /**
+   * Given a data buffer, get the value stored at a particular position
+   * in the vector. The byte offset is index shifted left by TYPE_LOG2_WIDTH; no bounds check is done.
+   *
+   * <p>This method should not be used externally.
+   *
+   * @param buffer data buffer
+   * @param index position of the element.
+   * @return value stored at the index.
+   */
+  public static int get(final ArrowBuf buffer, final int index) {
+    return PlatformDependent.getInt(buffer.memoryAddress() + (index << TYPE_LOG2_WIDTH));
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/unsafe/UnsafeBitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/UnsafeBitVectorHelper.java
new file mode 100644
index 00000000000..6675ef08c71
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/UnsafeBitVectorHelper.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.unsafe;
+
+import org.apache.arrow.vector.BitVectorHelper;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Helper class for performing generic operations on a bit vector buffer.
+ * Compared with org.apache.arrow.vector.BitVectorHelper, this implementation tries its best to avoid checks.
+ */
+public class UnsafeBitVectorHelper {
+  /**
+   * Set the bit at provided index to 1.
+   *
+   * @param validityBuffer validity buffer of the vector
+   * @param index index to be set
+   */
+  public static void setValidityBitToOne(ArrowBuf validityBuffer, int index) {
+    final int byteIndex = BitVectorHelper.byteIndex(index);
+    final int bitIndex = BitVectorHelper.bitIndex(index);
+    byte currentByte = PlatformDependent.getByte(validityBuffer.memoryAddress() + byteIndex);
+    final byte bitMask = (byte) (1L << bitIndex);
+    currentByte |= bitMask;
+    PlatformDependent.putByte(validityBuffer.memoryAddress() + byteIndex, currentByte);
+  }
+
+  /**
+   * Set the bit at a given index to provided value (1 or 0).
+   *
+   * @param validityBuffer validity buffer of the vector
+   * @param index index to be set
+   * @param value value to set: any non-zero value sets the bit, 0 clears it
+   */
+  public static void setValidityBit(ArrowBuf validityBuffer, int index, int value) {
+    final int byteIndex = BitVectorHelper.byteIndex(index);
+    final int bitIndex = BitVectorHelper.bitIndex(index);
+    byte currentByte = PlatformDependent.getByte(validityBuffer.memoryAddress() + byteIndex);
+    final byte bitMask = (byte) (1L << bitIndex);
+    if (value != 0) {
+      currentByte |= bitMask;
+    } else {
+      currentByte -= (bitMask & currentByte); // subtracts the bit's own value if it is set, i.e. clears only that bit
+    }
+    PlatformDependent.putByte(validityBuffer.memoryAddress() + byteIndex, currentByte);
+  }
+
+  /**
+   * Check if a validity bit is set, reading the byte straight from the buffer's memory address.
+   * @param validityBuffer the validity buffer.
+   * @param index index of the bit to check.
+   * @return 1 if the bit is set, and 0 otherwise.
+   */
+  public static int isValidityBitSet(ArrowBuf validityBuffer, int index) {
+    final int byteIndex = index >> 3;
+    final byte b = PlatformDependent.getByte(validityBuffer .memoryAddress() + byteIndex);
+    final int bitIndex = index & 7;
+    return (b >> bitIndex) & 0x01;
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorFactory.java
new file mode 100644
index 00000000000..30dda7a9825
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+
+/**
+ * Factory methods for creating either the safe or the unsafe implementation of a vector.
+ */
+public class VectorFactory {
+
+  /**
+   * The type of the vector.
+   */
+  public enum VectorType {
+    /**
+     * This type of vectors will do all checks, so segmentation faults can be avoided,
+     * but the performance may not be good.
+     */
+    SAFE,
+
+    /**
+     * This type of vectors will try to avoid checks, so segmentation faults can happen,
+     * but the performance can be good.
+     */
+    UNSAFE
+  }
+
+  /**
+   * Create an empty Float8Vector.
+   * @param vectorType type of the vector to create (safe or unsafe).
+   * @param name name of the vector.
+   * @param allocator the allocator to create vector.
+   * @return the created vector: the unsafe subclass for UNSAFE, the regular class for SAFE.
+   */
+  public static Float8Vector createFloat8Vector(VectorType vectorType, String name, BufferAllocator allocator) {
+    switch (vectorType) {
+      case SAFE:
+        return new org.apache.arrow.vector.Float8Vector(name, allocator);
+      case UNSAFE:
+        return new org.apache.arrow.vector.unsafe.Float8Vector(name, allocator);
+      default:
+        throw new IllegalArgumentException("Unknown vector type for Float8Vector");
+    }
+  }
+
+  /**
+   * Create an empty IntVector.
+   * @param vectorType type of the vector to create (safe or unsafe).
+   * @param name name of the vector.
+   * @param allocator the allocator to create vector.
+   * @return the created vector.
+   */
+  public static IntVector createIntVector(VectorType vectorType, String name, BufferAllocator allocator) {
+    switch (vectorType) {
+      case SAFE:
+        return new org.apache.arrow.vector.IntVector(name, allocator);
+      case UNSAFE:
+        return new org.apache.arrow.vector.unsafe.IntVector(name, allocator);
+      default:
+        throw new IllegalArgumentException("Unknown vector type for IntVector");
+    }
+  }
+}
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index f0cc4c4b882..9776f6832f3 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -43,19 +43,33 @@ import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.VectorFactory; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import io.netty.buffer.ArrowBuf; - +@RunWith(Parameterized.class) public class TestValueVector { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; + private VectorFactory.VectorType vectorType; + + public TestValueVector(VectorFactory.VectorType vectorType) { + this.vectorType = vectorType; + } + + @Parameterized.Parameters(name = "vector type: {0}") + public static List getVectorTypes() { + return Arrays.asList(VectorFactory.VectorType.SAFE, VectorFactory.VectorType.UNSAFE); + } + @Before public void init() { allocator = new RootAllocator(Long.MAX_VALUE); @@ -173,7 +187,7 @@ public void testFixedType1() { @Test /* IntVector */ public void testFixedType2() { - try (final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final IntVector intVector = VectorFactory.createIntVector(vectorType, 
EMPTY_SCHEMA_PATH, allocator)) { boolean error = false; int initialCapacity = 16; @@ -207,13 +221,16 @@ public void testFixedType2() { j++; } - try { - intVector.set(initialCapacity, j); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; + // for unsafe vectors, we do not check boundaries, so skip this assert + if (vectorType == VectorFactory.VectorType.SAFE) { + try { + intVector.set(initialCapacity, j); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } } /* check vector contents */ @@ -223,13 +240,16 @@ public void testFixedType2() { j++; } - try { - intVector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; + // for unsafe vectors, we do not check boundaries, so skip this assert + if (vectorType == VectorFactory.VectorType.SAFE) { + try { + intVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } } /* this should trigger a realloc() */ @@ -361,7 +381,7 @@ public void testFixedType3() { @Test /* Float8Vector */ public void testFixedType4() { - try (final Float8Vector floatVector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { + try (final Float8Vector floatVector = VectorFactory.createFloat8Vector(vectorType, EMPTY_SCHEMA_PATH, allocator)) { boolean error = false; int initialCapacity = 16; @@ -398,13 +418,16 @@ public void testFixedType4() { floatVector.set(12, 7.87); floatVector.set(14, 8.56); - try { - floatVector.set(initialCapacity, 9.53); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; + // for unsafe vectors, we do not check boundaries, so skip this assert + if (vectorType == VectorFactory.VectorType.SAFE) { + try { + floatVector.set(initialCapacity, 9.53); + } catch (IndexOutOfBoundsException ie) 
{ + error = true; + } finally { + assertTrue(error); + error = false; + } } /* check floatVector contents */ @@ -417,13 +440,16 @@ public void testFixedType4() { assertEquals(7.87, floatVector.get(12), 0); assertEquals(8.56, floatVector.get(14), 0); - try { - floatVector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; + // for unsafe vectors, we do not check boundaries, so skip this assert + if (vectorType == VectorFactory.VectorType.SAFE) { + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } } /* this should trigger a realloc() */