diff --git a/java/vector/src/main/java/org/apache/arrow/vector/unsafe/Float8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/Float8Vector.java
new file mode 100644
index 00000000000..0d4a590096b
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/Float8Vector.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.unsafe;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.Float8Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Unsafe implementation of org.apache.arrow.vector.Float8Vector.
+ * Compared with org.apache.arrow.vector.Float8Vector, it avoids checks and directly operates on the direct memory,
+ * so it provides much better performance.
+ */
+public class Float8Vector extends org.apache.arrow.vector.Float8Vector {
+
+  /**
+   * The number of bits to shift to multiply/divide by the type width.
+   */
+  public static final byte TYPE_LOG2_WIDTH = 3;
+
+  /**
+   * Instantiate a Float8Vector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param allocator allocator for memory management.
+   */
+  public Float8Vector(String name, BufferAllocator allocator) {
+    super(name, allocator);
+  }
+
+  /**
+   * Instantiate a Float8Vector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param fieldType type of Field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public Float8Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+    super(name, fieldType, allocator);
+  }
+
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value retrieval methods                        |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  /**
+   * Get the element at the given index from the vector.
+   *
+   * @param index position of element
+   * @return element at given index
+   */
+  @Override
+  public double get(int index) {
+    return Double.longBitsToDouble(PlatformDependent.getLong(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH)));
+  }
+
+  /**
+   * Get the element at the given index from the vector and set the state in holder.
+   * If the element at the given index is null, holder.isSet will be zero.
+   *
+   * @param index position of element
+   * @param holder nullable holder that receives the isSet flag and, if set, the value
+   */
+  public void get(int index, NullableFloat8Holder holder) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      holder.isSet = 0;
+      return;
+    }
+    holder.isSet = 1;
+    holder.value = this.get(index);
+  }
+
+  /**
+   * Same as {@link #get(int)}, but returns null if the element at the given index is not set.
+   *
+   * @param index position of element
+   * @return element at given index
+   */
+  public Double getObject(int index) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      return null;
+    } else {
+      return this.get(index);
+    }
+  }
+
+  /**
+   * Copy a cell value from a particular index in source vector to a particular
+   * position in this vector.
+   *
+   * @param fromIndex position to copy from in source vector
+   * @param thisIndex position to copy to in this vector
+   * @param from source vector
+   */
+  public void copyFrom(int fromIndex, int thisIndex, org.apache.arrow.vector.Float8Vector from) {
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, thisIndex,
+        UnsafeBitVectorHelper.isValidityBitSet(from.getValidityBuffer(), fromIndex));
+
+    // since we are not sure if the from object is an unsafe object,
+    // we get its value through the underlying buffer address (byte offset = fromIndex << TYPE_LOG2_WIDTH).
+    final double value = Double.longBitsToDouble(
+        PlatformDependent.getLong(from.getDataBufferAddress() + (fromIndex << TYPE_LOG2_WIDTH)));
+    this.set(thisIndex, value);
+  }
+
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value setter methods                           |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  private void setValue(int index, double value) {
+    PlatformDependent.putLong(
+        valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH), Double.doubleToRawLongBits(value));
+  }
+
+  /**
+   * Set the element at the given index to the given value.
+   *
+   * @param index position of element
+   * @param value value of element
+   */
+  public void set(int index, double value) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, value);
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   * If the value in holder is not indicated as set, the element at the given index will be null.
+   *
+   * @param index position of element
+   * @param holder nullable data holder for value of element
+   * @throws IllegalArgumentException if holder.isSet is negative
+   */
+  public void set(int index, NullableFloat8Holder holder) throws IllegalArgumentException {
+    if (holder.isSet < 0) {
+      throw new IllegalArgumentException();
+    } else if (holder.isSet > 0) {
+      UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+      setValue(index, holder.value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   *
+   * @param index position of element
+   * @param holder data holder for value of element
+   */
+  public void set(int index, Float8Holder holder) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, holder.value);
+  }
+
+  /**
+   * Set the element at the given index to null.
+   *
+   * @param index position of element
+   */
+  public void setNull(int index) {
+    handleSafe(index);
+    // not really needed to set the bit to 0 as long as
+    // the buffer always starts from 0.
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+  }
+
+  /**
+   * Store the given value at a particular position in the vector. isSet indicates
+   * whether the value is NULL or not.
+   *
+   * @param index position of the new value
+   * @param isSet 0 for NULL value, 1 otherwise
+   * @param value element value
+   */
+  public void set(int index, int isSet, double value) {
+    if (isSet > 0) {
+      set(index, value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Given a data buffer, get the value stored at a particular position
+   * in the vector.
+   *
+   * <p>This method should not be used externally.
+   *
+   * @param buffer data buffer
+   * @param index position of the element.
+   * @return value stored at the index.
+   */
+  public static double get(final ArrowBuf buffer, final int index) {
+    return Double.longBitsToDouble(PlatformDependent.getLong(buffer.memoryAddress() + (index << TYPE_LOG2_WIDTH)));
+  }
+
+  /**
+   * Read the validity bit of the element at the given index.
+   *
+   * @param index position of element
+   * @return 1 if element at given index is not null, 0 otherwise
+   */
+  @Override
+  public int isSet(int index) {
+    return UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index);
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/unsafe/IntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/IntVector.java
new file mode 100644
index 00000000000..2b22adc874b
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/IntVector.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.unsafe;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.IntHolder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Unsafe implementation of org.apache.arrow.vector.IntVector.
+ * Compared with org.apache.arrow.vector.IntVector, it avoids checks and directly operates on the off-heap memory,
+ * so it provides much better performance.
+ */
+public class IntVector extends org.apache.arrow.vector.IntVector {
+
+  /**
+   * The number of bits to shift to multiply/divide by the type width.
+   */
+  public static final byte TYPE_LOG2_WIDTH = 2;
+
+  /**
+   * Instantiate a IntVector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param allocator allocator for memory management.
+   */
+  public IntVector(String name, BufferAllocator allocator) {
+    super(name, allocator);
+  }
+
+  /**
+   * Instantiate a IntVector. This doesn't allocate any memory for
+   * the data in vector.
+   *
+   * @param name name of the vector
+   * @param fieldType type of Field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public IntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+    super(name, fieldType, allocator);
+  }
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value retrieval methods                        |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  /**
+   * Get the element at the given index from the vector.
+   *
+   * @param index position of element
+   * @return element at given index
+   */
+  public int get(int index) throws IllegalStateException {
+    return PlatformDependent.getInt(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH));
+  }
+
+  /**
+   * Get the element at the given index from the vector and set the state in holder.
+   * If the element at the given index is null, holder.isSet will be zero.
+   *
+   * @param index position of element
+   * @param holder nullable holder that receives the isSet flag and, if set, the value
+   */
+  public void get(int index, NullableIntHolder holder) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      holder.isSet = 0;
+      return;
+    }
+    holder.isSet = 1;
+    holder.value = PlatformDependent.getInt(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH));
+  }
+
+  /**
+   * Same as {@link #get(int)}, but returns null if the element at the given index is not set.
+   *
+   * @param index position of element
+   * @return element at given index
+   */
+  public Integer getObject(int index) {
+    if (UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index) == 0) {
+      return null;
+    } else {
+      return get(index);
+    }
+  }
+
+  /**
+   * Copy a cell value from a particular index in source vector to a particular
+   * position in this vector.
+   *
+   * @param fromIndex position to copy from in source vector
+   * @param thisIndex position to copy to in this vector
+   * @param from source vector
+   */
+  public void copyFrom(int fromIndex, int thisIndex, org.apache.arrow.vector.IntVector from) {
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, thisIndex,
+        UnsafeBitVectorHelper.isValidityBitSet(from.getValidityBuffer(), fromIndex));
+    // the value must be read at fromIndex of the source buffer, not at the destination index
+    final int value = PlatformDependent.getInt(from.getDataBufferAddress() + (fromIndex << TYPE_LOG2_WIDTH));
+    this.set(thisIndex, value);
+  }
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |          vector value setter methods                           |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  private void setValue(int index, int value) {
+    PlatformDependent.putInt(valueBuffer.memoryAddress() + (index << TYPE_LOG2_WIDTH), value);
+  }
+
+  /**
+   * Set the element at the given index to the given value.
+   *
+   * @param index position of element
+   * @param value value of element
+   */
+  public void set(int index, int value) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, value);
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   * If the value in holder is not indicated as set, the element at the given index will be null.
+   *
+   * @param index position of element
+   * @param holder nullable data holder for value of element
+   * @throws IllegalArgumentException if holder.isSet is negative
+   */
+  public void set(int index, NullableIntHolder holder) throws IllegalArgumentException {
+    if (holder.isSet < 0) {
+      throw new IllegalArgumentException();
+    } else if (holder.isSet > 0) {
+      UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+      setValue(index, holder.value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Set the element at the given index to the value set in data holder.
+   *
+   * @param index position of element
+   * @param holder data holder for value of element
+   */
+  public void set(int index, IntHolder holder) {
+    UnsafeBitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    setValue(index, holder.value);
+  }
+
+  /**
+   * Set the element at the given index to null.
+   *
+   * @param index position of element
+   */
+  public void setNull(int index) {
+    handleSafe(index);
+    // not really needed to set the bit to 0 as long as
+    // the buffer always starts from 0.
+    UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+  }
+
+  /**
+   * Store the given value at a particular position in the vector. isSet indicates
+   * whether the value is NULL or not.
+   *
+   * @param index position of the new value
+   * @param isSet 0 for NULL value, 1 otherwise
+   * @param value element value
+   */
+  public void set(int index, int isSet, int value) {
+    if (isSet > 0) {
+      set(index, value);
+    } else {
+      UnsafeBitVectorHelper.setValidityBit(validityBuffer, index, 0);
+    }
+  }
+
+  /**
+   * Read the validity bit of the element at the given index.
+   *
+   * @param index position of element
+   * @return 1 if element at given index is not null, 0 otherwise
+   */
+  @Override
+  public int isSet(int index) {
+    return UnsafeBitVectorHelper.isValidityBitSet(validityBuffer, index);
+  }
+
+  /**
+   * Given a data buffer, get the value stored at a particular position
+   * in the vector.
+   *
+   * <p>This method should not be used externally.
+   *
+   * @param buffer data buffer
+   * @param index position of the element.
+   * @return value stored at the index.
+   */
+  public static int get(final ArrowBuf buffer, final int index) {
+    return PlatformDependent.getInt(buffer.memoryAddress() + (index << TYPE_LOG2_WIDTH));
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/unsafe/UnsafeBitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/UnsafeBitVectorHelper.java
new file mode 100644
index 00000000000..6675ef08c71
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/unsafe/UnsafeBitVectorHelper.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.unsafe;
+
+import org.apache.arrow.vector.BitVectorHelper;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Static helpers for reading and writing single bits of a validity buffer via its raw memory address.
+ * Compared with org.apache.arrow.vector.BitVectorHelper, this implementation tries its best to avoid checks.
+ * All methods address the buffer through {@code memoryAddress()} and Netty's PlatformDependent.
+ */
+public class UnsafeBitVectorHelper {
+  /**
+   * Turn on (set to 1) the bit at the provided index.
+   *
+   * @param validityBuffer validity buffer of the vector
+   * @param index index of the bit to set
+   */
+  public static void setValidityBitToOne(ArrowBuf validityBuffer, int index) {
+    final int byteOffset = BitVectorHelper.byteIndex(index);
+    final byte mask = (byte) (1L << BitVectorHelper.bitIndex(index));
+    // read-modify-write of the one byte that holds the bit
+    final byte current = PlatformDependent.getByte(validityBuffer.memoryAddress() + byteOffset);
+    final byte updated = (byte) (current | mask);
+    PlatformDependent.putByte(validityBuffer.memoryAddress() + byteOffset, updated);
+  }
+
+  /**
+   * Write the provided value (1 or 0) into the bit at the given index.
+   * Any non-zero value sets the bit; zero clears it.
+   *
+   * @param validityBuffer validity buffer of the vector
+   * @param index index of the bit to write
+   * @param value value to set
+   */
+  public static void setValidityBit(ArrowBuf validityBuffer, int index, int value) {
+    final int byteOffset = BitVectorHelper.byteIndex(index);
+    final byte mask = (byte) (1L << BitVectorHelper.bitIndex(index));
+    final byte current = PlatformDependent.getByte(validityBuffer.memoryAddress() + byteOffset);
+    // OR-ing turns the bit on; AND-ing with the inverted mask clears only that bit
+    final byte updated = (value != 0) ? (byte) (current | mask) : (byte) (current & ~mask);
+    PlatformDependent.putByte(validityBuffer.memoryAddress() + byteOffset, updated);
+  }
+
+  /**
+   * Report whether the validity bit at the given index is set.
+   *
+   * @param validityBuffer the validity buffer.
+   * @param index index of the bit to check.
+   * @return 1 if the bit is set, and 0 otherwise.
+   */
+  public static int isValidityBitSet(ArrowBuf validityBuffer, int index) {
+    // locate the byte that holds the bit (8 bits per byte)
+    final int byteOffset = index >> 3;
+    final byte packedBits = PlatformDependent.getByte(validityBuffer.memoryAddress() + byteOffset);
+    // shift the wanted bit down to position 0 and mask everything else away
+    return (packedBits >> (index & 7)) & 0x01;
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorFactory.java
new file mode 100644
index 00000000000..30dda7a9825
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+
+/**
+ * Factory methods for creating vectors in either their safe or unsafe flavor.
+ */
+public class VectorFactory {
+
+  /**
+   * The flavor of vector a factory method should create.
+   */
+  public enum VectorType {
+    /**
+     * Vectors of this type perform all checks, so segmentation faults can be avoided,
+     * but the performance may not be good.
+     */
+    SAFE,
+
+    /**
+     * Vectors of this type try to avoid checks, so segmentation faults can happen,
+     * but the performance can be good.
+     */
+    UNSAFE
+  }
+
+  /**
+   * Build a new, empty Float8Vector of the requested flavor.
+   * @param vectorType type of the vector to create (safe or unsafe).
+   * @param name name of the vector.
+   * @param allocator the allocator used to create the vector.
+   * @return the created vector.
+   */
+  public static Float8Vector createFloat8Vector(VectorType vectorType, String name, BufferAllocator allocator) {
+    switch (vectorType) {
+      case UNSAFE:
+        return new org.apache.arrow.vector.unsafe.Float8Vector(name, allocator);
+      case SAFE:
+        return new Float8Vector(name, allocator);
+      default:
+        throw new IllegalArgumentException("Unknown vector type for Float8Vector");
+    }
+  }
+
+  /**
+   * Build a new, empty IntVector of the requested flavor.
+   * @param vectorType type of the vector to create (safe or unsafe).
+   * @param name name of the vector.
+   * @param allocator the allocator used to create the vector.
+   * @return the created vector.
+   */
+  public static IntVector createIntVector(VectorType vectorType, String name, BufferAllocator allocator) {
+    switch (vectorType) {
+      case UNSAFE:
+        return new org.apache.arrow.vector.unsafe.IntVector(name, allocator);
+      case SAFE:
+        return new IntVector(name, allocator);
+      default:
+        throw new IllegalArgumentException("Unknown vector type for IntVector");
+    }
+  }
+}
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index f0cc4c4b882..9776f6832f3 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -43,19 +43,33 @@
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.arrow.vector.util.Text;
import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.VectorFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
import io.netty.buffer.ArrowBuf;
-
+@RunWith(Parameterized.class)
public class TestValueVector {
private static final String EMPTY_SCHEMA_PATH = "";
private BufferAllocator allocator;
+ private VectorFactory.VectorType vectorType;
+
+ public TestValueVector(VectorFactory.VectorType vectorType) {
+ this.vectorType = vectorType;
+ }
+
+ @Parameterized.Parameters(name = "vector type: {0}")
+ public static List