From 15fde9d2b29612328e4ee5349fc351dc86a27f04 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Fri, 9 Sep 2016 11:59:59 -0700 Subject: [PATCH 1/2] ARROW-287: Make nullable vectors use a BitVecor instead of UInt1Vector for bits --- .../codegen/templates/NullableValueVectors.java | 6 +++--- .../java/org/apache/arrow/vector/BitVector.java | 16 ++++++++++++++-- .../apache/arrow/vector/complex/ListVector.java | 6 +++--- .../arrow/vector/complex/NullableMapVector.java | 8 ++++---- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index bb2c0012160..486cfeefc7a 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -53,7 +53,7 @@ public final class ${className} extends BaseDataValueVector implements <#if type private final String valuesField = "$values$"; private final Field field; - final UInt1Vector bits = new UInt1Vector(bitsField, allocator); + final BitVector bits = new BitVector(bitsField, allocator); final ${valuesName} values; private final Mutator mutator; @@ -446,7 +446,7 @@ public void copyFromSafe(int fromIndex, int thisIndex, Nullable${minor.class}Vec } public final class Accessor extends BaseDataValueVector.BaseAccessor <#if type.major = "VarLen">implements VariableWidthVector.VariableWidthAccessor { - final UInt1Vector.Accessor bAccessor = bits.getAccessor(); + final BitVector.Accessor bAccessor = bits.getAccessor(); final ${valuesName}.Accessor vAccessor = values.getAccessor(); /** @@ -545,7 +545,7 @@ public void setIndexDefined(int index){ public void set(int index, <#if type.major == "VarLen">byte[]<#elseif (type.width < 4)>int<#else>${minor.javaType!type.javaType} value) { setCount++; final ${valuesName}.Mutator valuesMutator = values.getMutator(); - final UInt1Vector.Mutator bitsMutator = bits.getMutator(); + final BitVector.Mutator bitsMutator = bits.getMutator(); <#if type.major == "VarLen"> for (int i = lastSet + 1; i < index; i++) { valuesMutator.set(i, emptyByteArray); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index fee6e9cdef7..c12db5045c2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -17,8 +17,6 @@ */ package org.apache.arrow.vector; -import io.netty.buffer.ArrowBuf; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -29,6 +27,8 @@ import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.TransferPair; +import io.netty.buffer.ArrowBuf; + /** * Bit implements a vector of bit-width values. Elements in the vector are accessed by position from the logical start * of the vector. The width of each element is 1 bit. The equivalent Java primitive is an int containing the value '0' @@ -435,6 +435,18 @@ public final void generateTestData(int values) { setValueCount(values); } + public void generateTestDataAlt(int size) { + setValueCount(size); + boolean even = true; + final int valueCount = getAccessor().getValueCount(); + for(int i = 0; i < valueCount; i++, even = !even) { + if(even){ + set(i, (byte) 1); + }else{ + set(i, (byte) 0); + } + } + } } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 2984c362514..dd99c734f7f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -28,9 +28,9 @@ import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BaseDataValueVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; @@ -55,7 +55,7 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector { final UInt4Vector offsets; - final UInt1Vector bits; + final BitVector bits; private final List innerVectors; private Mutator mutator = new Mutator(); private Accessor accessor = new Accessor(); @@ -65,7 +65,7 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector { public ListVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, allocator); - this.bits = new UInt1Vector("$bits$", allocator); + this.bits = new BitVector("$bits$", allocator); this.offsets = getOffsetVector(); this.innerVectors = Collections.unmodifiableList(Arrays.asList(bits, offsets)); this.writer = new UnionListWriter(this); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index 6b257c095d2..8e1bbfabdc9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -25,10 +25,10 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.BaseDataValueVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullableVectorDefinitionSetter; -import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.impl.NullableMapReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -45,7 +45,7 @@ public class NullableMapVector extends MapVector implements FieldVector { private final NullableMapReaderImpl reader = new NullableMapReaderImpl(this); - protected final UInt1Vector bits; + protected final BitVector bits; private final List innerVectors; @@ -54,7 +54,7 @@ public class NullableMapVector extends MapVector implements FieldVector { public NullableMapVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, checkNotNull(allocator), callBack); - this.bits = new UInt1Vector("$bits$", allocator); + this.bits = new BitVector("$bits$", allocator); this.innerVectors = Collections.unmodifiableList(Arrays.asList(bits)); this.accessor = new Accessor(); this.mutator = new Mutator(); @@ -186,7 +186,7 @@ public boolean allocateNewSafe() { return success; } public final class Accessor extends MapVector.Accessor { - final UInt1Vector.Accessor bAccessor = bits.getAccessor(); + final BitVector.Accessor bAccessor = bits.getAccessor(); @Override public Object getObject(int index) { From d4e5084411841896d6777553ea57194a5dc2c1fd Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 13 Sep 2016 09:29:02 -0700 Subject: [PATCH 2/2] add nullable vector test that verifies Bit based buffers --- .../apache/arrow/vector/TestValueVector.java | 67 +++++++++++++++++-- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 21cdc4f4d8d..124452e96ee 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -17,17 +17,23 @@ */ package org.apache.arrow.vector; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.nio.charset.Charset; +import java.util.List; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.schema.TypeLayout; import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.types.pojo.Field; import org.junit.After; import org.junit.Before; import org.junit.Test; -import java.nio.charset.Charset; - -import static org.junit.Assert.*; +import io.netty.buffer.ArrowBuf; public class TestValueVector { @@ -223,6 +229,59 @@ public void testNullableFloat() { } } + @Test + public void testNullableInt() { + // Create a new value vector for 1024 integers + try (final NullableIntVector vector = (NullableIntVector) MinorType.INT.getNewVector(EMPTY_SCHEMA_PATH, allocator, null)) { + final NullableIntVector.Mutator m = vector.getMutator(); + vector.allocateNew(1024); + + // Put and set a few values. + m.set(0, 1); + m.set(1, 2); + m.set(100, 3); + m.set(1022, 4); + m.set(1023, 5); + + m.setValueCount(1024); + + final NullableIntVector.Accessor accessor = vector.getAccessor(); + assertEquals(1, accessor.get(0)); + assertEquals(2, accessor.get(1)); + assertEquals(3, accessor.get(100)); + assertEquals(4, accessor.get(1022)); + assertEquals(5, accessor.get(1023)); + + // Ensure null values. + assertTrue(vector.getAccessor().isNull(3)); + + Field field = vector.getField(); + TypeLayout typeLayout = field.getTypeLayout(); + + List buffers = vector.getFieldBuffers(); + + assertEquals(2, typeLayout.getVectors().size()); + assertEquals(2, buffers.size()); + + ArrowBuf validityVectorBuf = buffers.get(0); + assertEquals(128, validityVectorBuf.readableBytes()); + assertEquals(3, validityVectorBuf.getByte(0)); // 1st and second bit defined + for (int i = 1; i < 12; i++) { + assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined until 100 + } + assertEquals(16, validityVectorBuf.getByte(12)); // 100th bit is defined (12 * 8 + 4) + for (int i = 13; i < 127; i++) { + assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined between 100th and 1022nd + } + assertEquals(-64, validityVectorBuf.getByte(127)); // 1022nd and 1023rd bit defined + + vector.allocateNew(2048); + // vector has been erased + assertTrue(vector.getAccessor().isNull(0)); + } + } + + @Test public void testBitVector() { // Create a new value vector for 1024 integers