From 7fe58d53870631405d27eb22bf8ea7ad641f805b Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Wed, 17 Apr 2024 15:20:34 +0530 Subject: [PATCH 01/34] feat: adding initial interface for ListView --- .../complex/BaseRepeatedValueViewVector.java | 143 ++++++++ .../arrow/vector/complex/ListViewVector.java | 309 ++++++++++++++++++ 2 files changed, 452 insertions(+) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java new file mode 100644 index 00000000000..14439b43fa4 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -0,0 +1,143 @@ +package org.apache.arrow.vector.complex; + +import java.util.Iterator; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.util.CallBack; + +public abstract class BaseRepeatedValueViewVector extends BaseValueVector + implements RepeatedValueVector, BaseListVector { + + public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; + public static final String DATA_VECTOR_NAME = "$data$"; + + public static final byte OFFSET_WIDTH = 4; + public static final byte SIZE_WIDTH = 4; + protected ArrowBuf offsetBuffer; + protected ArrowBuf sizeBuffer; + protected FieldVector vector; + protected final CallBack repeatedCallBack; + protected int valueCount; + protected long 
offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; + private final String name; + + protected String defaultDataVectorName = DATA_VECTOR_NAME; + + protected BaseRepeatedValueViewVector(String name, BufferAllocator allocator, CallBack callBack) { + this(name, allocator, DEFAULT_DATA_VECTOR, callBack); + } + + protected BaseRepeatedValueViewVector( + String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { + super(allocator); + this.name = name; + this.offsetBuffer = allocator.getEmpty(); + this.sizeBuffer = allocator.getEmpty(); + this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); + this.repeatedCallBack = callBack; + this.valueCount = 0; + } + + @Override + public String getName() { + return null; + } + + @Override + public boolean allocateNewSafe() { + return false; + } + + @Override + public void reAlloc() {} + + @Override + public FieldVector getDataVector() { + return vector; + } + + @Override + public void setInitialCapacity(int numRecords) { + + } + + @Override + public void setInitialCapacity(int numRecords, double density) { + + } + + public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + + } + + @Override + public int getValueCapacity() { + return 0; + } + + @Override + public int getBufferSize() { + return 0; + } + + @Override + public int getBufferSizeFor(int valueCount) { + return 0; + } + + @Override + public Iterator iterator() { + return null; + } + + @Override + public void clear() { + + } + + @Override + public void reset() { + + } + + @Override + public ArrowBuf[] getBuffers(boolean clear) { + return new ArrowBuf[0]; + } + + @Override + public int getValueCount() { + return valueCount; + } + + @Override + public boolean isNull(int index) { + return false; + } + + @Override + public void setValueCount(int valueCount) { + + } + + public boolean isEmpty(int index) { + 
return false; + } + + public int startNewValue(int index) { + return 0; + } + + @Override + @Deprecated + public UInt4Vector getOffsetVector() { + throw new UnsupportedOperationException("There is no inner offset vector"); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java new file mode 100644 index 00000000000..baaaf280d6e --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -0,0 +1,309 @@ +package org.apache.arrow.vector.complex; + +import static java.util.Collections.singletonList; + +import java.util.List; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.TransferPair; + +public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector { + + protected ArrowBuf validityBuffer; + protected UnionListReader 
reader; + private CallBack callBack; + protected Field field; + protected int validityAllocationSizeInBytes; + + /** + * The maximum index that is actually set. + */ + protected int lastSet; + + public static ListViewVector empty(String name, BufferAllocator allocator) { + return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null); + } + + public ListViewVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { + this(new Field(name, fieldType, null), allocator, callBack); + } + + public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field.getName(), allocator, callBack); + this.validityBuffer = allocator.getEmpty(); + this.field = field; + this.callBack = callBack; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + this.lastSet = -1; + } + + + + @Override + public void initializeChildrenFromFields(List children) { + + } + + @Override + public void setInitialCapacity(int numRecords) { + + } + + @Override + public void setInitialCapacity(int numRecords, double density) { + + } + + @Override + public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + super.setInitialTotalCapacity(numRecords, totalNumberOfElements); + } + + @Override + public List getChildrenFromFields() { + return singletonList(getDataVector()); + } + + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + + } + + @Override + public List getFieldBuffers() { + return null; + } + + @Override + public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { + + } + + @Override + public void allocateNew() throws OutOfMemoryException { + + } + + @Override + public boolean allocateNewSafe() { + boolean success = false; + return success; + } + + @Override + public void reAlloc() { + + } + + @Override + public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { + } + + 
@Override + public void copyFrom(int inIndex, int outIndex, ValueVector from) { + + } + + @Override + public FieldVector getDataVector() { + return vector; + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return getTransferPair(ref, allocator, null); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + throw new UnsupportedOperationException(); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + throw new UnsupportedOperationException(); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + throw new UnsupportedOperationException(); + } + + @Override + public long getValidityBufferAddress() { + return validityBuffer.memoryAddress(); + } + + @Override + public long getDataBufferAddress() { + throw new UnsupportedOperationException(); + } + + @Override + public long getOffsetBufferAddress() { + return offsetBuffer.memoryAddress(); + } + + @Override + public ArrowBuf getValidityBuffer() { + return validityBuffer; + } + + @Override + public ArrowBuf getDataBuffer() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrowBuf getOffsetBuffer() { + return offsetBuffer; + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return 0; + } + + @Override + public OUT accept(VectorVisitor visitor, IN value) { + throw new UnsupportedOperationException(); + } + + @Override + protected FieldReader getReaderImpl() { + throw new UnsupportedOperationException(); + } + + @Override + public UnionListReader getReader() { + throw new UnsupportedOperationException(); + } + + @Override + public int 
getBufferSize() { + return 0; + } + + @Override + public int getBufferSizeFor(int valueCount) { + return 0; + } + + @Override + public Field getField() { return null; } + + @Override + public MinorType getMinorType() { + return MinorType.LIST; + } + + @Override + public void clear() { + } + + @Override + public void reset() { + } + + @Override + public ArrowBuf[] getBuffers(boolean clear) { + return new ArrowBuf[0]; + } + + @Override + public List getObject(int index) { + return null; + } + + @Override + public boolean isNull(int index) { + return false; + } + + @Override + public boolean isEmpty(int index) { + return false; + } + + @Override + public int getNullCount() { + return 0; + } + + @Override + public int getValueCapacity() { + return 0; + } + + @Override + public void setNull(int index) { + + } + + @Override + public int startNewValue(int index) { + return 0; + } + + @Override + public void setValueCount(int valueCount) { + + } + + @Override + public int getElementStartIndex(int index) { + return 0; + } + + @Override + public int getElementEndIndex(int index) { + return 0; + } + + @Override + public AddOrGetResult addOrGetVector(FieldType type) { + return null; + } + + @Override + public UnionVector promoteToUnion() { + return null; + } + + @Deprecated + @Override + public List getFieldInnerVectors() { + throw new UnsupportedOperationException("There are no inner vectors. 
Use getFieldBuffers"); + } + + +} From 36dae51c6c57af535a785016d814021b7421be29 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 18 Apr 2024 17:22:01 +0530 Subject: [PATCH 02/34] feat: adding non-functional interfaces to get a functional build --- .../binder/ColumnBinderArrowTypeVisitor.java | 5 +++ .../jdbc/utils/AvaticaParameterBinder.java | 5 +++ .../arrow/driver/jdbc/utils/ConvertUtils.java | 5 +++ .../src/main/codegen/data/ArrowTypes.tdd | 7 ++- .../codegen/templates/UnionListWriter.java | 2 +- .../org/apache/arrow/vector/BufferLayout.java | 13 +++++- .../org/apache/arrow/vector/TypeLayout.java | 20 ++++++++- .../complex/BaseRepeatedValueViewVector.java | 21 ++++++++- .../arrow/vector/complex/ListViewVector.java | 43 ++++++++++++++++--- .../vector/complex/impl/PromotableWriter.java | 26 +++++++++++ .../org/apache/arrow/vector/types/Types.java | 21 +++++++++ 11 files changed, 156 insertions(+), 12 deletions(-) diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index 7d50676688e..7420a8c23dd 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -256,4 +256,9 @@ public ColumnBinder visit(ArrowType.Interval type) { public ColumnBinder visit(ArrowType.Duration type) { throw new UnsupportedOperationException("No column binder implemented for type " + type); } + + @Override + public ColumnBinder visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("No column binder implemented for type " + type); + } } diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index fd9127c2269..70a58ff440e 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -254,6 +254,11 @@ public Boolean visit(ArrowType.Interval type) { public Boolean visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + + @Override + public Boolean visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("Binding is not yet supported for type " + type); + } } } diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java index 93b5faaef32..6ec33fafcfa 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java @@ -274,6 +274,11 @@ public AvaticaParameter visit(ArrowType.Interval type) { public AvaticaParameter visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).createParameter(field); } + + @Override + public AvaticaParameter visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("AvaticaParameter not yet supported for type " + type); + } } } diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 9fe40f2319b..99578bd5031 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -129,6 +129,11 @@ name: "Duration", fields: [{name: "unit", type: short, valueType: TimeUnit}], complex: false - } + }, + 
{ + name: "ListView", + fields: [], + complex: true + }, ] } diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 5c0565ee271..9d04d87d871 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -26,7 +26,7 @@ import java.math.BigDecimal; <@pp.dropOutputFile /> -<#list ["List", "LargeList"] as listName> +<#list ["List", "ListView", "LargeList"] as listName> <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java index 9725693348a..4eeb92a0c91 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java @@ -28,12 +28,18 @@ public class BufferLayout { /** * Enumeration of the different logical types a buffer can have. + * Data buffer is common to most of the layouts. + * Offset buffer is used for variable width types. + * Validity buffer is used for nullable types. + * Type buffer is used for Union types. + * Size buffer is used for ListView and LargeListView types. 
*/ public enum BufferType { DATA("DATA"), OFFSET("OFFSET"), VALIDITY("VALIDITY"), - TYPE("TYPE_ID"); + TYPE("TYPE_ID"), + SIZE("SIZE"); private final String name; @@ -57,6 +63,7 @@ public String getName() { private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32); private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16); private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8); + private static final BufferLayout SIZE_BUFFER = new BufferLayout(BufferType.SIZE, 32); public static BufferLayout typeBuffer() { return TYPE_BUFFER; @@ -70,6 +77,10 @@ public static BufferLayout largeOffsetBuffer() { return LARGE_OFFSET_BUFFER; } + public static BufferLayout sizeBuffer() { + return SIZE_BUFFER; + } + /** * Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128 * inclusive. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index 18032528c86..ea92efdc55f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -101,7 +101,7 @@ public TypeLayout visit(Timestamp type) { } @Override - public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public TypeLayout visit(ArrowType.List type) { List vectors = asList( BufferLayout.validityVector(), BufferLayout.offsetBuffer() @@ -109,6 +109,16 @@ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) return new TypeLayout(vectors); } + @Override + public TypeLayout visit(ArrowType.ListView type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer(), + BufferLayout.sizeBuffer() + ); + return new TypeLayout(vectors); + } + @Override public TypeLayout visit(ArrowType.LargeList type) { List vectors = asList( @@ -312,11 +322,17 @@ public Integer 
visit(Timestamp type) { } @Override - public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public Integer visit(ArrowType.List type) { // validity buffer + offset buffer return 2; } + @Override + public Integer visit(ArrowType.ListView type) { + // validity buffer + offset buffer + size buffer + return 3; + } + @Override public Integer visit(ArrowType.LargeList type) { // validity buffer + offset buffer diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 14439b43fa4..4f083207973 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -1,6 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.arrow.vector.complex; import java.util.Iterator; + import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -56,7 +74,8 @@ public boolean allocateNewSafe() { } @Override - public void reAlloc() {} + public void reAlloc() { + } @Override public FieldVector getDataVector() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index baaaf280d6e..f53f74e8dc9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -1,19 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.arrow.vector.complex; import static java.util.Collections.singletonList; import java.util.List; + import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.UnionListReader; @@ -43,11 +57,26 @@ public static ListViewVector empty(String name, BufferAllocator allocator) { return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null); } + /** + * Constructs a new instance. + * + * @param name The name of the instance. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param fieldType The type of this list. + * @param callBack A schema change callback. + */ public ListViewVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { this(new Field(name, fieldType, null), allocator, callBack); } - public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param callBack A schema change callback. 
+ */ + public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { super(field.getName(), allocator, callBack); this.validityBuffer = allocator.getEmpty(); this.field = field; @@ -219,7 +248,9 @@ public int getBufferSizeFor(int valueCount) { } @Override - public Field getField() { return null; } + public Field getField() { + return null; + } @Override public MinorType getMinorType() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index 7f724829ef1..a379156bb72 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -54,6 +55,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter { private final AbstractStructVector parentContainer; private final ListVector listVector; + private final ListViewVector listViewVector; private final FixedSizeListVector fixedListVector; private final LargeListVector largeListVector; private final NullableStructWriterFactory nullableStructWriterFactory; @@ -94,6 +96,7 @@ public PromotableWriter( NullableStructWriterFactory nullableStructWriterFactory) { this.parentContainer = parentContainer; this.listVector = null; + this.listViewVector = null; this.fixedListVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; @@ -142,6 +145,27 @@ public PromotableWriter( ListVector listVector, 
NullableStructWriterFactory nullableStructWriterFactory) { this.listVector = listVector; + this.listViewVector = null; + this.parentContainer = null; + this.fixedListVector = null; + this.largeListVector = null; + this.nullableStructWriterFactory = nullableStructWriterFactory; + init(v); + } + + /** + * Constructs a new instance. + * + * @param v The vector to initialize the writer with. + * @param listViewVector The vector that serves as a parent of v. + * @param nullableStructWriterFactory The factory to create the delegate writer. + */ + public PromotableWriter( + ValueVector v, + ListViewVector listViewVector, + NullableStructWriterFactory nullableStructWriterFactory) { + this.listViewVector = listViewVector; + this.listVector = null; this.parentContainer = null; this.fixedListVector = null; this.largeListVector = null; @@ -163,6 +187,7 @@ public PromotableWriter( this.fixedListVector = fixedListVector; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); @@ -183,6 +208,7 @@ public PromotableWriter( this.fixedListVector = null; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 89d8441d42a..e10a65e3b2c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -71,6 +71,7 @@ import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import 
org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -136,6 +137,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; @@ -692,6 +694,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new UnionListWriter((ListVector) vector); } }, + LISTVIEW(ListView.INSTANCE) { + @Override + public FieldVector getNewVector( + Field field, + BufferAllocator allocator, + CallBack schemaChangeCallback) { + return new ListViewVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new UnionListWriter((ListVector) vector); + } + }, LARGELIST(ArrowType.LargeList.INSTANCE) { @Override public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { @@ -1064,6 +1080,11 @@ public MinorType visit(Duration type) { return MinorType.DURATION; } + @Override + public MinorType visit(ListView type) { + return MinorType.LISTVIEW; + } + @Override public MinorType visit(ExtensionType type) { return MinorType.EXTENSIONTYPE; From 4c50a783e0af5278ed10d13076c534474e905788 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 23 Apr 2024 19:03:05 +0530 Subject: [PATCH 03/34] feat: initial functional listview alpha version --- .../codegen/templates/UnionListWriter.java | 22 ++ .../complex/BaseRepeatedValueViewVector.java | 145 ++++++++++++- .../arrow/vector/complex/ListViewVector.java | 198 +++++++++++++++++- .../vector/complex/impl/PromotableWriter.java | 4 + .../arrow/vector/TestListViewVector.java | 138 
++++++++++++ 5 files changed, 497 insertions(+), 10 deletions(-) create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 9d04d87d871..eeb964c055f 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -59,6 +59,10 @@ public class Union${listName}Writer extends AbstractFieldWriter { private static final int OFFSET_WIDTH = 4; + <#if listName = "ListView"> + private static final long SIZE_WIDTH = 4; + + public Union${listName}Writer(${listName}Vector vector) { this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); } @@ -193,6 +197,24 @@ public void endList() { setPosition(idx() + 1); listStarted = false; } + <#elseif listName == "ListView"> + @Override + public void startList() { + vector.startNewValue(idx()); + writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH)); + listStarted = true; + } + + @Override + public void endList() { + int sizeUptoIdx = 0; + for (int i = 0; i < idx(); i++) { + sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); + } + vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); + setPosition(idx() + 1); + listStarted = false; + } <#else> @Override public void startList() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 4f083207973..01b326749a3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -17,17 +17,26 @@ package org.apache.arrow.vector.complex; +import static 
org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; + import java.util.Iterator; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; public abstract class BaseRepeatedValueViewVector extends BaseValueVector implements RepeatedValueVector, BaseListVector { @@ -70,11 +79,100 @@ public String getName() { @Override public boolean allocateNewSafe() { - return false; + boolean dataAlloc = false; + try { + offsetBuffer = allocateOffsetBuffer(offsetAllocationSizeInBytes); + sizeBuffer = allocateSizeBuffer(sizeAllocationSizeInBytes); + dataAlloc = vector.allocateNewSafe(); + } catch (Exception e) { + e.printStackTrace(); + clear(); + return false; + } finally { + if (!dataAlloc) { + clear(); + } + } + return dataAlloc; + } + + protected ArrowBuf allocateOffsetBuffer(final long size) { + final int curSize = (int) size; + ArrowBuf offsetBuffer = allocator.buffer(curSize); + offsetBuffer.readerIndex(0); + offsetAllocationSizeInBytes = curSize; + offsetBuffer.setZero(0, offsetBuffer.capacity()); + return offsetBuffer; + } + + protected ArrowBuf allocateSizeBuffer(final long size) { + final int curSize = (int) size; + ArrowBuf sizeBuffer = allocator.buffer(curSize); + sizeBuffer.readerIndex(0); + sizeAllocationSizeInBytes = curSize; + 
sizeBuffer.setZero(0, sizeBuffer.capacity()); + return sizeBuffer; } @Override public void reAlloc() { + reallocOffsetBuffer(); + reallocSizeBuffer(); + vector.reAlloc(); + } + + protected void reallocOffsetBuffer() { + final long currentBufferCapacity = offsetBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (offsetAllocationSizeInBytes > 0) { + newAllocationSize = offsetAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + offsetBuffer.getReferenceManager().release(1); + offsetBuffer = newBuf; + offsetAllocationSizeInBytes = newAllocationSize; + } + + protected void reallocSizeBuffer() { + final long currentBufferCapacity = sizeBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (sizeAllocationSizeInBytes > 0) { + newAllocationSize = sizeAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) SIZE_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= sizeBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf 
newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, sizeBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + sizeBuffer.getReferenceManager().release(1); + sizeBuffer = newBuf; + sizeAllocationSizeInBytes = newAllocationSize; } @Override @@ -101,6 +199,14 @@ public int getValueCapacity() { return 0; } + protected int getOffsetBufferValueCapacity() { + return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); + } + + protected int getSizeBufferValueCapacity() { + return capAtMaxInt(sizeBuffer.capacity() / SIZE_WIDTH); + } + @Override public int getBufferSize() { return 0; @@ -118,7 +224,11 @@ public Iterator iterator() { @Override public void clear() { - + offsetBuffer = releaseBuffer(offsetBuffer); + sizeBuffer = releaseBuffer(sizeBuffer); + vector.clear(); + valueCount = 0; + super.clear(); } @Override @@ -146,6 +256,37 @@ public void setValueCount(int valueCount) { } + /** + * Initialize the data vector (and execute callback) if it hasn't already been done, + * returns the data vector. + */ + public AddOrGetResult addOrGetVector(FieldType fieldType) { + boolean created = false; + if (vector instanceof NullVector) { + vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, repeatedCallBack); + // returned vector must have the same field + created = true; + if (repeatedCallBack != null && + // not a schema change if changing from ZeroVector to ZeroVector + (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { + repeatedCallBack.doWork(); + } + } + + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { + final String msg = String.format("Inner vector type mismatch. 
Requested type: [%s], actual type: [%s]", + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); + throw new SchemaChangeRuntimeException(msg); + } + + return new AddOrGetResult<>((T) vector, created); + } + + protected void replaceDataVector(FieldVector v) { + vector.clear(); + vector = v; + } + public boolean isEmpty(int index) { return false; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index f53f74e8dc9..5cee7a1bfc1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -18,19 +18,24 @@ package org.apache.arrow.vector.complex; import static java.util.Collections.singletonList; +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; import java.util.List; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.Types.MinorType; @@ -38,6 +43,8 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import 
org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.JsonStringArrayList; +import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.TransferPair; public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector { @@ -54,7 +61,7 @@ public class ListViewVector extends BaseRepeatedValueViewVector implements Promo protected int lastSet; public static ListViewVector empty(String name, BufferAllocator allocator) { - return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null); + return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.ListView.INSTANCE), null); } /** @@ -129,18 +136,79 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long @Override public void allocateNew() throws OutOfMemoryException { - + if (!allocateNewSafe()) { + throw new OutOfMemoryException("Failure while allocating memory"); + } } @Override public boolean allocateNewSafe() { boolean success = false; + try { + /* release the current buffers, hence this is a new allocation */ + clear(); + /* allocate validity buffer */ + allocateValidityBuffer(validityAllocationSizeInBytes); + /* allocate offset, data and sizes buffer */ + success = super.allocateNewSafe(); + } finally { + if (!success) { + clear(); + } + } return success; } + protected void allocateValidityBuffer(final long size) { + final int curSize = (int) size; + validityBuffer = allocator.buffer(curSize); + validityBuffer.readerIndex(0); + validityAllocationSizeInBytes = curSize; + validityBuffer.setZero(0, validityBuffer.capacity()); + } + @Override public void reAlloc() { + /* reallocate the validity buffer */ + reallocValidityBuffer(); + /* reallocate the offset, size, and data */ + super.reAlloc(); + } + + protected void reallocValidityAndSizeAndOffsetBuffers() { + reallocOffsetBuffer(); + reallocValidityBuffer(); + reallocSizeBuffer(); + } + + private void 
reallocValidityBuffer() { + final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); + long newAllocationSize = getNewAllocationSize(currentBufferCapacity); + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + validityBuffer.getReferenceManager().release(1); + validityBuffer = newBuf; + validityAllocationSizeInBytes = (int) newAllocationSize; + } + + private long getNewAllocationSize(int currentBufferCapacity) { + long newAllocationSize = currentBufferCapacity * 2L; + if (newAllocationSize == 0) { + if (validityAllocationSizeInBytes > 0) { + newAllocationSize = validityAllocationSizeInBytes; + } else { + newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; + } + } + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + assert newAllocationSize >= 1; + if (newAllocationSize > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + return newAllocationSize; } @Override @@ -212,6 +280,14 @@ public ArrowBuf getOffsetBuffer() { return offsetBuffer; } + public ArrowBuf getSizeBuffer() { + return sizeBuffer; + } + + public long getSizeBufferAddress() { + return sizeBuffer.memoryAddress(); + } + @Override public int hashCode(int index) { return hashCode(index, null); @@ -259,6 +335,9 @@ public MinorType getMinorType() { @Override public void clear() { + super.clear(); + validityBuffer = releaseBuffer(validityBuffer); + lastSet = -1; } @Override @@ -272,7 +351,18 @@ public ArrowBuf[] getBuffers(boolean clear) { @Override public List getObject(int index) { - return null; + if (isSet(index) == 0) { + return null; + } + final List vals = new JsonStringArrayList<>(); + final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); + final 
ValueVector vv = getDataVector(); + for (int i = start; i < end; i++) { + vals.add(vv.getObject(i)); + } + + return vals; } @Override @@ -285,6 +375,19 @@ public boolean isEmpty(int index) { return false; } + /** + * Same as {@link #isNull(int)}. + * + * @param index position of element + * @return 1 if element at given index is not null, 0 otherwise + */ + public int isSet(int index) { + final int byteIndex = index >> 3; + final byte b = validityBuffer.getByte(byteIndex); + final int bitIndex = index & 7; + return (b >> bitIndex) & 0x01; + } + @Override public int getNullCount() { return 0; @@ -295,6 +398,16 @@ public int getValueCapacity() { return 0; } + private int getValidityAndSizeValueCapacity() { + final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); + final int sizeValueCapacity = Math.max(getSizeBufferValueCapacity(), 0); + return Math.min(offsetValueCapacity, sizeValueCapacity); + } + + private int getValidityBufferValueCapacity() { + return capAtMaxInt(validityBuffer.capacity() * 8); + } + @Override public void setNull(int index) { @@ -302,12 +415,58 @@ public void setNull(int index) { @Override public int startNewValue(int index) { - return 0; + while (index >= getValidityAndSizeValueCapacity()) { + reallocValidityAndSizeAndOffsetBuffers(); + } + + if (lastSet >= index) { + lastSet = index - 1; + } + + if (index == 0) { + offsetBuffer.setInt(0, 0); + } else if (index > lastSet) { + for (int i = lastSet + 1; i <= index; i++) { + final int lastOffSet = offsetBuffer.getInt((i - 1L) * OFFSET_WIDTH); + final int lastSize = sizeBuffer.getInt((i - 1L) * SIZE_WIDTH); + final int newOffSet = lastOffSet + lastSize; + offsetBuffer.setInt(i * OFFSET_WIDTH, newOffSet); + } + } else { + final int lastOffset = offsetBuffer.getInt(lastSet * OFFSET_WIDTH); + final int lastSize = sizeBuffer.getInt(lastSet * SIZE_WIDTH); + final int newOffSet = lastOffset + lastSize; + offsetBuffer.setInt((lastSet + 1) * OFFSET_WIDTH, newOffSet); + } + + 
BitVectorHelper.setBit(validityBuffer, index); + lastSet = index; + return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); + } + + private int getLengthOfChildVector() { + int length = 0; + for (int i = 0; i <= lastSet + 1; i++) { + length += sizeBuffer.getInt(i * SIZE_WIDTH); + } + return length; } @Override public void setValueCount(int valueCount) { - + this.valueCount = valueCount; + if (valueCount > 0) { + while (valueCount > getValidityAndSizeValueCapacity()) { + /* check if validity and offset buffers need to be re-allocated */ + reallocValidityAndSizeAndOffsetBuffers(); + } + } + /* valueCount for the data vector is the current end offset */ + final int childValueCount = (valueCount == 0) ? 0 : getLengthOfChildVector(); + /* set the value count of data vector and this will take care of + * checking whether data buffer needs to be reallocated. + */ + vector.setValueCount(childValueCount); } @Override @@ -321,13 +480,25 @@ public int getElementEndIndex(int index) { } @Override - public AddOrGetResult addOrGetVector(FieldType type) { - return null; + public AddOrGetResult addOrGetVector(FieldType fieldType) { + AddOrGetResult result = super.addOrGetVector(fieldType); + invalidateReader(); + return result; } @Override public UnionVector promoteToUnion() { - return null; + UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack); + replaceDataVector(vector); + invalidateReader(); + if (callBack != null) { + callBack.doWork(); + } + return vector; + } + + protected void invalidateReader() { + reader = null; } @Deprecated @@ -336,5 +507,16 @@ public List getFieldInnerVectors() { throw new UnsupportedOperationException("There are no inner vectors. 
Use getFieldBuffers"); } + public UnionListViewWriter getWriter() { + return new UnionListViewWriter(this); + } + public int getLastSet() { + return lastSet; + } + + @Override + public int getValueCount() { + return valueCount; + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index a379156bb72..c59b997286d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -306,6 +306,8 @@ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) { v = listVector.addOrGetVector(fieldType).getVector(); } else if (fixedListVector != null) { v = fixedListVector.addOrGetVector(fieldType).getVector(); + } else if (listViewVector != null) { + v = listViewVector.addOrGetVector(fieldType).getVector(); } else { v = largeListVector.addOrGetVector(fieldType).getVector(); } @@ -348,6 +350,8 @@ private FieldWriter promoteToUnion() { unionVector = fixedListVector.promoteToUnion(); } else if (largeListVector != null) { unionVector = largeListVector.promoteToUnion(); + } else if (listViewVector != null) { + unionVector = listViewVector.promoteToUnion(); } unionVector.addVector((FieldVector) tp.getTo()); writer = new UnionWriter(unionVector, nullableStructWriterFactory); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java new file mode 100644 index 00000000000..697ce171037 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestListViewVector { + + private BufferAllocator allocator; + + @Before + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @After + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testNestedListVector() { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + + UnionListWriter listWriter = listVector.getWriter(); + + /* allocate memory */ + listWriter.allocate(); + + /* the dataVector that backs a listVector will also be a + * listVector for this test. 
+ */ + + /* write one or more inner lists at index 0 */ + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(12); + listWriter.bigInt().writeBigInt(-7); + listWriter.bigInt().writeBigInt(25); + listWriter.endList(); + + listWriter.setValueCount(1); + listVector.setValueCount(1); + + System.out.println(listVector); + } + + } + + @Test + public void testBasicListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + /* the second list at index 2 is null (we are not setting any)*/ + + /* write the third list at index 2 */ + listViewWriter.setPosition(2); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(0); + listViewWriter.bigInt().writeBigInt(-127); + listViewWriter.bigInt().writeBigInt(127); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.endList(); + + /* write the fourth list at index 3 (empty list) */ + listViewWriter.setPosition(3); + listViewWriter.startList(); + listViewWriter.endList(); + + listViewWriter.setPosition(4); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(1); + listViewWriter.bigInt().writeBigInt(2); + listViewWriter.bigInt().writeBigInt(3); + listViewWriter.bigInt().writeBigInt(4); + listViewWriter.endList(); + + // assertEquals(3, listViewVector.getLastSet()); + + listViewVector.setValueCount(5); + // assertEquals(4, listViewVector.getValueCount()); + /* get vector at index 0 -- the value is a BigIntVector*/ + ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + 
FieldVector dataVec = listViewVector.getDataVector(); + + for (int i = 0; i < dataVec.getValueCount(); i++) { + Object o1 = dataVec.getObject(i); + System.out.println(i + " : " + o1); + } + + for (int i = 0; i < 5; i++) { + System.out.println("Index: " + i + " Offset: " + offSetBuffer.getInt(i * 4) + + " Size: " + sizeBuffer.getInt(i * 4)); + } + + System.out.println(listViewVector); + } + } +} From 222c72a1bc35ad13c34c573cfcb116b1ed423dd3 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 25 Apr 2024 06:24:25 +0530 Subject: [PATCH 04/34] feat: adding nested listview test --- .../arrow/vector/complex/ListViewVector.java | 2 +- .../arrow/vector/TestListViewVector.java | 164 ++++++++++++++++-- 2 files changed, 154 insertions(+), 12 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 5cee7a1bfc1..cce0ad46f69 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -367,7 +367,7 @@ public List getObject(int index) { @Override public boolean isNull(int index) { - return false; + return (isSet(index) == 0); } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 697ce171037..96a45fd421b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -17,8 +17,14 @@ package org.apache.arrow.vector; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import java.util.ArrayList; + import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; import 
org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; @@ -71,6 +77,124 @@ public void testNestedListVector() { } + @Test + public void testNestedListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* the dataVector that backs a listVector will also be a + * listVector for this test. + */ + + /* write one or more inner lists at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(50); + listViewWriter.list().bigInt().writeBigInt(100); + listViewWriter.list().bigInt().writeBigInt(200); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(75); + listViewWriter.list().bigInt().writeBigInt(125); + listViewWriter.list().bigInt().writeBigInt(150); + listViewWriter.list().bigInt().writeBigInt(175); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + /* write one or more inner lists at index 1 */ + listViewWriter.setPosition(1); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(10); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(15); + listViewWriter.list().bigInt().writeBigInt(20); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(25); + listViewWriter.list().bigInt().writeBigInt(30); + listViewWriter.list().bigInt().writeBigInt(35); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + assertEquals(1, listViewVector.getLastSet()); + + listViewVector.setValueCount(2); + + // 
[[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + ArrowBuf sizeBuffer = 
listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + } + } + @Test public void testBasicListViewVector() { try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { @@ -105,6 +229,7 @@ public void testBasicListViewVector() { listViewWriter.startList(); listViewWriter.endList(); + /* write the fifth list at index 4 */ listViewWriter.setPosition(4); listViewWriter.startList(); listViewWriter.bigInt().writeBigInt(1); @@ -113,26 +238,43 @@ public void testBasicListViewVector() { listViewWriter.bigInt().writeBigInt(4); listViewWriter.endList(); - // assertEquals(3, listViewVector.getLastSet()); + assertEquals(4, listViewVector.getLastSet()); listViewVector.setValueCount(5); - // assertEquals(4, listViewVector.getValueCount()); + // check value count + assertEquals(5, listViewVector.getValueCount()); + /* get vector at index 0 -- the value is a BigIntVector*/ ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); FieldVector dataVec = listViewVector.getDataVector(); - for (int i = 0; i < dataVec.getValueCount(); i++) { - Object o1 = dataVec.getObject(i); - System.out.println(i + " : " + o1); - } + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); - for (int i = 0; i < 5; i++) { - System.out.println("Index: " + i + " Offset: " + offSetBuffer.getInt(i * 4) + - " Size: " + sizeBuffer.getInt(i * 4)); - } + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - System.out.println(listViewVector); + // check data vector + assertEquals(12, ((BigIntVector) dataVec).get(0)); + assertEquals(-7, ((BigIntVector) dataVec).get(1)); + assertEquals(25, ((BigIntVector) dataVec).get(2)); + assertEquals(0, ((BigIntVector) dataVec).get(3)); + assertEquals(-127, ((BigIntVector) dataVec).get(4)); + assertEquals(127, ((BigIntVector) dataVec).get(5)); + assertEquals(50, ((BigIntVector) dataVec).get(6)); + assertEquals(1, ((BigIntVector) dataVec).get(7)); + assertEquals(2, ((BigIntVector) dataVec).get(8)); + assertEquals(3, ((BigIntVector) dataVec).get(9)); + assertEquals(4, ((BigIntVector) dataVec).get(10)); } } } From 05b9d5da3a12cf43f7553f3376025010ad79fb19 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 25 Apr 2024 08:19:52 +0530 Subject: [PATCH 05/34] fix: adding test for nested list v2 --- .../complex/BaseRepeatedValueViewVector.java | 28 +++++++++++- .../arrow/vector/complex/ListViewVector.java | 10 ++++- .../arrow/vector/TestListViewVector.java | 43 ++++++++++++++++--- 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 01b326749a3..c7518bb5671 100644 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -26,7 +26,9 @@ import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.UInt4Vector; @@ -74,7 +76,7 @@ protected BaseRepeatedValueViewVector( @Override public String getName() { - return null; + return name; } @Override @@ -182,7 +184,13 @@ public FieldVector getDataVector() { @Override public void setInitialCapacity(int numRecords) { - + offsetAllocationSizeInBytes = (numRecords) * OFFSET_WIDTH; + sizeAllocationSizeInBytes = (numRecords) * SIZE_WIDTH; + if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } } @Override @@ -253,7 +261,23 @@ public boolean isNull(int index) { @Override public void setValueCount(int valueCount) { + this.valueCount = valueCount; + while (valueCount > getOffsetBufferValueCapacity()) { + reallocOffsetBuffer(); + } + while (valueCount > getSizeBufferValueCapacity()) { + reallocSizeBuffer(); + } + final int childValueCount = valueCount == 0 ? 
0 : getLengthOfChildVector(); + vector.setValueCount(childValueCount); + } + private int getLengthOfChildVector() { + int length = 0; + for (int i = 0; i <= valueCount; i++) { + length += sizeBuffer.getInt(i * SIZE_WIDTH); + } + return length; } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index cce0ad46f69..e4eb6d134d5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -21,6 +21,7 @@ import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import java.util.Collections; import java.util.List; import org.apache.arrow.memory.ArrowBuf; @@ -101,7 +102,8 @@ public void initializeChildrenFromFields(List children) { @Override public void setInitialCapacity(int numRecords) { - + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords); } @Override @@ -325,7 +327,11 @@ public int getBufferSizeFor(int valueCount) { @Override public Field getField() { - return null; + if (field.getChildren().contains(getDataVector().getField())) { + return field; + } + field = new Field(field.getName(), field.getFieldType(), Collections.singletonList(getDataVector().getField())); + return field; } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 96a45fd421b..b6c44940a41 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -29,6 +29,8 @@ import org.apache.arrow.vector.complex.ListViewVector; import 
org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.FieldType; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -182,8 +184,8 @@ public void testNestedListViewVector() { assertFalse(listViewVector.isNull(0)); assertFalse(listViewVector.isNull(1)); - ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); // check offset buffer assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); @@ -245,9 +247,9 @@ public void testBasicListViewVector() { assertEquals(5, listViewVector.getValueCount()); /* get vector at index 0 -- the value is a BigIntVector*/ - ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - FieldVector dataVec = listViewVector.getDataVector(); + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + final FieldVector dataVec = listViewVector.getDataVector(); // check offset buffer assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); @@ -277,4 +279,35 @@ public void testBasicListViewVector() { assertEquals(4, ((BigIntVector) dataVec).get(10)); } } + + @Test + public void testNestedListVector1() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LISTVIEW; + MinorType scalarType = MinorType.BIGINT; + + listViewVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList1 = (ListViewVector) listViewVector.getDataVector(); + 
innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList2 = (ListViewVector) innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList3 = (ListViewVector) innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList4 = (ListViewVector) innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList5 = (ListViewVector) innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList6 = (ListViewVector) innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listViewVector.setInitialCapacity(128); + } + } } From d92e2ba8bcb82bbd585bb2ff0c2d93a5e9fd3575 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 30 Apr 2024 05:50:17 +0530 Subject: [PATCH 06/34] fix: adding a method to allocate buffers --- .../vector/complex/BaseRepeatedValueViewVector.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index c7518bb5671..af23f078b6c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -83,8 +83,7 @@ public String getName() { public boolean allocateNewSafe() { boolean dataAlloc = false; try { - offsetBuffer = allocateOffsetBuffer(offsetAllocationSizeInBytes); - sizeBuffer = allocateSizeBuffer(sizeAllocationSizeInBytes); + allocateBuffers(); dataAlloc = vector.allocateNewSafe(); } catch (Exception e) { e.printStackTrace(); @@ -98,7 +97,12 @@ public boolean allocateNewSafe() 
{ return dataAlloc; } - protected ArrowBuf allocateOffsetBuffer(final long size) { + private void allocateBuffers() { + offsetBuffer = allocateOffsetBuffer(offsetAllocationSizeInBytes); + sizeBuffer = allocateSizeBuffer(sizeAllocationSizeInBytes); + } + + private ArrowBuf allocateOffsetBuffer(final long size) { final int curSize = (int) size; ArrowBuf offsetBuffer = allocator.buffer(curSize); offsetBuffer.readerIndex(0); @@ -107,7 +111,7 @@ protected ArrowBuf allocateOffsetBuffer(final long size) { return offsetBuffer; } - protected ArrowBuf allocateSizeBuffer(final long size) { + private ArrowBuf allocateSizeBuffer(final long size) { final int curSize = (int) size; ArrowBuf sizeBuffer = allocator.buffer(curSize); sizeBuffer.readerIndex(0); From a25fa6a283feecd3e35095d7de1eca3ce0dc8325 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 30 Apr 2024 08:04:33 +0530 Subject: [PATCH 07/34] fix: adding null check test case 1 --- .../main/codegen/templates/UnionReader.java | 2 +- .../arrow/vector/TestListViewVector.java | 265 ++++++++++++------ 2 files changed, 183 insertions(+), 84 deletions(-) diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 956bc91e918..243bd832255 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,7 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private static final int NUM_SUPPORTED_TYPES = 48; + private static final int NUM_SUPPORTED_TYPES = 49; private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index b6c44940a41..61c70e2528e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -76,7 +76,189 @@ public void testNestedListVector() { System.out.println(listVector); } + } + + @Test + public void testBasicListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + /* the second list at index 2 is null (we are not setting any)*/ + + /* write the third list at index 2 */ + listViewWriter.setPosition(2); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(0); + listViewWriter.bigInt().writeBigInt(-127); + listViewWriter.bigInt().writeBigInt(127); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.endList(); + + /* write the fourth list at index 3 (empty list) */ + listViewWriter.setPosition(3); + listViewWriter.startList(); + listViewWriter.endList(); + + /* write the fifth list at index 4 */ + listViewWriter.setPosition(4); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(1); + listViewWriter.bigInt().writeBigInt(2); + listViewWriter.bigInt().writeBigInt(3); + listViewWriter.bigInt().writeBigInt(4); + listViewWriter.endList(); + + assertEquals(4, listViewVector.getLastSet()); + + listViewVector.setValueCount(5); + // check value count + assertEquals(5, listViewVector.getValueCount()); + + /* get vector at index 0 -- the value is a BigIntVector*/ + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + final FieldVector dataVec = listViewVector.getDataVector(); + + // check offset buffer + assertEquals(0, 
offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check data vector + assertEquals(12, ((BigIntVector) dataVec).get(0)); + assertEquals(-7, ((BigIntVector) dataVec).get(1)); + assertEquals(25, ((BigIntVector) dataVec).get(2)); + assertEquals(0, ((BigIntVector) dataVec).get(3)); + assertEquals(-127, ((BigIntVector) dataVec).get(4)); + assertEquals(127, ((BigIntVector) dataVec).get(5)); + assertEquals(50, ((BigIntVector) dataVec).get(6)); + assertEquals(1, ((BigIntVector) dataVec).get(7)); + assertEquals(2, ((BigIntVector) dataVec).get(8)); + assertEquals(3, ((BigIntVector) dataVec).get(9)); + assertEquals(4, ((BigIntVector) dataVec).get(10)); + } + } + + @Test + public void testImplicitNullVectors() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + /* allocate memory */ + listViewWriter.allocate(); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + 
listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + int offSet0 = offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size0 = sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH); + int lastSet0 = listViewVector.getLastSet(); + // after the first list is written, + // the initial offset must be 0, + // the size must be 3 (as there are 3 elements in the array), + // the lastSet must be 0 since, the first list is written at index 0. + + assertEquals(0, offSet0); + assertEquals(3, size0); + assertEquals(0, lastSet0); + + listViewWriter.setPosition(5); + listViewWriter.startList(); + + // writing the 6th list at index 5, + // and the list items from index 1 through 4 are not populated. + // but since there is a gap between the 0th and 5th list, in terms + // of buffer allocation, the offset and size buffers must be updated + // to reflect the implicit null vectors. + + for (int i = 1; i < 5; i++) { + int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); + int lastSet = listViewVector.getLastSet(); + // Since the list is not written, the offset and size must equal to child vector's size + // i.e., 3, and size should be 0 as the list is not written. + // And the last set value is the value currently being written, which is 5. 
+ assertEquals(3, offSet); + assertEquals(0, size); + assertEquals(5, lastSet); + } + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + int offSet5 = offSetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size5 = sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH); + int lastSet5 = listViewVector.getLastSet(); + + assertEquals(3, offSet5); + assertEquals(2, size5); + assertEquals(5, lastSet5); + + listViewWriter.setPosition(10); + listViewWriter.startList(); + + // writing the 11th list at index 10, + // and the list items from index 6 through 10 are not populated. + // but since there is a gap between the 5th and 11th list, in terms + // of buffer allocation, the offset and size buffers must be updated + // to reflect the implicit null vectors. + for (int i = 6; i < 10; i++) { + int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); + int lastSet = listViewVector.getLastSet(); + // Since the list is not written, the offset and size must equal to child vector's size + // i.e., 3, and size should be 0 as the list is not written. + // And the last set value is the value currently being written, which is 10. 
+ assertEquals(5, offSet); + assertEquals(0, size); + assertEquals(10, lastSet); + } + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.endList(); + + int offSet11 = offSetBuffer.getInt(10 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size11 = sizeBuffer.getInt(10 * BaseRepeatedValueViewVector.SIZE_WIDTH); + int lastSet11 = listViewVector.getLastSet(); + + assertEquals(5, offSet11); + assertEquals(1, size11); + assertEquals(10, lastSet11); + + listViewVector.setValueCount(11); + } } @Test @@ -197,89 +379,6 @@ public void testNestedListViewVector() { } } - @Test - public void testBasicListViewVector() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - /* allocate memory */ - listViewWriter.allocate(); - - /* write the first list at index 0 */ - listViewWriter.setPosition(0); - listViewWriter.startList(); - - listViewWriter.bigInt().writeBigInt(12); - listViewWriter.bigInt().writeBigInt(-7); - listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endList(); - - /* the second list at index 2 is null (we are not setting any)*/ - - /* write the third list at index 2 */ - listViewWriter.setPosition(2); - listViewWriter.startList(); - - listViewWriter.bigInt().writeBigInt(0); - listViewWriter.bigInt().writeBigInt(-127); - listViewWriter.bigInt().writeBigInt(127); - listViewWriter.bigInt().writeBigInt(50); - listViewWriter.endList(); - - /* write the fourth list at index 3 (empty list) */ - listViewWriter.setPosition(3); - listViewWriter.startList(); - listViewWriter.endList(); - - /* write the fifth list at index 4 */ - listViewWriter.setPosition(4); - listViewWriter.startList(); - listViewWriter.bigInt().writeBigInt(1); - listViewWriter.bigInt().writeBigInt(2); - listViewWriter.bigInt().writeBigInt(3); - listViewWriter.bigInt().writeBigInt(4); - listViewWriter.endList(); - - assertEquals(4, listViewVector.getLastSet()); - - 
listViewVector.setValueCount(5); - // check value count - assertEquals(5, listViewVector.getValueCount()); - - /* get vector at index 0 -- the value is a BigIntVector*/ - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - final FieldVector dataVec = listViewVector.getDataVector(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check data vector - assertEquals(12, ((BigIntVector) dataVec).get(0)); - assertEquals(-7, ((BigIntVector) dataVec).get(1)); - assertEquals(25, ((BigIntVector) dataVec).get(2)); - assertEquals(0, ((BigIntVector) dataVec).get(3)); - assertEquals(-127, ((BigIntVector) dataVec).get(4)); - assertEquals(127, ((BigIntVector) dataVec).get(5)); - assertEquals(50, ((BigIntVector) dataVec).get(6)); - assertEquals(1, ((BigIntVector) dataVec).get(7)); - assertEquals(2, ((BigIntVector) dataVec).get(8)); - assertEquals(3, ((BigIntVector) dataVec).get(9)); - assertEquals(4, ((BigIntVector) dataVec).get(10)); - } - } - @Test public void testNestedListVector1() throws Exception { try (ListViewVector listViewVector = 
ListViewVector.empty("sourceVector", allocator)) { From 3b72fa75771b33e96c9f09453ec37435e91363cf Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 3 May 2024 09:53:50 +0530 Subject: [PATCH 08/34] fix: adding buffer setting api test --- .../arrow/vector/complex/ListViewVector.java | 37 ++- .../arrow/vector/TestListViewVector.java | 211 +++++++++++++++++- 2 files changed, 229 insertions(+), 19 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index e4eb6d134d5..ca664cb524c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -384,7 +384,7 @@ public boolean isEmpty(int index) { /** * Same as {@link #isNull(int)}. * - * @param index position of element + * @param index position of the element * @return 1 if element at given index is not null, 0 otherwise */ public int isSet(int index) { @@ -419,6 +419,17 @@ public void setNull(int index) { } + /** + * Start new value in the ListView vector. + * There are a few cases that are handled in this function. + * There are two main scenarios that need to be considered. + * The first scenario is simple insertion where indices are continuously updated. + * The other scenario is the event of non-continuous writing, + * the offset buffer needs to be updated. 
+ * + * @param index index of the value to start + * @return offset of the new value + */ @Override public int startNewValue(int index) { while (index >= getValidityAndSizeValueCapacity()) { @@ -432,22 +443,24 @@ public int startNewValue(int index) { if (index == 0) { offsetBuffer.setInt(0, 0); } else if (index > lastSet) { + /* when skipping indices, we need to update the offset buffer */ + /* setting offset from lastSet + 1 to index (included) */ for (int i = lastSet + 1; i <= index; i++) { - final int lastOffSet = offsetBuffer.getInt((i - 1L) * OFFSET_WIDTH); - final int lastSize = sizeBuffer.getInt((i - 1L) * SIZE_WIDTH); - final int newOffSet = lastOffSet + lastSize; - offsetBuffer.setInt(i * OFFSET_WIDTH, newOffSet); + final int prevOffSet = offsetBuffer.getInt((i - 1L) * OFFSET_WIDTH); + final int prevSize = sizeBuffer.getInt((i - 1L) * SIZE_WIDTH); + final int currOffSet = prevOffSet + prevSize; + offsetBuffer.setInt(i * OFFSET_WIDTH, currOffSet); } } else { - final int lastOffset = offsetBuffer.getInt(lastSet * OFFSET_WIDTH); - final int lastSize = sizeBuffer.getInt(lastSet * SIZE_WIDTH); - final int newOffSet = lastOffset + lastSize; - offsetBuffer.setInt((lastSet + 1) * OFFSET_WIDTH, newOffSet); + final int prevOffset = offsetBuffer.getInt((index - 1) * OFFSET_WIDTH); + final int prevSize = sizeBuffer.getInt((index - 1) * SIZE_WIDTH); + final int currOffSet = prevOffset + prevSize; + offsetBuffer.setInt(index * OFFSET_WIDTH, currOffSet); } BitVectorHelper.setBit(validityBuffer, index); lastSet = index; - return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); + return offsetBuffer.getInt(index * OFFSET_WIDTH); } private int getLengthOfChildVector() { @@ -521,6 +534,10 @@ public int getLastSet() { return lastSet; } + public void setLastSet(int newLastSet) { + lastSet = newLastSet; + } + @Override public int getValueCount() { return valueCount; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 61c70e2528e..9b06c72185e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -17,10 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import java.util.ArrayList; +import java.util.List; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -31,20 +32,20 @@ import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestListViewVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -71,10 +72,19 @@ public void testNestedListVector() { listWriter.bigInt().writeBigInt(25); listWriter.endList(); - listWriter.setValueCount(1); - listVector.setValueCount(1); + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(100); + listWriter.endList(); + + listWriter.bigInt().writeBigInt(180); + + listWriter.setValueCount(3); + listVector.setValueCount(3); System.out.println(listVector); + System.out.println(listVector.getDataVector()); } } @@ -409,4 +419,187 @@ public void testNestedListVector1() throws Exception { listViewVector.setInitialCapacity(128); } } + + private void setNewValues(ListViewVector 
listViewVector, List offSets, + List sizes, FieldVector elementFieldVec) { + if (offSets.size() != sizes.size()) { + throw new IllegalArgumentException("Offsets and sizes must be of the same size"); + } + UnionListViewWriter writer = listViewVector.getWriter(); + int lastSet = listViewVector.getLastSet(); + int listViewValueCount = listViewVector.getValueCount(); + int newListViewValueCount = listViewValueCount + offSets.size(); + lastSet += offSets.size(); + + // set validity bit + final ArrowBuf validityBuffer = listViewVector.getValidityBuffer(); + for (int i = listViewValueCount; i < newListViewValueCount; i++) { + if (sizes.get(i - listViewValueCount) != null) { + // only set validity bit if size is not 0 + BitVectorHelper.setBit(validityBuffer, i); + } + } + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // set offset and size buffers + for (int i = listViewValueCount; i < newListViewValueCount; i++) { + writer.setPosition(i); + offSetBuffer.setInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH, + offSets.get(i - listViewValueCount)); + if (sizes.get(i - listViewValueCount) != null) { + sizeBuffer.setInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH, + sizes.get(i - listViewValueCount)); + } else { + sizeBuffer.setInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH, + 0); + } + + } + // update the writer position + writer.setPosition(listViewVector.getOffsetBuffer() + .getInt((newListViewValueCount - 1) * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + FieldVector dataVec = listViewVector.getDataVector(); + + for (int i = 0; i < elementFieldVec.getValueCount(); i++) { + dataVec.copyFromSafe(i, i + dataVec.getValueCount(), elementFieldVec); + } + dataVec.setValueCount(dataVec.getValueCount() + elementFieldVec.getValueCount()); + writer.setPosition(newListViewValueCount); + listViewVector.setLastSet(lastSet); + listViewVector.setValueCount(newListViewValueCount); + } + + /** + * Setting 
up the buffers directly needs to be validated with the base method used in + * the ListVector class where we use the approach of startList(), + * write to the child vector and endList(). + *

+ * To support this, we have to consider the following scenarios; + *

+ * 1. Only using directly buffer-based inserts. + * 2. Default list insertion followed by buffer-based inserts. + * 3. Buffer-based inserts followed by default list insertion. + *

+ * Steps taken in default list insertion + * 1. startList() + * - startNewValue + * - update lastSet + * - set offset buffer + * - set validity buffer + * - writer set position + * 2. ListWriter will write child vector + * 3. endList() + * - write size to Size buffer + * + */ + @Test + public void testBasicListViewAddition() { + List offSets1 = new ArrayList<>(); + List sizes1 = new ArrayList<>(); + + offSets1.add(3); + offSets1.add(3); + offSets1.add(7); + + sizes1.add(null); + sizes1.add(4); + sizes1.add(0); + + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + listViewVector.setValueCount(1); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + try (BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { + + elementVector.allocateNew(4); + + elementVector.set(0, 0); + elementVector.set(1, -127); + elementVector.set(2, 127); + elementVector.set(3, 50); + + elementVector.setValueCount(4); + + setNewValues(listViewVector, offSets1, sizes1, elementVector); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, 
sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + } + + setNewValues(listViewVector, offSets1, sizes1, listViewVector.getDataVector()); + + listViewWriter.setPosition(4); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(121); + listViewWriter.bigInt().writeBigInt(-71); + listViewWriter.bigInt().writeBigInt(251); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(4 * 
BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); + assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); + assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + } + } } From 696b4517df6692f9589359006f6ed8342321c365 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 3 May 2024 13:49:51 +0530 Subject: [PATCH 09/34] fix: intermediate commit --- .../arrow/vector/complex/ListViewVector.java | 122 ++++++++++++++ .../arrow/vector/TestListViewVector.java | 159 +++++++----------- 2 files changed, 185 insertions(+), 96 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index ca664cb524c..2e9a6a583a3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.List; +import java.util.Objects; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -33,6 +34,7 @@ import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullVector; import 
org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.UnionListReader; @@ -463,6 +465,89 @@ public int startNewValue(int index) { return offsetBuffer.getInt(index * OFFSET_WIDTH); } + /** + * Constructing a ListViewVector when the offsets, sizes and field vector are available. + *

+ * Steps taken follow the workflow used in creating a ListViewVector with the API + * used in ListVector. + * + * @param offSets new offSets to be set + * @param sizes new sizes to be set + * @param elementFieldVec new elements to be appended to the field vector + */ + public void setNewValues( + List offSets, List sizes, FieldVector elementFieldVec) { + // Null checks + Objects.requireNonNull(offSets, "Offsets cannot be null"); + Objects.requireNonNull(sizes, "Sizes cannot be null"); + Objects.requireNonNull(elementFieldVec, "Element Field Vector cannot be null"); + + while (offSets.size() >= getValidityAndSizeValueCapacity()) { + reallocValidityAndSizeAndOffsetBuffers(); + } + + /* TODO: add the validation method + * 0 <= offsets[i] <= length of the child array + * 0 <= offsets[i] + size[i] <= length of the child array + */ + if (offSets.size() != sizes.size()) { + throw new IllegalArgumentException("Offsets and sizes must be of the same size." + + " Offsets size: " + offSets.size() + ", Sizes size: " + sizes.size()); + } + + UnionListViewWriter writer = this.getWriter(); + int lastSet = this.getLastSet(); + int listViewValueCount = this.getValueCount(); + int newListViewValueCount = listViewValueCount + offSets.size(); + lastSet += offSets.size(); + + // set validity bit + final ArrowBuf validityBuffer = this.getValidityBuffer(); + for (int i = listViewValueCount; i < newListViewValueCount; i++) { + if (sizes.get(i - listViewValueCount) != null) { + // only set validity bit if size is not 0 + BitVectorHelper.setBit(validityBuffer, i); + } + } + + final ArrowBuf offSetBuffer = this.getOffsetBuffer(); + final ArrowBuf sizeBuffer = this.getSizeBuffer(); + + // set offset and size buffers + for (int i = listViewValueCount; i < newListViewValueCount; i++) { + writer.setPosition(i); + offSetBuffer.setInt(i * OFFSET_WIDTH, + offSets.get(i - listViewValueCount)); + // we use null to determine a list with size 0 and no list + if (sizes.get(i - listViewValueCount) != 
null) { + sizeBuffer.setInt(i * SIZE_WIDTH, + sizes.get(i - listViewValueCount)); + } else { + sizeBuffer.setInt(i * SIZE_WIDTH, + 0); + } + } + + // updating field vector + if (this.getDataVector() instanceof NullVector) { + this.vector = DEFAULT_DATA_VECTOR; + this.vector.allocateNew(); + } + + if (this.getDataVector() instanceof NullVector) { + this.vector = elementFieldVec; + } else { + FieldVector dataVec = this.getDataVector(); + for (int i = 0; i < elementFieldVec.getValueCount(); i++) { + dataVec.copyFromSafe(i, i + dataVec.getValueCount(), elementFieldVec); + } + dataVec.setValueCount(dataVec.getValueCount() + elementFieldVec.getValueCount()); + } + writer.setPosition(newListViewValueCount); + this.setLastSet(lastSet); + this.setValueCount(newListViewValueCount); + } + private int getLengthOfChildVector() { int length = 0; for (int i = 0; i <= lastSet + 1; i++) { @@ -471,6 +556,43 @@ private int getLengthOfChildVector() { return length; } + private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) { + for (int i = 0; i < bufValues.length; i++) { + buffer.setInt(i * bufWidth, bufValues[i]); + } + } + + /** + * Constructing a ListViewVector when the offsets, sizes and field vector are available. + *

+ * Steps taken follow the workflow used in creating a ListViewVector with the API + * used in ListVector. + * + * @param offSets new offSets to be set + * @param sizes new sizes to be set + * @param elementFieldVec new elements to be appended to the field vector + */ + public void set(ArrowBuf offSets, ArrowBuf sizes, FieldVector elementFieldVec, int length) { + // Null checks + Objects.requireNonNull(offSets, "Offsets cannot be null"); + Objects.requireNonNull(sizes, "Sizes cannot be null"); + Objects.requireNonNull(elementFieldVec, "Element Field Vector cannot be null"); + + this.offsetBuffer = offSets; + this.sizeBuffer = sizes; + this.vector = elementFieldVec; + + // set validity bit + final ArrowBuf validityBuffer = this.getValidityBuffer(); + for (int i = 0; i < length; i++) { + if (this.sizeBuffer.getInt(i * SIZE_WIDTH) != -1) { + // only set validity bit if size is not 0 + BitVectorHelper.setBit(validityBuffer, i); + } + } + + } + @Override public void setValueCount(int valueCount) { this.valueCount = valueCount; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 9b06c72185e..5f811c3a3d1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -420,56 +420,10 @@ public void testNestedListVector1() throws Exception { } } - private void setNewValues(ListViewVector listViewVector, List offSets, - List sizes, FieldVector elementFieldVec) { - if (offSets.size() != sizes.size()) { - throw new IllegalArgumentException("Offsets and sizes must be of the same size"); + private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) { + for (int i = 0; i < bufValues.length; i++) { + buffer.setInt(i * bufWidth, bufValues[i]); } - UnionListViewWriter writer = listViewVector.getWriter(); - int lastSet = 
listViewVector.getLastSet(); - int listViewValueCount = listViewVector.getValueCount(); - int newListViewValueCount = listViewValueCount + offSets.size(); - lastSet += offSets.size(); - - // set validity bit - final ArrowBuf validityBuffer = listViewVector.getValidityBuffer(); - for (int i = listViewValueCount; i < newListViewValueCount; i++) { - if (sizes.get(i - listViewValueCount) != null) { - // only set validity bit if size is not 0 - BitVectorHelper.setBit(validityBuffer, i); - } - } - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // set offset and size buffers - for (int i = listViewValueCount; i < newListViewValueCount; i++) { - writer.setPosition(i); - offSetBuffer.setInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH, - offSets.get(i - listViewValueCount)); - if (sizes.get(i - listViewValueCount) != null) { - sizeBuffer.setInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH, - sizes.get(i - listViewValueCount)); - } else { - sizeBuffer.setInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH, - 0); - } - - } - // update the writer position - writer.setPosition(listViewVector.getOffsetBuffer() - .getInt((newListViewValueCount - 1) * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - FieldVector dataVec = listViewVector.getDataVector(); - - for (int i = 0; i < elementFieldVec.getValueCount(); i++) { - dataVec.copyFromSafe(i, i + dataVec.getValueCount(), elementFieldVec); - } - dataVec.setValueCount(dataVec.getValueCount() + elementFieldVec.getValueCount()); - writer.setPosition(newListViewValueCount); - listViewVector.setLastSet(lastSet); - listViewVector.setValueCount(newListViewValueCount); } /** @@ -482,64 +436,36 @@ private void setNewValues(ListViewVector listViewVector, List offSets, * 1. Only using directly buffer-based inserts. * 2. Default list insertion followed by buffer-based inserts. * 3. Buffer-based inserts followed by default list insertion. - *

- * Steps taken in default list insertion - * 1. startList() - * - startNewValue - * - update lastSet - * - set offset buffer - * - set validity buffer - * - writer set position - * 2. ListWriter will write child vector - * 3. endList() - * - write size to Size buffer - * */ @Test public void testBasicListViewAddition() { - List offSets1 = new ArrayList<>(); - List sizes1 = new ArrayList<>(); - - offSets1.add(3); - offSets1.add(3); - offSets1.add(7); - - sizes1.add(null); - sizes1.add(4); - sizes1.add(0); - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - /* allocate memory */ - listViewWriter.allocate(); - - /* write the first list at index 0 */ - listViewWriter.setPosition(0); - listViewWriter.startList(); - - listViewWriter.bigInt().writeBigInt(12); - listViewWriter.bigInt().writeBigInt(-7); - listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endList(); - - listViewVector.setValueCount(1); - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); try (BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { + elementVector.allocateNew(7); - elementVector.allocateNew(4); + elementVector.set(0, 12); + elementVector.set(1, -7); + elementVector.set(2, 25); + elementVector.set(3, 0); + elementVector.set(4, -127); + elementVector.set(5, 127); + elementVector.set(6, 50); - elementVector.set(0, 0); - elementVector.set(1, -127); - elementVector.set(2, 127); - elementVector.set(3, 50); + elementVector.setValueCount(7); - elementVector.setValueCount(4); + ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); - setNewValues(listViewVector, offSets1, sizes1, elementVector); + int[] offSetValues = new int[]{0, 3, 3, 7}; + int[] sizeValues = new int[]{3, 0, 4, 0}; + + setValuesInBuffer(offSetValues, newOffSetBuf, 
BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, elementVector); // check offset buffer assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); @@ -561,9 +487,11 @@ public void testBasicListViewAddition() { assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + assertEquals(3, listViewVector.getLastSet()); } - setNewValues(listViewVector, offSets1, sizes1, listViewVector.getDataVector()); + listViewVector.setNewValues(offSets1, sizes1, listViewVector.getDataVector()); listViewWriter.setPosition(4); listViewWriter.startList(); @@ -600,6 +528,45 @@ public void testBasicListViewAddition() { assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + + assertEquals(4, listViewVector.getLastSet()); + } + } + + @Test + public void testNestedListViewAddition() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] + List offSets1 = new ArrayList<>(); + List sizes1 = new ArrayList<>(); + offSets1.add(0); + offSets1.add(2); + + sizes1.add(2); + sizes1.add(3); + + try (BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { + elementVector.allocateNew(13); + + elementVector.set(0, 50); + elementVector.set(1, 100); + elementVector.set(2, 200); + elementVector.set(3, 75); + elementVector.set(4, 125); + elementVector.set(5, 150); + elementVector.set(6, 175); + elementVector.set(7, 10); + elementVector.set(8, 15); + elementVector.set(9, 20); + 
elementVector.set(10, 25); + elementVector.set(11, 30); + elementVector.set(12, 35); + elementVector.setValueCount(13); + + listViewVector.setNewValues(offSets1, sizes1, elementVector); + } + + System.out.println(listViewVector); } } } From 39b5c7e9d41fdd6a559bc792dd40414ff2a39128 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Sat, 4 May 2024 12:38:07 +0530 Subject: [PATCH 10/34] fix: adding initial set API --- .../arrow/vector/complex/ListViewVector.java | 52 ++++--- .../arrow/vector/TestListViewVector.java | 130 +++++++++++------- 2 files changed, 116 insertions(+), 66 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 2e9a6a583a3..4027053b1a9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -550,15 +550,15 @@ public void setNewValues( private int getLengthOfChildVector() { int length = 0; - for (int i = 0; i <= lastSet + 1; i++) { + for (int i = 0; i < lastSet + 1; i++) { length += sizeBuffer.getInt(i * SIZE_WIDTH); } return length; } - private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) { - for (int i = 0; i < bufValues.length; i++) { - buffer.setInt(i * bufWidth, bufValues[i]); + private void setValuesInBuffer(ArrowBuf srcBuf, ArrowBuf destBuf, long bufWidth, int length) { + for (int i = 0; i < length; i++) { + destBuf.setInt(i * bufWidth, srcBuf.getInt(i * bufWidth)); } } @@ -568,29 +568,41 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) * Steps taken follow the workflow used in creating a ListViewVector with the API * used in ListVector. 
* - * @param offSets new offSets to be set - * @param sizes new sizes to be set + * @param offSetBuffer new offSet buffer to be set + * @param sizeBuffer new size buffer to be set + * @param validityBuffer new validity buffer to be set * @param elementFieldVec new elements to be appended to the field vector + * @param valueCount number of lists to be set */ - public void set(ArrowBuf offSets, ArrowBuf sizes, FieldVector elementFieldVec, int length) { + public void set(ArrowBuf offSetBuffer, ArrowBuf sizeBuffer, ArrowBuf validityBuffer, + FieldVector elementFieldVec, int valueCount) { // Null checks - Objects.requireNonNull(offSets, "Offsets cannot be null"); - Objects.requireNonNull(sizes, "Sizes cannot be null"); + Objects.requireNonNull(offSetBuffer, "Offset buffer cannot be null"); + Objects.requireNonNull(sizeBuffer, "Size buffer cannot be null"); + Objects.requireNonNull(validityBuffer, "Validity buffer cannot be null"); Objects.requireNonNull(elementFieldVec, "Element Field Vector cannot be null"); - this.offsetBuffer = offSets; - this.sizeBuffer = sizes; + // clear buffers + this.offsetBuffer.clear(); + this.sizeBuffer.clear(); + this.validityBuffer.clear(); + // clear child vector + this.vector.clear(); + // allocate memory + this.vector.allocateNew(); + + // set buffers + setValuesInBuffer(offSetBuffer, this.offsetBuffer, OFFSET_WIDTH, valueCount); + setValuesInBuffer(sizeBuffer, this.sizeBuffer, SIZE_WIDTH, valueCount); + setValuesInBuffer(validityBuffer, this.validityBuffer, 1, valueCount); + + // set child vector this.vector = elementFieldVec; + this.vector.setValueCount(elementFieldVec.getValueCount()); - // set validity bit - final ArrowBuf validityBuffer = this.getValidityBuffer(); - for (int i = 0; i < length; i++) { - if (this.sizeBuffer.getInt(i * SIZE_WIDTH) != -1) { - // only set validity bit if size is not 0 - BitVectorHelper.setBit(validityBuffer, i); - } - } - + this.lastSet = valueCount - 1; + this.setValueCount(valueCount); + 
this.getWriter().setPosition(valueCount); } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 5f811c3a3d1..0ddd26c3d72 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -21,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import java.util.ArrayList; -import java.util.List; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -32,6 +31,7 @@ import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.DataSizeRoundingUtil; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -439,11 +439,15 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) */ @Test public void testBasicListViewAddition() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); + BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { - try (BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { + listViewVector.allocateNew(); + + try (ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(1024))) { elementVector.allocateNew(7); elementVector.set(0, 12); @@ -456,16 +460,84 @@ public void testBasicListViewAddition() { 
elementVector.setValueCount(7); - ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); + int[] offSetValues = new int[]{0, 3, 3, 7}; + int[] sizeValues = new int[]{3, 0, 4, 0}; + + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); + + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) 
listViewVector.getDataVector()).get(6)); + + assertEquals(3, listViewVector.getLastSet()); + } + } + } + + @Test + public void testBasicListViewAdditionWithListViewWriter() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); + BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { + + listViewVector.allocateNew(); + + try (ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(1024))) { + elementVector.allocateNew(7); + + elementVector.set(0, 12); + elementVector.set(1, -7); + elementVector.set(2, 25); + elementVector.set(3, 0); + elementVector.set(4, -127); + elementVector.set(5, 127); + elementVector.set(6, 50); + + elementVector.setValueCount(7); int[] offSetValues = new int[]{0, 3, 3, 7}; int[] sizeValues = new int[]{3, 0, 4, 0}; + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - listViewVector.set(newOffSetBuf, newSizeBuffer, elementVector); + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); // check offset buffer assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); @@ -491,7 +563,7 @@ public void testBasicListViewAddition() { assertEquals(3, listViewVector.getLastSet()); } - listViewVector.setNewValues(offSets1, sizes1, listViewVector.getDataVector()); + UnionListViewWriter listViewWriter = listViewVector.getWriter(); listViewWriter.setPosition(4); listViewWriter.startList(); @@ -503,6 +575,9 @@ public void 
testBasicListViewAddition() { listViewVector.setValueCount(5); + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + // check offset buffer assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); @@ -532,41 +607,4 @@ public void testBasicListViewAddition() { assertEquals(4, listViewVector.getLastSet()); } } - - @Test - public void testNestedListViewAddition() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] - List offSets1 = new ArrayList<>(); - List sizes1 = new ArrayList<>(); - offSets1.add(0); - offSets1.add(2); - - sizes1.add(2); - sizes1.add(3); - - try (BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { - elementVector.allocateNew(13); - - elementVector.set(0, 50); - elementVector.set(1, 100); - elementVector.set(2, 200); - elementVector.set(3, 75); - elementVector.set(4, 125); - elementVector.set(5, 150); - elementVector.set(6, 175); - elementVector.set(7, 10); - elementVector.set(8, 15); - elementVector.set(9, 20); - elementVector.set(10, 25); - elementVector.set(11, 30); - elementVector.set(12, 35); - elementVector.setValueCount(13); - - listViewVector.setNewValues(offSets1, sizes1, elementVector); - } - - System.out.println(listViewVector); - } - } } From 5060cdb54168815f98a77d6d7b1e00dc31c66421 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 6 May 2024 07:05:35 +0530 Subject: [PATCH 11/34] fix: adding test refactor --- .../java/org/apache/arrow/vector/TestListViewVector.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 
0ddd26c3d72..445e478a76b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -439,15 +439,15 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) */ @Test public void testBasicListViewAddition() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); - BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { listViewVector.allocateNew(); try (ArrowBuf newOffSetBuf = allocator.buffer(1024); ArrowBuf newSizeBuffer = allocator.buffer(1024); ArrowBuf validityBuffer = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(1024))) { + DataSizeRoundingUtil.divideBy8Ceil(1024)); + BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { elementVector.allocateNew(7); elementVector.set(0, 12); From c9dd085196eedcb6888d2757e56040e6809809a8 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 6 May 2024 11:17:01 +0530 Subject: [PATCH 12/34] fix: set api usage v1 --- .../arrow/vector/complex/ListViewVector.java | 15 +- .../arrow/vector/TestListViewVector.java | 313 +++++++++--------- 2 files changed, 160 insertions(+), 168 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 4027053b1a9..e395591ab51 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -583,22 +583,15 @@ public void set(ArrowBuf offSetBuffer, ArrowBuf sizeBuffer, ArrowBuf validityBuf Objects.requireNonNull(elementFieldVec, "Element Field Vector cannot be null"); // clear buffers - this.offsetBuffer.clear(); - this.sizeBuffer.clear(); - 
this.validityBuffer.clear(); - // clear child vector - this.vector.clear(); - // allocate memory - this.vector.allocateNew(); + this.clear(); // set buffers - setValuesInBuffer(offSetBuffer, this.offsetBuffer, OFFSET_WIDTH, valueCount); - setValuesInBuffer(sizeBuffer, this.sizeBuffer, SIZE_WIDTH, valueCount); - setValuesInBuffer(validityBuffer, this.validityBuffer, 1, valueCount); + this.offsetBuffer = offSetBuffer; + this.sizeBuffer = sizeBuffer; + this.validityBuffer = validityBuffer; // set child vector this.vector = elementFieldVec; - this.vector.setValueCount(elementFieldVec.getValueCount()); this.lastSet = valueCount - 1; this.setValueCount(valueCount); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 445e478a76b..a45e2db4e57 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -439,172 +439,171 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) */ @Test public void testBasicListViewAddition() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - listViewVector.allocateNew(); - - try (ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(1024)); - BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { - elementVector.allocateNew(7); - - elementVector.set(0, 12); - elementVector.set(1, -7); - elementVector.set(2, 25); - elementVector.set(3, 0); - elementVector.set(4, -127); - elementVector.set(5, 127); - elementVector.set(6, 50); - - elementVector.setValueCount(7); - - int[] offSetValues = new int[]{0, 3, 3, 7}; - int[] sizeValues = new int[]{3, 0, 4, 0}; - - BitVectorHelper.setBit(validityBuffer, 0); - 
BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); - - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - - assertEquals(3, listViewVector.getLastSet()); - } - } - } + ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = 
allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + BigIntVector elementVector = new BigIntVector("element-vector", allocator); + ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); - @Test - public void testBasicListViewAdditionWithListViewWriter() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); - BigIntVector elementVector = new BigIntVector("element-vector", allocator)) { - - listViewVector.allocateNew(); - - try (ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(1024))) { - elementVector.allocateNew(7); - - elementVector.set(0, 12); - elementVector.set(1, -7); - elementVector.set(2, 25); - elementVector.set(3, 0); - elementVector.set(4, -127); - elementVector.set(5, 127); - elementVector.set(6, 50); - - elementVector.setValueCount(7); - - int[] offSetValues = new int[]{0, 3, 3, 7}; - int[] sizeValues = new int[]{3, 0, 4, 0}; - - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); - - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - 
assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - - assertEquals(3, listViewVector.getLastSet()); - } + listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = listViewVector.getWriter(); + elementVector.allocateNew(7); - listViewWriter.setPosition(4); - listViewWriter.startList(); + elementVector.set(0, 12); + elementVector.set(1, -7); + elementVector.set(2, 25); + elementVector.set(3, 0); + elementVector.set(4, -127); + elementVector.set(5, 127); + elementVector.set(6, 50); - listViewWriter.bigInt().writeBigInt(121); - listViewWriter.bigInt().writeBigInt(-71); - listViewWriter.bigInt().writeBigInt(251); - listViewWriter.endList(); + elementVector.setValueCount(7); - listViewVector.setValueCount(5); + int[] offSetValues = new int[]{0, 3, 3, 7}; + int[] sizeValues = new int[]{3, 0, 4, 0}; - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); - assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); - assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); - assertEquals(4, listViewVector.getLastSet()); - } + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final 
ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + assertEquals(3, listViewVector.getLastSet()); + + listViewVector.close(); } + + @Test + public void testBasicListViewAdditionWithListViewWriter() { + ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); + ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + BigIntVector elementVector = new BigIntVector("element-vector", allocator); + + listViewVector.allocateNew(); + elementVector.allocateNew(7); + + elementVector.set(0, 12); + elementVector.set(1, -7); + elementVector.set(2, 25); + 
elementVector.set(3, 0); + elementVector.set(4, -127); + elementVector.set(5, 127); + elementVector.set(6, 50); + + elementVector.setValueCount(7); + + int[] offSetValues = new int[]{0, 3, 3, 7}; + int[] sizeValues = new int[]{3, 0, 4, 0}; + + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); + + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, 
((BigIntVector) listViewVector.getDataVector()).get(6)); + + assertEquals(3, listViewVector.getLastSet()); + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + listViewWriter.setPosition(4); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(121); + listViewWriter.bigInt().writeBigInt(-71); + listViewWriter.bigInt().writeBigInt(251); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + + // final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + // final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) 
listViewVector.getDataVector()).get(6)); + assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); + assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); + assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + + assertEquals(4, listViewVector.getLastSet()); + + listViewVector.close(); + } + } From 12f93d698b0db618a5100105062a2a3bccf4cd94 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 6 May 2024 14:32:06 +0530 Subject: [PATCH 13/34] fix: adding nested set-based tests --- .../arrow/vector/TestListViewVector.java | 179 ++++++++++++++---- 1 file changed, 139 insertions(+), 40 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index a45e2db4e57..46ae08ac802 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -50,44 +50,6 @@ public void terminate() throws Exception { allocator.close(); } - @Test - public void testNestedListVector() { - try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { - - UnionListWriter listWriter = listVector.getWriter(); - - /* allocate memory */ - listWriter.allocate(); - - /* the dataVector that backs a listVector will also be a - * listVector for this test. 
- */ - - /* write one or more inner lists at index 0 */ - listWriter.setPosition(0); - listWriter.startList(); - - listWriter.bigInt().writeBigInt(12); - listWriter.bigInt().writeBigInt(-7); - listWriter.bigInt().writeBigInt(25); - listWriter.endList(); - - listWriter.setPosition(1); - listWriter.startList(); - - listWriter.bigInt().writeBigInt(100); - listWriter.endList(); - - listWriter.bigInt().writeBigInt(180); - - listWriter.setValueCount(3); - listVector.setValueCount(3); - - System.out.println(listVector); - System.out.println(listVector.getDataVector()); - } - } - @Test public void testBasicListViewVector() { try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { @@ -438,7 +400,7 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) * 3. Buffer-based inserts followed by default list insertion. */ @Test - public void testBasicListViewAddition() { + public void testBasicListViewSet() { ArrowBuf newOffSetBuf = allocator.buffer(1024); ArrowBuf newSizeBuffer = allocator.buffer(1024); @@ -502,7 +464,144 @@ public void testBasicListViewAddition() { } @Test - public void testBasicListViewAdditionWithListViewWriter() { + public void testBasicListViewSetNested() { + // Expected listview + // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] + + // Setting child vector + // [[50,100,200],[75,125,150,175]], [10],[15,20],[25,30,35]] + ListVector listVector = ListVector.empty("nestedVector", allocator); + + UnionListWriter listWriter = listVector.getWriter(); + listWriter.allocate(); + + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(50); + listWriter.bigInt().writeBigInt(100); + listWriter.bigInt().writeBigInt(200); + + listWriter.endList(); + + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(75); + listWriter.bigInt().writeBigInt(125); + listWriter.bigInt().writeBigInt(150); + listWriter.bigInt().writeBigInt(175); + 
+ listWriter.endList(); + + listWriter.setPosition(2); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(10); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(3); + + listWriter.bigInt().writeBigInt(15); + listWriter.bigInt().writeBigInt(20); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(4); + + listWriter.bigInt().writeBigInt(25); + listWriter.bigInt().writeBigInt(30); + listWriter.bigInt().writeBigInt(35); + + listWriter.endList(); + + listVector.setValueCount(2); + + ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); + listViewVector.allocateNew(); + + ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + + int[] offSetValues = new int[]{0, 2}; + int[] sizeValues = new int[]{2, 3}; + + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 1); + + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, listVector, 2); + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + 
assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + listVector.close(); + listViewVector.close(); + } + + @Test + public void testBasicListViewSetWithListViewWriter() { ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); ArrowBuf newOffSetBuf = allocator.buffer(1024); ArrowBuf newSizeBuffer = allocator.buffer(1024); From 
996a561a929948126517cde557100a4f8e93c888 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 6 May 2024 15:31:55 +0530 Subject: [PATCH 14/34] fix: adding test cases --- .../arrow/vector/complex/ListViewVector.java | 39 ++++++++- .../arrow/vector/TestListViewVector.java | 82 ++++++++++++++++++- 2 files changed, 117 insertions(+), 4 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index e395591ab51..e571d236608 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -21,6 +21,7 @@ import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -128,9 +129,33 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers } + /** + * Set the reader and writer indexes for the inner buffers. 
+ */ + private void setReaderAndWriterIndex() { + validityBuffer.readerIndex(0); + offsetBuffer.readerIndex(0); + sizeBuffer.readerIndex(0); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + offsetBuffer.writerIndex(0); + sizeBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); + offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH); + sizeBuffer.writerIndex(valueCount * SIZE_WIDTH); + } + } + @Override public List getFieldBuffers() { - return null; + List result = new ArrayList<>(2); + setReaderAndWriterIndex(); + result.add(validityBuffer); + result.add(offsetBuffer); + result.add(sizeBuffer); + + return result; } @Override @@ -669,4 +694,16 @@ public void setLastSet(int newLastSet) { public int getValueCount() { return valueCount; } + + /** + * Get the density of this ListVector. + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final double totalListSize = getLengthOfChildVector(); + return totalListSize / valueCount; + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 46ae08ac802..9d8397f5992 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -19,8 +19,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.List; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -352,7 +354,7 @@ public void testNestedListViewVector() { } @Test - public void testNestedListVector1() throws Exception { + public void testNestedListVector() throws Exception { try (ListViewVector listViewVector = 
ListViewVector.empty("sourceVector", allocator)) { MinorType listType = MinorType.LISTVIEW; @@ -466,10 +468,10 @@ public void testBasicListViewSet() { @Test public void testBasicListViewSetNested() { // Expected listview - // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] + // [[[50,100,200],[75,125,150,175]],[[10],[15,20],[25,30,35]]] // Setting child vector - // [[50,100,200],[75,125,150,175]], [10],[15,20],[25,30,35]] + // [[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] ListVector listVector = ListVector.empty("nestedVector", allocator); UnionListWriter listWriter = listVector.getWriter(); @@ -705,4 +707,78 @@ public void testBasicListViewSetWithListViewWriter() { listViewVector.close(); } + @Test + public void testGetBufferAddress() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("vector", allocator)) { + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + boolean error = false; + + listViewWriter.allocate(); + + listViewWriter.setPosition(0); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.bigInt().writeBigInt(100); + listViewWriter.bigInt().writeBigInt(200); + listViewWriter.endList(); + + listViewWriter.setPosition(1); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(250); + listViewWriter.bigInt().writeBigInt(300); + listViewWriter.endList(); + + listViewVector.setValueCount(2); + + /* check listVector contents */ + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(100), resultSet.get(1)); + assertEquals(Long.valueOf(200), resultSet.get(2)); + + result = listViewVector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(250), resultSet.get(0)); + assertEquals(Long.valueOf(300), resultSet.get(1)); + + List buffers = 
listViewVector.getFieldBuffers(); + + long bitAddress = listViewVector.getValidityBufferAddress(); + long offsetAddress = listViewVector.getOffsetBufferAddress(); + long sizeAddress = listViewVector.getSizeBufferAddress(); + + try { + listViewVector.getDataBufferAddress(); + } catch (UnsupportedOperationException ue) { + error = true; + } finally { + assertTrue(error); + } + + assertEquals(3, buffers.size()); + assertEquals(bitAddress, buffers.get(0).memoryAddress()); + assertEquals(offsetAddress, buffers.get(1).memoryAddress()); + assertEquals(sizeAddress, buffers.get(2).memoryAddress()); + + /* (3+2)/2 */ + assertEquals(2.5, listViewVector.getDensity(), 0); + } + } + + @Test + public void testConsistentChildName() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + String emptyListStr = listViewVector.getField().toString(); + assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); + + listViewVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + String emptyVectorStr = listViewVector.getField().toString(); + assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); + } + } + } From 8e33ba3b5effde6c44f4dc26edae3c7ca3c3f7f7 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 6 May 2024 17:49:29 +0530 Subject: [PATCH 15/34] fix: adding more test cases on api functions --- .../complex/BaseRepeatedValueViewVector.java | 52 +++- .../arrow/vector/complex/ListViewVector.java | 41 ++- .../arrow/vector/TestListViewVector.java | 241 ++++++++++++++++++ 3 files changed, 327 insertions(+), 7 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index af23f078b6c..cd4367d8d39 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.UInt4Vector; @@ -199,11 +200,42 @@ public void setInitialCapacity(int numRecords) { @Override public void setInitialCapacity(int numRecords, double density) { + if ((numRecords * density) >= Integer.MAX_VALUE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + int innerValueCapacity = Math.max((int) (numRecords * density), 1); + + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } } + /** + * Specialized version of setInitialTotalCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param totalNumberOfElements the total number of elements to allow + * for in this vector across all records. 
+ */ public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + vector.setInitialCapacity(totalNumberOfElements); } @Override @@ -221,12 +253,26 @@ protected int getSizeBufferValueCapacity() { @Override public int getBufferSize() { - return 0; + if (valueCount == 0) { + return 0; + } + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + vector.getBufferSize(); } @Override public int getBufferSizeFor(int valueCount) { - return 0; + if (valueCount == 0) { + return 0; + } + + int innerVectorValueCount = 0; + + for (int i = 0; i < valueCount; i++) { + innerVectorValueCount += sizeBuffer.getInt(i * SIZE_WIDTH); + } + + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + + vector.getBufferSizeFor(innerVectorValueCount); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index e571d236608..aa3b4520390 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -111,11 +111,13 @@ public void setInitialCapacity(int numRecords) { @Override public void setInitialCapacity(int numRecords, double density) { - + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords, density); } @Override public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); super.setInitialTotalCapacity(numRecords, totalNumberOfElements); } @@ -342,14 +344,30 @@ public UnionListReader getReader() { throw new UnsupportedOperationException(); } + /** + * Get the size (number of bytes) of underlying buffers used by this + * vector. 
+ * @return size of underlying buffers. + */ @Override public int getBufferSize() { - return 0; + if (valueCount == 0) { + return 0; + } + final int offsetBufferSize = valueCount * OFFSET_WIDTH; + final int sizeBufferSize = valueCount * SIZE_WIDTH; + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + return offsetBufferSize + sizeBufferSize + validityBufferSize + vector.getBufferSize(); } @Override public int getBufferSizeFor(int valueCount) { - return 0; + if (valueCount == 0) { + return 0; + } + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + + return super.getBufferSizeFor(valueCount) + validityBufferSize; } @Override @@ -428,7 +446,7 @@ public int getNullCount() { @Override public int getValueCapacity() { - return 0; + return getValidityAndOffsetValueCapacity(); } private int getValidityAndSizeValueCapacity() { @@ -437,6 +455,11 @@ private int getValidityAndSizeValueCapacity() { return Math.min(offsetValueCapacity, sizeValueCapacity); } + private int getValidityAndOffsetValueCapacity() { + final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); + return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); + } + private int getValidityBufferValueCapacity() { return capAtMaxInt(validityBuffer.capacity() * 8); } @@ -706,4 +729,14 @@ public double getDensity() { final double totalListSize = getLengthOfChildVector(); return totalListSize / valueCount; } + + /** + * End the current value. 
+ * + * @param index index of the value to end + * @param size number of elements in the list that was written + */ + public void endValue(int index, int size) { + sizeBuffer.setInt(index * SIZE_WIDTH, size); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 9d8397f5992..6446aef8e01 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -19,19 +19,27 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.holders.DurationHolder; +import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; +import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DataSizeRoundingUtil; import org.junit.jupiter.api.AfterEach; @@ -781,4 +789,237 @@ public void testConsistentChildName() throws Exception { } } + @Test + public void testSetInitialCapacity() { + try (final 
ListViewVector vector = ListViewVector.empty("", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + vector.setInitialCapacity(512); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512); + + vector.setInitialCapacity(512, 4); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); + + vector.setInitialCapacity(512, 0.1); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 51); + + vector.setInitialCapacity(512, 0.01); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 5); + + vector.setInitialCapacity(5, 0.1); + vector.allocateNew(); + assertEquals(8, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 1); + } + } + + @Test + public void testClearAndReuse() { + try (final ListViewVector vector = ListViewVector.empty("list", allocator)) { + BigIntVector bigIntVector = + (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); + vector.setInitialCapacity(10); + vector.allocateNew(); + + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + + // Clear and release the buffers to trigger a realloc when adding next value + vector.clear(); + + // The list vector should reuse a buffer when reallocating the offset buffer + vector.startNewValue(0); + 
bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + result = vector.getObject(0); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + } + } + + @Test + public void testWriterGetField() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Int(32, true)), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + } + } + + @Test + public void testWriterUsingHolderGetTimestampMilliTZField() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); + holder.timezone = "SomeFakeTimeZone"; + writer.startList(); + holder.value = 12341234L; + writer.timeStampMilliTZ().write(holder); + holder.value = 55555L; + writer.timeStampMilliTZ().write(holder); + + // Writing with a different timezone should throw + holder.timezone = "AsdfTimeZone"; + holder.value = 77777; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.timeStampMilliTZ().write(holder)); + assertEquals( + "holder.timezone: AsdfTimeZone not equal to vector timezone: SomeFakeTimeZone", + ex.getMessage()); + + 
writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "SomeFakeTimeZone")), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + } + } + + @Test + public void testWriterGetDurationField() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + DurationHolder durationHolder = new DurationHolder(); + durationHolder.unit = TimeUnit.MILLISECOND; + + writer.startList(); + durationHolder.value = 812374L; + writer.duration().write(durationHolder); + durationHolder.value = 143451L; + writer.duration().write(durationHolder); + + // Writing with a different unit should throw + durationHolder.unit = TimeUnit.SECOND; + durationHolder.value = 8888888; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.duration().write(durationHolder)); + assertEquals( + "holder.unit: SECOND not equal to vector unit: MILLISECOND", ex.getMessage()); + + writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), null); + Field expectedField = new Field(vector.getName(), + FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + } + } + + @Test + public void testClose() throws Exception { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + 
writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + assertTrue(vector.getBufferSize() > 0); + assertTrue(vector.getDataVector().getBufferSize() > 0); + + writer.close(); + assertEquals(0, vector.getBufferSize()); + assertEquals(0, vector.getDataVector().getBufferSize()); + } + } + + @Test + public void testGetBufferSizeFor() { + try (final ListViewVector vector = ListViewVector.empty("list", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writeIntValues(writer, new int[] {1, 2}); + writeIntValues(writer, new int[] {3, 4}); + writeIntValues(writer, new int[] {5, 6}); + writeIntValues(writer, new int[] {7, 8, 9, 10}); + writeIntValues(writer, new int[] {11, 12, 13, 14}); + writer.setValueCount(5); + + IntVector dataVector = (IntVector) vector.getDataVector(); + int[] indices = new int[] {0, 2, 4, 6, 10, 14}; + + for (int valueCount = 1; valueCount <= 5; valueCount++) { + int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); + int offsetBufferSize = valueCount * BaseRepeatedValueViewVector.OFFSET_WIDTH; + int sizeBufferSize = valueCount * BaseRepeatedValueViewVector.SIZE_WIDTH; + + int expectedSize = validityBufferSize + offsetBufferSize + sizeBufferSize + + dataVector.getBufferSizeFor(indices[valueCount]); + assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); + } + } + } + + private void writeIntValues(UnionListViewWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } + } From 22fedb451f0462e3f3e971d26ab5383e4b33ca56 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 6 May 2024 18:27:28 +0530 Subject: [PATCH 16/34] fix: adding documentation and missing functions, tbd test --- .../complex/BaseRepeatedValueViewVector.java | 9 +- .../arrow/vector/complex/ListViewVector.java | 147 ++++++++++++++++-- .../arrow/vector/TestListViewVector.java | 48 +++++- 3 
files changed, 190 insertions(+), 14 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index cd4367d8d39..8aff3ccb16c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -103,7 +103,7 @@ private void allocateBuffers() { sizeBuffer = allocateSizeBuffer(sizeAllocationSizeInBytes); } - private ArrowBuf allocateOffsetBuffer(final long size) { + protected ArrowBuf allocateOffsetBuffer(final long size) { final int curSize = (int) size; ArrowBuf offsetBuffer = allocator.buffer(curSize); offsetBuffer.readerIndex(0); @@ -112,7 +112,7 @@ private ArrowBuf allocateOffsetBuffer(final long size) { return offsetBuffer; } - private ArrowBuf allocateSizeBuffer(final long size) { + protected ArrowBuf allocateSizeBuffer(final long size) { final int curSize = (int) size; ArrowBuf sizeBuffer = allocator.buffer(curSize); sizeBuffer.readerIndex(0); @@ -291,7 +291,10 @@ public void clear() { @Override public void reset() { - + offsetBuffer.setZero(0, offsetBuffer.capacity()); + sizeBuffer.setZero(0, sizeBuffer.capacity()); + vector.reset(); + valueCount = 0; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index aa3b4520390..22fd85f74d6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -20,6 +20,7 @@ import static java.util.Collections.singletonList; import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static 
org.apache.arrow.util.Preconditions.checkArgument; import java.util.ArrayList; import java.util.Collections; @@ -29,6 +30,8 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.AddOrGetResult; @@ -100,7 +103,15 @@ public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) @Override public void initializeChildrenFromFields(List children) { + checkArgument(children.size() == 1, + "ListViews have one child Field. Found: %s", children.isEmpty() ? "none" : children); + Field field = children.get(0); + AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); + checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); + + addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); } @Override @@ -109,12 +120,52 @@ public void setInitialCapacity(int numRecords) { super.setInitialCapacity(numRecords); } + /** + * Specialized version of setInitialCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. 
+ * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListViewVector. + * Density is the average size of a list per position in the ListViewVector. + * For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1, and + * the remaining positions are null (no lists) or empty lists. + * This helps in tightly controlling the memory we provision + * for inner data vector. + */ @Override public void setInitialCapacity(int numRecords, double density) { validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); super.setInitialCapacity(numRecords, density); } + /** + * Specialized version of setInitialTotalCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param totalNumberOfElements the total number of elements to allow + * for in this vector across all records. 
+ */ @Override public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); @@ -128,7 +179,28 @@ public List getChildrenFromFields() { @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + if (ownBuffers.size() != 3) { + throw new IllegalArgumentException("Illegal buffer count, expected " + + 3 + ", got: " + ownBuffers.size()); + } + + ArrowBuf bitBuffer = ownBuffers.get(0); + ArrowBuf offBuffer = ownBuffers.get(1); + ArrowBuf szBuffer = ownBuffers.get(2); + + validityBuffer.getReferenceManager().release(); + validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); + offsetBuffer.getReferenceManager().release(); + offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); + sizeBuffer.getReferenceManager().release(); + sizeBuffer = offBuffer.getReferenceManager().retain(szBuffer, allocator); + validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); + offsetAllocationSizeInBytes = offsetBuffer.capacity(); + sizeAllocationSizeInBytes = sizeBuffer.capacity(); + + lastSet = fieldNode.getLength() - 1; + valueCount = fieldNode.getLength(); } /** @@ -160,9 +232,14 @@ public List getFieldBuffers() { return result; } + /** + * Export the buffers of the fields for C Data Interface. + * This method traverses the buffers and export buffer and buffer's memory address into a list of + * buffers and a pointer to the list of buffers. 
+ */ @Override public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - + throw new UnsupportedOperationException("exportCDataBuffers Not implemented yet"); } @Override @@ -244,11 +321,16 @@ private long getNewAllocationSize(int currentBufferCapacity) { @Override public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { + // TODO: https://github.com/apache/arrow/issues/41270 + throw new UnsupportedOperationException( + "ListViewVector does not support copyFromSafe operation yet."); } @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { - + // TODO: https://github.com/apache/arrow/issues/41270 + throw new UnsupportedOperationException( + "ListViewVector does not support copyFrom operation yet."); } @Override @@ -268,17 +350,23 @@ public TransferPair getTransferPair(Field field, BufferAllocator allocator) { @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - throw new UnsupportedOperationException(); + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support getTransferPair(String, BufferAllocator, CallBack) yet"); } @Override public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - throw new UnsupportedOperationException(); + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support getTransferPair(Field, BufferAllocator, CallBack) yet"); } @Override public TransferPair makeTransferPair(ValueVector target) { - throw new UnsupportedOperationException(); + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support makeTransferPair(ValueVector) yet"); } @Override @@ -326,7 +414,16 @@ public int hashCode(int index) { @Override public int hashCode(int index, ArrowBufHasher hasher) { - return 0; + if 
(isSet(index) == 0) { + return ArrowBufPointer.NULL_HASH_CODE; + } + int hash = 0; + final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int end = sizeBuffer.getInt(index * OFFSET_WIDTH); + for (int i = start; i < end; i++) { + hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher)); + } + return hash; } @Override @@ -336,11 +433,13 @@ public OUT accept(VectorVisitor visitor, IN value) { @Override protected FieldReader getReaderImpl() { + // TODO: throw new UnsupportedOperationException(); } @Override public UnionListReader getReader() { + // TODO: throw new UnsupportedOperationException(); } @@ -393,6 +492,9 @@ public void clear() { @Override public void reset() { + super.reset(); + validityBuffer.setZero(0, validityBuffer.capacity()); + lastSet = -1; } @Override @@ -423,7 +525,11 @@ public boolean isNull(int index) { @Override public boolean isEmpty(int index) { - return false; + if (isNull(index)) { + return true; + } else { + return sizeBuffer.getInt(index * SIZE_WIDTH) == 0; + } } /** @@ -439,9 +545,14 @@ public int isSet(int index) { return (b >> bitIndex) & 0x01; } + /** + * Get the number of elements that are null in the vector. + * + * @return the number of null elements. 
+ */ @Override public int getNullCount() { - return 0; + return BitVectorHelper.getNullCount(validityBuffer, valueCount); } @Override @@ -466,7 +577,23 @@ private int getValidityBufferValueCapacity() { @Override public void setNull(int index) { + // TODO: test this function + while (index >= getValidityAndSizeValueCapacity()) { + reallocValidityAndSizeAndOffsetBuffers(); + } + if (lastSet >= index) { + lastSet = index - 1; + } + + for (int i = lastSet + 1; i <= index; i++) { + final int prevOffSet = offsetBuffer.getInt((i - 1L) * OFFSET_WIDTH); + final int prevSize = sizeBuffer.getInt((i - 1L) * SIZE_WIDTH); + final int currOffSet = prevOffSet + prevSize; + offsetBuffer.setInt(i * OFFSET_WIDTH, currOffSet); + } + BitVectorHelper.unsetBit(validityBuffer, index); + lastSet = index; } /** @@ -665,12 +792,12 @@ public void setValueCount(int valueCount) { @Override public int getElementStartIndex(int index) { - return 0; + return offsetBuffer.getInt(index * OFFSET_WIDTH); } @Override public int getElementEndIndex(int index) { - return 0; + return sizeBuffer.getInt(index * OFFSET_WIDTH); } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 6446aef8e01..a4b290460d1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -869,6 +869,7 @@ public void testClearAndReuse() { @Test public void testWriterGetField() { + // adopted from ListVector test cases try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { UnionListViewWriter writer = vector.getWriter(); @@ -892,6 +893,7 @@ public void testWriterGetField() { @Test public void testWriterUsingHolderGetTimestampMilliTZField() { + // adopted from ListVector test cases try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { 
org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); writer.allocate(); @@ -927,6 +929,7 @@ public void testWriterUsingHolderGetTimestampMilliTZField() { @Test public void testWriterGetDurationField() { + // adopted from ListVector test cases try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); writer.allocate(); @@ -986,7 +989,7 @@ public void testClose() throws Exception { @Test public void testGetBufferSizeFor() { - try (final ListViewVector vector = ListViewVector.empty("list", allocator)) { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { UnionListViewWriter writer = vector.getWriter(); writer.allocate(); @@ -1014,6 +1017,49 @@ public void testGetBufferSizeFor() { } } + @Test + public void testIsEmpty() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + // set values [1,2], null, [], [5,6] + writeIntValues(writer, new int[] {1, 2}); + writer.setPosition(2); + writeIntValues(writer, new int[] {}); + writeIntValues(writer, new int[] {5, 6}); + writer.setValueCount(4); + + assertFalse(vector.isEmpty(0)); + assertTrue(vector.isNull(1)); + assertTrue(vector.isEmpty(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isEmpty(2)); + assertFalse(vector.isEmpty(3)); + } + } + + @Test + public void testTotalCapacity() { + // adopted from ListVector test cases + final FieldType type = FieldType.nullable(MinorType.INT.getType()); + try (final ListViewVector vector = new ListViewVector("list", allocator, type, null)) { + // Force the child vector to be allocated based on the type + // (this is a bad API: we have to track and repeat the type twice) + vector.addOrGetVector(type); + + // Specify the allocation size but do not allocate + vector.setInitialTotalCapacity(10, 100); + + // Finally, 
actually do the allocation + vector.allocateNewSafe(); + + // Note: allocator rounds up and can be greater than the requested allocation. + assertTrue(vector.getValueCapacity() >= 10); + assertTrue(vector.getDataVector().getValueCapacity() >= 100); + } + } + private void writeIntValues(UnionListViewWriter writer, int[] values) { writer.startList(); for (int v: values) { From 86ac42513e2e3e7718fe583f0a52b1afa88167fe Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 7 May 2024 08:24:58 +0530 Subject: [PATCH 17/34] fix: adding null tests, validation tests and overwrite tests --- .../arrow/vector/complex/ListViewVector.java | 133 ++--- .../arrow/vector/TestListViewVector.java | 455 +++++++++++++++++- 2 files changed, 488 insertions(+), 100 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 22fd85f74d6..27b0b4e9141 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -38,7 +38,6 @@ import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.UnionListReader; @@ -577,7 +576,6 @@ private int getValidityBufferValueCapacity() { @Override public void setNull(int index) { - // TODO: test this function while (index >= getValidityAndSizeValueCapacity()) { reallocValidityAndSizeAndOffsetBuffers(); } @@ -585,11 +583,15 @@ public void setNull(int index) { lastSet = index - 1; } - for (int i = lastSet + 1; i <= index; i++) { - final int prevOffSet = offsetBuffer.getInt((i - 1L) * OFFSET_WIDTH); - final int prevSize = sizeBuffer.getInt((i - 1L) * 
SIZE_WIDTH); - final int currOffSet = prevOffSet + prevSize; - offsetBuffer.setInt(i * OFFSET_WIDTH, currOffSet); + if (index == 0) { + offsetBuffer.setInt(0, 0); + sizeBuffer.setInt(0, 0); + } else { + final int prevOffset = offsetBuffer.getInt((index - 1) * OFFSET_WIDTH); + final int prevSize = sizeBuffer.getInt((index - 1) * SIZE_WIDTH); + final int currOffSet = prevOffset + prevSize; + offsetBuffer.setInt(index * OFFSET_WIDTH, currOffSet); + sizeBuffer.setInt(index * SIZE_WIDTH, 0); } BitVectorHelper.unsetBit(validityBuffer, index); @@ -623,6 +625,10 @@ public int startNewValue(int index) { /* when skipping indices, we need to update the offset buffer */ /* setting offset from lastSet + 1 to index (included) */ for (int i = lastSet + 1; i <= index; i++) { + if (i == 0) { + offsetBuffer.setInt(0, 0); + continue; + } final int prevOffSet = offsetBuffer.getInt((i - 1L) * OFFSET_WIDTH); final int prevSize = sizeBuffer.getInt((i - 1L) * SIZE_WIDTH); final int currOffSet = prevOffSet + prevSize; @@ -640,103 +646,14 @@ public int startNewValue(int index) { return offsetBuffer.getInt(index * OFFSET_WIDTH); } - /** - * Constructing a ListViewVector when the offsets, sizes and field vector are available. - *

- * Steps taken follow the workflow used in creating a ListViewVector with the API - * used in ListVector. - * - * @param offSets new offSets to be set - * @param sizes new sizes to be set - * @param elementFieldVec new elements to be appended to the field vector - */ - public void setNewValues( - List offSets, List sizes, FieldVector elementFieldVec) { - // Null checks - Objects.requireNonNull(offSets, "Offsets cannot be null"); - Objects.requireNonNull(sizes, "Sizes cannot be null"); - Objects.requireNonNull(elementFieldVec, "Element Field Vector cannot be null"); - - while (offSets.size() >= getValidityAndSizeValueCapacity()) { - reallocValidityAndSizeAndOffsetBuffers(); - } - - /* TODO: add the validation method - * 0 <= offsets[i] <= length of the child array - * 0 <= offsets[i] + size[i] <= length of the child array - */ - if (offSets.size() != sizes.size()) { - throw new IllegalArgumentException("Offsets and sizes must be of the same size." + - " Offsets size: " + offSets.size() + ", Sizes size: " + sizes.size()); - } - - UnionListViewWriter writer = this.getWriter(); - int lastSet = this.getLastSet(); - int listViewValueCount = this.getValueCount(); - int newListViewValueCount = listViewValueCount + offSets.size(); - lastSet += offSets.size(); - - // set validity bit - final ArrowBuf validityBuffer = this.getValidityBuffer(); - for (int i = listViewValueCount; i < newListViewValueCount; i++) { - if (sizes.get(i - listViewValueCount) != null) { - // only set validity bit if size is not 0 - BitVectorHelper.setBit(validityBuffer, i); - } - } - - final ArrowBuf offSetBuffer = this.getOffsetBuffer(); - final ArrowBuf sizeBuffer = this.getSizeBuffer(); - - // set offset and size buffers - for (int i = listViewValueCount; i < newListViewValueCount; i++) { - writer.setPosition(i); - offSetBuffer.setInt(i * OFFSET_WIDTH, - offSets.get(i - listViewValueCount)); - // we use null to determine a list with size 0 and no list - if (sizes.get(i - listViewValueCount) != 
null) { - sizeBuffer.setInt(i * SIZE_WIDTH, - sizes.get(i - listViewValueCount)); - } else { - sizeBuffer.setInt(i * SIZE_WIDTH, - 0); - } - } - - // updating field vector - if (this.getDataVector() instanceof NullVector) { - this.vector = DEFAULT_DATA_VECTOR; - this.vector.allocateNew(); - } - - if (this.getDataVector() instanceof NullVector) { - this.vector = elementFieldVec; - } else { - FieldVector dataVec = this.getDataVector(); - for (int i = 0; i < elementFieldVec.getValueCount(); i++) { - dataVec.copyFromSafe(i, i + dataVec.getValueCount(), elementFieldVec); - } - dataVec.setValueCount(dataVec.getValueCount() + elementFieldVec.getValueCount()); - } - writer.setPosition(newListViewValueCount); - this.setLastSet(lastSet); - this.setValueCount(newListViewValueCount); - } - private int getLengthOfChildVector() { int length = 0; - for (int i = 0; i < lastSet + 1; i++) { + for (int i = 0; i < valueCount; i++) { length += sizeBuffer.getInt(i * SIZE_WIDTH); } return length; } - private void setValuesInBuffer(ArrowBuf srcBuf, ArrowBuf destBuf, long bufWidth, int length) { - for (int i = 0; i < length; i++) { - destBuf.setInt(i * bufWidth, srcBuf.getInt(i * bufWidth)); - } - } - /** * Constructing a ListViewVector when the offsets, sizes and field vector are available. *

@@ -857,6 +774,28 @@ public double getDensity() { return totalListSize / valueCount; } + /** + * Validating ListViewVector creation based on the specification guideline. + * 0 <= offsets[i] <= length of the child array + * 0 <= offsets[i] + size[i] <= length of the child array + */ + @Override + public void validate() { + for (int i = 0; i < valueCount; i++) { + final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); + final int size = sizeBuffer.getInt(i * SIZE_WIDTH); + final int childArrayLength = getLengthOfChildVector(); + if (offset < 0 || offset > childArrayLength) { + throw new IllegalStateException(String.format( + "Offset %d at index %d is out of bounds", offset, i)); + } + if ((offset + size) < 0 || (offset + size) > childArrayLength) { + throw new IllegalStateException(String.format( + "Size %d at index %d is out of bounds", size, i)); + } + } + } + /** * End the current value. * diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index a4b290460d1..cab2da5b90a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -21,6 +21,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.util.ArrayList; import java.util.Arrays; @@ -60,6 +61,14 @@ public void terminate() throws Exception { allocator.close(); } + private void validateVector(ListViewVector vector) { + try { + vector.validate(); + } catch (Exception e) { + fail("Validation threw an exception: " + e.getMessage()); + } + } + @Test public void testBasicListViewVector() { try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { @@ -140,6 +149,8 @@ public 
void testBasicListViewVector() { assertEquals(2, ((BigIntVector) dataVec).get(8)); assertEquals(3, ((BigIntVector) dataVec).get(9)); assertEquals(4, ((BigIntVector) dataVec).get(10)); + + validateVector(listViewVector); } } @@ -240,6 +251,8 @@ public void testImplicitNullVectors() { assertEquals(10, lastSet11); listViewVector.setValueCount(11); + + validateVector(listViewVector); } } @@ -358,6 +371,8 @@ public void testNestedListViewVector() { // check size buffer assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + validateVector(listViewVector); } } @@ -389,6 +404,8 @@ public void testNestedListVector() throws Exception { innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); listViewVector.setInitialCapacity(128); + + validateVector(listViewVector); } } @@ -398,7 +415,7 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) } } - /** + /* * Setting up the buffers directly needs to be validated with the base method used in * the ListVector class where we use the approach of startList(), * write to the child vector and endList(). 
@@ -469,7 +486,7 @@ public void testBasicListViewSet() { assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); assertEquals(3, listViewVector.getLastSet()); - + validateVector(listViewVector); listViewVector.close(); } @@ -607,6 +624,7 @@ public void testBasicListViewSetNested() { assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); listVector.close(); + validateVector(listViewVector); listViewVector.close(); } @@ -711,7 +729,7 @@ public void testBasicListViewSetWithListViewWriter() { assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); assertEquals(4, listViewVector.getLastSet()); - + validateVector(listViewVector); listViewVector.close(); } @@ -774,6 +792,7 @@ public void testGetBufferAddress() throws Exception { /* (3+2)/2 */ assertEquals(2.5, listViewVector.getDensity(), 0); + validateVector(listViewVector); } } @@ -818,6 +837,8 @@ public void testSetInitialCapacity() { vector.allocateNew(); assertEquals(8, vector.getValueCapacity()); assertTrue(vector.getDataVector().getValueCapacity() >= 1); + + validateVector(vector); } } @@ -864,6 +885,8 @@ public void testClearAndReuse() { result = vector.getObject(1); resultSet = (ArrayList) result; assertEquals(Long.valueOf(8), resultSet.get(0)); + + validateVector(vector); } } @@ -888,6 +911,8 @@ public void testWriterGetField() { Arrays.asList(expectedDataField)); assertEquals(expectedField, writer.getField()); + + validateVector(vector); } } @@ -924,6 +949,8 @@ public void testWriterUsingHolderGetTimestampMilliTZField() { Arrays.asList(expectedDataField)); assertEquals(expectedField, writer.getField()); + + validateVector(vector); } } @@ -961,6 +988,8 @@ public void testWriterGetDurationField() { Arrays.asList(expectedDataField)); assertEquals(expectedField, writer.getField()); + + validateVector(vector); } } @@ -984,6 +1013,8 @@ public void testClose() throws Exception { writer.close(); assertEquals(0, vector.getBufferSize()); assertEquals(0, 
vector.getDataVector().getBufferSize()); + + validateVector(vector); } } @@ -1014,6 +1045,7 @@ public void testGetBufferSizeFor() { dataVector.getBufferSizeFor(indices[valueCount]); assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); } + validateVector(vector); } } @@ -1036,6 +1068,8 @@ public void testIsEmpty() { assertFalse(vector.isNull(2)); assertTrue(vector.isEmpty(2)); assertFalse(vector.isEmpty(3)); + + validateVector(vector); } } @@ -1057,9 +1091,424 @@ public void testTotalCapacity() { // Note: allocator rounds up and can be greater than the requested allocation. assertTrue(vector.getValueCapacity() >= 10); assertTrue(vector.getDataVector().getValueCapacity() >= 100); + + validateVector(vector); } } + @Test + public void testSetNull1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.endList(); + + vector.setNull(1); + + writer.setPosition(2); + writer.startList(); + writer.bigInt().writeBigInt(30); + writer.bigInt().writeBigInt(40); + writer.endList(); + + vector.setNull(3); + vector.setNull(4); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(50); + writer.bigInt().writeBigInt(60); + writer.endList(); + + vector.setValueCount(6); + + assertFalse(vector.isNull(0)); + assertTrue(vector.isNull(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(1, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(1, 
BitVectorHelper.get(validityBuffer, 2)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + + result = vector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(30), resultSet.get(0)); + assertEquals(Long.valueOf(40), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(60), resultSet.get(1)); + + validateVector(vector); + } + } + + @Test + public void testSetNull2() { + try (ListViewVector 
vector = ListViewVector.empty("listview", allocator)) { + // validate setting nulls first and then writing values + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + validateVector(vector); + } + } + + @Test + public void testSetNull3() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + // validate setting values first and then writing nulls + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + 
writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = 
vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + validateVector(vector); + } + } + + @Test + public void testOverWrite1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + 
assertEquals(Long.valueOf(90), resultSet.get(1)); + + validateVector(vector); + } + } + + @Test + public void testOverwriteWithNull() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(0); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(1); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + assertTrue(vector.isNull(0)); + assertTrue(vector.isNull(1)); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + assertEquals(2, 
offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setValueCount(2); + + assertFalse(vector.isNull(0)); + assertFalse(vector.isNull(1)); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + assertEquals(Long.valueOf(90), resultSet.get(1)); + + validateVector(vector); + } + } + + @Test + public void testOutOfOrderOffset1() { + // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + } + private void writeIntValues(UnionListViewWriter writer, int[] values) { writer.startList(); for (int v: values) { From aa1e4cbbe8916bd66801b2ccae66ad6c7bbf1c97 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 7 May 2024 12:43:33 +0530 Subject: [PATCH 18/34] fix: adding out of order offset test --- .../arrow/vector/TestListViewVector.java | 91 ++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index cab2da5b90a..358db51e5c1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -844,7 +844,7 @@ public void testSetInitialCapacity() { @Test public void testClearAndReuse() { - try (final ListViewVector vector = ListViewVector.empty("list", allocator)) { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { BigIntVector bigIntVector = (BigIntVector) 
vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); vector.setInitialCapacity(10); @@ -1077,7 +1077,7 @@ public void testIsEmpty() { public void testTotalCapacity() { // adopted from ListVector test cases final FieldType type = FieldType.nullable(MinorType.INT.getType()); - try (final ListViewVector vector = new ListViewVector("list", allocator, type, null)) { + try (final ListViewVector vector = new ListViewVector("listview", allocator, type, null)) { // Force the child vector to be allocated based on the type // (this is a bad API: we have to track and repeat the type twice) vector.addOrGetVector(type); @@ -1507,6 +1507,93 @@ public void testOverwriteWithNull() { @Test public void testOutOfOrderOffset1() { // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + SmallIntVector childVector = new SmallIntVector("child-vector", allocator); + ListViewVector listViewVector = ListViewVector.empty("listview", allocator); + + listViewVector.allocateNew(); + + childVector.allocateNew(7); + + childVector.set(0, 0); + childVector.set(1, -127); + childVector.set(2, 127); + childVector.set(3, 50); + childVector.set(4, 12); + childVector.set(5, -7); + childVector.set(6, 25); + + childVector.setValueCount(7); + + int[] offSetValues = new int[]{4, 7, 0, 0, 3}; + int[] sizeValues = new int[]{3, 0, 4, 0, 2}; + + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); + BitVectorHelper.setBit(validityBuffer, 4); + + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 5); + + final 
ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check child vector + assertEquals(0, ((SmallIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-127, ((SmallIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(127, ((SmallIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(50, ((SmallIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(12, ((SmallIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(-7, ((SmallIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(25, ((SmallIntVector) listViewVector.getDataVector()).get(6)); + + // check values + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Short.valueOf("12"), resultSet.get(0)); + assertEquals(Short.valueOf("-7"), resultSet.get(1)); + assertEquals(Short.valueOf("25"), resultSet.get(2)); + + assertTrue(listViewVector.isNull(1)); + + result = listViewVector.getObject(2); + resultSet = 
(ArrayList) result; + assertEquals(4, resultSet.size()); + assertEquals(Short.valueOf("0"), resultSet.get(0)); + assertEquals(Short.valueOf("-127"), resultSet.get(1)); + assertEquals(Short.valueOf("127"), resultSet.get(2)); + assertEquals(Short.valueOf("50"), resultSet.get(3)); + + assertTrue(listViewVector.isEmpty(3)); + + result = listViewVector.getObject(4); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Short.valueOf("50"), resultSet.get(0)); + assertEquals(Short.valueOf("12"), resultSet.get(1)); + + validateVector(listViewVector); + listViewVector.close(); } private void writeIntValues(UnionListViewWriter writer, int[] values) { From 3c96f902f6b2f0d3b4b78ea0b153ce5f6d6b510d Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 7 May 2024 17:28:36 +0530 Subject: [PATCH 19/34] fix: adding docs and creating issues --- .../arrow/vector/complex/ListViewVector.java | 77 +++++++++++++++++-- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 27b0b4e9141..a63187a0193 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -23,6 +23,7 @@ import static org.apache.arrow.util.Preconditions.checkArgument; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -432,14 +433,16 @@ public OUT accept(VectorVisitor visitor, IN value) { @Override protected FieldReader getReaderImpl() { - // TODO: - throw new UnsupportedOperationException(); + // TODO: https://github.com/apache/arrow/issues/41569 + throw new UnsupportedOperationException( + "ListViewVector does not support getReaderImpl operation yet."); } @Override public UnionListReader getReader() { - // TODO: - 
throw new UnsupportedOperationException(); + // TODO: https://github.com/apache/arrow/issues/41569 + throw new UnsupportedOperationException( + "ListViewVector does not support getReader operation yet."); } /** @@ -458,6 +461,11 @@ public int getBufferSize() { return offsetBufferSize + sizeBufferSize + validityBufferSize + vector.getBufferSize(); } + /** + * Get the size (number of bytes) of underlying buffers used by this. + * @param valueCount the number of values to assume this vector contains + * @return size of underlying buffers. + */ @Override public int getBufferSizeFor(int valueCount) { if (valueCount == 0) { @@ -468,6 +476,10 @@ public int getBufferSizeFor(int valueCount) { return super.getBufferSizeFor(valueCount) + validityBufferSize; } + /** + * Get the field associated with the list view vector. + * @return the field + */ @Override public Field getField() { if (field.getChildren().contains(getDataVector().getField())) { @@ -477,11 +489,18 @@ public Field getField() { return field; } + /** + * Get the minor type for the vector. + * @return the minor type + */ @Override public MinorType getMinorType() { return MinorType.LIST; } + /** + * Clear the vector data. + */ @Override public void clear() { super.clear(); @@ -489,6 +508,9 @@ public void clear() { lastSet = -1; } + /** + * Release the buffers associated with this vector. + */ @Override public void reset() { super.reset(); @@ -496,11 +518,45 @@ public void reset() { lastSet = -1; } + /** + * Return the underlying buffers associated with this vector. Note that this doesn't + * impact the reference counts for this buffer, so it only should be used for in-context + * access. Also note that this buffer changes regularly, thus + * external classes shouldn't hold a reference to it (unless they change it). 
+ * + * @param clear Whether to clear vector before returning, the buffers will still be refcounted + * but the returned array will be the only reference to them + * @return The underlying {@link ArrowBuf buffers} that is used by this + * vector instance. + */ @Override public ArrowBuf[] getBuffers(boolean clear) { - return new ArrowBuf[0]; + setReaderAndWriterIndex(); + final ArrowBuf[] buffers; + if (getBufferSize() == 0) { + buffers = new ArrowBuf[0]; + } else { + List list = new ArrayList<>(); + list.add(offsetBuffer); + list.add(validityBuffer); + list.add(sizeBuffer); + list.addAll(Arrays.asList(vector.getBuffers(false))); + buffers = list.toArray(new ArrowBuf[list.size()]); + } + if (clear) { + for (ArrowBuf buffer : buffers) { + buffer.getReferenceManager().retain(); + } + clear(); + } + return buffers; } + /** + * Get the element in the list view vector at a particular index. + * @param index position of the element + * @return Object at given position + */ @Override public List getObject(int index) { if (isSet(index) == 0) { @@ -517,11 +573,22 @@ public List getObject(int index) { return vals; } + /** + * Check if an element at given index is null. + * + * @param index position of an element + * @return true if an element at given index is null, false otherwise + */ @Override public boolean isNull(int index) { return (isSet(index) == 0); } + /** + * Check if an element at given index is an empty list. 
+ * @param index position of an element + * @return true if an element at given index is an empty list or NULL, false otherwise + */ @Override public boolean isEmpty(int index) { if (isNull(index)) { From ab15958007814a6e0c9a2d88fb1b948000df4b97 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 8 May 2024 06:26:42 +0530 Subject: [PATCH 20/34] fix: improving documentation and test cases: self review 1 --- .../complex/BaseRepeatedValueViewVector.java | 47 +- .../arrow/vector/complex/ListViewVector.java | 61 +- .../arrow/vector/TestListViewVector.java | 670 +++++++++--------- 3 files changed, 416 insertions(+), 362 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 8aff3ccb16c..dbe75cb4518 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -19,6 +19,7 @@ import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import java.util.Collections; import java.util.Iterator; import org.apache.arrow.memory.ArrowBuf; @@ -87,7 +88,6 @@ public boolean allocateNewSafe() { allocateBuffers(); dataAlloc = vector.allocateNewSafe(); } catch (Exception e) { - e.printStackTrace(); clear(); return false; } finally { @@ -103,7 +103,7 @@ private void allocateBuffers() { sizeBuffer = allocateSizeBuffer(sizeAllocationSizeInBytes); } - protected ArrowBuf allocateOffsetBuffer(final long size) { + private ArrowBuf allocateOffsetBuffer(final long size) { final int curSize = (int) size; ArrowBuf offsetBuffer = allocator.buffer(curSize); offsetBuffer.readerIndex(0); @@ -112,7 +112,7 @@ protected ArrowBuf allocateOffsetBuffer(final long size) { return offsetBuffer; } - protected ArrowBuf allocateSizeBuffer(final long size) { + private ArrowBuf 
allocateSizeBuffer(final long size) { final int curSize = (int) size; ArrowBuf sizeBuffer = allocator.buffer(curSize); sizeBuffer.readerIndex(0); @@ -123,12 +123,16 @@ protected ArrowBuf allocateSizeBuffer(final long size) { @Override public void reAlloc() { + reallocateBuffers(); + vector.reAlloc(); + } + + protected void reallocateBuffers() { reallocOffsetBuffer(); reallocSizeBuffer(); - vector.reAlloc(); } - protected void reallocOffsetBuffer() { + private void reallocOffsetBuffer() { final long currentBufferCapacity = offsetBuffer.capacity(); long newAllocationSize = currentBufferCapacity * 2; if (newAllocationSize == 0) { @@ -155,7 +159,7 @@ protected void reallocOffsetBuffer() { offsetAllocationSizeInBytes = newAllocationSize; } - protected void reallocSizeBuffer() { + private void reallocSizeBuffer() { final long currentBufferCapacity = sizeBuffer.capacity(); long newAllocationSize = currentBufferCapacity * 2; if (newAllocationSize == 0) { @@ -277,7 +281,7 @@ public int getBufferSizeFor(int valueCount) { @Override public Iterator iterator() { - return null; + return Collections.singleton(getDataVector()).iterator(); } @Override @@ -316,10 +320,7 @@ public boolean isNull(int index) { public void setValueCount(int valueCount) { this.valueCount = valueCount; while (valueCount > getOffsetBufferValueCapacity()) { - reallocOffsetBuffer(); - } - while (valueCount > getSizeBufferValueCapacity()) { - reallocSizeBuffer(); + reallocateBuffers(); } final int childValueCount = valueCount == 0 ? 0 : getLengthOfChildVector(); vector.setValueCount(childValueCount); @@ -368,8 +369,30 @@ public boolean isEmpty(int index) { return false; } + /** + * Start a new value at the given index. 
+ * @param index the index to start the new value at + * @return the offset in the data vector where the new value starts + */ public int startNewValue(int index) { - return 0; + while (index >= getOffsetBufferValueCapacity()) { + reallocOffsetBuffer(); + } + while (index >= getSizeBufferValueCapacity()) { + reallocSizeBuffer(); + } + + if (index == 0) { + offsetBuffer.setInt(0, 0); + } else { + final int prevOffset = offsetBuffer.getInt((index - 1) * OFFSET_WIDTH); + final int prevSize = sizeBuffer.getInt((index - 1) * SIZE_WIDTH); + final int currOffSet = prevOffset + prevSize; + offsetBuffer.setInt(index * OFFSET_WIDTH, currOffSet); + } + + setValueCount(index + 1); + return offsetBuffer.getInt(index * OFFSET_WIDTH); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index a63187a0193..30662a63a27 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -54,6 +54,17 @@ import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.TransferPair; +/** + * A list view vector contains lists of a specific type of elements. + * Its structure contains four elements. + *

<ol> + * <li>A validity buffer.</li> + * <li>An offset buffer, that denotes lists starts.</li> + * <li>A size buffer, that denotes lists ends.</li> + * <li>A child data vector that contains the elements of lists.</li> + * </ol>
+ * The latter three are managed by its superclass. + */ public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector { protected ArrowBuf validityBuffer; @@ -99,8 +110,6 @@ public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) this.lastSet = -1; } - - @Override public void initializeChildrenFromFields(List children) { checkArgument(children.size() == 1, @@ -177,6 +186,11 @@ public List getChildrenFromFields() { return singletonList(getDataVector()); } + /** + * Load the buffers associated with this Field. + * @param fieldNode the fieldNode + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { if (ownBuffers.size() != 3) { @@ -193,7 +207,7 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers offsetBuffer.getReferenceManager().release(); offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); sizeBuffer.getReferenceManager().release(); - sizeBuffer = offBuffer.getReferenceManager().retain(szBuffer, allocator); + sizeBuffer = szBuffer.getReferenceManager().retain(szBuffer, allocator); validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); offsetAllocationSizeInBytes = offsetBuffer.capacity(); @@ -284,9 +298,8 @@ public void reAlloc() { } protected void reallocValidityAndSizeAndOffsetBuffers() { - reallocOffsetBuffer(); + reallocateBuffers(); reallocValidityBuffer(); - reallocSizeBuffer(); } private void reallocValidityBuffer() { @@ -407,11 +420,22 @@ public long getSizeBufferAddress() { return sizeBuffer.memoryAddress(); } + /** + * Get the hash code for the element at the given index. + * @param index position of the element + * @return hash code for the element at the given index + */ @Override public int hashCode(int index) { return hashCode(index, null); } + /** + * Get the hash code for the element at the given index. 
+ * @param index position of the element + * @param hasher hasher to use + * @return hash code for the element at the given index + */ @Override public int hashCode(int index, ArrowBufHasher hasher) { if (isSet(index) == 0) { @@ -621,6 +645,13 @@ public int getNullCount() { return BitVectorHelper.getNullCount(validityBuffer, valueCount); } + /** + * Get the value capacity by considering validity and offset capacity. + * Note that the size buffer capacity is not considered here since it has + * the same capacity as the offset buffer. + * + * @return the value capacity + */ @Override public int getValueCapacity() { return getValidityAndOffsetValueCapacity(); @@ -641,6 +672,10 @@ private int getValidityBufferValueCapacity() { return capAtMaxInt(validityBuffer.capacity() * 8); } + /** + * Set the element at the given index to null. + * @param index the value to change + */ @Override public void setNull(int index) { while (index >= getValidityAndSizeValueCapacity()) { @@ -741,13 +776,17 @@ public void set(ArrowBuf offSetBuffer, ArrowBuf sizeBuffer, ArrowBuf validityBuf Objects.requireNonNull(validityBuffer, "Validity buffer cannot be null"); Objects.requireNonNull(elementFieldVec, "Element Field Vector cannot be null"); - // clear buffers - this.clear(); - // set buffers - this.offsetBuffer = offSetBuffer; - this.sizeBuffer = sizeBuffer; - this.validityBuffer = validityBuffer; + this.validityBuffer.getReferenceManager().release(); + this.validityBuffer = validityBuffer.getReferenceManager().retain(validityBuffer, allocator); + this.offsetBuffer.getReferenceManager().release(); + this.offsetBuffer = offSetBuffer.getReferenceManager().retain(offSetBuffer, allocator); + this.sizeBuffer.getReferenceManager().release(); + this.sizeBuffer = sizeBuffer.getReferenceManager().retain(sizeBuffer, allocator); + + validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); + offsetAllocationSizeInBytes = offSetBuffer.capacity(); + sizeAllocationSizeInBytes = 
sizeBuffer.capacity(); // set child vector this.vector = elementFieldVec; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 358db51e5c1..664e4c15035 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -429,65 +429,64 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) @Test public void testBasicListViewSet() { - ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); - BigIntVector elementVector = new BigIntVector("element-vector", allocator); - ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); - - listViewVector.allocateNew(); - - elementVector.allocateNew(7); - - elementVector.set(0, 12); - elementVector.set(1, -7); - elementVector.set(2, 25); - elementVector.set(3, 0); - elementVector.set(4, -127); - elementVector.set(5, 127); - elementVector.set(6, 50); - - elementVector.setValueCount(7); - - int[] offSetValues = new int[]{0, 3, 3, 7}; - int[] sizeValues = new int[]{3, 0, 4, 0}; - - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); - - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, 
offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - - assertEquals(3, listViewVector.getLastSet()); - validateVector(listViewVector); - listViewVector.close(); + try (ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + BigIntVector childVector = new BigIntVector("element-vector", allocator); + ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + listViewVector.allocateNew(); + + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + int[] offSetValues = new int[]{0, 3, 3, 7}; + int[] sizeValues = new int[]{3, 0, 4, 0}; + + 
BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); + + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + assertEquals(3, listViewVector.getLastSet()); + validateVector(listViewVector); + } } @Test @@ -497,240 +496,234 @@ public void testBasicListViewSetNested() { // Setting child vector // 
[[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] - ListVector listVector = ListVector.empty("nestedVector", allocator); - - UnionListWriter listWriter = listVector.getWriter(); - listWriter.allocate(); + try (ListVector childVector = ListVector.empty("nestedVector", allocator); + ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListWriter listWriter = childVector.getWriter(); + listWriter.allocate(); - listWriter.setPosition(0); - listWriter.startList(); + listWriter.setPosition(0); + listWriter.startList(); - listWriter.bigInt().writeBigInt(50); - listWriter.bigInt().writeBigInt(100); - listWriter.bigInt().writeBigInt(200); + listWriter.bigInt().writeBigInt(50); + listWriter.bigInt().writeBigInt(100); + listWriter.bigInt().writeBigInt(200); - listWriter.endList(); + listWriter.endList(); - listWriter.setPosition(1); - listWriter.startList(); + listWriter.setPosition(1); + listWriter.startList(); - listWriter.bigInt().writeBigInt(75); - listWriter.bigInt().writeBigInt(125); - listWriter.bigInt().writeBigInt(150); - listWriter.bigInt().writeBigInt(175); + listWriter.bigInt().writeBigInt(75); + listWriter.bigInt().writeBigInt(125); + listWriter.bigInt().writeBigInt(150); + listWriter.bigInt().writeBigInt(175); - listWriter.endList(); + listWriter.endList(); - listWriter.setPosition(2); - listWriter.startList(); + listWriter.setPosition(2); + listWriter.startList(); - listWriter.bigInt().writeBigInt(10); + listWriter.bigInt().writeBigInt(10); - listWriter.endList(); + listWriter.endList(); - listWriter.startList(); - listWriter.setPosition(3); + listWriter.startList(); + listWriter.setPosition(3); - listWriter.bigInt().writeBigInt(15); - listWriter.bigInt().writeBigInt(20); + listWriter.bigInt().writeBigInt(15); + 
listWriter.bigInt().writeBigInt(20); - listWriter.endList(); + listWriter.endList(); - listWriter.startList(); - listWriter.setPosition(4); + listWriter.startList(); + listWriter.setPosition(4); - listWriter.bigInt().writeBigInt(25); - listWriter.bigInt().writeBigInt(30); - listWriter.bigInt().writeBigInt(35); + listWriter.bigInt().writeBigInt(25); + listWriter.bigInt().writeBigInt(30); + listWriter.bigInt().writeBigInt(35); - listWriter.endList(); + listWriter.endList(); - listVector.setValueCount(2); + childVector.setValueCount(2); - ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); - listViewVector.allocateNew(); - ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + listViewVector.allocateNew(); - int[] offSetValues = new int[]{0, 2}; - int[] sizeValues = new int[]{2, 3}; + int[] offSetValues = new int[]{0, 2}; + int[] sizeValues = new int[]{2, 3}; - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 1); + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 1); - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, listVector, 2); + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 2); - assertEquals(2, listViewVector.getValueCount()); + assertEquals(2, listViewVector.getValueCount()); - /* get listViewVector value at index 0 -- the value itself is a listViewVector */ - Object result = listViewVector.getObject(0); - ArrayList> resultSet = 
(ArrayList>) result; - ArrayList list; + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); - /* get listViewVector value at index 1 -- the value itself is a listViewVector */ - result = listViewVector.getObject(1); - resultSet = (ArrayList>) result; + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList>) result; - assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of the second 
inner list */ - assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ - list = resultSet.get(0); - assertEquals(Long.valueOf(10), list.get(0)); + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); - list = resultSet.get(1); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); - list = resultSet.get(2); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); - /* check underlying bitVector */ - assertFalse(listViewVector.isNull(0)); - assertFalse(listViewVector.isNull(1)); + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); - // check size buffer - assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - listVector.close(); - validateVector(listViewVector); - listViewVector.close(); + validateVector(listViewVector); + } } @Test public void testBasicListViewSetWithListViewWriter() { - ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); - ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); - BigIntVector elementVector = new BigIntVector("element-vector", allocator); - - listViewVector.allocateNew(); - elementVector.allocateNew(7); - - elementVector.set(0, 12); - elementVector.set(1, -7); - elementVector.set(2, 25); - elementVector.set(3, 0); - elementVector.set(4, -127); - elementVector.set(5, 127); - elementVector.set(6, 50); - - elementVector.setValueCount(7); - - int[] offSetValues = new int[]{0, 3, 3, 7}; - int[] sizeValues = new int[]{3, 0, 4, 0}; - - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); - - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, elementVector, 4); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - 
assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - - assertEquals(3, listViewVector.getLastSet()); - - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - listViewWriter.setPosition(4); - listViewWriter.startList(); - - listViewWriter.bigInt().writeBigInt(121); - listViewWriter.bigInt().writeBigInt(-71); - listViewWriter.bigInt().writeBigInt(251); - listViewWriter.endList(); - - listViewVector.setValueCount(5); - - // final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - // final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, 
offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); - assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); - assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); - - assertEquals(4, listViewVector.getLastSet()); - validateVector(listViewVector); - listViewVector.close(); + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); + ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + BigIntVector childVector = new BigIntVector("element-vector", allocator)) { + listViewVector.allocateNew(); + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + 
childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + int[] offSetValues = new int[]{0, 3, 3, 7}; + int[] sizeValues = new int[]{3, 0, 4, 0}; + + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); + + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) 
listViewVector.getDataVector()).get(6)); + + assertEquals(3, listViewVector.getLastSet()); + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + listViewWriter.setPosition(4); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(121); + listViewWriter.bigInt().writeBigInt(-71); + listViewWriter.bigInt().writeBigInt(251); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); + assertEquals(-71, ((BigIntVector) 
listViewVector.getDataVector()).get(8)); + assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + + assertEquals(4, listViewVector.getLastSet()); + validateVector(listViewVector); + } } @Test @@ -1507,93 +1500,92 @@ public void testOverwriteWithNull() { @Test public void testOutOfOrderOffset1() { // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] - ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); - SmallIntVector childVector = new SmallIntVector("child-vector", allocator); - ListViewVector listViewVector = ListViewVector.empty("listview", allocator); - - listViewVector.allocateNew(); - - childVector.allocateNew(7); - - childVector.set(0, 0); - childVector.set(1, -127); - childVector.set(2, 127); - childVector.set(3, 50); - childVector.set(4, 12); - childVector.set(5, -7); - childVector.set(6, 25); - - childVector.setValueCount(7); - - int[] offSetValues = new int[]{4, 7, 0, 0, 3}; - int[] sizeValues = new int[]{3, 0, 4, 0, 2}; - - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); - BitVectorHelper.setBit(validityBuffer, 4); - - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 5); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, 
offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check child vector - assertEquals(0, ((SmallIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-127, ((SmallIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(127, ((SmallIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(50, ((SmallIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(12, ((SmallIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(-7, ((SmallIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(25, ((SmallIntVector) listViewVector.getDataVector()).get(6)); - - // check values - Object result = listViewVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Short.valueOf("12"), resultSet.get(0)); - assertEquals(Short.valueOf("-7"), resultSet.get(1)); - assertEquals(Short.valueOf("25"), resultSet.get(2)); - - assertTrue(listViewVector.isNull(1)); - - result = listViewVector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(4, resultSet.size()); - assertEquals(Short.valueOf("0"), resultSet.get(0)); - assertEquals(Short.valueOf("-127"), resultSet.get(1)); - assertEquals(Short.valueOf("127"), resultSet.get(2)); - assertEquals(Short.valueOf("50"), resultSet.get(3)); - - assertTrue(listViewVector.isEmpty(3)); - - result = listViewVector.getObject(4); - resultSet = (ArrayList) result; - assertEquals(2, 
resultSet.size()); - assertEquals(Short.valueOf("50"), resultSet.get(0)); - assertEquals(Short.valueOf("12"), resultSet.get(1)); - - validateVector(listViewVector); - listViewVector.close(); + try (ArrowBuf newOffSetBuf = allocator.buffer(1024); + ArrowBuf newSizeBuffer = allocator.buffer(1024); + ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); + SmallIntVector childVector = new SmallIntVector("child-vector", allocator); + ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + + childVector.allocateNew(7); + + childVector.set(0, 0); + childVector.set(1, -127); + childVector.set(2, 127); + childVector.set(3, 50); + childVector.set(4, 12); + childVector.set(5, -7); + childVector.set(6, 25); + + childVector.setValueCount(7); + + int[] offSetValues = new int[]{4, 7, 0, 0, 3}; + int[] sizeValues = new int[]{3, 0, 4, 0, 2}; + + BitVectorHelper.setBit(validityBuffer, 0); + BitVectorHelper.setBit(validityBuffer, 2); + BitVectorHelper.setBit(validityBuffer, 3); + BitVectorHelper.setBit(validityBuffer, 4); + + setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); + setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + + listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 5); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + 
assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check child vector + assertEquals(0, ((SmallIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-127, ((SmallIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(127, ((SmallIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(50, ((SmallIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(12, ((SmallIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(-7, ((SmallIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(25, ((SmallIntVector) listViewVector.getDataVector()).get(6)); + + // check values + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Short.valueOf("12"), resultSet.get(0)); + assertEquals(Short.valueOf("-7"), resultSet.get(1)); + assertEquals(Short.valueOf("25"), resultSet.get(2)); + + assertTrue(listViewVector.isNull(1)); + + result = listViewVector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(4, resultSet.size()); + assertEquals(Short.valueOf("0"), resultSet.get(0)); + assertEquals(Short.valueOf("-127"), resultSet.get(1)); + assertEquals(Short.valueOf("127"), resultSet.get(2)); + assertEquals(Short.valueOf("50"), resultSet.get(3)); + + assertTrue(listViewVector.isEmpty(3)); + + result = listViewVector.getObject(4); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Short.valueOf("50"), resultSet.get(0)); + assertEquals(Short.valueOf("12"), resultSet.get(1)); + + validateVector(listViewVector); + } } private void 
writeIntValues(UnionListViewWriter writer, int[] values) { From 965c365bfee4b7dda1e200530d12e6b5f3565cf6 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 8 May 2024 12:58:55 +0530 Subject: [PATCH 21/34] fix: c data interface update --- .../java/org/apache/arrow/c/BufferImportTypeVisitor.java | 6 ++++++ java/vector/src/main/codegen/data/ArrowTypes.tdd | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java index bc6139cc84c..99873dadad2 100644 --- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java +++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java @@ -53,6 +53,7 @@ import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** @@ -328,4 +329,9 @@ public List visit(ArrowType.Interval type) { public List visit(ArrowType.Duration type) { return Arrays.asList(maybeImportBitmap(type), importFixedBytes(type, 1, DurationVector.TYPE_WIDTH)); } + + @Override + public List visit(ListView type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } } diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 99578bd5031..72df4779793 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -131,9 +131,9 @@ complex: false }, { - name: "ListView", - fields: [], - complex: true - }, + name: "ListView", + fields: [], + complex: true + } ] } From f9fb7400e3b52e97b57bbfb6508bd6146e777aee Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 8 May 2024 13:28:29 +0530 
Subject: [PATCH 22/34] fix: addressing reviews 1: removing validate helper --- .../arrow/vector/TestListViewVector.java | 55 ++++++++----------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 664e4c15035..3bbf7a406e4 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -21,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; import java.util.ArrayList; import java.util.Arrays; @@ -61,14 +60,6 @@ public void terminate() throws Exception { allocator.close(); } - private void validateVector(ListViewVector vector) { - try { - vector.validate(); - } catch (Exception e) { - fail("Validation threw an exception: " + e.getMessage()); - } - } - @Test public void testBasicListViewVector() { try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { @@ -150,7 +141,7 @@ public void testBasicListViewVector() { assertEquals(3, ((BigIntVector) dataVec).get(9)); assertEquals(4, ((BigIntVector) dataVec).get(10)); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -252,7 +243,7 @@ public void testImplicitNullVectors() { listViewVector.setValueCount(11); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -372,7 +363,7 @@ public void testNestedListViewVector() { assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -405,7 +396,7 @@ public void testNestedListVector() throws 
Exception { listViewVector.setInitialCapacity(128); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -485,7 +476,7 @@ public void testBasicListViewSet() { assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); assertEquals(3, listViewVector.getLastSet()); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -621,7 +612,7 @@ public void testBasicListViewSetNested() { assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -722,7 +713,7 @@ public void testBasicListViewSetWithListViewWriter() { assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); assertEquals(4, listViewVector.getLastSet()); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -785,7 +776,7 @@ public void testGetBufferAddress() throws Exception { /* (3+2)/2 */ assertEquals(2.5, listViewVector.getDensity(), 0); - validateVector(listViewVector); + listViewVector.validate(); } } @@ -831,7 +822,7 @@ public void testSetInitialCapacity() { assertEquals(8, vector.getValueCapacity()); assertTrue(vector.getDataVector().getValueCapacity() >= 1); - validateVector(vector); + vector.validate(); } } @@ -879,7 +870,7 @@ public void testClearAndReuse() { resultSet = (ArrayList) result; assertEquals(Long.valueOf(8), resultSet.get(0)); - validateVector(vector); + vector.validate(); } } @@ -905,7 +896,7 @@ public void testWriterGetField() { assertEquals(expectedField, writer.getField()); - validateVector(vector); + vector.validate(); } } @@ -943,7 +934,7 @@ public void testWriterUsingHolderGetTimestampMilliTZField() { assertEquals(expectedField, writer.getField()); - validateVector(vector); + vector.validate(); } } @@ -982,7 +973,7 @@ public void testWriterGetDurationField() { assertEquals(expectedField, writer.getField()); - validateVector(vector); + 
vector.validate(); } } @@ -1007,7 +998,7 @@ public void testClose() throws Exception { assertEquals(0, vector.getBufferSize()); assertEquals(0, vector.getDataVector().getBufferSize()); - validateVector(vector); + vector.validate(); } } @@ -1038,7 +1029,7 @@ public void testGetBufferSizeFor() { dataVector.getBufferSizeFor(indices[valueCount]); assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); } - validateVector(vector); + vector.validate(); } } @@ -1062,7 +1053,7 @@ public void testIsEmpty() { assertTrue(vector.isEmpty(2)); assertFalse(vector.isEmpty(3)); - validateVector(vector); + vector.validate(); } } @@ -1085,7 +1076,7 @@ public void testTotalCapacity() { assertTrue(vector.getValueCapacity() >= 10); assertTrue(vector.getDataVector().getValueCapacity() >= 100); - validateVector(vector); + vector.validate(); } } @@ -1174,7 +1165,7 @@ public void testSetNull1() { assertEquals(Long.valueOf(50), resultSet.get(0)); assertEquals(Long.valueOf(60), resultSet.get(1)); - validateVector(vector); + vector.validate(); } } @@ -1267,7 +1258,7 @@ public void testSetNull2() { assertEquals(Long.valueOf(70), resultSet.get(1)); assertEquals(Long.valueOf(80), resultSet.get(2)); - validateVector(vector); + vector.validate(); } } @@ -1360,7 +1351,7 @@ public void testSetNull3() { assertEquals(Long.valueOf(70), resultSet.get(1)); assertEquals(Long.valueOf(80), resultSet.get(2)); - validateVector(vector); + vector.validate(); } } @@ -1411,7 +1402,7 @@ public void testOverWrite1() { assertEquals(Long.valueOf(80), resultSet.get(0)); assertEquals(Long.valueOf(90), resultSet.get(1)); - validateVector(vector); + vector.validate(); } } @@ -1493,7 +1484,7 @@ public void testOverwriteWithNull() { assertEquals(Long.valueOf(80), resultSet.get(0)); assertEquals(Long.valueOf(90), resultSet.get(1)); - validateVector(vector); + vector.validate(); } } @@ -1584,7 +1575,7 @@ public void testOutOfOrderOffset1() { assertEquals(Short.valueOf("50"), resultSet.get(0)); 
assertEquals(Short.valueOf("12"), resultSet.get(1)); - validateVector(listViewVector); + listViewVector.validate(); } } From 743c4bf8e7bfddaa4e3add770e3979d71e459340 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 8 May 2024 13:29:55 +0530 Subject: [PATCH 23/34] fix: addressing reviews 2: update comment --- .../test/java/org/apache/arrow/vector/TestListViewVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 3bbf7a406e4..d9b98f5f7e3 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -77,7 +77,7 @@ public void testBasicListViewVector() { listViewWriter.bigInt().writeBigInt(25); listViewWriter.endList(); - /* the second list at index 2 is null (we are not setting any)*/ + /* the second list at index 1 is null (we are not setting any)*/ /* write the third list at index 2 */ listViewWriter.setPosition(2); From da9de76df39352b18ac7b554e119e7f672de4262 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 8 May 2024 14:27:18 +0530 Subject: [PATCH 24/34] fix: addressing reviews 3: adding an alternative set buffer model --- .../arrow/vector/complex/ListViewVector.java | 31 +++++++ .../arrow/vector/TestListViewVector.java | 82 +++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 30662a63a27..087f146c56a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -796,6 +796,37 @@ public void set(ArrowBuf offSetBuffer, ArrowBuf sizeBuffer, ArrowBuf validityBuf 
this.getWriter().setPosition(valueCount); } + /** + * Set the offset at the given index. + * @param index index of the value to set + * @param value value to set + */ + public void setOffSet(int index, int value) { + offsetBuffer.setInt(index * OFFSET_WIDTH, value); + } + + /** + * Set the size at the given index. + * @param index index of the value to set + * @param value value to set + */ + public void setSize(int index, int value) { + sizeBuffer.setInt(index * SIZE_WIDTH, value); + } + + /** + * Set the validity at the given index. + * @param index index of the value to set + * @param value value to set (0 for unset and 1 for a set) + */ + public void setValidity(int index, int value) { + if (value == 0) { + BitVectorHelper.unsetBit(validityBuffer, index); + } else { + BitVectorHelper.setBit(validityBuffer, index); + } + } + @Override public void setValueCount(int valueCount) { this.valueCount = valueCount; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index d9b98f5f7e3..9b27b3dbbad 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.arrow.memory.ArrowBuf; @@ -480,6 +481,87 @@ public void testBasicListViewSet() { } } + /* + * Setting up buffers directly would require the following steps to be taken + * 1. Set offset and size buffers using `setOffSet` and `setSize` methods. + * 2. Set validity buffer using `setValidity` method. + * 3. Set lastSet value using `setLastSet` method. + * 4. Initialize the child vector using `initializeChildrenFromFields` method. + * 5. Set values in the child vector. 
+ */ + @Test + public void testBasicListViewSet1() { + + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + listViewVector.allocateNew(); + + listViewVector.setOffSet(0, 0); + listViewVector.setOffSet(1, 3); + listViewVector.setOffSet(2, 3); + listViewVector.setOffSet(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + listViewVector.setLastSet(3); + + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("element-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * 
BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + assertEquals(3, listViewVector.getLastSet()); + listViewVector.validate(); + } + } + @Test public void testBasicListViewSetNested() { // Expected listview From c7e30eed893f7ef136e4590a617b4dd7d16e88cb Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 9 May 2024 09:06:07 +0530 Subject: [PATCH 25/34] fix: addressing reviews 4: updated set methods --- .../arrow/vector/TestListViewVector.java | 254 +++++++++--------- 1 file changed, 132 insertions(+), 122 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 9b27b3dbbad..640883ebbef 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -42,7 +42,6 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -418,16 +417,34 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) * 2. 
Default list insertion followed by buffer-based inserts. * 3. Buffer-based inserts followed by default list insertion. */ + + /* Setting up buffers directly would require the following steps to be taken + * 0. Allocate buffers in listViewVector by calling `allocateNew` method. + * 1. Initialize the child vector using `initializeChildrenFromFields` method. + * 2. Set values in the child vector. + * 3. Set validity, offset and size buffers using `setValidity`, + * `setOffSet` and `setSize` methods. + * 4. Set lastSet value using `setLastSet` method. + * 5. Set value count using `setValueCount` method. + */ @Test public void testBasicListViewSet() { - try (ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); - BigIntVector childVector = new BigIntVector("element-vector", allocator); - ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. listViewVector.allocateNew(); + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. 
+ FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; childVector.allocateNew(7); childVector.set(0, 12); @@ -440,61 +457,8 @@ public void testBasicListViewSet() { childVector.setValueCount(7); - int[] offSetValues = new int[]{0, 3, 3, 7}; - int[] sizeValues = new int[]{3, 0, 4, 0}; - - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); - - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); - - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 4); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) 
listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - - assertEquals(3, listViewVector.getLastSet()); - listViewVector.validate(); - } - } - - /* - * Setting up buffers directly would require the following steps to be taken - * 1. Set offset and size buffers using `setOffSet` and `setSize` methods. - * 2. Set validity buffer using `setValidity` method. - * 3. Set lastSet value using `setLastSet` method. - * 4. Initialize the child vector using `initializeChildrenFromFields` method. - * 5. Set values in the child vector. - */ - @Test - public void testBasicListViewSet1() { - - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - listViewVector.allocateNew(); - + // Set validity, offset and size buffers using `setValidity`, + // `setOffSet` and `setSize` methods. listViewVector.setOffSet(0, 0); listViewVector.setOffSet(1, 3); listViewVector.setOffSet(2, 3); @@ -510,28 +474,11 @@ public void testBasicListViewSet1() { listViewVector.setValidity(2, 1); listViewVector.setValidity(3, 1); + // Set lastSet value using `setLastSet` method. listViewVector.setLastSet(3); - FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), - null, null); - Field field = new Field("element-vector", fieldType, null); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - BigIntVector childVector = (BigIntVector) fieldVector; - childVector.allocateNew(7); - - childVector.set(0, 12); - childVector.set(1, -7); - childVector.set(2, 25); - childVector.set(3, 0); - childVector.set(4, -127); - childVector.set(5, 127); - childVector.set(6, 50); - - childVector.setValueCount(7); + // Set value count using `setValueCount` method. 
+ listViewVector.setValueCount(4); final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); @@ -569,11 +516,26 @@ public void testBasicListViewSetNested() { // Setting child vector // [[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] - try (ListVector childVector = ListVector.empty("nestedVector", allocator); - ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); - ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.List(), + null, null); + FieldType childFieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field childField = new Field("child-vector", childFieldType, null); + List children = new ArrayList<>(); + children.add(childField); + Field field = new Field("child-vector", fieldType, children); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + ListVector childVector = (ListVector) fieldVector; UnionListWriter listWriter = childVector.getWriter(); listWriter.allocate(); @@ -620,21 +582,25 @@ public void testBasicListViewSetNested() { listWriter.endList(); - childVector.setValueCount(2); + childVector.setValueCount(5); + // Set validity, offset and size buffers using `setValidity`, + // `setOffSet` and `setSize` methods. 
- listViewVector.allocateNew(); + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 1); - int[] offSetValues = new int[]{0, 2}; - int[] sizeValues = new int[]{2, 3}; + listViewVector.setOffSet(0, 0); + listViewVector.setOffSet(1, 2); - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 1); + listViewVector.setSize(0, 2); + listViewVector.setSize(1, 3); - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + // Set lastSet value using `setLastSet` method. + listViewVector.setLastSet(1); - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 2); + // Set value count using `setValueCount` method. + listViewVector.setValueCount(2); assertEquals(2, listViewVector.getValueCount()); @@ -700,12 +666,21 @@ public void testBasicListViewSetNested() { @Test public void testBasicListViewSetWithListViewWriter() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator); - ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); - BigIntVector childVector = new BigIntVector("element-vector", allocator)) { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. 
+ FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; childVector.allocateNew(7); childVector.set(0, 12); @@ -718,17 +693,29 @@ public void testBasicListViewSetWithListViewWriter() { childVector.setValueCount(7); - int[] offSetValues = new int[]{0, 3, 3, 7}; - int[] sizeValues = new int[]{3, 0, 4, 0}; + // Set validity, offset and size buffers using `setValidity`, + // `setOffSet` and `setSize` methods. - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + listViewVector.setOffSet(0, 0); + listViewVector.setOffSet(1, 3); + listViewVector.setOffSet(2, 3); + listViewVector.setOffSet(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + // Set lastSet value using `setLastSet` method. + listViewVector.setLastSet(3); - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 4); + // Set value count using `setValueCount` method. 
+ listViewVector.setValueCount(4); final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); @@ -1573,13 +1560,23 @@ public void testOverwriteWithNull() { @Test public void testOutOfOrderOffset1() { // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] - try (ArrowBuf newOffSetBuf = allocator.buffer(1024); - ArrowBuf newSizeBuffer = allocator.buffer(1024); - ArrowBuf validityBuffer = allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(1024)); - SmallIntVector childVector = new SmallIntVector("child-vector", allocator); - ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. listViewVector.allocateNew(); + // Initialize the child vector using `initializeChildrenFromFields` method. + + FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + SmallIntVector childVector = (SmallIntVector) fieldVector; + childVector.allocateNew(7); childVector.set(0, 0); @@ -1592,18 +1589,31 @@ public void testOutOfOrderOffset1() { childVector.setValueCount(7); - int[] offSetValues = new int[]{4, 7, 0, 0, 3}; - int[] sizeValues = new int[]{3, 0, 4, 0, 2}; + // Set validity, offset and size buffers using `setValidity`, + // `setOffSet` and `setSize` methods. 
+ listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + listViewVector.setValidity(4, 1); - BitVectorHelper.setBit(validityBuffer, 0); - BitVectorHelper.setBit(validityBuffer, 2); - BitVectorHelper.setBit(validityBuffer, 3); - BitVectorHelper.setBit(validityBuffer, 4); + listViewVector.setOffSet(0, 4); + listViewVector.setOffSet(1, 7); + listViewVector.setOffSet(2, 0); + listViewVector.setOffSet(3, 0); + listViewVector.setOffSet(4, 3); - setValuesInBuffer(offSetValues, newOffSetBuf, BaseRepeatedValueViewVector.OFFSET_WIDTH); - setValuesInBuffer(sizeValues, newSizeBuffer, BaseRepeatedValueViewVector.SIZE_WIDTH); + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + listViewVector.setSize(4, 2); + + // Set lastSet value using `setLastSet` method. + listViewVector.setLastSet(4); - listViewVector.set(newOffSetBuf, newSizeBuffer, validityBuffer, childVector, 5); + // Set value count using `setValueCount` method. 
+ listViewVector.setValueCount(5); final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); From d465c237f01b1b37c0faa095c988de041dd8fc3d Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 9 May 2024 09:23:21 +0530 Subject: [PATCH 26/34] fix: addressing reviews 5: add validation for set funcs --- .../arrow/vector/complex/ListViewVector.java | 57 ++++++------------- 1 file changed, 16 insertions(+), 41 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 087f146c56a..6ae31f1cbed 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -26,7 +26,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Objects; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -756,61 +755,33 @@ private int getLengthOfChildVector() { return length; } - /** - * Constructing a ListViewVector when the offsets, sizes and field vector are available. - *

- * Steps taken follow the workflow used in creating a ListViewVector with the API - * used in ListVector. - * - * @param offSetBuffer new offSet buffer to be set - * @param sizeBuffer new size buffer to be set - * @param validityBuffer new validity buffer to be set - * @param elementFieldVec new elements to be appended to the field vector - * @param valueCount number of lists to be set - */ - public void set(ArrowBuf offSetBuffer, ArrowBuf sizeBuffer, ArrowBuf validityBuffer, - FieldVector elementFieldVec, int valueCount) { - // Null checks - Objects.requireNonNull(offSetBuffer, "Offset buffer cannot be null"); - Objects.requireNonNull(sizeBuffer, "Size buffer cannot be null"); - Objects.requireNonNull(validityBuffer, "Validity buffer cannot be null"); - Objects.requireNonNull(elementFieldVec, "Element Field Vector cannot be null"); - - // set buffers - this.validityBuffer.getReferenceManager().release(); - this.validityBuffer = validityBuffer.getReferenceManager().retain(validityBuffer, allocator); - this.offsetBuffer.getReferenceManager().release(); - this.offsetBuffer = offSetBuffer.getReferenceManager().retain(offSetBuffer, allocator); - this.sizeBuffer.getReferenceManager().release(); - this.sizeBuffer = sizeBuffer.getReferenceManager().retain(sizeBuffer, allocator); - - validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); - offsetAllocationSizeInBytes = offSetBuffer.capacity(); - sizeAllocationSizeInBytes = sizeBuffer.capacity(); - - // set child vector - this.vector = elementFieldVec; - - this.lastSet = valueCount - 1; - this.setValueCount(valueCount); - this.getWriter().setPosition(valueCount); - } - /** * Set the offset at the given index. 
+ * Make sure to use this function after using `setValidity` * @param index index of the value to set * @param value value to set */ public void setOffSet(int index, int value) { + // 0 <= offsets[i] <= length of the child array + // 0 <= offsets[i] + size[i] <= length of the child array + if (value < 0) { + throw new IllegalArgumentException("Offset cannot be negative"); + } offsetBuffer.setInt(index * OFFSET_WIDTH, value); } /** * Set the size at the given index. + * Make sure to use this function after using `setOffSet`. * @param index index of the value to set * @param value value to set */ public void setSize(int index, int value) { + // 0 <= offsets[i] <= length of the child array + // 0 <= offsets[i] + size[i] <= length of the child array + if (value < 0) { + throw new IllegalArgumentException("Size cannot be negative"); + } sizeBuffer.setInt(index * SIZE_WIDTH, value); } @@ -921,6 +892,10 @@ public void validate() { for (int i = 0; i < valueCount; i++) { final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); final int size = sizeBuffer.getInt(i * SIZE_WIDTH); + if (size < 0) { + throw new IllegalStateException(String.format( + "Size %d at index %d is negative", size, i)); + } final int childArrayLength = getLengthOfChildVector(); if (offset < 0 || offset > childArrayLength) { throw new IllegalStateException(String.format( From 61928a10ba7423d78048c4b346e13b0bac7bf520 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 9 May 2024 17:08:21 +0530 Subject: [PATCH 27/34] fix: addressing reviews 6: adding updated length calculating func --- .../complex/BaseRepeatedValueViewVector.java | 17 ++++++++++++----- .../arrow/vector/complex/ListViewVector.java | 8 -------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index dbe75cb4518..421e112d18c 100644 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -326,12 +326,19 @@ public void setValueCount(int valueCount) { vector.setValueCount(childValueCount); } - private int getLengthOfChildVector() { - int length = 0; - for (int i = 0; i <= valueCount; i++) { - length += sizeBuffer.getInt(i * SIZE_WIDTH); + protected int getLengthOfChildVector() { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < valueCount; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); } - return length; + + return maxOffsetSizeSum - minOffset; } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 6ae31f1cbed..991854c589e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -747,14 +747,6 @@ public int startNewValue(int index) { return offsetBuffer.getInt(index * OFFSET_WIDTH); } - private int getLengthOfChildVector() { - int length = 0; - for (int i = 0; i < valueCount; i++) { - length += sizeBuffer.getInt(i * SIZE_WIDTH); - } - return length; - } - /** * Set the offset at the given index. 
* Make sure to use this function after using `setValidity` From 4514f12b80b93b3419d0eee2bc416600226887e3 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 9 May 2024 17:14:48 +0530 Subject: [PATCH 28/34] fix: addressing reviews 7 --- .../arrow/vector/complex/BaseRepeatedValueViewVector.java | 8 ++------ .../org/apache/arrow/vector/complex/ListViewVector.java | 3 ++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 421e112d18c..c1c233aae40 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -312,9 +312,7 @@ public int getValueCount() { } @Override - public boolean isNull(int index) { - return false; - } + public abstract boolean isNull(int index); @Override public void setValueCount(int valueCount) { @@ -372,9 +370,7 @@ protected void replaceDataVector(FieldVector v) { vector = v; } - public boolean isEmpty(int index) { - return false; - } + public abstract boolean isEmpty(int index); /** * Start a new value at the given index. 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 991854c589e..89509a15881 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -560,8 +560,9 @@ public ArrowBuf[] getBuffers(boolean clear) { buffers = new ArrowBuf[0]; } else { List list = new ArrayList<>(); - list.add(offsetBuffer); + // the order must be validity, offset and size buffers list.add(validityBuffer); + list.add(offsetBuffer); list.add(sizeBuffer); list.addAll(Arrays.asList(vector.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); From 9dc30f83982c8d254a1640bdb9b95c62f4fa2b17 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 10 May 2024 15:30:56 +0530 Subject: [PATCH 29/34] fix: addressing reviews 7: removing lastSet usage --- .../complex/BaseRepeatedValueViewVector.java | 18 +++- .../arrow/vector/complex/ListViewVector.java | 60 ++----------- .../arrow/vector/TestListViewVector.java | 87 ++++++------------- 3 files changed, 49 insertions(+), 116 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index c1c233aae40..0de3bf967d6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -311,9 +311,6 @@ public int getValueCount() { return valueCount; } - @Override - public abstract boolean isNull(int index); - @Override public void setValueCount(int valueCount) { this.valueCount = valueCount; @@ -339,6 +336,21 @@ protected int getLengthOfChildVector() { return maxOffsetSizeSum - minOffset; } + protected int 
getLengthOfChildVectorByIndex(int index) { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < index; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); + } + + return maxOffsetSizeSum - minOffset; + } + /** * Initialize the data vector (and execute callback) if it hasn't already been done, * returns the data vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 89509a15881..219144b14ff 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -72,11 +72,6 @@ public class ListViewVector extends BaseRepeatedValueViewVector implements Promo protected Field field; protected int validityAllocationSizeInBytes; - /** - * The maximum index that is actually set. 
- */ - protected int lastSet; - public static ListViewVector empty(String name, BufferAllocator allocator) { return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.ListView.INSTANCE), null); } @@ -106,7 +101,6 @@ public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) this.field = field; this.callBack = callBack; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - this.lastSet = -1; } @Override @@ -212,7 +206,6 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers offsetAllocationSizeInBytes = offsetBuffer.capacity(); sizeAllocationSizeInBytes = sizeBuffer.capacity(); - lastSet = fieldNode.getLength() - 1; valueCount = fieldNode.getLength(); } @@ -518,7 +511,7 @@ public Field getField() { */ @Override public MinorType getMinorType() { - return MinorType.LIST; + return MinorType.LISTVIEW; } /** @@ -528,7 +521,6 @@ public MinorType getMinorType() { public void clear() { super.clear(); validityBuffer = releaseBuffer(validityBuffer); - lastSet = -1; } /** @@ -538,7 +530,6 @@ public void clear() { public void reset() { super.reset(); validityBuffer.setZero(0, validityBuffer.capacity()); - lastSet = -1; } /** @@ -681,9 +672,6 @@ public void setNull(int index) { while (index >= getValidityAndSizeValueCapacity()) { reallocValidityAndSizeAndOffsetBuffers(); } - if (lastSet >= index) { - lastSet = index - 1; - } if (index == 0) { offsetBuffer.setInt(0, 0); @@ -697,16 +685,10 @@ public void setNull(int index) { } BitVectorHelper.unsetBit(validityBuffer, index); - lastSet = index; } /** * Start new value in the ListView vector. - * There are a few cases that are handled in this function. - * There are two main scenarios that need to be considered. - * The first scenario is simple insertion where indices are continuously updated. - * The other scenario is the event of non-continuous writing, - * the offset buffer needs to be updated. 
* * @param index index of the value to start * @return offset of the new value @@ -717,34 +699,12 @@ public int startNewValue(int index) { reallocValidityAndSizeAndOffsetBuffers(); } - if (lastSet >= index) { - lastSet = index - 1; - } - - if (index == 0) { - offsetBuffer.setInt(0, 0); - } else if (index > lastSet) { - /* when skipping indices, we need to update the offset buffer */ - /* setting offset from lastSet + 1 to index (included) */ - for (int i = lastSet + 1; i <= index; i++) { - if (i == 0) { - offsetBuffer.setInt(0, 0); - continue; - } - final int prevOffSet = offsetBuffer.getInt((i - 1L) * OFFSET_WIDTH); - final int prevSize = sizeBuffer.getInt((i - 1L) * SIZE_WIDTH); - final int currOffSet = prevOffSet + prevSize; - offsetBuffer.setInt(i * OFFSET_WIDTH, currOffSet); - } - } else { - final int prevOffset = offsetBuffer.getInt((index - 1) * OFFSET_WIDTH); - final int prevSize = sizeBuffer.getInt((index - 1) * SIZE_WIDTH); - final int currOffSet = prevOffset + prevSize; - offsetBuffer.setInt(index * OFFSET_WIDTH, currOffSet); + if (index > 0) { + final int prevOffset = getLengthOfChildVectorByIndex(index); + offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); } BitVectorHelper.setBit(validityBuffer, index); - lastSet = index; return offsetBuffer.getInt(index * OFFSET_WIDTH); } @@ -754,7 +714,7 @@ public int startNewValue(int index) { * @param index index of the value to set * @param value value to set */ - public void setOffSet(int index, int value) { + public void setOffset(int index, int value) { // 0 <= offsets[i] <= length of the child array // 0 <= offsets[i] + size[i] <= length of the child array if (value < 0) { @@ -765,7 +725,7 @@ public void setOffSet(int index, int value) { /** * Set the size at the given index. - * Make sure to use this function after using `setOffSet`. + * Make sure to use this function after using `setOffset`. 
* @param index index of the value to set * @param value value to set */ @@ -850,14 +810,6 @@ public UnionListViewWriter getWriter() { return new UnionListViewWriter(this); } - public int getLastSet() { - return lastSet; - } - - public void setLastSet(int newLastSet) { - lastSet = newLastSet; - } - @Override public int getValueCount() { return valueCount; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 640883ebbef..9ae6e818628 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -103,8 +103,6 @@ public void testBasicListViewVector() { listViewWriter.bigInt().writeBigInt(4); listViewWriter.endList(); - assertEquals(4, listViewVector.getLastSet()); - listViewVector.setValueCount(5); // check value count assertEquals(5, listViewVector.getValueCount()); @@ -116,7 +114,7 @@ public void testBasicListViewVector() { // check offset buffer assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); @@ -166,7 +164,6 @@ public void testImplicitNullVectors() { int offSet0 = offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size0 = sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH); - int lastSet0 = listViewVector.getLastSet(); // after the first list is written, // the initial offset must be 0, @@ -175,7 +172,6 @@ public void testImplicitNullVectors() { assertEquals(0, 
offSet0); assertEquals(3, size0); - assertEquals(0, lastSet0); listViewWriter.setPosition(5); listViewWriter.startList(); @@ -189,13 +185,11 @@ public void testImplicitNullVectors() { for (int i = 1; i < 5; i++) { int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); - int lastSet = listViewVector.getLastSet(); // Since the list is not written, the offset and size must equal to child vector's size // i.e., 3, and size should be 0 as the list is not written. // And the last set value is the value currently being written, which is 5. - assertEquals(3, offSet); + assertEquals(0, offSet); assertEquals(0, size); - assertEquals(5, lastSet); } listViewWriter.bigInt().writeBigInt(12); @@ -204,11 +198,9 @@ public void testImplicitNullVectors() { int offSet5 = offSetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size5 = sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH); - int lastSet5 = listViewVector.getLastSet(); assertEquals(3, offSet5); assertEquals(2, size5); - assertEquals(5, lastSet5); listViewWriter.setPosition(10); listViewWriter.startList(); @@ -221,13 +213,11 @@ public void testImplicitNullVectors() { for (int i = 6; i < 10; i++) { int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); - int lastSet = listViewVector.getLastSet(); - // Since the list is not written, the offset and size must equal to child vector's size - // i.e., 3, and size should be 0 as the list is not written. + // Since the list is not written, the offset and size must equal to 0 + // and size should be 0 as the list is not written. // And the last set value is the value currently being written, which is 10. 
- assertEquals(5, offSet); + assertEquals(0, offSet); assertEquals(0, size); - assertEquals(10, lastSet); } listViewWriter.bigInt().writeBigInt(12); @@ -235,11 +225,9 @@ public void testImplicitNullVectors() { int offSet11 = offSetBuffer.getInt(10 * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size11 = sizeBuffer.getInt(10 * BaseRepeatedValueViewVector.SIZE_WIDTH); - int lastSet11 = listViewVector.getLastSet(); assertEquals(5, offSet11); assertEquals(1, size11); - assertEquals(10, lastSet11); listViewVector.setValueCount(11); @@ -299,8 +287,6 @@ public void testNestedListViewVector() { listViewWriter.endList(); - assertEquals(1, listViewVector.getLastSet()); - listViewVector.setValueCount(2); // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] @@ -423,9 +409,8 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) * 1. Initialize the child vector using `initializeChildrenFromFields` method. * 2. Set values in the child vector. * 3. Set validity, offset and size buffers using `setValidity`, - * `setOffSet` and `setSize` methods. - * 4. Set lastSet value using `setLastSet` method. - * 5. Set value count using `setValueCount` method. + * `setOffset` and `setSize` methods. + * 4. Set value count using `setValueCount` method. */ @Test public void testBasicListViewSet() { @@ -458,11 +443,11 @@ public void testBasicListViewSet() { childVector.setValueCount(7); // Set validity, offset and size buffers using `setValidity`, - // `setOffSet` and `setSize` methods. - listViewVector.setOffSet(0, 0); - listViewVector.setOffSet(1, 3); - listViewVector.setOffSet(2, 3); - listViewVector.setOffSet(3, 7); + // `setOffset` and `setSize` methods. 
+ listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); listViewVector.setSize(0, 3); listViewVector.setSize(1, 0); @@ -474,9 +459,6 @@ public void testBasicListViewSet() { listViewVector.setValidity(2, 1); listViewVector.setValidity(3, 1); - // Set lastSet value using `setLastSet` method. - listViewVector.setLastSet(3); - // Set value count using `setValueCount` method. listViewVector.setValueCount(4); @@ -504,7 +486,6 @@ public void testBasicListViewSet() { assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - assertEquals(3, listViewVector.getLastSet()); listViewVector.validate(); } } @@ -585,20 +566,17 @@ public void testBasicListViewSetNested() { childVector.setValueCount(5); // Set validity, offset and size buffers using `setValidity`, - // `setOffSet` and `setSize` methods. + // `setOffset` and `setSize` methods. listViewVector.setValidity(0, 1); listViewVector.setValidity(1, 1); - listViewVector.setOffSet(0, 0); - listViewVector.setOffSet(1, 2); + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 2); listViewVector.setSize(0, 2); listViewVector.setSize(1, 3); - // Set lastSet value using `setLastSet` method. - listViewVector.setLastSet(1); - // Set value count using `setValueCount` method. listViewVector.setValueCount(2); @@ -694,26 +672,23 @@ public void testBasicListViewSetWithListViewWriter() { childVector.setValueCount(7); // Set validity, offset and size buffers using `setValidity`, - // `setOffSet` and `setSize` methods. + // `setOffset` and `setSize` methods. 
listViewVector.setValidity(0, 1); listViewVector.setValidity(1, 0); listViewVector.setValidity(2, 1); listViewVector.setValidity(3, 1); - listViewVector.setOffSet(0, 0); - listViewVector.setOffSet(1, 3); - listViewVector.setOffSet(2, 3); - listViewVector.setOffSet(3, 7); + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); listViewVector.setSize(0, 3); listViewVector.setSize(1, 0); listViewVector.setSize(2, 4); listViewVector.setSize(3, 0); - // Set lastSet value using `setLastSet` method. - listViewVector.setLastSet(3); - // Set value count using `setValueCount` method. listViewVector.setValueCount(4); @@ -741,8 +716,6 @@ public void testBasicListViewSetWithListViewWriter() { assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - assertEquals(3, listViewVector.getLastSet()); - UnionListViewWriter listViewWriter = listViewVector.getWriter(); listViewWriter.setPosition(4); @@ -781,7 +754,6 @@ public void testBasicListViewSetWithListViewWriter() { assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); - assertEquals(4, listViewVector.getLastSet()); listViewVector.validate(); } } @@ -1293,9 +1265,9 @@ public void testSetNull2() { assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); @@ -1590,18 +1562,18 @@ public void testOutOfOrderOffset1() { childVector.setValueCount(7); // Set validity, offset and size buffers using `setValidity`, - // `setOffSet` and `setSize` methods. + // `setOffset` and `setSize` methods. listViewVector.setValidity(0, 1); listViewVector.setValidity(1, 0); listViewVector.setValidity(2, 1); listViewVector.setValidity(3, 1); listViewVector.setValidity(4, 1); - listViewVector.setOffSet(0, 4); - listViewVector.setOffSet(1, 7); - listViewVector.setOffSet(2, 0); - listViewVector.setOffSet(3, 0); - listViewVector.setOffSet(4, 3); + listViewVector.setOffset(0, 4); + listViewVector.setOffset(1, 7); + listViewVector.setOffset(2, 0); + listViewVector.setOffset(3, 0); + listViewVector.setOffset(4, 3); listViewVector.setSize(0, 3); listViewVector.setSize(1, 0); @@ -1609,9 +1581,6 @@ public void testOutOfOrderOffset1() { listViewVector.setSize(3, 0); listViewVector.setSize(4, 2); - // Set lastSet value using `setLastSet` method. - listViewVector.setLastSet(4); - // Set value count using `setValueCount` method. 
listViewVector.setValueCount(5); From 043a2d928c88a3833536cea7efedee6aaac28739 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 10 May 2024 15:43:55 +0530 Subject: [PATCH 30/34] fix: addressing reviews 9: clean up --- .../vector/complex/BaseRepeatedValueViewVector.java | 10 +++------- .../apache/arrow/vector/complex/ListViewVector.java | 13 ++----------- .../org/apache/arrow/vector/TestListViewVector.java | 10 +++++----- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 0de3bf967d6..ce83f808635 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -397,13 +397,9 @@ public int startNewValue(int index) { reallocSizeBuffer(); } - if (index == 0) { - offsetBuffer.setInt(0, 0); - } else { - final int prevOffset = offsetBuffer.getInt((index - 1) * OFFSET_WIDTH); - final int prevSize = sizeBuffer.getInt((index - 1) * SIZE_WIDTH); - final int currOffSet = prevOffset + prevSize; - offsetBuffer.setInt(index * OFFSET_WIDTH, currOffSet); + if (index > 0) { + final int prevOffset = getLengthOfChildVectorByIndex(index); + offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); } setValueCount(index + 1); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 219144b14ff..69d3c902fbb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -673,17 +673,8 @@ public void setNull(int index) { reallocValidityAndSizeAndOffsetBuffers(); } - if (index == 0) { - offsetBuffer.setInt(0, 0); - 
sizeBuffer.setInt(0, 0); - } else { - final int prevOffset = offsetBuffer.getInt((index - 1) * OFFSET_WIDTH); - final int prevSize = sizeBuffer.getInt((index - 1) * SIZE_WIDTH); - final int currOffSet = prevOffset + prevSize; - offsetBuffer.setInt(index * OFFSET_WIDTH, currOffSet); - sizeBuffer.setInt(index * SIZE_WIDTH, 0); - } - + offsetBuffer.setInt(index * OFFSET_WIDTH, 0); + sizeBuffer.setInt(index * SIZE_WIDTH, 0); BitVectorHelper.unsetBit(validityBuffer, index); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 9ae6e818628..e64ed77b1eb 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -1173,10 +1173,10 @@ public void testSetNull1() { assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(2, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(4, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(4, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(4, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); @@ -1358,9 +1358,9 @@ public void testSetNull3() { assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(0, offsetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); From 5f092848e93f426b032d1c3b9bb5dc7fe20a739c Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 10 May 2024 16:30:57 +0530 Subject: [PATCH 31/34] fix: addressing reviews 10: adding validation for invariants --- .../arrow/vector/complex/ListViewVector.java | 59 +++++++++++-------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 69d3c902fbb..67a651a1964 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -699,18 +699,42 @@ public int startNewValue(int index) { return offsetBuffer.getInt(index * OFFSET_WIDTH); } + /** + * Validate the invariants of the offset and size buffers. 
+ * 0 <= offsets[i] <= length of the child array + * 0 <= offsets[i] + size[i] <= length of the child array + * @param offset the offset at a given index + * @param size the size at a given index + */ + private void validateInvariants(int offset, int size) { + if (offset < 0) { + throw new IllegalArgumentException("Offset cannot be negative"); + } + + if (size < 0) { + throw new IllegalArgumentException("Size cannot be negative"); + } + + // 0 <= offsets[i] <= length of the child array + if (offset > this.vector.getValueCount()) { + throw new IllegalArgumentException("Offset is out of bounds."); + } + + // 0 <= offsets[i] + size[i] <= length of the child array + if (offset + size > this.vector.getValueCount()) { + throw new IllegalArgumentException("Offset + size <= length of the child array."); + } + } + /** * Set the offset at the given index. - * Make sure to use this function after using `setValidity` + * Make sure to use this function after updating `field` vector and using `setValidity` * @param index index of the value to set * @param value value to set */ public void setOffset(int index, int value) { - // 0 <= offsets[i] <= length of the child array - // 0 <= offsets[i] + size[i] <= length of the child array - if (value < 0) { - throw new IllegalArgumentException("Offset cannot be negative"); - } + validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH)); + offsetBuffer.setInt(index * OFFSET_WIDTH, value); } @@ -721,11 +745,8 @@ public void setOffset(int index, int value) { * @param value value to set */ public void setSize(int index, int value) { - // 0 <= offsets[i] <= length of the child array - // 0 <= offsets[i] + size[i] <= length of the child array - if (value < 0) { - throw new IllegalArgumentException("Size cannot be negative"); - } + validateInvariants(offsetBuffer.getInt(index * SIZE_WIDTH), value); + sizeBuffer.setInt(index * SIZE_WIDTH, value); } @@ -820,27 +841,13 @@ public double getDensity() { /** * Validating ListViewVector creation 
based on the specification guideline. - * 0 <= offsets[i] <= length of the child array - * 0 <= offsets[i] + size[i] <= length of the child array */ @Override public void validate() { for (int i = 0; i < valueCount; i++) { final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); final int size = sizeBuffer.getInt(i * SIZE_WIDTH); - if (size < 0) { - throw new IllegalStateException(String.format( - "Size %d at index %d is negative", size, i)); - } - final int childArrayLength = getLengthOfChildVector(); - if (offset < 0 || offset > childArrayLength) { - throw new IllegalStateException(String.format( - "Offset %d at index %d is out of bounds", offset, i)); - } - if ((offset + size) < 0 || (offset + size) > childArrayLength) { - throw new IllegalStateException(String.format( - "Size %d at index %d is out of bounds", size, i)); - } + validateInvariants(offset, size); } } From 1d7d8f090272c95752b8f065e2353db6d7a0eac8 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 13 May 2024 06:21:19 +0530 Subject: [PATCH 32/34] fix: addressing reviews 10: minor changes --- .../complex/BaseRepeatedValueViewVector.java | 32 ++++++++----------- .../arrow/vector/complex/ListViewVector.java | 2 +- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index ce83f808635..521b9109b94 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -99,26 +99,21 @@ public boolean allocateNewSafe() { } private void allocateBuffers() { - offsetBuffer = allocateOffsetBuffer(offsetAllocationSizeInBytes); - sizeBuffer = allocateSizeBuffer(sizeAllocationSizeInBytes); + offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes, true); + sizeBuffer = 
allocateBuffers(sizeAllocationSizeInBytes, false); } - private ArrowBuf allocateOffsetBuffer(final long size) { + private ArrowBuf allocateBuffers(final long size, boolean isOffSet) { final int curSize = (int) size; - ArrowBuf offsetBuffer = allocator.buffer(curSize); - offsetBuffer.readerIndex(0); - offsetAllocationSizeInBytes = curSize; - offsetBuffer.setZero(0, offsetBuffer.capacity()); - return offsetBuffer; - } - - private ArrowBuf allocateSizeBuffer(final long size) { - final int curSize = (int) size; - ArrowBuf sizeBuffer = allocator.buffer(curSize); - sizeBuffer.readerIndex(0); - sizeAllocationSizeInBytes = curSize; - sizeBuffer.setZero(0, sizeBuffer.capacity()); - return sizeBuffer; + ArrowBuf buffer = allocator.buffer(curSize); + buffer.readerIndex(0); + buffer.setZero(0, buffer.capacity()); + if (isOffSet) { + offsetAllocationSizeInBytes = curSize; + } else { + sizeAllocationSizeInBytes = curSize; + } + return buffer; } @Override @@ -244,7 +239,8 @@ public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { @Override public int getValueCapacity() { - return 0; + throw new UnsupportedOperationException( + "Get value capacity is not supported in RepeatedValueVector"); } protected int getOffsetBufferValueCapacity() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 67a651a1964..99a24052b27 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -808,7 +808,7 @@ public UnionVector promoteToUnion() { return vector; } - protected void invalidateReader() { + private void invalidateReader() { reader = null; } From f975552cea21fd32d8c86532ea9e76e2e8110cca Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 14 May 2024 13:33:47 +0530 Subject: [PATCH 33/34] fix: addressing reviews 11: minor 
changes to allocation logic --- .../vector/complex/BaseRepeatedValueViewVector.java | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 521b9109b94..73a25738854 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -99,20 +99,15 @@ public boolean allocateNewSafe() { } private void allocateBuffers() { - offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes, true); - sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes, false); + offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes); + sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); } - private ArrowBuf allocateBuffers(final long size, boolean isOffSet) { + private ArrowBuf allocateBuffers(final long size) { final int curSize = (int) size; ArrowBuf buffer = allocator.buffer(curSize); buffer.readerIndex(0); buffer.setZero(0, buffer.capacity()); - if (isOffSet) { - offsetAllocationSizeInBytes = curSize; - } else { - sizeAllocationSizeInBytes = curSize; - } return buffer; } From 7cf56d715d7703254c6f5b056c2cac82c00f4079 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 14 May 2024 14:09:09 +0530 Subject: [PATCH 34/34] fix: addressing reviews 12: adding comments regarding merging functionality --- .../apache/arrow/vector/complex/ListViewVector.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 99a24052b27..b19691e7aaa 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -64,6 +64,10 @@ * * The latter three are managed by its superclass. */ + +/* +* TODO: consider merging the functionality in `BaseRepeatedValueVector` into this class. +*/ public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector { protected ArrowBuf validityBuffer; @@ -259,7 +263,11 @@ public void allocateNew() throws OutOfMemoryException { public boolean allocateNewSafe() { boolean success = false; try { - /* release the current buffers, hence this is a new allocation */ + /* release the current buffers, hence this is a new allocation + * Note that the `clear` method call below releases validityBuffer and + * calls the superclass clear method, which releases the associated buffers + * (sizeBuffer and offsetBuffer). + */ clear(); /* allocate validity buffer */ allocateValidityBuffer(validityAllocationSizeInBytes); @@ -519,6 +527,7 @@ public MinorType getMinorType() { */ @Override public void clear() { + // call the superclass clear method, which releases the sizeBuffer and offsetBuffer super.clear(); validityBuffer = releaseBuffer(validityBuffer); }