From 32765c19d80ed8d527b6fa369bd00d8aa87da689 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Tue, 27 Feb 2024 20:33:28 -0500 Subject: [PATCH 1/6] feat: initial baseline to support listview --- .../src/main/codegen/data/ArrowTypes.tdd | 5 + .../codegen/templates/UnionListWriter.java | 2 +- .../main/codegen/templates/UnionReader.java | 2 +- .../org/apache/arrow/vector/TypeLayout.java | 15 ++ .../arrow/vector/complex/ListViewVector.java | 182 ++++++++++++++++++ .../org/apache/arrow/vector/types/Types.java | 21 ++ .../arrow/vector/TestListViewVector.java | 126 ++++++++++++ 7 files changed, 351 insertions(+), 2 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 3cf9a968791..72d666b5fe8 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -119,6 +119,11 @@ name: "Duration", fields: [{name: "unit", type: short, valueType: TimeUnit}], complex: false + }, + { + name: "ListView", + fields: [], + complex: true } ] } diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 5c0565ee271..6fd415007f8 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -26,7 +26,7 @@ import java.math.BigDecimal; <@pp.dropOutputFile /> -<#list ["List", "LargeList"] as listName> +<#list ["List", "LargeList", "ListView"] as listName> <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 
822d4822987..a4c4b57edde 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,7 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private static final int NUM_SUPPORTED_TYPES = 46; + private static final int NUM_SUPPORTED_TYPES = 47; private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index ae465418cf2..3cd40318f56 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -108,6 +108,15 @@ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) return new TypeLayout(vectors); } + @Override + public TypeLayout visit(ArrowType.ListView type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer() + ); + return new TypeLayout(vectors); + } + @Override public TypeLayout visit(ArrowType.LargeList type) { List vectors = asList( @@ -304,6 +313,12 @@ public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { return 2; } + @Override + public Integer visit(ArrowType.ListView type) { + // validity buffer + offset buffer + sizes buffer + return 3; + } + @Override public Integer visit(ArrowType.LargeList type) { // validity buffer + offset buffer diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java new file mode 100644 index 00000000000..15d63bec69a --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.complex; + +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.JsonStringArrayList; + +/** + * A list vector contains lists of a specific type of elements. Its structure contains 4 elements. + *
<ol>
+ *   <li>A validity buffer.</li>
+ *   <li>A child data vector that contains the elements of lists.</li>
+ *   <li>An offset buffer that stores the starting index of each list.</li>
+ *   <li>A size buffer that explicitly stores the length of each list.</li>
+ * </ol>
+ * The latter two are managed by its superclass. + */ +public class ListViewVector extends ListVector { + protected ArrowBuf sizeBuffer; + protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + private int valueCountOfDataVector = 0; + + /** + * Constructs a new instance. + * + * @param name The name of the instance. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param fieldType The type of this list. + * @param callBack A schema change callback. + */ + public ListViewVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { + super(name, allocator, fieldType, callBack); + this.sizeBuffer = allocator.getEmpty(); + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field, allocator, callBack); + this.sizeBuffer = allocator.getEmpty(); + } + + public static ListViewVector empty(String name, BufferAllocator allocator) { + return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null); + } + + @Override + public void allocateNew() throws OutOfMemoryException { + if (!allocateNewSafe()) { + throw new OutOfMemoryException("Failure while allocating memory"); + } + } + + /** + * Allocate memory for the vector. We internally use a default value count + * of 4096 to allocate memory for at least these many elements in the + * vector. + * + * @return false if memory allocation fails, true otherwise. 
+ */ + @Override + public boolean allocateNewSafe() { + boolean success = false; + try { + clear(); + success = super.allocateNewSafe(); + allocateSizeBuffer(sizeAllocationSizeInBytes); + } finally { + if (!success) { + clear(); + } + } + return success; + } + + @Override + public void clear() { + super.clear(); + sizeBuffer = releaseBuffer(sizeBuffer); + } + + /** + * Update index values for offset and size buffer. + * + * @param index Current index position. + * @param size The number of items added. + */ + public void endValue(int index, int size) { + sizeBuffer.setInt(index * OFFSET_WIDTH, size); + valueCountOfDataVector = valueCountOfDataVector + size; + } + + @Override + public void setValueCount(int valueCount) { + this.valueCount = valueCount; + if (valueCount > 0) { + while (valueCount > getValueCapacity()) { + reallocValidityAndOffsetBuffers(); + } + } + vector.setValueCount(valueCountOfDataVector); + } + + /** + * Start a new value in the list vector. + * + * @param index index of the value to start + * @param offset encode the start position of each slot in the child array + */ + public int startNewValue(int index, int offset) { + while (index >= getValueCapacity()) { + reallocValidityAndOffsetBuffers(); + } + offsetBuffer.setInt(index * OFFSET_WIDTH, offset); + BitVectorHelper.setBit(validityBuffer, index); + return offsetBuffer.getInt(index * OFFSET_WIDTH); + } + + /** + * Get the element in the list vector at a particular index. 
+ * @param index position of the element + * @return Object at given position + */ + @Override + public List getObject(int index) { + if (isSet(index) == 0) { + return null; + } + final List vals = new JsonStringArrayList<>(); + final int end = sizeBuffer.getInt(index * OFFSET_WIDTH); + final ValueVector vv = getDataVector(); + int delta = 0; + if (index > 0) { + for (int i = index; i > 0; i--) { + delta += sizeBuffer.getInt((i - 1) * OFFSET_WIDTH); + } + } + for (int i = 0; i < end; i++) { + vals.add(i, vv.getObject(i + delta)); + } + + return vals; + } + + protected void allocateSizeBuffer(final long size) { + final int curSize = (int) size; + sizeBuffer = allocator.buffer(curSize); + sizeBuffer.readerIndex(0); + sizeAllocationSizeInBytes = curSize; + sizeBuffer.setZero(0, sizeBuffer.capacity()); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 0b0e0d66a98..a702f7ea895 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -69,6 +69,7 @@ import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -131,6 +132,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; @@ -658,6 
+660,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new UnionListWriter((ListVector) vector); } }, + LISTVIEW(ListView.INSTANCE) { + @Override + public FieldVector getNewVector( + Field field, + BufferAllocator allocator, + CallBack schemaChangeCallback) { + return new ListViewVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new UnionListWriter((ListVector) vector); + } + }, LARGELIST(ArrowType.LargeList.INSTANCE) { @Override public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { @@ -861,6 +877,11 @@ public MinorType visit(List type) { return MinorType.LIST; } + @Override + public MinorType visit(ListView type) { + return MinorType.LISTVIEW; + } + @Override public MinorType visit(FixedSizeList type) { return MinorType.FIXED_SIZE_LIST; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java new file mode 100644 index 00000000000..12bb488ce7a --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.junit.Assert.assertEquals; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestListViewVector { + private BufferAllocator allocator; + + @Before + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @After + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testOutOfOrderWithoutShareChild() throws Exception { + // data to try to allocate: [[200, 400, 600], [12, -7, 25], [4, 6], [8, 127]] + // values: [12, -7, 25, 0, -127, 200, 400, 600, 4, 6] + // values group by: [[12,-7,25], [0,-127], [200,400,600], [4,6]] + try (ListViewVector listViewVector = ListViewVector.empty("input", allocator); + IntVector inVector = + (IntVector) listViewVector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())) + .getVector()) { + + listViewVector.allocateNew(); + inVector.allocateNew(); + + listViewVector.startNewValue(0, 3); + inVector.setSafe(0, 12); + inVector.setSafe(1, -7); + inVector.setSafe(2, 25); + listViewVector.endValue(0, 3); + + listViewVector.startNewValue(1, 8); + inVector.setSafe(3, 0); + inVector.setSafe(4, -127); + listViewVector.endValue(1, 2); + + listViewVector.startNewValue(2, 0); + inVector.setSafe(5, 200); + inVector.setSafe(6, 400); + inVector.setSafe(7, 600); + listViewVector.endValue(2, 3); + + listViewVector.startNewValue(3, 6); + inVector.setSafe(8, 4); + inVector.setSafe(9, 6); + listViewVector.endValue(3, 2); + + listViewVector.setValueCount(4); + inVector.setValueCount(10); + + assertEquals(inVector.toString(), "[12, -7, 25, 0, -127, 200, 400, 
600, 4, 6]"); + assertEquals(listViewVector.toString(), "[[12,-7,25], [0,-127], [200,400,600], [4,6]]"); + + } + } + + @Test + public void testOutOfOrderWithShareChild() throws Exception { + // data to try to allocate: [[12, -7, 25], [0, -127, 127, 50], [50, 12]] + // values: [0, -127, 127, 50, 12, -7, 25] + // values group by: [ [0, -127, 127, 50], [50, 12], [12, -7, 25]] + try (ListViewVector listViewVector = ListViewVector.empty("input", allocator); + IntVector inVector = + (IntVector) listViewVector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())) + .getVector()) { + + listViewVector.allocateNew(); + inVector.allocateNew(); + + listViewVector.startNewValue(0, 3); + inVector.setSafe(0, 0); + inVector.setSafe(1, -127); + inVector.setSafe(2, 127); + inVector.setSafe(3, 50); + listViewVector.endValue(0, 4); + + listViewVector.startNewValue(1, 6); + inVector.setSafe(4, 12); + listViewVector.endValue(1, 2); + + listViewVector.startNewValue(2, 8); + inVector.setSafe(5, -7); + inVector.setSafe(6, 25); + listViewVector.endValue(2, 3); + + listViewVector.setValueCount(3); + inVector.setValueCount(7); + System.out.println("IntVector: \n" + inVector); + System.out.println("ListViewVector: \n" + listViewVector); + + assertEquals(inVector.toString(), "[0, -127, 127, 50, 12, -7, 25]"); + // FIXME For borders, try to review sharing of child array values: 50 and 12 should be shared and reused + assertEquals(listViewVector.toString(), "[[0,-127,127,50], [12,-7], [25,null,null]]"); + } + } +} From 22fb2859368c3a310aa015295fbe87ed27c3f130 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Thu, 29 Feb 2024 05:22:35 -0500 Subject: [PATCH 2/6] fix: adding unit test for listview creation by vectors and writers --- .../templates/UnionListViewWriter.java | 58 +++++ .../codegen/templates/UnionListWriter.java | 4 +- .../arrow/vector/complex/ListViewVector.java | 61 ++++-- .../arrow/vector/TestListViewVector.java | 207 ++++++++++++++---- 4 files changed, 266 
insertions(+), 64 deletions(-) create mode 100644 java/vector/src/main/codegen/templates/UnionListViewWriter.java diff --git a/java/vector/src/main/codegen/templates/UnionListViewWriter.java b/java/vector/src/main/codegen/templates/UnionListViewWriter.java new file mode 100644 index 00000000000..d73de4d2bc7 --- /dev/null +++ b/java/vector/src/main/codegen/templates/UnionListViewWriter.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.complex.impl.UnionListWriter; + +<@pp.dropOutputFile /> +<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionListViewWriter.java" /> + + +<#include "/@includes/license.ftl" /> + +package org.apache.arrow.vector.complex.impl; + +<#include "/@includes/vv_imports.ftl" /> + +/* + * This class is generated using freemarker and the ${.template_name} template. + */ + +/** + *

<p>Writer for ListViewVector. This extends UnionListWriter to simplify writing list-view entries, each delimited by an explicit offset and size, to a list + * </p>

+ */ +@SuppressWarnings("unused") +public class UnionListViewWriter extends UnionListWriter { + public UnionListViewWriter(ListViewVector vector) { + super(vector); + } + + public void startList(int offset) { + ((ListViewVector) vector).startNewValue(idx(), offset); + writer.setPosition(((ListViewVector) vector).getOffsetBuffer().getInt((idx()) * 4)); + } + + public void endList(int size) { + ((ListViewVector) vector).endValue(idx(), size); + setPosition(idx() + 1); + } + + public void setValueCount(int count) { + ((ListViewVector) vector).setValueCount(count); + } +} diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 6fd415007f8..8c1cb12a143 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -16,6 +16,8 @@ */ import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory; +import org.apache.arrow.vector.complex.impl.PromotableWriter; import org.apache.arrow.vector.complex.writer.Decimal256Writer; import org.apache.arrow.vector.complex.writer.DecimalWriter; import org.apache.arrow.vector.holders.Decimal256Holder; @@ -26,7 +28,7 @@ import java.math.BigDecimal; <@pp.dropOutputFile /> -<#list ["List", "LargeList", "ListView"] as listName> +<#list ["List", "LargeList"] as listName> <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 15d63bec69a..6d005529c15 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -24,6 +24,7 @@ import org.apache.arrow.memory.OutOfMemoryException; import 
org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; @@ -43,7 +44,8 @@ public class ListViewVector extends ListVector { protected ArrowBuf sizeBuffer; protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - private int valueCountOfDataVector = 0; + + private int childArraySize = 0; /** * Constructs a new instance. @@ -109,17 +111,6 @@ public void clear() { sizeBuffer = releaseBuffer(sizeBuffer); } - /** - * Update index values for offset and size buffer. - * - * @param index Current index position. - * @param size The number of items added. - */ - public void endValue(int index, int size) { - sizeBuffer.setInt(index * OFFSET_WIDTH, size); - valueCountOfDataVector = valueCountOfDataVector + size; - } - @Override public void setValueCount(int valueCount) { this.valueCount = valueCount; @@ -128,7 +119,7 @@ public void setValueCount(int valueCount) { reallocValidityAndOffsetBuffers(); } } - vector.setValueCount(valueCountOfDataVector); + vector.setValueCount(childArraySize); } /** @@ -146,6 +137,20 @@ public int startNewValue(int index, int offset) { return offsetBuffer.getInt(index * OFFSET_WIDTH); } + /** + * Update index values for offset and size buffer. + * + * @param index Current index position. + * @param size The number of items added. + */ + public void endValue(int index, int size) { + sizeBuffer.setInt(index * OFFSET_WIDTH, size); + int currentchildArraySize = size + offsetBuffer.getInt(index * OFFSET_WIDTH); + if (currentchildArraySize > childArraySize) { + childArraySize = currentchildArraySize; + } + } + /** * Get the element in the list vector at a particular index. 
* @param index position of the element @@ -157,21 +162,33 @@ public List getObject(int index) { return null; } final List vals = new JsonStringArrayList<>(); - final int end = sizeBuffer.getInt(index * OFFSET_WIDTH); final ValueVector vv = getDataVector(); - int delta = 0; - if (index > 0) { - for (int i = index; i > 0; i--) { - delta += sizeBuffer.getInt((i - 1) * OFFSET_WIDTH); - } - } - for (int i = 0; i < end; i++) { - vals.add(i, vv.getObject(i + delta)); + int items = sizeBuffer.getInt(index * OFFSET_WIDTH); + int position = offsetBuffer.getInt(index * OFFSET_WIDTH); + for (int i = 0; i < items; i++) { + vals.add(vv.getObject(i + position)); } return vals; } + /** + * Sets list at index to be null. + * @param index position in vector + */ + @Override + public void setNull(int index) { + while (index >= getValueCapacity()) { + reallocValidityAndOffsetBuffers(); + } + BitVectorHelper.unsetBit(validityBuffer, index); + } + + @Override + public UnionListViewWriter getWriter() { + return new UnionListViewWriter(this); + } + protected void allocateSizeBuffer(final long size) { final int curSize = (int) size; sizeBuffer = allocator.buffer(curSize); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 12bb488ce7a..b0a1f664689 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -17,10 +17,12 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; + +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.FieldType; import org.junit.After; @@ -41,10 +43,11 @@ 
public void terminate() throws Exception { } @Test - public void testOutOfOrderWithoutShareChild() throws Exception { - // data to try to allocate: [[200, 400, 600], [12, -7, 25], [4, 6], [8, 127]] - // values: [12, -7, 25, 0, -127, 200, 400, 600, 4, 6] - // values group by: [[12,-7,25], [0,-127], [200,400,600], [4,6]] + public void testListView() throws Exception { + // values = [12, -7, 25, 0, -127, 127, 50] + // offsets = [0, 7, 3, 0] + // sizes = [3, 0, 4, 0] + // data to get thru listview: [[12,-7,25], null, [0,-127,127,50], []] try (ListViewVector listViewVector = ListViewVector.empty("input", allocator); IntVector inVector = (IntVector) listViewVector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())) @@ -53,42 +56,39 @@ public void testOutOfOrderWithoutShareChild() throws Exception { listViewVector.allocateNew(); inVector.allocateNew(); - listViewVector.startNewValue(0, 3); inVector.setSafe(0, 12); inVector.setSafe(1, -7); inVector.setSafe(2, 25); - listViewVector.endValue(0, 3); - - listViewVector.startNewValue(1, 8); inVector.setSafe(3, 0); inVector.setSafe(4, -127); - listViewVector.endValue(1, 2); + inVector.setSafe(5, 127); + inVector.setSafe(6, 50); - listViewVector.startNewValue(2, 0); - inVector.setSafe(5, 200); - inVector.setSafe(6, 400); - inVector.setSafe(7, 600); - listViewVector.endValue(2, 3); + listViewVector.startNewValue(0, 0); + listViewVector.endValue(0, 3); - listViewVector.startNewValue(3, 6); - inVector.setSafe(8, 4); - inVector.setSafe(9, 6); - listViewVector.endValue(3, 2); + listViewVector.setNull(1); - listViewVector.setValueCount(4); - inVector.setValueCount(10); + listViewVector.startNewValue(2, 3); + listViewVector.endValue(2, 4); - assertEquals(inVector.toString(), "[12, -7, 25, 0, -127, 200, 400, 600, 4, 6]"); - assertEquals(listViewVector.toString(), "[[12,-7,25], [0,-127], [200,400,600], [4,6]]"); + listViewVector.startNewValue(3, 0); + listViewVector.endValue(3, 0); + listViewVector.setValueCount(4); + + 
assertEquals(inVector.toString(), "[12, -7, 25, 0, -127, 127, 50]"); + assertEquals(listViewVector.toString(), "[[12,-7,25], null, [0,-127,127,50], []]"); } } @Test - public void testOutOfOrderWithShareChild() throws Exception { - // data to try to allocate: [[12, -7, 25], [0, -127, 127, 50], [50, 12]] - // values: [0, -127, 127, 50, 12, -7, 25] - // values group by: [ [0, -127, 127, 50], [50, 12], [12, -7, 25]] + public void testListViewWithSharedValues() throws Exception { + // values = [0, -127, 127, 50, 12, -7, 25] + // offsets = [4, 7, 0, 0, 3] + // sizes = [3, 0, 4, 0, 2] + // data to get thru listview: [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + // shared values = [50, 12] try (ListViewVector listViewVector = ListViewVector.empty("input", allocator); IntVector inVector = (IntVector) listViewVector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())) @@ -97,30 +97,155 @@ public void testOutOfOrderWithShareChild() throws Exception { listViewVector.allocateNew(); inVector.allocateNew(); - listViewVector.startNewValue(0, 3); inVector.setSafe(0, 0); inVector.setSafe(1, -127); inVector.setSafe(2, 127); inVector.setSafe(3, 50); - listViewVector.endValue(0, 4); - - listViewVector.startNewValue(1, 6); inVector.setSafe(4, 12); - listViewVector.endValue(1, 2); - - listViewVector.startNewValue(2, 8); inVector.setSafe(5, -7); inVector.setSafe(6, 25); - listViewVector.endValue(2, 3); - listViewVector.setValueCount(3); - inVector.setValueCount(7); - System.out.println("IntVector: \n" + inVector); - System.out.println("ListViewVector: \n" + listViewVector); + listViewVector.startNewValue(0, 4); + listViewVector.endValue(0, 3); + + listViewVector.setNull(1); + + listViewVector.startNewValue(2, 0); + listViewVector.endValue(2, 4); + + listViewVector.startNewValue(3, 0); + listViewVector.endValue(3, 0); + + listViewVector.startNewValue(4, 3); + listViewVector.endValue(4, 2); + + listViewVector.setValueCount(5); assertEquals(inVector.toString(), "[0,
-127, 127, 50, 12, -7, 25]"); - // FIXME For borders, try to review sharing of child array values: 50 and 12 should be shared and reused - assertEquals(listViewVector.toString(), "[[0,-127,127,50], [12,-7], [25,null,null]]"); + assertEquals(listViewVector.toString(), "[[12,-7,25], null, [0,-127,127,50], [], [50,12]]"); + } + } + + @Test + public void testPartialListViewWithSharedValues() throws Exception { + // values = [12, -7, 25, 17, -127, 200, 400, 600, 4, 6, 11, 0] + // offsets = [2, 5] + // sizes = [5, 3] + // data to get thru listview: [[25,17,-127,200,400], [200,400,600]] + // shared values = [200, 400] + try (ListViewVector listViewVector = ListViewVector.empty("input", allocator); + IntVector inVector = + (IntVector) listViewVector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())) + .getVector()) { + + listViewVector.allocateNew(); + inVector.allocateNew(); + + inVector.setSafe(0, 12); + inVector.setSafe(1, -7); + inVector.setSafe(2, 25); + inVector.setSafe(3, 17); + inVector.setSafe(4, -127); + inVector.setSafe(5, 200); + inVector.setSafe(6, 400); + inVector.setSafe(7, 600); + inVector.setSafe(8, 4); + inVector.setSafe(9, 6); + inVector.setSafe(10, 11); + inVector.setSafe(11, 0); + + listViewVector.startNewValue(0, 2); + listViewVector.endValue(0, 5); + + listViewVector.startNewValue(1, 5); + listViewVector.endValue(1, 3); + + listViewVector.setValueCount(2); + inVector.setValueCount(12); + + assertEquals(inVector.toString(), "[12, -7, 25, 17, -127, 200, 400, 600, 4, 6, 11, 0]"); + assertEquals(listViewVector.toString(), "[[25,17,-127,200,400], [200,400,600]]"); + } + } + + @Test + public void testListViewWriter() throws Exception { + // values = [12, -7, 25, 0, -127, 127, 50] + // offsets = [0, 7, 3, 0] + // sizes = [3, 0, 4, 0] + // data to get thru listview: [[12,-7,25], null, [0,-127,127,50], []] + try (ListViewVector inVector = ListViewVector.empty("input", allocator)) { + UnionListViewWriter writer = inVector.getWriter(); +
writer.allocate(); + + writer.setPosition(0); + writer.startList(0); + writer.bigInt().writeBigInt(12); + writer.bigInt().writeBigInt(-7); + writer.bigInt().writeBigInt(25); + writer.endList(3); + + + writer.setPosition(2); + writer.startList(3); + writer.bigInt().writeBigInt(0); + writer.bigInt().writeBigInt(-127); + writer.bigInt().writeBigInt(127); + writer.bigInt().writeBigInt(50); + writer.endList(4); + + writer.setPosition(3); + writer.startList(0); + writer.endList(0); + + writer.setValueCount(4); + + assertEquals(inVector.getDataVector().toString(), "[12, -7, 25, 0, -127, 127, 50]"); + assertEquals(inVector.toString(), "[[12,-7,25], null, [0,-127,127,50], []]"); + } + } + + @Test + public void testListViewWriterWithSharedValues() throws Exception { + // values = [0, -127, 127, 50, 12, -7, 25] + // offsets = [4, 7, 0, 0, 3] + // sizes = [3, 0, 4, 0, 2] + // data to get thru listview: [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + // shared values = [50, 12] + try (ListViewVector inVector = ListViewVector.empty("input", allocator)) { + UnionListViewWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(4); + writer.bigInt().writeBigInt(12); + writer.bigInt().writeBigInt(-7); + writer.bigInt().writeBigInt(25); + writer.endList(3); + + + writer.setPosition(2); + writer.startList(0); + writer.bigInt().writeBigInt(0); + writer.bigInt().writeBigInt(-127); + writer.bigInt().writeBigInt(127); + writer.bigInt().writeBigInt(50); + writer.endList(4); + + writer.setPosition(3); + writer.startList(0); + writer.endList(0); + + writer.setPosition(4); + writer.startList(3); + writer.bigInt().writeBigInt(50); + writer.bigInt().writeBigInt(12); + writer.endList(2); + + writer.setValueCount(5); + + assertEquals(inVector.getDataVector().toString(), "[0, -127, 127, 50, 12, -7, 25]"); + assertEquals(inVector.toString(), "[[12,-7,25], null, [0,-127,127,50], [], [50,12]]"); } } } From 6a1a0ca41df416501d858d6feff3ca011559b8cd
Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Thu, 29 Feb 2024 05:39:40 -0500 Subject: [PATCH 3/6] fix: deleted dependencies added --- java/vector/src/main/codegen/templates/UnionListWriter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 8c1cb12a143..5c0565ee271 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -16,8 +16,6 @@ */ import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory; -import org.apache.arrow.vector.complex.impl.PromotableWriter; import org.apache.arrow.vector.complex.writer.Decimal256Writer; import org.apache.arrow.vector.complex.writer.DecimalWriter; import org.apache.arrow.vector.holders.Decimal256Holder; From 0751b131186ecbfa583d105f360c4922c79f4441 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Thu, 29 Feb 2024 07:39:19 -0500 Subject: [PATCH 4/6] Re-trigger listviewvector changes From 76bb5535c6a0c75cb84f952ffa1dbad4270e4309 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Thu, 29 Feb 2024 09:50:21 -0500 Subject: [PATCH 5/6] fix: add visitor for listview into ColumnBinderArrowTypeVisitor JDBC class --- .../adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index dc708724043..211718e1ebd 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -45,6 +45,7 @@ import 
org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -83,6 +84,11 @@ public ColumnBinder visit(ArrowType.List type) { return new ListBinder((ListVector) vector); } + @Override + public ColumnBinder visit(ArrowType.ListView type) { + return new ListBinder((ListViewVector) vector); + } + @Override public ColumnBinder visit(ArrowType.LargeList type) { throw new UnsupportedOperationException("No column binder implemented for type " + type); From be8be52ca2373c3f668bf1fad99805d5607617a5 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Thu, 29 Feb 2024 11:08:16 -0500 Subject: [PATCH 6/6] fix: add flightsql changes (pending to add unit test) --- .../ListViewAvaticaParameterConverter.java | 74 +++++++++++++++++++ .../jdbc/utils/AvaticaParameterBinder.java | 6 ++ .../arrow/driver/jdbc/utils/ConvertUtils.java | 7 ++ 3 files changed, 87 insertions(+) create mode 100644 java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/ListViewAvaticaParameterConverter.java diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/ListViewAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/ListViewAvaticaParameterConverter.java new file mode 100644 index 00000000000..cb3cdc1543b --- /dev/null +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/ListViewAvaticaParameterConverter.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.driver.jdbc.converter.impl; + +import java.util.List; + +import org.apache.arrow.driver.jdbc.utils.AvaticaParameterBinder; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.calcite.avatica.AvaticaParameter; +import org.apache.calcite.avatica.remote.TypedValue; + +/** + * AvaticaParameterConverter for ListView Arrow types. + */ +public class ListViewAvaticaParameterConverter extends BaseAvaticaParameterConverter { + + public ListViewAvaticaParameterConverter(ArrowType.ListView type) { + } + + //FIXME!
Add unit test to validate this bindParameter + @Override + public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { + final List values = (List) typedValue.value; + + if (vector instanceof ListViewVector) { + ListViewVector listViewVector = ((ListViewVector) vector); + FieldVector childVector = listViewVector.getDataVector(); + + int startPos = listViewVector.startNewValue(index); + for (int i = 0; i < values.size(); i++) { + Object val = values.get(i); + int childIndex = startPos + i; + if (val == null) { + if (childVector.getField().isNullable()) { + childVector.setNull(childIndex); + } else { + throw new UnsupportedOperationException("Can't set null on non-nullable child list"); + } + } else { + childVector.getField().getType().accept( + new AvaticaParameterBinder.BinderVisitor( + childVector, TypedValue.ofSerial(typedValue.componentType, val), childIndex)); + } + } + listViewVector.endValue(index, values.size()); + listViewVector.setValueCount(index + 1); + return true; + } + return false; + } + + @Override + public AvaticaParameter createParameter(Field field) { + return createParameter(field, false); + } +} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index b2bd8e745ec..2df71ee1d26 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -34,6 +34,7 @@ import org.apache.arrow.driver.jdbc.converter.impl.LargeListAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.LargeUtf8AvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.ListAvaticaParameterConverter; +import 
org.apache.arrow.driver.jdbc.converter.impl.ListViewAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.MapAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.NullAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.StructAvaticaParameterConverter; @@ -155,6 +156,11 @@ public Boolean visit(ArrowType.List type) { return new ListAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + @Override + public Boolean visit(ArrowType.ListView type) { + return new ListViewAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); + } + @Override public Boolean visit(ArrowType.LargeList type) { return new LargeListAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java index 843fe0cb89d..8baa50056d1 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java @@ -36,6 +36,7 @@ import org.apache.arrow.driver.jdbc.converter.impl.LargeListAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.LargeUtf8AvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.ListAvaticaParameterConverter; +import org.apache.arrow.driver.jdbc.converter.impl.ListViewAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.MapAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.NullAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.StructAvaticaParameterConverter; @@ -170,6 +171,12 @@ public AvaticaParameter visit(ArrowType.List type) { } + @Override + public AvaticaParameter 
visit(ArrowType.ListView type) { + return new ListViewAvaticaParameterConverter(type).createParameter(field); + + } + @Override public AvaticaParameter visit(ArrowType.LargeList type) { return new LargeListAvaticaParameterConverter(type).createParameter(field);