From dc24cd4d0c8da16e15873c23a3fe10cdb3a1293a Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 6 Aug 2024 15:26:39 +0530 Subject: [PATCH 1/4] fix: initial commit --- .../vector/complex/AbstractStructVector.java | 2 +- .../complex/BaseRepeatedValueVector.java | 2 +- .../vector/complex/FixedSizeListVector.java | 2 +- .../arrow/vector/complex/LargeListVector.java | 2 +- .../arrow/vector/complex/ListVector.java | 2 +- .../arrow/vector/complex/ListViewVector.java | 2 +- .../arrow/vector/complex/StructVector.java | 2 +- .../apache/arrow/vector/TestVectorReset.java | 129 ++++++++++++++++++ 8 files changed, 136 insertions(+), 7 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java index feb7edfec94..cb0a16ca569 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java @@ -387,7 +387,7 @@ public ArrowBuf[] getBuffers(boolean clear) { final List buffers = new ArrayList<>(); for (final ValueVector vector : vectors.values()) { - for (final ArrowBuf buf : vector.getBuffers(false)) { + for (final ArrowBuf buf : vector.getBuffers(clear)) { buffers.add(buf); if (clear) { buf.getReferenceManager().retain(1); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 1cdb87eba03..430031fca90 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -279,7 +279,7 @@ public ArrowBuf[] getBuffers(boolean clear) { } else { List list = new ArrayList<>(); list.add(offsetBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); + list.addAll(Arrays.asList(vector.getBuffers(clear))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index cb455084808..885e1d7986f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -369,7 +369,7 @@ public ArrowBuf[] getBuffers(boolean clear) { } else { List list = new ArrayList<>(); list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); + list.addAll(Arrays.asList(vector.getBuffers(clear))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index b5b32c8032d..8413cc294bc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -900,7 +900,7 @@ public ArrowBuf[] getBuffers(boolean clear) { List list = new ArrayList<>(); list.add(offsetBuffer); list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); + list.addAll(Arrays.asList(vector.getBuffers(clear))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index a1e18210fc6..843bc712708 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -744,7 +744,7 @@ public ArrowBuf[] getBuffers(boolean clear) { List list = new ArrayList<>(); list.add(offsetBuffer); list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); + list.addAll(Arrays.asList(vector.getBuffers(clear))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 6ced66d81ec..514c86e0c5b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -719,7 +719,7 @@ public ArrowBuf[] getBuffers(boolean clear) { list.add(validityBuffer); list.add(offsetBuffer); list.add(sizeBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); + list.addAll(Arrays.asList(vector.getBuffers(clear))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java index dda9b6547f7..c736ca23c16 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java @@ -413,7 +413,7 @@ public ArrowBuf[] getBuffers(boolean clear) { } else { List list = new ArrayList<>(); list.add(validityBuffer); - list.addAll(Arrays.asList(super.getBuffers(false))); + list.addAll(Arrays.asList(super.getBuffers(clear))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 48cf78a4c2e..8a868221a9a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -63,6 +63,25 @@ private void resetVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) { assertEquals(0, vector.getValueCount()); } + private void resetClearVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) { + long[] sizeBefore = new long[bufs.length]; + for (int i = 0; i < bufs.length; i++) { + sizeBefore[i] = bufs[i].capacity(); + } + vector.reset(); + for (int i = 0; i < bufs.length; i++) { + assertEquals(sizeBefore[i], bufs[i].capacity()); + } + assertEquals(0, vector.getValueCount()); + // clear the retrieved buffers + for (ArrowBuf buf : bufs) { + // clear the buffer to release the memory + while (buf.refCnt() > 0) { + buf.close(); + } + } + } + private void verifyBufferZeroed(ArrowBuf buf) { for (int i = 0; i < buf.capacity(); i++) { assertTrue((byte) 0 == buf.getByte(i)); @@ -79,6 +98,16 @@ public void testFixedTypeReset() { } } + @Test + public void testFixedTypeResetAndClear() { + try (final UInt4Vector vector = new UInt4Vector("UInt4", allocator)) { + vector.allocateNewSafe(); + vector.setNull(0); + vector.setValueCount(1); + resetClearVectorAndVerify(vector, vector.getBuffers(true)); + } + } + @Test public void testVariableTypeReset() { try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) { @@ -91,6 +120,18 @@ public void testVariableTypeReset() { } } + @Test + public void testVariableTypeResetAndClear() { + try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) { + vector.allocateNewSafe(); + vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); + vector.setLastSet(0); + vector.setValueCount(1); + resetClearVectorAndVerify(vector, vector.getBuffers(true)); + assertEquals(-1, vector.getLastSet()); + } + } + @Test public void testVariableViewTypeReset() { try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { @@ -103,6 +144,18 @@ public void testVariableViewTypeReset() { } } + @Test + public void testVariableViewTypeResetAndClear() { + try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { + vector.allocateNewSafe(); + vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); + vector.setLastSet(0); + vector.setValueCount(1); + resetClearVectorAndVerify(vector, vector.getBuffers(true)); + assertEquals(-1, vector.getLastSet()); + } + } + @Test public void testLargeVariableTypeReset() { try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { @@ -115,6 +168,18 @@ public void testLargeVariableTypeReset() { } } + @Test + public void testLargeVariableTypeResetAndClear() { + try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { + vector.allocateNewSafe(); + vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); + vector.setLastSet(0); + vector.setValueCount(1); + resetClearVectorAndVerify(vector, vector.getBuffers(true)); + assertEquals(-1, vector.getLastSet()); + } + } + @Test public void testListTypeReset() { try (final ListVector variableList = @@ -139,6 +204,30 @@ public void testListTypeReset() { } } + @Test + public void testListTypeResetAndClear() { + try (final ListVector variableList = + new ListVector( + "VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null); + final FixedSizeListVector fixedList = + new FixedSizeListVector( + "FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null)) { + // ListVector + variableList.allocateNewSafe(); + variableList.startNewValue(0); + variableList.endValue(0, 0); + variableList.setValueCount(1); + resetClearVectorAndVerify(variableList, variableList.getBuffers(true)); + assertEquals(-1, variableList.getLastSet()); + + // FixedSizeListVector + fixedList.allocateNewSafe(); + fixedList.setNull(0); + fixedList.setValueCount(1); + resetClearVectorAndVerify(fixedList, fixedList.getBuffers(true)); + } + } + @Test public void testStructTypeReset() { try (final NonNullableStructVector nonNullableStructVector = @@ -164,6 +253,31 @@ public void testStructTypeReset() { } } + @Test + public void testStructTypeResetAndClear() { + try (final NonNullableStructVector nonNullableStructVector = + new NonNullableStructVector( + "Struct", allocator, FieldType.nullable(MinorType.INT.getType()), null); + final StructVector structVector = + new StructVector( + "NullableStruct", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { + // NonNullableStructVector + nonNullableStructVector.allocateNewSafe(); + IntVector structChild = + nonNullableStructVector.addOrGet( + "child", FieldType.nullable(new Int(32, true)), IntVector.class); + structChild.setNull(0); + nonNullableStructVector.setValueCount(1); + resetClearVectorAndVerify(nonNullableStructVector, nonNullableStructVector.getBuffers(true)); + + // StructVector + structVector.allocateNewSafe(); + structVector.setNull(0); + structVector.setValueCount(1); + resetClearVectorAndVerify(structVector, structVector.getBuffers(true)); + } + } + @Test public void testUnionTypeReset() { try (final UnionVector vector = @@ -178,4 +292,19 @@ public void testUnionTypeReset() { resetVectorAndVerify(vector, vector.getBuffers(false)); } } + + @Test + public void testUnionTypeResetAndClear() { + try (final UnionVector vector = + new UnionVector("Union", allocator, /* field type */ null, /* call-back */ null); + final IntVector dataVector = new IntVector("Int", allocator)) { + vector.getBufferSize(); + vector.allocateNewSafe(); + dataVector.allocateNewSafe(); + vector.addVector(dataVector); + dataVector.setNull(0); + vector.setValueCount(1); + resetClearVectorAndVerify(vector, vector.getBuffers(true)); + } + } } From 62a33ffb4ec39dc53b3eee119a8778c3daf7e0f2 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 6 Aug 2024 15:53:37 +0530 Subject: [PATCH 2/4] fix: adding listviewvector --- .../apache/arrow/vector/TestVectorReset.java | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 8a868221a9a..7983fa92388 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -25,6 +25,7 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -187,7 +188,10 @@ public void testListTypeReset() { "VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null); final FixedSizeListVector fixedList = new FixedSizeListVector( - "FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null)) { + "FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null); + final ListViewVector variableViewList = + new ListViewVector( + "VarListView", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { // ListVector variableList.allocateNewSafe(); variableList.startNewValue(0); @@ -201,6 +205,13 @@ public void testListTypeReset() { fixedList.setNull(0); fixedList.setValueCount(1); resetVectorAndVerify(fixedList, fixedList.getBuffers(false)); + + // ListViewVector + variableViewList.allocateNewSafe(); + variableViewList.startNewValue(0); + variableViewList.endValue(0, 0); + variableViewList.setValueCount(1); + resetVectorAndVerify(variableViewList, variableViewList.getBuffers(false)); } } @@ -211,7 +222,10 @@ public void testListTypeResetAndClear() { "VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null); final FixedSizeListVector fixedList = new FixedSizeListVector( - "FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null)) { + "FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null); + final ListViewVector variableViewList = + new ListViewVector( + "VarListView", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { // ListVector variableList.allocateNewSafe(); variableList.startNewValue(0); @@ -225,6 +239,13 @@ public void testListTypeResetAndClear() { fixedList.setNull(0); fixedList.setValueCount(1); resetClearVectorAndVerify(fixedList, fixedList.getBuffers(true)); + + // ListViewVector + variableViewList.allocateNewSafe(); + variableViewList.startNewValue(0); + variableViewList.endValue(0, 0); + variableViewList.setValueCount(1); + resetClearVectorAndVerify(variableViewList, variableViewList.getBuffers(true)); } } From 2b1d9484cbbc2cc958872f50f8db30a7a2c97b9b Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 13 Aug 2024 17:52:49 +0530 Subject: [PATCH 3/4] fix: updating documentation and clear usage --- .../vector/complex/AbstractStructVector.java | 11 ++ .../complex/BaseRepeatedValueVector.java | 13 +- .../arrow/vector/complex/LargeListVector.java | 11 +- .../vector/complex/LargeListViewVector.java | 5 +- .../arrow/vector/complex/ListVector.java | 11 +- .../arrow/vector/complex/ListViewVector.java | 5 +- .../arrow/vector/complex/StructVector.java | 11 +- .../apache/arrow/vector/TestVectorReset.java | 139 ------------------ 8 files changed, 47 insertions(+), 159 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java index cb0a16ca569..5ca1601055f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java @@ -382,6 +382,17 @@ public VectorWithOrdinal getChildVectorWithOrdinal(String name) { return new VectorWithOrdinal(vector, ordinal); } + /** + * Return the underlying buffers associated with this vector. Note that this doesn't impact the + * reference counts for this buffer, so it only should be used for in-context access. Also note + * that this buffer changes regularly, thus external classes shouldn't hold a reference to it + * (unless they change it). + * + * @param clear Whether to clear vector before returning, the buffers will still be refcounted but + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. + * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. + */ @Override public ArrowBuf[] getBuffers(boolean clear) { final List buffers = new ArrayList<>(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 430031fca90..fbe83bad52c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -271,6 +271,17 @@ public void reset() { valueCount = 0; } + /** + * Return the underlying buffers associated with this vector. Note that this doesn't impact the + * reference counts for this buffer, so it only should be used for in-context access. Also note + * that this buffer changes regularly, thus external classes shouldn't hold a reference to it + * (unless they change it). + * + * @param clear Whether to clear vector before returning, the buffers will still be refcounted but + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. + * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. + */ @Override public ArrowBuf[] getBuffers(boolean clear) { final ArrowBuf[] buffers; @@ -279,7 +290,7 @@ public ArrowBuf[] getBuffers(boolean clear) { } else { List list = new ArrayList<>(); list.add(offsetBuffer); - list.addAll(Arrays.asList(vector.getBuffers(clear))); + list.addAll(Arrays.asList(vector.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 8413cc294bc..ed075352c93 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -882,12 +882,13 @@ public void reset() { /** * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it + * reference counts for this buffer, so it only should be used for in-context access. Also note + * that this buffer changes regularly, thus external classes shouldn't hold a reference to it * (unless they change it). * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted but - * the returned array will be the only reference to them + * @param clear Whether to clear vector before returning, the buffers will still be refcounted but + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. */ @Override @@ -900,7 +901,7 @@ public ArrowBuf[] getBuffers(boolean clear) { List list = new ArrayList<>(); list.add(offsetBuffer); list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(clear))); + list.addAll(Arrays.asList(vector.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 17ccdbf0eae..f6b3de88b77 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -546,7 +546,8 @@ public void reset() { * (unless they change it). * * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. */ @Override @@ -561,7 +562,7 @@ public ArrowBuf[] getBuffers(boolean clear) { list.add(validityBuffer); list.add(offsetBuffer); list.add(sizeBuffer); - list.addAll(Arrays.asList(vector.getBuffers(clear))); + list.addAll(Arrays.asList(vector.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 843bc712708..76682c28fe6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -726,12 +726,13 @@ public void reset() { /** * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it + * reference counts for this buffer, so it only should be used for in-context access. Also note + * that this buffer changes regularly, thus external classes shouldn't hold a reference to it * (unless they change it). * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted but - * the returned array will be the only reference to them + * @param clear Whether to clear vector before returning, the buffers will still be refcounted but + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. */ @Override @@ -744,7 +745,7 @@ public ArrowBuf[] getBuffers(boolean clear) { List list = new ArrayList<>(); list.add(offsetBuffer); list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(clear))); + list.addAll(Arrays.asList(vector.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 514c86e0c5b..7f6d92f3be9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -704,7 +704,8 @@ public void reset() { * (unless they change it). * * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. */ @Override @@ -719,7 +720,7 @@ public ArrowBuf[] getBuffers(boolean clear) { list.add(validityBuffer); list.add(offsetBuffer); list.add(sizeBuffer); - list.addAll(Arrays.asList(vector.getBuffers(clear))); + list.addAll(Arrays.asList(vector.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java index c736ca23c16..ca5f572034c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java @@ -396,12 +396,13 @@ public int getValueCapacity() { /** * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it + * reference counts for this buffer, so it only should be used for in-context access. Also note + * that this buffer changes regularly, thus external classes shouldn't hold a reference to it * (unless they change it). * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted but - * the returned array will be the only reference to them + * @param clear Whether to clear vector before returning, the buffers will still be refcounted but + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. */ @Override @@ -413,7 +414,7 @@ public ArrowBuf[] getBuffers(boolean clear) { } else { List list = new ArrayList<>(); list.add(validityBuffer); - list.addAll(Arrays.asList(super.getBuffers(clear))); + list.addAll(Arrays.asList(super.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 7983fa92388..28d73a8fdff 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -64,25 +64,6 @@ private void resetVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) { assertEquals(0, vector.getValueCount()); } - private void resetClearVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) { - long[] sizeBefore = new long[bufs.length]; - for (int i = 0; i < bufs.length; i++) { - sizeBefore[i] = bufs[i].capacity(); - } - vector.reset(); - for (int i = 0; i < bufs.length; i++) { - assertEquals(sizeBefore[i], bufs[i].capacity()); - } - assertEquals(0, vector.getValueCount()); - // clear the retrieved buffers - for (ArrowBuf buf : bufs) { - // clear the buffer to release the memory - while (buf.refCnt() > 0) { - buf.close(); - } - } - } - private void verifyBufferZeroed(ArrowBuf buf) { for (int i = 0; i < buf.capacity(); i++) { assertTrue((byte) 0 == buf.getByte(i)); @@ -99,16 +80,6 @@ public void testFixedTypeReset() { } } - @Test - public void testFixedTypeResetAndClear() { - try (final UInt4Vector vector = new UInt4Vector("UInt4", allocator)) { - vector.allocateNewSafe(); - vector.setNull(0); - vector.setValueCount(1); - resetClearVectorAndVerify(vector, vector.getBuffers(true)); - } - } - @Test public void testVariableTypeReset() { try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) { @@ -121,18 +92,6 @@ public void testVariableTypeReset() { } } - @Test - public void testVariableTypeResetAndClear() { - try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) { - vector.allocateNewSafe(); - vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); - vector.setLastSet(0); - vector.setValueCount(1); - resetClearVectorAndVerify(vector, vector.getBuffers(true)); - assertEquals(-1, vector.getLastSet()); - } - } - @Test public void testVariableViewTypeReset() { try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { @@ -145,18 +104,6 @@ public void testVariableViewTypeReset() { } } - @Test - public void testVariableViewTypeResetAndClear() { - try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { - vector.allocateNewSafe(); - vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); - vector.setLastSet(0); - vector.setValueCount(1); - resetClearVectorAndVerify(vector, vector.getBuffers(true)); - assertEquals(-1, vector.getLastSet()); - } - } - @Test public void testLargeVariableTypeReset() { try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { @@ -169,18 +116,6 @@ public void testLargeVariableTypeReset() { } } - @Test - public void testLargeVariableTypeResetAndClear() { - try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { - vector.allocateNewSafe(); - vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); - vector.setLastSet(0); - vector.setValueCount(1); - resetClearVectorAndVerify(vector, vector.getBuffers(true)); - assertEquals(-1, vector.getLastSet()); - } - } - @Test public void testListTypeReset() { try (final ListVector variableList = @@ -215,40 +150,6 @@ public void testListTypeReset() { } } - @Test - public void testListTypeResetAndClear() { - try (final ListVector variableList = - new ListVector( - "VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null); - final FixedSizeListVector fixedList = - new FixedSizeListVector( - "FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null); - final ListViewVector variableViewList = - new ListViewVector( - "VarListView", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { - // ListVector - variableList.allocateNewSafe(); - variableList.startNewValue(0); - variableList.endValue(0, 0); - variableList.setValueCount(1); - resetClearVectorAndVerify(variableList, variableList.getBuffers(true)); - assertEquals(-1, variableList.getLastSet()); - - // FixedSizeListVector - fixedList.allocateNewSafe(); - fixedList.setNull(0); - fixedList.setValueCount(1); - resetClearVectorAndVerify(fixedList, fixedList.getBuffers(true)); - - // ListViewVector - variableViewList.allocateNewSafe(); - variableViewList.startNewValue(0); - variableViewList.endValue(0, 0); - variableViewList.setValueCount(1); - resetClearVectorAndVerify(variableViewList, variableViewList.getBuffers(true)); - } - } - @Test public void testStructTypeReset() { try (final NonNullableStructVector nonNullableStructVector = @@ -274,31 +175,6 @@ public void testStructTypeReset() { } } - @Test - public void testStructTypeResetAndClear() { - try (final NonNullableStructVector nonNullableStructVector = - new NonNullableStructVector( - "Struct", allocator, FieldType.nullable(MinorType.INT.getType()), null); - final StructVector structVector = - new StructVector( - "NullableStruct", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { - // NonNullableStructVector - nonNullableStructVector.allocateNewSafe(); - IntVector structChild = - nonNullableStructVector.addOrGet( - "child", FieldType.nullable(new Int(32, true)), IntVector.class); - structChild.setNull(0); - nonNullableStructVector.setValueCount(1); - resetClearVectorAndVerify(nonNullableStructVector, nonNullableStructVector.getBuffers(true)); - - // StructVector - structVector.allocateNewSafe(); - structVector.setNull(0); - structVector.setValueCount(1); - resetClearVectorAndVerify(structVector, structVector.getBuffers(true)); - } - } - @Test public void testUnionTypeReset() { try (final UnionVector vector = @@ -313,19 +189,4 @@ public void testUnionTypeReset() { resetVectorAndVerify(vector, vector.getBuffers(false)); } } - - @Test - public void testUnionTypeResetAndClear() { - try (final UnionVector vector = - new UnionVector("Union", allocator, /* field type */ null, /* call-back */ null); - final IntVector dataVector = new IntVector("Int", allocator)) { - vector.getBufferSize(); - vector.allocateNewSafe(); - dataVector.allocateNewSafe(); - vector.addVector(dataVector); - dataVector.setNull(0); - vector.setValueCount(1); - resetClearVectorAndVerify(vector, vector.getBuffers(true)); - } - } } From 23b80c6cfc4726479a99a13da50cfe13cc7636e3 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 14 Aug 2024 05:42:25 +0530 Subject: [PATCH 4/4] fix: addressing reviews --- .../arrow/vector/complex/AbstractStructVector.java | 2 +- .../arrow/vector/complex/FixedSizeListVector.java | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java index 5ca1601055f..2921e43cb64 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java @@ -398,7 +398,7 @@ public ArrowBuf[] getBuffers(boolean clear) { final List buffers = new ArrayList<>(); for (final ValueVector vector : vectors.values()) { - for (final ArrowBuf buf : vector.getBuffers(clear)) { + for (final ArrowBuf buf : vector.getBuffers(false)) { buffers.add(buf); if (clear) { buf.getReferenceManager().retain(1); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 885e1d7986f..c762eb51725 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -360,6 +360,17 @@ public void reset() { valueCount = 0; } + /** + * Return the underlying buffers associated with this vector. Note that this doesn't impact the + * reference counts for this buffer, so it only should be used for in-context access. Also note + * that this buffer changes regularly, thus external classes shouldn't hold a reference to it + * (unless they change it). + * + * @param clear Whether to clear vector before returning, the buffers will still be refcounted but + * the returned array will be the only reference to them. Also, this won't clear the child + * buffers. + * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. + */ @Override public ArrowBuf[] getBuffers(boolean clear) { setReaderAndWriterIndex(); @@ -369,7 +380,7 @@ public ArrowBuf[] getBuffers(boolean clear) { } else { List list = new ArrayList<>(); list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(clear))); + list.addAll(Arrays.asList(vector.getBuffers(false))); buffers = list.toArray(new ArrowBuf[list.size()]); } if (clear) {