From 830693cb3cefc9d71bdb6b96677b4084991efc41 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Mon, 30 Aug 2021 20:12:42 +0800 Subject: [PATCH 1/3] ARROW-13792 [Java]: The toString representation is incorrect for unsigned integer vectors --- .../org/apache/arrow/vector/UInt1Vector.java | 6 +++- .../org/apache/arrow/vector/UInt4Vector.java | 6 ++++ .../org/apache/arrow/vector/UInt8Vector.java | 6 ++++ .../arrow/vector/util/ValueVectorUtility.java | 21 ++++++++++-- .../apache/arrow/vector/TestValueVector.java | 32 +++++++++++++++++++ 5 files changed, 67 insertions(+), 4 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java index b735f5fbeb4..bd9a732c108 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.ValueVectorUtility; /** * UInt1Vector implements a fixed width (1 bytes) vector of @@ -328,7 +329,10 @@ public long getValueAsLong(int index) { return this.get(index) & PROMOTION_MASK; } - + @Override + public String toString() { + return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i)); + } private class TransferImpl implements TransferPair { UInt1Vector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java index c5045e6a510..cc954d67ddd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.ValueVectorUtility; /** * UInt4Vector implements a fixed width (4 bytes) vector of @@ -300,6 +301,11 @@ public long getValueAsLong(int index) { return this.get(index) & PROMOTION_MASK; } + @Override + public String toString() { + return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i)); + } + private class TransferImpl implements TransferPair { UInt4Vector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java index 3aa4451711d..98eaf25a6e2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java @@ -31,6 +31,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.ValueVectorUtility; /** * UInt8Vector implements a fixed width vector (8 bytes) of @@ -296,6 +297,11 @@ public long getValueAsLong(int index) { return this.get(index); } + @Override + public String toString() { + return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i)); + } + private class TransferImpl implements TransferPair { UInt8Vector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java index 60553b4e342..ceb7081e1ea 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java @@ -19,6 +19,8 @@ import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow; +import java.util.function.BiFunction; + import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.ValueVector; @@ -37,7 +39,7 @@ private ValueVectorUtility() { /** * Get the toString() representation of vector suitable for debugging. - * Note since vectors may have millions of values, this method only show max 20 values. + * Note since vectors may have millions of values, this method only shows max 20 values. * Examples as below (v represents value): *
  • * vector with 0 value: @@ -52,7 +54,20 @@ private ValueVectorUtility() { * [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, ..., v90, v91, v92, v93, v94, v95, v96, v97, v98, v99] *
  • */ - public static String getToString(ValueVector vector, int start, int end) { + public static String getToString(V vector, int start, int end) { + return getToString(vector, start, end, (v, i) -> v.getObject(i)); + } + + /** + * Get the toString() representation of vector suitable for debugging. + * Note since vectors may have millions of values, this method only shows at most 20 values. + * @param vector the vector for which to get toString representation. + * @param start the starting index, inclusive. + * @param end the end index, exclusive. + * @param valueToString the function to transform individual elements to strings. + */ + public static String getToString( + V vector, int start, int end, BiFunction valueToString) { Preconditions.checkNotNull(vector); final int length = end - start; Preconditions.checkArgument(length >= 0); @@ -77,7 +92,7 @@ public static String getToString(ValueVector vector, int start, int end) { i = end - window - 1; skipComma = true; } else { - sb.append(vector.getObject(i)); + sb.append(valueToString.apply(vector, i)); } if (i == end - 1) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 2d0f8dab37d..13309952cae 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -2791,6 +2791,38 @@ public void testToString() { } } + @Test + public void testUInt1VectorToString() { + try (final UInt1Vector uInt1Vector = new UInt1Vector("uInt1Vector", allocator)) { + setVector(uInt1Vector, (byte) 0xff); + assertEquals("[255]", uInt1Vector.toString()); + } + } + + @Test + public void testUInt2VectorToString() { + try (final UInt2Vector uInt2Vector = new UInt2Vector("uInt2Vector", allocator)) { + setVector(uInt2Vector, (char) 0xffff); + assertEquals("[" + new Character((char) 0xffff) + "]", uInt2Vector.toString()); + } + } + + @Test + public void testUInt4VectorToString() { + try (final UInt4Vector uInt4Vector = new UInt4Vector("uInt4Vector", allocator)) { + setVector(uInt4Vector, 0xffffffff); + assertEquals("[4294967295]", uInt4Vector.toString()); + } + } + + @Test + public void testUInt8VectorToString() { + try (final UInt8Vector uInt8Vector = new UInt8Vector("uInt8Vector", allocator)) { + setVector(uInt8Vector, 0xffffffffffffffffL); + assertEquals("[18446744073709551615]", uInt8Vector.toString()); + } + } + @Test public void testUnloadVariableWidthVector() { try (final VarCharVector varCharVector = new VarCharVector("var char", allocator)) { From 1c745df20680c610374454312289b1b87a2c6c24 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 31 Aug 2021 14:57:55 +0800 Subject: [PATCH 2/3] ARROW-13792 [Java]: Resolve comments --- .../src/test/java/org/apache/arrow/vector/TestValueVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 13309952cae..f405201a74c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -2803,7 +2803,7 @@ public void testUInt1VectorToString() { public void testUInt2VectorToString() { try (final UInt2Vector uInt2Vector = new UInt2Vector("uInt2Vector", allocator)) { setVector(uInt2Vector, (char) 0xffff); - assertEquals("[" + new Character((char) 0xffff) + "]", uInt2Vector.toString()); + assertEquals("[\uffff]", uInt2Vector.toString()); } } From 6f35943a42fabf809ebe1ee51a580b6038680b07 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 31 Aug 2021 18:24:17 +0800 Subject: [PATCH 3/3] ARROW-13792 [Java]: Resolve comments again --- .../src/main/java/org/apache/arrow/vector/UInt2Vector.java | 7 +++++++ .../test/java/org/apache/arrow/vector/TestValueVector.java | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java index 917700e09c6..5c29ab6b321 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.util.ValueVectorUtility; /** * UInt2Vector implements a fixed width (2 bytes) vector of @@ -305,6 +306,12 @@ public long getValueAsLong(int index) { return this.get(index); } + @Override + public String toString() { + return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> + v.isNull(i) ? "null" : Integer.toString(v.get(i) & 0x0000ffff)); + } + private class TransferImpl implements TransferPair { UInt2Vector to; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index f405201a74c..52655bf8fcc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -2803,7 +2803,7 @@ public void testUInt1VectorToString() { public void testUInt2VectorToString() { try (final UInt2Vector uInt2Vector = new UInt2Vector("uInt2Vector", allocator)) { setVector(uInt2Vector, (char) 0xffff); - assertEquals("[\uffff]", uInt2Vector.toString()); + assertEquals("[65535]", uInt2Vector.toString()); } }