From 73b631918e59fc03c29045f996ae0ba3365cc80c Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 13 Oct 2020 13:31:22 +0800 Subject: [PATCH] ARROW-10294: [Java] Resolve problems of DecimalVector APIs on ArrowBufs --- .../main/codegen/data/ValueVectorTypes.tdd | 2 +- .../codegen/templates/ComplexWriters.java | 4 ++-- .../templates/UnionFixedSizeListWriter.java | 2 +- .../codegen/templates/UnionListWriter.java | 4 ++-- .../apache/arrow/vector/DecimalVector.java | 12 +++++------ .../vector/complex/impl/PromotableWriter.java | 2 +- .../arrow/vector/util/DecimalUtility.java | 2 +- .../arrow/vector/ITTestLargeVector.java | 21 ++++++++++++++++++- 8 files changed, 34 insertions(+), 15 deletions(-) diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd index b9e052941ed..7612d3690b9 100644 --- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd @@ -125,7 +125,7 @@ maxPrecisionDigits: 38, nDecimalDigits: 4, friendlyType: "BigDecimal", typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}], arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal", - fields: [{name: "start", type: "int"}, {name: "buffer", type: "ArrowBuf"}] + fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}] } ] }, diff --git a/java/vector/src/main/codegen/templates/ComplexWriters.java b/java/vector/src/main/codegen/templates/ComplexWriters.java index ab99ac38dcd..5f5025ff59e 100644 --- a/java/vector/src/main/codegen/templates/ComplexWriters.java +++ b/java/vector/src/main/codegen/templates/ComplexWriters.java @@ -139,12 +139,12 @@ public void write(NullableDecimalHolder h){ vector.setValueCount(idx() + 1); } - public void writeDecimal(int start, ArrowBuf buffer){ + public void writeDecimal(long start, ArrowBuf buffer){ vector.setSafe(idx(), 1, start, buffer); vector.setValueCount(idx() + 1); } - public void writeDecimal(int start, ArrowBuf buffer, ArrowType arrowType){ + public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType){ DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(), ((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale()); vector.setSafe(idx(), 1, start, buffer); diff --git a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java index 0574dcf572d..94c7d8f6490 100644 --- a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java @@ -189,7 +189,7 @@ public void writeNull() { writer.writeNull(); } - public void writeDecimal(int start, ArrowBuf buffer, ArrowType arrowType) { + public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) { if (writer.idx() >= (idx() + 1) * listSize) { throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); } diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index a2664436acc..bb0cff4e06c 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -204,12 +204,12 @@ public void writeNull() { writer.writeNull(); } - public void writeDecimal(int start, ArrowBuf buffer, ArrowType arrowType) { + public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) { writer.writeDecimal(start, buffer, arrowType); writer.setPosition(writer.idx()+1); } - public void writeDecimal(int start, ArrowBuf buffer) { + public void writeDecimal(long start, ArrowBuf buffer) { writer.writeDecimal(start, buffer); writer.setPosition(writer.idx()+1); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java index 554e174dc2b..04344c35e34 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java @@ -246,7 +246,7 @@ public void setBigEndian(int index, byte[] value) { * @param start start index of data in the buffer * @param buffer ArrowBuf containing decimal value. */ - public void set(int index, int start, ArrowBuf buffer) { + public void set(int index, long start, ArrowBuf buffer) { BitVectorHelper.setBit(validityBuffer, index); valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, start, TYPE_WIDTH); } @@ -258,7 +258,7 @@ public void set(int index, int start, ArrowBuf buffer) { * @param buffer contains the decimal in little endian bytes * @param length length of the value in the buffer */ - public void setSafe(int index, int start, ArrowBuf buffer, int length) { + public void setSafe(int index, long start, ArrowBuf buffer, int length) { handleSafe(index); BitVectorHelper.setBit(validityBuffer, index); @@ -285,7 +285,7 @@ public void setSafe(int index, int start, ArrowBuf buffer, int length) { * @param buffer contains the decimal in big endian bytes * @param length length of the value in the buffer */ - public void setBigEndianSafe(int index, int start, ArrowBuf buffer, int length) { + public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) { handleSafe(index); BitVectorHelper.setBit(validityBuffer, index); @@ -394,7 +394,7 @@ public void setBigEndianSafe(int index, byte[] value) { * @param start start index of data in the buffer * @param buffer ArrowBuf containing decimal value. */ - public void setSafe(int index, int start, ArrowBuf buffer) { + public void setSafe(int index, long start, ArrowBuf buffer) { handleSafe(index); set(index, start, buffer); } @@ -460,7 +460,7 @@ public void setSafe(int index, DecimalHolder holder) { * @param start start position of the value in the buffer * @param buffer buffer containing the value to be stored in the vector */ - public void set(int index, int isSet, int start, ArrowBuf buffer) { + public void set(int index, int isSet, long start, ArrowBuf buffer) { if (isSet > 0) { set(index, start, buffer); } else { @@ -478,7 +478,7 @@ public void set(int index, int isSet, int start, ArrowBuf buffer) { * @param start start position of the value in the buffer * @param buffer buffer containing the value to be stored in the vector */ - public void setSafe(int index, int isSet, int start, ArrowBuf buffer) { + public void setSafe(int index, int isSet, long start, ArrowBuf buffer) { handleSafe(index); set(index, isSet, start, buffer); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index 6f40836e06b..51decee39fd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -320,7 +320,7 @@ public void write(DecimalHolder holder) { } @Override - public void writeDecimal(int start, ArrowBuf buffer, ArrowType arrowType) { + public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) { getWriter(MinorType.DECIMAL, new ArrowType.Decimal(MAX_DECIMAL_PRECISION, ((ArrowType.Decimal) arrowType).getScale())).writeDecimal(start, buffer, arrowType); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java index 711fa3b9cbf..36c988fac7e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java @@ -42,7 +42,7 @@ private DecimalUtility() {} public static BigDecimal getBigDecimalFromArrowBuf(ArrowBuf bytebuf, int index, int scale) { byte[] value = new byte[DECIMAL_BYTE_LENGTH]; byte temp; - final int startIndex = index * DECIMAL_BYTE_LENGTH; + final long startIndex = (long) index * DECIMAL_BYTE_LENGTH; // Decimal stored as little endian, need to swap bytes to make BigDecimal bytebuf.getBytes(startIndex, value, 0, DECIMAL_BYTE_LENGTH); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java index 8b824d6a291..19648dc9e13 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java @@ -21,9 +21,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.math.BigDecimal; + import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.holders.NullableDecimalHolder; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -114,7 +117,7 @@ public void testLargeDecimalVector() { final int vecLength = (int) (bufSize / DecimalVector.TYPE_WIDTH); try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - DecimalVector largeVec = new DecimalVector("vec", allocator, 38, 16)) { + DecimalVector largeVec = new DecimalVector("vec", allocator, 38, 0)) { largeVec.allocateNew(vecLength); logger.trace("Successfully allocated a vector with capacity {}", vecLength); @@ -139,6 +142,22 @@ public void testLargeDecimalVector() { } } logger.trace("Successfully read {} values", vecLength); + + // try setting values with a large offset in the buffer + largeVec.set(vecLength - 1, 12345L); + assertEquals(12345L, largeVec.getObject(vecLength - 1).longValue()); + + NullableDecimalHolder holder = new NullableDecimalHolder(); + holder.buffer = largeVec.valueBuffer; + holder.isSet = 1; + holder.start = (long) (vecLength - 1) * largeVec.getTypeWidth(); + assertTrue(holder.start > Integer.MAX_VALUE); + largeVec.set(0, holder); + + BigDecimal decimal = largeVec.getObject(0); + assertEquals(12345L, decimal.longValue()); + + logger.trace("Successfully setting values from large offsets"); } logger.trace("Successfully released the large vector."); }