Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public void endList() {

<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
@Override
public void write(${name}Holder holder) {
fail("${name}");
Expand All @@ -62,6 +63,12 @@ public void write(${name}Holder holder) {
fail("${name}");
}

<#if minor.class == "Decimal">
public void write${minor.class}(${friendlyType} value) {
fail("${name}");
}
</#if>

</#list></#list>

public void writeNull() {
Expand Down
11 changes: 11 additions & 0 deletions java/vector/src/main/codegen/templates/ComplexWriters.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
<#assign eName = name />
<#assign javaType = (minor.javaType!type.javaType) />
<#assign fields = minor.fields!type.fields />
<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />

<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${eName}WriterImpl.java" />
<#include "/@includes/license.ftl" />
Expand Down Expand Up @@ -115,7 +116,13 @@ public void write(Nullable${minor.class}Holder h) {
mutator.setSafe(idx()<#if mode == "Nullable">, 1</#if><#list fields as field><#if field.include!true >, ${field.name}</#if></#list>);
vector.getMutator().setValueCount(idx()+1);
}
<#if minor.class == "Decimal">

public void write${minor.class}(${friendlyType} value) {
mutator.setSafe(idx(), value);
vector.getMutator().setValueCount(idx()+1);
}
</#if>
<#if mode == "Nullable">

public void writeNull() {
Expand All @@ -140,6 +147,10 @@ public interface ${eName}Writer extends BaseWriter {
public void write(${minor.class}Holder h);

public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>);
<#if minor.class == "Decimal">

public void write${minor.class}(${friendlyType} value);
</#if>
}

</#list>
Expand Down
18 changes: 15 additions & 3 deletions java/vector/src/main/codegen/templates/FixedValueVectors.java
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ public void get(int index, Nullable${minor.class}Holder holder) {

@Override
public ${friendlyType} getObject(int index) {
return org.apache.arrow.vector.util.DecimalUtility.getBigDecimalFromArrowBuf(data, ${type.width} * index, scale);
return DecimalUtility.getBigDecimalFromArrowBuf(data, index, scale);
}

<#else>
Expand Down Expand Up @@ -596,10 +596,10 @@ void set(int index, Nullable${minor.class}Holder holder){
set(index, holder.start, holder.buffer);
}

public void setSafe(int index, Nullable${minor.class}Holder holder){
public void setSafe(int index, Nullable${minor.class}Holder holder){
setSafe(index, holder.start, holder.buffer);
}
public void setSafe(int index, ${minor.class}Holder holder){
public void setSafe(int index, ${minor.class}Holder holder){
setSafe(index, holder.start, holder.buffer);
}

Expand All @@ -614,6 +614,18 @@ public void set(int index, int start, ArrowBuf buffer){
data.setBytes(index * ${type.width}, buffer, start, ${type.width});
}

public void set(int index, ${friendlyType} value){
DecimalUtility.checkPrecisionAndScale(value, precision, scale);
DecimalUtility.writeBigDecimalToArrowBuf(value, data, index);
}

public void setSafe(int index, ${friendlyType} value){
while(index >= getValueCapacity()) {
reAlloc();
}
set(index, value);
}

<#else>
protected void set(int index, ${minor.class}Holder holder){
set(index, holder.start, holder.buffer);
Expand Down
13 changes: 13 additions & 0 deletions java/vector/src/main/codegen/templates/NullableValueVectors.java
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,19 @@ public void setSafe(int index, ${minor.javaType!type.javaType} value) {
setCount++;
}

</#if>
<#if minor.class == "Decimal">
public void set(int index, ${friendlyType} value) {
bits.getMutator().setToOne(index);
values.getMutator().set(index, value);
}

public void setSafe(int index, ${friendlyType} value) {
bits.getMutator().setSafeToOne(index);
values.getMutator().setSafe(index, value);
setCount++;
}

</#if>
@Override
public void setValueCount(int valueCount) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.arrow.vector.BufferBacked;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
Expand Down Expand Up @@ -72,6 +73,7 @@
import org.apache.arrow.vector.schema.ArrowVectorType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.DecimalUtility;
import org.apache.arrow.vector.util.DictionaryUtility;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
Expand Down Expand Up @@ -235,14 +237,16 @@ private void readVector(Field field, FieldVector vector) throws JsonParseExcepti
nextFieldIs(vectorType.getName());
readToken(START_ARRAY);
ValueVector valueVector = (ValueVector) innerVector;
valueVector.allocateNew();
Mutator mutator = valueVector.getMutator();

int innerVectorCount = vectorType.equals(OFFSET) ? count + 1 : count;
valueVector.setInitialCapacity(innerVectorCount);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the vectors do not use setSafe so reading beyond the default capacity would cause problems. Maybe for cases where the majority of values are null this is overdoing it, but I think that is less common.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

valueVector.allocateNew();

for (int i = 0; i < innerVectorCount; i++) {
parser.nextToken();
setValueFromParser(valueVector, i);
}
Mutator mutator = valueVector.getMutator();
mutator.setValueCount(innerVectorCount);
readToken(END_ARRAY);
}
Expand Down Expand Up @@ -312,6 +316,12 @@ private void setValueFromParser(ValueVector valueVector, int i) throws IOExcepti
case FLOAT8:
((Float8Vector) valueVector).getMutator().set(i, parser.readValueAs(Double.class));
break;
case DECIMAL: {
DecimalVector decimalVector = ((DecimalVector) valueVector);
byte[] value = decodeHexSafe(parser.readValueAs(String.class));
DecimalUtility.writeByteArrayToArrowBuf(value, decimalVector.getBuffer(), i);
}
break;
case VARBINARY:
((VarBinaryVector) valueVector).getMutator().setSafe(i, decodeHexSafe(parser.readValueAs(String.class)));
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@
import java.util.Set;

import com.google.common.collect.ImmutableList;
import io.netty.buffer.ArrowBuf;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.BufferBacked;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.TimeMicroVector;
import org.apache.arrow.vector.TimeMilliVector;
Expand All @@ -54,6 +56,7 @@
import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter;
import com.fasterxml.jackson.databind.MappingJsonFactory;
import org.apache.arrow.vector.util.DecimalUtility;
import org.apache.arrow.vector.util.DictionaryUtility;
import org.apache.commons.codec.binary.Hex;

Expand Down Expand Up @@ -233,9 +236,16 @@ private void writeValueToGenerator(ValueVector valueVector, int i) throws IOExce
case BIT:
generator.writeNumber(((BitVector) valueVector).getAccessor().get(i));
break;
case VARBINARY:
String hexString = Hex.encodeHexString(((VarBinaryVector) valueVector).getAccessor().get(i));
generator.writeObject(hexString);
case VARBINARY: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we remove the brackets?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's there to limit the scope of the variables declared in the case block to that case only. Since now there are 2 blocks decoding hex values, just to prevent using the wrong variables.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

String hexString = Hex.encodeHexString(((VarBinaryVector) valueVector).getAccessor().get(i));
generator.writeString(hexString);
}
break;
case DECIMAL: {
ArrowBuf bytebuf = valueVector.getDataBuffer();
String hexString = Hex.encodeHexString(DecimalUtility.getByteArrayFromArrowBuf(bytebuf, i));
generator.writeString(hexString);
}
break;
default:
// TODO: each type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@
package org.apache.arrow.vector.util;

import io.netty.buffer.ArrowBuf;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.UnpooledByteBufAllocator;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.types.pojo.ArrowType;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Arrays;


public class DecimalUtility {
Expand Down Expand Up @@ -69,7 +68,7 @@ public class DecimalUtility {

public static final int DECIMAL_BYTE_LENGTH = 16;

/*
/**
* Simple function that returns the static precomputed
* power of ten, instead of using Math.pow
*/
Expand All @@ -78,7 +77,7 @@ public static long getPowerOfTen(int power) {
return scale_long_constants[(power)];
}

/*
/**
* Math.pow returns a double and while multiplying with large digits
* in the decimal data type we encounter noise. So instead of multiplying
* with Math.pow we use the static constants to perform the multiplication
Expand All @@ -103,7 +102,8 @@ public static long adjustScaleDivide(long input, int factor) {
}
}

/* Returns a string representation of the given integer
/**
* Returns a string representation of the given integer
* If the length of the given integer is less than the
* passed length, this function will prepend zeroes to the string
*/
Expand Down Expand Up @@ -136,33 +136,86 @@ public static StringBuilder toStringWithZeroes(long number, int desiredLength) {
return str;
}

public static BigDecimal getBigDecimalFromArrowBuf(ArrowBuf bytebuf, int startIndex, int scale) {
/**
* Read an ArrowType.Decimal at the given value index in the ArrowBuf and convert to a BigDecimal
* with the given scale.
*/
public static BigDecimal getBigDecimalFromArrowBuf(ArrowBuf bytebuf, int index, int scale) {
byte[] value = new byte[DECIMAL_BYTE_LENGTH];
final int startIndex = index * DECIMAL_BYTE_LENGTH;
bytebuf.getBytes(startIndex, value, 0, DECIMAL_BYTE_LENGTH);
BigInteger unscaledValue = new BigInteger(value);
return new BigDecimal(unscaledValue, scale);
}

public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int start, int scale) {
/**
* Read an ArrowType.Decimal from the ByteBuffer and convert to a BigDecimal with the given
* scale.
*/
public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int scale) {
byte[] value = new byte[DECIMAL_BYTE_LENGTH];
bytebuf.get(value);
BigInteger unscaledValue = new BigInteger(value);
return new BigDecimal(unscaledValue, scale);
}

/**
* Read an ArrowType.Decimal from the ArrowBuf at the given value index and return it as a byte
* array.
*/
public static byte[] getByteArrayFromArrowBuf(ArrowBuf bytebuf, int index) {
final byte[] value = new byte[DECIMAL_BYTE_LENGTH];
final int startIndex = index * DECIMAL_BYTE_LENGTH;
bytebuf.getBytes(startIndex, value, 0, DECIMAL_BYTE_LENGTH);
return value;
}

/**
* Check that the BigDecimal scale equals the vectorScale and that the BigDecimal precision is
* less than or equal to the vectorPrecision. If not, then an UnsupportedOperationException is
* thrown, otherwise returns true.
*/
public static boolean checkPrecisionAndScale(BigDecimal value, int vectorPrecision, int vectorScale) {
if (value.scale() != vectorScale) {
throw new UnsupportedOperationException("BigDecimal scale must equal that in the Arrow vector: " +
value.scale() + " != " + vectorScale);
}
if (value.precision() > vectorPrecision) {
throw new UnsupportedOperationException("BigDecimal precision can not be greater than that in the Arrow vector: " +
value.precision() + " > " + vectorPrecision);
}
return true;
}

/**
* Write the given BigDecimal to the ArrowBuf at the given value index. Will throw an
* UnsupportedOperationException if the decimal size is greater than the Decimal vector byte
* width.
*/
public static void writeBigDecimalToArrowBuf(BigDecimal value, ArrowBuf bytebuf, int index) {
final byte[] bytes = value.unscaledValue().toByteArray();
final int padValue = value.signum() == -1 ? 0xFF : 0;
writeByteArrayToArrowBuf(bytes, bytebuf, index, padValue);
}

/**
* Write the given byte array to the ArrowBuf at the given value index. Will throw an
* UnsupportedOperationException if the decimal size is greater than the Decimal vector byte
* width.
*/
public static void writeByteArrayToArrowBuf(byte[] bytes, ArrowBuf bytebuf, int index) {
writeByteArrayToArrowBuf(bytes, bytebuf, index, 0);
}

private static void writeByteArrayToArrowBuf(byte[] bytes, ArrowBuf bytebuf, int index, int padValue) {
final int startIndex = index * DECIMAL_BYTE_LENGTH;
if (bytes.length > DECIMAL_BYTE_LENGTH) {
throw new UnsupportedOperationException("Decimal size greater than 16 bytes");
}
final int padLength = DECIMAL_BYTE_LENGTH - bytes.length;
final int padValue = value.signum() == -1 ? 0xFF : 0;
for (int i = 0; i < padLength; i++) {
bytebuf.setByte(startIndex + i, padValue);
}
bytebuf.setBytes(startIndex + padLength, bytes, 0, bytes.length);
}
}


Loading