diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java
new file mode 100644
index 00000000000..74387de9486
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for all int type vectors.
+ */
+public interface BaseIntVector extends ValueVector {
+
+  /**
+   * Sets the element at the given index to an encoded value taken from a
+   * {@link org.apache.arrow.vector.dictionary.Dictionary}.
+   *
+   * @param index position of the element to set
+   * @param value encoded value (dictionary index) to store
+   * @throws IllegalArgumentException if the value does not fit into the vector's element type
+   */
+  void setEncodedValue(int index, int value);
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java
index 65ce53e2581..416ffd53fd3 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java
@@ -35,7 +35,7 @@
  * integer values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class BigIntVector extends BaseFixedWidthVector {
+public class BigIntVector extends BaseFixedWidthVector implements BaseIntVector {
   public static final byte TYPE_WIDTH = 8;
   private final FieldReader reader;
 
@@ -339,6 +339,11 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((BigIntVector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     BigIntVector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java
index 3a8207f0abc..5255d87a2ed 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java
@@ -35,7 +35,7 @@
  * integer values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class IntVector extends BaseFixedWidthVector {
+public class IntVector extends BaseFixedWidthVector implements BaseIntVector {
   public static final byte TYPE_WIDTH = 4;
   private final FieldReader reader;
 
@@ -343,6 +343,11 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((IntVector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     IntVector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java
index dddc46fef2b..2d3f78f9766 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java
@@ -20,6 +20,7 @@
 import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.complex.impl.SmallIntReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.NullableSmallIntHolder;
@@ -35,7 +36,7 @@
  * short values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class SmallIntVector extends BaseFixedWidthVector {
+public class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector {
   public static final byte TYPE_WIDTH = 2;
   private final FieldReader reader;
 
@@ -370,6 +371,12 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((SmallIntVector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    Preconditions.checkArgument(value >= Short.MIN_VALUE && value <= Short.MAX_VALUE, "value is overflow: %s", value);
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     SmallIntVector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java
index df40b6e57cc..66f7ca35d01 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java
@@ -20,6 +20,7 @@
 import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.complex.impl.TinyIntReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.NullableTinyIntHolder;
@@ -35,7 +36,7 @@
  * byte values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class TinyIntVector extends BaseFixedWidthVector {
+public class TinyIntVector extends BaseFixedWidthVector implements BaseIntVector {
   public static final byte TYPE_WIDTH = 1;
   private final FieldReader reader;
 
@@ -370,6 +371,12 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((TinyIntVector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    Preconditions.checkArgument(value >= Byte.MIN_VALUE && value <= Byte.MAX_VALUE, "value is overflow: %s", value);
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     TinyIntVector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
index c5133344fe8..85d48ad9e37 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
@@ -20,6 +20,7 @@
 import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.complex.impl.UInt1ReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.NullableUInt1Holder;
@@ -35,7 +36,7 @@
  * integer values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class UInt1Vector extends BaseFixedWidthVector {
+public class UInt1Vector extends BaseFixedWidthVector implements BaseIntVector {
   private static final byte TYPE_WIDTH = 1;
   private final FieldReader reader;
 
@@ -150,7 +151,7 @@ public void copyFrom(int fromIndex, int thisIndex, UInt1Vector from) {
   }
 
   /**
-   * Identical to {@link #copyFrom()} but reallocates buffer if index is larger
+   * Identical to {@link #copyFrom(int, int, UInt1Vector)} but reallocates buffer if index is larger
    * than capacity.
    */
   public void copyFromSafe(int fromIndex, int thisIndex, UInt1Vector from) {
@@ -329,6 +330,12 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((UInt1Vector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    Preconditions.checkArgument(value >= 0 && value <= 0xFF, "value is overflow: %s", value);
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     UInt1Vector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
index 631050d57a2..dbea9f82b6e 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
@@ -20,6 +20,7 @@
 import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.complex.impl.UInt2ReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.NullableUInt2Holder;
@@ -35,7 +36,7 @@
  * integer values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class UInt2Vector extends BaseFixedWidthVector {
+public class UInt2Vector extends BaseFixedWidthVector implements BaseIntVector {
   private static final byte TYPE_WIDTH = 2;
   private final FieldReader reader;
 
@@ -308,6 +309,12 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((UInt2Vector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    Preconditions.checkArgument(value >= 0 && value <= Character.MAX_VALUE, "value is overflow: %s", value);
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     UInt2Vector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
index 84e6b8f3788..b2eadc2a22b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
@@ -35,7 +35,7 @@
  * integer values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class UInt4Vector extends BaseFixedWidthVector {
+public class UInt4Vector extends BaseFixedWidthVector implements BaseIntVector {
   private static final byte TYPE_WIDTH = 4;
   private final FieldReader reader;
 
@@ -301,6 +301,11 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((UInt4Vector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     UInt4Vector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java
index 0f8da381ee5..a1b3bdabdee 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java
@@ -37,7 +37,7 @@
  * integer values which could be null. A validity buffer (bit vector) is
  * maintained to track which elements in the vector are null.
  */
-public class UInt8Vector extends BaseFixedWidthVector {
+public class UInt8Vector extends BaseFixedWidthVector implements BaseIntVector {
   private static final byte TYPE_WIDTH = 8;
   private final FieldReader reader;
 
@@ -302,6 +302,11 @@ public TransferPair makeTransferPair(ValueVector to) {
     return new TransferImpl((UInt8Vector) to);
   }
 
+  @Override
+  public void setEncodedValue(int index, int value) {
+    this.setSafe(index, value);
+  }
+
   private class TransferImpl implements TransferPair {
     UInt8Vector to;
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java
index 1c2a0aced17..698191c2ca2 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java
@@ -17,12 +17,10 @@
 
 package org.apache.arrow.vector.dictionary;
 
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.arrow.vector.BaseIntVector;
 import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.types.Types.MinorType;
@@ -61,43 +59,33 @@ public static ValueVector encode(ValueVector vector, Dictionary dictionary) {
     Field indexField = new Field(valueField.getName(), indexFieldType, null);
 
     // vector to hold our indices (dictionary encoded values)
-    FieldVector indices = indexField.createVector(vector.getAllocator());
-
-    // use reflection to pull out the set method
-    // TODO implement a common interface for int vectors
-    Method setter = null;
-    for (Class c : Arrays.asList(int.class, long.class)) {
-      try {
-        setter = indices.getClass().getMethod("setSafe", int.class, c);
-        break;
-      } catch (NoSuchMethodException e) {
-        // ignore
-      }
+    FieldVector createdVector = indexField.createVector(vector.getAllocator());
+    if (!(createdVector instanceof BaseIntVector)) {
+      throw new IllegalArgumentException("Dictionary encoding does not have a valid int type:" +
+          createdVector.getClass());
     }
-    if (setter == null) {
-      throw new IllegalArgumentException("Dictionary encoding does not have a valid int type:" + indices.getClass());
-    }
-
-    int count = vector.getValueCount();
 
+    BaseIntVector indices = (BaseIntVector) createdVector;
     indices.allocateNew();
 
+    int count = vector.getValueCount();
+
     try {
       for (int i = 0; i < count; i++) {
         Object value = vector.getObject(i);
         if (value != null) { // if it's null leave it null
           // note: this may fail if value was not included in the dictionary
-          Object encoded = lookUps.get(value);
+          Integer encoded = lookUps.get(value);
           if (encoded == null) {
             throw new IllegalArgumentException("Dictionary encoding not defined for value:" + value);
           }
-          setter.invoke(indices, i, encoded);
+          indices.setEncodedValue(i, encoded);
         }
       }
-    } catch (IllegalAccessException e) {
-      throw new RuntimeException("IllegalAccessException invoking vector mutator set():", e);
-    } catch (InvocationTargetException e) {
-      throw new RuntimeException("InvocationTargetException invoking vector mutator set():", e.getCause());
+    } catch (RuntimeException e) {
+      // release the index buffers: on failure the caller never receives the vector to close it
+      indices.close();
+      throw e;
     }
 
     indices.setValueCount(count);