From 0407e63756151bca3c0d192b55d74180ab4f1ff5 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Mon, 20 Mar 2017 22:34:12 -0700 Subject: [PATCH] ARROW-676: move from MinorType to FieldType in ValueVectors to carry all the relevant type bits --- .../apache/arrow/tools/EchoServerTest.java | 55 ++-- .../src/main/codegen/data/ArrowTypes.tdd | 38 ++- .../src/main/codegen/templates/ArrowType.java | 81 +++++- .../main/codegen/templates/MapWriters.java | 10 +- .../templates/NullableValueVectors.java | 42 +-- .../main/codegen/templates/UnionVector.java | 20 +- .../org/apache/arrow/vector/FieldVector.java | 3 +- .../apache/arrow/vector/VectorSchemaRoot.java | 6 +- .../complex/AbstractContainerVector.java | 20 +- .../vector/complex/AbstractMapVector.java | 29 +- .../complex/BaseRepeatedValueVector.java | 11 +- .../arrow/vector/complex/ListVector.java | 13 +- .../arrow/vector/complex/MapVector.java | 6 +- .../vector/complex/NullableMapVector.java | 1 + .../complex/impl/ComplexWriterImpl.java | 5 +- .../vector/complex/impl/PromotableWriter.java | 5 +- .../org/apache/arrow/vector/types/Types.java | 264 ++++-------------- .../apache/arrow/vector/types/pojo/Field.java | 82 +++--- .../arrow/vector/types/pojo/FieldType.java | 60 ++++ .../arrow/vector/TestDecimalVector.java | 14 +- .../arrow/vector/TestDictionaryVector.java | 16 +- .../org/apache/arrow/vector/TestUtils.java | 39 +++ .../apache/arrow/vector/TestValueVector.java | 15 +- .../complex/impl/TestPromotableWriter.java | 3 +- .../arrow/vector/file/TestArrowFile.java | 20 +- .../vector/file/TestArrowReaderWriter.java | 12 +- 26 files changed, 461 insertions(+), 409 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java diff --git a/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java b/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java index 
5970c57f465..7d07588892c 100644 --- a/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java +++ b/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java @@ -18,7 +18,19 @@ package org.apache.arrow.tools; -import com.google.common.collect.ImmutableList; +import static java.util.Arrays.asList; +import static org.apache.arrow.vector.types.Types.MinorType.TINYINT; +import static org.apache.arrow.vector.types.Types.MinorType.VARCHAR; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.Socket; +import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -39,6 +51,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Text; import org.junit.AfterClass; @@ -46,17 +59,7 @@ import org.junit.BeforeClass; import org.junit.Test; -import java.io.IOException; -import java.net.Socket; -import java.net.UnknownHostException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import com.google.common.collect.ImmutableList; public class EchoServerTest { @@ -133,9 +136,12 @@ private void testEchoServer(int serverPort, public void basicTest() throws InterruptedException, IOException { BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE); - Field field = new Field("testField", true, new ArrowType.Int(8, true), Collections 
- .emptyList()); - NullableTinyIntVector vector = new NullableTinyIntVector("testField", alloc, null); + Field field = new Field( + "testField", true, + new ArrowType.Int(8, true), + Collections.emptyList()); + NullableTinyIntVector vector = + new NullableTinyIntVector("testField", FieldType.nullable(TINYINT.getType()), alloc); Schema schema = new Schema(asList(field)); // Try an empty stream, just the header. @@ -152,9 +158,16 @@ public void basicTest() throws InterruptedException, IOException { public void testFlatDictionary() throws IOException { DictionaryEncoding writeEncoding = new DictionaryEncoding(1L, false, null); try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - NullableIntVector writeVector = new NullableIntVector("varchar", allocator, writeEncoding); - NullableVarCharVector writeDictionaryVector = new NullableVarCharVector("dict", - allocator, null)) { + NullableIntVector writeVector = + new NullableIntVector( + "varchar", + new FieldType(true, MinorType.INT.getType(), writeEncoding), + allocator); + NullableVarCharVector writeDictionaryVector = + new NullableVarCharVector( + "dict", + FieldType.nullable(VARCHAR.getType()), + allocator)) { writeVector.allocateNewSafe(); NullableIntVector.Mutator mutator = writeVector.getMutator(); mutator.set(0, 0); @@ -222,8 +235,8 @@ public void testFlatDictionary() throws IOException { public void testNestedDictionary() throws IOException { DictionaryEncoding writeEncoding = new DictionaryEncoding(2L, false, null); try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - NullableVarCharVector writeDictionaryVector = new NullableVarCharVector("dictionary", - allocator, null); + NullableVarCharVector writeDictionaryVector = + new NullableVarCharVector("dictionary", FieldType.nullable(VARCHAR.getType()), allocator); ListVector writeVector = new ListVector("list", allocator, null, null)) { // data being written: @@ -234,7 +247,7 @@ public void testNestedDictionary() throws IOException { 
writeDictionaryVector.getMutator().set(1, "bar".getBytes(StandardCharsets.UTF_8)); writeDictionaryVector.getMutator().setValueCount(2); - writeVector.addOrGetVector(MinorType.INT, writeEncoding); + writeVector.addOrGetVector(new FieldType(true, MinorType.INT.getType(), writeEncoding)); writeVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(writeVector); listWriter.startList(); diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 67785ad6b4d..e1fb5e0619a 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -14,59 +14,73 @@ types: [ { name: "Null", - fields: [] + fields: [], + complex: false }, { name: "Struct_", - fields: [] + fields: [], + complex: true }, { name: "List", - fields: [] + fields: [], + complex: true }, { name: "Union", - fields: [{name: "mode", type: short, valueType: UnionMode}, {name: "typeIds", type: "int[]"}] + fields: [{name: "mode", type: short, valueType: UnionMode}, {name: "typeIds", type: "int[]"}], + complex: true }, { name: "Int", - fields: [{name: "bitWidth", type: int}, {name: "isSigned", type: boolean}] + fields: [{name: "bitWidth", type: int}, {name: "isSigned", type: boolean}], + complex: false }, { name: "FloatingPoint", - fields: [{name: precision, type: short, valueType: FloatingPointPrecision}] + fields: [{name: precision, type: short, valueType: FloatingPointPrecision}], + complex: false }, { name: "Utf8", - fields: [] + fields: [], + complex: false }, { name: "Binary", - fields: [] + fields: [], + complex: false }, { name: "Bool", - fields: [] + fields: [], + complex: false }, { name: "Decimal", - fields: [{name: "precision", type: int}, {name: "scale", type: int}] + fields: [{name: "precision", type: int}, {name: "scale", type: int}], + complex: false }, { name: "Date", fields: [{name: "unit", type: short, valueType: DateUnit}] + complex: false }, { name: "Time", - fields: 
[{name: "unit", type: short, valueType: TimeUnit}, {name: "bitWidth", type: int}] + fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "bitWidth", type: int}], + complex: false }, { name: "Timestamp", fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "timezone", type: String}] + complex: false }, { name: "Interval", - fields: [{name: "unit", type: short, valueType: IntervalUnit}] + fields: [{name: "unit", type: short, valueType: IntervalUnit}], + complex: false } ] } diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index 91cbe98196b..a9e875a2095 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -50,13 +50,35 @@ }) public abstract class ArrowType { + public static abstract class PrimitiveType extends ArrowType { + + private PrimitiveType() { + } + + @Override + public boolean isComplex() { + return false; + } + } + + public static abstract class ComplexType extends ArrowType { + + private ComplexType() { + } + + @Override + public boolean isComplex() { + return true; + } + } + public static enum ArrowTypeID { <#list arrowTypes.types as type> <#assign name = type.name> ${name?remove_ending("_")}(Type.${name}), NONE(Type.NONE); - + private final byte flatbufType; public byte getFlatbufID() { @@ -70,6 +92,8 @@ private ArrowTypeID(byte flatbufType) { @JsonIgnore public abstract ArrowTypeID getTypeID(); + @JsonIgnore + public abstract boolean isComplex(); public abstract int getType(FlatBufferBuilder builder); public abstract T accept(ArrowTypeVisitor visitor); @@ -87,21 +111,56 @@ public static interface ArrowTypeVisitor { } + /** + * to visit the Complex ArrowTypes and bundle Primitive ones in one case + */ + public static abstract class ComplexTypeVisitor implements ArrowTypeVisitor { + + public T visit(PrimitiveType type) { + throw new UnsupportedOperationException("Unexpected 
Primitive type: " + type); + } + + <#list arrowTypes.types as type> + <#if !type.complex> + public final T visit(${type.name?remove_ending("_")} type) { + return visit((PrimitiveType) type); + } + + + } + + /** + * to visit the Primitive ArrowTypes and bundle Complex ones under one case + */ + public static abstract class PrimitiveTypeVisitor implements ArrowTypeVisitor { + + public T visit(ComplexType type) { + throw new UnsupportedOperationException("Unexpected Complex type: " + type); + } + + <#list arrowTypes.types as type> + <#if type.complex> + public final T visit(${type.name?remove_ending("_")} type) { + return visit((ComplexType) type); + } + + + } + <#list arrowTypes.types as type> <#assign name = type.name?remove_ending("_")> <#assign fields = type.fields> - public static class ${name} extends ArrowType { + public static class ${name} extends <#if type.complex>ComplexType<#else>PrimitiveType { public static final ArrowTypeID TYPE_TYPE = ArrowTypeID.${name}; <#if type.fields?size == 0> public static final ${name} INSTANCE = new ${name}(); - + <#else> <#list fields as field> <#assign fieldType = field.valueType!field.type> ${fieldType} ${field.name}; - <#if type.fields?size != 0> @JsonCreator public ${type.name}( <#list type.fields as field> @@ -113,6 +172,13 @@ public static class ${name} extends ArrowType { this.${field.name} = ${field.name}; } + + <#list fields as field> + <#assign fieldType = field.valueType!field.type> + public ${fieldType} get${field.name?cap_first}() { + return ${field.name}; + } + @Override @@ -143,13 +209,6 @@ public int getType(FlatBufferBuilder builder) { return org.apache.arrow.flatbuf.${type.name}.end${type.name}(builder); } - <#list fields as field> - <#assign fieldType = field.valueType!field.type> - public ${fieldType} get${field.name?cap_first}() { - return ${field.name}; - } - - public String toString() { return "${name}" <#if fields?size != 0> diff --git a/java/vector/src/main/codegen/templates/MapWriters.java 
b/java/vector/src/main/codegen/templates/MapWriters.java index 428ce0427d4..d3e6de95271 100644 --- a/java/vector/src/main/codegen/templates/MapWriters.java +++ b/java/vector/src/main/codegen/templates/MapWriters.java @@ -64,7 +64,7 @@ public class ${mode}MapWriter extends AbstractFieldWriter { list(child.getName()); break; case UNION: - UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), MinorType.UNION, UnionVector.class, null), getNullableMapWriterFactory()); + UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), FieldType.nullable(MinorType.UNION.getType()), UnionVector.class), getNullableMapWriterFactory()); fields.put(handleCase(child.getName()), writer); break; <#list vv.types as type><#list type.minor as minor> @@ -113,7 +113,7 @@ public MapWriter map(String name) { FieldWriter writer = fields.get(finalName); if(writer == null){ int vectorCount=container.size(); - NullableMapVector vector = container.addOrGet(name, MinorType.MAP, NullableMapVector.class, null); + NullableMapVector vector = container.addOrGet(name, FieldType.nullable(MinorType.MAP.getType()), NullableMapVector.class); writer = new PromotableWriter(vector, container, getNullableMapWriterFactory()); if(vectorCount != container.size()) { writer.allocate(); @@ -157,7 +157,7 @@ public ListWriter list(String name) { FieldWriter writer = fields.get(finalName); int vectorCount = container.size(); if(writer == null) { - writer = new PromotableWriter(container.addOrGet(name, MinorType.LIST, ListVector.class, null), container, getNullableMapWriterFactory()); + writer = new PromotableWriter(container.addOrGet(name, FieldType.nullable(MinorType.LIST.getType()), ListVector.class), container, getNullableMapWriterFactory()); if (container.size() > vectorCount) { writer.allocate(); } @@ -222,7 +222,9 @@ public void end() { if(writer == null) { ValueVector vector; ValueVector currentVector = container.getChild(name); - ${vectName}Vector v = container.addOrGet(name, 
MinorType.${upperName}, ${vectName}Vector.class, null<#if minor.class == "Decimal"> , new int[] {precision, scale}); + ${vectName}Vector v = container.addOrGet(name, + FieldType.nullable(<#if minor.class == "Decimal">new Decimal(precision, scale)<#else>MinorType.${upperName}.getType()), + ${vectName}Vector.class); writer = new PromotableWriter(v, container, getNullableMapWriterFactory()); vector = v; if (currentVector == null || currentVector != vector) { diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index ec2ce7930cf..8e1727ca6c8 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -64,28 +64,21 @@ public final class ${className} extends BaseDataValueVector implements <#if type <#if minor.class == "Decimal"> private final int precision; private final int scale; + - public ${className}(String name, BufferAllocator allocator, DictionaryEncoding dictionary, int precision, int scale) { + public ${className}(String name, FieldType fieldType, BufferAllocator allocator) { super(name, allocator); - values = new ${valuesName}(valuesField, allocator, precision, scale); - this.precision = precision; - this.scale = scale; - mutator = new Mutator(); - accessor = new Accessor(); - field = new Field(name, true, new Decimal(precision, scale), dictionary, null); - innerVectors = Collections.unmodifiableList(Arrays.asList( - bits, - values - )); - } - <#else> - public ${className}(String name, BufferAllocator allocator, DictionaryEncoding dictionary) { - super(name, allocator); - values = new ${valuesName}(valuesField, allocator); - mutator = new Mutator(); - accessor = new Accessor(); - ArrowType type = Types.MinorType.${minor.class?upper_case}.getType(); - field = new Field(name, true, type, dictionary, null); + <#if minor.class == "Decimal"> + Decimal decimal = 
(Decimal)fieldType.getType(); + this.precision = decimal.getPrecision(); + this.scale = decimal.getScale(); + this.values = new ${valuesName}(valuesField, allocator, precision, scale); + <#else> + this.values = new ${valuesName}(valuesField, allocator); + + this.mutator = new Mutator(); + this.accessor = new Accessor(); + this.field = new Field(name, fieldType, null); innerVectors = Collections.unmodifiableList(Arrays.asList( bits, <#if type.major = "VarLen"> @@ -94,7 +87,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type values )); } - @Override public BitVector getValidityVector() { @@ -341,12 +333,8 @@ public void splitAndTransferTo(int startIndex, int length, ${className} target) private class TransferImpl implements TransferPair { ${className} to; - public TransferImpl(String name, BufferAllocator allocator){ - <#if minor.class == "Decimal"> - to = new ${className}(name, allocator, field.getDictionary(), precision, scale); - <#else> - to = new ${className}(name, allocator, field.getDictionary()); - + public TransferImpl(String ref, BufferAllocator allocator){ + to = new ${className}(ref, field.getFieldType(), allocator); } public TransferImpl(${className} to){ diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index d17935b08ee..797b29342e4 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -119,10 +119,22 @@ public List getFieldInnerVectors() { return this.innerVectors; } + private String fieldName(MinorType type) { + return type.name().toLowerCase(); + } + + private FieldType fieldType(MinorType type) { + return new FieldType(true, type.getType(), null); + } + + private T addOrGet(MinorType minorType, Class c) { + return internalMap.addOrGet(fieldName(minorType), fieldType(minorType), c); + } + public NullableMapVector getMap() { if (mapVector == null) { int 
vectorCount = internalMap.size(); - mapVector = internalMap.addOrGet("map", MinorType.MAP, NullableMapVector.class, null); + mapVector = addOrGet(MinorType.MAP, NullableMapVector.class); if (internalMap.size() > vectorCount) { mapVector.allocateNew(); if (callBack != null) { @@ -144,7 +156,7 @@ public NullableMapVector getMap() { public Nullable${name}Vector get${name}Vector() { if (${uncappedName}Vector == null) { int vectorCount = internalMap.size(); - ${uncappedName}Vector = internalMap.addOrGet("${lowerCaseName}", MinorType.${name?upper_case}, Nullable${name}Vector.class, null); + ${uncappedName}Vector = addOrGet(MinorType.${name?upper_case}, Nullable${name}Vector.class); if (internalMap.size() > vectorCount) { ${uncappedName}Vector.allocateNew(); if (callBack != null) { @@ -162,7 +174,7 @@ public NullableMapVector getMap() { public ListVector getList() { if (listVector == null) { int vectorCount = internalMap.size(); - listVector = internalMap.addOrGet("list", MinorType.LIST, ListVector.class, null); + listVector = addOrGet(MinorType.LIST, ListVector.class); if (internalMap.size() > vectorCount) { listVector.allocateNew(); if (callBack != null) { @@ -267,7 +279,7 @@ public void copyFromSafe(int inIndex, int outIndex, UnionVector from) { public FieldVector addVector(FieldVector v) { String name = v.getMinorType().name().toLowerCase(); Preconditions.checkState(internalMap.getChild(name) == null, String.format("%s vector already exists", name)); - final FieldVector newVector = internalMap.addOrGet(name, v.getMinorType(), v.getClass(), v.getField().getDictionary()); + final FieldVector newVector = internalMap.addOrGet(name, v.getField().getFieldType(), v.getClass()); v.makeTransferPair(newVector).transfer(); internalMap.putChild(name, newVector); if (callBack != null) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java index 0fdbc48552a..6c2c8302a7b 100644 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java @@ -19,10 +19,11 @@ import java.util.List; -import io.netty.buffer.ArrowBuf; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.Field; +import io.netty.buffer.ArrowBuf; + /** * A vector corresponding to a Field in the schema * It has inner vectors backed by buffers (validity, offsets, data, ...) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java index 7e626fb1430..29b96736001 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java @@ -23,8 +23,6 @@ import java.util.Map; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -60,9 +58,7 @@ public VectorSchemaRoot(List fields, List fieldVectors, int public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) { List fieldVectors = new ArrayList<>(); for (Field field : schema.getFields()) { - MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); - FieldVector vector = minorType.getNewVector(field.getName(), allocator, field.getDictionary(), null); - vector.initializeChildrenFromFields(field.getChildren()); + FieldVector vector = field.createVector(allocator); fieldVectors.add(vector); } if (fieldVectors.size() != schema.getFields().size()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index 86a5e821198..71f2bea5b8f 100644 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -22,7 +22,9 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.Struct; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; /** @@ -85,12 +87,24 @@ protected boolean supportsDirectRead() { // return the number of child vectors public abstract int size(); - // add a new vector with the input MajorType or return the existing vector if we already added one with the same type - public abstract T addOrGet(String name, MinorType minorType, Class clazz, DictionaryEncoding dictionary, int... precisionScale); + // add a new vector with the input FieldType or return the existing vector if we already added one with the same name + public abstract T addOrGet(String name, FieldType fieldType, Class clazz); // return the child vector with the input name public abstract T getChild(String name, Class clazz); // return the child vector's ordinal in the composite container public abstract VectorWithOrdinal getChildVectorWithOrdinal(String name); + + public NullableMapVector addOrGetMap(String name) { + return addOrGet(name, FieldType.nullable(new Struct()), NullableMapVector.class); + } + + public ListVector addOrGetList(String name) { + return addOrGet(name, FieldType.nullable(new List()), ListVector.class); + } + + public UnionVector addOrGetUnion(String name) { + return addOrGet(name, FieldType.nullable(MinorType.UNION.getType()), UnionVector.class); + } } \ No newline at end of file diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java index baeeb078737..dc833edbed8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java @@ -25,8 +25,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.MapWithOrdinal; @@ -102,8 +101,8 @@ public boolean allocateNewSafe() { * * * - * @param name the name of the field - * @param minorType the minorType for the vector + * @param childName the name of the field + * @param fieldType the type for the vector * @param clazz class of expected vector type * @param class type of expected vector type * @throws java.lang.IllegalStateException raised if there is a hard schema change @@ -111,8 +110,8 @@ public boolean allocateNewSafe() { * @return resultant {@link org.apache.arrow.vector.ValueVector} */ @Override - public T addOrGet(String name, MinorType minorType, Class clazz, DictionaryEncoding dictionary, int... 
precisionScale) { - final ValueVector existing = getChild(name); + public T addOrGet(String childName, FieldType fieldType, Class clazz) { + final ValueVector existing = getChild(childName); boolean create = false; if (existing == null) { create = true; @@ -123,9 +122,9 @@ public T addOrGet(String name, MinorType minorType, Clas create = true; } if (create) { - final T vector = clazz.cast(minorType.getNewVector(name, allocator, dictionary, callBack, precisionScale)); - putChild(name, vector); - if (callBack!=null) { + final T vector = clazz.cast(fieldType.createNewSingleVector(childName, allocator, callBack)); + putChild(childName, vector); + if (callBack != null) { callBack.doWork(); } return vector; @@ -163,14 +162,14 @@ public T getChild(String name, Class clazz) { return typeify(v, clazz); } - protected ValueVector add(String name, MinorType minorType, DictionaryEncoding dictionary, int... precisionScale) { - final ValueVector existing = getChild(name); + protected ValueVector add(String childName, FieldType fieldType) { + final ValueVector existing = getChild(childName); if (existing != null) { - throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", existing.getClass().getSimpleName(), minorType)); + throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", existing.getClass().getSimpleName(), fieldType)); } - FieldVector vector = minorType.getNewVector(name, allocator, dictionary, callBack, precisionScale); - putChild(name, vector); - if (callBack!=null) { + FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack); + putChild(childName, vector); + if (callBack != null) { callBack.doWork(); } return vector; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index eda1f3bc80a..6b240c04f71 100644 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -27,8 +27,7 @@ import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.SchemaChangeRuntimeException; @@ -154,10 +153,10 @@ public int size() { return vector == DEFAULT_DATA_VECTOR ? 0:1; } - public AddOrGetResult addOrGetVector(MinorType minorType, DictionaryEncoding dictionary) { + public AddOrGetResult addOrGetVector(FieldType fieldType) { boolean created = false; if (vector instanceof ZeroVector) { - vector = minorType.getNewVector(DATA_VECTOR_NAME, allocator, dictionary, callBack); + vector = fieldType.createNewSingleVector(DATA_VECTOR_NAME, allocator, callBack); // returned vector must have the same field created = true; if (callBack != null) { @@ -165,9 +164,9 @@ public AddOrGetResult addOrGetVector(MinorType minorT } } - if (vector.getField().getType().getTypeID() != minorType.getType().getTypeID()) { + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { final String msg = String.format("Inner vector type mismatch. 
Requested type: [%s], actual type: [%s]", - minorType.getType().getTypeID(), vector.getField().getType().getTypeID()); + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); throw new SchemaChangeRuntimeException(msg); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 54b051b9781..d138ca339e3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -40,10 +40,10 @@ import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.schema.ArrowFieldNode; -import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; @@ -80,8 +80,7 @@ public void initializeChildrenFromFields(List children) { throw new IllegalArgumentException("Lists have only one child. 
Found: " + children); } Field field = children.get(0); - MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); - AddOrGetResult addOrGetVector = addOrGetVector(minorType, field.getDictionary()); + AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); if (!addOrGetVector.isCreated()) { throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector()); } @@ -164,11 +163,11 @@ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { public TransferImpl(ListVector to) { this.to = to; - to.addOrGetVector(vector.getMinorType(), vector.getField().getDictionary()); + to.addOrGetVector(vector.getField().getFieldType()); pairs[0] = offsets.makeTransferPair(to.offsets); pairs[1] = bits.makeTransferPair(to.bits); if (to.getDataVector() instanceof ZeroVector) { - to.addOrGetVector(vector.getMinorType(), vector.getField().getDictionary()); + to.addOrGetVector(vector.getField().getFieldType()); } pairs[2] = getDataVector().makeTransferPair(to.getDataVector()); } @@ -241,8 +240,8 @@ public boolean allocateNewSafe() { return success; } - public AddOrGetResult addOrGetVector(MinorType minorType, DictionaryEncoding dictionary) { - AddOrGetResult result = super.addOrGetVector(minorType, dictionary); + public AddOrGetResult addOrGetVector(FieldType fieldType) { + AddOrGetResult result = super.addOrGetVector(fieldType); reader = new UnionListReader(this); return result; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index cb67537c446..997a6a38a08 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -32,7 +32,6 @@ import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import 
org.apache.arrow.vector.holders.ComplexHolder; -import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; @@ -165,7 +164,7 @@ protected MapTransferPair(MapVector from, MapVector to, boolean allocate) { // (This is similar to what happens in ScanBatch where the children cannot be added till they are // read). To take care of this, we ensure that the hashCode of the MaterializedField does not // include the hashCode of the children but is based only on MaterializedField$key. - final FieldVector newVector = to.addOrGet(child, vector.getMinorType(), vector.getClass(), vector.getField().getDictionary()); + final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass()); if (allocate && to.size() != preSize) { newVector.allocateNew(); } @@ -318,8 +317,7 @@ public void close() { public void initializeChildrenFromFields(List children) { for (Field field : children) { - MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); - FieldVector vector = (FieldVector)this.add(field.getName(), minorType, field.getDictionary()); + FieldVector vector = (FieldVector)this.add(field.getName(), field.getFieldType()); vector.initializeChildrenFromFields(field.getChildren()); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index de1d1857370..7fe35e8253a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -270,4 +270,5 @@ public Accessor getAccessor() { public Mutator getMutator() { return mutator; } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java index 6d053167848..6851d6d45d5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java @@ -22,7 +22,6 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.complex.StateTool; import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; import com.google.common.base.Preconditions; @@ -150,7 +149,7 @@ public MapWriter rootAsMap() { case INIT: // TODO allow dictionaries in complex types - NullableMapVector map = container.addOrGet(name, MinorType.MAP, NullableMapVector.class, null); + NullableMapVector map = container.addOrGetMap(name); mapRoot = nullableMapWriterFactory.build(map); mapRoot.setPosition(idx()); mode = Mode.MAP; @@ -182,7 +181,7 @@ public ListWriter rootAsList() { case INIT: int vectorCount = container.size(); // TODO allow dictionaries in complex types - ListVector listVector = container.addOrGet(name, MinorType.LIST, ListVector.class, null); + ListVector listVector = container.addOrGetList(name); if (container.size() > vectorCount) { listVector.allocateNew(); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index 1880c9b490c..d16718e75a7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; +import 
org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; /** @@ -125,7 +126,7 @@ protected FieldWriter getWriter(MinorType type) { // ??? return null; } - ValueVector v = listVector.addOrGetVector(type, null).getVector(); + ValueVector v = listVector.addOrGetVector(FieldType.nullable(type.getType())).getVector(); v.allocateNew(); setWriter(v); writer.setPosition(position); @@ -151,7 +152,7 @@ private FieldWriter promoteToUnion() { tp.transfer(); if (parentContainer != null) { // TODO allow dictionaries in complex types - unionVector = parentContainer.addOrGet(name, MinorType.UNION, UnionVector.class, null); + unionVector = parentContainer.addOrGetUnion(name); unionVector.allocateNew(); } else if (listVector != null) { unionVector = listVector.promoteToUnion(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 2f070237101..f07bb585f81 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -92,45 +92,15 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; public class Types { - private static final Field NULL_FIELD = new Field("", true, Null.INSTANCE, null); - private static final Field TINYINT_FIELD = new Field("", true, new Int(8, true), null); - private static final Field SMALLINT_FIELD = new Field("", true, new Int(16, true), null); - private static final Field INT_FIELD = new Field("", true, new Int(32, true), null); - private static final Field BIGINT_FIELD = new Field("", true, new 
Int(64, true), null); - private static final Field UINT1_FIELD = new Field("", true, new Int(8, false), null); - private static final Field UINT2_FIELD = new Field("", true, new Int(16, false), null); - private static final Field UINT4_FIELD = new Field("", true, new Int(32, false), null); - private static final Field UINT8_FIELD = new Field("", true, new Int(64, false), null); - private static final Field DATE_FIELD = new Field("", true, new Date(DateUnit.MILLISECOND), null); - private static final Field TIME_FIELD = new Field("", true, new Time(TimeUnit.MILLISECOND, 32), null); - private static final Field TIMESTAMPSEC_FIELD = new Field("", true, new Timestamp(TimeUnit.SECOND, "UTC"), null); - private static final Field TIMESTAMPMILLI_FIELD = new Field("", true, new Timestamp(TimeUnit.MILLISECOND, "UTC"), null); - private static final Field TIMESTAMPMICRO_FIELD = new Field("", true, new Timestamp(TimeUnit.MICROSECOND, "UTC"), null); - private static final Field TIMESTAMPNANO_FIELD = new Field("", true, new Timestamp(TimeUnit.NANOSECOND, "UTC"), null); - private static final Field INTERVALDAY_FIELD = new Field("", true, new Interval(IntervalUnit.DAY_TIME), null); - private static final Field INTERVALYEAR_FIELD = new Field("", true, new Interval(IntervalUnit.YEAR_MONTH), null); - private static final Field FLOAT4_FIELD = new Field("", true, new FloatingPoint(FloatingPointPrecision.SINGLE), null); - private static final Field FLOAT8_FIELD = new Field("", true, new FloatingPoint(FloatingPointPrecision.DOUBLE), null); - private static final Field VARCHAR_FIELD = new Field("", true, Utf8.INSTANCE, null); - private static final Field VARBINARY_FIELD = new Field("", true, Binary.INSTANCE, null); - private static final Field BIT_FIELD = new Field("", true, Bool.INSTANCE, null); - - public enum MinorType { NULL(Null.INSTANCE) { @Override - public Field getField() { - return NULL_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator 
allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { return ZeroVector.INSTANCE; } @@ -141,13 +111,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, MAP(Struct.INSTANCE) { @Override - public Field getField() { - throw new UnsupportedOperationException("Cannot get simple field for Map type"); - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableMapVector(name, allocator, dictionary, callBack); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableMapVector(name, allocator, fieldType.getDictionary(), schemaChangeCallback); } @Override @@ -157,13 +122,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, TINYINT(new Int(8, true)) { @Override - public Field getField() { - return TINYINT_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableTinyIntVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTinyIntVector(name, fieldType, allocator); } @Override @@ -173,13 +133,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, SMALLINT(new Int(16, true)) { @Override - public Field getField() { - return SMALLINT_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - return new NullableSmallIntVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableSmallIntVector(name, fieldType, allocator); } @Override @@ -189,13 +144,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, INT(new Int(32, true)) { @Override - public Field getField() { - return INT_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableIntVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableIntVector(name, fieldType, allocator); } @Override @@ -205,13 +155,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, BIGINT(new Int(64, true)) { @Override - public Field getField() { - return BIGINT_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableBigIntVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableBigIntVector(name, fieldType, allocator); } @Override @@ -221,13 +166,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, DATE(new Date(DateUnit.MILLISECOND)) { @Override - public Field getField() { - return DATE_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - return new NullableDateVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableDateVector(name, fieldType, allocator); } @Override @@ -237,13 +177,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, TIME(new Time(TimeUnit.MILLISECOND, 32)) { @Override - public Field getField() { - return TIME_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableTimeVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeVector(name, fieldType, allocator); } @Override @@ -254,13 +189,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { // time in second from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC. TIMESTAMPSEC(new Timestamp(org.apache.arrow.vector.types.TimeUnit.SECOND, "UTC")) { @Override - public Field getField() { - return TIMESTAMPSEC_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableTimeStampSecVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeStampSecVector(name, fieldType, allocator); } @Override @@ -271,13 +201,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { // time in millis from the Unix epoch, 00:00:00.000 on 1 January 1970, UTC. 
TIMESTAMPMILLI(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")) { @Override - public Field getField() { - return TIMESTAMPMILLI_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableTimeStampMilliVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeStampMilliVector(name, fieldType, allocator); } @Override @@ -288,13 +213,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { // time in microsecond from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC. TIMESTAMPMICRO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC")) { @Override - public Field getField() { - return TIMESTAMPMICRO_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableTimeStampMicroVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeStampMicroVector(name, fieldType, allocator); } @Override @@ -305,13 +225,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { // time in nanosecond from the Unix epoch, 00:00:00.000000000 on 1 January 1970, UTC. TIMESTAMPNANO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.NANOSECOND, "UTC")) { @Override - public Field getField() { - return TIMESTAMPNANO_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - return new NullableTimeStampNanoVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeStampNanoVector(name, fieldType, allocator); } @Override @@ -321,13 +236,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, INTERVALDAY(new Interval(IntervalUnit.DAY_TIME)) { @Override - public Field getField() { - return INTERVALDAY_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableIntervalDayVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableIntervalDayVector(name, fieldType, allocator); } @Override @@ -337,13 +247,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, INTERVALYEAR(new Interval(IntervalUnit.YEAR_MONTH)) { @Override - public Field getField() { - return INTERVALYEAR_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableIntervalDayVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableIntervalYearVector(name, fieldType, allocator); } @Override @@ -354,13 +259,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { // 4 byte ieee 754 FLOAT4(new FloatingPoint(SINGLE)) { @Override - public Field getField() { - return FLOAT4_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - return new NullableFloat4Vector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableFloat4Vector(name, fieldType, allocator); } @Override @@ -371,13 +271,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { // 8 byte ieee 754 FLOAT8(new FloatingPoint(DOUBLE)) { @Override - public Field getField() { - return FLOAT8_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableFloat8Vector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableFloat8Vector(name, fieldType, allocator); } @Override @@ -387,13 +282,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, BIT(Bool.INSTANCE) { @Override - public Field getField() { - return BIT_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableBitVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableBitVector(name, fieldType, allocator); } @Override @@ -403,13 +293,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, VARCHAR(Utf8.INSTANCE) { @Override - public Field getField() { - return VARCHAR_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - return new NullableVarCharVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableVarCharVector(name, fieldType, allocator); } @Override @@ -419,13 +304,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, VARBINARY(Binary.INSTANCE) { @Override - public Field getField() { - return VARBINARY_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableVarBinaryVector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableVarBinaryVector(name, fieldType, allocator); } @Override @@ -438,14 +318,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { public ArrowType getType() { throw new UnsupportedOperationException("Cannot get simple type for Decimal type"); } - @Override - public Field getField() { - throw new UnsupportedOperationException("Cannot get simple field for Decimal type"); - } @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - return new NullableDecimalVector(name, allocator, dictionary, precisionScale[0], precisionScale[1]); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableDecimalVector(name, fieldType, allocator); } @Override @@ -455,13 +331,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, UINT1(new Int(8, false)) { @Override - public Field getField() { - return UINT1_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableUInt1Vector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableUInt1Vector(name, fieldType, allocator); } @Override @@ -471,13 +342,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, UINT2(new Int(16, false)) { @Override - public Field getField() { - return UINT2_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableUInt2Vector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableUInt2Vector(name, fieldType, allocator); } @Override @@ -487,13 +353,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, UINT4(new Int(32, false)) { @Override - public Field getField() { - return UINT4_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - return new NullableUInt4Vector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableUInt4Vector(name, fieldType, allocator); } @Override @@ -503,13 +364,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, UINT8(new Int(64, false)) { @Override - public Field getField() { - return UINT8_FIELD; - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new NullableUInt8Vector(name, allocator, dictionary); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableUInt8Vector(name, fieldType, allocator); } @Override @@ -519,13 +375,8 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, LIST(List.INSTANCE) { @Override - public Field getField() { - throw new UnsupportedOperationException("Cannot get simple field for List type"); - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale) { - return new ListVector(name, allocator, dictionary, callBack); + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new ListVector(name, allocator, fieldType.getDictionary(), schemaChangeCallback); } @Override @@ -535,16 +386,11 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { }, UNION(new Union(Sparse, null)) { @Override - public Field getField() { - throw new UnsupportedOperationException("Cannot get simple field for Union type"); - } - - @Override - public FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... 
precisionScale) { - if (dictionary != null) { + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + if (fieldType.getDictionary() != null) { throw new UnsupportedOperationException("Dictionary encoding not supported for complex types"); } - return new UnionVector(name, allocator, callBack); + return new UnionVector(name, allocator, schemaChangeCallback); } @Override @@ -563,9 +409,7 @@ public ArrowType getType() { return type; } - public abstract Field getField(); - - public abstract FieldVector getNewVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack, int... precisionScale); + public abstract FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback); public abstract FieldWriter getNewFieldWriter(ValueVector vector); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 011f0e6e446..05eb9cdceac 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -28,11 +28,10 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.schema.TypeLayout; import org.apache.arrow.vector.schema.VectorLayout; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonInclude.Include; import com.fasterxml.jackson.annotation.JsonProperty; @@ -41,10 +40,17 @@ import com.google.flatbuffers.FlatBufferBuilder; public class Field { + + public static Field nullablePrimitive(String 
name, ArrowType.PrimitiveType type) { + return nullable(name, type); + } + + public static Field nullable(String name, ArrowType type) { + return new Field(name, true, type, null, null); + } + private final String name; - private final boolean nullable; - private final ArrowType type; - private final DictionaryEncoding dictionary; + private final FieldType fieldType; private final List children; private final TypeLayout typeLayout; @@ -56,29 +62,31 @@ private Field( @JsonProperty("dictionary") DictionaryEncoding dictionary, @JsonProperty("children") List children, @JsonProperty("typeLayout") TypeLayout typeLayout) { + this(name, new FieldType(nullable, type, dictionary), children, typeLayout); + } + + private Field(String name, FieldType fieldType, List children, TypeLayout typeLayout) { + super(); this.name = name; - this.nullable = nullable; - this.type = checkNotNull(type); - this.dictionary = dictionary; - if (children == null) { - this.children = ImmutableList.of(); - } else { - this.children = children; - } + this.fieldType = checkNotNull(fieldType); + this.children = children == null ? 
ImmutableList.of() : children; this.typeLayout = checkNotNull(typeLayout); } + public Field(String name, FieldType fieldType, List children) { + this(name, fieldType, children, TypeLayout.getTypeLayout(fieldType.getType())); + } + public Field(String name, boolean nullable, ArrowType type, List children) { - this(name, nullable, type, null, children, TypeLayout.getTypeLayout(checkNotNull(type))); + this(name, nullable, type, null, children); } public Field(String name, boolean nullable, ArrowType type, DictionaryEncoding dictionary, List children) { - this(name, nullable, type, dictionary, children, TypeLayout.getTypeLayout(checkNotNull(type))); + this(name, new FieldType(nullable, type, dictionary), children); } public FieldVector createVector(BufferAllocator allocator) { - MinorType minorType = Types.getMinorTypeForArrowType(type); - FieldVector vector = minorType.getNewVector(name, allocator, dictionary, null); + FieldVector vector = fieldType.createNewSingleVector(name, allocator, null); vector.initializeChildrenFromFields(children); return vector; } @@ -110,7 +118,7 @@ public static Field convertField(org.apache.arrow.flatbuf.Field field) { } public void validate() { - TypeLayout expectedLayout = TypeLayout.getTypeLayout(type); + TypeLayout expectedLayout = TypeLayout.getTypeLayout(getType()); if (!expectedLayout.equals(typeLayout)) { throw new IllegalArgumentException("Deserialized field does not match expected vectors. expected: " + expectedLayout + " got " + typeLayout); } @@ -118,8 +126,9 @@ public void validate() { public int getField(FlatBufferBuilder builder) { int nameOffset = name == null ? 
-1 : builder.createString(name); - int typeOffset = type.getType(builder); + int typeOffset = getType().getType(builder); int dictionaryOffset = -1; + DictionaryEncoding dictionary = getDictionary(); if (dictionary != null) { int dictionaryType = dictionary.getIndexType().getType(builder); org.apache.arrow.flatbuf.DictionaryEncoding.startDictionaryEncoding(builder); @@ -143,8 +152,8 @@ public int getField(FlatBufferBuilder builder) { if (name != null) { org.apache.arrow.flatbuf.Field.addName(builder, nameOffset); } - org.apache.arrow.flatbuf.Field.addNullable(builder, nullable); - org.apache.arrow.flatbuf.Field.addTypeType(builder, type.getTypeID().getFlatbufID()); + org.apache.arrow.flatbuf.Field.addNullable(builder, isNullable()); + org.apache.arrow.flatbuf.Field.addTypeType(builder, getType().getTypeID().getFlatbufID()); org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset); org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset); @@ -159,15 +168,22 @@ public String getName() { } public boolean isNullable() { - return nullable; + return fieldType.isNullable(); } public ArrowType getType() { - return type; + return fieldType.getType(); + } + + @JsonIgnore + public FieldType getFieldType() { + return fieldType; } @JsonInclude(Include.NON_NULL) - public DictionaryEncoding getDictionary() { return dictionary; } + public DictionaryEncoding getDictionary() { + return fieldType.getDictionary(); + } public List getChildren() { return children; @@ -179,7 +195,7 @@ public TypeLayout getTypeLayout() { @Override public int hashCode() { - return Objects.hash(name, nullable, type, dictionary, children); + return Objects.hash(name, isNullable(), getType(), getDictionary(), children); } @Override @@ -189,10 +205,10 @@ public boolean equals(Object obj) { } Field that = (Field) obj; return Objects.equals(this.name, that.name) && - Objects.equals(this.nullable, that.nullable) && - 
Objects.equals(this.type, that.type) && - Objects.equals(this.dictionary, that.dictionary) && - Objects.equals(this.children, that.children); + Objects.equals(this.isNullable(), that.isNullable()) && + Objects.equals(this.getType(), that.getType()) && + Objects.equals(this.getDictionary(), that.getDictionary()) && + Objects.equals(this.children, that.children); } @Override @@ -201,14 +217,14 @@ public String toString() { if (name != null) { sb.append(name).append(": "); } - sb.append(type); - if (dictionary != null) { - sb.append("[dictionary: ").append(dictionary.getId()).append("]"); + sb.append(getType()); + if (getDictionary() != null) { + sb.append("[dictionary: ").append(getDictionary().getId()).append("]"); } if (!children.isEmpty()) { sb.append("<").append(Joiner.on(", ").join(children)).append(">"); } - if (!nullable) { + if (!isNullable()) { sb.append(" not null"); } return sb.toString(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java new file mode 100644 index 00000000000..fe99e631360 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.types.pojo; + +import static com.google.common.base.Preconditions.checkNotNull; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.util.CallBack; + +public class FieldType { + + public static FieldType nullable(ArrowType type) { + return new FieldType(true, type, null); + } + + private final boolean nullable; + private final ArrowType type; + private final DictionaryEncoding dictionary; + + public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary) { + super(); + this.nullable = nullable; + this.type = checkNotNull(type); + this.dictionary = dictionary; + } + + public boolean isNullable() { + return nullable; + } + public ArrowType getType() { + return type; + } + public DictionaryEncoding getDictionary() { + return dictionary; + } + + public FieldVector createNewSingleVector(String name, BufferAllocator allocator, CallBack schemaCallBack) { + MinorType minorType = Types.getMinorTypeForArrowType(type); + return minorType.getNewVector(name, this, allocator, schemaCallBack); + } + +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java index 20f4aa8cf64..ee7530c8d10 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java @@ -17,16 +17,16 @@ */ package org.apache.arrow.vector; -import 
org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.util.DecimalUtility; -import org.junit.Test; +import static org.junit.Assert.assertEquals; import java.math.BigDecimal; import java.math.BigInteger; -import static org.junit.Assert.assertEquals; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.util.DecimalUtility; +import org.junit.Test; public class TestDecimalVector { @@ -44,7 +44,7 @@ public class TestDecimalVector { @Test public void test() { BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - NullableDecimalVector decimalVector = new NullableDecimalVector("decimal", allocator, null, 10, scale); + NullableDecimalVector decimalVector = TestUtils.newVector(NullableDecimalVector.class, "decimal", new ArrowType.Decimal(10, scale), allocator); decimalVector.allocateNew(); BigDecimal[] values = new BigDecimal[intValues.length]; for (int i = 0; i < intValues.length; i++) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index e3087ef8c95..3bf3b1cedff 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -17,19 +17,19 @@ */ package org.apache.arrow.vector; +import static org.apache.arrow.vector.TestUtils.newNullableVarCharVector; +import static org.junit.Assert.assertEquals; + +import java.nio.charset.StandardCharsets; + import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.types.Types.MinorType; +import 
org.apache.arrow.vector.dictionary.DictionaryEncoder; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.junit.After; import org.junit.Before; import org.junit.Test; -import java.nio.charset.StandardCharsets; - -import static org.junit.Assert.assertEquals; - public class TestDictionaryVector { private BufferAllocator allocator; @@ -51,8 +51,8 @@ public void terminate() throws Exception { @Test public void testEncodeStrings() { // Create a new value vector - try (final NullableVarCharVector vector = (NullableVarCharVector) MinorType.VARCHAR.getNewVector("foo", allocator, null, null); - final NullableVarCharVector dictionaryVector = (NullableVarCharVector) MinorType.VARCHAR.getNewVector("dict", allocator, null, null)) { + try (final NullableVarCharVector vector = newNullableVarCharVector("foo", allocator); + final NullableVarCharVector dictionaryVector = newNullableVarCharVector("dict", allocator);) { final NullableVarCharVector.Mutator m = vector.getMutator(); vector.allocateNew(512, 5); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java new file mode 100644 index 00000000000..b79f2da9210 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; + +public class TestUtils { + + public static NullableVarCharVector newNullableVarCharVector(String name, BufferAllocator allocator) { + return (NullableVarCharVector)FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null); + } + + public static <T> T newVector(Class<T> c, String name, ArrowType type, BufferAllocator allocator) { + return c.cast(FieldType.nullable(type).createNewSingleVector(name, allocator, null)); + } + + public static <T> T newVector(Class<T> c, String name, MinorType type, BufferAllocator allocator) { + return c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, allocator, null)); + } + +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 6917638d74e..78ca14dc406 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -17,6 +17,8 @@ */ package org.apache.arrow.vector; +import static org.apache.arrow.vector.TestUtils.newNullableVarCharVector; +import static org.apache.arrow.vector.TestUtils.newVector; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static
org.junit.Assert.assertTrue; @@ -28,6 +30,7 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.schema.TypeLayout; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.junit.After; import org.junit.Assert; @@ -86,7 +89,7 @@ public void testFixedType() { public void testNullableVarLen2() { // Create a new value vector for 1024 integers. - try (final NullableVarCharVector vector = new NullableVarCharVector(EMPTY_SCHEMA_PATH, allocator, null)) { + try (final NullableVarCharVector vector = newNullableVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { final NullableVarCharVector.Mutator m = vector.getMutator(); vector.allocateNew(1024 * 10, 1024); @@ -116,7 +119,7 @@ public void testNullableVarLen2() { public void testNullableFixedType() { // Create a new value vector for 1024 integers. - try (final NullableUInt4Vector vector = new NullableUInt4Vector(EMPTY_SCHEMA_PATH, allocator, null)) { + try (final NullableUInt4Vector vector = newVector(NullableUInt4Vector.class, EMPTY_SCHEMA_PATH, new ArrowType.Int(32, false), allocator);) { final NullableUInt4Vector.Mutator m = vector.getMutator(); vector.allocateNew(1024); @@ -186,7 +189,7 @@ public void testNullableFixedType() { @Test public void testNullableFloat() { // Create a new value vector for 1024 integers - try (final NullableFloat4Vector vector = (NullableFloat4Vector) MinorType.FLOAT4.getNewVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { + try (final NullableFloat4Vector vector = newVector(NullableFloat4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator);) { final NullableFloat4Vector.Mutator m = vector.getMutator(); vector.allocateNew(1024); @@ -233,7 +236,7 @@ public void testNullableFloat() { @Test public void testNullableInt() { // Create a new value vector for 1024 integers - try (final NullableIntVector vector = (NullableIntVector) 
MinorType.INT.getNewVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { + try (final NullableIntVector vector = newVector(NullableIntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, allocator)) { final NullableIntVector.Mutator m = vector.getMutator(); vector.allocateNew(1024); @@ -403,7 +406,7 @@ private void validateRange(int length, int start, int count) { @Test public void testReAllocNullableFixedWidthVector() { // Create a new value vector for 1024 integers - try (final NullableFloat4Vector vector = (NullableFloat4Vector) MinorType.FLOAT4.getNewVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { + try (final NullableFloat4Vector vector = newVector(NullableFloat4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator)) { final NullableFloat4Vector.Mutator m = vector.getMutator(); vector.allocateNew(1024); @@ -436,7 +439,7 @@ public void testReAllocNullableFixedWidthVector() { @Test public void testReAllocNullableVariableWidthVector() { // Create a new value vector for 1024 integers - try (final NullableVarCharVector vector = (NullableVarCharVector) MinorType.VARCHAR.getNewVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { + try (final NullableVarCharVector vector = newVector(NullableVarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { final NullableVarCharVector.Mutator m = vector.getMutator(); vector.allocateNew(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 2b49d8ed4b5..65b193c0aee 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -27,7 +27,6 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import 
org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.junit.After; @@ -53,7 +52,7 @@ public void terminate() throws Exception { public void testPromoteToUnion() throws Exception { try (final MapVector container = new MapVector(EMPTY_SCHEMA_PATH, allocator, null); - final NullableMapVector v = container.addOrGet("test", MinorType.MAP, NullableMapVector.class, null); + final NullableMapVector v = container.addOrGetMap("test"); final PromotableWriter writer = new PromotableWriter(v, container)) { container.allocateNew(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 75e5d2d6e5c..a1104ffe545 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -17,6 +17,8 @@ */ package org.apache.arrow.vector.file; +import static org.apache.arrow.vector.TestUtils.newNullableVarCharVector; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; @@ -28,8 +30,6 @@ import java.util.Arrays; import java.util.List; -import com.google.common.collect.ImmutableList; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullableTinyIntVector; @@ -40,19 +40,19 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryEncoder; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; import 
org.apache.arrow.vector.schema.ArrowBuffer; import org.apache.arrow.vector.schema.ArrowMessage; import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.stream.ArrowStreamReader; import org.apache.arrow.vector.stream.ArrowStreamWriter; import org.apache.arrow.vector.stream.MessageSerializerTest; -import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Text; import org.junit.Assert; @@ -60,6 +60,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ImmutableList; + public class TestArrowFile extends BaseFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(TestArrowFile.class); @@ -380,8 +382,8 @@ public void testWriteReadDictionary() throws IOException { // write try (BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - NullableVarCharVector vector = new NullableVarCharVector("varchar", originalVectorAllocator, null); - NullableVarCharVector dictionaryVector = new NullableVarCharVector("dict", originalVectorAllocator, null)) { + NullableVarCharVector vector = newNullableVarCharVector("varchar", originalVectorAllocator); + NullableVarCharVector dictionaryVector = newNullableVarCharVector("dict", originalVectorAllocator)) { vector.allocateNewSafe(); NullableVarCharVector.Mutator mutator = vector.getMutator(); mutator.set(0, "foo".getBytes(StandardCharsets.UTF_8)); @@ -483,7 +485,7 @@ public void testWriteReadNestedDictionary() throws IOException { // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]] // write - try (NullableVarCharVector dictionaryVector = new NullableVarCharVector("dictionary", allocator, 
null); + try (NullableVarCharVector dictionaryVector = newNullableVarCharVector("dictionary", allocator); ListVector listVector = new ListVector("list", allocator, null, null)) { Dictionary dictionary = new Dictionary(dictionaryVector, encoding); @@ -495,7 +497,7 @@ public void testWriteReadNestedDictionary() throws IOException { dictionaryVector.getMutator().set(1, "bar".getBytes(StandardCharsets.UTF_8)); dictionaryVector.getMutator().setValueCount(2); - listVector.addOrGetVector(MinorType.INT, encoding); + listVector.addOrGetVector(new FieldType(true, new Int(32, true), encoding)); listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); listWriter.startList(); @@ -511,7 +513,7 @@ public void testWriteReadNestedDictionary() throws IOException { listWriter.setValueCount(3); List fields = ImmutableList.of(listVector.getField()); - List vectors = ImmutableList.of((FieldVector) listVector); + List vectors = ImmutableList.of(listVector); VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 3); try ( diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java index 914dfe4319d..d00cb0f8c00 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java @@ -24,11 +24,8 @@ import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.channels.Channels; import java.util.Collections; import java.util.List; @@ -38,13 +35,10 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullableIntVector; -import 
org.apache.arrow.vector.NullableTinyIntVector; +import org.apache.arrow.vector.TestUtils; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowRecordBatch; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -78,8 +72,8 @@ byte[] array(ArrowBuf buf) { @Test public void test() throws IOException { Schema schema = new Schema(asList(new Field("testField", true, new ArrowType.Int(8, true), Collections.emptyList()))); - MinorType minorType = Types.getMinorTypeForArrowType(schema.getFields().get(0).getType()); - FieldVector vector = minorType.getNewVector("testField", allocator, null,null); + ArrowType type = schema.getFields().get(0).getType(); + FieldVector vector = TestUtils.newVector(FieldVector.class, "testField", type, allocator); vector.initializeChildrenFromFields(schema.getFields().get(0).getChildren()); byte[] validity = new byte[] { (byte) 255, 0};