From 756dc8a0e71257d0761940e6ba2db693257dbe64 Mon Sep 17 00:00:00 2001 From: Emilio Lahr-Vivaz Date: Tue, 4 Apr 2017 10:36:02 -0400 Subject: [PATCH 1/5] ARROW-725: [Formats/Java] FixedSizeList message and java implementation --- format/Schema.fbs | 8 +- .../src/main/codegen/data/ArrowTypes.tdd | 5 + .../templates/AbstractFieldWriter.java | 6 + .../AbstractPromotableFieldWriter.java | 56 ++- .../main/codegen/templates/BaseWriter.java | 1 + .../main/codegen/templates/ComplexCopier.java | 1 + .../main/codegen/templates/MapWriters.java | 6 +- .../codegen/templates/UnionListWriter.java | 54 ++- .../vector/complex/FixedSizeListVector.java | 393 ++++++++++++++++++ .../arrow/vector/complex/ListVector.java | 14 +- .../vector/complex/NullableMapVector.java | 8 +- .../arrow/vector/complex/Positionable.java | 1 + .../vector/complex/PromotableVector.java | 32 ++ .../complex/impl/AbstractBaseReader.java | 5 + .../complex/impl/AbstractBaseWriter.java | 5 + .../vector/complex/impl/PromotableWriter.java | 52 ++- .../impl/UnionFixedSizeListReader.java | 103 +++++ .../arrow/vector/schema/TypeLayout.java | 8 + .../org/apache/arrow/vector/types/Types.java | 23 + .../arrow/vector/types/pojo/FieldType.java | 6 + .../vector/util/JsonStringArrayList.java | 8 + .../arrow/vector/TestFixedSizeListVector.java | 304 ++++++++++++++ .../arrow/vector/file/TestArrowFile.java | 69 ++- 23 files changed, 1105 insertions(+), 63 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java diff --git a/format/Schema.fbs b/format/Schema.fbs index badc7ea8bef..ff6119931dd 100644 --- a/format/Schema.fbs +++ b/format/Schema.fbs @@ -39,6 +39,11 @@ table Struct_ { table List { } +table FixedSizeList { + /// Number of list items per value + listSize: int; +} + enum UnionMode:short { Sparse, Dense } /// A union is a complex type with children in Field @@ -159,7 +164,8 @@ union Type { List, Struct_, Union, - FixedSizeBinary + FixedSizeBinary, + FixedSizeList } /// ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index e1fb5e0619a..ce92c1333a5 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -27,6 +27,11 @@ fields: [], complex: true }, + { + name: "FixedSizeList", + fields: [{name: "listSize", type: int}], + complex: true + }, { name: "Union", fields: [{name: "mode", type: short, valueType: UnionMode}, {name: "typeIds", type: "int[]"}], diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index de076fc46ff..c18c8bf111f 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -97,6 +97,12 @@ public ListWriter list() { return null; } + @Override + public ListWriter list(int size) { + fail("FixedSizeList"); + return null; + } + @Override public MapWriter map(String name) { fail("Map"); diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 60dd0c7b7ad..3d1660fd223 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -42,7 +42,7 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter { * @param type * @return */ - abstract protected FieldWriter getWriter(MinorType type); + abstract protected FieldWriter getWriter(ArrowType type); /** * Return the current FieldWriter @@ -52,46 +52,63 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter { @Override public void start() { - getWriter(MinorType.MAP).start(); + getWriter(MinorType.MAP.getType()).start(); } @Override public void end() { - getWriter(MinorType.MAP).end(); + getWriter(MinorType.MAP.getType()).end(); setPosition(idx() + 1); } @Override public void startList() { - getWriter(MinorType.LIST).startList(); + getListWriter().startList(); } @Override public void endList() { - getWriter(MinorType.LIST).endList(); + getListWriter().endList(); setPosition(idx() + 1); } + /** + * Gets or creates a list writer, backed by either a FixedSizeListVector or a variable length ListVector. + * If the writer doesn't exist, a variable length writer will be created. + * + * This allows us to re-use `startList()`, `endList()` etc methods for fixed and variable size lists + * + * @return list writer + */ + protected ListWriter getListWriter() { + FieldWriter writer = getWriter(); + if (writer != null && writer instanceof UnionListWriter) { + return writer; + } else { + return getWriter(MinorType.LIST.getType()); + } + } + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#if !minor.class?starts_with("Decimal") > @Override public void write(${name}Holder holder) { - getWriter(MinorType.${name?upper_case}).write(holder); + getWriter(MinorType.${name?upper_case}.getType()).write(holder); } public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); + getWriter(MinorType.${name?upper_case}.getType()).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); } <#else> @Override public void write(DecimalHolder holder) { - getWriter(MinorType.DECIMAL).write(holder); + getWriter(new Decimal(holder.precision, holder.scale)).write(holder); } - public void writeDecimal(int start, ArrowBuf buffer) { - getWriter(MinorType.DECIMAL).writeDecimal(start, buffer); + public void writeDecimal(int precision, int scale, int start, ArrowBuf buffer) { + getWriter(new Decimal(precision, scale)).writeDecimal(start, buffer); } @@ -103,22 +120,27 @@ public void writeNull() { @Override public MapWriter map() { - return getWriter(MinorType.LIST).map(); + return getListWriter().map(); } @Override public ListWriter list() { - return getWriter(MinorType.LIST).list(); + return getListWriter().list(); + } + + @Override + public ListWriter list(int size) { + return getWriter(new FixedSizeList(size)).list(); } @Override public MapWriter map(String name) { - return getWriter(MinorType.MAP).map(name); + return getWriter(MinorType.MAP.getType()).map(name); } @Override public ListWriter list(String name) { - return getWriter(MinorType.MAP).list(name); + return getWriter(MinorType.MAP.getType()).list(name); } <#list vv.types as type><#list type.minor as minor> @@ -128,17 +150,17 @@ public ListWriter list(String name) { <#assign capName = minor.class?cap_first /> <#if minor.class?starts_with("Decimal") > public ${capName}Writer ${lowerName}(String name, int scale, int precision) { - return getWriter(MinorType.MAP).${lowerName}(name, scale, precision); + return getWriter(MinorType.MAP.getType()).${lowerName}(name, scale, precision); } @Override public ${capName}Writer ${lowerName}(String name) { - return getWriter(MinorType.MAP).${lowerName}(name); + return getWriter(MinorType.MAP.getType()).${lowerName}(name); } @Override public ${capName}Writer ${lowerName}() { - return getWriter(MinorType.LIST).${lowerName}(); + return getListWriter().${lowerName}(); } diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java index 08bd39eae23..46aecb18075 100644 --- a/java/vector/src/main/codegen/templates/BaseWriter.java +++ b/java/vector/src/main/codegen/templates/BaseWriter.java @@ -71,6 +71,7 @@ public interface ListWriter extends BaseWriter { void endList(); MapWriter map(); ListWriter list(); + ListWriter list(int size); void copyReader(FieldReader reader); <#list vv.types as type><#list type.minor as minor> diff --git a/java/vector/src/main/codegen/templates/ComplexCopier.java b/java/vector/src/main/codegen/templates/ComplexCopier.java index 0dffe5e30be..738527173e1 100644 --- a/java/vector/src/main/codegen/templates/ComplexCopier.java +++ b/java/vector/src/main/codegen/templates/ComplexCopier.java @@ -47,6 +47,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { switch (mt) { case LIST: + case FIXED_SIZE_LIST: if (reader.isSet()) { writer.startList(); while (reader.next()) { diff --git a/java/vector/src/main/codegen/templates/MapWriters.java b/java/vector/src/main/codegen/templates/MapWriters.java index d3e6de95271..4f254e7a58a 100644 --- a/java/vector/src/main/codegen/templates/MapWriters.java +++ b/java/vector/src/main/codegen/templates/MapWriters.java @@ -123,7 +123,7 @@ public MapWriter map(String name) { } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.MAP); + ((PromotableWriter)writer).getWriter(MinorType.MAP.getType()); } } return writer; @@ -166,7 +166,7 @@ public ListWriter list(String name) { } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.LIST); + ((PromotableWriter)writer).getWriter(MinorType.LIST.getType()); } } return writer; @@ -235,7 +235,7 @@ public void end() { } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.${upperName}); + ((PromotableWriter)writer).getWriter(MinorType.${upperName}.getType()); } } return writer; diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index d980830923b..85e5eb9bbf3 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -32,11 +32,22 @@ * This class is generated using freemarker and the ${.template_name} template. */ +/** + * Writer for ListVector and fixedSizeListVector. If multiple types are written, list will be promoted + * to a union type. + */ @SuppressWarnings("unused") public class UnionListWriter extends AbstractFieldWriter { - private ListVector vector; - private UInt4Vector offsets; + // only one of listVector/fixedSizeListVector will be set + private final FixedSizeListVector fixedSizeListVector; + private final ListVector listVector; + // will only be set if listVector is set + private final UInt4Vector offsets; + + // for convenience, is whichever list vector is set + private final FieldVector vector; + private PromotableWriter writer; private boolean inMap = false; private String mapName; @@ -48,12 +59,22 @@ public UnionListWriter(ListVector vector) { public UnionListWriter(ListVector vector, NullableMapWriterFactory nullableMapWriterFactory) { this.vector = vector; - this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableMapWriterFactory); + this.listVector = vector; + this.fixedSizeListVector = null; this.offsets = vector.getOffsetVector(); + this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableMapWriterFactory); } - public UnionListWriter(ListVector vector, AbstractFieldWriter parent) { - this(vector); + public UnionListWriter(FixedSizeListVector vector) { + this(vector, NullableMapWriterFactory.getNullableMapWriterFactoryInstance()); + } + + public UnionListWriter(FixedSizeListVector vector, NullableMapWriterFactory nullableMapWriterFactory) { + this.vector = vector; + this.fixedSizeListVector = vector; + this.listVector = null; + this.offsets = null; + this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableMapWriterFactory); } @Override @@ -123,10 +144,16 @@ public ListWriter list() { return writer; } + @Override + public ListWriter list(int size) { + // prime the writer with the fixed size list + writer.getWriter(new FixedSizeList(size)); + return writer; + } + @Override public ListWriter list(String name) { - ListWriter listWriter = writer.list(name); - return listWriter; + return writer.list(name); } @Override @@ -137,13 +164,20 @@ public MapWriter map(String name) { @Override public void startList() { - vector.getMutator().startNewValue(idx()); - writer.setPosition(offsets.getAccessor().get(idx() + 1)); + if (listVector != null) { + listVector.getMutator().startNewValue(idx()); + writer.setPosition(offsets.getAccessor().get(idx() + 1)); + } else if (fixedSizeListVector != null) { + fixedSizeListVector.getMutator().setNotNull(idx()); + writer.setPosition(idx() * fixedSizeListVector.getListSize()); + } } @Override public void endList() { - offsets.getMutator().set(idx() + 1, writer.idx()); + if (listVector != null) { + offsets.getMutator().set(idx() + 1, writer.idx()); + } setPosition(idx() + 1); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java new file mode 100644 index 00000000000..61416ebbde4 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -0,0 +1,393 @@ +/******************************************************************************* + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.apache.arrow.vector.complex; + +import static java.util.Collections.singletonList; +import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ObjectArrays; + +import io.netty.buffer.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseDataValueVector; +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.schema.ArrowFieldNode; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.JsonStringArrayList; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; +import org.apache.arrow.vector.util.TransferPair; + +public class FixedSizeListVector extends BaseValueVector implements FieldVector, PromotableVector { + + private FieldVector vector; + private final BitVector bits; + private final int listSize; + private final DictionaryEncoding dictionary; + private final List innerVectors; + + private UnionFixedSizeListReader reader; + + private Mutator mutator = new Mutator(); + private Accessor accessor = new Accessor(); + + public FixedSizeListVector(String name, + BufferAllocator allocator, + int listSize, + DictionaryEncoding dictionary, + CallBack schemaChangeCallback) { + super(name, allocator); + Preconditions.checkArgument(listSize > 0, "list size must be positive"); + this.bits = new BitVector("$bits$", allocator); + this.vector = ZeroVector.INSTANCE; + this.listSize = listSize; + this.dictionary = dictionary; + this.innerVectors = Collections.singletonList((BufferBacked) bits); + this.reader = new UnionFixedSizeListReader(this); + } + + @Override + public Field getField() { + List children = ImmutableList.of(getDataVector().getField()); + return new Field(name, true, new ArrowType.FixedSizeList(listSize), children); + } + + @Override + public MinorType getMinorType() { + return MinorType.FIXED_SIZE_LIST; + } + + public int getListSize() { + return listSize; + } + + @Override + public void initializeChildrenFromFields(List children) { + if (children.size() != 1) { + throw new IllegalArgumentException("Lists have only one child. Found: " + children); + } + Field field = children.get(0); + FieldType type = new FieldType(field.isNullable(), field.getType(), field.getDictionary()); + AddOrGetResult addOrGetVector = addOrGetVector(type); + if (!addOrGetVector.isCreated()) { + throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector()); + } + addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + } + + @Override + public List getChildrenFromFields() { + return singletonList(vector); + } + + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + BaseDataValueVector.load(fieldNode, innerVectors, ownBuffers); + } + + @Override + public List getFieldBuffers() { + return BaseDataValueVector.unload(innerVectors); + } + + @Override + public List getFieldInnerVectors() { + return innerVectors; + } + + public UnionListWriter getWriter() { + return new UnionListWriter(this); + } + + @Override + public Accessor getAccessor() { + return accessor; + } + + @Override + public Mutator getMutator() { + return mutator; + } + + @Override + public UnionFixedSizeListReader getReader() { + return reader; + } + + @Override + public void allocateNew() throws OutOfMemoryException { + allocateNewSafe(); + } + + @Override + public boolean allocateNewSafe() { + /* boolean to keep track if all the memory allocation were successful + * Used in the case of composite vectors when we need to allocate multiple + * buffers for multiple vectors. If one of the allocations failed we need to + * clear all the memory that we allocated + */ + boolean success = false; + try { + success = bits.allocateNewSafe() && vector.allocateNewSafe(); + } finally { + if (!success) { + clear(); + } + } + if (success) { + bits.zeroVector(); + } + return success; + } + + public FieldVector getDataVector() { + return vector; + } + + @Override + public void setInitialCapacity(int numRecords) { + bits.setInitialCapacity(numRecords); + vector.setInitialCapacity(numRecords * listSize); + } + + @Override + public int getValueCapacity() { + if (vector == ZeroVector.INSTANCE) { + return 0; + } + return vector.getValueCapacity() / listSize; + } + + @Override + public int getBufferSize() { + if (accessor.getValueCount() == 0) { + return 0; + } + return bits.getBufferSize() + vector.getBufferSize(); + } + + @Override + public int getBufferSizeFor(int valueCount) { + if (valueCount == 0) { + return 0; + } + return bits.getBufferSizeFor(valueCount) + vector.getBufferSizeFor(valueCount * listSize); + } + + @Override + public Iterator iterator() { + return Collections.singleton(vector).iterator(); + } + + @Override + public void clear() { + bits.clear(); + vector.clear(); + super.clear(); + } + + @Override + public ArrowBuf[] getBuffers(boolean clear) { + final ArrowBuf[] buffers = ObjectArrays.concat(bits.getBuffers(false), vector.getBuffers(false), ArrowBuf.class); + if (clear) { + for (ArrowBuf buffer: buffers) { + buffer.retain(); + } + clear(); + } + return buffers; + } + + /** + * Returns 1 if inner vector is explicitly set via #addOrGetVector else 0 + */ + public int size() { + return vector == ZeroVector.INSTANCE ? 0 : 1; + } + + @Override + @SuppressWarnings("unchecked") + public AddOrGetResult addOrGetVector(FieldType type) { + boolean created = false; + if (vector instanceof ZeroVector) { + vector = type.createNewSingleVector(DATA_VECTOR_NAME, allocator, null); + this.reader = new UnionFixedSizeListReader(this); + created = true; + } + // returned vector must have the same field + if (!Objects.equals(vector.getField().getType(), type.getType())) { + final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", + type.getType(), vector.getField().getType()); + throw new SchemaChangeRuntimeException(msg); + } + + return new AddOrGetResult<>((T) vector, created); + } + + public void copyFromSafe(int inIndex, int outIndex, FixedSizeListVector from) { + copyFrom(inIndex, outIndex, from); + } + + public void copyFrom(int inIndex, int outIndex, FixedSizeListVector from) { + FieldReader in = from.getReader(); + in.setPosition(inIndex); + FieldWriter out = getWriter(); + out.setPosition(outIndex); + ComplexCopier.copy(in, out); + } + + @Override + public UnionVector promoteToUnion() { + UnionVector vector = new UnionVector(name, allocator, null); + this.vector.clear(); + this.vector = vector; + this.reader = new UnionFixedSizeListReader(this); + return vector; + } + + public class Accessor extends BaseValueVector.BaseAccessor { + + @Override + public Object getObject(int index) { + if (isNull(index)) { + return null; + } + final List vals = new JsonStringArrayList<>(listSize); + final ValueVector.Accessor valuesAccessor = vector.getAccessor(); + for(int i = 0; i < listSize; i++) { + vals.add(valuesAccessor.getObject(index * listSize + i)); + } + return vals; + } + + @Override + public boolean isNull(int index) { + return bits.getAccessor().get(index) == 0; + } + + @Override + public int getNullCount() { + return bits.getAccessor().getNullCount(); + } + + @Override + public int getValueCount() { + return bits.getAccessor().getValueCount(); + } + } + + public class Mutator extends BaseValueVector.BaseMutator { + + public void setNull(int index) { + bits.getMutator().setSafe(index, 0); + } + + public void setNotNull(int index) { + bits.getMutator().setSafe(index, 1); + } + + @Override + public void setValueCount(int valueCount) { + bits.getMutator().setValueCount(valueCount); + vector.getMutator().setValueCount(valueCount * listSize); + } + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return getTransferPair(ref, allocator, null); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return new TransferImpl(ref, allocator, callBack); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + return new TransferImpl((FixedSizeListVector) target); + } + + private class TransferImpl implements TransferPair { + + FixedSizeListVector to; + TransferPair pairs[] = new TransferPair[2]; + + public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { + this(new FixedSizeListVector(name, allocator, listSize, dictionary, callBack)); + } + + public TransferImpl(FixedSizeListVector to) { + this.to = to; + Field field = vector.getField(); + FieldType type = new FieldType(field.isNullable(), field.getType(), field.getDictionary()); + to.addOrGetVector(type); + pairs[0] = bits.makeTransferPair(to.bits); + pairs[1] = getDataVector().makeTransferPair(to.getDataVector()); + } + + @Override + public void transfer() { + for (TransferPair pair : pairs) { + pair.transfer(); + } + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + to.allocateNew(); + for (int i = 0; i < length; i++) { + copyValueSafe(startIndex + i, i); + } + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + this.to.copyFrom(from, to, FixedSizeListVector.this); + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 63235dfda87..677039748dc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -24,6 +24,10 @@ import java.util.Collections; import java.util.List; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ObjectArrays; + +import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.AddOrGetResult; @@ -48,12 +52,7 @@ import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ObjectArrays; - -import io.netty.buffer.ArrowBuf; - -public class ListVector extends BaseRepeatedValueVector implements FieldVector { +public class ListVector extends BaseRepeatedValueVector implements FieldVector, PromotableVector { final UInt4Vector offsets; final BitVector bits; @@ -220,7 +219,7 @@ public Mutator getMutator() { } @Override - public FieldReader getReader() { + public UnionListReader getReader() { return reader; } @@ -297,6 +296,7 @@ public ArrowBuf[] getBuffers(boolean clear) { return buffers; } + @Override public UnionVector promoteToUnion() { UnionVector vector = new UnionVector(name, allocator, callBack); replaceDataVector(vector); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index 647ab28352f..6456efba0dc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -31,6 +31,7 @@ import org.apache.arrow.vector.NullableVectorDefinitionSetter; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.impl.NullableMapReaderImpl; +import org.apache.arrow.vector.complex.impl.NullableMapWriter; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.schema.ArrowFieldNode; @@ -45,6 +46,7 @@ public class NullableMapVector extends MapVector implements FieldVector { private final NullableMapReaderImpl reader = new NullableMapReaderImpl(this); + private final NullableMapWriter writer = new NullableMapWriter(this); protected final BitVector bits; @@ -84,10 +86,14 @@ public List getFieldInnerVectors() { } @Override - public FieldReader getReader() { + public NullableMapReaderImpl getReader() { return reader; } + public NullableMapWriter getWriter() { + return writer; + } + @Override public TransferPair getTransferPair(BufferAllocator allocator) { return new NullableMapTransferPair(this, new NullableMapVector(name, allocator, dictionary, null), false); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java index 93451181ca9..e1a4f362969 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java @@ -18,5 +18,6 @@ package org.apache.arrow.vector.complex; public interface Positionable { + public int getPosition(); public void setPosition(int index); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java new file mode 100644 index 00000000000..8b528b4ccab --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java @@ -0,0 +1,32 @@ +/******************************************************************************* + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.apache.arrow.vector.complex; + +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.FieldType; + +public interface PromotableVector { + + AddOrGetResult addOrGetVector(FieldType type); + + UnionVector promoteToUnion(); +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java index e7c3c8c7e4b..7c73c27ecff 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java @@ -35,6 +35,11 @@ public AbstractBaseReader() { super(); } + @Override + public int getPosition() { + return index; + } + public void setPosition(int index){ this.index = index; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java index e6cf098f16f..13a0a6bd9e2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java @@ -34,6 +34,11 @@ int idx() { return index; } + @Override + public int getPosition() { + return index; + } + @Override public void setPosition(int index) { this.index = index; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index d16718e75a7..f46cd58b682 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -17,15 +17,20 @@ */ package org.apache.arrow.vector.complex.impl; +import java.util.Objects; + import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.AbstractMapVector; +import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NullableMapVector; +import org.apache.arrow.vector.complex.PromotableVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -39,7 +44,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter { private final AbstractMapVector parentContainer; - private final ListVector listVector; + private final PromotableVector promotableVector; private final NullableMapWriterFactory nullableMapWriterFactory; private int position; @@ -47,7 +52,7 @@ private enum State { UNTYPED, SINGLE, UNION } - private MinorType type; + private ArrowType type; private ValueVector vector; private UnionVector unionVector; private State state; @@ -59,17 +64,17 @@ public PromotableWriter(ValueVector v, AbstractMapVector parentContainer) { public PromotableWriter(ValueVector v, AbstractMapVector parentContainer, NullableMapWriterFactory nullableMapWriterFactory) { this.parentContainer = parentContainer; - this.listVector = null; + this.promotableVector = null; this.nullableMapWriterFactory = nullableMapWriterFactory; init(v); } - public PromotableWriter(ValueVector v, ListVector listVector) { + public PromotableWriter(ValueVector v, PromotableVector listVector) { this(v, listVector, NullableMapWriterFactory.getNullableMapWriterFactoryInstance()); } - public PromotableWriter(ValueVector v, ListVector listVector, NullableMapWriterFactory nullableMapWriterFactory) { - this.listVector = listVector; + public PromotableWriter(ValueVector v, PromotableVector listVector, NullableMapWriterFactory nullableMapWriterFactory) { + this.promotableVector = listVector; this.parentContainer = null; this.nullableMapWriterFactory = nullableMapWriterFactory; init(v); @@ -90,19 +95,22 @@ private void init(ValueVector v) { private void setWriter(ValueVector v) { state = State.SINGLE; vector = v; - type = v.getMinorType(); - switch (type) { - case MAP: + type = v.getField().getFieldType().getType(); + switch (type.getTypeID()) { + case Struct: writer = nullableMapWriterFactory.build((NullableMapVector) vector); break; - case LIST: + case List: writer = new UnionListWriter((ListVector) vector, nullableMapWriterFactory); break; - case UNION: + case FixedSizeList: + writer = new UnionListWriter((FixedSizeListVector) vector, nullableMapWriterFactory); + break; + case Union: writer = new UnionWriter((UnionVector) vector, nullableMapWriterFactory); break; default: - writer = type.getNewFieldWriter(vector); + writer = FieldType.nullable(type).createNewFieldWriter(vector); break; } } @@ -118,21 +126,22 @@ public void setPosition(int index) { } } - protected FieldWriter getWriter(MinorType type) { + @Override + protected FieldWriter getWriter(ArrowType type) { if (state == State.UNION) { - ((UnionWriter)writer).getWriter(type); + ((UnionWriter) writer).getWriter(Types.getMinorTypeForArrowType(type)); } else if (state == State.UNTYPED) { if (type == null) { // ??? return null; } - ValueVector v = listVector.addOrGetVector(FieldType.nullable(type.getType())).getVector(); + ValueVector v = promotableVector.addOrGetVector(FieldType.nullable(type)).getVector(); v.allocateNew(); setWriter(v); writer.setPosition(position); - } else if (type != this.type) { + } else if (!Objects.equals(type, this.type)) { promoteToUnion(); - ((UnionWriter)writer).getWriter(type); + ((UnionWriter) writer).getWriter(Types.getMinorTypeForArrowType(type)); } return writer; } @@ -146,7 +155,7 @@ protected FieldWriter getWriter() { return writer; } - private FieldWriter promoteToUnion() { + private void promoteToUnion() { String name = vector.getField().getName(); TransferPair tp = vector.getTransferPair(vector.getMinorType().name().toLowerCase(), vector.getAllocator()); tp.transfer(); @@ -154,8 +163,8 @@ private FieldWriter promoteToUnion() { // TODO allow dictionaries in complex types unionVector = parentContainer.addOrGetUnion(name); unionVector.allocateNew(); - } else if (listVector != null) { - unionVector = listVector.promoteToUnion(); + } else if (promotableVector != null) { + unionVector = promotableVector.promoteToUnion(); } unionVector.addVector((FieldVector)tp.getTo()); writer = new UnionWriter(unionVector, nullableMapWriterFactory); @@ -165,7 +174,6 @@ private FieldWriter promoteToUnion() { } vector = null; state = State.UNION; - return writer; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java new file mode 100644 index 00000000000..515d4ab8ce9 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java @@ -0,0 +1,103 @@ +/******************************************************************************* + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.holders.UnionHolder; +import org.apache.arrow.vector.types.Types.MinorType; + +/** + * Reader for fixed size list vectors + */ +public class UnionFixedSizeListReader extends AbstractFieldReader { + + private final FixedSizeListVector vector; + private final ValueVector data; + private final int listSize; + + private int currentOffset; + + public UnionFixedSizeListReader(FixedSizeListVector vector) { + this.vector = vector; + this.data = vector.getDataVector(); + this.listSize = vector.getListSize(); + } + + @Override + public boolean isSet() { + return !vector.getAccessor().isNull(idx()); + } + + @Override + public FieldReader reader() { + return data.getReader(); + } + + @Override + public Object readObject() { + return vector.getAccessor().getObject(idx()); + } + + @Override + public MinorType getMinorType() { + return vector.getMinorType(); + } + + @Override + public void setPosition(int index) { + super.setPosition(index); + data.getReader().setPosition(index * listSize); + currentOffset = 0; + } + + @Override + public void read(int index, UnionHolder holder) { + setPosition(idx()); + for (int i = -1; i < index; i++) { + if (!next()) { + throw new IndexOutOfBoundsException("Requested " + index + ", size " + listSize); + } + } + holder.reader = data.getReader(); + holder.isSet = vector.getAccessor().isNull(idx()) ? 0 : 1; + } + + @Override + public int size() { + return listSize; + } + + @Override + public boolean next() { + if (currentOffset < listSize) { + data.getReader().setPosition(idx() * listSize + currentOffset++); + return true; + } else { + return false; + } + } + + public void copyAsValue(ListWriter writer) { + ComplexCopier.copy(this, (FieldWriter) writer); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index 69d550fc9f7..24840ec988a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -35,6 +35,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.Date; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; +import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.ArrowType.Interval; @@ -105,6 +106,13 @@ public static TypeLayout getTypeLayout(final ArrowType arrowType) { return new TypeLayout(vectors); } + @Override public TypeLayout visit(FixedSizeList type) { + List vectors = asList( + validityVector() + ); + return new TypeLayout(vectors); + } + @Override public TypeLayout visit(FloatingPoint type) { int bitWidth; switch (type.getPrecision()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index b0455fa14e4..77ed1f3d6bc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -51,6 +51,7 @@ import org.apache.arrow.vector.NullableVarCharVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.complex.UnionVector; @@ -90,6 +91,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.Date; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; +import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.ArrowType.Interval; @@ -436,6 +438,23 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new UnionListWriter((ListVector) vector); } }, + FIXED_SIZE_LIST(null) { + @Override + public ArrowType getType() { + throw new UnsupportedOperationException("Cannot get simple type for FixedSizeList type"); + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + int size = ((FixedSizeList)fieldType.getType()).getListSize(); + return new FixedSizeListVector(name, allocator, size, fieldType.getDictionary(), schemaChangeCallback); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new UnionListWriter((FixedSizeListVector) vector); + } + }, UNION(new Union(Sparse, null)) { @Override public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { @@ -480,6 +499,10 @@ public static MinorType getMinorTypeForArrowType(ArrowType arrowType) { return MinorType.LIST; } + @Override public MinorType visit(FixedSizeList type) { + return MinorType.FIXED_SIZE_LIST; + } + @Override public MinorType visit(Union type) { return MinorType.UNION; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java index fe99e631360..befb49d0554 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java @@ -21,6 +21,8 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.util.CallBack; @@ -57,4 +59,8 @@ public FieldVector createNewSingleVector(String name, BufferAllocator allocator, return minorType.getNewVector(name, this, allocator, schemaCallBack); } + public FieldWriter createNewFieldWriter(ValueVector vector) { + MinorType minorType = Types.getMinorTypeForArrowType(type); + return minorType.getNewFieldWriter(vector); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java index 6291bfeaee6..c598069c2c3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java @@ -31,6 +31,14 @@ public class JsonStringArrayList extends ArrayList { mapper = new ObjectMapper(); } + public JsonStringArrayList() { + super(); + } + + public JsonStringArrayList(int size) { + super(size); + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java new file mode 100644 index 00000000000..3aa4cd040d6 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java @@ -0,0 +1,304 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Lists; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; +import org.apache.arrow.vector.util.JsonStringHashMap; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestFixedSizeListVector { + + private BufferAllocator allocator; + + @Before + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @After + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testIntType() { + try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { + vector.allocateNew(); + UnionListWriter writer = new UnionListWriter(vector); + for (int i = 0; i < 10; i++) { + writer.setPosition(i); + writer.startList(); + writer.writeInt(i); + writer.writeInt(i + 10); + writer.endList(); + } + writer.setValueCount(10); + + UnionFixedSizeListReader reader = vector.getReader(); + for (int i = 0; i < 10; i++) { + reader.setPosition(i); + Assert.assertTrue(reader.isSet()); + Assert.assertTrue(reader.next()); + Assert.assertEquals(i, reader.reader().readInteger().intValue()); + Assert.assertTrue(reader.next()); + Assert.assertEquals(i + 10, reader.reader().readInteger().intValue()); + Assert.assertFalse(reader.next()); + Assert.assertEquals(Lists.newArrayList(i, i + 10), reader.readObject()); + } + } + } + + @Test + public void testFloatTypeNullable() { + try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { + vector.allocateNew(); + UnionListWriter writer = new UnionListWriter(vector); + for (int i = 0; i < 10; i++) { + if (i % 2 == 0) { + writer.setPosition(i); + writer.startList(); + writer.writeFloat4(i + 0.1f); + writer.writeFloat4(i + 10.1f); + writer.endList(); + } + } + writer.setValueCount(10); + + UnionFixedSizeListReader reader = vector.getReader(); + for (int i = 0; i < 10; i++) { + reader.setPosition(i); + if (i % 2 == 0) { + Assert.assertTrue(reader.isSet()); + Assert.assertTrue(reader.next()); + Assert.assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001); + Assert.assertTrue(reader.next()); + Assert.assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001); + Assert.assertFalse(reader.next()); + Assert.assertEquals(Lists.newArrayList(i + 0.1f, i + 10.1f), reader.readObject()); + } else { + Assert.assertFalse(reader.isSet()); + Assert.assertNull(reader.readObject()); + } + } + } + } + + @Test + @SuppressWarnings("unchecked") + public void testMapType() { + try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { + vector.allocateNew(); + UnionListWriter writer = new UnionListWriter(vector); + MapWriter mapWriter = writer.map(); + for (int i = 0; i < 10; i++) { + if (i % 2 == 0) { + writer.setPosition(i); + writer.startList(); + for (int j = 0; j < 2; j++) { + mapWriter.start(); + mapWriter.integer("int").writeInt(i * 10 + j); + mapWriter.bigInt("bigint").writeBigInt(i * 10 + j); + mapWriter.end(); + } + writer.endList(); + } + } + writer.setValueCount(10); + + UnionFixedSizeListReader reader = vector.getReader(); + for (int i = 0; i < 10; i++) { + reader.setPosition(i); + if (i % 2 == 0) { + Assert.assertTrue(reader.isSet()); + Assert.assertTrue(reader.next()); + Assert.assertEquals(i * 10, reader.reader().reader("int").readInteger().intValue()); + Assert.assertEquals(i * 10, reader.reader().reader("bigint").readLong().intValue()); + Assert.assertTrue(reader.next()); + Assert.assertEquals(i * 10 + 1, reader.reader().reader("int").readInteger().intValue()); + Assert.assertEquals(i * 10 + 1, reader.reader().reader("bigint").readLong().intValue()); + Assert.assertFalse(reader.next()); + Map expected0 = new JsonStringHashMap<>(); + expected0.put("int", i * 10); + expected0.put("bigint", i * 10L); + Map expected1 = new JsonStringHashMap<>(); + expected1.put("int", i * 10 + 1); + expected1.put("bigint", i * 10L + 1); + Assert.assertEquals(Lists.newArrayList(expected0, expected1), reader.readObject()); + } else { + Assert.assertFalse(reader.isSet()); + Assert.assertNull(reader.readObject()); + } + } + } + } + + @Test + public void testListType() { + try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { + vector.allocateNew(); + UnionListWriter writer = new UnionListWriter(vector); + ListWriter listWriter = writer.list(); + for (int i = 0; i < 10; i++) { + if (i % 2 == 0) { + writer.setPosition(i); + writer.startList(); + for (int j = 0; j < 2; j++) { + listWriter.startList(); + for (int k = 0; k < i % 7; k++) { + listWriter.integer().writeInt(k + j); + } + listWriter.endList(); + } + writer.endList(); + } + } + writer.setValueCount(10); + + UnionFixedSizeListReader reader = vector.getReader(); + for (int i = 0; i < 10; i++) { + reader.setPosition(i); + if (i % 2 == 0) { + Assert.assertTrue(reader.isSet()); + Assert.assertTrue(reader.next()); + FieldReader listReader = reader.reader(); + List expected0 = new ArrayList<>(); + for (int k = 0; k < i % 7; k++) { + listReader.next(); + Assert.assertEquals(k, listReader.reader().readInteger().intValue()); + expected0.add(k); + } + Assert.assertTrue(reader.next()); + List expected1 = new ArrayList<>(); + for (int k = 0; k < i % 7; k++) { + listReader.next(); + Assert.assertEquals(k + 1, listReader.reader().readInteger().intValue()); + expected1.add(k + 1); + } + Assert.assertFalse(reader.next()); + Assert.assertEquals(Lists.newArrayList(expected0, expected1), reader.readObject()); + } else { + Assert.assertFalse(reader.isSet()); + Assert.assertNull(reader.readObject()); + } + } + } + } + + @Test + public void testNestedInList() { + try (ListVector vector = new ListVector("list", allocator, null, null)) { + vector.allocateNew(); + UnionListWriter writer = vector.getWriter(); + ListWriter listWriter = writer.list(2); + + for (int i = 0; i < 10; i++) { + if (i % 2 == 0) { + writer.setPosition(i); + writer.startList(); + for (int j = 0; j < i % 7; j++) { + listWriter.startList(); + for (int k = 0; k < 2; k++) { + listWriter.integer().writeInt(k + j); + } + listWriter.endList(); + } + writer.endList(); + } + } + writer.setValueCount(10); + + UnionListReader reader = vector.getReader(); + for (int i = 0; i < 10; i++) { + reader.setPosition(i); + if (i % 2 == 0) { + for (int j = 0; j < i % 7; j++) { + Assert.assertTrue(reader.next()); + FieldReader innerListReader = reader.reader(); + for (int k = 0; k < 2; k++) { + Assert.assertTrue(innerListReader.next()); + Assert.assertEquals(k + j, innerListReader.reader().readInteger().intValue()); + } + Assert.assertFalse(innerListReader.next()); + } + Assert.assertFalse(reader.next()); + } else { + Assert.assertFalse(reader.isSet()); + Assert.assertNull(reader.readObject()); + } + } + } + } + + @Test + public void testCopyFrom() throws Exception { + try (FixedSizeListVector inVector = new FixedSizeListVector("input", allocator, 2, null, null); + FixedSizeListVector outVector = new FixedSizeListVector("output", allocator, 2, null, null)) { + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + // populate input vector with the following records + // [1, 2] + // null + // [] + writer.setPosition(0); // optional + writer.startList(); + writer.bigInt().writeBigInt(1); + writer.bigInt().writeBigInt(2); + writer.endList(); + + writer.setPosition(2); + writer.startList(); + writer.endList(); + + writer.setValueCount(3); + + // copy values from input to output + outVector.allocateNew(); + for (int i = 0; i < 3; i++) { + outVector.copyFrom(i, i, inVector); + } + outVector.getMutator().setValueCount(3); + + // assert the output vector is correct + FieldReader reader = outVector.getReader(); + reader.setPosition(0); + Assert.assertTrue(reader.isSet()); + reader.setPosition(1); + Assert.assertFalse(reader.isSet()); + reader.setPosition(2); + Assert.assertTrue(reader.isSet()); + } + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 11730afd554..cd2d7aa9fb0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -30,11 +30,16 @@ import java.util.Arrays; import java.util.List; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullableIntVector; import org.apache.arrow.vector.NullableTinyIntVector; import org.apache.arrow.vector.NullableVarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NullableMapVector; @@ -49,6 +54,7 @@ import org.apache.arrow.vector.stream.ArrowStreamReader; import org.apache.arrow.vector.stream.ArrowStreamWriter; import org.apache.arrow.vector.stream.MessageSerializerTest; +import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; @@ -60,8 +66,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableList; - public class TestArrowFile extends BaseFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(TestArrowFile.class); @@ -576,6 +580,67 @@ private void validateNestedDictionary(ListVector vector, DictionaryProvider prov Assert.assertEquals(new Text("bar"), dictionaryAccessor.getObject(1)); } + @Test + public void testWriteReadFixedSizeList() throws IOException { + File file = new File("target/mytest_fixed_list.arrow"); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + int count = COUNT; + + // write + try (BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + NullableMapVector parent = new NullableMapVector("parent", originalVectorAllocator, null, null)) { + FixedSizeListVector tuples = parent.addOrGet("float-pairs", new FieldType(true, new FixedSizeList(2), null), FixedSizeListVector.class); + NullableIntVector ints = parent.addOrGet("ints", new FieldType(true, new Int(32, true), null), NullableIntVector.class); + tuples.allocateNew(); + ints.allocateNew(); + UnionListWriter writer = tuples.getWriter(); + for (int i = 0; i < 10; i++) { + writer.setPosition(i); + writer.startList(); + writer.writeFloat4(i + 0.1f); + writer.writeFloat4(i + 10.1f); + writer.endList(); + ints.getMutator().set(i, i); + } + + parent.getMutator().setValueCount(10); + write(parent, file, stream); + } + + // read + try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + FileInputStream fileInputStream = new FileInputStream(file); + ArrowFileReader arrowReader = new ArrowFileReader(fileInputStream.getChannel(), readerAllocator)) { + VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); + Schema schema = root.getSchema(); + LOGGER.debug("reading schema: " + schema); + + for (ArrowBlock rbBlock : arrowReader.getRecordBlocks()) { + arrowReader.loadRecordBatch(rbBlock); + Assert.assertEquals(count, root.getRowCount()); + for (int i = 0; i < 10; i++) { + Assert.assertEquals(Lists.newArrayList(i + 0.1f, i + 10.1f), root.getVector("float-pairs").getAccessor().getObject(i)); + Assert.assertEquals(i, root.getVector("ints").getAccessor().getObject(i)); + } + } + } + + // read from stream + try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + ByteArrayInputStream input = new ByteArrayInputStream(stream.toByteArray()); + ArrowStreamReader arrowReader = new ArrowStreamReader(input, readerAllocator)) { + VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); + Schema schema = root.getSchema(); + LOGGER.debug("reading schema: " + schema); + arrowReader.loadNextBatch(); + Assert.assertEquals(count, root.getRowCount()); + for (int i = 0; i < 10; i++) { + Assert.assertEquals(Lists.newArrayList(i + 0.1f, i + 10.1f), root.getVector("float-pairs").getAccessor().getObject(i)); + Assert.assertEquals(i, root.getVector("ints").getAccessor().getObject(i)); + } + } + } + /** * Writes the contents of parents to file. If outStream is non-null, also writes it * to outStream in the streaming serialized format. From 7cb232468e7d010f082f202fa32b79b37de68553 Mon Sep 17 00:00:00 2001 From: Emilio Lahr-Vivaz Date: Tue, 11 Apr 2017 11:56:22 -0400 Subject: [PATCH 2/5] reverting writer changes, adding examples of writing fixed size list using vector mutators --- .../templates/AbstractFieldWriter.java | 6 - .../AbstractPromotableFieldWriter.java | 56 ++--- .../main/codegen/templates/BaseWriter.java | 1 - .../main/codegen/templates/ComplexCopier.java | 3 +- .../main/codegen/templates/MapWriters.java | 6 +- .../codegen/templates/UnionListWriter.java | 54 +---- .../vector/complex/FixedSizeListVector.java | 14 +- .../vector/complex/impl/PromotableWriter.java | 52 ++--- .../org/apache/arrow/vector/types/Types.java | 2 +- .../arrow/vector/types/pojo/FieldType.java | 6 - .../arrow/vector/TestFixedSizeListVector.java | 203 +++--------------- .../arrow/vector/file/TestArrowFile.java | 16 +- 12 files changed, 92 insertions(+), 327 deletions(-) diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index c18c8bf111f..de076fc46ff 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -97,12 +97,6 @@ public ListWriter list() { return null; } - @Override - public ListWriter list(int size) { - fail("FixedSizeList"); - return null; - } - @Override public MapWriter map(String name) { fail("Map"); diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 3d1660fd223..60dd0c7b7ad 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -42,7 +42,7 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter { * @param type * @return */ - abstract protected FieldWriter getWriter(ArrowType type); + abstract protected FieldWriter getWriter(MinorType type); /** * Return the current FieldWriter @@ -52,63 +52,46 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter { @Override public void start() { - getWriter(MinorType.MAP.getType()).start(); + getWriter(MinorType.MAP).start(); } @Override public void end() { - getWriter(MinorType.MAP.getType()).end(); + getWriter(MinorType.MAP).end(); setPosition(idx() + 1); } @Override public void startList() { - getListWriter().startList(); + getWriter(MinorType.LIST).startList(); } @Override public void endList() { - getListWriter().endList(); + getWriter(MinorType.LIST).endList(); setPosition(idx() + 1); } - /** - * Gets or creates a list writer, backed by either a FixedSizeListVector or a variable length ListVector. - * If the writer doesn't exist, a variable length writer will be created. - * - * This allows us to re-use `startList()`, `endList()` etc methods for fixed and variable size lists - * - * @return list writer - */ - protected ListWriter getListWriter() { - FieldWriter writer = getWriter(); - if (writer != null && writer instanceof UnionListWriter) { - return writer; - } else { - return getWriter(MinorType.LIST.getType()); - } - } - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#if !minor.class?starts_with("Decimal") > @Override public void write(${name}Holder holder) { - getWriter(MinorType.${name?upper_case}.getType()).write(holder); + getWriter(MinorType.${name?upper_case}).write(holder); } public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - getWriter(MinorType.${name?upper_case}.getType()).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); + getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); } <#else> @Override public void write(DecimalHolder holder) { - getWriter(new Decimal(holder.precision, holder.scale)).write(holder); + getWriter(MinorType.DECIMAL).write(holder); } - public void writeDecimal(int precision, int scale, int start, ArrowBuf buffer) { - getWriter(new Decimal(precision, scale)).writeDecimal(start, buffer); + public void writeDecimal(int start, ArrowBuf buffer) { + getWriter(MinorType.DECIMAL).writeDecimal(start, buffer); } @@ -120,27 +103,22 @@ public void writeNull() { @Override public MapWriter map() { - return getListWriter().map(); + return getWriter(MinorType.LIST).map(); } @Override public ListWriter list() { - return getListWriter().list(); - } - - @Override - public ListWriter list(int size) { - return getWriter(new FixedSizeList(size)).list(); + return getWriter(MinorType.LIST).list(); } @Override public MapWriter map(String name) { - return getWriter(MinorType.MAP.getType()).map(name); + return getWriter(MinorType.MAP).map(name); } @Override public ListWriter list(String name) { - return getWriter(MinorType.MAP.getType()).list(name); + return getWriter(MinorType.MAP).list(name); } <#list vv.types as type><#list type.minor as minor> @@ -150,17 +128,17 @@ public ListWriter list(String name) { <#assign capName = minor.class?cap_first /> <#if minor.class?starts_with("Decimal") > public ${capName}Writer ${lowerName}(String name, int scale, int precision) { - return getWriter(MinorType.MAP.getType()).${lowerName}(name, scale, precision); + return getWriter(MinorType.MAP).${lowerName}(name, scale, precision); } @Override public ${capName}Writer ${lowerName}(String name) { - return getWriter(MinorType.MAP.getType()).${lowerName}(name); + return getWriter(MinorType.MAP).${lowerName}(name); } @Override public ${capName}Writer ${lowerName}() { - return getListWriter().${lowerName}(); + return getWriter(MinorType.LIST).${lowerName}(); } diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java index 46aecb18075..08bd39eae23 100644 --- a/java/vector/src/main/codegen/templates/BaseWriter.java +++ b/java/vector/src/main/codegen/templates/BaseWriter.java @@ -71,7 +71,6 @@ public interface ListWriter extends BaseWriter { void endList(); MapWriter map(); ListWriter list(); - ListWriter list(int size); void copyReader(FieldReader reader); <#list vv.types as type><#list type.minor as minor> diff --git a/java/vector/src/main/codegen/templates/ComplexCopier.java b/java/vector/src/main/codegen/templates/ComplexCopier.java index 738527173e1..89368ce6e0b 100644 --- a/java/vector/src/main/codegen/templates/ComplexCopier.java +++ b/java/vector/src/main/codegen/templates/ComplexCopier.java @@ -47,7 +47,6 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { switch (mt) { case LIST: - case FIXED_SIZE_LIST: if (reader.isSet()) { writer.startList(); while (reader.next()) { @@ -56,6 +55,8 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { writer.endList(); } break; + case FIXED_SIZE_LIST: + throw new UnsupportedOperationException("Copy fixed size list"); case MAP: if (reader.isSet()) { writer.start(); diff --git a/java/vector/src/main/codegen/templates/MapWriters.java b/java/vector/src/main/codegen/templates/MapWriters.java index 4f254e7a58a..d3e6de95271 100644 --- a/java/vector/src/main/codegen/templates/MapWriters.java +++ b/java/vector/src/main/codegen/templates/MapWriters.java @@ -123,7 +123,7 @@ public MapWriter map(String name) { } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.MAP.getType()); + ((PromotableWriter)writer).getWriter(MinorType.MAP); } } return writer; @@ -166,7 +166,7 @@ public ListWriter list(String name) { } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.LIST.getType()); + ((PromotableWriter)writer).getWriter(MinorType.LIST); } } return writer; @@ -235,7 +235,7 @@ public void end() { } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.${upperName}.getType()); + ((PromotableWriter)writer).getWriter(MinorType.${upperName}); } } return writer; diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 85e5eb9bbf3..d980830923b 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -32,22 +32,11 @@ * This class is generated using freemarker and the ${.template_name} template. */ -/** - * Writer for ListVector and fixedSizeListVector. If multiple types are written, list will be promoted - * to a union type. - */ @SuppressWarnings("unused") public class UnionListWriter extends AbstractFieldWriter { - // only one of listVector/fixedSizeListVector will be set - private final FixedSizeListVector fixedSizeListVector; - private final ListVector listVector; - // will only be set if listVector is set - private final UInt4Vector offsets; - - // for convenience, is whichever list vector is set - private final FieldVector vector; - + private ListVector vector; + private UInt4Vector offsets; private PromotableWriter writer; private boolean inMap = false; private String mapName; @@ -59,22 +48,12 @@ public UnionListWriter(ListVector vector) { public UnionListWriter(ListVector vector, NullableMapWriterFactory nullableMapWriterFactory) { this.vector = vector; - this.listVector = vector; - this.fixedSizeListVector = null; - this.offsets = vector.getOffsetVector(); this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableMapWriterFactory); + this.offsets = vector.getOffsetVector(); } - public UnionListWriter(FixedSizeListVector vector) { - this(vector, NullableMapWriterFactory.getNullableMapWriterFactoryInstance()); - } - - public UnionListWriter(FixedSizeListVector vector, NullableMapWriterFactory nullableMapWriterFactory) { - this.vector = vector; - this.fixedSizeListVector = vector; - this.listVector = null; - this.offsets = null; - this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableMapWriterFactory); + public UnionListWriter(ListVector vector, AbstractFieldWriter parent) { + this(vector); } @Override @@ -144,16 +123,10 @@ public ListWriter list() { return writer; } - @Override - public ListWriter list(int size) { - // prime the writer with the fixed size list - writer.getWriter(new FixedSizeList(size)); - return writer; - } - @Override public ListWriter list(String name) { - return writer.list(name); + ListWriter listWriter = writer.list(name); + return listWriter; } @Override @@ -164,20 +137,13 @@ public MapWriter map(String name) { @Override public void startList() { - if (listVector != null) { - listVector.getMutator().startNewValue(idx()); - writer.setPosition(offsets.getAccessor().get(idx() + 1)); - } else if (fixedSizeListVector != null) { - fixedSizeListVector.getMutator().setNotNull(idx()); - writer.setPosition(idx() * fixedSizeListVector.getListSize()); - } + vector.getMutator().startNewValue(idx()); + writer.setPosition(offsets.getAccessor().get(idx() + 1)); } @Override public void endList() { - if (listVector != null) { - offsets.getMutator().set(idx() + 1, writer.idx()); - } + offsets.getMutator().set(idx() + 1, writer.idx()); setPosition(idx() + 1); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 61416ebbde4..7f68e2efc37 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -41,11 +41,7 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.complex.impl.ComplexCopier; import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -134,10 +130,6 @@ public List getFieldInnerVectors() { return innerVectors; } - public UnionListWriter getWriter() { - return new UnionListWriter(this); - } - @Override public Accessor getAccessor() { return accessor; @@ -268,11 +260,7 @@ public void copyFromSafe(int inIndex, int outIndex, FixedSizeListVector from) { } public void copyFrom(int inIndex, int outIndex, FixedSizeListVector from) { - FieldReader in = from.getReader(); - in.setPosition(inIndex); - FieldWriter out = getWriter(); - out.setPosition(outIndex); - ComplexCopier.copy(in, out); + throw new UnsupportedOperationException("FixedSizeListVector.copyFrom"); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index f46cd58b682..d16718e75a7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -17,20 +17,15 @@ */ package org.apache.arrow.vector.complex.impl; -import java.util.Objects; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.AbstractMapVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NullableMapVector; -import org.apache.arrow.vector.complex.PromotableVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -44,7 +39,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter { private final AbstractMapVector parentContainer; - private final PromotableVector promotableVector; + private final ListVector listVector; private final NullableMapWriterFactory nullableMapWriterFactory; private int position; @@ -52,7 +47,7 @@ private enum State { UNTYPED, SINGLE, UNION } - private ArrowType type; + private MinorType type; private ValueVector vector; private UnionVector unionVector; private State state; @@ -64,17 +59,17 @@ public PromotableWriter(ValueVector v, AbstractMapVector parentContainer) { public PromotableWriter(ValueVector v, AbstractMapVector parentContainer, NullableMapWriterFactory nullableMapWriterFactory) { this.parentContainer = parentContainer; - this.promotableVector = null; + this.listVector = null; this.nullableMapWriterFactory = nullableMapWriterFactory; init(v); } - public PromotableWriter(ValueVector v, PromotableVector listVector) { + public PromotableWriter(ValueVector v, ListVector listVector) { this(v, listVector, NullableMapWriterFactory.getNullableMapWriterFactoryInstance()); } - public PromotableWriter(ValueVector v, PromotableVector listVector, NullableMapWriterFactory nullableMapWriterFactory) { - this.promotableVector = listVector; + public PromotableWriter(ValueVector v, ListVector listVector, NullableMapWriterFactory nullableMapWriterFactory) { + this.listVector = listVector; this.parentContainer = null; this.nullableMapWriterFactory = nullableMapWriterFactory; init(v); @@ -95,22 +90,19 @@ private void init(ValueVector v) { private void setWriter(ValueVector v) { state = State.SINGLE; vector = v; - type = v.getField().getFieldType().getType(); - switch (type.getTypeID()) { - case Struct: + type = v.getMinorType(); + switch (type) { + case MAP: writer = nullableMapWriterFactory.build((NullableMapVector) vector); break; - case List: + case LIST: writer = new UnionListWriter((ListVector) vector, nullableMapWriterFactory); break; - case FixedSizeList: - writer = new UnionListWriter((FixedSizeListVector) vector, nullableMapWriterFactory); - break; - case Union: + case UNION: writer = new UnionWriter((UnionVector) vector, nullableMapWriterFactory); break; default: - writer = FieldType.nullable(type).createNewFieldWriter(vector); + writer = type.getNewFieldWriter(vector); break; } } @@ -126,22 +118,21 @@ public void setPosition(int index) { } } - @Override - protected FieldWriter getWriter(ArrowType type) { + protected FieldWriter getWriter(MinorType type) { if (state == State.UNION) { - ((UnionWriter) writer).getWriter(Types.getMinorTypeForArrowType(type)); + ((UnionWriter)writer).getWriter(type); } else if (state == State.UNTYPED) { if (type == null) { // ??? return null; } - ValueVector v = promotableVector.addOrGetVector(FieldType.nullable(type)).getVector(); + ValueVector v = listVector.addOrGetVector(FieldType.nullable(type.getType())).getVector(); v.allocateNew(); setWriter(v); writer.setPosition(position); - } else if (!Objects.equals(type, this.type)) { + } else if (type != this.type) { promoteToUnion(); - ((UnionWriter) writer).getWriter(Types.getMinorTypeForArrowType(type)); + ((UnionWriter)writer).getWriter(type); } return writer; } @@ -155,7 +146,7 @@ protected FieldWriter getWriter() { return writer; } - private void promoteToUnion() { + private FieldWriter promoteToUnion() { String name = vector.getField().getName(); TransferPair tp = vector.getTransferPair(vector.getMinorType().name().toLowerCase(), vector.getAllocator()); tp.transfer(); @@ -163,8 +154,8 @@ private void promoteToUnion() { // TODO allow dictionaries in complex types unionVector = parentContainer.addOrGetUnion(name); unionVector.allocateNew(); - } else if (promotableVector != null) { - unionVector = promotableVector.promoteToUnion(); + } else if (listVector != null) { + unionVector = listVector.promoteToUnion(); } unionVector.addVector((FieldVector)tp.getTo()); writer = new UnionWriter(unionVector, nullableMapWriterFactory); @@ -174,6 +165,7 @@ private void promoteToUnion() { } vector = null; state = State.UNION; + return writer; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 77ed1f3d6bc..6023f1c9500 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -452,7 +452,7 @@ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocato @Override public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UnionListWriter((FixedSizeListVector) vector); + throw new UnsupportedOperationException("FieldWriter not implemented for FixedSizeList type"); } }, UNION(new Union(Sparse, null)) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java index befb49d0554..fe99e631360 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java @@ -21,8 +21,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.util.CallBack; @@ -59,8 +57,4 @@ public FieldVector createNewSingleVector(String name, BufferAllocator allocator, return minorType.getNewVector(name, this, allocator, schemaCallBack); } - public FieldWriter createNewFieldWriter(ValueVector vector) { - MinorType minorType = Types.getMinorTypeForArrowType(type); - return minorType.getNewFieldWriter(vector); - } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java index 3aa4cd040d6..d2b0dab7bcd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java @@ -17,10 +17,6 @@ */ package org.apache.arrow.vector; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - import com.google.common.collect.Lists; import org.apache.arrow.memory.BufferAllocator; @@ -28,11 +24,10 @@ import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader; import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.util.JsonStringHashMap; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -55,16 +50,16 @@ public void terminate() throws Exception { @Test public void testIntType() { try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { + NullableIntVector nested = (NullableIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector(); + NullableIntVector.Mutator mutator = nested.getMutator(); vector.allocateNew(); - UnionListWriter writer = new UnionListWriter(vector); + for (int i = 0; i < 10; i++) { - writer.setPosition(i); - writer.startList(); - writer.writeInt(i); - writer.writeInt(i + 10); - writer.endList(); + vector.getMutator().setNotNull(i); + mutator.set(i * 2, i); + mutator.set(i * 2 + 1, i + 10); } - writer.setValueCount(10); + vector.getMutator().setValueCount(10); UnionFixedSizeListReader reader = vector.getReader(); for (int i = 0; i < 10; i++) { @@ -83,18 +78,18 @@ public void testIntType() { @Test public void testFloatTypeNullable() { try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { + NullableFloat4Vector nested = (NullableFloat4Vector) vector.addOrGetVector(FieldType.nullable(MinorType.FLOAT4.getType())).getVector(); + NullableFloat4Vector.Mutator mutator = nested.getMutator(); vector.allocateNew(); - UnionListWriter writer = new UnionListWriter(vector); + for (int i = 0; i < 10; i++) { if (i % 2 == 0) { - writer.setPosition(i); - writer.startList(); - writer.writeFloat4(i + 0.1f); - writer.writeFloat4(i + 10.1f); - writer.endList(); + vector.getMutator().setNotNull(i); + mutator.set(i * 2, i + 0.1f); + mutator.set(i * 2 + 1, i + 10.1f); } } - writer.setValueCount(10); + vector.getMutator().setValueCount(10); UnionFixedSizeListReader reader = vector.getReader(); for (int i = 0; i < 10; i++) { @@ -115,129 +110,28 @@ public void testFloatTypeNullable() { } } - @Test - @SuppressWarnings("unchecked") - public void testMapType() { - try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { - vector.allocateNew(); - UnionListWriter writer = new UnionListWriter(vector); - MapWriter mapWriter = writer.map(); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - writer.setPosition(i); - writer.startList(); - for (int j = 0; j < 2; j++) { - mapWriter.start(); - mapWriter.integer("int").writeInt(i * 10 + j); - mapWriter.bigInt("bigint").writeBigInt(i * 10 + j); - mapWriter.end(); - } - writer.endList(); - } - } - writer.setValueCount(10); - - UnionFixedSizeListReader reader = vector.getReader(); - for (int i = 0; i < 10; i++) { - reader.setPosition(i); - if (i % 2 == 0) { - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); - Assert.assertEquals(i * 10, reader.reader().reader("int").readInteger().intValue()); - Assert.assertEquals(i * 10, reader.reader().reader("bigint").readLong().intValue()); - Assert.assertTrue(reader.next()); - Assert.assertEquals(i * 10 + 1, reader.reader().reader("int").readInteger().intValue()); - Assert.assertEquals(i * 10 + 1, reader.reader().reader("bigint").readLong().intValue()); - Assert.assertFalse(reader.next()); - Map expected0 = new JsonStringHashMap<>(); - expected0.put("int", i * 10); - expected0.put("bigint", i * 10L); - Map expected1 = new JsonStringHashMap<>(); - expected1.put("int", i * 10 + 1); - expected1.put("bigint", i * 10L + 1); - Assert.assertEquals(Lists.newArrayList(expected0, expected1), reader.readObject()); - } else { - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); - } - } - } - } - - @Test - public void testListType() { - try (FixedSizeListVector vector = new FixedSizeListVector("list", allocator, 2, null, null)) { - vector.allocateNew(); - UnionListWriter writer = new UnionListWriter(vector); - ListWriter listWriter = writer.list(); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - writer.setPosition(i); - writer.startList(); - for (int j = 0; j < 2; j++) { - listWriter.startList(); - for (int k = 0; k < i % 7; k++) { - listWriter.integer().writeInt(k + j); - } - listWriter.endList(); - } - writer.endList(); - } - } - writer.setValueCount(10); - - UnionFixedSizeListReader reader = vector.getReader(); - for (int i = 0; i < 10; i++) { - reader.setPosition(i); - if (i % 2 == 0) { - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); - FieldReader listReader = reader.reader(); - List expected0 = new ArrayList<>(); - for (int k = 0; k < i % 7; k++) { - listReader.next(); - Assert.assertEquals(k, listReader.reader().readInteger().intValue()); - expected0.add(k); - } - Assert.assertTrue(reader.next()); - List expected1 = new ArrayList<>(); - for (int k = 0; k < i % 7; k++) { - listReader.next(); - Assert.assertEquals(k + 1, listReader.reader().readInteger().intValue()); - expected1.add(k + 1); - } - Assert.assertFalse(reader.next()); - Assert.assertEquals(Lists.newArrayList(expected0, expected1), reader.readObject()); - } else { - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); - } - } - } - } - @Test public void testNestedInList() { try (ListVector vector = new ListVector("list", allocator, null, null)) { + ListVector.Mutator mutator = vector.getMutator(); + FixedSizeListVector tuples = (FixedSizeListVector) vector.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeList(2))).getVector(); + FixedSizeListVector.Mutator tupleMutator = tuples.getMutator(); + NullableIntVector.Mutator innerMutator = (NullableIntVector.Mutator) tuples.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector().getMutator(); vector.allocateNew(); - UnionListWriter writer = vector.getWriter(); - ListWriter listWriter = writer.list(2); for (int i = 0; i < 10; i++) { if (i % 2 == 0) { - writer.setPosition(i); - writer.startList(); + mutator.startNewValue(i); + int position = vector.getOffsetVector().getAccessor().get(i + 1); for (int j = 0; j < i % 7; j++) { - listWriter.startList(); - for (int k = 0; k < 2; k++) { - listWriter.integer().writeInt(k + j); - } - listWriter.endList(); + tupleMutator.setNotNull(position + j); + innerMutator.set((position + j) * 2, j); + innerMutator.set((position + j) * 2 + 1, j + 1); } - writer.endList(); + vector.getOffsetVector().getMutator().set(i + 1, position + i % 7); } } - writer.setValueCount(10); + mutator.setValueCount(10); UnionListReader reader = vector.getReader(); for (int i = 0; i < 10; i++) { @@ -260,45 +154,4 @@ public void testNestedInList() { } } } - - @Test - public void testCopyFrom() throws Exception { - try (FixedSizeListVector inVector = new FixedSizeListVector("input", allocator, 2, null, null); - FixedSizeListVector outVector = new FixedSizeListVector("output", allocator, 2, null, null)) { - UnionListWriter writer = inVector.getWriter(); - writer.allocate(); - - // populate input vector with the following records - // [1, 2] - // null - // [] - writer.setPosition(0); // optional - writer.startList(); - writer.bigInt().writeBigInt(1); - writer.bigInt().writeBigInt(2); - writer.endList(); - - writer.setPosition(2); - writer.startList(); - writer.endList(); - - writer.setValueCount(3); - - // copy values from input to output - outVector.allocateNew(); - for (int i = 0; i < 3; i++) { - outVector.copyFrom(i, i, inVector); - } - outVector.getMutator().setValueCount(3); - - // assert the output vector is correct - FieldReader reader = outVector.getReader(); - reader.setPosition(0); - Assert.assertTrue(reader.isSet()); - reader.setPosition(1); - Assert.assertFalse(reader.isSet()); - reader.setPosition(2); - Assert.assertTrue(reader.isSet()); - } - } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index cd2d7aa9fb0..3bed45361fc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -35,6 +35,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullableFloat4Vector; import org.apache.arrow.vector.NullableIntVector; import org.apache.arrow.vector.NullableTinyIntVector; import org.apache.arrow.vector.NullableVarCharVector; @@ -54,6 +55,7 @@ import org.apache.arrow.vector.stream.ArrowStreamReader; import org.apache.arrow.vector.stream.ArrowStreamWriter; import org.apache.arrow.vector.stream.MessageSerializerTest; +import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; @@ -590,16 +592,14 @@ public void testWriteReadFixedSizeList() throws IOException { try (BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); NullableMapVector parent = new NullableMapVector("parent", originalVectorAllocator, null, null)) { FixedSizeListVector tuples = parent.addOrGet("float-pairs", new FieldType(true, new FixedSizeList(2), null), FixedSizeListVector.class); + NullableFloat4Vector floats = (NullableFloat4Vector) tuples.addOrGetVector(new FieldType(true, MinorType.FLOAT4.getType(), null)).getVector(); NullableIntVector ints = parent.addOrGet("ints", new FieldType(true, new Int(32, true), null), NullableIntVector.class); - tuples.allocateNew(); - ints.allocateNew(); - UnionListWriter writer = tuples.getWriter(); + parent.allocateNew(); + for (int i = 0; i < 10; i++) { - writer.setPosition(i); - writer.startList(); - writer.writeFloat4(i + 0.1f); - writer.writeFloat4(i + 10.1f); - writer.endList(); + tuples.getMutator().setNotNull(i); + floats.getMutator().set(i * 2, i + 0.1f); + floats.getMutator().set(i * 2 + 1, i + 10.1f); ints.getMutator().set(i, i); } From 594c0a210eb1a17de633348cdcf988202160375b Mon Sep 17 00:00:00 2001 From: Emilio Lahr-Vivaz Date: Wed, 12 Apr 2017 10:22:39 -0400 Subject: [PATCH 3/5] simplifying writing of list vectors through mutator --- .../vector/complex/BaseRepeatedValueVector.java | 6 ++++-- .../org/apache/arrow/vector/complex/ListVector.java | 13 ++++++++++++- .../arrow/vector/complex/RepeatedValueVector.java | 6 +++--- .../arrow/vector/TestFixedSizeListVector.java | 5 ++--- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index da221e33013..c9a9319c691 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -213,12 +213,14 @@ public boolean isEmpty(int index) { public abstract class BaseRepeatedMutator extends BaseValueVector.BaseMutator implements RepeatedMutator { @Override - public void startNewValue(int index) { + public int startNewValue(int index) { while (offsets.getValueCapacity() <= index) { offsets.reAlloc(); } - offsets.getMutator().setSafe(index+1, offsets.getAccessor().get(index)); + int offset = offsets.getAccessor().get(index); + offsets.getMutator().setSafe(index+1, offset); setValueCount(index+1); + return offset; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 677039748dc..1455aba4073 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -345,12 +345,23 @@ public void setNotNull(int index) { } @Override - public void startNewValue(int index) { + public int startNewValue(int index) { for (int i = lastSet; i <= index; i++) { offsets.getMutator().setSafe(i + 1, offsets.getAccessor().get(i)); } setNotNull(index); lastSet = index + 1; + return offsets.getAccessor().get(lastSet); + } + + /** + * End the current value + * + * @param index index of the value to end + * @param size number of elements in the list that was written + */ + public void endValue(int index, int size) { + offsets.getMutator().set(index + 1, offsets.getAccessor().get(index + 1) + size); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java index 54db393e831..b01a4e7cf49 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java @@ -73,13 +73,13 @@ interface RepeatedAccessor extends ValueVector.Accessor { } interface RepeatedMutator extends ValueVector.Mutator { + /** * Starts a new value that is a container of cells. * * @param index index of new value to start + * @return index into the child vector */ - void startNewValue(int index); - - + int startNewValue(int index); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java index d2b0dab7bcd..cfb7b3d2a26 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java @@ -121,14 +121,13 @@ public void testNestedInList() { for (int i = 0; i < 10; i++) { if (i % 2 == 0) { - mutator.startNewValue(i); - int position = vector.getOffsetVector().getAccessor().get(i + 1); + int position = mutator.startNewValue(i); for (int j = 0; j < i % 7; j++) { tupleMutator.setNotNull(position + j); innerMutator.set((position + j) * 2, j); innerMutator.set((position + j) * 2 + 1, j + 1); } - vector.getOffsetVector().getMutator().set(i + 1, position + i % 7); + mutator.endValue(i, i % 7); } } mutator.setValueCount(10); From 229e24ae9c69e37049db2411cfc808ba6541bb20 Mon Sep 17 00:00:00 2001 From: Emilio Lahr-Vivaz Date: Fri, 14 Apr 2017 16:37:12 -0400 Subject: [PATCH 4/5] re-ordering imports --- .../java/org/apache/arrow/vector/complex/ListVector.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 1455aba4073..9392afbccda 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -24,10 +24,6 @@ import java.util.Collections; import java.util.List; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ObjectArrays; - -import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.AddOrGetResult; @@ -52,6 +48,11 @@ import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ObjectArrays; + +import io.netty.buffer.ArrowBuf; + public class ListVector extends BaseRepeatedValueVector implements FieldVector, PromotableVector { final UInt4Vector offsets; From b139d3dd13cc8ccc52a5ee8ab78bfffe518e8c94 Mon Sep 17 00:00:00 2001 From: Emilio Lahr-Vivaz Date: Fri, 14 Apr 2017 16:41:44 -0400 Subject: [PATCH 5/5] adding reAlloc to FixedSizeListVector --- .../apache/arrow/vector/complex/FixedSizeListVector.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 7f68e2efc37..7ac9f3bd513 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -171,6 +171,12 @@ public boolean allocateNewSafe() { return success; } + @Override + public void reAlloc() { + bits.reAlloc(); + vector.reAlloc(); + } + public FieldVector getDataVector() { return vector; }