From f2f0596a243c5d72370e115dabbced90a47e651e Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Sat, 20 Aug 2016 10:19:08 -0700 Subject: [PATCH 01/21] Update FieldNode structure to be more explicit and reflect schema --- format/Message.fbs | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/format/Message.fbs b/format/Message.fbs index a78009b6e5f..cd3028efacd 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -17,7 +17,7 @@ table Tuple { table List { } -enum UnionMode:int { Sparse, Dense } +enum UnionMode:short { Sparse, Dense } table Union { mode: UnionMode; @@ -28,7 +28,7 @@ table Int { is_signed: bool; } -enum Precision:int {SINGLE, DOUBLE} +enum Precision:short {SINGLE, DOUBLE} table FloatingPoint { precision: Precision; @@ -114,7 +114,7 @@ table Field { /// ---------------------------------------------------------------------- /// Endianness of the platform that produces the RecordBatch -enum Endianness:int { Little, Big } +enum Endianness:short { Little, Big } /// ---------------------------------------------------------------------- /// A Schema describes the columns in a row batch @@ -133,8 +133,19 @@ table Schema { /// Data structures for describing a table row batch (a collection of /// equal-length Arrow arrays) +enum VectorType: short { + /// used in List type Dense Union and variable length primitive types (String, Binary) + OFFSET, + /// fixed length primitive values + VALUES, + /// Bit vector indicated if each value is null + VALIDITY, + /// Type vector used in Union type + TYPE +} + /// A Buffer represents a single contiguous memory segment -struct Buffer { +table Buffer { /// The shared memory page id where this buffer is located. Currently this is /// not used page: int; @@ -146,6 +157,9 @@ struct Buffer { /// The absolute length (in bytes) of the memory buffer. The memory is found /// from offset (inclusive) to offset + length (non-inclusive). 
length: long; + + /// the type of the vector to be explicit + type: VectorType; } /// Metadata about a field at some level of a nested type tree (but not @@ -154,7 +168,7 @@ struct Buffer { /// For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] /// would have {length: 5, null_count: 2} for its List node, and {length: 6, /// null_count: 0} for its Int16 node, as separate FieldNode structs -struct FieldNode { +table FieldNode { /// The number of value slots in the Arrow array at this level of a nested /// tree length: int; @@ -163,26 +177,30 @@ struct FieldNode { /// to write their physical validity bitmap out as a materialized buffer, /// instead setting the length of the bitmap buffer to 0. null_count: int; + + /// children according to the schema + children: [FieldNode]; + + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the field type and length. + /// For example, most primitive arrays will have 2 buffers, 1 for the validity + /// bitmap and 1 for the values. For struct arrays, there will only be a + /// single buffer for the validity (nulls) bitmap + vectors: [Buffer]; } /// A data header describing the shared memory layout of a "record" or "row" /// batch. Some systems call this a "row batch" internally and others a "record /// batch". table RecordBatch { - /// number of records / rows. The arrays in the batch should all have this + /// number of records / rows. The root arrays in the batch should all have this /// length length: int; - /// Nodes correspond to the pre-ordered flattened logical schema + /// Nodes correspond to the first level of the logical schema nodes: [FieldNode]; - /// Buffers correspond to the pre-ordered flattened buffer tree - /// - /// The number of buffers appended to this list depends on the schema. For - /// example, most primitive arrays will have 2 buffers, 1 for the validity - /// bitmap and 1 for the values. 
For struct arrays, there will only be a - /// single buffer for the validity (nulls) bitmap - buffers: [Buffer]; } /// ---------------------------------------------------------------------- From 807db51b4fecbc4d217f0bdfdbf8bacbc7cb29f4 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Sun, 21 Aug 2016 19:05:41 -0700 Subject: [PATCH 02/21] move information to schema --- format/Message.fbs | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/format/Message.fbs b/format/Message.fbs index cd3028efacd..fd40751e46a 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -107,6 +107,10 @@ table Field { // present only if the field is dictionary encoded // will point to a dictionary provided by a DictionaryBatch message dictionary: long; + /// the buffers produced for this type (as derived from the Type) + /// does not include children + /// each recordbatch will return instances of those Buffers. + buffers: [ VectorType ]; // children apply only to Nested data types like Struct, List and Union children: [Field]; } @@ -145,7 +149,7 @@ enum VectorType: short { } /// A Buffer represents a single contiguous memory segment -table Buffer { +struct Buffer { /// The shared memory page id where this buffer is located. Currently this is /// not used page: int; @@ -157,9 +161,6 @@ table Buffer { /// The absolute length (in bytes) of the memory buffer. The memory is found /// from offset (inclusive) to offset + length (non-inclusive). 
length: long; - - /// the type of the vector to be explicit - type: VectorType; } /// Metadata about a field at some level of a nested type tree (but not @@ -168,7 +169,7 @@ table Buffer { /// For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] /// would have {length: 5, null_count: 2} for its List node, and {length: 6, /// null_count: 0} for its Int16 node, as separate FieldNode structs -table FieldNode { +struct FieldNode { /// The number of value slots in the Arrow array at this level of a nested /// tree length: int; @@ -177,30 +178,26 @@ table FieldNode { /// to write their physical validity bitmap out as a materialized buffer, /// instead setting the length of the bitmap buffer to 0. null_count: int; - - /// children according to the schema - children: [FieldNode]; - - /// Buffers correspond to the pre-ordered flattened buffer tree - /// - /// The number of buffers appended to this list depends on the field type and length. - /// For example, most primitive arrays will have 2 buffers, 1 for the validity - /// bitmap and 1 for the values. For struct arrays, there will only be a - /// single buffer for the validity (nulls) bitmap - vectors: [Buffer]; } /// A data header describing the shared memory layout of a "record" or "row" /// batch. Some systems call this a "row batch" internally and others a "record /// batch". table RecordBatch { - /// number of records / rows. The root arrays in the batch should all have this + /// number of records / rows. The arrays in the batch should all have this /// length length: int; - /// Nodes correspond to the first level of the logical schema + /// Nodes correspond to the pre-ordered flattened logical schema nodes: [FieldNode]; + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the schema. For + /// example, most primitive arrays will have 2 buffers, 1 for the validity + /// bitmap and 1 for the values. 
For struct arrays, there will only be a + /// single buffer for the validity (nulls) bitmap + buffers: [Buffer]; } /// ---------------------------------------------------------------------- From ac6902ab5836432aaa2be0c7a3a80e0f9f12f44d Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Fri, 19 Aug 2016 13:52:45 -0700 Subject: [PATCH 03/21] ARROW-264: File format --- format/File.fbs | 28 +++ java/format/pom.xml | 1 + .../src/main/codegen/data/ArrowTypes.tdd | 2 +- .../src/main/codegen/templates/ArrowType.java | 14 +- .../main/codegen/templates/UnionVector.java | 4 +- .../org/apache/arrow/file/ArrowBlock.java | 82 +++++++ .../org/apache/arrow/file/ArrowFooter.java | 144 +++++++++++ .../org/apache/arrow/file/ArrowReader.java | 144 +++++++++++ .../org/apache/arrow/file/ArrowWriter.java | 151 ++++++++++++ .../arrow/file/InvalidArrowFileException.java | 27 +++ .../org/apache/arrow/schema/ArrowBuffer.java | 81 +++++++ .../apache/arrow/schema/ArrowFieldNode.java | 48 ++++ .../apache/arrow/schema/ArrowRecordBatch.java | 84 +++++++ .../apache/arrow/schema/FBSerializable.java | 24 ++ .../apache/arrow/schema/FBSerializables.java | 37 +++ .../org/apache/arrow/vector/VectorLoader.java | 229 ++++++++++++++++++ .../vector/complex/AbstractMapVector.java | 27 ++- .../vector/complex/ComplexVectorLoader.java | 40 +++ .../arrow/vector/complex/ListVector.java | 9 +- .../arrow/vector/complex/MapVector.java | 7 +- .../arrow/vector/complex/NestedVector.java | 25 ++ .../org/apache/arrow/vector/types/Types.java | 14 +- .../ByteArrayReadableSeekableByteChannel.java | 80 ++++++ .../org/apache/arrow/file/TestArrowFile.java | 144 +++++++++++ .../apache/arrow/file/TestArrowFooter.java | 58 +++++ .../arrow/file/TestArrowReaderWriter.java | 100 ++++++++ 26 files changed, 1578 insertions(+), 26 deletions(-) create mode 100644 format/File.fbs create mode 100644 java/vector/src/main/java/org/apache/arrow/file/ArrowBlock.java create mode 100644 
java/vector/src/main/java/org/apache/arrow/file/ArrowFooter.java create mode 100644 java/vector/src/main/java/org/apache/arrow/file/ArrowReader.java create mode 100644 java/vector/src/main/java/org/apache/arrow/file/ArrowWriter.java create mode 100644 java/vector/src/main/java/org/apache/arrow/file/InvalidArrowFileException.java create mode 100644 java/vector/src/main/java/org/apache/arrow/schema/ArrowBuffer.java create mode 100644 java/vector/src/main/java/org/apache/arrow/schema/ArrowFieldNode.java create mode 100644 java/vector/src/main/java/org/apache/arrow/schema/ArrowRecordBatch.java create mode 100644 java/vector/src/main/java/org/apache/arrow/schema/FBSerializable.java create mode 100644 java/vector/src/main/java/org/apache/arrow/schema/FBSerializables.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java create mode 100644 java/vector/src/test/java/org/apache/arrow/file/ByteArrayReadableSeekableByteChannel.java create mode 100644 java/vector/src/test/java/org/apache/arrow/file/TestArrowFile.java create mode 100644 java/vector/src/test/java/org/apache/arrow/file/TestArrowFooter.java create mode 100644 java/vector/src/test/java/org/apache/arrow/file/TestArrowReaderWriter.java diff --git a/format/File.fbs b/format/File.fbs new file mode 100644 index 00000000000..42a9f99c720 --- /dev/null +++ b/format/File.fbs @@ -0,0 +1,28 @@ +include "Message.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// Arrow File metadata +/// + +table Footer { + + schema: org.apache.arrow.flatbuf.Schema; + + dictionaries: [ Block ]; + + recordBatches: [ Block ]; +} + +struct Block { + + offset: long; + + metaDataLength: int; + + bodyLength: long; + +} + +root_type Footer; diff 
--git a/java/format/pom.xml b/java/format/pom.xml index cb11b5ff3c4..dc5897581b5 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -106,6 +106,7 @@ -o target/generated-sources/ ../../format/Message.fbs + ../../format/File.fbs diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 4ab7f8562f9..7a3eaa013c8 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -30,7 +30,7 @@ }, { name: "Union", - fields: [] + fields: [{name: "mode", type: int}] }, { name: "Int", diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index 6dfaf216ad0..7ff319b3667 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -24,9 +24,8 @@ <@pp.dropOutputFile /> <@pp.changeOutputFile name="/org/apache/arrow/vector/types/pojo/ArrowType.java" /> - - <#include "/@includes/license.ftl" /> + package org.apache.arrow.vector.types.pojo; import com.google.flatbuffers.FlatBufferBuilder; @@ -38,7 +37,13 @@ public abstract class ArrowType { public abstract byte getTypeType(); public abstract int getType(FlatBufferBuilder builder); + public abstract T accept(ArrowTypeVisitor visitor); + public static interface ArrowTypeVisitor { + <#list arrowTypes.types as type> + T visit(${type.name} type); + + } <#list arrowTypes.types as type> <#assign name = type.name> @@ -102,6 +107,11 @@ public boolean equals(Object obj) { } + + @Override + public T accept(ArrowTypeVisitor visitor) { + return visitor.visit(this); + } } diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 1fef490d4ec..7e75abb0cea 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -43,6 +43,8 @@ import 
org.apache.arrow.vector.complex.impl.ComplexCopier; import org.apache.arrow.vector.util.CallBack; +import static org.apache.arrow.flatbuf.UnionMode.Sparse; + /* * This class is generated using freemarker and the ${.template_name} template. */ @@ -203,7 +205,7 @@ public Field getField() { for (ValueVector v : internalMap.getChildren()) { childFields.add(v.getField()); } - return new Field(name, true, new ArrowType.Union(), childFields); + return new Field(name, true, new ArrowType.Union(Sparse), childFields); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowBlock.java b/java/vector/src/main/java/org/apache/arrow/file/ArrowBlock.java new file mode 100644 index 00000000000..dfe392796fa --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/file/ArrowBlock.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.file; + +import org.apache.arrow.flatbuf.Block; +import org.apache.arrow.schema.FBSerializable; + +import com.google.flatbuffers.FlatBufferBuilder; + +public class ArrowBlock implements FBSerializable { + + private final long offset; + private final int metadataLength; + private final long bodyLength; + + public ArrowBlock(long offset, int metadataLength, long bodyLength) { + super(); + this.offset = offset; + this.metadataLength = metadataLength; + this.bodyLength = bodyLength; + } + + public long getOffset() { + return offset; + } + + public int getMetadataLength() { + return metadataLength; + } + + public long getBodyLength() { + return bodyLength; + } + + @Override + public int writeTo(FlatBufferBuilder builder) { + return Block.createBlock(builder, offset, metadataLength, bodyLength); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (int) (bodyLength ^ (bodyLength >>> 32)); + result = prime * result + metadataLength; + result = prime * result + (int) (offset ^ (offset >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + ArrowBlock other = (ArrowBlock) obj; + if (bodyLength != other.bodyLength) + return false; + if (metadataLength != other.metadataLength) + return false; + if (offset != other.offset) + return false; + return true; + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowFooter.java b/java/vector/src/main/java/org/apache/arrow/file/ArrowFooter.java new file mode 100644 index 00000000000..8b30445ec90 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/file/ArrowFooter.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.file; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.flatbuf.Block; +import org.apache.arrow.flatbuf.Footer; +import org.apache.arrow.schema.FBSerializable; +import org.apache.arrow.vector.types.pojo.Schema; + +import com.google.flatbuffers.FlatBufferBuilder; + +public class ArrowFooter implements FBSerializable { + + private final Schema schema; + + private final List dictionaries; + + private final List recordBatches; + + public ArrowFooter(Schema schema, List dictionaries, List recordBatches) { + super(); + this.schema = schema; + this.dictionaries = dictionaries; + this.recordBatches = recordBatches; + } + + public ArrowFooter(Footer footer) { + this( + Schema.convertSchema(footer.schema()), + dictionaries(footer), + recordBatches(footer) + ); + } + + private static List recordBatches(Footer footer) { + List recordBatches = new ArrayList<>(); + Block tempBLock = new Block(); + int recordBatchesLength = footer.recordBatchesLength(); + for (int i = 0; i < recordBatchesLength; i++) { + Block block = footer.recordBatches(tempBLock, i); + recordBatches.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength())); + } + return recordBatches; + } + + private static List dictionaries(Footer footer) 
{ + List dictionaries = new ArrayList<>(); + Block tempBLock = new Block(); + int dictionariesLength = footer.dictionariesLength(); + for (int i = 0; i < dictionariesLength; i++) { + Block block = footer.dictionaries(tempBLock, i); + dictionaries.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength())); + } + return dictionaries; + } + + public Schema getSchema() { + return schema; + } + + public List getDictionaries() { + return dictionaries; + } + + public List getRecordBatches() { + return recordBatches; + } + + @Override + public int writeTo(FlatBufferBuilder builder) { + int schemaIndex = schema.getSchema(builder); + Footer.startDictionariesVector(builder, dictionaries.size()); + int dicsOffset = endVector(builder, dictionaries); + Footer.startRecordBatchesVector(builder, recordBatches.size()); + int rbsOffset = endVector(builder, recordBatches); + Footer.startFooter(builder); + Footer.addSchema(builder, schemaIndex); + Footer.addDictionaries(builder, dicsOffset); + Footer.addRecordBatches(builder, rbsOffset); + return Footer.endFooter(builder); + } + + private int endVector(FlatBufferBuilder builder, List blocks) { + for (ArrowBlock block : blocks) { + block.writeTo(builder); + } + return builder.endVector(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((dictionaries == null) ? 0 : dictionaries.hashCode()); + result = prime * result + ((recordBatches == null) ? 0 : recordBatches.hashCode()); + result = prime * result + ((schema == null) ? 
0 : schema.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + ArrowFooter other = (ArrowFooter) obj; + if (dictionaries == null) { + if (other.dictionaries != null) + return false; + } else if (!dictionaries.equals(other.dictionaries)) + return false; + if (recordBatches == null) { + if (other.recordBatches != null) + return false; + } else if (!recordBatches.equals(other.recordBatches)) + return false; + if (schema == null) { + if (other.schema != null) + return false; + } else if (!schema.equals(other.schema)) + return false; + return true; + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/file/ArrowReader.java new file mode 100644 index 00000000000..e16265d844f --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/file/ArrowReader.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.file; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.arrow.flatbuf.Buffer; +import org.apache.arrow.flatbuf.FieldNode; +import org.apache.arrow.flatbuf.Footer; +import org.apache.arrow.flatbuf.RecordBatch; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.schema.ArrowFieldNode; +import org.apache.arrow.schema.ArrowRecordBatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.netty.buffer.ArrowBuf; + +public class ArrowReader implements AutoCloseable { + private static final Logger LOGGER = LoggerFactory.getLogger(ArrowReader.class); + + private static final byte[] MAGIC = "ARROW1".getBytes(); + + private final SeekableByteChannel in; + + private final BufferAllocator allocator; + + private ArrowFooter footer; + + public ArrowReader(SeekableByteChannel in, BufferAllocator allocator) { + super(); + this.in = in; + this.allocator = allocator; + } + + private int readFully(ArrowBuf buffer, int l) throws IOException { + int n = readFully(buffer.nioBuffer(buffer.writerIndex(), l)); + buffer.writerIndex(n); + if (n != l) { + throw new IllegalStateException(n + " != " + l); + } + return n; + } + + private int readFully(ByteBuffer buffer) throws IOException { + int total = 0; + int n; + do { + n = in.read(buffer); + total += n; + } while (n >= 0 && buffer.remaining() > 0); + buffer.flip(); + return total; + } + + private static int bytesToInt(byte[] bytes) { + return ((int)(bytes[3] & 255) << 24) + + ((int)(bytes[2] & 255) << 16) + + ((int)(bytes[1] & 255) << 8) + + ((int)(bytes[0] & 255) << 0); + } + + public ArrowFooter readFooter() throws IOException { + if (footer == null) { + ByteBuffer buffer = ByteBuffer.allocate(4 + MAGIC.length); + long footerLengthOffset = in.size() - buffer.remaining(); + in.position(footerLengthOffset); + 
readFully(buffer); + byte[] array = buffer.array(); + if (!Arrays.equals(MAGIC, Arrays.copyOfRange(array, 4, array.length))) { + throw new InvalidArrowFileException("missing Magic number " + Arrays.toString(buffer.array())); + } + int footerLength = bytesToInt(array); + if (footerLength <= 0 || footerLength + MAGIC.length * 2 + 4 > in.size()) { + throw new InvalidArrowFileException("invalid footer length: " + footerLength); + } + long footerOffset = footerLengthOffset - footerLength; + LOGGER.debug(String.format("Footer starts at %d, length: %d", footerOffset, footerLength)); + ByteBuffer footerBuffer = ByteBuffer.allocate(footerLength); + in.position(footerOffset); + readFully(footerBuffer); + Footer footerFB = Footer.getRootAsFooter(footerBuffer); + this.footer = new ArrowFooter(footerFB); + } + return footer; + } + + // TODO: read dictionaries + + public ArrowRecordBatch readRecordBatch(ArrowBlock recordBatchBlock) throws IOException { + LOGGER.debug(String.format("RecordBatch at %d, metadata: %d, body: %d", recordBatchBlock.getOffset(), recordBatchBlock.getMetadataLength(), recordBatchBlock.getBodyLength())); + int l = (int)(recordBatchBlock.getMetadataLength() + recordBatchBlock.getBodyLength()); + if (l < 0) { + throw new InvalidArrowFileException("block invalid: " + recordBatchBlock); + } + ArrowBuf buffer = allocator.buffer(l); + in.position(recordBatchBlock.getOffset()); + int n = readFully(buffer, l); + if (n != l) { + throw new IllegalStateException(n + " != " + l); + } + RecordBatch recordBatchFB = RecordBatch.getRootAsRecordBatch(buffer.nioBuffer().asReadOnlyBuffer()); + int nodesLength = recordBatchFB.nodesLength(); + ArrowBuf body = buffer.slice(recordBatchBlock.getMetadataLength(), (int)recordBatchBlock.getBodyLength()); + List nodes = new ArrayList<>(); + for (int i = 0; i < nodesLength; ++i) { + FieldNode node = recordBatchFB.nodes(i); + nodes.add(new ArrowFieldNode(node.length(), node.nullCount())); + } + List buffers = new ArrayList<>(); + for 
(int i = 0; i < recordBatchFB.buffersLength(); ++i) { + Buffer bufferFB = recordBatchFB.buffers(i); + LOGGER.debug(String.format("Buffer in RecordBatch at %d, length: %d", bufferFB.offset(), bufferFB.length())); + ArrowBuf vectorBuffer = body.slice((int)bufferFB.offset(), (int)bufferFB.length()); + buffers.add(vectorBuffer); + } + return new ArrowRecordBatch(recordBatchFB.length(), nodes, buffers); + } + + public void close() throws IOException { + in.close(); + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowWriter.java b/java/vector/src/main/java/org/apache/arrow/file/ArrowWriter.java new file mode 100644 index 00000000000..13992cc74dd --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/file/ArrowWriter.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.file; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.schema.ArrowRecordBatch; +import org.apache.arrow.schema.FBSerializable; +import org.apache.arrow.vector.types.pojo.Schema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.flatbuffers.FlatBufferBuilder; + +import io.netty.buffer.ArrowBuf; + +public class ArrowWriter implements AutoCloseable { + private static final Logger LOGGER = LoggerFactory.getLogger(ArrowWriter.class); + + private static final byte[] MAGIC = "ARROW1".getBytes(); + + private final WritableByteChannel out; + + private final Schema schema; + + private final List recordBatches = new ArrayList<>(); + + private long currentPosition = 0; + + private boolean started = false; + + public ArrowWriter(WritableByteChannel out, Schema schema) { + this.out = out; + this.schema = schema; + } + + private void start() throws IOException { + writeMagic(); + } + + private long write(byte[] buffer) throws IOException { + return write(ByteBuffer.wrap(buffer)); + } + + private long write(ByteBuffer buffer) throws IOException { + long length = buffer.remaining(); + out.write(buffer); + currentPosition += length; + return length; + } + + private static byte[] intToBytes(int value) { + byte[] outBuffer = new byte[4]; + outBuffer[3] = (byte)(value >>> 24); + outBuffer[2] = (byte)(value >>> 16); + outBuffer[1] = (byte)(value >>> 8); + outBuffer[0] = (byte)(value >>> 0); + return outBuffer; + } + + private long writeIntLittleEndian(int v) throws IOException { + return write(intToBytes(v)); + } + + // TODO: write dictionaries + + public void writeRecordBatch(ArrowRecordBatch recordBatch) throws IOException { + checkStarted(); + // write metadata header + long offset = currentPosition; + write(recordBatch); + // write body + long bodyOffset = 
currentPosition; + for (ArrowBuf buffer : recordBatch.getBuffers()) { + write(buffer); + } + int metadataLength = (int)(bodyOffset - offset); + if (metadataLength <= 0) { + throw new InvalidArrowFileException("invalid recordBatch"); + } + long bodyLength = currentPosition - bodyOffset; + LOGGER.debug(String.format("RecordBatch at %d, metadata: %d, body: %d", offset, metadataLength, bodyLength)); + // add metadata to footer + recordBatches.add(new ArrowBlock(offset, metadataLength, bodyLength)); + } + + private void write(ArrowBuf buffer) throws IOException { + write(buffer.nioBuffer(buffer.readerIndex(), buffer.readableBytes())); + } + + private void checkStarted() throws IOException { + if (!started) { + started = true; + start(); + } + } + + public void close() throws IOException { + try { + long footerStart = currentPosition; + writeFooter(); + int footerLength = (int)(currentPosition - footerStart); + if (footerLength <= 0 ) { + throw new InvalidArrowFileException("invalid footer"); + } + writeIntLittleEndian(footerLength); + LOGGER.debug(String.format("Footer starts at %d, length: %d", footerStart, footerLength)); + writeMagic(); + } finally { + out.close(); + } + } + + private void writeMagic() throws IOException { + write(MAGIC); + LOGGER.debug(String.format("magic written, now at %d", currentPosition)); + } + + private void writeFooter() throws IOException { + // TODO: dictionaries + write(new ArrowFooter(schema, Collections.emptyList(), recordBatches)); + } + + private long write(FBSerializable writer) throws IOException { + FlatBufferBuilder builder = new FlatBufferBuilder(); + int root = writer.writeTo(builder); + builder.finish(root); + return write(builder.dataBuffer()); + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/file/InvalidArrowFileException.java b/java/vector/src/main/java/org/apache/arrow/file/InvalidArrowFileException.java new file mode 100644 index 00000000000..943714c430e --- /dev/null +++ 
b/java/vector/src/main/java/org/apache/arrow/file/InvalidArrowFileException.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.file;
+
+/**
+ * Unchecked exception carrying a message that describes why an Arrow file
+ * or one of its parts was rejected.
+ */
+public class InvalidArrowFileException extends RuntimeException {
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * @param message description of what is invalid
+   */
+  public InvalidArrowFileException(String message) {
+    super(message);
+  }
+
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowBuffer.java b/java/vector/src/main/java/org/apache/arrow/schema/ArrowBuffer.java
new file mode 100644
index 00000000000..6ebf0954d6b
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/schema/ArrowBuffer.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.schema; + +import org.apache.arrow.flatbuf.Buffer; + +import com.google.flatbuffers.FlatBufferBuilder; + +public class ArrowBuffer implements FBSerializable { + + private int page; + private long offset; + private long size; + + public ArrowBuffer(int page, long offset, long size) { + super(); + this.page = page; + this.offset = offset; + this.size = size; + } + + public int getPage() { + return page; + } + + public long getOffset() { + return offset; + } + + public long getSize() { + return size; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (int) (offset ^ (offset >>> 32)); + result = prime * result + page; + result = prime * result + (int) (size ^ (size >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + ArrowBuffer other = (ArrowBuffer) obj; + if (offset != other.offset) + return false; + if (page != other.page) + return false; + if (size != other.size) + return false; + return true; + } + + @Override + public int writeTo(FlatBufferBuilder builder) { + return Buffer.createBuffer(builder, page, offset, size); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowFieldNode.java b/java/vector/src/main/java/org/apache/arrow/schema/ArrowFieldNode.java new file mode 100644 index 00000000000..1e64d47ca0a --- /dev/null +++ 
b/java/vector/src/main/java/org/apache/arrow/schema/ArrowFieldNode.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.schema; + +import org.apache.arrow.flatbuf.FieldNode; + +import com.google.flatbuffers.FlatBufferBuilder; + +public class ArrowFieldNode implements FBSerializable { + + private final int length; + private final int nullCount; + + public ArrowFieldNode(int length, int nullCount) { + super(); + this.length = length; + this.nullCount = nullCount; + } + + @Override + public int writeTo(FlatBufferBuilder builder) { + return FieldNode.createFieldNode(builder, length, nullCount); + } + + public int getNullCount() { + return nullCount; + } + + public int getLength() { + return length; + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/schema/ArrowRecordBatch.java new file mode 100644 index 00000000000..0bdaa3f0844 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/schema/ArrowRecordBatch.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.schema; + +import static org.apache.arrow.schema.FBSerializables.writeAllStructsToVector; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.flatbuf.RecordBatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.flatbuffers.FlatBufferBuilder; + +import io.netty.buffer.ArrowBuf; + +public class ArrowRecordBatch implements FBSerializable { + private static final Logger LOGGER = LoggerFactory.getLogger(ArrowRecordBatch.class); + + /** number of records */ + private final int length; + + /** Nodes correspond to the pre-ordered flattened logical schema */ + private final List nodes; + + private final List buffers; + + public ArrowRecordBatch(int length, List nodes, List buffers) { + super(); + this.length = length; + this.nodes = nodes; + this.buffers = buffers; + } + + public int getLength() { + return length; + } + + public List getNodes() { + return nodes; + } + + public List getBuffers() { + return buffers; + } + + @Override + public int writeTo(FlatBufferBuilder builder) { + RecordBatch.startNodesVector(builder, nodes.size()); + int nodesOffset = writeAllStructsToVector(builder, nodes); + List arrowBuffers = new ArrayList<>(); + long offset = 0; + for (ArrowBuf buffer : buffers) {; + long 
size = buffer.readableBytes(); + arrowBuffers.add(new ArrowBuffer(0, offset, size)); + LOGGER.debug(String.format("Buffer in RecordBatch at %d, length: %d", offset, size)); + offset += size; + } + RecordBatch.startBuffersVector(builder, buffers.size()); + int buffersOffset = writeAllStructsToVector(builder, arrowBuffers); + RecordBatch.startRecordBatch(builder); + RecordBatch.addLength(builder, length); + RecordBatch.addNodes(builder, nodesOffset); + RecordBatch.addBuffers(builder, buffersOffset); + return RecordBatch.endRecordBatch(builder); + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/schema/FBSerializable.java b/java/vector/src/main/java/org/apache/arrow/schema/FBSerializable.java new file mode 100644 index 00000000000..3f31483a4df --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/schema/FBSerializable.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.schema; + +import com.google.flatbuffers.FlatBufferBuilder; + +public interface FBSerializable { + int writeTo(FlatBufferBuilder builder); +} \ No newline at end of file diff --git a/java/vector/src/main/java/org/apache/arrow/schema/FBSerializables.java b/java/vector/src/main/java/org/apache/arrow/schema/FBSerializables.java new file mode 100644 index 00000000000..19bfeba07e0 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/schema/FBSerializables.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.schema; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import com.google.flatbuffers.FlatBufferBuilder; + +public class FBSerializables { + + public static int writeAllStructsToVector(FlatBufferBuilder builder, List all) { + // struct vectors have to be created in reverse order + List reversed = new ArrayList<>(all); + Collections.reverse(reversed); + for (FBSerializable element : reversed) { + element.writeTo(builder); + } + return builder.endVector(); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java new file mode 100644 index 00000000000..b1f0e3ca08b --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import java.util.Iterator; +import java.util.List; + +import org.apache.arrow.flatbuf.Precision; +import org.apache.arrow.schema.ArrowFieldNode; +import org.apache.arrow.vector.complex.ComplexVectorLoader; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.NestedVector; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.Bool; +import org.apache.arrow.vector.types.pojo.ArrowType.Date; +import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; +import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; +import org.apache.arrow.vector.types.pojo.ArrowType.Int; +import org.apache.arrow.vector.types.pojo.ArrowType.IntervalDay; +import org.apache.arrow.vector.types.pojo.ArrowType.IntervalYear; +import org.apache.arrow.vector.types.pojo.ArrowType.Null; +import org.apache.arrow.vector.types.pojo.ArrowType.Time; +import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; +import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; +import org.apache.arrow.vector.types.pojo.ArrowType.Union; +import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import org.apache.arrow.vector.types.pojo.Field; + +import io.netty.buffer.ArrowBuf; + +public class VectorLoader { + + public static void addChild(final NestedVector container, final Field field, final Iterator nodes, final Iterator buffers) { + MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); + ValueVector vector = container.add(field.getName(), minorType); + loadVector(vector, field, nodes, buffers); + List children = field.getChildren(); + for (Field child : children) { + addChild((NestedVector)vector, child, nodes, buffers); + } + } + + public 
static void loadVector(final ValueVector vector, Field field, Iterator nodes, final Iterator buffers) { + final ArrowFieldNode node = nodes.next(); + field.getType().accept(new ArrowType.ArrowTypeVisitor() { + @Override + public Void visit(Null type) { + return null; + } + + @Override + public Void visit(Tuple type) { + MapVector mapVector = (MapVector)vector; + ComplexVectorLoader.load(mapVector, node, buffers); + return null; + } + + @Override + public Void visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + ListVector listVector = (ListVector)vector; + ComplexVectorLoader.load(listVector, node, buffers); + return null; + } + + @Override + public Void visit(Union type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public Void visit(Int type) { + switch (type.getBitWidth()) { + case 8: + if (type.getIsSigned()) { + NullableTinyIntVector intVector = (NullableTinyIntVector)vector; + intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } else { + NullableUInt1Vector intVector = (NullableUInt1Vector)vector; + intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } + break; + case 16: + if (type.getIsSigned()) { + NullableSmallIntVector intVector = (NullableSmallIntVector)vector; + intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } else { + NullableUInt2Vector intVector = (NullableUInt2Vector)vector; + intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } + break; + case 32: + if (type.getIsSigned()) { + NullableIntVector intVector = (NullableIntVector)vector; + intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } else { + NullableUInt4Vector intVector = (NullableUInt4Vector)vector; + intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } + break; + case 64: + if (type.getIsSigned()) { + NullableBigIntVector intVector = (NullableBigIntVector)vector; + 
intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } else { + NullableUInt8Vector intVector = (NullableUInt8Vector)vector; + intVector.bits.data = buffers.next(); + intVector.values.data = buffers.next(); + } + break; + default: + throw new IllegalArgumentException("Illegal bit width: " + type.getBitWidth()); + } + // TODO: the vector has an unused data field? + return null; + } + + @Override + public Void visit(FloatingPoint type) { + switch (type.getPrecision()) { + case Precision.SINGLE: + NullableFloat4Vector fVector = (NullableFloat4Vector)vector; + fVector.bits.data = buffers.next(); + fVector.values.data = buffers.next(); + break; + case Precision.DOUBLE: + NullableFloat8Vector dVector = (NullableFloat8Vector)vector; + dVector.bits.data = buffers.next(); + dVector.values.data = buffers.next(); + break; + default: + throw new IllegalArgumentException("unknown precision: " + type.getPrecision()); + } + // TODO: the vector has an unused data field? + return null; + } + + @Override + public Void visit(Utf8 type) { + NullableVarCharVector stringVector = (NullableVarCharVector)vector; + stringVector.bits.data = buffers.next(); + stringVector.values.offsetVector.data = buffers.next(); + stringVector.values.data = buffers.next(); + // TODO: the vector has an unused data field? + return null; + } + + @Override + public Void visit(Binary type) { + NullableVarBinaryVector bVector = (NullableVarBinaryVector)vector; + bVector.bits.data = buffers.next(); + bVector.values.offsetVector.data = buffers.next(); + bVector.values.data = buffers.next(); + // TODO: the vector has an unused data field? + return null; + } + + @Override + public Void visit(Bool type) { + NullableBitVector bVector = (NullableBitVector)vector; + bVector.bits.data = buffers.next(); + bVector.values.data = buffers.next(); + // TODO: the vector has an unused data field? 
+ return null; + } + + @Override + public Void visit(Decimal type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public Void visit(Date type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public Void visit(Time type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public Void visit(Timestamp type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public Void visit(IntervalDay type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public Void visit(IntervalYear type) { + throw new UnsupportedOperationException("NYI"); + } + }); + + } + + public static void load(BaseDataValueVector vector, ArrowBuf buffer) { + vector.data = buffer; + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java index 5964f800791..f4714ea2b26 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java @@ -17,14 +17,10 @@ */ package org.apache.arrow.vector.complex; -import com.google.common.collect.ImmutableList; -import io.netty.buffer.ArrowBuf; - import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.apache.arrow.flatbuf.Field; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; @@ -32,12 +28,15 @@ import org.apache.arrow.vector.util.MapWithOrdinal; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import io.netty.buffer.ArrowBuf; + /* * Base class for MapVectors. 
Currently used by RepeatedMapVector and MapVector */ -public abstract class AbstractMapVector extends AbstractContainerVector { +public abstract class AbstractMapVector extends AbstractContainerVector implements NestedVector { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); // Maintains a map with key as field name and value is the vector itself @@ -115,13 +114,13 @@ public T addOrGet(String name, MinorType minorType, Clas if (existing == null) { create = true; } else if (clazz.isAssignableFrom(existing.getClass())) { - return (T) existing; + return clazz.cast(existing); } else if (nullFilled(existing)) { existing.clear(); create = true; } if (create) { - final T vector = (T) minorType.getNewVector(name, allocator, callBack, precisionScale); + final T vector = clazz.cast(minorType.getNewVector(name, allocator, callBack, precisionScale)); putChild(name, vector); if (callBack!=null) { callBack.doWork(); @@ -161,6 +160,20 @@ public T getChild(String name, Class clazz) { return typeify(v, clazz); } + @Override + public ValueVector add(String name, MinorType minorType, int... precisionScale) { + final ValueVector existing = getChild(name); + if (existing == null) { + throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", existing.getClass().getSimpleName(), minorType)); + } + ValueVector vector = minorType.getNewVector(name, allocator, callBack, precisionScale); + putChild(name, vector); + if (callBack!=null) { + callBack.doWork(); + } + return vector; + } + /** * Inserts the vector with the given name if it does not exist else replaces it with the new value. 
* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java new file mode 100644 index 00000000000..06f13746711 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex; + +import java.util.Iterator; + +import org.apache.arrow.schema.ArrowFieldNode; +import org.apache.arrow.vector.VectorLoader; + +import io.netty.buffer.ArrowBuf; + +public class ComplexVectorLoader { + + public static void load(ListVector listVector, ArrowFieldNode node, Iterator buffers) { + // listVector.valueCount = node.getLength(); ? + VectorLoader.load(listVector.offsets, buffers.next()); + VectorLoader.load(listVector.bits, buffers.next()); + } + + public static void load(MapVector mapVector, ArrowFieldNode node, Iterator buffers) { + mapVector.valueCount = node.getLength(); + // no vector of it's own? 
+ } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index c6c6b090db6..b6dbc515091 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -18,10 +18,6 @@ ******************************************************************************/ package org.apache.arrow.vector.complex; -import com.google.common.collect.ImmutableList; -import com.google.flatbuffers.FlatBufferBuilder; -import io.netty.buffer.ArrowBuf; - import java.util.List; import org.apache.arrow.memory.BufferAllocator; @@ -42,11 +38,14 @@ import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ObjectArrays; +import io.netty.buffer.ArrowBuf; + public class ListVector extends BaseRepeatedValueVector { - UInt4Vector offsets; + UInt4Vector offsets;// TODO: THis masks the same vector in the parent final UInt1Vector bits; private Mutator mutator = new Mutator(); private Accessor accessor = new Accessor(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 0cb613e2f7a..db10ac66b80 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -17,8 +17,6 @@ */ package org.apache.arrow.vector.complex; -import io.netty.buffer.ArrowBuf; - import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -34,7 +32,6 @@ import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types.MinorType; -import 
org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.CallBack; @@ -45,6 +42,8 @@ import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; +import io.netty.buffer.ArrowBuf; + public class MapVector extends AbstractMapVector { //private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MapVector.class); @@ -120,7 +119,7 @@ public ArrowBuf[] getBuffers(boolean clear) { int expectedSize = getBufferSize(); int actualSize = super.getBufferSize(); - Preconditions.checkArgument(expectedSize == actualSize); + Preconditions.checkArgument(expectedSize == actualSize, expectedSize + " != " + actualSize); return super.getBuffers(clear); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java new file mode 100644 index 00000000000..53ad2112e09 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.complex; + +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.types.Types.MinorType; + +public interface NestedVector { + ValueVector add(String name, MinorType minorType, int... precisionScale); +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index c34882a8fb1..db74afd6adf 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -17,7 +17,12 @@ */ package org.apache.arrow.vector.types; +import java.util.HashMap; +import java.util.Map; + +import org.apache.arrow.flatbuf.Precision; import org.apache.arrow.flatbuf.Type; +import org.apache.arrow.flatbuf.UnionMode; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.NullableBigIntVector; import org.apache.arrow.vector.NullableBitVector; @@ -85,9 +90,6 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.CallBack; -import java.util.HashMap; -import java.util.Map; - public class Types { public static final Field NULL_FIELD = new Field("", true, Null.INSTANCE, null); @@ -104,8 +106,8 @@ public class Types { public static final Field TIMESTAMP_FIELD = new Field("", true, new Timestamp(""), null); public static final Field INTERVALDAY_FIELD = new Field("", true, IntervalDay.INSTANCE, null); public static final Field INTERVALYEAR_FIELD = new Field("", true, IntervalYear.INSTANCE, null); - public static final Field FLOAT4_FIELD = new Field("", true, new FloatingPoint(0), null); - public static final Field FLOAT8_FIELD = new Field("", true, new FloatingPoint(1), null); + public static final Field FLOAT4_FIELD = new Field("", true, new FloatingPoint(Precision.SINGLE), null); + public static final Field FLOAT8_FIELD = new Field("", true, new FloatingPoint(Precision.DOUBLE), null); public static final Field 
LIST_FIELD = new Field("", true, List.INSTANCE, null); public static final Field VARCHAR_FIELD = new Field("", true, Utf8.INSTANCE, null); public static final Field VARBINARY_FIELD = new Field("", true, Binary.INSTANCE, null); @@ -470,7 +472,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new UnionListWriter((ListVector) vector); } }, - UNION(Union.INSTANCE) { + UNION(new Union(UnionMode.Sparse)) { @Override public Field getField() { throw new UnsupportedOperationException("Cannot get simple field for Union type"); diff --git a/java/vector/src/test/java/org/apache/arrow/file/ByteArrayReadableSeekableByteChannel.java b/java/vector/src/test/java/org/apache/arrow/file/ByteArrayReadableSeekableByteChannel.java new file mode 100644 index 00000000000..c6185f30986 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/file/ByteArrayReadableSeekableByteChannel.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.file; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; + +public class ByteArrayReadableSeekableByteChannel implements SeekableByteChannel { + private byte[] byteArray; + private int position = 0; + + public ByteArrayReadableSeekableByteChannel(byte[] byteArray) { + if (byteArray == null) { + throw new NullPointerException(); + } + this.byteArray = byteArray; + } + + @Override + public boolean isOpen() { + return byteArray != null; + } + + @Override + public void close() throws IOException { + byteArray = null; + } + + @Override + public int read(final ByteBuffer dst) throws IOException { + int remainingInBuf = byteArray.length - this.position; + int length = Math.min(dst.remaining(), remainingInBuf); + dst.put(this.byteArray, this.position, length); + this.position += length; + return length; + } + + @Override + public long position() throws IOException { + return this.position; + } + + @Override + public SeekableByteChannel position(final long newPosition) throws IOException { + this.position = (int)newPosition; + return this; + } + + @Override + public long size() throws IOException { + return this.byteArray.length; + } + + @Override + public int write(final ByteBuffer src) throws IOException { + throw new UnsupportedOperationException("Read only"); + } + + @Override + public SeekableByteChannel truncate(final long size) throws IOException { + throw new UnsupportedOperationException("Read only"); + } + +} diff --git a/java/vector/src/test/java/org/apache/arrow/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/file/TestArrowFile.java new file mode 100644 index 00000000000..76bbb30e484 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/file/TestArrowFile.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.file; + +import static java.util.Arrays.asList; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.schema.ArrowFieldNode; +import org.apache.arrow.schema.ArrowRecordBatch; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ValueVector.Accessor; +import org.apache.arrow.vector.VectorLoader; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; +import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; +import org.apache.arrow.vector.complex.reader.BaseReader.MapReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; +import org.apache.arrow.vector.complex.writer.BigIntWriter; +import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Assert; +import org.junit.Test; + +import 
io.netty.buffer.ArrowBuf; + +public class TestArrowFile { /* Round-trip test: writes a MapVector to an Arrow file, reads the file back and checks every value. */ + static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + + @Test + public void test() throws IOException { + File file = new File("target/mytest.arrow"); + int count = 10000; + + { /* write phase: fill the "int" and "bigInt" children with 0..count-1 and write them to file */ + MapVector parent = new MapVector("parent", allocator, null); + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + IntWriter intWriter = rootWriter.integer("int"); + BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); + for (int i = 0; i < count; i++) { + intWriter.setPosition(i); + intWriter.writeInt(i); + bigIntWriter.setPosition(i); + bigIntWriter.writeBigInt(i); + } + writer.setValueCount(count); + + write(parent, file); + parent.close(); + } + + { /* read phase: reload every record batch listed in the footer and compare the values */ + try ( + java.io.FileInputStream fileInputStream = new java.io.FileInputStream(file); /* must be an input stream: new FileOutputStream(file) would truncate the file that was just written */ + ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), allocator) + ) { + ArrowFooter footer = arrowReader.readFooter(); + org.apache.arrow.vector.types.pojo.Schema schema = footer.getSchema(); + List recordBatches = footer.getRecordBatches(); + for (ArrowBlock rbBlock : recordBatches) { + ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock); + Iterator nodes = recordBatch.getNodes().iterator(); + Iterator buffers = recordBatch.getBuffers().iterator(); + MapVector parent = new MapVector("parent", allocator, null); + List fields = schema.getFields(); + for (Field field : fields) { + VectorLoader.addChild(parent, field, nodes, buffers); + } + + MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); + for (int i = 0; i < count; i++) { + rootReader.setPosition(i); + Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); + Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); + } + + parent.close(); + } + } + + } + } + + private void write(MapVector parent, File file) throws FileNotFoundException, IOException { /* Writes the children of parent to file as a single record batch. */ + Field rootField = 
parent.getField(); + Schema schema = new Schema(rootField.getChildren()); + try ( + FileOutputStream fileOutputStream = new FileOutputStream(file); + ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema) + ) { + List nodes = new ArrayList<>(); + for (ValueVector vector : parent) { + appendNodes(vector, nodes); + } + List buffers = new ArrayList<>(asList(parent.getBuffers(false))); + arrowWriter.writeRecordBatch(new ArrowRecordBatch(parent.getAccessor().getValueCount(), nodes, buffers)); + } + } + + private void appendNodes(ValueVector vector, List nodes) { /* Appends the ArrowFieldNode (value count, null count) of vector, then recurses into its children. */ + Accessor accessor = vector.getAccessor(); + int nullCount = 0; + // TODO: should not have to do that + // we can do that a lot more efficiently (for example with Long.bitCount(i)) + for (int i = 0; i < accessor.getValueCount(); i++) { + if (accessor.isNull(i)) { + nullCount ++; + } + } + nodes.add(new ArrowFieldNode(accessor.getValueCount(), nullCount)); + for (ValueVector child : vector) { + appendNodes(child, nodes); + } + } + + + +} diff --git a/java/vector/src/test/java/org/apache/arrow/file/TestArrowFooter.java b/java/vector/src/test/java/org/apache/arrow/file/TestArrowFooter.java new file mode 100644 index 00000000000..3516ff0f3d5 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/file/TestArrowFooter.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.file; + +import static java.util.Arrays.asList; +import static org.junit.Assert.assertEquals; + +import java.nio.ByteBuffer; +import java.util.Collections; + +import org.apache.arrow.flatbuf.Footer; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Test; + +import com.google.flatbuffers.FlatBufferBuilder; + +public class TestArrowFooter { /* Checks that an ArrowFooter is equal to itself after a FlatBuffers write/read round trip. */ + + @Test + public void test() { /* footer under test: one nullable signed 8-bit int field "a" and two empty lists */ + Schema schema = new Schema(asList( + new Field("a", true, new ArrowType.Int(8, true), Collections.emptyList()) + )); + ArrowFooter footer = new ArrowFooter(schema, Collections.emptyList(), Collections.emptyList()); + ArrowFooter newFooter = roundTrip(footer); + assertEquals(footer, newFooter); + } + + + private ArrowFooter roundTrip(ArrowFooter footer) { /* Serializes footer with a FlatBufferBuilder and builds a new ArrowFooter from the resulting buffer. */ + FlatBufferBuilder builder = new FlatBufferBuilder(); + int i = footer.writeTo(builder); + builder.finish(i); + ByteBuffer dataBuffer = builder.dataBuffer(); + System.out.println(dataBuffer); /* debug output: buffer state before it is parsed */ + ArrowFooter newFooter = new ArrowFooter(Footer.getRootAsFooter(dataBuffer)); + System.out.println(dataBuffer); /* debug output: buffer state after it is parsed */ + return newFooter; + } + +} diff --git a/java/vector/src/test/java/org/apache/arrow/file/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/file/TestArrowReaderWriter.java new file mode 100644 index 00000000000..a8237566842 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/file/TestArrowReaderWriter.java @@ -0,0 +1,100 @@ +/** + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.file; + +import static java.util.Arrays.asList; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.channels.Channels; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.schema.ArrowFieldNode; +import org.apache.arrow.schema.ArrowRecordBatch; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Before; +import org.junit.Test; + +import io.netty.buffer.ArrowBuf; + +public class TestArrowReaderWriter { /* Writes one record batch to an in-memory channel and checks the schema, field node and buffers read back. */ + + private BufferAllocator allocator; + + @Before + public void init() { /* a new RootAllocator is created before each test */ + allocator = new RootAllocator(Long.MAX_VALUE); + } + + ArrowBuf buf(byte[] bytes) { /* Copies bytes into an ArrowBuf obtained from the allocator. */ + ArrowBuf buffer = allocator.buffer(bytes.length); + buffer.writeBytes(bytes); + return buffer; + } + + byte[] array(ArrowBuf buf) { /* Reads the readable bytes of buf into a new byte[]. */ + byte[] bytes = new byte[buf.readableBytes()]; + buf.readBytes(bytes); + return bytes; + } + + @Test + 
public void test() throws IOException { /* single nullable signed 8-bit int field "testField" with a validity buffer and a values buffer */ + Schema schema = new Schema(asList(new Field("testField", true, new ArrowType.Int(8, true), Collections.emptyList()))); + byte[] validity = new byte[] { (byte)255, 0}; + // second half is "undefined" + byte[] values = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (ArrowWriter writer = new ArrowWriter(Channels.newChannel(out), schema)) { + ArrowBuf validityb = buf(validity); + ArrowBuf valuesb = buf(values); + writer.writeRecordBatch(new ArrowRecordBatch(16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb))); /* batch of 16 rows; field node has length 16 and null count 8 */ + } + + byte[] byteArray = out.toByteArray(); + + try (ArrowReader reader = new ArrowReader(new ByteArrayReadableSeekableByteChannel(byteArray), allocator)) { + ArrowFooter footer = reader.readFooter(); + Schema readSchema = footer.getSchema(); + assertEquals(schema, readSchema); + // TODO: dictionaries + List recordBatches = footer.getRecordBatches(); + assertEquals(1, recordBatches.size()); + ArrowRecordBatch recordBatch = reader.readRecordBatch(recordBatches.get(0)); + List nodes = recordBatch.getNodes(); + assertEquals(1, nodes.size()); + ArrowFieldNode node = nodes.get(0); + assertEquals(16, node.getLength()); + assertEquals(8, node.getNullCount()); + List buffers = recordBatch.getBuffers(); + assertEquals(2, buffers.size()); + assertArrayEquals(validity, array(buffers.get(0))); + assertArrayEquals(values, array(buffers.get(1))); + + } + } + +} From 0cc97189c6581d35aedf7ac73836e4938a3a5ef8 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Mon, 22 Aug 2016 13:34:49 -0700 Subject: [PATCH 04/21] add vector type --- cpp/src/arrow/ipc/metadata-internal.cc | 1 + format/Message.fbs | 34 +++++++++---------- .../apache/arrow/schema/ArrowVectorType.java | 11 ++++++ .../apache/arrow/vector/types/pojo/Field.java | 23 +++++++++++-- 4 files changed, 49 insertions(+), 20 deletions(-) create mode 100644 
java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 50db730d208..c921e4d8e01 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -219,6 +219,7 @@ static Status FieldToFlatbuffer( RETURN_NOT_OK(TypeToFlatbuffer(fbb, field->type, &children, &type_enum, &type_data)); auto fb_children = fbb.CreateVector(children); + // TODO: produce the list of VectorTypes *offset = flatbuf::CreateField( fbb, fb_name, field->nullable, type_enum, type_data, field->dictionary, fb_children); diff --git a/format/Message.fbs b/format/Message.fbs index fd40751e46a..da47d0d267c 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -91,6 +91,21 @@ union Type { JSONScalar } +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) + +enum VectorType: short { + /// used in List type, Dense Union and variable length primitive types (String, Binary) + OFFSET, + /// fixed length primitive values + VALUES, + /// Bit vector indicating whether each value is null + VALIDITY, + /// Type vector used in Union type + TYPE +} + /// ---------------------------------------------------------------------- /// A field represents a named column in a record / row batch or child of a /// nested type. @@ -107,12 +122,12 @@ table Field { // present only if the field is dictionary encoded // will point to a dictionary provided by a DictionaryBatch message dictionary: long; + // children apply only to Nested data types like Struct, List and Union + children: [Field]; /// the buffers produced for this type (as derived from the Type) /// does not include children /// each recordbatch will return instances of those Buffers. 
buffers: [ VectorType ]; - // children apply only to Nested data types like Struct, List and Union - children: [Field]; } /// ---------------------------------------------------------------------- @@ -133,21 +148,6 @@ table Schema { fields: [Field]; } -/// ---------------------------------------------------------------------- -/// Data structures for describing a table row batch (a collection of -/// equal-length Arrow arrays) - -enum VectorType: short { - /// used in List type Dense Union and variable length primitive types (String, Binary) - OFFSET, - /// fixed length primitive values - VALUES, - /// Bit vector indicated if each value is null - VALIDITY, - /// Type vector used in Union type - TYPE -} - /// A Buffer represents a single contiguous memory segment struct Buffer { /// The shared memory page id where this buffer is located. Currently this is diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java new file mode 100644 index 00000000000..4216620fd58 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java @@ -0,0 +1,11 @@ +package org.apache.arrow.schema; + +public class ArrowVectorType { + + private short vectorType; + + public ArrowVectorType(short vectorType) { + this.vectorType = vectorType; + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 49d0503e470..26045006ec4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -18,19 +18,23 @@ package org.apache.arrow.vector.types.pojo; -import com.google.common.collect.ImmutableList; -import com.google.flatbuffers.FlatBufferBuilder; +import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField; +import java.util.ArrayList; import 
java.util.List; import java.util.Objects; -import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField; +import org.apache.arrow.schema.ArrowVectorType; + +import com.google.common.collect.ImmutableList; +import com.google.flatbuffers.FlatBufferBuilder; public class Field { private final String name; private final boolean nullable; private final ArrowType type; private final List children; + private final List buffers; public Field(String name, boolean nullable, ArrowType type, List children) { this.name = name; @@ -41,12 +45,21 @@ public Field(String name, boolean nullable, ArrowType type, List children } else { this.children = children; } + this.buffers = getBuffersForType(type); + } + + protected static List getBuffersForType(ArrowType type) { + type.accept(visitor) } public static Field convertField(org.apache.arrow.flatbuf.Field field) { String name = field.name(); boolean nullable = field.nullable(); ArrowType type = getTypeForField(field); + List buffers = new ArrayList<>(); + for (int i = 0; i < field.buffersLength(); ++i) { + buffers.add(new ArrowVectorType(field.buffers(i))); + } ImmutableList.Builder childrenBuilder = ImmutableList.builder(); for (int i = 0; i < field.childrenLength(); i++) { childrenBuilder.add(convertField(field.children(i))); @@ -88,6 +101,10 @@ public List getChildren() { return children; } + public List getBuffers() { + return buffers; + } + @Override public boolean equals(Object obj) { if (!(obj instanceof Field)) { From e43f26b6314ef6ba8c615e3cb5bda7c1270d57ee Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Mon, 15 Aug 2016 08:43:47 -0700 Subject: [PATCH 05/21] add layout spec --- java/vector/pom.xml | 5 + .../layout/ByteAlignedVectorLayout.java | 16 ++ .../vector/layout/PrimitiveTypeLayout.java | 21 ++ .../arrow/vector/layout/TypeLayout.java | 197 ++++++++++++++++++ .../arrow/vector/layout/VectorLayout.java | 44 ++++ 5 files changed, 283 insertions(+) create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 1a2921f6ea5..3aeb5e157cd 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -32,6 +32,11 @@ arrow-memory ${project.version} + + org.apache.arrow + arrow-format + ${project.version} + joda-time joda-time diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java new file mode 100644 index 00000000000..109f957264d --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java @@ -0,0 +1,16 @@ +package org.apache.arrow.vector.layout; + +public class ByteAlignedVectorLayout extends VectorLayout { + + private final int typeByteWidth; + + public ByteAlignedVectorLayout(int typeByteWidth) { + super(typeByteWidth * 8); + this.typeByteWidth = typeByteWidth; + } + + public int getTypeByteWidth() { + return typeByteWidth; + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java new file mode 100644 index 00000000000..46c42e359ad --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java @@ -0,0 +1,21 @@ +package org.apache.arrow.vector.layout; + +import static java.util.Arrays.asList; + +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.vector.types.pojo.ArrowType; + + +public class PrimitiveTypeLayout extends TypeLayout { + + public PrimitiveTypeLayout(ArrowType type, 
List vectors) { + super(type, vectors, Collections.emptyList()); + } + + public PrimitiveTypeLayout(ArrowType type, VectorLayout... vectors) { + this(type, asList(vectors)); + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java new file mode 100644 index 00000000000..accfc7ceae6 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java @@ -0,0 +1,197 @@ +package org.apache.arrow.vector.layout; + +import static java.util.Arrays.asList; +import static org.apache.arrow.flatbuf.Precision.DOUBLE; +import static org.apache.arrow.flatbuf.Precision.SINGLE; +import static org.apache.arrow.vector.layout.VectorLayout.newBooleanVectorLayout; +import static org.apache.arrow.vector.layout.VectorLayout.newByteVectorLayout; +import static org.apache.arrow.vector.layout.VectorLayout.newIntVectorLayout; +import static org.apache.arrow.vector.layout.VectorLayout.newOffsetVectorLayout; +import static org.apache.arrow.vector.layout.VectorLayout.newValidityVectorLayout; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.flatbuf.UnionMode; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; +import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.Bool; +import org.apache.arrow.vector.types.pojo.ArrowType.Date; +import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; +import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; +import org.apache.arrow.vector.types.pojo.ArrowType.Int; +import org.apache.arrow.vector.types.pojo.ArrowType.IntervalDay; +import org.apache.arrow.vector.types.pojo.ArrowType.IntervalYear; +import org.apache.arrow.vector.types.pojo.ArrowType.Null; +import org.apache.arrow.vector.types.pojo.ArrowType.Time; +import 
org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; +import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; +import org.apache.arrow.vector.types.pojo.ArrowType.Union; +import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import org.apache.arrow.vector.types.pojo.Field; + +/** + * The layout of vectors for a given type + * It defines its own vectors followed by the vectors for the children + * if it is a nested type (Tuple, List, Union) + */ +public class TypeLayout { + + public static TypeLayout newTypeLayout(Field field) { + final org.apache.arrow.vector.types.pojo.ArrowType arrowType = field.getType(); + final List children = field.getChildren(); + TypeLayout layout = arrowType.accept(new ArrowTypeVisitor() { + + @Override public TypeLayout visit(Int type) { + return new FixedWidthTypeLayout( + arrowType, + newIntVectorLayout(type.getBitWidth())); + } + + @Override public TypeLayout visit(Union type) { + List childLayouts = childrenLayout(children); + List vectors; + switch (type.getMode()) { + case UnionMode.Dense: + vectors = asList( + // TODO: validate this + newValidityVectorLayout(), + newIntVectorLayout(8), // type vector + newOffsetVectorLayout() // offset to find the vector + ); + break; + case UnionMode.Sparse: + vectors = asList( + newValidityVectorLayout(), + newIntVectorLayout(8) // type vector + ); + break; + default: + throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode()); + } + return new TypeLayout(arrowType, vectors, childLayouts); + } + + @Override public TypeLayout visit(Tuple type) { + List childLayouts = childrenLayout(children); + List vectors = asList( + newValidityVectorLayout() + ); + return new TypeLayout(arrowType, vectors, childLayouts); + } + + @Override public TypeLayout visit(Timestamp type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + if (children.size() != 1) { + throw new 
IllegalArgumentException("Lists should have exactly one child. Found " + children); + } + List childLayouts = childrenLayout(children); + List vectors = asList( + newValidityVectorLayout() + ); + return new TypeLayout(arrowType, vectors, childLayouts); + } + + @Override public TypeLayout visit(FloatingPoint type) { + int bitWidth; + switch (type.getPrecision()) { + case SINGLE: + bitWidth = 32; + break; + case DOUBLE: + bitWidth = 64; + break; + default: + throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision()); + } + return new FixedWidthTypeLayout( + arrowType, + newIntVectorLayout(bitWidth)); + } + + @Override public TypeLayout visit(Decimal type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override public TypeLayout visit(Bool type) { + return new FixedWidthTypeLayout( + arrowType, + newBooleanVectorLayout()); + } + + @Override public TypeLayout visit(Binary type) { + return new VariableWidthTypeLayout( + arrowType, + newByteVectorLayout()); + } + + @Override public TypeLayout visit(Utf8 type) { + return new VariableWidthTypeLayout( + arrowType, + newByteVectorLayout()); + } + + @Override + public TypeLayout visit(Null type) { + return new TypeLayout(type, Collections.emptyList(), Collections.emptyList()); + } + + @Override + public TypeLayout visit(Date type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public TypeLayout visit(Time type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public TypeLayout visit(IntervalDay type) { + throw new UnsupportedOperationException("NYI"); + } + + @Override + public TypeLayout visit(IntervalYear type) { + throw new UnsupportedOperationException("NYI"); + } + + private List childrenLayout(final List children) { + List childLayouts = new ArrayList(); + for (Field child : children) { + childLayouts.add(newTypeLayout(child)); + } + return childLayouts; + } + }); + return layout; + } + + private final ArrowType type; + private 
final List vectors; + private final List children; + + public TypeLayout(ArrowType type, List vectors, List children) { + super(); + this.type = type; + this.vectors = vectors; + this.children = children; + } + + public ArrowType getType() { + return type; + } + + public List getVectors() { + return vectors; + } + + public List getChildren() { + return children; + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java new file mode 100644 index 00000000000..143fe9e2dd5 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java @@ -0,0 +1,44 @@ +package org.apache.arrow.vector.layout; + +public class VectorLayout { + + public static ByteAlignedVectorLayout newOffsetVectorLayout() { + return newIntVectorLayout(32); + } + + public static ByteAlignedVectorLayout newIntVectorLayout(int typeBitWidth) { + switch (typeBitWidth) { + case 8: + case 16: + case 32: + case 64: + return new ByteAlignedVectorLayout(typeBitWidth / 8); + default: + throw new IllegalArgumentException("only 8, 16, 32, or 64 bits supported"); + } + } + + public static VectorLayout newBooleanVectorLayout() { + return new VectorLayout(1); + } + + public static VectorLayout newValidityVectorLayout() { + return newBooleanVectorLayout(); + } + + public static ByteAlignedVectorLayout newByteVectorLayout() { + return newIntVectorLayout(8); + } + + private final int typeBitWidth; + + public VectorLayout(int typeBitWidth) { + super(); + this.typeBitWidth = typeBitWidth; + } + + public int getTypeBitWidth() { + return typeBitWidth; + } + +} From b907aa5422a17b24db7161a04acc6eb24a14c1dc Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Mon, 22 Aug 2016 14:20:15 -0700 Subject: [PATCH 06/21] simplify --- .../arrow/vector/BaseDataValueVector.java | 6 +++-- .../layout/ByteAlignedVectorLayout.java | 16 ------------- .../vector/layout/PrimitiveTypeLayout.java | 21 
---------------- .../arrow/vector/layout/TypeLayout.java | 24 +++++++++++++------ .../arrow/vector/layout/VectorLayout.java | 8 +++---- 5 files changed, 25 insertions(+), 50 deletions(-) delete mode 100644 java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java delete mode 100644 java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java index 05b7cf10067..983f56a6b9e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java @@ -17,15 +17,17 @@ */ package org.apache.arrow.vector; -import io.netty.buffer.ArrowBuf; - import org.apache.arrow.memory.BufferAllocator; +import io.netty.buffer.ArrowBuf; + public abstract class BaseDataValueVector extends BaseValueVector { protected final static byte[] emptyByteArray = new byte[]{}; // Nullable vectors use this + // TODO: Nullable vectors extend BaseDataValueVector but do not use the data field + // We should fix the inheritance tree protected ArrowBuf data; public BaseDataValueVector(String name, BufferAllocator allocator) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java deleted file mode 100644 index 109f957264d..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/layout/ByteAlignedVectorLayout.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.apache.arrow.vector.layout; - -public class ByteAlignedVectorLayout extends VectorLayout { - - private final int typeByteWidth; - - public ByteAlignedVectorLayout(int typeByteWidth) { - super(typeByteWidth * 8); - this.typeByteWidth = typeByteWidth; - } - - public int getTypeByteWidth() { - return 
typeByteWidth; - } - -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java deleted file mode 100644 index 46c42e359ad..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/layout/PrimitiveTypeLayout.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.apache.arrow.vector.layout; - -import static java.util.Arrays.asList; - -import java.util.Collections; -import java.util.List; - -import org.apache.arrow.vector.types.pojo.ArrowType; - - -public class PrimitiveTypeLayout extends TypeLayout { - - public PrimitiveTypeLayout(ArrowType type, List vectors) { - super(type, vectors, Collections.emptyList()); - } - - public PrimitiveTypeLayout(ArrowType type, VectorLayout... vectors) { - this(type, asList(vectors)); - } - -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java index accfc7ceae6..ebace91a8c1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java @@ -45,7 +45,7 @@ public static TypeLayout newTypeLayout(Field field) { TypeLayout layout = arrowType.accept(new ArrowTypeVisitor() { @Override public TypeLayout visit(Int type) { - return new FixedWidthTypeLayout( + return newFixedWidthTypeLayout( arrowType, newIntVectorLayout(type.getBitWidth())); } @@ -109,7 +109,7 @@ public static TypeLayout newTypeLayout(Field field) { default: throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision()); } - return new FixedWidthTypeLayout( + return newFixedWidthTypeLayout( arrowType, newIntVectorLayout(bitWidth)); } @@ -119,19 +119,29 @@ public static TypeLayout newTypeLayout(Field field) { } @Override public TypeLayout visit(Bool type) { - return new FixedWidthTypeLayout( + return 
newFixedWidthTypeLayout( arrowType, newBooleanVectorLayout()); } @Override public TypeLayout visit(Binary type) { - return new VariableWidthTypeLayout( - arrowType, - newByteVectorLayout()); + return newVariableWidthTypeLayout(arrowType, newByteVectorLayout()); + } + + private TypeLayout newVariableWidthTypeLayout(ArrowType arrowType, VectorLayout values) { + return newPrimitiveTypeLayout(arrowType, newValidityVectorLayout(), newOffsetVectorLayout(), values); + } + + private TypeLayout newPrimitiveTypeLayout(ArrowType type, VectorLayout... vectors) { + return new TypeLayout(type, asList(vectors), Collections.emptyList()); + } + + public TypeLayout newFixedWidthTypeLayout(ArrowType type, VectorLayout dataVector) { + return newPrimitiveTypeLayout(type, newValidityVectorLayout(), dataVector); } @Override public TypeLayout visit(Utf8 type) { - return new VariableWidthTypeLayout( + return newVariableWidthTypeLayout( arrowType, newByteVectorLayout()); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java index 143fe9e2dd5..ccdcb92d16d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java @@ -2,17 +2,17 @@ public class VectorLayout { - public static ByteAlignedVectorLayout newOffsetVectorLayout() { + public static VectorLayout newOffsetVectorLayout() { return newIntVectorLayout(32); } - public static ByteAlignedVectorLayout newIntVectorLayout(int typeBitWidth) { + public static VectorLayout newIntVectorLayout(int typeBitWidth) { switch (typeBitWidth) { case 8: case 16: case 32: case 64: - return new ByteAlignedVectorLayout(typeBitWidth / 8); + return new VectorLayout(typeBitWidth); default: throw new IllegalArgumentException("only 8, 16, 32, or 64 bits supported"); } @@ -26,7 +26,7 @@ public static VectorLayout newValidityVectorLayout() { return 
newBooleanVectorLayout(); } - public static ByteAlignedVectorLayout newByteVectorLayout() { + public static VectorLayout newByteVectorLayout() { return newIntVectorLayout(8); } From aacf61e45363d46a8c19baf46fed0d540f8fae45 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Mon, 22 Aug 2016 14:23:42 -0700 Subject: [PATCH 07/21] fix pom --- java/vector/pom.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 3aeb5e157cd..1a2921f6ea5 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -32,11 +32,6 @@ arrow-memory ${project.version} - - org.apache.arrow - arrow-format - ${project.version} - joda-time joda-time From 2067e01273a0d28c22c023d78d69a36591fe4cac Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 10:12:39 -0700 Subject: [PATCH 08/21] update format --- .../src/main/codegen/data/ArrowTypes.tdd | 4 +- .../src/main/codegen/templates/ArrowType.java | 8 ++ .../templates/NullableValueVectors.java | 32 +++++- .../apache/arrow/schema/ArrowVectorType.java | 11 -- .../org/apache/arrow/vector/FieldVector.java | 28 +++++ .../org/apache/arrow/vector/ValueVector.java | 9 +- .../org/apache/arrow/vector/VectorLoader.java | 2 +- .../vector/complex/ComplexVectorLoader.java | 2 +- .../arrow/vector/complex/MapVector.java | 23 ++++ .../arrow/{ => vector}/file/ArrowBlock.java | 4 +- .../arrow/{ => vector}/file/ArrowFooter.java | 4 +- .../arrow/{ => vector}/file/ArrowReader.java | 9 +- .../arrow/{ => vector}/file/ArrowWriter.java | 6 +- .../file/InvalidArrowFileException.java | 2 +- .../arrow/vector/layout/VectorLayout.java | 44 ------- .../{ => vector}/schema/ArrowBuffer.java | 2 +- .../{ => vector}/schema/ArrowFieldNode.java | 7 +- .../{ => vector}/schema/ArrowRecordBatch.java | 4 +- .../arrow/vector/schema/ArrowVectorType.java | 30 +++++ .../{ => vector}/schema/FBSerializable.java | 2 +- .../{ => vector}/schema/FBSerializables.java | 2 +- .../vector/{layout => schema}/TypeLayout.java | 108 
+++++++----------- .../arrow/vector/schema/VectorLayout.java | 76 ++++++++++++ .../org/apache/arrow/vector/types/Types.java | 4 +- .../apache/arrow/vector/types/pojo/Field.java | 37 ++++-- .../arrow/vector/types/pojo/Schema.java | 13 ++- .../ByteArrayReadableSeekableByteChannel.java | 2 +- .../{ => vector}/file/TestArrowFile.java | 107 ++++++++++++++--- .../{ => vector}/file/TestArrowFooter.java | 4 +- .../file/TestArrowReaderWriter.java | 12 +- .../apache/arrow/vector/pojo/TestConvert.java | 14 +-- 31 files changed, 417 insertions(+), 195 deletions(-) delete mode 100644 java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java rename java/vector/src/main/java/org/apache/arrow/{ => vector}/file/ArrowBlock.java (96%) rename java/vector/src/main/java/org/apache/arrow/{ => vector}/file/ArrowFooter.java (98%) rename java/vector/src/main/java/org/apache/arrow/{ => vector}/file/ArrowReader.java (94%) rename java/vector/src/main/java/org/apache/arrow/{ => vector}/file/ArrowWriter.java (96%) rename java/vector/src/main/java/org/apache/arrow/{ => vector}/file/InvalidArrowFileException.java (96%) delete mode 100644 java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java rename java/vector/src/main/java/org/apache/arrow/{ => vector}/schema/ArrowBuffer.java (98%) rename java/vector/src/main/java/org/apache/arrow/{ => vector}/schema/ArrowFieldNode.java (89%) rename java/vector/src/main/java/org/apache/arrow/{ => vector}/schema/ArrowRecordBatch.java (95%) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java rename java/vector/src/main/java/org/apache/arrow/{ => vector}/schema/FBSerializable.java (95%) rename java/vector/src/main/java/org/apache/arrow/{ => vector}/schema/FBSerializables.java (97%) rename java/vector/src/main/java/org/apache/arrow/vector/{layout => schema}/TypeLayout.java (54%) create mode 100644 
java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java rename java/vector/src/test/java/org/apache/arrow/{ => vector}/file/ByteArrayReadableSeekableByteChannel.java (98%) rename java/vector/src/test/java/org/apache/arrow/{ => vector}/file/TestArrowFile.java (53%) rename java/vector/src/test/java/org/apache/arrow/{ => vector}/file/TestArrowFooter.java (93%) rename java/vector/src/test/java/org/apache/arrow/{ => vector}/file/TestArrowReaderWriter.java (86%) diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 7a3eaa013c8..2ecad3d3140 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -30,7 +30,7 @@ }, { name: "Union", - fields: [{name: "mode", type: int}] + fields: [{name: "mode", type: short}] }, { name: "Int", @@ -38,7 +38,7 @@ }, { name: "FloatingPoint", - fields: [{name: precision, type: int}] + fields: [{name: precision, type: short}] }, { name: "Utf8", diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index 7ff319b3667..cbd64cb2484 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -87,6 +87,14 @@ public int getType(FlatBufferBuilder builder) { return ${field.name}; } + + public String toString() { + return "${name}{" + <#list fields as field> + + ", " + ${field.name} + + + "}"; + } @Override public int hashCode() { diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index df508979c48..54ecf2524c5 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -29,6 +29,9 @@ package org.apache.arrow.vector; +import org.apache.arrow.vector.schema.ArrowFieldNode; +import 
java.util.Collections; + <#include "/@includes/vv_imports.ftl" /> /** @@ -39,7 +42,7 @@ * NB: this class is automatically generated from ${.template_name} and ValueVectorTypes.tdd using FreeMarker. */ @SuppressWarnings("unused") -public final class ${className} extends BaseDataValueVector implements <#if type.major == "VarLen">VariableWidth<#else>FixedWidthVector, NullableVector{ +public final class ${className} extends BaseDataValueVector implements <#if type.major == "VarLen">VariableWidth<#else>FixedWidthVector, NullableVector, FieldVector{ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(${className}.class); private final FieldReader reader = new ${minor.class}ReaderImpl(Nullable${minor.class}Vector.this); @@ -88,9 +91,9 @@ public final class ${className} extends BaseDataValueVector implements <#if type <#elseif minor.class == "Time"> field = new Field(name, true, new org.apache.arrow.vector.types.pojo.ArrowType.Time(), null); <#elseif minor.class == "Float4"> - field = new Field(name, true, new FloatingPoint(0), null); + field = new Field(name, true, new FloatingPoint(org.apache.arrow.flatbuf.Precision.SINGLE), null); <#elseif minor.class == "Float8"> - field = new Field(name, true, new FloatingPoint(1), null); + field = new Field(name, true, new FloatingPoint(org.apache.arrow.flatbuf.Precision.DOUBLE), null); <#elseif minor.class == "TimeStamp"> field = new Field(name, true, new org.apache.arrow.vector.types.pojo.ArrowType.Timestamp(""), null); <#elseif minor.class == "IntervalDay"> @@ -107,6 +110,29 @@ public final class ${className} extends BaseDataValueVector implements <#if type } + /** + * Initializes the child vectors + * to be later loaded with loadBuffers + * @param children + */ + public void initializeChildrenFromFields(List children) { + throw new UnsupportedOperationException(); + } + + public List getChildrenFromFields() { + return Collections.emptyList(); + } + + public void loadFieldBuffers(ArrowFieldNode 
fieldNode, List ownBuffers) { + throw new UnsupportedOperationException(); + } + + public List getFieldBuffers() { + bits.getBuffer().readerIndex(0); + values.getBuffer().readerIndex(0); + return Arrays.asList(bits.getBuffer(), values.getBuffer()); + } + @Override public Field getField() { return field; diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java deleted file mode 100644 index 4216620fd58..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/schema/ArrowVectorType.java +++ /dev/null @@ -1,11 +0,0 @@ -package org.apache.arrow.schema; - -public class ArrowVectorType { - - private short vectorType; - - public ArrowVectorType(short vectorType) { - this.vectorType = vectorType; - } - -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java new file mode 100644 index 00000000000..f6b4217c1a4 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java @@ -0,0 +1,28 @@ +package org.apache.arrow.vector; + +import java.util.List; + +import org.apache.arrow.vector.schema.ArrowFieldNode; +import org.apache.arrow.vector.types.pojo.Field; + +import io.netty.buffer.ArrowBuf; + +public interface FieldVector extends ValueVector { + + /** + * Initializes the child vectors + * to be later loaded with loadBuffers + * @param children + */ + void initializeChildrenFromFields(List children); + + List getChildrenFromFields(); + + void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers); + + /** + * Returns the own buffers for this vector + * @return the + */ + List getFieldBuffers(); +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 35321c947db..4a1eeb6f774 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java 
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -19,14 +19,14 @@ import java.io.Closeable; -import io.netty.buffer.ArrowBuf; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.util.TransferPair; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.util.TransferPair; + +import io.netty.buffer.ArrowBuf; /** * An abstraction that is used to store a sequence of values in an individual column. @@ -137,6 +137,8 @@ public interface ValueVector extends Closeable, Iterable { */ // SerializedField getMetadata(); +// TypeLayout getTypeLayout(); + /** * Returns the number of bytes that is used by this vector instance. */ @@ -221,4 +223,5 @@ interface Mutator { @Deprecated void generateTestData(int values); } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index b1f0e3ca08b..fa8ff368428 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -21,11 +21,11 @@ import java.util.List; import org.apache.arrow.flatbuf.Precision; -import org.apache.arrow.schema.ArrowFieldNode; import org.apache.arrow.vector.complex.ComplexVectorLoader; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NestedVector; +import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java index 06f13746711..28a08abb3bd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java @@ -19,8 +19,8 @@ import java.util.Iterator; -import org.apache.arrow.schema.ArrowFieldNode; import org.apache.arrow.vector.VectorLoader; +import org.apache.arrow.vector.schema.ArrowFieldNode; import io.netty.buffer.ArrowBuf; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index db10ac66b80..f9bacdceacd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -27,10 +27,12 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.holders.ComplexHolder; +import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; import org.apache.arrow.vector.types.pojo.Field; @@ -323,4 +325,25 @@ public void close() { super.close(); } + + private List fieldChildren; + + public void initializeChildren(List children) { + if (fieldChildren != null) { + throw new IllegalArgumentException(children.toString()); //TODO + } + for (Field field : children) { + MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); + FieldVector vector = (FieldVector)this.add(field.getName(), minorType); + fieldChildren.add(vector); + // TODO: clean this up + 
vector.initializeChildrenFromFields(field.getChildren()); + } + } + + public List getFieldVectors() { + // TODO: clean this up + return (List)(List)getChildren(); + } + } diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowBlock.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowBlock.java similarity index 96% rename from java/vector/src/main/java/org/apache/arrow/file/ArrowBlock.java rename to java/vector/src/main/java/org/apache/arrow/vector/file/ArrowBlock.java index dfe392796fa..90fb02b0597 100644 --- a/java/vector/src/main/java/org/apache/arrow/file/ArrowBlock.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowBlock.java @@ -15,10 +15,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; import org.apache.arrow.flatbuf.Block; -import org.apache.arrow.schema.FBSerializable; +import org.apache.arrow.vector.schema.FBSerializable; import com.google.flatbuffers.FlatBufferBuilder; diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowFooter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java similarity index 98% rename from java/vector/src/main/java/org/apache/arrow/file/ArrowFooter.java rename to java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java index 8b30445ec90..01e175b31b8 100644 --- a/java/vector/src/main/java/org/apache/arrow/file/ArrowFooter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java @@ -15,14 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; import java.util.ArrayList; import java.util.List; import org.apache.arrow.flatbuf.Block; import org.apache.arrow.flatbuf.Footer; -import org.apache.arrow.schema.FBSerializable; +import org.apache.arrow.vector.schema.FBSerializable; import org.apache.arrow.vector.types.pojo.Schema; import com.google.flatbuffers.FlatBufferBuilder; diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java similarity index 94% rename from java/vector/src/main/java/org/apache/arrow/file/ArrowReader.java rename to java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java index e16265d844f..74f89e65417 100644 --- a/java/vector/src/main/java/org/apache/arrow/file/ArrowReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; import java.io.IOException; import java.nio.ByteBuffer; @@ -29,8 +29,8 @@ import org.apache.arrow.flatbuf.Footer; import org.apache.arrow.flatbuf.RecordBatch; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.schema.ArrowFieldNode; -import org.apache.arrow.schema.ArrowRecordBatch; +import org.apache.arrow.vector.schema.ArrowFieldNode; +import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,6 +82,9 @@ private static int bytesToInt(byte[] bytes) { public ArrowFooter readFooter() throws IOException { if (footer == null) { + if (in.size() <= (MAGIC.length * 2 + 4)) { + throw new InvalidArrowFileException("file too small: " + in.size()); + } ByteBuffer buffer = ByteBuffer.allocate(4 + MAGIC.length); long footerLengthOffset = in.size() - buffer.remaining(); in.position(footerLengthOffset); diff --git a/java/vector/src/main/java/org/apache/arrow/file/ArrowWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java similarity index 96% rename from java/vector/src/main/java/org/apache/arrow/file/ArrowWriter.java rename to java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java index 13992cc74dd..a82e4cb1d0e 100644 --- a/java/vector/src/main/java/org/apache/arrow/file/ArrowWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; import java.io.IOException; import java.nio.ByteBuffer; @@ -24,8 +24,8 @@ import java.util.Collections; import java.util.List; -import org.apache.arrow.schema.ArrowRecordBatch; -import org.apache.arrow.schema.FBSerializable; +import org.apache.arrow.vector.schema.ArrowRecordBatch; +import org.apache.arrow.vector.schema.FBSerializable; import org.apache.arrow.vector.types.pojo.Schema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/main/java/org/apache/arrow/file/InvalidArrowFileException.java b/java/vector/src/main/java/org/apache/arrow/vector/file/InvalidArrowFileException.java similarity index 96% rename from java/vector/src/main/java/org/apache/arrow/file/InvalidArrowFileException.java rename to java/vector/src/main/java/org/apache/arrow/vector/file/InvalidArrowFileException.java index 943714c430e..3ec75dcb12a 100644 --- a/java/vector/src/main/java/org/apache/arrow/file/InvalidArrowFileException.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/InvalidArrowFileException.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; public class InvalidArrowFileException extends RuntimeException { private static final long serialVersionUID = 1L; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java deleted file mode 100644 index ccdcb92d16d..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/layout/VectorLayout.java +++ /dev/null @@ -1,44 +0,0 @@ -package org.apache.arrow.vector.layout; - -public class VectorLayout { - - public static VectorLayout newOffsetVectorLayout() { - return newIntVectorLayout(32); - } - - public static VectorLayout newIntVectorLayout(int typeBitWidth) { - switch (typeBitWidth) { - case 8: - case 16: - case 32: - case 64: - return new VectorLayout(typeBitWidth); - default: - throw new IllegalArgumentException("only 8, 16, 32, or 64 bits supported"); - } - } - - public static VectorLayout newBooleanVectorLayout() { - return new VectorLayout(1); - } - - public static VectorLayout newValidityVectorLayout() { - return newBooleanVectorLayout(); - } - - public static VectorLayout newByteVectorLayout() { - return newIntVectorLayout(8); - } - - private final int typeBitWidth; - - public VectorLayout(int typeBitWidth) { - super(); - this.typeBitWidth = typeBitWidth; - } - - public int getTypeBitWidth() { - return typeBitWidth; - } - -} diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowBuffer.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowBuffer.java similarity index 98% rename from java/vector/src/main/java/org/apache/arrow/schema/ArrowBuffer.java rename to java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowBuffer.java index 6ebf0954d6b..3aa3e52582b 100644 --- a/java/vector/src/main/java/org/apache/arrow/schema/ArrowBuffer.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowBuffer.java @@ -15,7 +15,7 @@ * See 
the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.schema; +package org.apache.arrow.vector.schema; import org.apache.arrow.flatbuf.Buffer; diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowFieldNode.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowFieldNode.java similarity index 89% rename from java/vector/src/main/java/org/apache/arrow/schema/ArrowFieldNode.java rename to java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowFieldNode.java index 1e64d47ca0a..71dd0abc6bc 100644 --- a/java/vector/src/main/java/org/apache/arrow/schema/ArrowFieldNode.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowFieldNode.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.schema; +package org.apache.arrow.vector.schema; import org.apache.arrow.flatbuf.FieldNode; @@ -45,4 +45,9 @@ public int getLength() { return length; } + @Override + public String toString() { + return "ArrowFieldNode [length=" + length + ", nullCount=" + nullCount + "]"; + } + } diff --git a/java/vector/src/main/java/org/apache/arrow/schema/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java similarity index 95% rename from java/vector/src/main/java/org/apache/arrow/schema/ArrowRecordBatch.java rename to java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java index 0bdaa3f0844..dc91f53e560 100644 --- a/java/vector/src/main/java/org/apache/arrow/schema/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java @@ -15,9 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.schema; +package org.apache.arrow.vector.schema; -import static org.apache.arrow.schema.FBSerializables.writeAllStructsToVector; +import static org.apache.arrow.vector.schema.FBSerializables.writeAllStructsToVector; import java.util.ArrayList; import java.util.List; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java new file mode 100644 index 00000000000..070b7748abf --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java @@ -0,0 +1,30 @@ +package org.apache.arrow.vector.schema; + +import org.apache.arrow.flatbuf.VectorType; + +public class ArrowVectorType { + + public static final ArrowVectorType VALUES = new ArrowVectorType(VectorType.VALUES); + public static final ArrowVectorType OFFSET = new ArrowVectorType(VectorType.OFFSET); + public static final ArrowVectorType VALIDITY = new ArrowVectorType(VectorType.VALIDITY); + public static final ArrowVectorType TYPE = new ArrowVectorType(VectorType.TYPE); + + private final short type; + + public ArrowVectorType(short type) { + this.type = type; + } + + public short getType() { + return type; + } + + @Override + public String toString() { + try { + return VectorType.name(type); + } catch (ArrayIndexOutOfBoundsException e) { + return "Unlnown type " + type; + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/schema/FBSerializable.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/FBSerializable.java similarity index 95% rename from java/vector/src/main/java/org/apache/arrow/schema/FBSerializable.java rename to java/vector/src/main/java/org/apache/arrow/vector/schema/FBSerializable.java index 3f31483a4df..d23ed91948e 100644 --- a/java/vector/src/main/java/org/apache/arrow/schema/FBSerializable.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/FBSerializable.java @@ -15,7 +15,7 @@ * See the 
License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.schema; +package org.apache.arrow.vector.schema; import com.google.flatbuffers.FlatBufferBuilder; diff --git a/java/vector/src/main/java/org/apache/arrow/schema/FBSerializables.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/FBSerializables.java similarity index 97% rename from java/vector/src/main/java/org/apache/arrow/schema/FBSerializables.java rename to java/vector/src/main/java/org/apache/arrow/vector/schema/FBSerializables.java index 19bfeba07e0..31c17ad6df0 100644 --- a/java/vector/src/main/java/org/apache/arrow/schema/FBSerializables.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/FBSerializables.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.schema; +package org.apache.arrow.vector.schema; import java.util.ArrayList; import java.util.Collections; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java similarity index 54% rename from java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java rename to java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index ebace91a8c1..e92ff599c33 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/layout/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -1,13 +1,14 @@ -package org.apache.arrow.vector.layout; +package org.apache.arrow.vector.schema; import static java.util.Arrays.asList; import static org.apache.arrow.flatbuf.Precision.DOUBLE; import static org.apache.arrow.flatbuf.Precision.SINGLE; -import static org.apache.arrow.vector.layout.VectorLayout.newBooleanVectorLayout; -import static org.apache.arrow.vector.layout.VectorLayout.newByteVectorLayout; 
-import static org.apache.arrow.vector.layout.VectorLayout.newIntVectorLayout; -import static org.apache.arrow.vector.layout.VectorLayout.newOffsetVectorLayout; -import static org.apache.arrow.vector.layout.VectorLayout.newValidityVectorLayout; +import static org.apache.arrow.vector.schema.VectorLayout.booleanVector; +import static org.apache.arrow.vector.schema.VectorLayout.byteVector; +import static org.apache.arrow.vector.schema.VectorLayout.dataVector; +import static org.apache.arrow.vector.schema.VectorLayout.offsetVector; +import static org.apache.arrow.vector.schema.VectorLayout.typeVector; +import static org.apache.arrow.vector.schema.VectorLayout.validityVector; import java.util.ArrayList; import java.util.Collections; @@ -30,7 +31,6 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.apache.arrow.vector.types.pojo.Field; /** * The layout of vectors for a given type @@ -39,47 +39,41 @@ */ public class TypeLayout { - public static TypeLayout newTypeLayout(Field field) { - final org.apache.arrow.vector.types.pojo.ArrowType arrowType = field.getType(); - final List children = field.getChildren(); + public static TypeLayout getTypeLayout(final ArrowType arrowType) { TypeLayout layout = arrowType.accept(new ArrowTypeVisitor() { @Override public TypeLayout visit(Int type) { - return newFixedWidthTypeLayout( - arrowType, - newIntVectorLayout(type.getBitWidth())); + return newFixedWidthTypeLayout(dataVector(type.getBitWidth())); } @Override public TypeLayout visit(Union type) { - List childLayouts = childrenLayout(children); List vectors; switch (type.getMode()) { case UnionMode.Dense: vectors = asList( // TODO: validate this - newValidityVectorLayout(), - newIntVectorLayout(8), // type vector - newOffsetVectorLayout() // offset to find the vector + validityVector(), + typeVector(), + offsetVector() // offset to find the vector 
); break; case UnionMode.Sparse: vectors = asList( - newValidityVectorLayout(), - newIntVectorLayout(8) // type vector + validityVector(), + typeVector() ); break; default: throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode()); } - return new TypeLayout(arrowType, vectors, childLayouts); + return new TypeLayout(vectors); } @Override public TypeLayout visit(Tuple type) { - List childLayouts = childrenLayout(children); List vectors = asList( - newValidityVectorLayout() + validityVector() ); - return new TypeLayout(arrowType, vectors, childLayouts); + return new TypeLayout(vectors); } @Override public TypeLayout visit(Timestamp type) { @@ -87,14 +81,10 @@ public static TypeLayout newTypeLayout(Field field) { } @Override public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { - if (children.size() != 1) { - throw new IllegalArgumentException("Lists should have exactly one child. Found " + children); - } - List childLayouts = childrenLayout(children); List vectors = asList( - newValidityVectorLayout() + validityVector() ); - return new TypeLayout(arrowType, vectors, childLayouts); + return new TypeLayout(vectors); } @Override public TypeLayout visit(FloatingPoint type) { @@ -109,9 +99,7 @@ public static TypeLayout newTypeLayout(Field field) { default: throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision()); } - return newFixedWidthTypeLayout( - arrowType, - newIntVectorLayout(bitWidth)); + return newFixedWidthTypeLayout(dataVector(bitWidth)); } @Override public TypeLayout visit(Decimal type) { @@ -119,36 +107,32 @@ public static TypeLayout newTypeLayout(Field field) { } @Override public TypeLayout visit(Bool type) { - return newFixedWidthTypeLayout( - arrowType, - newBooleanVectorLayout()); + return newFixedWidthTypeLayout(booleanVector()); } @Override public TypeLayout visit(Binary type) { - return newVariableWidthTypeLayout(arrowType, newByteVectorLayout()); + return 
newVariableWidthTypeLayout(); } - private TypeLayout newVariableWidthTypeLayout(ArrowType arrowType, VectorLayout values) { - return newPrimitiveTypeLayout(arrowType, newValidityVectorLayout(), newOffsetVectorLayout(), values); + @Override public TypeLayout visit(Utf8 type) { + return newVariableWidthTypeLayout(); } - private TypeLayout newPrimitiveTypeLayout(ArrowType type, VectorLayout... vectors) { - return new TypeLayout(type, asList(vectors), Collections.emptyList()); + private TypeLayout newVariableWidthTypeLayout() { + return newPrimitiveTypeLayout(validityVector(), offsetVector(), byteVector()); } - public TypeLayout newFixedWidthTypeLayout(ArrowType type, VectorLayout dataVector) { - return newPrimitiveTypeLayout(type, newValidityVectorLayout(), dataVector); + private TypeLayout newPrimitiveTypeLayout(VectorLayout... vectors) { + return new TypeLayout(asList(vectors)); } - @Override public TypeLayout visit(Utf8 type) { - return newVariableWidthTypeLayout( - arrowType, - newByteVectorLayout()); + public TypeLayout newFixedWidthTypeLayout(VectorLayout dataVector) { + return newPrimitiveTypeLayout(validityVector(), dataVector); } @Override public TypeLayout visit(Null type) { - return new TypeLayout(type, Collections.emptyList(), Collections.emptyList()); + return new TypeLayout(Collections.emptyList()); } @Override @@ -170,38 +154,30 @@ public TypeLayout visit(IntervalDay type) { public TypeLayout visit(IntervalYear type) { throw new UnsupportedOperationException("NYI"); } - - private List childrenLayout(final List children) { - List childLayouts = new ArrayList(); - for (Field child : children) { - childLayouts.add(newTypeLayout(child)); - } - return childLayouts; - } }); return layout; } - private final ArrowType type; private final List vectors; - private final List children; - public TypeLayout(ArrowType type, List vectors, List children) { + public TypeLayout(List vectors) { super(); - this.type = type; this.vectors = vectors; - this.children = 
children; - } - - public ArrowType getType() { - return type; } public List getVectors() { return vectors; } - public List getChildren() { - return children; + public List getVectorTypes() { + List types = new ArrayList<>(vectors.size()); + for (VectorLayout vector : vectors) { + types.add(vector.getType()); + } + return types; + } + + public String toString() { + return "TypeLayout{" + vectors + "}"; } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java new file mode 100644 index 00000000000..f2f18af626b --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java @@ -0,0 +1,76 @@ +package org.apache.arrow.vector.schema; + +import static org.apache.arrow.vector.schema.ArrowVectorType.OFFSET; +import static org.apache.arrow.vector.schema.ArrowVectorType.TYPE; +import static org.apache.arrow.vector.schema.ArrowVectorType.VALIDITY; +import static org.apache.arrow.vector.schema.ArrowVectorType.VALUES; + +public class VectorLayout { + + private static final VectorLayout VALIDITY_VECTOR = new VectorLayout(VALIDITY, 1); + private static final VectorLayout OFFSET_VECTOR = new VectorLayout(OFFSET, 32); + private static final VectorLayout TYPE_VECTOR = new VectorLayout(TYPE, 32); + private static final VectorLayout BOOLEAN_VECTOR = new VectorLayout(VALUES, 1); + private static final VectorLayout VALUES_64 = new VectorLayout(VALUES, 64); + private static final VectorLayout VALUES_32 = new VectorLayout(VALUES, 32); + private static final VectorLayout VALUES_16 = new VectorLayout(VALUES, 16); + private static final VectorLayout VALUES_8 = new VectorLayout(VALUES, 8); + + public static VectorLayout typeVector() { + return TYPE_VECTOR; + } + + public static VectorLayout offsetVector() { + return OFFSET_VECTOR; + } + + public static VectorLayout dataVector(int typeBitWidth) { + switch (typeBitWidth) { + case 8: + return VALUES_8; + 
case 16: + return VALUES_16; + case 32: + return VALUES_32; + case 64: + return VALUES_64; + default: + throw new IllegalArgumentException("only 8, 16, 32, or 64 bits supported"); + } + } + + public static VectorLayout booleanVector() { + return BOOLEAN_VECTOR; + } + + public static VectorLayout validityVector() { + return VALIDITY_VECTOR; + } + + public static VectorLayout byteVector() { + return dataVector(8); + } + + private final int typeBitWidth; + + private final ArrowVectorType type; + + private VectorLayout(ArrowVectorType type, int typeBitWidth) { + super(); + this.type = type; + this.typeBitWidth = typeBitWidth; + } + + public int getTypeBitWidth() { + return typeBitWidth; + } + + public ArrowVectorType getType() { + return type; + } + + @Override + public String toString() { + return String.format("{width=%s,type=%s}", typeBitWidth, type); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index db74afd6adf..4b979ff1ca5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -292,7 +292,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new IntervalYearWriterImpl((NullableIntervalYearVector) vector); } }, - FLOAT4(new FloatingPoint(0)) { + FLOAT4(new FloatingPoint(Precision.SINGLE)) { @Override public Field getField() { return FLOAT4_FIELD; @@ -308,7 +308,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new Float4WriterImpl((NullableFloat4Vector) vector); } }, // 4 byte ieee 754 - FLOAT8(new FloatingPoint(1)) { + FLOAT8(new FloatingPoint(Precision.DOUBLE)) { @Override public Field getField() { return FLOAT8_FIELD; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 26045006ec4..36712b9bea3 100644 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -24,7 +24,8 @@ import java.util.List; import java.util.Objects; -import org.apache.arrow.schema.ArrowVectorType; +import org.apache.arrow.vector.schema.ArrowVectorType; +import org.apache.arrow.vector.schema.TypeLayout; import com.google.common.collect.ImmutableList; import com.google.flatbuffers.FlatBufferBuilder; @@ -34,7 +35,7 @@ public class Field { private final boolean nullable; private final ArrowType type; private final List children; - private final List buffers; + private final TypeLayout typeLayout; public Field(String name, boolean nullable, ArrowType type, List children) { this.name = name; @@ -45,11 +46,7 @@ public Field(String name, boolean nullable, ArrowType type, List children } else { this.children = children; } - this.buffers = getBuffersForType(type); - } - - protected static List getBuffersForType(ArrowType type) { - type.accept(visitor) + this.typeLayout = TypeLayout.getTypeLayout(type); } public static Field convertField(org.apache.arrow.flatbuf.Field field) { @@ -65,7 +62,16 @@ public static Field convertField(org.apache.arrow.flatbuf.Field field) { childrenBuilder.add(convertField(field.children(i))); } List children = childrenBuilder.build(); - return new Field(name, nullable, type, children); + Field result = new Field(name, nullable, type, children); + TypeLayout typeLayout = result.getTypeLayout(); + if (typeLayout.getVectors().size() != field.buffersLength()) { + List types = new ArrayList<>(); + for (int i = 0; i < field.buffersLength(); i++) { + types.add(new ArrowVectorType(field.buffers(i))); + } + throw new IllegalArgumentException("Deserialized field does not match expected vectors. 
expected: " + typeLayout.getVectorTypes() + " got " + types); + } + return result; } public int getField(FlatBufferBuilder builder) { @@ -76,12 +82,18 @@ public int getField(FlatBufferBuilder builder) { childrenData[i] = children.get(i).getField(builder); } int childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, childrenData); + short[] buffersData = new short[typeLayout.getVectors().size()]; + for (int i = 0; i < buffersData.length; i++) { + buffersData[i] = typeLayout.getVectors().get(i).getType().getType(); + } + int buffersOffset = org.apache.arrow.flatbuf.Field.createBuffersVector(builder, buffersData ); org.apache.arrow.flatbuf.Field.startField(builder); org.apache.arrow.flatbuf.Field.addName(builder, nameOffset); org.apache.arrow.flatbuf.Field.addNullable(builder, nullable); org.apache.arrow.flatbuf.Field.addTypeType(builder, type.getTypeType()); org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset); + org.apache.arrow.flatbuf.Field.addBuffers(builder, buffersOffset); return org.apache.arrow.flatbuf.Field.endField(builder); } @@ -101,8 +113,8 @@ public List getChildren() { return children; } - public List getBuffers() { - return buffers; + public TypeLayout getTypeLayout() { + return typeLayout; } @Override @@ -119,4 +131,9 @@ public boolean equals(Object obj) { (this.children.size() == 0 && that.children == null)); } + + @Override + public String toString() { + return String.format("Field{name=%s, type=%s, children=%s, layout=%s}", name, type, children, typeLayout); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java index 9e2894170b2..231be9bd55c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java @@ -18,15 +18,13 @@ package 
org.apache.arrow.vector.types.pojo; -import com.google.common.collect.ImmutableList; -import com.google.flatbuffers.FlatBufferBuilder; +import static org.apache.arrow.vector.types.pojo.Field.convertField; -import java.nio.ByteBuffer; import java.util.List; import java.util.Objects; -import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField; -import static org.apache.arrow.vector.types.pojo.Field.convertField; +import com.google.common.collect.ImmutableList; +import com.google.flatbuffers.FlatBufferBuilder; public class Schema { private List fields; @@ -71,4 +69,9 @@ public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) { List fields = childrenBuilder.build(); return new Schema(fields); } + + @Override + public String toString() { + return "Schema" + fields; + } } diff --git a/java/vector/src/test/java/org/apache/arrow/file/ByteArrayReadableSeekableByteChannel.java b/java/vector/src/test/java/org/apache/arrow/vector/file/ByteArrayReadableSeekableByteChannel.java similarity index 98% rename from java/vector/src/test/java/org/apache/arrow/file/ByteArrayReadableSeekableByteChannel.java rename to java/vector/src/test/java/org/apache/arrow/vector/file/ByteArrayReadableSeekableByteChannel.java index c6185f30986..7c423d5881a 100644 --- a/java/vector/src/test/java/org/apache/arrow/file/ByteArrayReadableSeekableByteChannel.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/ByteArrayReadableSeekableByteChannel.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; import java.io.IOException; import java.nio.ByteBuffer; diff --git a/java/vector/src/test/java/org/apache/arrow/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java similarity index 53% rename from java/vector/src/test/java/org/apache/arrow/file/TestArrowFile.java rename to java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 76bbb30e484..33eb12af415 100644 --- a/java/vector/src/test/java/org/apache/arrow/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -15,9 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; -import static java.util.Arrays.asList; +import static com.google.common.base.Preconditions.checkArgument; import java.io.File; import java.io.FileNotFoundException; @@ -29,11 +29,8 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.schema.ArrowFieldNode; -import org.apache.arrow.schema.ArrowRecordBatch; -import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector.Accessor; -import org.apache.arrow.vector.VectorLoader; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; @@ -42,6 +39,9 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BigIntWriter; import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.schema.ArrowFieldNode; +import org.apache.arrow.vector.schema.ArrowRecordBatch; +import org.apache.arrow.vector.schema.VectorLayout; import 
org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; @@ -71,7 +71,7 @@ public void test() throws IOException { } writer.setValueCount(count); - write(parent, file); + write((MapVector)parent.getChild("root"), file); parent.close(); } @@ -82,17 +82,48 @@ public void test() throws IOException { ) { ArrowFooter footer = arrowReader.readFooter(); org.apache.arrow.vector.types.pojo.Schema schema = footer.getSchema(); + + // initialize vectors + MapVector parent = new MapVector("parent", allocator, null); + + List fields = schema.getFields(); + parent.initializeChildren(fields); + List fieldVectors = parent.getFieldVectors(); + if (fieldVectors.size() != fields.size()) { + throw new IllegalArgumentException(); //TODO + } + +// validateLayout(fields, parent); List recordBatches = footer.getRecordBatches(); for (ArrowBlock rbBlock : recordBatches) { ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock); - Iterator nodes = recordBatch.getNodes().iterator(); + Iterator buffers = recordBatch.getBuffers().iterator(); - MapVector parent = new MapVector("parent", allocator, null); - List fields = schema.getFields(); - for (Field field : fields) { - VectorLoader.addChild(parent, field, nodes, buffers); + Iterator nodes = recordBatch.getNodes().iterator(); + + for (int i = 0; i < fields.size(); ++i) { + Field field = fields.get(i); + FieldVector fieldVector = fieldVectors.get(i); + loadBuffers(fieldVector, field, buffers, nodes); } +// public void load(List fields, int length, Iterator nodes, Iterator buffers) { +// +// for (Field field : fields) { +// MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); +// ValueVector vector = this.add(field.getName(), minorType); +// +// +// vector.loadBuffers(typeLayout, ownBuffers); +// List children = field.getChildren(); +// for (Field child : children) { +// addChild((NestedVector)vector, child, nodes, buffers); +// vector.loadChild() +// } +// 
} +// } +// parent.load(fields, recordBatch.getLength(), nodes, buffers); + MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); for (int i = 0; i < count; i++) { rootReader.setPosition(i); @@ -107,6 +138,44 @@ public void test() throws IOException { } } + private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes) { + ArrowFieldNode fieldNode = nodes.next(); + List typeLayout = field.getTypeLayout().getVectors(); + List ownBuffers = new ArrayList<>(typeLayout.size()); + for (int j = 0; j < typeLayout.size(); j++) { + ownBuffers.add(buffers.next()); + } + vector.loadFieldBuffers(fieldNode, ownBuffers); + List children = field.getChildren(); + if (children.size() > 0) { + List childrenFromFields = vector.getChildrenFromFields(); + int i = 0; + checkArgument(children.size() == childrenFromFields.size(), "should have as many children as in the schema: found " + childrenFromFields.size() + " expected " + children.size()); + for (Field child : children) { + FieldVector fieldVector = childrenFromFields.get(i); + loadBuffers(fieldVector, child, buffers, nodes); + ++i; + } + } + } + +// private void validateLayout(List fields, Iterable childVectors) { +// int i = 0; +// for (ValueVector valueVector : childVectors) { +// Field field = fields.get(i); +// TypeLayout typeLayout = field.getTypeLayout(); +// TypeLayout expectedTypeLayout = valueVector.getTypeLayout(); +// if (!expectedTypeLayout.equals(typeLayout)) { +// throw new InvalidArrowFileException("The type layout does not match the expected layout: expected " + expectedTypeLayout + " found " + typeLayout); +// } +// if (field.getChildren().size() > 0) { +// validateLayout(field.getChildren(), valueVector); +// } +// ++i; +// } +// Preconditions.checkArgument(i == fields.size(), "should have as many children as in the schema: found " + i + " expected " + fields.size()); +// } + private void write(MapVector parent, File file) throws FileNotFoundException, IOException { 
Field rootField = parent.getField(); Schema schema = new Schema(rootField.getChildren()); @@ -115,15 +184,15 @@ private void write(MapVector parent, File file) throws FileNotFoundException, IO ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema) ) { List nodes = new ArrayList<>(); - for (ValueVector vector : parent) { - appendNodes(vector, nodes); + List buffers = new ArrayList<>(); + for (FieldVector vector : parent.getFieldVectors()) { + appendNodes(vector, nodes, buffers); } - List buffers = new ArrayList<>(asList(parent.getBuffers(false))); arrowWriter.writeRecordBatch(new ArrowRecordBatch(parent.getAccessor().getValueCount(), nodes, buffers)); } } - private void appendNodes(ValueVector vector, List nodes) { + private void appendNodes(FieldVector vector, List nodes, List buffers) { Accessor accessor = vector.getAccessor(); int nullCount = 0; // TODO: should not have to do that @@ -134,8 +203,10 @@ private void appendNodes(ValueVector vector, List nodes) { } } nodes.add(new ArrowFieldNode(accessor.getValueCount(), nullCount)); - for (ValueVector child : vector) { - appendNodes(child, nodes); + // TODO: validate buffer count + buffers.addAll(vector.getFieldBuffers()); + for (FieldVector child : vector.getChildrenFromFields()) { + appendNodes(child, nodes, buffers); } } diff --git a/java/vector/src/test/java/org/apache/arrow/file/TestArrowFooter.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFooter.java similarity index 93% rename from java/vector/src/test/java/org/apache/arrow/file/TestArrowFooter.java rename to java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFooter.java index 3516ff0f3d5..addabf78a3b 100644 --- a/java/vector/src/test/java/org/apache/arrow/file/TestArrowFooter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFooter.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; import static java.util.Arrays.asList; import static org.junit.Assert.assertEquals; @@ -24,6 +24,8 @@ import java.util.Collections; import org.apache.arrow.flatbuf.Footer; +import org.apache.arrow.vector.file.ArrowBlock; +import org.apache.arrow.vector.file.ArrowFooter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; diff --git a/java/vector/src/test/java/org/apache/arrow/file/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java similarity index 86% rename from java/vector/src/test/java/org/apache/arrow/file/TestArrowReaderWriter.java rename to java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java index a8237566842..f90329aca11 100644 --- a/java/vector/src/test/java/org/apache/arrow/file/TestArrowReaderWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java @@ -15,11 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.arrow.file; +package org.apache.arrow.vector.file; import static java.util.Arrays.asList; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -29,8 +30,12 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.schema.ArrowFieldNode; -import org.apache.arrow.schema.ArrowRecordBatch; +import org.apache.arrow.vector.file.ArrowBlock; +import org.apache.arrow.vector.file.ArrowFooter; +import org.apache.arrow.vector.file.ArrowReader; +import org.apache.arrow.vector.file.ArrowWriter; +import org.apache.arrow.vector.schema.ArrowFieldNode; +import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -80,6 +85,7 @@ public void test() throws IOException { ArrowFooter footer = reader.readFooter(); Schema readSchema = footer.getSchema(); assertEquals(schema, readSchema); + assertTrue(readSchema.getFields().get(0).getTypeLayout().getVectorTypes().toString(), readSchema.getFields().get(0).getTypeLayout().getVectors().size() > 0); // TODO: dictionaries List recordBatches = footer.getRecordBatches(); assertEquals(1, recordBatches.size()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index 06a1149c0d6..bbd5b6a8955 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -17,8 +17,9 @@ */ package org.apache.arrow.vector.pojo; -import com.google.common.collect.ImmutableList; -import com.google.flatbuffers.FlatBufferBuilder; +import static 
org.apache.arrow.flatbuf.Precision.SINGLE; +import static org.junit.Assert.assertEquals; + import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; @@ -27,9 +28,8 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; -import java.util.List; - -import static org.junit.Assert.assertEquals; +import com.google.common.collect.ImmutableList; +import com.google.flatbuffers.FlatBufferBuilder; /** * Test conversion between Flatbuf and Pojo field representations @@ -46,7 +46,7 @@ public void simple() { public void complex() { ImmutableList.Builder childrenBuilder = ImmutableList.builder(); childrenBuilder.add(new Field("child1", true, Utf8.INSTANCE, null)); - childrenBuilder.add(new Field("child2", true, new FloatingPoint(0), ImmutableList.of())); + childrenBuilder.add(new Field("child2", true, new FloatingPoint(SINGLE), ImmutableList.of())); Field initialField = new Field("a", true, Tuple.INSTANCE, childrenBuilder.build()); run(initialField); @@ -56,7 +56,7 @@ public void complex() { public void schema() { ImmutableList.Builder childrenBuilder = ImmutableList.builder(); childrenBuilder.add(new Field("child1", true, Utf8.INSTANCE, null)); - childrenBuilder.add(new Field("child2", true, new FloatingPoint(0), ImmutableList.of())); + childrenBuilder.add(new Field("child2", true, new FloatingPoint(SINGLE), ImmutableList.of())); Schema initialSchema = new Schema(childrenBuilder.build()); run(initialSchema); From aa1b766a86ea2a38dc72de866c09901e9a472c5c Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 11:59:46 -0700 Subject: [PATCH 09/21] better test --- .../templates/NullableValueVectors.java | 11 +++++++++-- .../vector/complex/AbstractMapVector.java | 2 +- .../apache/arrow/vector/complex/MapVector.java | 2 +- .../apache/arrow/vector/schema/TypeLayout.java | 17 +++++++++-------- 
.../arrow/vector/file/TestArrowFile.java | 18 +++++++++++++----- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index 54ecf2524c5..a5afcfa97c5 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -116,7 +116,9 @@ public final class ${className} extends BaseDataValueVector implements <#if type * @param children */ public void initializeChildrenFromFields(List children) { - throw new UnsupportedOperationException(); + if (!children.isEmpty()) { + throw new IllegalArgumentException("primitive type vector ${className} can not have children: " + children); + } } public List getChildrenFromFields() { @@ -124,7 +126,12 @@ public List getChildrenFromFields() { } public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - throw new UnsupportedOperationException(); + if (ownBuffers.size() != 2) { + throw new IllegalArgumentException("Illegal buffer count, expected 2, got: " + ownBuffers.size()); + } + bits.data = ownBuffers.get(0); + values.data = ownBuffers.get(1); + // TODO: do something with the sizes in fieldNode? } public List getFieldBuffers() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java index f4714ea2b26..71bd7cac9b2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java @@ -163,7 +163,7 @@ public T getChild(String name, Class clazz) { @Override public ValueVector add(String name, MinorType minorType, int... 
precisionScale) { final ValueVector existing = getChild(name); - if (existing == null) { + if (existing != null) { throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", existing.getClass().getSimpleName(), minorType)); } ValueVector vector = minorType.getNewVector(name, allocator, callBack, precisionScale); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index f9bacdceacd..a9b6e306476 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -332,11 +332,11 @@ public void initializeChildren(List children) { if (fieldChildren != null) { throw new IllegalArgumentException(children.toString()); //TODO } + fieldChildren = new ArrayList<>(); for (Field field : children) { MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); FieldVector vector = (FieldVector)this.add(field.getName(), minorType); fieldChildren.add(vector); - // TODO: clean this up vector.initializeChildrenFromFields(field.getChildren()); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index e92ff599c33..be2ce0a0630 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -77,7 +77,7 @@ public static TypeLayout getTypeLayout(final ArrowType arrowType) { } @Override public TypeLayout visit(Timestamp type) { - throw new UnsupportedOperationException("NYI"); + return newFixedWidthTypeLayout(dataVector(64)); } @Override public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { @@ -103,7 +103,8 @@ public static TypeLayout getTypeLayout(final ArrowType arrowType) { } @Override 
public TypeLayout visit(Decimal type) { - throw new UnsupportedOperationException("NYI"); + // TODO: check size + return newFixedWidthTypeLayout(dataVector(64)); // actually depends on the type fields } @Override public TypeLayout visit(Bool type) { @@ -137,22 +138,22 @@ public TypeLayout visit(Null type) { @Override public TypeLayout visit(Date type) { - throw new UnsupportedOperationException("NYI"); + return newFixedWidthTypeLayout(dataVector(64)); } @Override public TypeLayout visit(Time type) { - throw new UnsupportedOperationException("NYI"); + return newFixedWidthTypeLayout(dataVector(64)); } @Override - public TypeLayout visit(IntervalDay type) { - throw new UnsupportedOperationException("NYI"); + public TypeLayout visit(IntervalDay type) { // TODO: check size + return newFixedWidthTypeLayout(dataVector(64)); } @Override - public TypeLayout visit(IntervalYear type) { - throw new UnsupportedOperationException("NYI"); + public TypeLayout visit(IntervalYear type) { // TODO: check size + return newFixedWidthTypeLayout(dataVector(64)); } }); return layout; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 33eb12af415..7ddf59436c9 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -20,6 +20,7 @@ import static com.google.common.base.Preconditions.checkArgument; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; @@ -75,20 +76,25 @@ public void test() throws IOException { parent.close(); } + System.out.println(file.length()); { try ( - FileOutputStream fileOutputStream = new FileOutputStream(file); - ArrowReader arrowReader = new ArrowReader(fileOutputStream.getChannel(), allocator) + FileInputStream fileInputStream = new 
FileInputStream(file); + ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), allocator) ) { ArrowFooter footer = arrowReader.readFooter(); org.apache.arrow.vector.types.pojo.Schema schema = footer.getSchema(); + System.out.println("reading schema: " + schema); // initialize vectors MapVector parent = new MapVector("parent", allocator, null); + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + MapVector root = (MapVector)parent.getChild("root"); List fields = schema.getFields(); - parent.initializeChildren(fields); - List fieldVectors = parent.getFieldVectors(); + root.initializeChildren(fields); + List fieldVectors = root.getFieldVectors(); if (fieldVectors.size() != fields.size()) { throw new IllegalArgumentException(); //TODO } @@ -100,7 +106,8 @@ public void test() throws IOException { Iterator buffers = recordBatch.getBuffers().iterator(); Iterator nodes = recordBatch.getNodes().iterator(); - + System.out.println(recordBatch.getNodes().size() + " nodes"); + System.out.println(recordBatch.getBuffers().size() + " buffers"); for (int i = 0; i < fields.size(); ++i) { Field field = fields.get(i); FieldVector fieldVector = fieldVectors.get(i); @@ -179,6 +186,7 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf private void write(MapVector parent, File file) throws FileNotFoundException, IOException { Field rootField = parent.getField(); Schema schema = new Schema(rootField.getChildren()); + System.out.println("writing schema: " + schema); try ( FileOutputStream fileOutputStream = new FileOutputStream(file); ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema) From 81863c5acfd1ef79ef48bb8faa0247db71396ec6 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 12:43:58 -0700 Subject: [PATCH 10/21] fixed loader --- .../vector/src/main/codegen/templates/NullableValueVectors.java | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index a5afcfa97c5..e713a18e2fa 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -130,7 +130,9 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers throw new IllegalArgumentException("Illegal buffer count, expected 2, got: " + ownBuffers.size()); } bits.data = ownBuffers.get(0); + bits.data.retain(allocator); values.data = ownBuffers.get(1); + values.data.retain(allocator); // TODO: do something with the sizes in fieldNode? } From d6a1788a6962062fabe4b21ebd179a79f8f20787 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 13:01:38 -0700 Subject: [PATCH 11/21] refactoring --- .../org/apache/arrow/vector/VectorLoader.java | 253 +++--------------- .../apache/arrow/vector/VectorUnloader.java | 55 ++++ .../vector/complex/ComplexVectorLoader.java | 40 --- .../arrow/vector/file/TestArrowFile.java | 104 +------ 4 files changed, 107 insertions(+), 345 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java delete mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index fa8ff368428..2e9c8ddb7b2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -1,229 +1,62 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package org.apache.arrow.vector; +import static com.google.common.base.Preconditions.checkArgument; + +import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.apache.arrow.flatbuf.Precision; -import org.apache.arrow.vector.complex.ComplexVectorLoader; -import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.NestedVector; import org.apache.arrow.vector.schema.ArrowFieldNode; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.Binary; -import org.apache.arrow.vector.types.pojo.ArrowType.Bool; -import org.apache.arrow.vector.types.pojo.ArrowType.Date; -import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; -import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.apache.arrow.vector.types.pojo.ArrowType.IntervalDay; -import org.apache.arrow.vector.types.pojo.ArrowType.IntervalYear; -import org.apache.arrow.vector.types.pojo.ArrowType.Null; -import org.apache.arrow.vector.types.pojo.ArrowType.Time; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; -import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; -import 
org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import org.apache.arrow.vector.schema.ArrowRecordBatch; +import org.apache.arrow.vector.schema.VectorLayout; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import io.netty.buffer.ArrowBuf; public class VectorLoader { - - public static void addChild(final NestedVector container, final Field field, final Iterator nodes, final Iterator buffers) { - MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); - ValueVector vector = container.add(field.getName(), minorType); - loadVector(vector, field, nodes, buffers); - List children = field.getChildren(); - for (Field child : children) { - addChild((NestedVector)vector, child, nodes, buffers); + private final List fieldVectors; + private final List fields; + + public VectorLoader(Schema schema, MapVector root) { + super(); + this.fields = schema.getFields(); + root.initializeChildren(fields); + this.fieldVectors = root.getFieldVectors(); + if (this.fieldVectors.size() != fields.size()) { + throw new IllegalArgumentException(); //TODO } } - public static void loadVector(final ValueVector vector, Field field, Iterator nodes, final Iterator buffers) { - final ArrowFieldNode node = nodes.next(); - field.getType().accept(new ArrowType.ArrowTypeVisitor() { - @Override - public Void visit(Null type) { - return null; - } - - @Override - public Void visit(Tuple type) { - MapVector mapVector = (MapVector)vector; - ComplexVectorLoader.load(mapVector, node, buffers); - return null; - } - - @Override - public Void visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { - ListVector listVector = (ListVector)vector; - ComplexVectorLoader.load(listVector, node, buffers); - return null; - } - - @Override - public Void visit(Union type) { - throw new UnsupportedOperationException("NYI"); - } - - @Override - public Void visit(Int type) { - switch 
(type.getBitWidth()) { - case 8: - if (type.getIsSigned()) { - NullableTinyIntVector intVector = (NullableTinyIntVector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } else { - NullableUInt1Vector intVector = (NullableUInt1Vector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } - break; - case 16: - if (type.getIsSigned()) { - NullableSmallIntVector intVector = (NullableSmallIntVector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } else { - NullableUInt2Vector intVector = (NullableUInt2Vector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } - break; - case 32: - if (type.getIsSigned()) { - NullableIntVector intVector = (NullableIntVector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } else { - NullableUInt4Vector intVector = (NullableUInt4Vector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } - break; - case 64: - if (type.getIsSigned()) { - NullableBigIntVector intVector = (NullableBigIntVector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } else { - NullableUInt8Vector intVector = (NullableUInt8Vector)vector; - intVector.bits.data = buffers.next(); - intVector.values.data = buffers.next(); - } - break; - default: - throw new IllegalArgumentException("Illegal bit width: " + type.getBitWidth()); - } - // TODO: the vector has an unused data field? 
- return null; - } - - @Override - public Void visit(FloatingPoint type) { - switch (type.getPrecision()) { - case Precision.SINGLE: - NullableFloat4Vector fVector = (NullableFloat4Vector)vector; - fVector.bits.data = buffers.next(); - fVector.values.data = buffers.next(); - break; - case Precision.DOUBLE: - NullableFloat8Vector dVector = (NullableFloat8Vector)vector; - dVector.bits.data = buffers.next(); - dVector.values.data = buffers.next(); - break; - default: - throw new IllegalArgumentException("unknown precision: " + type.getPrecision()); - } - // TODO: the vector has an unused data field? - return null; - } - - @Override - public Void visit(Utf8 type) { - NullableVarCharVector stringVector = (NullableVarCharVector)vector; - stringVector.bits.data = buffers.next(); - stringVector.values.offsetVector.data = buffers.next(); - stringVector.values.data = buffers.next(); - // TODO: the vector has an unused data field? - return null; - } - - @Override - public Void visit(Binary type) { - NullableVarBinaryVector bVector = (NullableVarBinaryVector)vector; - bVector.bits.data = buffers.next(); - bVector.values.offsetVector.data = buffers.next(); - bVector.values.data = buffers.next(); - // TODO: the vector has an unused data field? - return null; - } - - @Override - public Void visit(Bool type) { - NullableBitVector bVector = (NullableBitVector)vector; - bVector.bits.data = buffers.next(); - bVector.values.data = buffers.next(); - // TODO: the vector has an unused data field? 
- return null; - } - - @Override - public Void visit(Decimal type) { - throw new UnsupportedOperationException("NYI"); - } - - @Override - public Void visit(Date type) { - throw new UnsupportedOperationException("NYI"); - } - - @Override - public Void visit(Time type) { - throw new UnsupportedOperationException("NYI"); - } - - @Override - public Void visit(Timestamp type) { - throw new UnsupportedOperationException("NYI"); - } - - @Override - public Void visit(IntervalDay type) { - throw new UnsupportedOperationException("NYI"); - } - - @Override - public Void visit(IntervalYear type) { - throw new UnsupportedOperationException("NYI"); - } - }); - + public void load(ArrowRecordBatch recordBatch) { + Iterator buffers = recordBatch.getBuffers().iterator(); + Iterator nodes = recordBatch.getNodes().iterator(); + for (int i = 0; i < fields.size(); ++i) { + Field field = fields.get(i); + FieldVector fieldVector = fieldVectors.get(i); + loadBuffers(fieldVector, field, buffers, nodes); + } } - public static void load(BaseDataValueVector vector, ArrowBuf buffer) { - vector.data = buffer; + private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes) { + ArrowFieldNode fieldNode = nodes.next(); + List typeLayout = field.getTypeLayout().getVectors(); + List ownBuffers = new ArrayList<>(typeLayout.size()); + for (int j = 0; j < typeLayout.size(); j++) { + ownBuffers.add(buffers.next()); + } + vector.loadFieldBuffers(fieldNode, ownBuffers); + List children = field.getChildren(); + if (children.size() > 0) { + List childrenFromFields = vector.getChildrenFromFields(); + int i = 0; + checkArgument(children.size() == childrenFromFields.size(), "should have as many children as in the schema: found " + childrenFromFields.size() + " expected " + children.size()); + for (Field child : children) { + FieldVector fieldVector = childrenFromFields.get(i); + loadBuffers(fieldVector, child, buffers, nodes); + ++i; + } + } } - } diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java new file mode 100644 index 00000000000..e5bcf35bc68 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -0,0 +1,55 @@ +package org.apache.arrow.vector; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.vector.ValueVector.Accessor; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.schema.ArrowFieldNode; +import org.apache.arrow.vector.schema.ArrowRecordBatch; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; + +import io.netty.buffer.ArrowBuf; + +public class VectorUnloader { + + private final MapVector parent; + + public VectorUnloader(MapVector parent) { + super(); + this.parent = parent; + } + + public Schema getSchema() { + Field rootField = parent.getField(); + return new Schema(rootField.getChildren()); + } + + public ArrowRecordBatch getRecordBatch() { + List nodes = new ArrayList<>(); + List buffers = new ArrayList<>(); + for (FieldVector vector : parent.getFieldVectors()) { + appendNodes(vector, nodes, buffers); + } + return new ArrowRecordBatch(parent.getAccessor().getValueCount(), nodes, buffers); + } + + private void appendNodes(FieldVector vector, List nodes, List buffers) { + Accessor accessor = vector.getAccessor(); + int nullCount = 0; + // TODO: should not have to do that + // we can do that a lot more efficiently (for example with Long.bitCount(i)) + for (int i = 0; i < accessor.getValueCount(); i++) { + if (accessor.isNull(i)) { + nullCount ++; + } + } + nodes.add(new ArrowFieldNode(accessor.getValueCount(), nullCount)); + // TODO: validate buffer count + buffers.addAll(vector.getFieldBuffers()); + for (FieldVector child : vector.getChildrenFromFields()) { + appendNodes(child, nodes, buffers); + } + } +} diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java deleted file mode 100644 index 28a08abb3bd..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ComplexVectorLoader.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import java.util.Iterator; - -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.schema.ArrowFieldNode; - -import io.netty.buffer.ArrowBuf; - -public class ComplexVectorLoader { - - public static void load(ListVector listVector, ArrowFieldNode node, Iterator buffers) { - // listVector.valueCount = node.getLength(); ? - VectorLoader.load(listVector.offsets, buffers.next()); - VectorLoader.load(listVector.bits, buffers.next()); - } - - public static void load(MapVector mapVector, ArrowFieldNode node, Iterator buffers) { - mapVector.valueCount = node.getLength(); - // no vector of it's own? 
- } - -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 7ddf59436c9..530d333efbc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -17,21 +17,17 @@ */ package org.apache.arrow.vector.file; -import static com.google.common.base.Preconditions.checkArgument; - import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector.Accessor; +import org.apache.arrow.vector.VectorLoader; +import org.apache.arrow.vector.VectorUnloader; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; @@ -40,16 +36,11 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BigIntWriter; import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowRecordBatch; -import org.apache.arrow.vector.schema.VectorLayout; -import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; import org.junit.Test; -import io.netty.buffer.ArrowBuf; - public class TestArrowFile { static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); @@ -76,14 +67,13 @@ public void test() throws IOException { parent.close(); } - System.out.println(file.length()); { try ( FileInputStream 
fileInputStream = new FileInputStream(file); ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), allocator) ) { ArrowFooter footer = arrowReader.readFooter(); - org.apache.arrow.vector.types.pojo.Schema schema = footer.getSchema(); + Schema schema = footer.getSchema(); System.out.println("reading schema: " + schema); // initialize vectors @@ -92,44 +82,13 @@ public void test() throws IOException { MapWriter rootWriter = writer.rootAsMap(); MapVector root = (MapVector)parent.getChild("root"); - List fields = schema.getFields(); - root.initializeChildren(fields); - List fieldVectors = root.getFieldVectors(); - if (fieldVectors.size() != fields.size()) { - throw new IllegalArgumentException(); //TODO - } + VectorLoader vectorLoader = new VectorLoader(schema, root); -// validateLayout(fields, parent); List recordBatches = footer.getRecordBatches(); for (ArrowBlock rbBlock : recordBatches) { ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock); - Iterator buffers = recordBatch.getBuffers().iterator(); - Iterator nodes = recordBatch.getNodes().iterator(); - System.out.println(recordBatch.getNodes().size() + " nodes"); - System.out.println(recordBatch.getBuffers().size() + " buffers"); - for (int i = 0; i < fields.size(); ++i) { - Field field = fields.get(i); - FieldVector fieldVector = fieldVectors.get(i); - loadBuffers(fieldVector, field, buffers, nodes); - } - -// public void load(List fields, int length, Iterator nodes, Iterator buffers) { -// -// for (Field field : fields) { -// MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); -// ValueVector vector = this.add(field.getName(), minorType); -// -// -// vector.loadBuffers(typeLayout, ownBuffers); -// List children = field.getChildren(); -// for (Field child : children) { -// addChild((NestedVector)vector, child, nodes, buffers); -// vector.loadChild() -// } -// } -// } -// parent.load(fields, recordBatch.getLength(), nodes, buffers); + 
vectorLoader.load(recordBatch); MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); for (int i = 0; i < count; i++) { @@ -138,34 +97,13 @@ public void test() throws IOException { Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); } - parent.close(); } + parent.close(); } } } - private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes) { - ArrowFieldNode fieldNode = nodes.next(); - List typeLayout = field.getTypeLayout().getVectors(); - List ownBuffers = new ArrayList<>(typeLayout.size()); - for (int j = 0; j < typeLayout.size(); j++) { - ownBuffers.add(buffers.next()); - } - vector.loadFieldBuffers(fieldNode, ownBuffers); - List children = field.getChildren(); - if (children.size() > 0) { - List childrenFromFields = vector.getChildrenFromFields(); - int i = 0; - checkArgument(children.size() == childrenFromFields.size(), "should have as many children as in the schema: found " + childrenFromFields.size() + " expected " + children.size()); - for (Field child : children) { - FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes); - ++i; - } - } - } - // private void validateLayout(List fields, Iterable childVectors) { // int i = 0; // for (ValueVector valueVector : childVectors) { @@ -184,40 +122,16 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf // } private void write(MapVector parent, File file) throws FileNotFoundException, IOException { - Field rootField = parent.getField(); - Schema schema = new Schema(rootField.getChildren()); + VectorUnloader vectorUnloader = new VectorUnloader(parent); + Schema schema = vectorUnloader.getSchema(); System.out.println("writing schema: " + schema); try ( FileOutputStream fileOutputStream = new FileOutputStream(file); ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema) ) { - List nodes = new ArrayList<>(); - List buffers = new ArrayList<>(); - for 
(FieldVector vector : parent.getFieldVectors()) { - appendNodes(vector, nodes, buffers); - } - arrowWriter.writeRecordBatch(new ArrowRecordBatch(parent.getAccessor().getValueCount(), nodes, buffers)); - } - } - - private void appendNodes(FieldVector vector, List nodes, List buffers) { - Accessor accessor = vector.getAccessor(); - int nullCount = 0; - // TODO: should not have to do that - // we can do that a lot more efficiently (for example with Long.bitCount(i)) - for (int i = 0; i < accessor.getValueCount(); i++) { - if (accessor.isNull(i)) { - nullCount ++; - } - } - nodes.add(new ArrowFieldNode(accessor.getValueCount(), nullCount)); - // TODO: validate buffer count - buffers.addAll(vector.getFieldBuffers()); - for (FieldVector child : vector.getChildrenFromFields()) { - appendNodes(child, nodes, buffers); + arrowWriter.writeRecordBatch(vectorUnloader.getRecordBatch()); } } - } From 4247b1a9217f575b17b35bd29fd7d856866a8373 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 13:17:27 -0700 Subject: [PATCH 12/21] fix whitespace --- java/vector/src/main/codegen/templates/ArrowType.java | 2 +- .../src/main/codegen/templates/NullableValueVectors.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index cbd64cb2484..6aa5870ab02 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -87,7 +87,7 @@ public int getType(FlatBufferBuilder builder) { return ${field.name}; } - + public String toString() { return "${name}{" <#list fields as field> diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index e713a18e2fa..fecd866ee12 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ 
b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -42,7 +42,7 @@ * NB: this class is automatically generated from ${.template_name} and ValueVectorTypes.tdd using FreeMarker. */ @SuppressWarnings("unused") -public final class ${className} extends BaseDataValueVector implements <#if type.major == "VarLen">VariableWidth<#else>FixedWidthVector, NullableVector, FieldVector{ +public final class ${className} extends BaseDataValueVector implements <#if type.major == "VarLen">VariableWidth<#else>FixedWidthVector, NullableVector, FieldVector { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(${className}.class); private final FieldReader reader = new ${minor.class}ReaderImpl(Nullable${minor.class}Vector.this); @@ -141,7 +141,7 @@ public List getFieldBuffers() { values.getBuffer().readerIndex(0); return Arrays.asList(bits.getBuffer(), values.getBuffer()); } - + @Override public Field getField() { return field; From b0bf6bcb2adc24a1da790c87635d34ae28e37230 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 13:32:02 -0700 Subject: [PATCH 13/21] cleanup --- format/File.fbs | 6 ++--- format/Message.fbs | 8 +++--- .../src/main/codegen/templates/ArrowType.java | 2 +- .../org/apache/arrow/vector/ValueVector.java | 9 +++---- .../vector/complex/AbstractMapVector.java | 5 ++-- .../arrow/vector/complex/NestedVector.java | 25 ------------------- .../arrow/vector/file/TestArrowFooter.java | 4 --- 7 files changed, 13 insertions(+), 46 deletions(-) delete mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java diff --git a/format/File.fbs b/format/File.fbs index 42a9f99c720..f7ad1e1594a 100644 --- a/format/File.fbs +++ b/format/File.fbs @@ -7,9 +7,9 @@ namespace org.apache.arrow.flatbuf; /// table Footer { - + schema: org.apache.arrow.flatbuf.Schema; - + dictionaries: [ Block ]; recordBatches: [ Block ]; @@ -20,7 +20,7 @@ struct Block { offset: long; metaDataLength: int; - + bodyLength: 
long; } diff --git a/format/Message.fbs b/format/Message.fbs index da47d0d267c..b02f3fa3869 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -91,10 +91,6 @@ union Type { JSONScalar } -/// ---------------------------------------------------------------------- -/// Data structures for describing a table row batch (a collection of -/// equal-length Arrow arrays) - enum VectorType: short { /// used in List type Dense Union and variable length primitive types (String, Binary) OFFSET, @@ -148,6 +144,10 @@ table Schema { fields: [Field]; } +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) + /// A Buffer represents a single contiguous memory segment struct Buffer { /// The shared memory page id where this buffer is located. Currently this is diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index 6aa5870ab02..ddcfb46ba17 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -41,7 +41,7 @@ public abstract class ArrowType { public static interface ArrowTypeVisitor { <#list arrowTypes.types as type> - T visit(${type.name} type); + T visit(${type.name} type); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 4a1eeb6f774..35321c947db 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -19,14 +19,14 @@ import java.io.Closeable; +import io.netty.buffer.ArrowBuf; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.types.Types.MinorType; -import 
org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.TransferPair; - -import io.netty.buffer.ArrowBuf; +import org.apache.arrow.vector.types.pojo.Field; /** * An abstraction that is used to store a sequence of values in an individual column. @@ -137,8 +137,6 @@ public interface ValueVector extends Closeable, Iterable { */ // SerializedField getMetadata(); -// TypeLayout getTypeLayout(); - /** * Returns the number of bytes that is used by this vector instance. */ @@ -223,5 +221,4 @@ interface Mutator { @Deprecated void generateTestData(int values); } - } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java index 71bd7cac9b2..6b7578b6318 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java @@ -36,7 +36,7 @@ /* * Base class for MapVectors. Currently used by RepeatedMapVector and MapVector */ -public abstract class AbstractMapVector extends AbstractContainerVector implements NestedVector { +public abstract class AbstractMapVector extends AbstractContainerVector { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); // Maintains a map with key as field name and value is the vector itself @@ -160,8 +160,7 @@ public T getChild(String name, Class clazz) { return typeify(v, clazz); } - @Override - public ValueVector add(String name, MinorType minorType, int... precisionScale) { + protected ValueVector add(String name, MinorType minorType, int... 
precisionScale) { final ValueVector existing = getChild(name); if (existing != null) { throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", existing.getClass().getSimpleName(), minorType)); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java deleted file mode 100644 index 53ad2112e09..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NestedVector.java +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.Types.MinorType; - -public interface NestedVector { - ValueVector add(String name, MinorType minorType, int... 
precisionScale); -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFooter.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFooter.java index addabf78a3b..707dba2af98 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFooter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFooter.java @@ -24,8 +24,6 @@ import java.util.Collections; import org.apache.arrow.flatbuf.Footer; -import org.apache.arrow.vector.file.ArrowBlock; -import org.apache.arrow.vector.file.ArrowFooter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -51,9 +49,7 @@ private ArrowFooter roundTrip(ArrowFooter footer) { int i = footer.writeTo(builder); builder.finish(i); ByteBuffer dataBuffer = builder.dataBuffer(); - System.out.println(dataBuffer); ArrowFooter newFooter = new ArrowFooter(Footer.getRootAsFooter(dataBuffer)); - System.out.println(dataBuffer); return newFooter; } From 50fe680a368132950e3309588aa3ac5cfcede47d Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 19:37:01 -0700 Subject: [PATCH 14/21] nested support --- .../src/main/codegen/templates/ArrowType.java | 7 +- .../templates/NullableValueVectors.java | 28 ++- .../arrow/vector/BaseDataValueVector.java | 4 + .../org/apache/arrow/vector/FieldVector.java | 1 + .../org/apache/arrow/vector/VectorLoader.java | 13 +- .../apache/arrow/vector/VectorUnloader.java | 28 ++- .../arrow/vector/complex/ListVector.java | 44 +++- .../arrow/vector/complex/MapVector.java | 35 ++- .../complex/impl/ComplexWriterImpl.java | 2 +- .../apache/arrow/vector/file/ArrowReader.java | 9 +- .../arrow/vector/schema/ArrowRecordBatch.java | 8 +- .../arrow/vector/schema/TypeLayout.java | 8 +- .../arrow/vector/file/TestArrowFile.java | 231 +++++++++++++----- .../apache/arrow/vector/pojo/TestConvert.java | 24 ++ 14 files changed, 344 insertions(+), 98 
deletions(-) diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index ddcfb46ba17..29dee20040a 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -75,9 +75,14 @@ public byte getTypeType() { @Override public int getType(FlatBufferBuilder builder) { + <#list type.fields as field> + <#if field.type == "String"> + int ${field.name} = builder.createString(this.${field.name}); + + org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder); <#list type.fields as field> - org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, <#if field.type == "String">builder.createString(${field.name})<#else>${field.name}); + org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, ${field.name}); return org.apache.arrow.flatbuf.${type.name}.end${type.name}(builder); } diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index fecd866ee12..b75aed98928 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -126,20 +126,32 @@ public List getChildrenFromFields() { } public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 2) { - throw new IllegalArgumentException("Illegal buffer count, expected 2, got: " + ownBuffers.size()); - } - bits.data = ownBuffers.get(0); - bits.data.retain(allocator); - values.data = ownBuffers.get(1); - values.data.retain(allocator); + int expectedSize = <#if type.major = "VarLen">3<#else>2; + if (ownBuffers.size() != expectedSize) { + throw new IllegalArgumentException("Illegal buffer count, expected " + expectedSize + ", got: " + ownBuffers.size()); + } + bits.load(ownBuffers.get(0)); + <#if type.major = "VarLen"> + 
values.offsetVector.load(ownBuffers.get(1)); + values.load(ownBuffers.get(2)); + <#else> + values.load(ownBuffers.get(1)); + // TODO: do something with the sizes in fieldNode? } public List getFieldBuffers() { bits.getBuffer().readerIndex(0); + <#if type.major = "VarLen"> + values.offsetVector.getBuffer().readerIndex(0); + values.getBuffer().readerIndex(0); - return Arrays.asList(bits.getBuffer(), values.getBuffer()); + return Arrays.asList( + bits.getBuffer(), + <#if type.major = "VarLen"> + values.offsetVector.getBuffer(), + + values.getBuffer()); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java index 983f56a6b9e..782fd75c04a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java @@ -84,6 +84,10 @@ public ArrowBuf getBuffer() { return data; } + public void load(ArrowBuf data) { + this.data = data.retain(allocator); + } + /** * This method has a similar effect of allocateNew() without actually clearing and reallocating * the value vector. 
The purpose is to move the value vector to a "mutate" state diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java index f6b4217c1a4..a975fc74efc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java @@ -25,4 +25,5 @@ public interface FieldVector extends ValueVector { * @return the */ List getFieldBuffers(); + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 2e9c8ddb7b2..ed643c21169 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -6,7 +6,6 @@ import java.util.Iterator; import java.util.List; -import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.schema.VectorLayout; @@ -19,11 +18,11 @@ public class VectorLoader { private final List fieldVectors; private final List fields; - public VectorLoader(Schema schema, MapVector root) { + public VectorLoader(Schema schema, FieldVector root) { super(); this.fields = schema.getFields(); - root.initializeChildren(fields); - this.fieldVectors = root.getFieldVectors(); + root.initializeChildrenFromFields(fields); + this.fieldVectors = root.getChildrenFromFields(); if (this.fieldVectors.size() != fields.size()) { throw new IllegalArgumentException(); //TODO } @@ -46,7 +45,11 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf for (int j = 0; j < typeLayout.size(); j++) { ownBuffers.add(buffers.next()); } - vector.loadFieldBuffers(fieldNode, ownBuffers); + try { + vector.loadFieldBuffers(fieldNode, ownBuffers); + } catch (RuntimeException e) { + throw new 
IllegalArgumentException("Could not load buffers for field " + field, e); /* keep the original failure as the cause */ + } List children = field.getChildren(); if (children.size() > 0) { List childrenFromFields = vector.getChildrenFromFields(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index e5bcf35bc68..bfdc8f311db 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -4,35 +4,37 @@ import java.util.List; import org.apache.arrow.vector.ValueVector.Accessor; -import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.schema.ArrowVectorType; import org.apache.arrow.vector.types.pojo.Schema; import io.netty.buffer.ArrowBuf; public class VectorUnloader { - private final MapVector parent; + private final Schema schema; + private final int valueCount; + private final List vectors; - public VectorUnloader(MapVector parent) { + public VectorUnloader(FieldVector parent) { super(); - this.parent = parent; + this.schema = new Schema(parent.getField().getChildren()); + this.valueCount = parent.getAccessor().getValueCount(); + this.vectors = parent.getChildrenFromFields(); } public Schema getSchema() { - Field rootField = parent.getField(); - return new Schema(rootField.getChildren()); + return schema; } public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); - for (FieldVector vector : parent.getFieldVectors()) { + for (FieldVector vector : vectors) { appendNodes(vector, nodes, buffers); } - return new ArrowRecordBatch(parent.getAccessor().getValueCount(), nodes, buffers); + return new ArrowRecordBatch(valueCount, nodes, buffers); } private void appendNodes(FieldVector
vector, List nodes, List buffers) { @@ -46,8 +48,12 @@ private void appendNodes(FieldVector vector, List nodes, List fieldBuffers = vector.getFieldBuffers(); + List expectedBuffers = vector.getField().getTypeLayout().getVectorTypes(); + if (fieldBuffers.size() != expectedBuffers.size()) { + throw new IllegalArgumentException("wrong number of buffers for field " + vector.getField() + ". found: " + fieldBuffers); + } + buffers.addAll(fieldBuffers); for (FieldVector child : vector.getChildrenFromFields()) { appendNodes(child, nodes, buffers); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index b6dbc515091..904a7bc3416 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -18,11 +18,15 @@ ******************************************************************************/ package org.apache.arrow.vector.complex; +import static java.util.Arrays.asList; + +import java.util.Arrays; import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; @@ -32,6 +36,8 @@ import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.schema.ArrowFieldNode; +import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.CallBack; @@ -43,7 +49,7 @@ import io.netty.buffer.ArrowBuf; -public class ListVector extends 
BaseRepeatedValueVector { +public class ListVector extends BaseRepeatedValueVector implements FieldVector { UInt4Vector offsets;// TODO: THis masks the same vector in the parent final UInt1Vector bits; @@ -62,6 +68,41 @@ public ListVector(String name, BufferAllocator allocator, CallBack callBack) { this.callBack = callBack; } + @Override + public void initializeChildrenFromFields(List children) { + if (children.size() != 1) { + throw new IllegalArgumentException("Lists have only one child. Found: " + children); + } + Field field = children.get(0); + MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); + AddOrGetResult addOrGetVector = addOrGetVector(minorType); + if (!addOrGetVector.isCreated()) { + throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector()); + } + } + + @Override + public List getChildrenFromFields() { + // TODO: data vector should be that type + return Arrays.asList((FieldVector)getDataVector()); + } + + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + if (ownBuffers.size() != 2) { + throw new IllegalArgumentException("Lists have a validity and offset vector. 
Found: " + ownBuffers); + } + this.bits.load(ownBuffers.get(0)); + this.offsets.load(ownBuffers.get(1)); /* buffer 0 is validity, buffer 1 is offsets; same order as getFieldBuffers() */ + } + + @Override + public List getFieldBuffers() { + bits.getBuffer().readerIndex(0); + offsets.getBuffer().readerIndex(0); + return asList(bits.getBuffer(), offsets.getBuffer()); + } + public UnionListWriter getWriter() { return writer; } @@ -297,4 +338,5 @@ public void setValueCount(int valueCount) { bits.getMutator().setValueCount(valueCount); } } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index a9b6e306476..c98fcd409e4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -17,6 +17,8 @@ */ package org.apache.arrow.vector.complex; +import static java.util.Arrays.asList; + import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -32,6 +34,7 @@ import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.holders.ComplexHolder; +import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; @@ -46,7 +49,7 @@ import io.netty.buffer.ArrowBuf; -public class MapVector extends AbstractMapVector { +public class MapVector extends AbstractMapVector implements FieldVector { //private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MapVector.class); private final SingleMapReaderImpl reader = new SingleMapReaderImpl(MapVector.this); @@ -326,24 +329,34 @@ public void close() { super.close(); } - private List fieldChildren; - - public void initializeChildren(List children) { - if (fieldChildren != null) { - throw new
IllegalArgumentException(children.toString()); //TODO - } - fieldChildren = new ArrayList<>(); + @Override + public void initializeChildrenFromFields(List children) { for (Field field : children) { MinorType minorType = Types.getMinorTypeForArrowType(field.getType()); FieldVector vector = (FieldVector)this.add(field.getName(), minorType); - fieldChildren.add(vector); vector.initializeChildrenFromFields(field.getChildren()); } } - public List getFieldVectors() { - // TODO: clean this up + @Override + public List getChildrenFromFields() { + // TODO: children should be the right type return (List)(List)getChildren(); } + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + if (ownBuffers.size() != 1) { + throw new IllegalArgumentException("Tuples have a validity. Found: " + ownBuffers); + } +// this.bits.load(ownBuffers.get(0)); + // TODO: add validity vector to make maps nullable + } + + @Override + public List getFieldBuffers() { + // TODO: add validity vector to make maps nullable + return asList(allocator.getEmpty()); + } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java index 4d2adfb3256..89bfefc8f19 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java @@ -22,9 +22,9 @@ import org.apache.arrow.vector.complex.StateTool; import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import com.google.common.base.Preconditions; -import org.apache.arrow.vector.types.pojo.Field; public class ComplexWriterImpl extends AbstractFieldWriter implements ComplexWriter { // private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(ComplexWriterImpl.class); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java index 74f89e65417..7ac08475886 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java @@ -116,7 +116,8 @@ public ArrowRecordBatch readRecordBatch(ArrowBlock recordBatchBlock) throws IOEx if (l < 0) { throw new InvalidArrowFileException("block invalid: " + recordBatchBlock); } - ArrowBuf buffer = allocator.buffer(l); + final ArrowBuf buffer = allocator.buffer(l); + LOGGER.debug("allocated buffer " + buffer); in.position(recordBatchBlock.getOffset()); int n = readFully(buffer, l); if (n != l) { @@ -124,7 +125,8 @@ public ArrowRecordBatch readRecordBatch(ArrowBlock recordBatchBlock) throws IOEx } RecordBatch recordBatchFB = RecordBatch.getRootAsRecordBatch(buffer.nioBuffer().asReadOnlyBuffer()); int nodesLength = recordBatchFB.nodesLength(); - ArrowBuf body = buffer.slice(recordBatchBlock.getMetadataLength(), (int)recordBatchBlock.getBodyLength()); + final ArrowBuf body = buffer.slice(recordBatchBlock.getMetadataLength(), (int)recordBatchBlock.getBodyLength()); + LOGGER.debug("sliced body " + body); List nodes = new ArrayList<>(); for (int i = 0; i < nodesLength; ++i) { FieldNode node = recordBatchFB.nodes(i); @@ -135,8 +137,11 @@ public ArrowRecordBatch readRecordBatch(ArrowBlock recordBatchBlock) throws IOEx Buffer bufferFB = recordBatchFB.buffers(i); LOGGER.debug(String.format("Buffer in RecordBatch at %d, length: %d", bufferFB.offset(), bufferFB.length())); ArrowBuf vectorBuffer = body.slice((int)bufferFB.offset(), (int)bufferFB.length()); + LOGGER.debug("sliced vectorBuffer " + vectorBuffer); + vectorBuffer.retain(); buffers.add(vectorBuffer); } + buffer.release(); return new ArrowRecordBatch(recordBatchFB.length(), nodes, buffers); } diff 
--git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java index dc91f53e560..b2a35711224 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java @@ -30,7 +30,7 @@ import io.netty.buffer.ArrowBuf; -public class ArrowRecordBatch implements FBSerializable { +public class ArrowRecordBatch implements FBSerializable, AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(ArrowRecordBatch.class); /** number of records */ @@ -81,4 +81,10 @@ public int writeTo(FlatBufferBuilder builder) { return RecordBatch.endRecordBatch(builder); } + public void close() { + for (ArrowBuf arrowBuf : buffers) { + arrowBuf.release(); + } + } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index be2ce0a0630..29f9a19d660 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -82,7 +82,8 @@ public static TypeLayout getTypeLayout(final ArrowType arrowType) { @Override public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { List vectors = asList( - validityVector() + validityVector(), + offsetVector() ); return new TypeLayout(vectors); } @@ -166,6 +167,11 @@ public TypeLayout(List vectors) { this.vectors = vectors; } + public TypeLayout(VectorLayout... 
vectors) { + this(asList(vectors)); + } + + public List getVectors() { return vectors; } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 530d333efbc..f86fe98e315 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -33,93 +33,212 @@ import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; import org.apache.arrow.vector.complex.reader.BaseReader.MapReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BigIntWriter; import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.complex.writer.TimeStampWriter; import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; +import io.netty.buffer.ArrowBuf; + public class TestArrowFile { - static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + private BufferAllocator allocator; + + @Before + public void init() { + allocator = new RootAllocator(Integer.MAX_VALUE); + } + + @After + public void tearDown() { + allocator.close(); + } @Test - public void test() throws IOException { + public void testWrite() throws IOException { + File file = new File("target/mytest_write.arrow"); + int count = 10000; + try ( + BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", vectorAllocator, null)) { + writeData(count, parent); + write((MapVector)parent.getChild("root"), file); + } + } + + @Test + public 
void testWriteComplex() throws IOException { + File file = new File("target/mytest_write_complex.arrow"); + int count = 10000; + try ( + BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", vectorAllocator, null)) { + writeComplexData(count, parent); + write((MapVector)parent.getChild("root"), file); + } + } + + private void writeComplexData(int count, MapVector parent) { + ArrowBuf varchar = allocator.buffer(3); + varchar.readerIndex(0); + varchar.setByte(0, 'a'); + varchar.setByte(1, 'b'); + varchar.setByte(2, 'c'); + varchar.writerIndex(3); + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + IntWriter intWriter = rootWriter.integer("int"); + BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); + ListWriter listWriter = rootWriter.list("list"); + MapWriter mapWriter = rootWriter.map("map"); + TimeStampWriter timeStampNested = mapWriter.timeStamp("timestamp"); + for (int i = 0; i < count; i++) { + intWriter.setPosition(i); + intWriter.writeInt(i); + bigIntWriter.setPosition(i); + bigIntWriter.writeBigInt(i); + listWriter.setPosition(i); + listWriter.startList(); + for (int j = 0; j < i % 3; j++) { + listWriter.varChar().writeVarChar(0, 3, varchar); + } + listWriter.endList(); + mapWriter.setPosition(i); + mapWriter.start(); + mapWriter.timeStamp("timestamp").writeTimeStamp(123456789L); + mapWriter.end(); + } + writer.setValueCount(count); + varchar.release(); + } + + + private void writeData(int count, MapVector parent) { + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + IntWriter intWriter = rootWriter.integer("int"); + BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); + for (int i = 0; i < count; i++) { + intWriter.setPosition(i); + intWriter.writeInt(i); + bigIntWriter.setPosition(i); + bigIntWriter.writeBigInt(i); + } + 
writer.setValueCount(count); + } + + + @Test + public void testWriteRead() throws IOException { File file = new File("target/mytest.arrow"); int count = 10000; - { - MapVector parent = new MapVector("parent", allocator, null); + // write + try ( + BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", originalVectorAllocator, null)) { + writeData(count, parent); + write((MapVector)parent.getChild("root"), file); + } + + // read + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + FileInputStream fileInputStream = new FileInputStream(file); + ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), readerAllocator); + BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", vectorAllocator, null) + ) { + ArrowFooter footer = arrowReader.readFooter(); + Schema schema = footer.getSchema(); + System.out.println("reading schema: " + schema); + + // initialize vectors + ComplexWriter writer = new ComplexWriterImpl("root", parent); MapWriter rootWriter = writer.rootAsMap(); - IntWriter intWriter = rootWriter.integer("int"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - for (int i = 0; i < count; i++) { - intWriter.setPosition(i); - intWriter.writeInt(i); - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); + MapVector root = (MapVector)parent.getChild("root"); + + VectorLoader vectorLoader = new VectorLoader(schema, root); + + List recordBatches = footer.getRecordBatches(); + for (ArrowBlock rbBlock : recordBatches) { + try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) { + vectorLoader.load(recordBatch); + } + validateContent(count, parent); } - writer.setValueCount(count); + } + } - write((MapVector)parent.getChild("root"), file); - parent.close(); + private void 
validateContent(int count, MapVector parent) { + MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); + for (int i = 0; i < count; i++) { + rootReader.setPosition(i); + Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); + Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); } + } - { - try ( - FileInputStream fileInputStream = new FileInputStream(file); - ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), allocator) - ) { - ArrowFooter footer = arrowReader.readFooter(); - Schema schema = footer.getSchema(); - System.out.println("reading schema: " + schema); + @Test + public void testWriteReadComplex() throws IOException { + File file = new File("target/mytest.arrow"); + int count = 10000; - // initialize vectors - MapVector parent = new MapVector("parent", allocator, null); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter rootWriter = writer.rootAsMap(); - MapVector root = (MapVector)parent.getChild("root"); + // write + try ( + BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", originalVectorAllocator, null)) { + writeComplexData(count, parent); + write((MapVector)parent.getChild("root"), file); + } - VectorLoader vectorLoader = new VectorLoader(schema, root); + // read + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + FileInputStream fileInputStream = new FileInputStream(file); + ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), readerAllocator); + BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", vectorAllocator, null) + ) { + ArrowFooter footer = arrowReader.readFooter(); + Schema schema = footer.getSchema(); + System.out.println("reading schema: " + schema); - List 
recordBatches = footer.getRecordBatches(); - for (ArrowBlock rbBlock : recordBatches) { - ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock); + // initialize vectors - vectorLoader.load(recordBatch); + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + MapVector root = (MapVector)parent.getChild("root"); - MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); - for (int i = 0; i < count; i++) { - rootReader.setPosition(i); - Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); - Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); - } + VectorLoader vectorLoader = new VectorLoader(schema, root); + List recordBatches = footer.getRecordBatches(); + for (ArrowBlock rbBlock : recordBatches) { + try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) { + vectorLoader.load(recordBatch); } - parent.close(); + validateComplexContent(count, parent); } - } } -// private void validateLayout(List fields, Iterable childVectors) { -// int i = 0; -// for (ValueVector valueVector : childVectors) { -// Field field = fields.get(i); -// TypeLayout typeLayout = field.getTypeLayout(); -// TypeLayout expectedTypeLayout = valueVector.getTypeLayout(); -// if (!expectedTypeLayout.equals(typeLayout)) { -// throw new InvalidArrowFileException("The type layout does not match the expected layout: expected " + expectedTypeLayout + " found " + typeLayout); -// } -// if (field.getChildren().size() > 0) { -// validateLayout(field.getChildren(), valueVector); -// } -// ++i; -// } -// Preconditions.checkArgument(i == fields.size(), "should have as many children as in the schema: found " + i + " expected " + fields.size()); -// } + private void validateComplexContent(int count, MapVector parent) { + MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); + for (int i = 0; i < count; i++) { + rootReader.setPosition(i); + 
Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); + Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); + Assert.assertEquals(i % 3, rootReader.reader("list").size()); + Assert.assertEquals(123456789L, rootReader.reader("map").reader("timestamp").readDateTime().getMillis()); + } + } private void write(MapVector parent, File file) throws FileNotFoundException, IOException { VectorUnloader vectorUnloader = new VectorUnloader(parent); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index bbd5b6a8955..61327f1970e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -17,12 +17,17 @@ */ package org.apache.arrow.vector.pojo; +import static org.apache.arrow.flatbuf.Precision.DOUBLE; import static org.apache.arrow.flatbuf.Precision.SINGLE; import static org.junit.Assert.assertEquals; +import org.apache.arrow.flatbuf.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; +import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; +import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -59,7 +64,26 @@ public void schema() { childrenBuilder.add(new Field("child2", true, new FloatingPoint(SINGLE), ImmutableList.of())); Schema initialSchema = new Schema(childrenBuilder.build()); run(initialSchema); + } + @Test + public void nestedSchema() { + ImmutableList.Builder childrenBuilder = ImmutableList.builder(); + childrenBuilder.add(new Field("child1", 
true, Utf8.INSTANCE, null)); + childrenBuilder.add(new Field("child2", true, new FloatingPoint(SINGLE), ImmutableList.of())); + childrenBuilder.add(new Field("child3", true, new Tuple(), ImmutableList.of( + new Field("child3.1", true, Utf8.INSTANCE, null), + new Field("child3.2", true, new FloatingPoint(DOUBLE), ImmutableList.of()) + ))); + childrenBuilder.add(new Field("child4", true, new List(), ImmutableList.of( + new Field("child4.1", true, Utf8.INSTANCE, null) + ))); + childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse), ImmutableList.of( + new Field("child5.1", true, new Timestamp("UTC"), null), + new Field("child5.2", true, new FloatingPoint(DOUBLE), ImmutableList.of()) + ))); + Schema initialSchema = new Schema(childrenBuilder.build()); + run(initialSchema); } private void run(Field initialField) { From 2fd3bc1cf9d55a23d04682841e1ee445b80df962 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Tue, 23 Aug 2016 19:42:09 -0700 Subject: [PATCH 15/21] cleanup --- .../src/main/java/org/apache/arrow/vector/VectorLoader.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index ed643c21169..f899de74116 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -53,12 +53,11 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf List children = field.getChildren(); if (children.size() > 0) { List childrenFromFields = vector.getChildrenFromFields(); - int i = 0; checkArgument(children.size() == childrenFromFields.size(), "should have as many children as in the schema: found " + childrenFromFields.size() + " expected " + children.size()); - for (Field child : children) { + for (int i = 0; i < childrenFromFields.size(); i++) { + Field child = children.get(i); FieldVector 
fieldVector = childrenFromFields.get(i); loadBuffers(fieldVector, child, buffers, nodes); - ++i; } } } From b8249389de1f16d47017980d9d5324d5b19efcbe Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Wed, 24 Aug 2016 11:47:04 -0700 Subject: [PATCH 16/21] fix types; add licenses; more tests; more complex --- .../main/codegen/templates/UnionVector.java | 30 ++++++- .../org/apache/arrow/vector/FieldVector.java | 17 ++++ .../org/apache/arrow/vector/ValueVector.java | 6 +- .../org/apache/arrow/vector/VectorLoader.java | 17 ++++ .../apache/arrow/vector/VectorUnloader.java | 17 ++++ .../org/apache/arrow/vector/ZeroVector.java | 34 +++++-- .../complex/AbstractContainerVector.java | 21 ++--- .../vector/complex/AbstractMapVector.java | 20 +++-- .../complex/BaseRepeatedValueVector.java | 21 ++--- .../arrow/vector/complex/ListVector.java | 7 +- .../arrow/vector/complex/MapVector.java | 11 ++- .../vector/complex/impl/PromotableWriter.java | 3 +- .../apache/arrow/vector/file/ArrowReader.java | 7 +- .../arrow/vector/schema/ArrowRecordBatch.java | 15 +++- .../arrow/vector/schema/ArrowVectorType.java | 17 ++++ .../arrow/vector/schema/TypeLayout.java | 17 ++++ .../arrow/vector/schema/VectorLayout.java | 17 ++++ .../org/apache/arrow/vector/types/Types.java | 52 +++++------ .../arrow/vector/TestVectorUnloadLoad.java | 89 +++++++++++++++++++ .../arrow/vector/file/TestArrowFile.java | 56 ++++++++---- 20 files changed, 369 insertions(+), 105 deletions(-) create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 7e75abb0cea..4eac3b57459 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -42,9 +42,11 @@ import java.util.Iterator; import org.apache.arrow.vector.complex.impl.ComplexCopier; import org.apache.arrow.vector.util.CallBack; +import 
org.apache.arrow.vector.schema.ArrowFieldNode; import static org.apache.arrow.flatbuf.UnionMode.Sparse; + /* * This class is generated using freemarker and the ${.template_name} template. */ @@ -59,7 +61,7 @@ * For performance reasons, UnionVector stores a cached reference to each subtype vector, to avoid having to do the map lookup * each time the vector is accessed. */ -public class UnionVector implements ValueVector { +public class UnionVector implements FieldVector { private String name; private BufferAllocator allocator; @@ -97,6 +99,28 @@ public MinorType getMinorType() { return MinorType.UNION; } + @Override + public void initializeChildrenFromFields(List children) { + getMap().initializeChildrenFromFields(children); + } + + @Override + public List getChildrenFromFields() { + return getMap().getChildrenFromFields(); + } + + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + // TODO + throw new UnsupportedOperationException(); + } + + @Override + public List getFieldBuffers() { + // TODO + throw new UnsupportedOperationException(); + } + public MapVector getMap() { if (mapVector == null) { int vectorCount = internalMap.size(); @@ -239,10 +263,10 @@ public void copyFromSafe(int inIndex, int outIndex, UnionVector from) { copyFrom(inIndex, outIndex, from); } - public ValueVector addVector(ValueVector v) { + public FieldVector addVector(FieldVector v) { String name = v.getMinorType().name().toLowerCase(); Preconditions.checkState(internalMap.getChild(name) == null, String.format("%s vector already exists", name)); - final ValueVector newVector = internalMap.addOrGet(name, v.getMinorType(), v.getClass()); + final FieldVector newVector = internalMap.addOrGet(name, v.getMinorType(), v.getClass()); v.makeTransferPair(newVector).transfer(); internalMap.putChild(name, newVector); if (callBack != null) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java index a975fc74efc..c656b8d9bbb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.arrow.vector; import java.util.List; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 35321c947db..ba7790e47ef 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -19,14 +19,14 @@ import java.io.Closeable; -import io.netty.buffer.ArrowBuf; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.util.TransferPair; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.util.TransferPair; + +import io.netty.buffer.ArrowBuf; /** * An abstraction that is used to store a sequence of values in an individual column. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index f899de74116..672f35adc62 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.arrow.vector; import static com.google.common.base.Preconditions.checkArgument; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index bfdc8f311db..e4d37bf47d1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.arrow.vector; import java.util.ArrayList; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java index 705a24b02fe..1c874b4f27a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java @@ -17,25 +17,23 @@ */ package org.apache.arrow.vector; -import com.google.flatbuffers.FlatBufferBuilder; -import io.netty.buffer.ArrowBuf; - import java.util.Collections; import java.util.Iterator; +import java.util.List; -import org.apache.arrow.flatbuf.Type; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.complex.impl.NullReader; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Null; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.TransferPair; -import com.google.common.collect.Iterators; +import io.netty.buffer.ArrowBuf; -public class ZeroVector implements ValueVector { +public class ZeroVector implements FieldVector { public final static ZeroVector INSTANCE = new ZeroVector(); private final String name = "[DEFAULT]"; @@ -175,4 +173,28 @@ public Mutator getMutator() { public FieldReader getReader() { return NullReader.INSTANCE; } + + @Override + public void initializeChildrenFromFields(List children) { + if (!children.isEmpty()) { + throw new IllegalArgumentException("Zero vector has no children"); + } + } + + @Override + public List getChildrenFromFields() { + return Collections.emptyList(); + } + + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + if (!ownBuffers.isEmpty()) { + throw new IllegalArgumentException("Zero vector has no buffers"); + } + 
} + + @Override + public List getFieldBuffers() { + return Collections.emptyList(); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index ed7797576d6..2f68886a169 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -17,22 +17,13 @@ */ package org.apache.arrow.vector.complex; -import java.util.Collection; - -import javax.annotation.Nullable; - -import org.apache.arrow.flatbuf.Field; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.util.CallBack; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; - /** * Base class for composite vectors. * @@ -65,8 +56,8 @@ public BufferAllocator getAllocator() { /** * Returns a {@link org.apache.arrow.vector.ValueVector} corresponding to the given field name if exists or null. */ - public ValueVector getChild(String name) { - return getChild(name, ValueVector.class); + public FieldVector getChild(String name) { + return getChild(name, FieldVector.class); } /** @@ -81,7 +72,7 @@ public void close() { protected T typeify(ValueVector v, Class clazz) { if (clazz.isAssignableFrom(v.getClass())) { - return (T) v; + return clazz.cast(v); } throw new IllegalStateException(String.format("Vector requested [%s] was different than type stored [%s]. 
Arrow doesn't yet support hetergenous types.", clazz.getSimpleName(), v.getClass().getSimpleName())); } @@ -94,10 +85,10 @@ protected boolean supportsDirectRead() { public abstract int size(); // add a new vector with the input MajorType or return the existing vector if we already added one with the same type - public abstract T addOrGet(String name, MinorType minorType, Class clazz, int... precisionScale); + public abstract T addOrGet(String name, MinorType minorType, Class clazz, int... precisionScale); // return the child vector with the input name - public abstract T getChild(String name, Class clazz); + public abstract T getChild(String name, Class clazz); // return the child vector's ordinal in the composite container public abstract VectorWithOrdinal getChildVectorWithOrdinal(String name); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java index 6b7578b6318..23b4997f4f5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractMapVector.java @@ -18,10 +18,12 @@ package org.apache.arrow.vector.complex; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.util.CallBack; @@ -40,7 +42,7 @@ public abstract class AbstractMapVector extends AbstractContainerVector { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); // Maintains a map with key as field name and value is the vector itself - private final MapWithOrdinal vectors = new MapWithOrdinal<>(); + private final MapWithOrdinal vectors = new 
MapWithOrdinal<>(); protected AbstractMapVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, allocator, callBack); @@ -108,7 +110,7 @@ public boolean allocateNewSafe() { * @return resultant {@link org.apache.arrow.vector.ValueVector} */ @Override - public T addOrGet(String name, MinorType minorType, Class clazz, int... precisionScale) { + public T addOrGet(String name, MinorType minorType, Class clazz, int... precisionScale) { final ValueVector existing = getChild(name); boolean create = false; if (existing == null) { @@ -152,7 +154,7 @@ public ValueVector getChildByOrdinal(int id) { * field name if exists or null. */ @Override - public T getChild(String name, Class clazz) { + public T getChild(String name, Class clazz) { final ValueVector v = vectors.get(name.toLowerCase()); if (v == null) { return null; @@ -165,7 +167,7 @@ protected ValueVector add(String name, MinorType minorType, int... precisionScal if (existing != null) { throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", existing.getClass().getSimpleName(), minorType)); } - ValueVector vector = minorType.getNewVector(name, allocator, callBack, precisionScale); + FieldVector vector = minorType.getNewVector(name, allocator, callBack, precisionScale); putChild(name, vector); if (callBack!=null) { callBack.doWork(); @@ -178,7 +180,7 @@ protected ValueVector add(String name, MinorType minorType, int... precisionScal * * Note that this method does not enforce any vector type check nor throws a schema change exception. 
*/ - protected void putChild(String name, ValueVector vector) { + protected void putChild(String name, FieldVector vector) { putVector(name, vector); } @@ -187,7 +189,7 @@ protected void putChild(String name, ValueVector vector) { * @param name field name * @param vector vector to be inserted */ - protected void putVector(String name, ValueVector vector) { + protected void putVector(String name, FieldVector vector) { final ValueVector old = vectors.put( Preconditions.checkNotNull(name, "field name cannot be null").toLowerCase(), Preconditions.checkNotNull(vector, "vector cannot be null") @@ -201,9 +203,9 @@ protected void putVector(String name, ValueVector vector) { /** * Returns a sequence of underlying child vectors. */ - protected List getChildren() { + protected List getChildren() { int size = vectors.size(); - List children = new ArrayList<>(); + List children = new ArrayList<>(); for (int i = 0; i < size; i++) { children.add(vectors.getByOrdinal(i)); } @@ -228,7 +230,7 @@ public int size() { @Override public Iterator iterator() { - return vectors.values().iterator(); + return Collections.unmodifiableCollection(vectors.values()).iterator(); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 42262741df9..517d20c77a9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -17,8 +17,6 @@ */ package org.apache.arrow.vector.complex; -import io.netty.buffer.ArrowBuf; - import java.util.Collections; import java.util.Iterator; @@ -26,29 +24,32 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.UInt4Vector; import 
org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; import com.google.common.base.Preconditions; import com.google.common.collect.ObjectArrays; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.util.SchemaChangeRuntimeException; + +import io.netty.buffer.ArrowBuf; public abstract class BaseRepeatedValueVector extends BaseValueVector implements RepeatedValueVector { - public final static ValueVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; + public final static FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; public final static String OFFSETS_VECTOR_NAME = "$offsets$"; public final static String DATA_VECTOR_NAME = "$data$"; protected final UInt4Vector offsets; - protected ValueVector vector; + protected FieldVector vector; protected BaseRepeatedValueVector(String name, BufferAllocator allocator) { this(name, allocator, DEFAULT_DATA_VECTOR); } - protected BaseRepeatedValueVector(String name, BufferAllocator allocator, ValueVector vector) { + protected BaseRepeatedValueVector(String name, BufferAllocator allocator, FieldVector vector) { super(name, allocator); this.offsets = new UInt4Vector(OFFSETS_VECTOR_NAME, allocator); this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); @@ -83,7 +84,7 @@ public UInt4Vector getOffsetVector() { } @Override - public ValueVector getDataVector() { + public FieldVector getDataVector() { return vector; } @@ -121,7 +122,7 @@ public int getBufferSizeFor(int valueCount) { @Override public Iterator iterator() { - return Collections.singleton(getDataVector()).iterator(); + return Collections.singleton(getDataVector()).iterator(); } @Override @@ -167,7 +168,7 @@ public AddOrGetResult addOrGetVector(MinorType minorT return new AddOrGetResult<>((T)vector, created); } - protected void replaceDataVector(ValueVector v) { + 
protected void replaceDataVector(FieldVector v) { vector.clear(); vector = v; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 904a7bc3416..8990d740149 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -19,8 +19,8 @@ package org.apache.arrow.vector.complex; import static java.util.Arrays.asList; +import static java.util.Collections.singletonList; -import java.util.Arrays; import java.util.List; import org.apache.arrow.memory.BufferAllocator; @@ -83,8 +83,7 @@ public void initializeChildrenFromFields(List children) { @Override public List getChildrenFromFields() { - // TODO: data vector should be that type - return Arrays.asList((FieldVector)getDataVector()); + return singletonList(getDataVector()); } @Override @@ -126,7 +125,7 @@ public void copyFrom(int inIndex, int outIndex, ListVector from) { } @Override - public ValueVector getDataVector() { + public FieldVector getDataVector() { return vector; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index c98fcd409e4..2fe59853b7c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -163,7 +163,7 @@ protected MapTransferPair(MapVector from, MapVector to, boolean allocate) { this.to.ephPair = null; int i = 0; - ValueVector vector; + FieldVector vector; for (String child:from.getChildFieldNames()) { int preSize = to.size(); vector = from.getChild(child); @@ -179,7 +179,7 @@ protected MapTransferPair(MapVector from, MapVector to, boolean allocate) { // (This is similar to what happens in ScanBatch where the children cannot be added till they are // read). 
To take care of this, we ensure that the hashCode of the MaterializedField does not // include the hashCode of the children but is based only on MaterializedField$key. - final ValueVector newVector = to.addOrGet(child, vector.getMinorType(), vector.getClass()); + final FieldVector newVector = to.addOrGet(child, vector.getMinorType(), vector.getClass()); if (allocate && to.size() != preSize) { newVector.allocateNew(); } @@ -319,8 +319,8 @@ public MinorType getMinorType() { @Override public void close() { - final Collection vectors = getChildren(); - for (final ValueVector v : vectors) { + final Collection vectors = getChildren(); + for (final FieldVector v : vectors) { v.close(); } vectors.clear(); @@ -340,8 +340,7 @@ public void initializeChildrenFromFields(List children) { @Override public List getChildrenFromFields() { - // TODO: children should be the right type - return (List)(List)getChildren(); + return getChildren(); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index 586b1283fe8..c282688530b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -17,6 +17,7 @@ */ package org.apache.arrow.vector.complex.impl; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.AbstractMapVector; @@ -129,7 +130,7 @@ private FieldWriter promoteToUnion() { } else if (listVector != null) { unionVector = listVector.promoteToUnion(); } - unionVector.addVector(tp.getTo()); + unionVector.addVector((FieldVector)tp.getTo()); writer = new UnionWriter(unionVector); writer.setPosition(idx()); for (int i = 0; i < idx(); i++) { diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java index 7ac08475886..bbcd3e9f470 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java @@ -126,7 +126,6 @@ public ArrowRecordBatch readRecordBatch(ArrowBlock recordBatchBlock) throws IOEx RecordBatch recordBatchFB = RecordBatch.getRootAsRecordBatch(buffer.nioBuffer().asReadOnlyBuffer()); int nodesLength = recordBatchFB.nodesLength(); final ArrowBuf body = buffer.slice(recordBatchBlock.getMetadataLength(), (int)recordBatchBlock.getBodyLength()); - LOGGER.debug("sliced body " + body); List nodes = new ArrayList<>(); for (int i = 0; i < nodesLength; ++i) { FieldNode node = recordBatchFB.nodes(i); @@ -137,12 +136,12 @@ public ArrowRecordBatch readRecordBatch(ArrowBlock recordBatchBlock) throws IOEx Buffer bufferFB = recordBatchFB.buffers(i); LOGGER.debug(String.format("Buffer in RecordBatch at %d, length: %d", bufferFB.offset(), bufferFB.length())); ArrowBuf vectorBuffer = body.slice((int)bufferFB.offset(), (int)bufferFB.length()); - LOGGER.debug("sliced vectorBuffer " + vectorBuffer); - vectorBuffer.retain(); buffers.add(vectorBuffer); } + ArrowRecordBatch arrowRecordBatch = new ArrowRecordBatch(recordBatchFB.length(), nodes, buffers); + LOGGER.debug("released buffer " + buffer); buffer.release(); - return new ArrowRecordBatch(recordBatchFB.length(), nodes, buffers); + return arrowRecordBatch; } public void close() throws IOException { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java index b2a35711224..336979c11b1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java @@ 
-41,11 +41,16 @@ public class ArrowRecordBatch implements FBSerializable, AutoCloseable { private final List buffers; + private boolean closed = false; + public ArrowRecordBatch(int length, List nodes, List buffers) { super(); this.length = length; this.nodes = nodes; this.buffers = buffers; + for (ArrowBuf arrowBuf : buffers) { + arrowBuf.retain(); + } } public int getLength() { @@ -57,6 +62,9 @@ public List getNodes() { } public List getBuffers() { + if (closed) { + throw new IllegalStateException("already closed"); + } return buffers; } @@ -82,8 +90,11 @@ public int writeTo(FlatBufferBuilder builder) { } public void close() { - for (ArrowBuf arrowBuf : buffers) { - arrowBuf.release(); + if (!closed) { + closed = true; + for (ArrowBuf arrowBuf : buffers) { + arrowBuf.release(); + } } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java index 070b7748abf..e3d3e34e0ae 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.arrow.vector.schema; import org.apache.arrow.flatbuf.VectorType; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index 29f9a19d660..9f1044fba4b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.arrow.vector.schema; import static java.util.Arrays.asList; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java index f2f18af626b..421ebcb8376 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.arrow.vector.schema; import static org.apache.arrow.vector.schema.ArrowVectorType.OFFSET; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 4b979ff1ca5..4d0d9ee114a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -24,6 +24,7 @@ import org.apache.arrow.flatbuf.Type; import org.apache.arrow.flatbuf.UnionMode; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullableBigIntVector; import org.apache.arrow.vector.NullableBitVector; import org.apache.arrow.vector.NullableDateVector; @@ -43,7 +44,6 @@ import org.apache.arrow.vector.NullableUInt8Vector; import org.apache.arrow.vector.NullableVarBinaryVector; import org.apache.arrow.vector.NullableVarCharVector; -import org.apache.arrow.vector.SmallIntVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.ListVector; @@ -122,7 +122,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack 
callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return ZeroVector.INSTANCE; } @@ -138,7 +138,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new MapVector(name, allocator, callBack); } @@ -155,7 +155,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableTinyIntVector(name, allocator); } @@ -171,8 +171,8 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { - return new SmallIntVector(name, allocator); + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + return new NullableSmallIntVector(name, allocator); } @Override @@ -187,7 +187,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableIntVector(name, allocator); } @@ -203,7 +203,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... 
precisionScale) { return new NullableBigIntVector(name, allocator); } @@ -219,7 +219,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableDateVector(name, allocator); } @@ -235,7 +235,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableTimeVector(name, allocator); } @@ -251,7 +251,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableTimeStampVector(name, allocator); } @@ -267,7 +267,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableIntervalDayVector(name, allocator); } @@ -283,7 +283,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableIntervalDayVector(name, allocator); } @@ -299,7 +299,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... 
precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableFloat4Vector(name, allocator); } @@ -315,7 +315,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableFloat8Vector(name, allocator); } @@ -331,7 +331,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableBitVector(name, allocator); } @@ -347,7 +347,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableVarCharVector(name, allocator); } @@ -363,7 +363,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableVarBinaryVector(name, allocator); } @@ -383,7 +383,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... 
precisionScale) { return new NullableDecimalVector(name, allocator, precisionScale[0], precisionScale[1]); } @@ -399,7 +399,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableUInt1Vector(name, allocator); } @@ -415,7 +415,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableUInt2Vector(name, allocator); } @@ -431,7 +431,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableUInt4Vector(name, allocator); } @@ -447,7 +447,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new NullableUInt8Vector(name, allocator); } @@ -463,7 +463,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new ListVector(name, allocator, callBack); } @@ -479,7 +479,7 @@ public Field getField() { } @Override - public ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... 
precisionScale) { + public FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale) { return new UnionVector(name, allocator, callBack); } @@ -501,7 +501,7 @@ public ArrowType getType() { public abstract Field getField(); - public abstract ValueVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale); + public abstract FieldVector getNewVector(String name, BufferAllocator allocator, CallBack callBack, int... precisionScale); public abstract FieldWriter getNewFieldWriter(ValueVector vector); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java new file mode 100644 index 00000000000..85bb2cfc99f --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import java.io.IOException; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; +import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; +import org.apache.arrow.vector.complex.reader.BaseReader.MapReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; +import org.apache.arrow.vector.complex.writer.BigIntWriter; +import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.schema.ArrowRecordBatch; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Test; + +public class TestVectorUnloadLoad { + + static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + + @Test + public void test() throws IOException { + int count = 10000; + Schema schema; + + try ( + BufferAllocator originalVectorsAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", originalVectorsAllocator, null)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + IntWriter intWriter = rootWriter.integer("int"); + BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); + for (int i = 0; i < count; i++) { + intWriter.setPosition(i); + intWriter.writeInt(i); + bigIntWriter.setPosition(i); + bigIntWriter.writeBigInt(i); + } + writer.setValueCount(count); + + VectorUnloader vectorUnloader = new VectorUnloader((MapVector)parent.getChild("root")); + schema = vectorUnloader.getSchema(); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = 
allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); + MapVector newParent = new MapVector("parent", finalVectorsAllocator, null)) { + MapVector root = newParent.addOrGet("root", MinorType.MAP, MapVector.class); + VectorLoader vectorLoader = new VectorLoader(schema, root); + + vectorLoader.load(recordBatch); + + MapReader rootReader = new SingleMapReaderImpl(newParent).reader("root"); + for (int i = 0; i < count; i++) { + rootReader.setPosition(i); + Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); + Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); + } + } + } + } + + @AfterClass + public static void afterClass() { + allocator.close(); + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index f86fe98e315..4b5a3aff51f 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -26,6 +26,8 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector.Accessor; import org.apache.arrow.vector.VectorLoader; import org.apache.arrow.vector.VectorUnloader; import org.apache.arrow.vector.complex.MapVector; @@ -37,8 +39,8 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BigIntWriter; import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.complex.writer.TimeStampWriter; import org.apache.arrow.vector.schema.ArrowRecordBatch; +import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.After; import org.junit.Assert; @@ -48,6 +50,7 @@ import io.netty.buffer.ArrowBuf; public class TestArrowFile { + private 
static final int COUNT = 10; private BufferAllocator allocator; @Before @@ -63,7 +66,7 @@ public void tearDown() { @Test public void testWrite() throws IOException { File file = new File("target/mytest_write.arrow"); - int count = 10000; + int count = COUNT; try ( BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); MapVector parent = new MapVector("parent", vectorAllocator, null)) { @@ -75,11 +78,12 @@ public void testWrite() throws IOException { @Test public void testWriteComplex() throws IOException { File file = new File("target/mytest_write_complex.arrow"); - int count = 10000; + int count = COUNT; try ( BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); MapVector parent = new MapVector("parent", vectorAllocator, null)) { writeComplexData(count, parent); + validateComplexContent(count, parent); write((MapVector)parent.getChild("root"), file); } } @@ -97,7 +101,6 @@ private void writeComplexData(int count, MapVector parent) { BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); ListWriter listWriter = rootWriter.list("list"); MapWriter mapWriter = rootWriter.map("map"); - TimeStampWriter timeStampNested = mapWriter.timeStamp("timestamp"); for (int i = 0; i < count; i++) { intWriter.setPosition(i); intWriter.writeInt(i); @@ -111,7 +114,7 @@ private void writeComplexData(int count, MapVector parent) { listWriter.endList(); mapWriter.setPosition(i); mapWriter.start(); - mapWriter.timeStamp("timestamp").writeTimeStamp(123456789L); + mapWriter.timeStamp("timestamp").writeTimeStamp(i); mapWriter.end(); } writer.setValueCount(count); @@ -137,7 +140,7 @@ private void writeData(int count, MapVector parent) { @Test public void testWriteRead() throws IOException { File file = new File("target/mytest.arrow"); - int count = 10000; + int count = COUNT; // write try ( @@ -161,16 +164,25 @@ public void testWriteRead() throws IOException { // initialize vectors - 
ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter rootWriter = writer.rootAsMap(); - MapVector root = (MapVector)parent.getChild("root"); + MapVector root = parent.addOrGet("root", MinorType.MAP, MapVector.class); VectorLoader vectorLoader = new VectorLoader(schema, root); List recordBatches = footer.getRecordBatches(); + List buffers; for (ArrowBlock rbBlock : recordBatches) { try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) { vectorLoader.load(recordBatch); + buffers = recordBatch.getBuffers(); + for (ArrowBuf arrowBuf : buffers) { + System.out.println(arrowBuf + " " + arrowBuf.refCnt()); +// arrowBuf.release(); + } + } + System.out.println("after"); + for (ArrowBuf arrowBuf : buffers) { + System.out.println(arrowBuf + " " + arrowBuf.refCnt()); +// arrowBuf.release(); } validateContent(count, parent); } @@ -189,7 +201,7 @@ private void validateContent(int count, MapVector parent) { @Test public void testWriteReadComplex() throws IOException { File file = new File("target/mytest.arrow"); - int count = 10000; + int count = COUNT; // write try ( @@ -213,9 +225,7 @@ public void testWriteReadComplex() throws IOException { // initialize vectors - ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter rootWriter = writer.rootAsMap(); - MapVector root = (MapVector)parent.getChild("root"); + MapVector root = parent.addOrGet("root", MinorType.MAP, MapVector.class); VectorLoader vectorLoader = new VectorLoader(schema, root); @@ -229,14 +239,27 @@ public void testWriteReadComplex() throws IOException { } } + public void printVectors(List vectors) { + for (FieldVector vector : vectors) { + System.out.println(vector.getField().getName()); + Accessor accessor = vector.getAccessor(); + int valueCount = accessor.getValueCount(); + for (int i = 0; i < valueCount; i++) { + System.out.println(accessor.getObject(i)); + } + } + } + private void validateComplexContent(int count, MapVector parent) { + 
printVectors(parent.getChildrenFromFields()); + MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); for (int i = 0; i < count; i++) { rootReader.setPosition(i); Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); Assert.assertEquals(i % 3, rootReader.reader("list").size()); - Assert.assertEquals(123456789L, rootReader.reader("map").reader("timestamp").readDateTime().getMillis()); + Assert.assertEquals(i, rootReader.reader("map").reader("timestamp").readDateTime().getMillis() % COUNT); } } @@ -246,9 +269,10 @@ private void write(MapVector parent, File file) throws FileNotFoundException, IO System.out.println("writing schema: " + schema); try ( FileOutputStream fileOutputStream = new FileOutputStream(file); - ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema) + ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema); + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); ) { - arrowWriter.writeRecordBatch(vectorUnloader.getRecordBatch()); + arrowWriter.writeRecordBatch(recordBatch); } } From 31e95e64cec4d8c04652e1d76efe402c573968d9 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Wed, 24 Aug 2016 14:22:33 -0700 Subject: [PATCH 17/21] fix list vector --- .../org/apache/arrow/vector/VectorLoader.java | 20 ++++++++++++++++++- .../arrow/vector/complex/ListVector.java | 6 +++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 672f35adc62..58ac68b8282 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -29,22 +29,37 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; +import 
com.google.common.collect.Iterators; + import io.netty.buffer.ArrowBuf; +/** + * Loads buffers into vectors + */ public class VectorLoader { private final List fieldVectors; private final List fields; + /** + * will create children in root based on schema + * @param schema the expected schema + * @param root the root to add vectors to based on schema + */ public VectorLoader(Schema schema, FieldVector root) { super(); this.fields = schema.getFields(); root.initializeChildrenFromFields(fields); this.fieldVectors = root.getChildrenFromFields(); if (this.fieldVectors.size() != fields.size()) { - throw new IllegalArgumentException(); //TODO + throw new IllegalArgumentException("The root vector did not create the right number of children. found " + fieldVectors.size() + " expected " + fields.size()); } } + /** + * Loads the record batch in the vectors + * will not close the record batch + * @param recordBatch + */ public void load(ArrowRecordBatch recordBatch) { Iterator buffers = recordBatch.getBuffers().iterator(); Iterator nodes = recordBatch.getNodes().iterator(); @@ -53,6 +68,9 @@ public void load(ArrowRecordBatch recordBatch) { FieldVector fieldVector = fieldVectors.get(i); loadBuffers(fieldVector, field, buffers, nodes); } + if (nodes.hasNext() || buffers.hasNext()) { + throw new IllegalArgumentException("not all nodes and buffers where consumed. 
nodes: " + Iterators.toString(nodes) + " buffers: " + Iterators.toString(buffers)); + } } private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 8990d740149..b4f99eb5cca 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -51,7 +51,7 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector { - UInt4Vector offsets;// TODO: THis masks the same vector in the parent + final UInt4Vector offsets;// TODO: THis masks the same vector in the parent which is assigned to this in the constructor. final UInt1Vector bits; private Mutator mutator = new Mutator(); private Accessor accessor = new Accessor(); @@ -62,7 +62,7 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector { public ListVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, allocator); this.bits = new UInt1Vector("$bits$", allocator); - offsets = getOffsetVector(); + this.offsets = getOffsetVector(); this.writer = new UnionListWriter(this); this.reader = new UnionListReader(this); this.callBack = callBack; @@ -92,7 +92,7 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers throw new IllegalArgumentException("Lists have a validity and offset vector. 
Found: " + ownBuffers); } this.bits.load(ownBuffers.get(0)); - this.offsets.load(ownBuffers.get(0)); + this.offsets.load(ownBuffers.get(1)); } @Override From 8b8b823f0339a0ca97e8a68645171c5679cebaf8 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Wed, 24 Aug 2016 15:39:32 -0700 Subject: [PATCH 18/21] refactoring --- .../main/java/io/netty/buffer/ArrowBuf.java | 71 +++++++++++-------- .../templates/NullableValueVectors.java | 50 ++++++------- .../main/codegen/templates/UnionVector.java | 6 ++ .../arrow/vector/BaseDataValueVector.java | 29 +++++++- .../org/apache/arrow/vector/BufferBacked.java | 31 ++++++++ .../org/apache/arrow/vector/FieldVector.java | 25 ++++++- .../org/apache/arrow/vector/ZeroVector.java | 5 ++ .../arrow/vector/complex/ListVector.java | 22 +++--- .../arrow/vector/complex/MapVector.java | 29 +++++--- 9 files changed, 190 insertions(+), 78 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java diff --git a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java index bbec26aa85c..d10f00247e6 100644 --- a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java +++ b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java @@ -17,8 +17,6 @@ */ package io.netty.buffer; -import io.netty.util.internal.PlatformDependent; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -30,16 +28,18 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import org.apache.arrow.memory.AllocationManager.BufferLedger; import org.apache.arrow.memory.BaseAllocator; +import org.apache.arrow.memory.BaseAllocator.Verbosity; import org.apache.arrow.memory.BoundsChecking; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.BufferManager; -import org.apache.arrow.memory.AllocationManager.BufferLedger; -import org.apache.arrow.memory.BaseAllocator.Verbosity; import 
org.apache.arrow.memory.util.HistoricalLog; import com.google.common.base.Preconditions; +import io.netty.util.internal.PlatformDependent; + public final class ArrowBuf extends AbstractByteBuf implements AutoCloseable { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ArrowBuf.class); @@ -307,7 +307,7 @@ public ByteOrder order() { } @Override - public ByteBuf order(ByteOrder endianness) { + public ArrowBuf order(ByteOrder endianness) { return this; } @@ -344,7 +344,7 @@ public ByteBuf copy(int index, int length) { } @Override - public ByteBuf slice() { + public ArrowBuf slice() { return slice(readerIndex(), readableBytes()); } @@ -467,7 +467,7 @@ public boolean equals(Object obj) { } @Override - public ByteBuf retain(int increment) { + public ArrowBuf retain(int increment) { Preconditions.checkArgument(increment > 0, "retain(%d) argument is not positive", increment); if (isEmpty) { @@ -484,7 +484,7 @@ public ByteBuf retain(int increment) { } @Override - public ByteBuf retain() { + public ArrowBuf retain() { return retain(1); } @@ -535,49 +535,49 @@ public short getShort(int index) { } @Override - public ByteBuf setShort(int index, int value) { + public ArrowBuf setShort(int index, int value) { chk(index, 2); PlatformDependent.putShort(addr(index), (short) value); return this; } @Override - public ByteBuf setInt(int index, int value) { + public ArrowBuf setInt(int index, int value) { chk(index, 4); PlatformDependent.putInt(addr(index), value); return this; } @Override - public ByteBuf setLong(int index, long value) { + public ArrowBuf setLong(int index, long value) { chk(index, 8); PlatformDependent.putLong(addr(index), value); return this; } @Override - public ByteBuf setChar(int index, int value) { + public ArrowBuf setChar(int index, int value) { chk(index, 2); PlatformDependent.putShort(addr(index), (short) value); return this; } @Override - public ByteBuf setFloat(int index, float value) { + public ArrowBuf setFloat(int index, 
float value) { chk(index, 4); PlatformDependent.putInt(addr(index), Float.floatToRawIntBits(value)); return this; } @Override - public ByteBuf setDouble(int index, double value) { + public ArrowBuf setDouble(int index, double value) { chk(index, 8); PlatformDependent.putLong(addr(index), Double.doubleToRawLongBits(value)); return this; } @Override - public ByteBuf writeShort(int value) { + public ArrowBuf writeShort(int value) { ensure(2); PlatformDependent.putShort(addr(writerIndex), (short) value); writerIndex += 2; @@ -585,7 +585,7 @@ public ByteBuf writeShort(int value) { } @Override - public ByteBuf writeInt(int value) { + public ArrowBuf writeInt(int value) { ensure(4); PlatformDependent.putInt(addr(writerIndex), value); writerIndex += 4; @@ -593,7 +593,7 @@ public ByteBuf writeInt(int value) { } @Override - public ByteBuf writeLong(long value) { + public ArrowBuf writeLong(long value) { ensure(8); PlatformDependent.putLong(addr(writerIndex), value); writerIndex += 8; @@ -601,7 +601,7 @@ public ByteBuf writeLong(long value) { } @Override - public ByteBuf writeChar(int value) { + public ArrowBuf writeChar(int value) { ensure(2); PlatformDependent.putShort(addr(writerIndex), (short) value); writerIndex += 2; @@ -609,7 +609,7 @@ public ByteBuf writeChar(int value) { } @Override - public ByteBuf writeFloat(float value) { + public ArrowBuf writeFloat(float value) { ensure(4); PlatformDependent.putInt(addr(writerIndex), Float.floatToRawIntBits(value)); writerIndex += 4; @@ -617,7 +617,7 @@ public ByteBuf writeFloat(float value) { } @Override - public ByteBuf writeDouble(double value) { + public ArrowBuf writeDouble(double value) { ensure(8); PlatformDependent.putLong(addr(writerIndex), Double.doubleToRawLongBits(value)); writerIndex += 8; @@ -625,19 +625,19 @@ public ByteBuf writeDouble(double value) { } @Override - public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) { + public ArrowBuf getBytes(int index, byte[] dst, int dstIndex, int length) 
{ udle.getBytes(index + offset, dst, dstIndex, length); return this; } @Override - public ByteBuf getBytes(int index, ByteBuffer dst) { + public ArrowBuf getBytes(int index, ByteBuffer dst) { udle.getBytes(index + offset, dst); return this; } @Override - public ByteBuf setByte(int index, int value) { + public ArrowBuf setByte(int index, int value) { chk(index, 1); PlatformDependent.putByte(addr(index), (byte) value); return this; @@ -699,13 +699,13 @@ protected void _setLong(int index, long value) { } @Override - public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { + public ArrowBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { udle.getBytes(index + offset, dst, dstIndex, length); return this; } @Override - public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { + public ArrowBuf getBytes(int index, OutputStream out, int length) throws IOException { udle.getBytes(index + offset, out, length); return this; } @@ -724,12 +724,12 @@ public int getBytes(int index, GatheringByteChannel out, int length) throws IOEx } @Override - public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { + public ArrowBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { udle.setBytes(index + offset, src, srcIndex, length); return this; } - public ByteBuf setBytes(int index, ByteBuffer src, int srcIndex, int length) { + public ArrowBuf setBytes(int index, ByteBuffer src, int srcIndex, int length) { if (src.isDirect()) { checkIndex(index, length); PlatformDependent.copyMemory(PlatformDependent.directBufferAddress(src) + srcIndex, this.memoryAddress() + index, @@ -749,13 +749,13 @@ public ByteBuf setBytes(int index, ByteBuffer src, int srcIndex, int length) { } @Override - public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { + public ArrowBuf setBytes(int index, byte[] src, int srcIndex, int length) { udle.setBytes(index + offset, src, srcIndex, length); return this; } 
@Override - public ByteBuf setBytes(int index, ByteBuffer src) { + public ArrowBuf setBytes(int index, ByteBuffer src) { udle.setBytes(index + offset, src); return this; } @@ -860,4 +860,17 @@ public void print(StringBuilder sb, int indent, Verbosity verbosity) { } } + @Override + public ArrowBuf readerIndex(int readerIndex) { + super.readerIndex(readerIndex); + return this; + } + + @Override + public ArrowBuf writerIndex(int writerIndex) { + super.writerIndex(writerIndex); + return this; + } + + } diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index b75aed98928..6b1aa040a5b 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -57,6 +57,8 @@ public final class ${className} extends BaseDataValueVector implements <#if type private final Mutator mutator; private final Accessor accessor; + private final List innerVectors; + <#if minor.class == "Decimal"> private final int precision; private final int scale; @@ -69,6 +71,10 @@ public final class ${className} extends BaseDataValueVector implements <#if type mutator = new Mutator(); accessor = new Accessor(); field = new Field(name, true, new Decimal(precision, scale), null); + innerVectors = Collections.unmodifiableList(Arrays.asList( + bits, + values + )); } <#else> public ${className}(String name, BufferAllocator allocator) { @@ -107,51 +113,41 @@ public final class ${className} extends BaseDataValueVector implements <#if type <#elseif minor.class == "Bit"> field = new Field(name, true, new Bool(), null); + innerVectors = Collections.unmodifiableList(Arrays.asList( + bits, + <#if type.major = "VarLen"> + values.offsetVector, + + values + )); } - /** - * Initializes the child vectors - * to be later loaded with loadBuffers - * @param children - */ + @Override + public List getFieldInnerVectors() { + return innerVectors; + } + 
+ @Override public void initializeChildrenFromFields(List children) { if (!children.isEmpty()) { throw new IllegalArgumentException("primitive type vector ${className} can not have children: " + children); } } + @Override public List getChildrenFromFields() { return Collections.emptyList(); } + @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - int expectedSize = <#if type.major = "VarLen">3<#else>2; - if (ownBuffers.size() != expectedSize) { - throw new IllegalArgumentException("Illegal buffer count, expected " + expectedSize + ", got: " + ownBuffers.size()); - } - bits.load(ownBuffers.get(0)); - <#if type.major = "VarLen"> - values.offsetVector.load(ownBuffers.get(1)); - values.load(ownBuffers.get(2)); - <#else> - values.load(ownBuffers.get(1)); - + org.apache.arrow.vector.BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers); // TODO: do something with the sizes in fieldNode? } public List getFieldBuffers() { - bits.getBuffer().readerIndex(0); - <#if type.major = "VarLen"> - values.offsetVector.getBuffer().readerIndex(0); - - values.getBuffer().readerIndex(0); - return Arrays.asList( - bits.getBuffer(), - <#if type.major = "VarLen"> - values.offsetVector.getBuffer(), - - values.getBuffer()); + return org.apache.arrow.vector.BaseDataValueVector.unload(getFieldInnerVectors()); } @Override diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 4eac3b57459..72125fa50fb 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -121,6 +121,12 @@ public List getFieldBuffers() { throw new UnsupportedOperationException(); } + @Override + public List getFieldInnerVectors() { + // TODO + throw new UnsupportedOperationException(); + } + public MapVector getMap() { if (mapVector == null) { int vectorCount = internalMap.size(); diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java index 782fd75c04a..5b21ceadc6f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java @@ -17,15 +17,36 @@ */ package org.apache.arrow.vector; +import java.util.ArrayList; +import java.util.List; + import org.apache.arrow.memory.BufferAllocator; import io.netty.buffer.ArrowBuf; -public abstract class BaseDataValueVector extends BaseValueVector { +public abstract class BaseDataValueVector extends BaseValueVector implements BufferBacked { protected final static byte[] emptyByteArray = new byte[]{}; // Nullable vectors use this + public static void load(List vectors, List buffers) { + int expectedSize = vectors.size(); + if (buffers.size() != expectedSize) { + throw new IllegalArgumentException("Illegal buffer count, expected " + expectedSize + ", got: " + buffers.size()); + } + for (int i = 0; i < expectedSize; i++) { + vectors.get(i).load(buffers.get(i)); + } + } + + public static List unload(List vectors) { + List result = new ArrayList<>(vectors.size()); + for (BufferBacked vector : vectors) { + result.add(vector.unLoad()); + } + return result; + } + // TODO: Nullable vectors extend BaseDataValueVector but do not use the data field // We should fix the inheritance tree protected ArrowBuf data; @@ -84,10 +105,16 @@ public ArrowBuf getBuffer() { return data; } + @Override public void load(ArrowBuf data) { this.data = data.retain(allocator); } + @Override + public ArrowBuf unLoad() { + return this.data.readerIndex(0); + } + /** * This method has a similar effect of allocateNew() without actually clearing and reallocating * the value vector. 
The purpose is to move the value vector to a "mutate" state diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java new file mode 100644 index 00000000000..d1c262d2265 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import io.netty.buffer.ArrowBuf; + +/** + * Content is backed by a buffer and can be loaded/unloaded + */ +public interface BufferBacked { + + void load(ArrowBuf data); + + ArrowBuf unLoad(); + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java index c656b8d9bbb..b28433cfd0d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java @@ -24,23 +24,42 @@ import io.netty.buffer.ArrowBuf; +/** + * A vector corresponding to a Field in the schema + * It has inner vectors backed by buffers (validity, offsets, data, ...) 
+ */ public interface FieldVector extends ValueVector { /** * Initializes the child vectors * to be later loaded with loadBuffers - * @param children + * @param children the schema */ void initializeChildrenFromFields(List children); + /** + * the returned list is the same size as the list passed to initializeChildrenFromFields + * @return the children according to schema (empty for primitive types) + */ List getChildrenFromFields(); + /** + * loads data in the vectors + * (ownBuffers must be the same size as getFieldVectors()) + * @param fieldNode the fieldNode + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers); /** - * Returns the own buffers for this vector - * @return the + * (same size as getFieldVectors() since it is their content) + * @return the buffers containing the data for this vector (ready for reading) */ List getFieldBuffers(); + /** + * @return the inner vectors for this field as defined by the TypeLayout + */ + List getFieldInnerVectors(); + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java index 1c874b4f27a..c2482adefec 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java @@ -197,4 +197,9 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers public List getFieldBuffers() { return Collections.emptyList(); } + + @Override + public List getFieldInnerVectors() { + return Collections.emptyList(); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index b4f99eb5cca..1aedba95567 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -18,14 +18,17 @@ ******************************************************************************/ package org.apache.arrow.vector.complex; -import static java.util.Arrays.asList; import static java.util.Collections.singletonList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseDataValueVector; +import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt4Vector; @@ -53,6 +56,7 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector { final UInt4Vector offsets;// TODO: THis masks the same vector in the parent which is assigned to this in the constructor. final UInt1Vector bits; + private final List innerVectors; private Mutator mutator = new Mutator(); private Accessor accessor = new Accessor(); private UnionListWriter writer; @@ -63,6 +67,7 @@ public ListVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, allocator); this.bits = new UInt1Vector("$bits$", allocator); this.offsets = getOffsetVector(); + this.innerVectors = Collections.unmodifiableList(Arrays.asList(bits, offsets)); this.writer = new UnionListWriter(this); this.reader = new UnionListReader(this); this.callBack = callBack; @@ -88,18 +93,17 @@ public List getChildrenFromFields() { @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 2) { - throw new IllegalArgumentException("Lists have a validity and offset vector. 
Found: " + ownBuffers); - } - this.bits.load(ownBuffers.get(0)); - this.offsets.load(ownBuffers.get(1)); + BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers); } @Override public List getFieldBuffers() { - bits.getBuffer().readerIndex(0); - offsets.getBuffer().readerIndex(0); - return asList(bits.getBuffer(), offsets.getBuffer()); + return BaseDataValueVector.unload(getFieldInnerVectors()); + } + + @Override + public List getFieldInnerVectors() { + return innerVectors; } public UnionListWriter getWriter() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 2fe59853b7c..0c013312724 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -17,10 +17,10 @@ */ package org.apache.arrow.vector.complex; -import static java.util.Arrays.asList; - import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -28,8 +28,11 @@ import javax.annotation.Nullable; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.BaseDataValueVector; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -57,6 +60,10 @@ public class MapVector extends AbstractMapVector implements FieldVector { private final Mutator mutator = new Mutator(); int valueCount; + // TODO: validity vector + private final UInt1Vector bits = new UInt1Vector("$bits$", allocator); + private final List innerVectors = 
Collections.unmodifiableList(Arrays.asList(bits)); + public MapVector(String name, BufferAllocator allocator, CallBack callBack){ super(name, allocator, callBack); } @@ -300,6 +307,7 @@ public void clear() { for (final ValueVector v : getChildren()) { v.clear(); } + bits.clear(); valueCount = 0; } @@ -324,6 +332,8 @@ public void close() { v.close(); } vectors.clear(); + bits.close(); + valueCount = 0; super.close(); @@ -345,17 +355,18 @@ public List getChildrenFromFields() { @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 1) { - throw new IllegalArgumentException("Tuples have a validity. Found: " + ownBuffers); - } -// this.bits.load(ownBuffers.get(0)); - // TODO: add validity vector to make maps nullable + BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers); + // TODO: something with fieldNode? } @Override public List getFieldBuffers() { - // TODO: add validity vector to make maps nullable - return asList(allocator.getEmpty()); + return BaseDataValueVector.unload(getFieldInnerVectors()); + } + + @Override + public List getFieldInnerVectors() { + return innerVectors; } } From e8359b36391873466992a88af13f7d3c2e904f95 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Wed, 24 Aug 2016 16:40:40 -0700 Subject: [PATCH 19/21] align on 8 byte boundaries; more tests --- .../arrow/vector/BaseDataValueVector.java | 1 + .../apache/arrow/vector/file/ArrowWriter.java | 30 +++++++- .../arrow/vector/schema/ArrowRecordBatch.java | 44 ++++++++--- .../arrow/vector/file/TestArrowFile.java | 77 +++++++++++++++---- 4 files changed, 129 insertions(+), 23 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java index 5b21ceadc6f..c22258d4265 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java @@ -107,6 +107,7 @@ public ArrowBuf getBuffer() { @Override public void load(ArrowBuf data) { + this.data.release(); this.data = data.retain(allocator); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java index a82e4cb1d0e..9881a229c23 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.List; +import org.apache.arrow.vector.schema.ArrowBuffer; import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.schema.FBSerializable; import org.apache.arrow.vector.types.pojo.Schema; @@ -62,6 +63,17 @@ private long write(byte[] buffer) throws IOException { return write(ByteBuffer.wrap(buffer)); } + private long writeZeros(int zeroCount) throws IOException { + return write(new byte[zeroCount]); + } + + private long align() throws IOException { + if (currentPosition % 8 != 0) { // align on 8 byte boundaries + return writeZeros(8 - (int)(currentPosition % 8)); + } + return 0; + } + private long write(ByteBuffer buffer) throws IOException { long length = buffer.remaining(); out.write(buffer); @@ -86,13 +98,29 @@ private long writeIntLittleEndian(int v) throws IOException { public void writeRecordBatch(ArrowRecordBatch recordBatch) throws IOException { checkStarted(); + align(); // write metadata header long offset = currentPosition; write(recordBatch); + align(); // write body long bodyOffset = currentPosition; - for (ArrowBuf buffer : recordBatch.getBuffers()) { + List buffers = recordBatch.getBuffers(); + List buffersLayout = recordBatch.getBuffersLayout(); + if (buffers.size() != buffersLayout.size()) { + throw new IllegalStateException("the layout does not match: " + buffers.size() + " != " + 
buffersLayout.size()); + } + for (int i = 0; i < buffers.size(); i++) { + ArrowBuf buffer = buffers.get(i); + ArrowBuffer layout = buffersLayout.get(i); + long startPosition = bodyOffset + layout.getOffset(); + if (startPosition != currentPosition) { + writeZeros((int)(startPosition - currentPosition)); + } write(buffer); + if (currentPosition != startPosition + layout.getSize()) { + throw new IllegalStateException("wrong buffer size: " + currentPosition + " != " + (startPosition + layout.getSize())); + } } int metadataLength = (int)(bodyOffset - offset); if (metadataLength <= 0) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java index 336979c11b1..9162efd29f8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowRecordBatch.java @@ -20,6 +20,7 @@ import static org.apache.arrow.vector.schema.FBSerializables.writeAllStructsToVector; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.arrow.flatbuf.RecordBatch; @@ -41,26 +42,49 @@ public class ArrowRecordBatch implements FBSerializable, AutoCloseable { private final List buffers; + private final List buffersLayout; + private boolean closed = false; + /** + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + */ public ArrowRecordBatch(int length, List nodes, List buffers) { super(); this.length = length; this.nodes = nodes; this.buffers = buffers; + List arrowBuffers = new ArrayList<>(); + long offset = 0; for (ArrowBuf arrowBuf : buffers) { arrowBuf.retain(); + long size = arrowBuf.readableBytes(); + arrowBuffers.add(new ArrowBuffer(0, offset, size)); + LOGGER.debug(String.format("Buffer in RecordBatch at %d, length: %d", offset, size)); + offset +=
size; + if (offset % 8 != 0) { // align on 8 byte boundaries + offset += 8 - (offset % 8); + } } + this.buffersLayout = Collections.unmodifiableList(arrowBuffers); } public int getLength() { return length; } + /** + * @return the FieldNodes corresponding to the schema + */ public List getNodes() { return nodes; } + /** + * @return the buffers containing the data + */ public List getBuffers() { if (closed) { throw new IllegalStateException("already closed"); @@ -68,20 +92,19 @@ public List getBuffers() { return buffers; } + /** + * @return the serialized layout if we send the buffers on the wire + */ + public List getBuffersLayout() { + return buffersLayout; + } + @Override public int writeTo(FlatBufferBuilder builder) { RecordBatch.startNodesVector(builder, nodes.size()); int nodesOffset = writeAllStructsToVector(builder, nodes); - List arrowBuffers = new ArrayList<>(); - long offset = 0; - for (ArrowBuf buffer : buffers) {; - long size = buffer.readableBytes(); - arrowBuffers.add(new ArrowBuffer(0, offset, size)); - LOGGER.debug(String.format("Buffer in RecordBatch at %d, length: %d", offset, size)); - offset += size; - } RecordBatch.startBuffersVector(builder, buffers.size()); - int buffersOffset = writeAllStructsToVector(builder, arrowBuffers); + int buffersOffset = writeAllStructsToVector(builder, buffersLayout); RecordBatch.startRecordBatch(builder); RecordBatch.addLength(builder, length); RecordBatch.addNodes(builder, nodesOffset); @@ -89,6 +112,9 @@ public int writeTo(FlatBufferBuilder builder) { return RecordBatch.endRecordBatch(builder); } + /** + * releases the buffers + */ public void close() { if (!closed) { closed = true; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 4b5a3aff51f..11de0a2ef00 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ 
b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -39,6 +39,7 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BigIntWriter; import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.schema.ArrowBuffer; import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Schema; @@ -136,7 +137,6 @@ private void writeData(int count, MapVector parent) { writer.setValueCount(count); } - @Test public void testWriteRead() throws IOException { File file = new File("target/mytest.arrow"); @@ -169,21 +169,17 @@ public void testWriteRead() throws IOException { VectorLoader vectorLoader = new VectorLoader(schema, root); List recordBatches = footer.getRecordBatches(); - List buffers; for (ArrowBlock rbBlock : recordBatches) { + Assert.assertEquals(0, rbBlock.getOffset() % 8); + Assert.assertEquals(0, rbBlock.getMetadataLength() % 8); try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) { - vectorLoader.load(recordBatch); - buffers = recordBatch.getBuffers(); - for (ArrowBuf arrowBuf : buffers) { - System.out.println(arrowBuf + " " + arrowBuf.refCnt()); -// arrowBuf.release(); + List buffersLayout = recordBatch.getBuffersLayout(); + for (ArrowBuffer arrowBuffer : buffersLayout) { + Assert.assertEquals(0, arrowBuffer.getOffset() % 8); } + vectorLoader.load(recordBatch); } - System.out.println("after"); - for (ArrowBuf arrowBuf : buffers) { - System.out.println(arrowBuf + " " + arrowBuf.refCnt()); -// arrowBuf.release(); - } + validateContent(count, parent); } } @@ -200,7 +196,7 @@ private void validateContent(int count, MapVector parent) { @Test public void testWriteReadComplex() throws IOException { - File file = new File("target/mytest.arrow"); + File file = new File("target/mytest_complex.arrow"); int count = COUNT; // write @@ -276,5 +272,60 @@ 
private void write(MapVector parent, File file) throws FileNotFoundException, IO } } + @Test + public void testWriteReadMultipleRBs() throws IOException { + File file = new File("target/mytest_multiple.arrow"); + int count = COUNT; + + // write + try ( + BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", originalVectorAllocator, null); + FileOutputStream fileOutputStream = new FileOutputStream(file);) { + writeData(count, parent); + VectorUnloader vectorUnloader = new VectorUnloader(parent.getChild("root")); + Schema schema = vectorUnloader.getSchema(); + Assert.assertEquals(2, schema.getFields().size()); + try (ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema);) { + try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch()) { + arrowWriter.writeRecordBatch(recordBatch); + } + parent.allocateNew(); + writeData(count, parent); + try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch()) { + arrowWriter.writeRecordBatch(recordBatch); + } + } + } + + // read + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + FileInputStream fileInputStream = new FileInputStream(file); + ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), readerAllocator); + BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); + MapVector parent = new MapVector("parent", vectorAllocator, null); + ) { + ArrowFooter footer = arrowReader.readFooter(); + Schema schema = footer.getSchema(); + System.out.println("reading schema: " + schema); + MapVector root = parent.addOrGet("root", MinorType.MAP, MapVector.class); + VectorLoader vectorLoader = new VectorLoader(schema, root); + List recordBatches = footer.getRecordBatches(); + Assert.assertEquals(2, recordBatches.size()); + for (ArrowBlock rbBlock : recordBatches) { + 
Assert.assertEquals(0, rbBlock.getOffset() % 8); + Assert.assertEquals(0, rbBlock.getMetadataLength() % 8); + try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) { + List buffersLayout = recordBatch.getBuffersLayout(); + for (ArrowBuffer arrowBuffer : buffersLayout) { + Assert.assertEquals(0, arrowBuffer.getOffset() % 8); + } + vectorLoader.load(recordBatch); + validateContent(count, parent); + } + } + } + } } From 04d797f8d1160824221538cab170567da9c47fc9 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Wed, 24 Aug 2016 23:51:59 -0700 Subject: [PATCH 20/21] maps are not nullable yet --- .../java/org/apache/arrow/vector/complex/MapVector.java | 6 +----- .../java/org/apache/arrow/vector/schema/TypeLayout.java | 3 ++- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 0c013312724..e3696588e60 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -32,7 +32,6 @@ import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.impl.SingleMapReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -61,8 +60,7 @@ public class MapVector extends AbstractMapVector implements FieldVector { int valueCount; // TODO: validity vector - private final UInt1Vector bits = new UInt1Vector("$bits$", allocator); - private final List innerVectors = Collections.unmodifiableList(Arrays.asList(bits)); + private final List innerVectors = Collections.unmodifiableList(Arrays.asList()); public MapVector(String name, BufferAllocator allocator, CallBack callBack){ super(name, allocator, 
callBack); @@ -307,7 +305,6 @@ public void clear() { for (final ValueVector v : getChildren()) { v.clear(); } - bits.clear(); valueCount = 0; } @@ -332,7 +329,6 @@ public void close() { v.close(); } vectors.clear(); - bits.close(); valueCount = 0; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index 9f1044fba4b..1275e0eb5dc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -88,7 +88,8 @@ public static TypeLayout getTypeLayout(final ArrowType arrowType) { @Override public TypeLayout visit(Tuple type) { List vectors = asList( - validityVector() + // TODO: add validity vector in Map +// validityVector() ); return new TypeLayout(vectors); } From 252de6dfb9c0672d113a0511f37c95f339359a75 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Thu, 25 Aug 2016 16:46:10 -0700 Subject: [PATCH 21/21] remove outdated comment --- .../main/java/org/apache/arrow/vector/complex/ListVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 1aedba95567..2984c362514 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -54,7 +54,7 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector { - final UInt4Vector offsets;// TODO: THis masks the same vector in the parent which is assigned to this in the constructor. + final UInt4Vector offsets; final UInt1Vector bits; private final List innerVectors; private Mutator mutator = new Mutator();