From dc92b839d8fcc09c19f3ef5f47f7720f9a6df5b6 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 26 Feb 2018 06:31:41 -0800 Subject: [PATCH 01/51] fix typo --- js/src/vector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/vector.ts b/js/src/vector.ts index 6c2bbbb86a7..67a4f99be73 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -403,7 +403,7 @@ export class DictionaryVector extends Vector[]; From ef1acc769e217b933488cf20bead1a1947681196 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 26 Feb 2018 06:31:53 -0800 Subject: [PATCH 02/51] read union buffers in the correct order --- js/src/ipc/reader/vector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/ipc/reader/vector.ts b/js/src/ipc/reader/vector.ts index b8c4871ebac..c4688f5e2b8 100644 --- a/js/src/ipc/reader/vector.ts +++ b/js/src/ipc/reader/vector.ts @@ -126,6 +126,6 @@ export abstract class TypeDataLoader extends TypeVisitor { protected visitUnionType(type: DenseUnion | SparseUnion, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { return type.mode === UnionMode.Sparse ? new SparseUnionData(type as SparseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.visitFields(type.children), 0, nullCount) : - new DenseUnionData(type as DenseUnion, length, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.readTypeIds(type), this.visitFields(type.children), 0, nullCount); + new DenseUnionData(type as DenseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.readOffsets(type), this.visitFields(type.children), 0, nullCount); } } From 6522cb006af2a5cba5606507c398b3ba2268f2ec Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 26 Feb 2018 06:32:10 -0800 Subject: [PATCH 03/51] fix Data generics for FixedSizeList --- js/src/data.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/src/data.ts b/js/src/data.ts index 3bfb3209b69..963a6a476ba 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -17,8 +17,8 @@ import { popcnt_bit_range } from './util/bit'; import { VectorLike, Vector } from './vector'; +import { Int, Bool, FlatListType, List, Struct, Map_ } from './type'; import { VectorType, TypedArray, TypedArrayConstructor, Dictionary } from './type'; -import { Int, Bool, FlatListType, List, FixedSizeList, Struct, Map_ } from './type'; import { DataType, FlatType, ListType, NestedType, SingleNestedType, DenseUnion, SparseUnion } from './type'; export function toTypedArray(ArrayType: TypedArrayConstructor, values?: T | ArrayLike | Iterable | null): T { @@ -46,7 +46,7 @@ export interface DataTypes { /* [Type.Struct]*/ 13: NestedData; /* [Type.Union]*/ 14: UnionData; /* [Type.FixedSizeBinary]*/ 15: FlatData; -/* [Type.FixedSizeList]*/ 16: SingleNestedData>; +/* [Type.FixedSizeList]*/ 16: SingleNestedData; /* [Type.Map]*/ 17: NestedData; /* [Type.DenseUnion]*/ DenseUnion: DenseUnionData; /*[Type.SparseUnion]*/ SparseUnion: SparseUnionData; From 43c671f7f201ca383e7d200a203475d35fbfd7dc Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 26 Feb 2018 06:32:49 -0800 Subject: [PATCH 04/51] [WIP] add Binary writer --- js/src/ipc/writer/binary.ts | 565 ++++++++++++++++++++++++++++++++++++ 1 file changed, 565 insertions(+) create mode 100644 js/src/ipc/writer/binary.ts diff --git a/js/src/ipc/writer/binary.ts b/js/src/ipc/writer/binary.ts new file mode 100644 index 00000000000..0b1cb26a361 --- /dev/null +++ b/js/src/ipc/writer/binary.ts @@ -0,0 +1,565 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { DenseUnionData } from '../../data'; +import { RecordBatch } from '../../recordbatch'; +import { VectorVisitor, TypeVisitor } from '../../visitor'; +import { align, getBool, packBools, iterateBits } from '../../util/bit'; +import { Vector, UnionVector, DictionaryVector, NestedVector } from '../../vector'; +import { BufferMetadata, FieldMetadata, Footer, FileBlock, Message, RecordBatchMetadata, DictionaryBatch } from '../metadata'; +import { + Schema, Field, TypedArray, MetadataVersion, + Dictionary, + Null, Int, Float, + Binary, Bool, Utf8, Decimal, + Date_, Time, Timestamp, Interval, + List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, + UnionMode, SparseUnion, DenseUnion, FlatListType, DataType, FlatType, NestedType, +} from '../../type'; + +export class RecordBatchSerializer extends VectorVisitor { + protected byteLength = 0; + // @ts-ignore + protected buffers: TypedArray[]; + // @ts-ignore + protected fieldNodes: FieldMetadata[]; + // @ts-ignore + protected buffersMeta: BufferMetadata[]; + public writeRecordBatch(recordBatch: RecordBatch) { + this.byteLength = 0; + this.buffers = []; + this.fieldNodes = []; + this.buffersMeta = []; + for (let vector: Vector, index = -1, numCols = recordBatch.numCols; ++index < numCols;) { + if (vector = recordBatch.getChildAt(index)!) { + this.visit(vector); + } + } + const b = new Builder(); + _Message.finishMessageBuffer( + b, writeMessage(b, new RecordBatchMetadata( + MetadataVersion.V4, this.byteLength, this.fieldNodes, this.buffersMeta + )) + ); + const metadataBytes = b.asUint8Array(); + // 4 bytes for the metadata length + 4 bytes of padding + the length of the metadata buffer + const metadataBytesOffset = 8 + metadataBytes.byteLength; + // + the length of all the vector buffers + const recordBatchBytes = new Uint8Array(metadataBytesOffset + this.byteLength); + // Write the metadata length as the first 4 bytes + new DataView(recordBatchBytes.buffer).setInt32(0, metadataBytes.byteLength); + // Now write the buffers + const { buffers, buffersMeta } = this; + for (let bufferIndex = -1, buffersLen = buffers.length; ++bufferIndex < buffersLen;) { + const { buffer, byteLength } = buffers[bufferIndex]; + const { offset: byteOffset } = buffersMeta[bufferIndex]; + recordBatchBytes.set( + new Uint8Array(buffer, 0, byteLength), + metadataBytesOffset + byteOffset + ); + } + return recordBatchBytes; + } + public visit(vector: Vector) { + const { data, length, nullCount } = vector; + if (length > 2147483647) { + throw new RangeError('Cannot write arrays larger than 2^31 - 1 in length'); + } + this.fieldNodes.push(new FieldMetadata(length, nullCount)); + this.addBuffer(nullCount <= 0 + ? new Uint8Array(0) // placeholder validity buffer + : this.getTruncatedBitmap(data.offset, length, data.nullBitmap!), 64); + return super.visit(vector); + } + public visitNull (_vector: Vector) { return this.addBuffer(null); } + public visitBool (vector: Vector) { return this.visitFlatVector(vector); } + public visitInt (vector: Vector) { return this.visitFlatVector(vector); } + public visitFloat (vector: Vector) { return this.visitFlatVector(vector); } + public visitUtf8 (vector: Vector) { return this.visitFlatListVector(vector); } + public visitBinary (vector: Vector) { return this.visitFlatListVector(vector); } + public visitFixedSizeBinary(vector: Vector) { return this.visitFlatVector(vector); } + public visitDate (vector: Vector) { return this.visitFlatVector(vector); } + public visitTimestamp (vector: Vector) { return this.visitFlatVector(vector); } + public visitTime (vector: Vector