From 953232c043e3b13cc33ec9bfe6b18683c218849f Mon Sep 17 00:00:00 2001 From: ptaylor Date: Sun, 27 May 2018 12:58:40 -0700 Subject: [PATCH 1/2] write schema metadata --- js/src/ipc/writer/binary.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/js/src/ipc/writer/binary.ts b/js/src/ipc/writer/binary.ts index d8b1d7eb237..166d4b89c8e 100644 --- a/js/src/ipc/writer/binary.ts +++ b/js/src/ipc/writer/binary.ts @@ -588,14 +588,34 @@ function writeMessage(b: Builder, node: Message) { } function writeSchema(b: Builder, node: Schema) { + const fieldOffsets = node.fields.map((f) => writeField(b, f)); const fieldsOffset = _Schema.startFieldsVector(b, fieldOffsets.length) || _Schema.createFieldsVector(b, fieldOffsets); + + let metadata: number | undefined = undefined; + if (node.metadata && node.metadata.size > 0) { + metadata = _Schema.createCustomMetadataVector( + b, + [...node.metadata].map(([k, v]) => { + const key = b.createString(`${k}`); + const val = b.createString(`${v}`); + return ( + _KeyValue.startKeyValue(b) || + _KeyValue.addKey(b, key) || + _KeyValue.addValue(b, val) || + _KeyValue.endKeyValue(b) + ); + }) + ); + } + return ( _Schema.startSchema(b) || _Schema.addFields(b, fieldsOffset) || _Schema.addEndianness(b, platformIsLittleEndian ? _Endianness.Little : _Endianness.Big) || + (metadata !== undefined && _Schema.addCustomMetadata(b, metadata)) || _Schema.endSchema(b) ); } @@ -662,8 +682,8 @@ function writeField(b: Builder, node: Field) { metadata = _Field.createCustomMetadataVector( b, [...node.metadata].map(([k, v]) => { - const key = b.createString(k); - const val = b.createString(v); + const key = b.createString(`${k}`); + const val = b.createString(`${v}`); return ( _KeyValue.startKeyValue(b) || _KeyValue.addKey(b, key) || From 9b199c399fce9d45c878d722181633fd495b5943 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 31 May 2018 01:02:03 -0700 Subject: [PATCH 2/2] finish implementing unions in the JS reader --- js/src/data.ts | 4 ++++ js/src/ipc/reader/json.ts | 37 ++++++++++++++++++++----------------- js/src/type.ts | 6 ++++-- js/src/vector/nested.ts | 28 ++++++++++++++++------------ 4 files changed, 44 insertions(+), 31 deletions(-) diff --git a/js/src/data.ts b/js/src/data.ts index 963a6a476ba..2c791513103 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -216,9 +216,13 @@ export class ListData extends SingleNestedData { export class UnionData extends NestedData { public /* [VectorType.TYPE]:*/ 3: T['TArray']; public get typeIds() { return this[VectorType.TYPE]; } + public readonly typeIdToChildIndex: { [key: number]: number }; constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable, childData: Data[], offset?: number, nullCount?: number) { super(type, length, nullBitmap, childData, offset, nullCount); this[VectorType.TYPE] = toTypedArray(Int8Array, typeIds); + this.typeIdToChildIndex = type.typeIds.reduce((typeIdToChildIndex, typeId, idx) => { + return (typeIdToChildIndex[typeId] = idx) && typeIdToChildIndex || typeIdToChildIndex; + }, Object.create(null) as { [key: number]: number }); } public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) { return new UnionData(type, length, this[VectorType.VALIDITY], this[VectorType.TYPE], this.childData, offset, nullCount); diff --git a/js/src/ipc/reader/json.ts b/js/src/ipc/reader/json.ts index 67bb5ca2d65..e8ab498f9aa 100644 --- a/js/src/ipc/reader/json.ts +++ b/js/src/ipc/reader/json.ts @@ -76,6 +76,7 @@ function flattenDataSources(xs: any[]): any[][] { ...buffers, ...(column['VALIDITY'] && [column['VALIDITY']] || []), ...(column['OFFSET'] && [column['OFFSET']] || []), + ...(column['TYPE'] && [column['TYPE']] || []), ...(column['DATA'] && [column['DATA']] || []), ...flattenDataSources(column['children']) ], [] as any[][]); @@ -156,6 +157,7 @@ import * as Schema_ from '../../fb/Schema'; import Type = Schema_.org.apache.arrow.flatbuf.Type; import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit; +import UnionMode = Schema_.org.apache.arrow.flatbuf.UnionMode; import Precision = Schema_.org.apache.arrow.flatbuf.Precision; import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit; import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; @@ -209,6 +211,7 @@ function buffersFromJSON(xs: any[], buffers: BufferMetadata[] = []): BufferMetad const column = xs[i]; column['VALIDITY'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['VALIDITY'].length, 0))); column['OFFSET'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['OFFSET'].length, 0))); + column['TYPE'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['TYPE'].length, 0))); column['DATA'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['DATA'].length, 0))); buffers = buffersFromJSON(column['children'], buffers); } @@ -293,31 +296,31 @@ function typeFromJSON(t: any, children?: Field[]) { throw new Error(`Unrecognized type ${t['name']}`); } -function nullFromJSON (_type: any) { return new Null(); } +function nullFromJSON (_type: any) { return new Null(); } function intFromJSON (_type: any) { switch (_type['bitWidth']) { case 8: return _type['isSigned'] ? new Int8() : new Uint8(); case 16: return _type['isSigned'] ? new Int16() : new Uint16(); case 32: return _type['isSigned'] ? new Int32() : new Uint32(); case 64: return _type['isSigned'] ? new Int64() : new Uint64(); } - return null; } + return null; } function floatingPointFromJSON (_type: any) { switch (Precision[_type['precision']] as any) { case Precision.HALF: return new Float16(); case Precision.SINGLE: return new Float32(); case Precision.DOUBLE: return new Float64(); } - return null; } -function binaryFromJSON (_type: any) { return new Binary(); } -function utf8FromJSON (_type: any) { return new Utf8(); } -function boolFromJSON (_type: any) { return new Bool(); } -function decimalFromJSON (_type: any) { return new Decimal(_type['scale'], _type['precision']); } -function dateFromJSON (_type: any) { return new Date_(DateUnit[_type['unit']] as any); } -function timeFromJSON (_type: any) { return new Time(TimeUnit[_type['unit']] as any, _type['bitWidth'] as TimeBitWidth); } -function timestampFromJSON (_type: any) { return new Timestamp(TimeUnit[_type['unit']] as any, _type['timezone']); } -function intervalFromJSON (_type: any) { return new Interval(IntervalUnit[_type['unit']] as any); } -function listFromJSON (_type: any, children: Field[]) { return new List(children); } -function structFromJSON (_type: any, children: Field[]) { return new Struct(children); } -function unionFromJSON (_type: any, children: Field[]) { return new Union(_type['mode'], (_type['typeIdsArray'] || []) as Type[], children); } -function fixedSizeBinaryFromJSON(_type: any) { return new FixedSizeBinary(_type['byteWidth']); } -function fixedSizeListFromJSON (_type: any, children: Field[]) { return new FixedSizeList(_type['listSize'], children); } -function mapFromJSON (_type: any, children: Field[]) { return new Map_(_type['keysSorted'], children); } + return null; } +function binaryFromJSON (_type: any) { return new Binary(); } +function utf8FromJSON (_type: any) { return new Utf8(); } +function boolFromJSON (_type: any) { return new Bool(); } +function decimalFromJSON (_type: any) { return new Decimal(_type['scale'], _type['precision']); } +function dateFromJSON (_type: any) { return new Date_(DateUnit[_type['unit']] as any); } +function timeFromJSON (_type: any) { return new Time(TimeUnit[_type['unit']] as any, _type['bitWidth'] as TimeBitWidth); } +function timestampFromJSON (_type: any) { return new Timestamp(TimeUnit[_type['unit']] as any, _type['timezone']); } +function intervalFromJSON (_type: any) { return new Interval(IntervalUnit[_type['unit']] as any); } +function listFromJSON (_type: any, children: Field[]) { return new List(children); } +function structFromJSON (_type: any, children: Field[]) { return new Struct(children); } +function unionFromJSON (_type: any, children: Field[]) { return new Union(UnionMode[_type['mode']] as any, (_type['typeIds'] || []) as Type[], children); } +function fixedSizeBinaryFromJSON(_type: any) { return new FixedSizeBinary(_type['byteWidth']); } +function fixedSizeListFromJSON (_type: any, children: Field[]) { return new FixedSizeList(_type['listSize'], children); } +function mapFromJSON (_type: any, children: Field[]) { return new Map_(_type['keysSorted'], children); } diff --git a/js/src/type.ts b/js/src/type.ts index 370be0def0c..26686c74b5d 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -372,9 +372,11 @@ export class Union extends DataType { constructor(public readonly mode: UnionMode, public readonly typeIds: ArrowType[], public readonly children: Field[]) { - super( (mode === UnionMode.Sparse ? Type.SparseUnion : Type.DenseUnion), children); + super( Type.Union, children); } - public toString() { return `${this[Symbol.toStringTag]}<${this.typeIds.map((x) => Type[x]).join(` | `)}>`; } + public toString() { return `${this[Symbol.toStringTag]}<${ + this.children.map((x) => `${x.type}`).join(` | `) + }>`; } protected static [Symbol.toStringTag] = ((proto: Union) => { ( proto).ArrayType = Int8Array; return proto[Symbol.toStringTag] = 'Union'; diff --git a/js/src/vector/nested.ts b/js/src/vector/nested.ts index 1102fe83b71..fe9f7e96d27 100644 --- a/js/src/vector/nested.ts +++ b/js/src/vector/nested.ts @@ -76,31 +76,35 @@ export class UnionView exten public typeIds: Int8Array; // @ts-ignore public valueOffsets?: Int32Array; + // @ts-ignore + protected typeIdToChildIndex: { [key: number]: number }; constructor(data: Data, children?: Vector[]) { super(data, children); this.length = data.length; this.typeIds = data.typeIds; + this.typeIdToChildIndex = data.typeIdToChildIndex; } protected getNested(self: UnionView, index: number): T['TValue'] { - return self.getChildValue(self, index, self.typeIds, self.valueOffsets); + return self.getChildValue(self, index, self.typeIds, self.valueOffsets, self.typeIdToChildIndex); } protected setNested(self: UnionView, index: number, value: T['TValue']): void { - return self.setChildValue(self, index, value, self.typeIds, self.valueOffsets); + return self.setChildValue(self, index, value, self.typeIds, self.valueOffsets, self.typeIdToChildIndex); } - protected getChildValue(self: NestedView, index: number, typeIds: Int8Array, _valueOffsets?: any): any | null { - const child = self.getChildAt(typeIds[index]); + protected getChildValue(self: NestedView, index: number, typeIds: Int8Array, _valueOffsets: any, typeIdToChildIndex: { [key: number]: number }): any | null { + const child = self.getChildAt(typeIdToChildIndex[typeIds[index]]); return child ? child.get(index) : null; } - protected setChildValue(self: NestedView, index: number, value: T['TValue'], typeIds: Int8Array, _valueOffsets?: any): any | null { - const child = self.getChildAt(typeIds[index]); + protected setChildValue(self: NestedView, index: number, value: T['TValue'], typeIds: Int8Array, _valueOffsets: any, typeIdToChildIndex: { [key: number]: number }): any | null { + const child = self.getChildAt(typeIdToChildIndex[typeIds[index]]); return child ? child.set(index, value) : null; } public *[Symbol.iterator](): IterableIterator { const length = this.length; const get = this.getChildValue; + const { typeIdToChildIndex } = this; const { typeIds, valueOffsets } = this; for (let index = -1; ++index < length;) { - yield get(this, index, typeIds, valueOffsets); + yield get(this, index, typeIds, valueOffsets, typeIdToChildIndex); } } } @@ -112,14 +116,14 @@ export class DenseUnionView extends UnionView { this.valueOffsets = data.valueOffsets; } protected getNested(self: DenseUnionView, index: number): any | null { - return self.getChildValue(self, index, self.typeIds, self.valueOffsets); + return self.getChildValue(self, index, self.typeIds, self.valueOffsets, self.typeIdToChildIndex); } - protected getChildValue(self: NestedView, index: number, typeIds: Int8Array, valueOffsets: any): any | null { - const child = self.getChildAt(typeIds[index]); + protected getChildValue(self: NestedView, index: number, typeIds: Int8Array, valueOffsets: any, typeIdToChildIndex: { [key: number]: number }): any | null { + const child = self.getChildAt(typeIdToChildIndex[typeIds[index]]); return child ? child.get(valueOffsets[index]) : null; } - protected setChildValue(self: NestedView, index: number, value: any, typeIds: Int8Array, valueOffsets?: any): any | null { - const child = self.getChildAt(typeIds[index]); + protected setChildValue(self: NestedView, index: number, value: any, typeIds: Int8Array, valueOffsets: any, typeIdToChildIndex: { [key: number]: number }): any | null { + const child = self.getChildAt(typeIdToChildIndex[typeIds[index]]); return child ? child.set(valueOffsets[index], value) : null; } }