Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion integration/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,6 @@ def generate_map_case():

batch_sizes = [7, 10]
skip = set()
skip.add('JS') # TODO(ARROW-1279)
skip.add('Go') # TODO(ARROW-3679)
return _generate_file("map", fields, batch_sizes, skip=skip)

Expand Down
58 changes: 25 additions & 33 deletions js/DEVELOP.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ If you’d like to report a bug but don’t have time to fix it, you can still p
it on JIRA, or email the mailing list
[dev@arrow.apache.org](http://mail-archives.apache.org/mod_mbox/arrow-dev/)



# The npm scripts

* `npm run clean` - cleans targets
Expand Down Expand Up @@ -68,37 +66,31 @@ Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm

# Updating the Arrow format flatbuffers generated code

Once generated, the flatbuffers format code needs to be adjusted for our build scripts.

1. Generate the flatbuffers TypeScript source from the Arrow project root directory:
```sh
cd $ARROW_HOME

flatc --ts -o ./js/src/format ./format/*.fbs

cd ./js/src/format

# Delete Tensor_generated.js (skip this when we support Tensors)
rm ./Tensor_generated.ts

# Remove "_generated" suffix from TS files
mv ./File_generated.ts .File.ts
mv ./Schema_generated.ts .Schema.ts
mv ./Message_generated.ts .Message.ts
```
1. Remove Tensor import from `Schema.ts`
1. Fix all the `flatbuffers` imports
```ts
import { flatbuffers } from "./flatbuffers" // <-- change
import { flatbuffers } from "flatbuffers" // <-- to this
```
1. Remove `_generated` from the ES6 imports of the generated files
```ts
import * as NS16187549871986683199 from "./Schema_generated"; // <-- change
import * as NS16187549871986683199 from "./Schema"; // <------- to this
```
1. Add `/* tslint:disable:class-name */` to the top of `Schema.ts`
1. Execute `npm run lint` to fix all the linting errors
1. Once generated, the flatbuffers format code needs to be adjusted for our build scripts (assumes `gnu-sed`):

```shell
cd $ARROW_HOME

flatc --ts -o ./js/src/fb ./format/{File,Schema,Message}.fbs

cd ./js/src/fb

# Rename the existing files to <filename>.bak.ts
mv File{,.bak}.ts && mv Schema{,.bak}.ts && mv Message{,.bak}.ts

# Remove `_generated` from the ES6 imports of the generated files
sed -i '+s+_generated\";+\";+ig' *_generated.ts
# Fix all the `flatbuffers` imports
sed -i '+s+./flatbuffers+flatbuffers+ig' *_generated.ts
# Fix the Union createTypeIdsVector typings
sed -i -r '+s+static createTypeIdsVector\(builder: flatbuffers.Builder, data: number\[\] \| Uint8Array+static createTypeIdsVector\(builder: flatbuffers.Builder, data: number\[\] \| Int32Array+ig' Schema_generated.ts
# Add `/* tslint:disable:class-name */` to the top of `Schema.ts`
echo -e '/* tslint:disable:class-name */\n' | cat - Schema_generated.ts > Schema1.ts && mv Schema1.ts Schema_generated.ts
# Remove "_generated" suffix from TS files
mv File{_generated,}.ts && mv Schema{_generated,}.ts && mv Message{_generated,}.ts
```
2. Manually remove `Tensor` and `SparseTensor` imports and exports
3. Execute `npm run lint` from the `js` directory to fix the linting errors

[1]: mailto:dev-subscribe@arrow.apache.org
[2]: https://github.com/apache/arrow/tree/master/format
Expand Down
1 change: 0 additions & 1 deletion js/src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ export { Column } from './column';
export { Visitor } from './visitor';
export { Schema, Field } from './schema';
export {
Row,
Vector,
BaseVector,
BinaryVector,
Expand Down
2 changes: 1 addition & 1 deletion js/src/bin/arrow2csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ function batchesToString(state: ToStringState, schema: Schema) {
if (rowId++ % 350 === 0) {
this.push(`${formatRow(header, maxColWidths, sep)}\n`);
}
this.push(`${formatRow([rowId, ...row].map(valueToString), maxColWidths, sep)}\n`);
this.push(`${formatRow([rowId, ...row.toArray()].map(valueToString), maxColWidths, sep)}\n`);
}
}
cb();
Expand Down
4 changes: 2 additions & 2 deletions js/src/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import {
DataType, strideForType,
Float, Int, Decimal, FixedSizeBinary,
Date_, Time, Timestamp, Interval,
Utf8, Binary, List,
Utf8, Binary, List, Map_
} from './type';

/**
Expand Down Expand Up @@ -441,7 +441,7 @@ export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary
}

/** @ignore */
export abstract class VariableWidthBuilder<T extends Binary | Utf8 | List, TNull = any> extends Builder<T, TNull> {
export abstract class VariableWidthBuilder<T extends Binary | Utf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
protected _pendingLength: number = 0;
protected _offsets: OffsetsBufferBuilder;
protected _pending: Map<number, any> | undefined;
Expand Down
50 changes: 42 additions & 8 deletions js/src/builder/map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,49 @@
// under the License.

import { Field } from '../schema';
import { Builder } from '../builder';
import { DataType, Map_ } from '../type';
import { DataType, Map_, Struct } from '../type';
import { Builder, VariableWidthBuilder } from '../builder';

/** @ignore */ type MapValue<K extends DataType = any, V extends DataType = any> = Map_<K, V>['TValue'];
/** @ignore */ type MapValues<K extends DataType = any, V extends DataType = any> = Map<number, MapValue<K, V> | undefined>;
/** @ignore */ type MapValueExt<K extends DataType = any, V extends DataType = any> = MapValue<K, V> | { [key: string]: V } | { [key: number]: V } ;

/** @ignore */
export class MapBuilder<T extends { [key: string]: DataType } = any, TNull = any> extends Builder<Map_<T>, TNull> {
public addChild(child: Builder, name = `${this.numChildren}`) {
const { children, keysSorted } = this.type;
const childIndex = this.children.push(child);
this.type = new Map_([...children, new Field(name, child.type, true)], keysSorted);
return childIndex;
export class MapBuilder<K extends DataType = any, V extends DataType = any, TNull = any> extends VariableWidthBuilder<Map_<K, V>, TNull> {

protected _pending: MapValues<K, V> | undefined;
public set(index: number, value: MapValueExt<K, V> | TNull) {
return super.set(index, value as MapValue<K, V> | TNull);
}

public setValue(index: number, value: MapValueExt<K, V>) {
value = value instanceof Map ? value : new Map(Object.entries(value));
const pending = this._pending || (this._pending = new Map() as MapValues<K, V>);
const current = pending.get(index);
current && (this._pendingLength -= current.size);
this._pendingLength += value.size;
pending.set(index, value);
}

public addChild(child: Builder<Struct<{ key: K, value: V }>>, name = `${this.numChildren}`) {
if (this.numChildren > 0) {
throw new Error('ListBuilder can only have one child.');
}
this.children[this.numChildren] = child;
this.type = new Map_<K, V>(new Field(name, child.type, true), this.type.keysSorted);
return this.numChildren - 1;
}

protected _flushPending(pending: MapValues<K, V>) {
const offsets = this._offsets;
const setValue = this._setValue;
pending.forEach((value, index) => {
if (value === undefined) {
offsets.set(index, 0);
} else {
offsets.set(index, value.size);
setValue(this, index, value);
}
});
}
}
8 changes: 4 additions & 4 deletions js/src/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ export class Data<T extends DataType = DataType> {
case Type.List: return <unknown> Data.List( <unknown> type as List, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], (childData || [])[0]) as Data<T>;
case Type.FixedSizeList: return <unknown> Data.FixedSizeList( <unknown> type as FixedSizeList, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], (childData || [])[0]) as Data<T>;
case Type.Struct: return <unknown> Data.Struct( <unknown> type as Struct, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], childData || []) as Data<T>;
case Type.Map: return <unknown> Data.Map( <unknown> type as Map_, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], childData || []) as Data<T>;
case Type.Map: return <unknown> Data.Map( <unknown> type as Map_, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], (childData || [])[0]) as Data<T>;
case Type.Union: return <unknown> Data.Union( <unknown> type as Union, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.TYPE] || [], buffers[BufferType.OFFSET] || childData, childData) as Data<T>;
}
throw new Error(`Unrecognized typeId ${type.typeId}`);
Expand Down Expand Up @@ -262,16 +262,16 @@ export class Data<T extends DataType = DataType> {
return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), undefined, toUint8Array(nullBitmap)], [child]);
}
/** @nocollapse */
public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data | Vector) {
public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], [child]);
}
/** @nocollapse */
public static Struct<T extends Struct>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], children);
}
/** @nocollapse */
public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], children);
public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: (Data | Vector)) {
return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), undefined, toUint8Array(nullBitmap)], [child]);
}
public static Union<T extends SparseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[], _?: any): Data<T>;
public static Union<T extends DenseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data<T>;
Expand Down
17 changes: 0 additions & 17 deletions js/src/fb/File.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,6 @@ export namespace org.apache.arrow.flatbuf {
return (obj || new Footer).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* @param flatbuffers.ByteBuffer bb
* @param Footer= obj
* @returns Footer
*/
static getSizePrefixedRootAsFooter(bb: flatbuffers.ByteBuffer, obj?: Footer): Footer {
return (obj || new Footer).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* @returns org.apache.arrow.flatbuf.MetadataVersion
*/
Expand Down Expand Up @@ -168,14 +159,6 @@ export namespace org.apache.arrow.flatbuf {
builder.finish(offset);
}

/**
* @param flatbuffers.Builder builder
* @param flatbuffers.Offset offset
*/
static finishSizePrefixedFooterBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
builder.finish(offset, undefined);
}

static createFooter(builder: flatbuffers.Builder, version: NS7624605610262437867.org.apache.arrow.flatbuf.MetadataVersion, schemaOffset: flatbuffers.Offset, dictionariesOffset: flatbuffers.Offset, recordBatchesOffset: flatbuffers.Offset): flatbuffers.Offset {
Footer.startFooter(builder);
Footer.addVersion(builder, version);
Expand Down
35 changes: 0 additions & 35 deletions js/src/fb/Message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,6 @@ export namespace org.apache.arrow.flatbuf {
return (obj || new RecordBatch).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* @param flatbuffers.ByteBuffer bb
* @param RecordBatch= obj
* @returns RecordBatch
*/
static getSizePrefixedRootAsRecordBatch(bb: flatbuffers.ByteBuffer, obj?: RecordBatch): RecordBatch {
return (obj || new RecordBatch).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* number of records / rows. The arrays in the batch should all have this
* length
Expand Down Expand Up @@ -290,15 +281,6 @@ export namespace org.apache.arrow.flatbuf {
return (obj || new DictionaryBatch).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* @param flatbuffers.ByteBuffer bb
* @param DictionaryBatch= obj
* @returns DictionaryBatch
*/
static getSizePrefixedRootAsDictionaryBatch(bb: flatbuffers.ByteBuffer, obj?: DictionaryBatch): DictionaryBatch {
return (obj || new DictionaryBatch).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* @returns flatbuffers.Long
*/
Expand Down Expand Up @@ -404,15 +386,6 @@ export namespace org.apache.arrow.flatbuf {
return (obj || new Message).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* @param flatbuffers.ByteBuffer bb
* @param Message= obj
* @returns Message
*/
static getSizePrefixedRootAsMessage(bb: flatbuffers.ByteBuffer, obj?: Message): Message {
return (obj || new Message).__init(bb.readInt32(bb.position()) + bb.position(), bb);
}

/**
* @returns org.apache.arrow.flatbuf.MetadataVersion
*/
Expand Down Expand Up @@ -549,14 +522,6 @@ export namespace org.apache.arrow.flatbuf {
builder.finish(offset);
}

/**
* @param flatbuffers.Builder builder
* @param flatbuffers.Offset offset
*/
static finishSizePrefixedMessageBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
builder.finish(offset, undefined);
}

static createMessage(builder: flatbuffers.Builder, version: NS7624605610262437867.org.apache.arrow.flatbuf.MetadataVersion, headerType: org.apache.arrow.flatbuf.MessageHeader, headerOffset: flatbuffers.Offset, bodyLength: flatbuffers.Long, customMetadataOffset: flatbuffers.Offset): flatbuffers.Offset {
Message.startMessage(builder);
Message.addVersion(builder, version);
Expand Down
Loading