From ab87daf1807644c4477f9e7fa143e3fc61cc33f2 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sat, 16 Dec 2023 14:14:38 -0500 Subject: [PATCH 1/5] GH-39251: [JS] Use resizable buffer in builder --- js/src/builder.ts | 2 +- js/src/builder/binary.ts | 4 ++-- js/src/builder/buffer.ts | 44 ++++++++++++++++++++++++------------- js/src/builder/largeutf8.ts | 2 +- js/src/builder/union.ts | 4 ++-- js/src/builder/utf8.ts | 2 +- js/src/util/buffer.ts | 3 +++ 7 files changed, 39 insertions(+), 22 deletions(-) diff --git a/js/src/builder.ts b/js/src/builder.ts index 1a4c52f871b..bf3d8bb11ff 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -342,7 +342,7 @@ export abstract class Builder { export abstract class FixedWidthBuilder extends Builder { constructor(opts: BuilderOptions) { super(opts); - this._values = new DataBufferBuilder(new this.ArrayType(0), this.stride); + this._values = new DataBufferBuilder(this.ArrayType, 0, this.stride); } public setValue(index: number, value: T['TValue']) { const values = this._values; diff --git a/js/src/builder/binary.ts b/js/src/builder/binary.ts index 3c12ddf34ab..fa9a11b24ec 100644 --- a/js/src/builder/binary.ts +++ b/js/src/builder/binary.ts @@ -16,15 +16,15 @@ // under the License. import { Binary } from '../type.js'; -import { toUint8Array } from '../util/buffer.js'; import { BufferBuilder } from './buffer.js'; import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; +import { toUint8Array } from '../util/buffer.js'; /** @ignore */ export class BinaryBuilder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 40217205968..89c488c75c1 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -import { memcpy } from '../util/buffer.js'; +import { SAFE_ARRAY_SIZE, memcpy } from '../util/buffer.js'; import { TypedArray, BigIntArray, ArrayCtor } from '../interfaces.js'; import { DataType } from '../type.js'; @@ -25,19 +25,33 @@ function roundLengthUpToNearest64Bytes(len: number, BPE: number) { return ((bytesMinus1 - bytesMinus1 % 64 + 64) || 64) / BPE; } /** @ignore */ -const sliceOrExtendArray = (arr: T, len = 0) => ( - arr.length >= len ? arr.subarray(0, len) : memcpy(new (arr.constructor as any)(len), arr, 0) -) as T; +function resizeArray(arr: T, len = 0): T { + // TODO: remove when https://github.com/microsoft/TypeScript/issues/54636 is fixed + const buffer = arr.buffer as ArrayBufferLike & { resizable: boolean; resize: (byteLength: number) => void; maxByteLength: number }; + const byteLength = len * arr.BYTES_PER_ELEMENT; + if (buffer.resizable && byteLength <= buffer.maxByteLength) { + buffer.resize(byteLength); + return new (arr.constructor as any)(buffer) as T; + } + + // Fallback for non-resiable buffers + + if (arr.length >= len) { + return arr.subarray(0, len) as T; + } + return memcpy(new (arr.constructor as any)(len), arr, 0); +} /** @ignore */ export class BufferBuilder { - constructor(buffer: T, stride = 1) { - this.buffer = buffer; + constructor(bufferType: ArrayCtor, initialSize = 0, stride = 1) { + this.length = Math.ceil(initialSize / stride); + // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed + this.buffer = new bufferType(new (ArrayBuffer as any)(this.length * bufferType.BYTES_PER_ELEMENT, { maxByteLength: SAFE_ARRAY_SIZE })) as T; this.stride = stride; - this.BYTES_PER_ELEMENT = buffer.BYTES_PER_ELEMENT; - this.ArrayType = buffer.constructor as ArrayCtor; - this._resize(this.length = Math.ceil(buffer.length / stride)); + this.BYTES_PER_ELEMENT = bufferType.BYTES_PER_ELEMENT; + this.ArrayType = bufferType; } public buffer: T; @@ -72,17 +86,18 @@ export class BufferBuilder { } public flush(length = this.length) { length = roundLengthUpToNearest64Bytes(length * this.stride, this.BYTES_PER_ELEMENT); - const array = sliceOrExtendArray(this.buffer, length); + const array = resizeArray(this.buffer, length); this.clear(); return array; } public clear() { this.length = 0; - this._resize(0); + // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed + this.buffer = new this.ArrayType(new (ArrayBuffer as any)(0, { maxByteLength: SAFE_ARRAY_SIZE })) as T; return this; } protected _resize(newLength: number) { - return this.buffer = memcpy(new this.ArrayType(newLength), this.buffer); + return this.buffer = resizeArray(this.buffer, newLength); } } @@ -100,7 +115,7 @@ export class DataBufferBuilder extends Buffe /** @ignore */ export class BitmapBufferBuilder extends DataBufferBuilder { - constructor(data = new Uint8Array(0)) { super(data, 1 / 8); } + constructor() { super(Uint8Array, 0, 1 / 8); } public numValid = 0; public get numInvalid() { return this.length - this.numValid; } @@ -123,9 +138,8 @@ export class BitmapBufferBuilder extends DataBufferBuilder { /** @ignore */ export class OffsetsBufferBuilder extends DataBufferBuilder { constructor(type: T) { - super(new type.OffsetArrayType(1), 1); + super(type.OffsetArrayType as ArrayCtor, 1, 1); } - public append(value: T['TOffsetArray'][0]) { return this.set(this.length - 1, value); } diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largeutf8.ts index fddfeaf8e7b..0e23a846384 100644 --- a/js/src/builder/largeutf8.ts +++ b/js/src/builder/largeutf8.ts @@ -24,7 +24,7 @@ import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; export class LargeUtf8Builder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/builder/union.ts b/js/src/builder/union.ts index ac8a13191a5..7bee460a77d 100644 --- a/js/src/builder/union.ts +++ b/js/src/builder/union.ts @@ -31,7 +31,7 @@ export abstract class UnionBuilder extends Builder constructor(options: UnionBuilderOptions) { super(options); - this._typeIds = new DataBufferBuilder(new Int8Array(0), 1); + this._typeIds = new DataBufferBuilder(Int8Array, 0, 1); if (typeof options['valueToChildTypeId'] === 'function') { this._valueToChildTypeId = options['valueToChildTypeId']; } @@ -84,7 +84,7 @@ export class DenseUnionBuilder extends UnionB constructor(options: UnionBuilderOptions) { super(options); - this._offsets = new DataBufferBuilder(new Int32Array(0)); + this._offsets = new DataBufferBuilder(Int32Array); } /** @ignore */ diff --git a/js/src/builder/utf8.ts b/js/src/builder/utf8.ts index 53b8306cbaf..aac0aec54fe 100644 --- a/js/src/builder/utf8.ts +++ b/js/src/builder/utf8.ts @@ -25,7 +25,7 @@ import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; export class Utf8Builder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/util/buffer.ts b/js/src/util/buffer.ts index 4f4379dedf6..08db2f817ac 100644 --- a/js/src/util/buffer.ts +++ b/js/src/util/buffer.ts @@ -23,6 +23,9 @@ import { ByteBuffer } from 'flatbuffers'; /** @ignore */ const SharedArrayBuf = (typeof SharedArrayBuffer !== 'undefined' ? SharedArrayBuffer : ArrayBuffer); +/** @ignore */ +export const SAFE_ARRAY_SIZE = 2 ** 32 - 1; + /** @ignore */ function collapseContiguousByteRanges(chunks: Uint8Array[]) { const result = chunks[0] ? [chunks[0]] : []; From c7bd7ff99c8e05f6ce5534c71312bc113e089f63 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sat, 16 Dec 2023 14:18:59 -0500 Subject: [PATCH 2/5] Simplify code --- js/src/builder/buffer.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 89c488c75c1..2d99788dcee 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -34,12 +34,10 @@ function resizeArray(arr: T, len = 0): T { return new (arr.constructor as any)(buffer) as T; } - // Fallback for non-resiable buffers - - if (arr.length >= len) { - return arr.subarray(0, len) as T; - } - return memcpy(new (arr.constructor as any)(len), arr, 0); + // Fallback for non-resizable buffers + return arr.length >= len ? + arr.subarray(0, len) as T : + memcpy(new (arr.constructor as any)(len), arr, 0); } /** @ignore */ From b050b2085676497d4d8a9ad3eadda1f092dd6f33 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sat, 16 Dec 2023 16:57:43 -0500 Subject: [PATCH 3/5] Add unit test --- js/src/builder/buffer.ts | 4 ++-- js/test/unit/builders/builder-tests.ts | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 2d99788dcee..6e332bd4b5e 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -25,13 +25,13 @@ function roundLengthUpToNearest64Bytes(len: number, BPE: number) { return ((bytesMinus1 - bytesMinus1 % 64 + 64) || 64) / BPE; } /** @ignore */ -function resizeArray(arr: T, len = 0): T { +export function resizeArray(arr: T, len = 0): T { // TODO: remove when https://github.com/microsoft/TypeScript/issues/54636 is fixed const buffer = arr.buffer as ArrayBufferLike & { resizable: boolean; resize: (byteLength: number) => void; maxByteLength: number }; const byteLength = len * arr.BYTES_PER_ELEMENT; if (buffer.resizable && byteLength <= buffer.maxByteLength) { buffer.resize(byteLength); - return new (arr.constructor as any)(buffer) as T; + return arr; } // Fallback for non-resizable buffers diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts index 0137c7aa666..f370b28b37d 100644 --- a/js/test/unit/builders/builder-tests.ts +++ b/js/test/unit/builders/builder-tests.ts @@ -25,6 +25,7 @@ import { validateVector } from './utils.js'; import * as generate from '../../generate-test-data.js'; import { Type, DataType, util, Builder, makeBuilder, builderThroughIterable } from 'apache-arrow'; +import { resizeArray } from '../../../src/builder/buffer.js'; const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true'; const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true'; @@ -73,6 +74,23 @@ describe('Generated Test Data', () => { describe('MapBuilder', () => { validateBuilder(generate.map); }); }); +describe('Buffer Utils', () => { + test('resize typed arrays', () => { + const array = new Uint8Array(10); + + expect(resizeArray(array, 5)).toHaveLength(5); + expect(resizeArray(array, 15)).toHaveLength(15); + }); + + test('resize resizable typed arrays', () => { + // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed + const array = new Uint8Array(new (ArrayBuffer as any)(10, { maxByteLength: 100 })); + + expect(resizeArray(array, 5)).toHaveLength(5); + expect(resizeArray(array, 15)).toHaveLength(15); + }); +}); + function validateBuilder(generate: (length?: number, nullCount?: number, ...args: any[]) => generate.GeneratedVector) { const type = generate(0, 0).vector.type; From 5259e561c04ec755d0b77ed3b5862d65f683ddc6 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sat, 16 Dec 2023 17:00:24 -0500 Subject: [PATCH 4/5] Remove tests as it breaks tests --- js/src/builder/buffer.ts | 2 +- js/test/unit/builders/builder-tests.ts | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 6e332bd4b5e..0ea36d821b4 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -25,7 +25,7 @@ function roundLengthUpToNearest64Bytes(len: number, BPE: number) { return ((bytesMinus1 - bytesMinus1 % 64 + 64) || 64) / BPE; } /** @ignore */ -export function resizeArray(arr: T, len = 0): T { +function resizeArray(arr: T, len = 0): T { // TODO: remove when https://github.com/microsoft/TypeScript/issues/54636 is fixed const buffer = arr.buffer as ArrayBufferLike & { resizable: boolean; resize: (byteLength: number) => void; maxByteLength: number }; const byteLength = len * arr.BYTES_PER_ELEMENT; diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts index f370b28b37d..0137c7aa666 100644 --- a/js/test/unit/builders/builder-tests.ts +++ b/js/test/unit/builders/builder-tests.ts @@ -25,7 +25,6 @@ import { validateVector } from './utils.js'; import * as generate from '../../generate-test-data.js'; import { Type, DataType, util, Builder, makeBuilder, builderThroughIterable } from 'apache-arrow'; -import { resizeArray } from '../../../src/builder/buffer.js'; const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true'; const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true'; @@ -74,23 +73,6 @@ describe('Generated Test Data', () => { describe('MapBuilder', () => { validateBuilder(generate.map); }); }); -describe('Buffer Utils', () => { - test('resize typed arrays', () => { - const array = new Uint8Array(10); - - expect(resizeArray(array, 5)).toHaveLength(5); - expect(resizeArray(array, 15)).toHaveLength(15); - }); - - test('resize resizable typed arrays', () => { - // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed - const array = new Uint8Array(new (ArrayBuffer as any)(10, { maxByteLength: 100 })); - - expect(resizeArray(array, 5)).toHaveLength(5); - expect(resizeArray(array, 15)).toHaveLength(15); - }); -}); - function validateBuilder(generate: (length?: number, nullCount?: number, ...args: any[]) => generate.GeneratedVector) { const type = generate(0, 0).vector.type; From 440430156afd1535afc691b324a2fea45148e161 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sat, 16 Dec 2023 17:07:05 -0500 Subject: [PATCH 5/5] Move constant --- js/src/builder/buffer.ts | 6 +++++- js/src/util/buffer.ts | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 0ea36d821b4..18c6dcda738 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -import { SAFE_ARRAY_SIZE, memcpy } from '../util/buffer.js'; +import { memcpy } from '../util/buffer.js'; import { TypedArray, BigIntArray, ArrayCtor } from '../interfaces.js'; import { DataType } from '../type.js'; @@ -24,6 +24,7 @@ function roundLengthUpToNearest64Bytes(len: number, BPE: number) { const bytesMinus1 = Math.ceil(len) * BPE - 1; return ((bytesMinus1 - bytesMinus1 % 64 + 64) || 64) / BPE; } + /** @ignore */ function resizeArray(arr: T, len = 0): T { // TODO: remove when https://github.com/microsoft/TypeScript/issues/54636 is fixed @@ -40,6 +41,9 @@ function resizeArray(arr: T, len = 0): T { memcpy(new (arr.constructor as any)(len), arr, 0); } +/** @ignore */ +export const SAFE_ARRAY_SIZE = 2 ** 32 - 1; + /** @ignore */ export class BufferBuilder { diff --git a/js/src/util/buffer.ts b/js/src/util/buffer.ts index 08db2f817ac..4f4379dedf6 100644 --- a/js/src/util/buffer.ts +++ b/js/src/util/buffer.ts @@ -23,9 +23,6 @@ import { ByteBuffer } from 'flatbuffers'; /** @ignore */ const SharedArrayBuf = (typeof SharedArrayBuffer !== 'undefined' ? SharedArrayBuffer : ArrayBuffer); -/** @ignore */ -export const SAFE_ARRAY_SIZE = 2 ** 32 - 1; - /** @ignore */ function collapseContiguousByteRanges(chunks: Uint8Array[]) { const result = chunks[0] ? [chunks[0]] : [];