diff --git a/js/perf/index.ts b/js/perf/index.ts index 40225a6d8ae..6c54502ce50 100644 --- a/js/perf/index.ts +++ b/js/perf/index.ts @@ -133,6 +133,21 @@ b.suite( b.cycle(cycle) ); +b.suite( + `[index] Vector`, + + ...Object.entries(vectors).map(([name, vector]) => + b.add(`from: ${name}`, () => { + for (let i = -1, n = vector.length; ++i < n;) { + vector[i]; + } + })), + + b.cycle(cycle) +); + + + for (const { name, ipc, table } of config) { b.suite( `Parse`, diff --git a/js/src/recordbatch.ts b/js/src/recordbatch.ts index eed9de66e8e..0f4683e3291 100644 --- a/js/src/recordbatch.ts +++ b/js/src/recordbatch.ts @@ -21,6 +21,8 @@ import { Vector } from './vector.js'; import { Schema, Field } from './schema.js'; import { DataType, Struct, Null, TypeMap } from './type.js'; +import { IndexAccessProxyHandler } from './util/proxyhandler.js' + import { instance as getVisitor } from './visitor/get.js'; import { instance as setVisitor } from './visitor/set.js'; import { instance as indexOfVisitor } from './visitor/indexof.js'; @@ -95,6 +97,13 @@ export class RecordBatch { public readonly schema: Schema; public readonly data: Data>; + /** + * Index access of the record batch elements. While equivalent to + * {@link RecordBatch.get}, it is 1-2 orders of magnitude slower than + * {@link RecordBatch.get}. + */ + [index: number]: T['TValue'] | null; + public get dictionaries() { return this._dictionaries || (this._dictionaries = collectDictionaries(this.schema.fields, this.data.children)); } @@ -280,6 +289,13 @@ export class RecordBatch { protected static [Symbol.toStringTag] = ((proto: RecordBatch) => { (proto as any)._nullCount = -1; (proto as any)[Symbol.isConcatSpreadable] = true; + + // The Proxy object will slow down all method access if it is returned + // from the constructor. By putting it at the root of the prototype + // chain, we do not affect the speed of normal access. That said, index + // access will be much slower than `.get()`. 
+ Object.setPrototypeOf(proto, new Proxy({}, new IndexAccessProxyHandler())) + return 'RecordBatch'; })(RecordBatch.prototype); } diff --git a/js/src/table.ts b/js/src/table.ts index 26f77d74f51..f216b1fdcab 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -33,6 +33,7 @@ import { wrapChunkedIndexOf, sliceChunks, } from './util/chunk.js'; +import { IndexAccessProxyHandler } from './util/proxyhandler.js' import { instance as getVisitor } from './visitor/get.js'; import { instance as setVisitor } from './visitor/set.js'; @@ -152,6 +153,13 @@ export class Table { */ declare public readonly batches: RecordBatch[]; + /** + * Index access of the table elements. While equivalent to + * {@link Table.get}, it is 1-2 orders of magnitude slower than + * {@link Table.get}. + */ + [index: number]: T['TValue'] | null; + /** * The contiguous {@link RecordBatch `RecordBatch`} chunks of the Table rows. */ @@ -389,6 +397,13 @@ export class Table { (proto as any)['set'] = wrapChunkedCall2(setVisitor.getVisitFn(Type.Struct)); (proto as any)['indexOf'] = wrapChunkedIndexOf(indexOfVisitor.getVisitFn(Type.Struct)); (proto as any)['getByteLength'] = wrapChunkedCall1(byteLengthVisitor.getVisitFn(Type.Struct)); + + // The Proxy object will slow down all method access if it is returned + // from the constructor. By putting it at the root of the prototype + // chain, we do not affect the speed of normal access. That said, index + // access will be much slower than `.get()`. 
+ Object.setPrototypeOf(proto, new Proxy({}, new IndexAccessProxyHandler())) + return 'Table'; })(Table.prototype); } diff --git a/js/src/util/proxyhandler.ts b/js/src/util/proxyhandler.ts new file mode 100644 index 00000000000..c5c1f591478 --- /dev/null +++ b/js/src/util/proxyhandler.ts @@ -0,0 +1,24 @@ +export class IndexAccessProxyHandler implements ProxyHandler { + get(target: any, key: string, receiver: any) { + if (typeof key === "string") { // Need to check because key can be a symbol, such as [Symbol.iterator]. + const idx = +key; // Convert the key to a number + if (idx === idx) { // Basically an inverse NaN check + return (receiver || target).get(idx); + } + } + + return Reflect.get(target, key, receiver); + } + + set(target: any, key: string, value: any, receiver: any) { + if (typeof key === "string") { // Need to check because key can be a symbol, such as [Symbol.iterator]. + const idx = +key; // Convert the key to a number + if (idx === idx) { // Basically an inverse NaN check + (receiver || target).set(idx, value); + return true; // A Proxy `set` trap must return true to report that the assignment succeeded. + } + } + + return Reflect.set(target, key, value, receiver); + } +} diff --git a/js/src/vector.ts b/js/src/vector.ts index a2baf83c95d..b94db7f16dd 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -31,6 +31,7 @@ import { wrapChunkedIndexOf, } from './util/chunk.js'; import { BigInt64Array, BigUint64Array } from './util/compat.js'; +import { IndexAccessProxyHandler } from './util/proxyhandler.js' import { instance as getVisitor } from './visitor/get.js'; import { instance as setVisitor } from './visitor/set.js'; @@ -103,6 +104,12 @@ export class Vector { declare protected _nullCount: number; declare protected _byteLength: number; + /** + * Index access of the vector elements. While equivalent to {@link Vector.get}, + * it is 1-2 orders of magnitude slower than {@link Vector.get}. + */ + [index: number]: T['TValue'] | null; + /** * The {@link DataType `DataType`} of this Vector. 
 */ @@ -358,6 +365,30 @@ export class Vector { (proto as any)._offsets = new Uint32Array([0]); (proto as any)[Symbol.isConcatSpreadable] = true; + // The prototype chain of the Vector object is complex to get the best + // possible performance: + // + // - The Proxy object is quite slow, so we put it at the bottom of the + // prototype chain. This means that known access such as functions + // like `vector.get` will be immediately resolved and is fast. Unknown + // access such as index notation (`vector[0]`) will bubble up to the + // proxy object and be resolved. Experimentally, this is about 1-2 + // orders of magnitude slower than using `vector.get(index)`. + // - When the Vector object has multiple chunks in it, we need to find + // the appropriate chunk to iterate through when using methods like + // `.get()`. To do this, in the Vector constructor, it sets the + // prototype of `this` to `vectorPrototypesByTypeId[typeId]`, which + // defines the appropriate methods to find the appropriate chunk. + // The prototypes provided by `vectorPrototypesByTypeId` are also + // chained from the Proxy object, which means Vector objects with + // multiple chunks also retain the index access API. + // - As a note, using `vector.get(i)` is slow as it needs to perform a + // binary search while looking for the right chunk. So operations + // that loop through the array with an index (i.e. `(for i=0; i Type[T] as any) .filter((T: any) => typeof T === 'number' && T !== Type.NONE); @@ -370,6 +401,10 @@ export class Vector { visitorsByTypeId[typeId] = { get, set, indexOf, byteLength }; vectorPrototypesByTypeId[typeId] = Object.create(proto, { + // These object keys are equivalent to string keys. However, by + // putting them in [] (which makes them computed property + // names), the Closure compiler we use to compile this library + // will not optimize out the function names. 
['isValid']: { value: wrapChunkedCall1(isChunkedValid) }, ['get']: { value: wrapChunkedCall1(getVisitor.getVisitFnByTypeId(typeId)) }, ['set']: { value: wrapChunkedCall2(setVisitor.getVisitFnByTypeId(typeId)) }, diff --git a/js/test/unit/recordbatch/record-batch-tests.ts b/js/test/unit/recordbatch/record-batch-tests.ts index fe537c86e80..826cf63b282 100644 --- a/js/test/unit/recordbatch/record-batch-tests.ts +++ b/js/test/unit/recordbatch/record-batch-tests.ts @@ -129,4 +129,17 @@ describe(`RecordBatch`, () => { expect(f32sBatch.numRows).toBe(45); }); }); + + describe(`get()`, () => { + test(`can get row with get and []`, () => { + const batch = numsRecordBatch(32, 45); + const row = batch.get(2) + expect(row).not.toBeNull(); + expect(row!.f32).toEqual(2); + expect(row!.i32).toEqual(2); + + const row2 = batch[2]; + expect(row2).toEqual(row); + }); + }); }); diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts index 50c8565f0f1..3d9d43ee879 100644 --- a/js/test/unit/table-tests.ts +++ b/js/test/unit/table-tests.ts @@ -291,6 +291,10 @@ describe(`Table`, () => { expect(row.f32).toEqual(expected[F32]); expect(row.i32).toEqual(expected[I32]); expect(row.dictionary).toEqual(expected[DICT]); + + // Test index access as well + const row2 = table[i]; + expect(row2).toEqual(row) } }); test(`iterates expected values`, () => { diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index a259cbef877..4e7a0e764f3 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -73,7 +73,12 @@ describe(`StructVector`, () => { test(`get value`, () => { for (const [i, value] of values.entries()) { - expect(vector.get(i)!.toJSON()).toEqual(value); + const row = vector.get(i); + expect(row).not.toBeNull(); + expect(row!.toJSON()).toEqual(value); + + const row2 = vector[i]; + expect(row2).toEqual(row); } }); });