diff --git a/js/src/reader/arrow.ts b/js/src/reader/arrow.ts index 9716c7fb601..dbb6acd0e79 100644 --- a/js/src/reader/arrow.ts +++ b/js/src/reader/arrow.ts @@ -23,7 +23,7 @@ import { readFile } from './file'; import { readStream } from './stream'; import { readVector } from './vector'; import { Vector } from '../vector/vector'; -import { readDictionaries } from './dictionary'; +import { readDictionary } from './dictionary'; import ByteBuffer = flatbuffers.ByteBuffer; export import Schema = Schema_.org.apache.arrow.flatbuf.Schema; @@ -51,9 +51,13 @@ export function* readBuffers(...bytes: Array) { let state = { nodeIndex: 0, bufferIndex: 0 }; let index = -1, fieldsLength = schema.fieldsLength(); if (batch.id) { + // A dictionary batch only contains a single vector. Traverse each + // field and its children until we find one that uses this dictionary while (++index < fieldsLength) { - for (let [id, vector] of readDictionaries(schema.fields(index), batch, state, dictionaries)) { - dictionaries[id] = dictionaries[id] && dictionaries[id].concat(vector) || vector; + let vector = readDictionary(schema.fields(index), batch, state, dictionaries); + if (vector) { + dictionaries[batch.id] = dictionaries[batch.id] && dictionaries[batch.id].concat(vector) || vector; + break; } } } else { diff --git a/js/src/reader/dictionary.ts b/js/src/reader/dictionary.ts index abf7ac3dfb9..61698e80c00 100644 --- a/js/src/reader/dictionary.ts +++ b/js/src/reader/dictionary.ts @@ -21,20 +21,16 @@ import * as Schema_ from '../format/Schema_generated'; import { IteratorState, Dictionaries } from './arrow'; import Field = Schema_.org.apache.arrow.flatbuf.Field; -export function* readDictionaries(field: Field | null, - batch: MessageBatch, - iterator: IteratorState, - dictionaries: Dictionaries) { +export function readDictionary(field: Field | null, + batch: MessageBatch, + iterator: IteratorState, + dictionaries: Dictionaries) { let id: string, encoding = field && field.dictionary(); if (encoding 
&& batch.id === (id = encoding.id().toFloat64().toString())) { - yield [id, readVector(field, batch, iterator, null)]; - return; + return readVector(field, batch, iterator, null); } for (let i = -1, n = field && field.childrenLength() || 0; ++i < n;) { - // Since a dictionary batch can only contain a single vector, return early after we find it - for (let result of readDictionaries(field.children(i), batch, iterator, dictionaries)) { - yield result; - return; - } + let vector = readDictionary(field.children(i), batch, iterator, dictionaries); + if (vector) return vector; } }