diff --git a/js/.gitignore b/js/.gitignore index b48f35b960f..6d0f88d191c 100644 --- a/js/.gitignore +++ b/js/.gitignore @@ -57,7 +57,8 @@ build/Release node_modules/ jspm_packages/ -# Typescript v1 declaration files +# Typescript declaration files +types/ typings/ # Optional npm cache directory diff --git a/js/.npmrc b/js/.npmrc index 43c97e719a5..71ffabdd55d 100644 --- a/js/.npmrc +++ b/js/.npmrc @@ -1 +1,2 @@ +save-prefix= package-lock=false diff --git a/js/gulpfile.js b/js/gulpfile.js index 9c945aa53af..9f8e564bd9e 100644 --- a/js/gulpfile.js +++ b/js/gulpfile.js @@ -63,14 +63,17 @@ for (const [target, format] of combinations([`all`, `all`])) { gulp.task(`clean:${combo}`, gulp.series(cleanTask(target, format, combo, `targets/${target}/${format}`))); gulp.task(`build:${combo}`, gulp.series(buildTask(target, format, combo, `targets/${target}/${format}`))); gulp.task(`bundle:${combo}`, gulp.series(bundleTask(target, format, combo, `targets/${target}/${format}`))); + gulp.task(`package:${combo}`, gulp.series(packageTask(target, format, combo, `targets/${target}/${format}`))); gulp.task(`test:debug:${combo}`, gulp.series(testTask(target, format, combo, `targets/${target}/${format}`, true))); } gulp.task(`test`, gulp.series(runTaskCombos(`test`))); gulp.task(`clean`, gulp.parallel(runTaskCombos(`clean`))); -gulp.task(`build`, gulp.parallel(runTaskCombos(`bundle`))); +gulp.task(`build`, gulp.parallel(runTaskCombos(`build`))); +gulp.task(`bundle`, gulp.parallel(runTaskCombos(`bundle`))); +gulp.task(`package`, gulp.parallel(runTaskCombos(`package`))); gulp.task(`test:debug`, gulp.series(runTaskCombos(`test:debug`))); -gulp.task(`default`, gulp.task(`build`)); +gulp.task(`default`, gulp.task(`package`)); function runTaskCombos(name) { const combos = []; @@ -87,7 +90,7 @@ function cleanTask(target, format, taskName, outDir) { return function cleanTask() { const globs = [`${outDir}/**`]; if (target === `es5` && format === `cjs`) { - globs.push(`typings`); + globs.push(`types`, `typings`); } return del(globs); }; @@ -100,30 +103,32 @@ function buildTask(target, format, taskName, outDir) { } function bundleTask(target, format, taskName, outDir) { - return [ - [`build:${taskName}`], - function bundleTask() { - return streamMerge([ - pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError), - pump( - gulp.src(`package.json`), - gulpJsonTransform((orig) => [ - `version`, `description`, - `author`, `homepage`, `bugs`, `license`, - `keywords`, `repository`, `peerDependencies` - ].reduce((copy, key) => ( - (copy[key] = orig[key]) && copy || copy - ), { - main: `Arrow.js`, - typings: `Arrow.d.ts`, - name: `@apache-arrow/${target}-${format}` - }), 2), - gulp.dest(outDir), - onError - ) - ]) - } - ]; + return function bundleTask() { + return streamMerge([ + pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError), + pump( + gulp.src(`package.json`), + gulpJsonTransform((orig) => [ + `version`, `description`, `keywords`, + `repository`, `author`, `homepage`, `bugs`, `license`, + `dependencies`, `peerDependencies` + ].reduce((copy, key) => ( + (copy[key] = orig[key]) && copy || copy + ), { + main: `Arrow.js`, + types: `Arrow.d.ts`, + typings: `Arrow.d.ts`, + name: `@apache-arrow/${target}-${format}` + }), 2), + gulp.dest(outDir), + onError + ) + ]); + } +} + +function packageTask(target, format, taskName, outDir) { + return [`build:${taskName}`, `bundle:${taskName}`]; } function testTask(target, format, taskName, outDir, debug) { @@ -240,10 +245,10 @@ function typescriptTask(target, format, taskName, outDir) { js = [js, sourcemaps.write(), gulp.dest(outDir)]; // copy types to the root if (target === `es5` && format === `cjs`) { - dts.push(gulp.dest(`typings`)); + dts.push(gulp.dest(`types`)); } tsProjects.push({ - target, format, + target, format, js: js = pump(...js, onError), dts: dts = pump(...dts, onError) }); diff --git a/js/package.json b/js/package.json index e6848fd641d..03687a8b25c 100644 --- a/js/package.json +++ b/js/package.json @@ -1,26 +1,27 @@ { "name": "apache-arrow", "version": "0.1.2", + "types": "./types/Arrow.d.ts", + "typings": "./types/Arrow.d.ts", "main": "./targets/es5/cjs/Arrow.js", "module": "./targets/es5/esm/Arrow.js", "browser": "./targets/es5/umd/Arrow.js", "jsnext:main": "./targets/es2015/esm/Arrow.js", "esnext:main": "./targets/esnext/esm/Arrow.js", - "typings": "./typings/Arrow.d.ts", "description": "Apache Arrow columnar in-memory format", "scripts": { "lerna": "lerna", - "commit": "git-cz", "test": "gulp test", "build": "gulp build", "clean": "gulp clean", + "bundle": "gulp bundle", + "package": "gulp package", "perf": "node ./perf/index.js", "test:debug": "gulp test:debug", "test:coverage": "gulp test -t esnext -m esm --coverage", - "validate": "npm-run-all lint build test", + "validate": "npm-run-all clean lint build test bundle", "lerna:publish": "lerna exec --bail=false npm publish", "prepublishOnly": "sh ./prepublish.sh", - "commitmsg": "validate-commit-msg", "doc": "shx rm -rf ./doc && esdoc", "lint": "npm-run-all -p lint:*", "lint:src": "tslint --fix --type-check -p tsconfig.json -c tslint.json \"src/**/*.ts\"", @@ -43,64 +44,52 @@ "files": [ "src", "dist", - "typings", + "types", "targets", "LICENSE", "README.md" ], "peerDependencies": { - "tslib": "^1.7.1" + "tslib": "~1.7.1", + "command-line-usage": "4.0.1" }, "dependencies": { - "command-line-args": "~4.0.7", - "command-line-usage": "~4.0.1", - "flatbuffers": "~1.7.0", - "text-encoding": "~0.6.4" + "flatbuffers": "1.7.0", + "text-encoding": "0.6.4" }, "devDependencies": { - "@types/flatbuffers": "~1.6.4", - "@types/jest": "~20.0.8", - "@types/node": "~8.0.24", + "@types/flatbuffers": "1.6.4", + "@types/jest": "20.0.8", + "@types/node": "^8.0.24", "@types/text-encoding": "0.0.32", - "benchmark": "~2.1.4", - "commitizen": "~2.9.6", - "conventional-changelog-cli": "~1.3.2", - "conventional-commits-detector": "~0.1.1", - "conventional-github-releaser": "~1.1.12", - "conventional-recommended-bump": "~1.0.1", - "coveralls": "~2.13.1", - "cz-conventional-changelog": "~2.0.0", - "del": "~3.0.0", - "esdoc": "~1.0.1", - "esdoc-standard-plugin": "~1.0.0", - "google-closure-compiler": "~20170910.0.0", + "benchmark": "2.1.4", + "coveralls": "2.13.1", + "command-line-args": "4.0.7", + "del": "3.0.0", + "esdoc": "1.0.3", + "esdoc-standard-plugin": "1.0.0", + "google-closure-compiler": "20170910.0.0", "gulp": "github:gulpjs/gulp#4.0", - "gulp-json-transform": "~0.4.2", - "gulp-sourcemaps": "~2.6.1", - "gulp-typescript": "~3.2.2", - "jest": "~21.1.0", - "jest-environment-node-debug": "~2.0.0", - "json": "~9.0.6", - "lerna": "~2.1.2", - "lint-staged": "~4.2.1", - "merge2": "~1.2.0", - "mkdirp": "~0.5.1", - "npm-run-all": "~4.1.1", - "pump": "~1.0.2", - "rimraf": "~2.6.1", - "shx": "~0.2.2", - "text-encoding-utf-8": "~1.0.1", - "trash": "~4.0.1", - "ts-jest": "~21.0.1", - "tslib": "~1.7.1", - "tslint": "~5.7.0", - "typescript": "~2.5.2", - "validate-commit-msg": "~2.14.0" - }, - "config": { - "commitizen": { - "path": "cz-conventional-changelog" - } + "gulp-json-transform": "0.4.2", + "gulp-sourcemaps": "2.6.1", + "gulp-typescript": "3.2.2", + "jest": "21.1.0", + "jest-environment-node-debug": "2.0.0", + "json": "9.0.6", + "lerna": "2.2.0", + "lint-staged": "4.2.1", + "merge2": "1.2.0", + "mkdirp": "0.5.1", + "npm-run-all": "4.1.1", + "pump": "1.0.2", + "rimraf": "2.6.2", + "shx": "0.2.2", + "text-encoding-utf-8": "1.0.1", + "trash": "4.1.0", + "ts-jest": "21.0.1", + "tslib": "1.7.1", + "tslint": "5.7.0", + "typescript": "2.5.2" }, "lint-staged": { "*.@(ts)": [ diff --git a/js/prepublish.sh b/js/prepublish.sh index 4ad8db1b439..b40504ae808 100644 --- a/js/prepublish.sh +++ b/js/prepublish.sh @@ -17,10 +17,10 @@ # specific language governing permissions and limitations # under the License. +npm run clean npm run lint npm run build npm run test -preset=`conventional-commits-detector` && echo $preset -bump=`conventional-recommended-bump -p $preset` && echo $bump -npm --no-git-tag-version version $bump &>/dev/null +npm --no-git-tag-version version patch &>/dev/null +npm run bundle npm run lerna:publish \ No newline at end of file diff --git a/js/src/reader/dictionary.ts b/js/src/reader/dictionary.ts index aef2bc9d1cd..abf7ac3dfb9 100644 --- a/js/src/reader/dictionary.ts +++ b/js/src/reader/dictionary.ts @@ -19,21 +19,18 @@ import { readVector } from './vector'; import { MessageBatch } from './message'; import * as Schema_ from '../format/Schema_generated'; import { IteratorState, Dictionaries } from './arrow'; - import Field = Schema_.org.apache.arrow.flatbuf.Field; -import DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding; -export function* readDictionaries(field: Field, +export function* readDictionaries(field: Field | null, batch: MessageBatch, iterator: IteratorState, dictionaries: Dictionaries) { - let id: string, encoding: DictionaryEncoding; - if ((encoding = field.dictionary()) && - batch.id === (id = encoding.id().toFloat64().toString())) { + let id: string, encoding = field && field.dictionary(); + if (encoding && batch.id === (id = encoding.id().toFloat64().toString())) { yield [id, readVector(field, batch, iterator, null)]; return; } - for (let i = -1, n = field.childrenLength(); ++i < n;) { + for (let i = -1, n = field && field.childrenLength() || 0; ++i < n;) { // Since a dictionary batch can only contain a single vector, return early after we find it for (let result of readDictionaries(field.children(i), batch, iterator, dictionaries)) { yield result; diff --git a/js/src/reader/vector.ts b/js/src/reader/vector.ts index a3cd7980364..3b6663be89b 100644 --- a/js/src/reader/vector.ts +++ b/js/src/reader/vector.ts @@ -54,12 +54,13 @@ function readTypedVector(field: Field, batch: MessageBatch, iterator: IteratorSt } function readDictionaryVector(field: Field, batch: MessageBatch, iterator: IteratorState, dictionaries: Dictionaries) { - let encoding: DictionaryEncoding; + let encoding: DictionaryEncoding | null; if (dictionaries && (encoding = field.dictionary())) { let id = encoding.id().toFloat64().toString(); let fieldType = encoding.indexType() || /* a dictionary index defaults to signed 32 bit int if unspecified */ { bitWidth: () => 32, isSigned: () => true }; + // workaround for https://issues.apache.org/jira/browse/ARROW-1363 let indexField = createSyntheticDictionaryIndexField(field, fieldType); let index = readIntVector(indexField, batch, iterator, null, fieldType); return DictionaryVector.create(field, index.length, index, dictionaries[id]); @@ -105,12 +106,16 @@ function createIntVector(field, length, data, validity, offsets, fieldType, batc let type = fieldType || field.type(new Int()), bitWidth = type.bitWidth(); let Vector = valueForBitWidth(bitWidth, intVectors)[+type.isSigned()]; return Vector.create(field, length, validity, data || offsets); - // ---------------------- so this is kinda strange 👆: - // The dictionary encoded vectors I generated from sample mapd-core queries have the indicies' data buffers - // tagged as VectorType.OFFSET (0) in the field metadata. The current TS impl ignores buffers' layout type, - // and assumes the second buffer is the data for a NullableIntVector. Since we've been stricter about enforcing - // the Arrow spec while parsing, the IntVector's data buffer reads empty in this case. If so, fallback to using - // the offsets buffer as the data, since IntVectors don't have offsets. + // ----------------------------------------------- 👆: + // Workaround for https://issues.apache.org/jira/browse/ARROW-1363 + // This bug causes dictionary encoded vector indicies' IntVector data + // buffers to be tagged as VectorType.OFFSET (0) in the field metadata + // instead of VectorType.DATA. The `readVectorLayout` routine strictly + // obeys the types in the field metadata, so if we're parsing an Arrow + // file written by a version of the library published before ARROW-1363 + // was fixed, the IntVector's data buffer will be null, and the offset + // buffer will be the actual data. If data is null, it's safe to assume + // the offset buffer is the data, because IntVectors don't have offsets. } const readFloatVector = readVectorLayout( diff --git a/js/src/table.ts b/js/src/table.ts index 999bb245182..5e781054daf 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -19,6 +19,8 @@ import { readBuffers } from './reader/arrow'; import { StructVector } from './vector/struct'; import { Vector, sliceToRangeArgs } from './vector/vector'; +export type RowObject = { [k: string]: any }; + export class Table implements Iterable> { public length: number; protected _columns: Vector[]; @@ -66,22 +68,30 @@ export class Table implements Iterable> { yield column; } } + getRow(rowIndex: number): RowObject; + getRow(rowIndex: number, compact: boolean): Array; getRow(rowIndex: number, compact?: boolean) { return (compact && rowAsArray || rowAsObject)(rowIndex, this._columns); } - getCell(columnName: string, rowIndex: number) { - return this.getColumn(columnName).get(rowIndex); + getCell(columnName: string, rowIndex: number) { + return this.getColumn>(columnName).get(rowIndex); } - getCellAt(columnIndex: number, rowIndex: number) { - return this.getColumnAt(columnIndex).get(rowIndex); + getCellAt(columnIndex: number, rowIndex: number) { + return this.getColumnAt>(columnIndex).get(rowIndex); } - getColumn(columnName: string) { - return this._columnsMap[columnName] as Vector; + getColumn>(columnName: string) { + return this._columnsMap[columnName] as T; } - getColumnAt(columnIndex: number) { - return this._columns[columnIndex] as Vector; + getColumnAt>(columnIndex: number) { + return this._columns[columnIndex] as T; } - toString({ index = false } = {}) { + toString(): string; + toString(index: boolean): string; + toString(options: { index: boolean }): string; + toString(options?: any) { + const index = typeof options === 'object' ? options && !!options.index + : typeof options === 'boolean' ? !!options + : false; const { length } = this; if (length <= 0) { return ''; } const maxColumnWidths = [];