Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion js/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ build/Release
node_modules/
jspm_packages/

# Typescript v1 declaration files
# Typescript declaration files
types/
typings/

# Optional npm cache directory
Expand Down
1 change: 1 addition & 0 deletions js/.npmrc
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
save-prefix=
package-lock=false
63 changes: 34 additions & 29 deletions js/gulpfile.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,17 @@ for (const [target, format] of combinations([`all`, `all`])) {
gulp.task(`clean:${combo}`, gulp.series(cleanTask(target, format, combo, `targets/${target}/${format}`)));
gulp.task(`build:${combo}`, gulp.series(buildTask(target, format, combo, `targets/${target}/${format}`)));
gulp.task(`bundle:${combo}`, gulp.series(bundleTask(target, format, combo, `targets/${target}/${format}`)));
gulp.task(`package:${combo}`, gulp.series(packageTask(target, format, combo, `targets/${target}/${format}`)));
gulp.task(`test:debug:${combo}`, gulp.series(testTask(target, format, combo, `targets/${target}/${format}`, true)));
}

gulp.task(`test`, gulp.series(runTaskCombos(`test`)));
gulp.task(`clean`, gulp.parallel(runTaskCombos(`clean`)));
gulp.task(`build`, gulp.parallel(runTaskCombos(`bundle`)));
gulp.task(`build`, gulp.parallel(runTaskCombos(`build`)));
gulp.task(`bundle`, gulp.parallel(runTaskCombos(`bundle`)));
gulp.task(`package`, gulp.parallel(runTaskCombos(`package`)));
gulp.task(`test:debug`, gulp.series(runTaskCombos(`test:debug`)));
gulp.task(`default`, gulp.task(`build`));
gulp.task(`default`, gulp.task(`package`));

function runTaskCombos(name) {
const combos = [];
Expand All @@ -87,7 +90,7 @@ function cleanTask(target, format, taskName, outDir) {
// Task function for one target/format combination: deletes that combination's
// build output by passing a list of globs to `del`.
return function cleanTask() {
// Everything underneath this combination's output directory.
const globs = [`${outDir}/**`];
// Only the es5/cjs combination also clears the root-level declaration folders,
// because the es5/cjs TypeScript build is the one that copies its .d.ts
// output to the root.
if (target === `es5` && format === `cjs`) {
globs.push(`typings`);
globs.push(`types`, `typings`);
}
// Return del's result so gulp can tell when the deletion has finished
// (presumably a promise -- confirm against the `del` version in use).
return del(globs);
};
Expand All @@ -100,30 +103,32 @@ function buildTask(target, format, taskName, outDir) {
}

// Builds the "bundle" step for one target/format combination: copies LICENSE
// and README.md into `outDir` and writes a derived package.json next to them,
// named `@apache-arrow/<target>-<format>`.
//
// NOTE(review): as written this body has two `return` statements, so the
// second one is unreachable. The first returns an array
// ([build task name list, task function]); the second returns the task
// function alone. Confirm which form the callers expect.
function bundleTask(target, format, taskName, outDir) {
return [
// Name of the build task for this combination.
[`build:${taskName}`],
function bundleTask() {
// Merge both copy pipelines into one stream so the task has a single result.
return streamMerge([
pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError),
pump(
gulp.src(`package.json`),
// Start from the per-target fields below, then copy the listed keys from
// the root package.json (`orig`) on top.
gulpJsonTransform((orig) => [
`version`, `description`,
`author`, `homepage`, `bugs`, `license`,
`keywords`, `repository`, `peerDependencies`
].reduce((copy, key) => (
// Assign the key, then always yield `copy` whether or not the value is truthy.
(copy[key] = orig[key]) && copy || copy
), {
main: `Arrow.js`,
typings: `Arrow.d.ts`,
name: `@apache-arrow/${target}-${format}`
}), 2), // presumably the JSON indent width -- confirm in gulp-json-transform docs
gulp.dest(outDir),
onError
)
])
}
];
return function bundleTask() {
// Merge both copy pipelines into one stream so the task has a single result.
return streamMerge([
pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError),
pump(
gulp.src(`package.json`),
// Same as above, but this variant also copies `dependencies` and declares
// both `types` and `typings` entry points.
gulpJsonTransform((orig) => [
`version`, `description`, `keywords`,
`repository`, `author`, `homepage`, `bugs`, `license`,
`dependencies`, `peerDependencies`
].reduce((copy, key) => (
// Assign the key, then always yield `copy` whether or not the value is truthy.
(copy[key] = orig[key]) && copy || copy
), {
main: `Arrow.js`,
types: `Arrow.d.ts`,
typings: `Arrow.d.ts`,
name: `@apache-arrow/${target}-${format}`
}), 2), // presumably the JSON indent width -- confirm in gulp-json-transform docs
gulp.dest(outDir),
onError
)
]);
}
}

/**
 * Lists the gulp task names that make up the `package` step for one
 * target/format combination: the combination's build task followed by its
 * bundle task.
 *
 * `target`, `format` and `outDir` are accepted so the signature matches the
 * other `*Task` factories; only `taskName` is used.
 *
 * @param taskName Combination suffix appended to each step name.
 * @returns Task names in the order `build:<taskName>`, `bundle:<taskName>`.
 */
function packageTask(target, format, taskName, outDir) {
    const steps = [`build`, `bundle`];
    return steps.map((step) => `${step}:${taskName}`);
}

function testTask(target, format, taskName, outDir, debug) {
Expand Down Expand Up @@ -240,10 +245,10 @@ function typescriptTask(target, format, taskName, outDir) {
js = [js, sourcemaps.write(), gulp.dest(outDir)];
// copy types to the root
if (target === `es5` && format === `cjs`) {
dts.push(gulp.dest(`typings`));
dts.push(gulp.dest(`types`));
}
tsProjects.push({
target, format,
target, format,
js: js = pump(...js, onError),
dts: dts = pump(...dts, onError)
});
Expand Down
91 changes: 40 additions & 51 deletions js/package.json
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
{
"name": "apache-arrow",
"version": "0.1.2",
"types": "./types/Arrow.d.ts",
"typings": "./types/Arrow.d.ts",
"main": "./targets/es5/cjs/Arrow.js",
"module": "./targets/es5/esm/Arrow.js",
"browser": "./targets/es5/umd/Arrow.js",
"jsnext:main": "./targets/es2015/esm/Arrow.js",
"esnext:main": "./targets/esnext/esm/Arrow.js",
"typings": "./typings/Arrow.d.ts",
"description": "Apache Arrow columnar in-memory format",
"scripts": {
"lerna": "lerna",
"commit": "git-cz",
"test": "gulp test",
"build": "gulp build",
"clean": "gulp clean",
"bundle": "gulp bundle",
"package": "gulp package",
"perf": "node ./perf/index.js",
"test:debug": "gulp test:debug",
"test:coverage": "gulp test -t esnext -m esm --coverage",
"validate": "npm-run-all lint build test",
"validate": "npm-run-all clean lint build test bundle",
"lerna:publish": "lerna exec --bail=false npm publish",
"prepublishOnly": "sh ./prepublish.sh",
"commitmsg": "validate-commit-msg",
"doc": "shx rm -rf ./doc && esdoc",
"lint": "npm-run-all -p lint:*",
"lint:src": "tslint --fix --type-check -p tsconfig.json -c tslint.json \"src/**/*.ts\"",
Expand All @@ -43,64 +44,52 @@
"files": [
"src",
"dist",
"typings",
"types",
"targets",
"LICENSE",
"README.md"
],
"peerDependencies": {
"tslib": "^1.7.1"
"tslib": "~1.7.1",
"command-line-usage": "4.0.1"
},
"dependencies": {
"command-line-args": "~4.0.7",
"command-line-usage": "~4.0.1",
"flatbuffers": "~1.7.0",
"text-encoding": "~0.6.4"
"flatbuffers": "1.7.0",
"text-encoding": "0.6.4"
},
"devDependencies": {
"@types/flatbuffers": "~1.6.4",
"@types/jest": "~20.0.8",
"@types/node": "~8.0.24",
"@types/flatbuffers": "1.6.4",
"@types/jest": "20.0.8",
"@types/node": "^8.0.24",
"@types/text-encoding": "0.0.32",
"benchmark": "~2.1.4",
"commitizen": "~2.9.6",
"conventional-changelog-cli": "~1.3.2",
"conventional-commits-detector": "~0.1.1",
"conventional-github-releaser": "~1.1.12",
"conventional-recommended-bump": "~1.0.1",
"coveralls": "~2.13.1",
"cz-conventional-changelog": "~2.0.0",
"del": "~3.0.0",
"esdoc": "~1.0.1",
"esdoc-standard-plugin": "~1.0.0",
"google-closure-compiler": "~20170910.0.0",
"benchmark": "2.1.4",
"coveralls": "2.13.1",
"command-line-args": "4.0.7",
"del": "3.0.0",
"esdoc": "1.0.3",
"esdoc-standard-plugin": "1.0.0",
"google-closure-compiler": "20170910.0.0",
"gulp": "github:gulpjs/gulp#4.0",
"gulp-json-transform": "~0.4.2",
"gulp-sourcemaps": "~2.6.1",
"gulp-typescript": "~3.2.2",
"jest": "~21.1.0",
"jest-environment-node-debug": "~2.0.0",
"json": "~9.0.6",
"lerna": "~2.1.2",
"lint-staged": "~4.2.1",
"merge2": "~1.2.0",
"mkdirp": "~0.5.1",
"npm-run-all": "~4.1.1",
"pump": "~1.0.2",
"rimraf": "~2.6.1",
"shx": "~0.2.2",
"text-encoding-utf-8": "~1.0.1",
"trash": "~4.0.1",
"ts-jest": "~21.0.1",
"tslib": "~1.7.1",
"tslint": "~5.7.0",
"typescript": "~2.5.2",
"validate-commit-msg": "~2.14.0"
},
"config": {
"commitizen": {
"path": "cz-conventional-changelog"
}
"gulp-json-transform": "0.4.2",
"gulp-sourcemaps": "2.6.1",
"gulp-typescript": "3.2.2",
"jest": "21.1.0",
"jest-environment-node-debug": "2.0.0",
"json": "9.0.6",
"lerna": "2.2.0",
"lint-staged": "4.2.1",
"merge2": "1.2.0",
"mkdirp": "0.5.1",
"npm-run-all": "4.1.1",
"pump": "1.0.2",
"rimraf": "2.6.2",
"shx": "0.2.2",
"text-encoding-utf-8": "1.0.1",
"trash": "4.1.0",
"ts-jest": "21.0.1",
"tslib": "1.7.1",
"tslint": "5.7.0",
"typescript": "2.5.2"
},
"lint-staged": {
"*.@(ts)": [
Expand Down
6 changes: 3 additions & 3 deletions js/prepublish.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
# specific language governing permissions and limitations
# under the License.

npm run clean
npm run lint
npm run build
npm run test
preset=`conventional-commits-detector` && echo $preset
bump=`conventional-recommended-bump -p $preset` && echo $bump
# This script is invoked as `sh ./prepublish.sh`, and `sh` is not guaranteed
# to be bash. In a POSIX shell `cmd &>/dev/null` parses as `cmd &` (run in the
# background) followed by a separate redirection, so the version bump would
# race with the bundle/publish steps below. Use the portable form to silence
# both stdout and stderr while still running in the foreground.
npm --no-git-tag-version version $bump >/dev/null 2>&1
npm --no-git-tag-version version patch >/dev/null 2>&1
npm run bundle
npm run lerna:publish
11 changes: 4 additions & 7 deletions js/src/reader/dictionary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,18 @@ import { readVector } from './vector';
import { MessageBatch } from './message';
import * as Schema_ from '../format/Schema_generated';
import { IteratorState, Dictionaries } from './arrow';

import Field = Schema_.org.apache.arrow.flatbuf.Field;
import DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding;

export function* readDictionaries(field: Field,
export function* readDictionaries(field: Field | null,
batch: MessageBatch,
iterator: IteratorState,
dictionaries: Dictionaries) {
let id: string, encoding: DictionaryEncoding;
if ((encoding = field.dictionary()) &&
batch.id === (id = encoding.id().toFloat64().toString())) {
let id: string, encoding = field && field.dictionary();
if (encoding && batch.id === (id = encoding.id().toFloat64().toString())) {
yield [id, readVector(field, batch, iterator, null)];
return;
}
for (let i = -1, n = field.childrenLength(); ++i < n;) {
for (let i = -1, n = field && field.childrenLength() || 0; ++i < n;) {
// Since a dictionary batch can only contain a single vector, return early after we find it
for (let result of readDictionaries(field.children(i), batch, iterator, dictionaries)) {
yield result;
Expand Down
19 changes: 12 additions & 7 deletions js/src/reader/vector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,13 @@ function readTypedVector(field: Field, batch: MessageBatch, iterator: IteratorSt
}

function readDictionaryVector(field: Field, batch: MessageBatch, iterator: IteratorState, dictionaries: Dictionaries) {
let encoding: DictionaryEncoding;
let encoding: DictionaryEncoding | null;
if (dictionaries && (encoding = field.dictionary())) {
let id = encoding.id().toFloat64().toString();
let fieldType = encoding.indexType() ||
/* a dictionary index defaults to signed 32 bit int if unspecified */
{ bitWidth: () => 32, isSigned: () => true };
// workaround for https://issues.apache.org/jira/browse/ARROW-1363
let indexField = createSyntheticDictionaryIndexField(field, fieldType);
let index = readIntVector(indexField, batch, iterator, null, fieldType);
return DictionaryVector.create(field, index.length, index, dictionaries[id]);
Expand Down Expand Up @@ -105,12 +106,16 @@ function createIntVector(field, length, data, validity, offsets, fieldType, batc
let type = fieldType || field.type(new Int()), bitWidth = type.bitWidth();
let Vector = valueForBitWidth(bitWidth, intVectors)[+type.isSigned()];
return Vector.create(field, length, validity, data || offsets);
// ---------------------- so this is kinda strange 👆:
// The dictionary encoded vectors I generated from sample mapd-core queries have the indices' data buffers
// tagged as VectorType.OFFSET (0) in the field metadata. The current TS impl ignores buffers' layout type,
// and assumes the second buffer is the data for a NullableIntVector. Since we've been stricter about enforcing
// the Arrow spec while parsing, the IntVector's data buffer reads empty in this case. If so, fallback to using
// the offsets buffer as the data, since IntVectors don't have offsets.
// ----------------------------------------------- 👆:
// Workaround for https://issues.apache.org/jira/browse/ARROW-1363
// This bug causes dictionary encoded vector indices' IntVector data
// buffers to be tagged as VectorType.OFFSET (0) in the field metadata
// instead of VectorType.DATA. The `readVectorLayout` routine strictly
// obeys the types in the field metadata, so if we're parsing an Arrow
// file written by a version of the library published before ARROW-1363
// was fixed, the IntVector's data buffer will be null, and the offset
// buffer will be the actual data. If data is null, it's safe to assume
// the offset buffer is the data, because IntVectors don't have offsets.
}

const readFloatVector = readVectorLayout<number, FloatArray>(
Expand Down
28 changes: 19 additions & 9 deletions js/src/table.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import { readBuffers } from './reader/arrow';
import { StructVector } from './vector/struct';
import { Vector, sliceToRangeArgs } from './vector/vector';

export type RowObject = { [k: string]: any };

export class Table implements Iterable<Map<string, any>> {
public length: number;
protected _columns: Vector<any>[];
Expand Down Expand Up @@ -66,22 +68,30 @@ export class Table implements Iterable<Map<string, any>> {
yield column;
}
}
/**
 * Reads a single row of the table.
 *
 * @param rowIndex Index of the row to read.
 * @param compact When truthy the row is produced by `rowAsArray`; otherwise
 *                it is produced by `rowAsObject`.
 * @returns The row built from this table's columns.
 */
getRow(rowIndex: number): RowObject;
getRow(rowIndex: number, compact: boolean): Array<any>;
getRow(rowIndex: number, compact?: boolean) {
    const buildRow = compact ? rowAsArray : rowAsObject;
    return buildRow(rowIndex, this._columns);
}
getCell(columnName: string, rowIndex: number) {
return this.getColumn(columnName).get(rowIndex);
getCell<T extends any>(columnName: string, rowIndex: number) {
return this.getColumn<Vector<T>>(columnName).get(rowIndex);
}
getCellAt(columnIndex: number, rowIndex: number) {
return this.getColumnAt(columnIndex).get(rowIndex);
getCellAt<T extends any>(columnIndex: number, rowIndex: number) {
return this.getColumnAt<Vector<T>>(columnIndex).get(rowIndex);
}
getColumn<T = any>(columnName: string) {
return this._columnsMap[columnName] as Vector<T>;
getColumn<T extends Vector<any>>(columnName: string) {
return this._columnsMap[columnName] as T;
}
getColumnAt<T = any>(columnIndex: number) {
return this._columns[columnIndex] as Vector<T>;
getColumnAt<T extends Vector<any>>(columnIndex: number) {
return this._columns[columnIndex] as T;
}
toString({ index = false } = {}) {
toString(): string;
toString(index: boolean): string;
toString(options: { index: boolean }): string;
toString(options?: any) {
const index = typeof options === 'object' ? options && !!options.index
: typeof options === 'boolean' ? !!options
: false;
const { length } = this;
if (length <= 0) { return ''; }
const maxColumnWidths = [];
Expand Down