Changes from all commits (53 commits)
dc92b83
fix typo
trxcllnt Feb 26, 2018
ef1acc7
read union buffers in the correct order
trxcllnt Feb 26, 2018
6522cb0
fix Data generics for FixedSizeList
trxcllnt Feb 26, 2018
43c671f
[WIP] add Binary writer
trxcllnt Feb 26, 2018
c8ba1fe
don't write an empty buffer for NullVectors
trxcllnt Feb 26, 2018
ae1f481
align to 64-byte boundaries
trxcllnt Feb 26, 2018
efb840f
fix typo
trxcllnt Feb 27, 2018
18b9dd2
Fix a typo
lsb Apr 26, 2018
aaec76b
fix @std/esm options for node10
trxcllnt May 11, 2018
d98e178
add option to run gulp cmds with `-t src` to run jest against the `sr…
trxcllnt May 11, 2018
7fff99e
move IPC magic into its own module
trxcllnt May 11, 2018
4333e54
FileBlock constructor should accept Long | number, have public number…
trxcllnt May 11, 2018
85eb7ee
fix erroneous footer length check in reader
trxcllnt May 11, 2018
a9d773d
move ValidityView into its own module, like ChunkedView is
trxcllnt May 11, 2018
508f4f8
add getChildAt(n) methods to List and FixedSizeList Vectors to be mor…
trxcllnt May 11, 2018
da0f457
first pass at a working binary writer, only arrow stream format teste…
trxcllnt May 11, 2018
a242da8
Add `Table.prototype.serialize` method to make ArrayBuffers from Tables
trxcllnt May 11, 2018
db02c1c
Add an integration test for binary writer
trxcllnt May 11, 2018
402187e
add apache license headers
trxcllnt May 11, 2018
304e75d
fix magic string alignment in file reader, add file reader tests
trxcllnt May 11, 2018
73a2fa9
fix stream -> file, file -> stream, add tests
trxcllnt May 11, 2018
4e80851
write correct recordBatch length
trxcllnt May 12, 2018
e75da13
add support for reading streaming format via node streams
trxcllnt May 12, 2018
78cba38
arrow2csv: support reading arrow streams from stdin
trxcllnt May 12, 2018
263d06d
clean up js integration script
trxcllnt May 12, 2018
832cc30
add more js integration scripts for creating/converting arrow formats
trxcllnt May 12, 2018
de81ac1
Update JSTester to be an Arrow producer now too
trxcllnt May 12, 2018
af9f4a8
run integration tests in node 10.1
trxcllnt May 13, 2018
3187732
set bitmap alignment to 8 bytes if < 64 values
trxcllnt May 13, 2018
b52af25
cleanup
trxcllnt May 13, 2018
d4b8637
add license headers
trxcllnt May 13, 2018
0be6de3
use node v10.1.0 in travis
trxcllnt May 13, 2018
c0b88c2
always write flatbuffer vectors
trxcllnt May 13, 2018
ccaf489
remove stream-to-iterator
trxcllnt May 13, 2018
081fefc
remove bin from ts package.json
trxcllnt May 13, 2018
a79334d
fix typo again after rebase
trxcllnt May 13, 2018
a6a7ab9
put test tables into hoisted functions so it's easier to set breakpoints
trxcllnt May 14, 2018
2df1a4a
update google-closure-compiler, remove gcc-specific workarounds in th…
trxcllnt May 14, 2018
ed85572
fix instanceof ArrayBuffer in jest/node 10
trxcllnt May 14, 2018
a06180b
don't run JS integration tests in src-only mode when --debug=true
trxcllnt May 15, 2018
efc7225
fix perf tests
trxcllnt May 15, 2018
7924e67
rename readNodeStream -> readStream, fromNodeStream -> fromReadableSt…
trxcllnt May 15, 2018
df43bc5
make arrow2csv support streaming files from stdin, add rowsToString()…
trxcllnt May 15, 2018
14e6b38
cleanup: remove dead code
trxcllnt May 15, 2018
f497f7a
measure maxColumnWidths across all recordBatches when printing a table
trxcllnt May 15, 2018
4ed6554
Merge branch 'master' of https://github.com/apache/arrow into js-buff…
trxcllnt May 16, 2018
b765b12
speed up integration_test.py by only testing the JS source, not every…
trxcllnt May 16, 2018
e34afaa
export the RecordBatchSerializer
trxcllnt May 17, 2018
1a9864c
read message bodyLength from flatbuffer object
trxcllnt May 17, 2018
4594fe3
align to 8-byte boundaries only
trxcllnt May 17, 2018
7a346dc
add a handy script for printing the alignment of buffers in a table
trxcllnt May 17, 2018
917c2fc
test the ES5/UMD bundle in the integration tests
trxcllnt May 17, 2018
261a864
Merge branch 'master' into js-buffer-writer
trxcllnt May 20, 2018
2 changes: 2 additions & 0 deletions ci/travis_env_common.sh
@@ -17,6 +17,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# hide nodejs experimental-feature warnings
+export NODE_NO_WARNINGS=1
 export MINICONDA=$HOME/miniconda
 export PATH="$MINICONDA/bin:$PATH"
 export CONDA_PKGS_DIRS=$HOME/.conda_packages
33 changes: 25 additions & 8 deletions integration/integration_test.py
@@ -1092,35 +1092,52 @@ def file_to_stream(self, file_path, stream_path):
         os.system(cmd)
 
 class JSTester(Tester):
-    PRODUCER = False
+    PRODUCER = True
     CONSUMER = True
 
-    INTEGRATION_EXE = os.path.join(ARROW_HOME, 'js/bin/integration.js')
+    EXE_PATH = os.path.join(ARROW_HOME, 'js/bin')
+    VALIDATE = os.path.join(EXE_PATH, 'integration.js')
+    JSON_TO_ARROW = os.path.join(EXE_PATH, 'json-to-arrow.js')
+    STREAM_TO_FILE = os.path.join(EXE_PATH, 'stream-to-file.js')
+    FILE_TO_STREAM = os.path.join(EXE_PATH, 'file-to-stream.js')
 
     name = 'JS'
 
-    def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
-        cmd = [self.INTEGRATION_EXE]
+    def _run(self, exe_cmd, arrow_path=None, json_path=None, command='VALIDATE'):
+        cmd = [exe_cmd]
 
         if arrow_path is not None:
             cmd.extend(['-a', arrow_path])
 
         if json_path is not None:
             cmd.extend(['-j', json_path])
 
-        cmd.extend(['--mode', command])
+        cmd.extend(['--mode', command, '-t', 'es5', '-m', 'umd'])
 
         if self.debug:
             print(' '.join(cmd))
 
         run_cmd(cmd)
 
     def validate(self, json_path, arrow_path):
-        return self._run(arrow_path, json_path, 'VALIDATE')
+        return self._run(self.VALIDATE, arrow_path, json_path, 'VALIDATE')
 
+    def json_to_file(self, json_path, arrow_path):
+        cmd = ['node', self.JSON_TO_ARROW, '-a', arrow_path, '-j', json_path]
+        cmd = ' '.join(cmd)
+        if self.debug:
+            print(cmd)
+        os.system(cmd)
+
     def stream_to_file(self, stream_path, file_path):
-        # Just copy stream to file, we can read the stream directly
-        cmd = ['cp', stream_path, file_path]
+        cmd = ['cat', stream_path, '|', 'node', self.STREAM_TO_FILE, '>', file_path]
         cmd = ' '.join(cmd)
         if self.debug:
             print(cmd)
         os.system(cmd)
 
+    def file_to_stream(self, file_path, stream_path):
+        cmd = ['cat', file_path, '|', 'node', self.FILE_TO_STREAM, '>', stream_path]
+        cmd = ' '.join(cmd)
+        if self.debug:
+            print(cmd)
196 changes: 2 additions & 194 deletions js/DEVELOP.md
@@ -64,13 +64,11 @@ This argument configuration also applies to `clean` and `test` scripts.

* `npm run deploy`

-Uses [learna](https://github.com/lerna/lerna) to publish each build target to npm with [conventional](https://conventionalcommits.org/) [changelogs](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/conventional-changelog-cli).
+Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm with [conventional](https://conventionalcommits.org/) [changelogs](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/conventional-changelog-cli).

# Updating the Arrow format flatbuffers generated code

-Once generated, the flatbuffers format code needs to be adjusted for our TS and JS build environments.
-
-## TypeScript
+Once generated, the flatbuffers format code needs to be adjusted for our build scripts.

1. Generate the flatbuffers TypeScript source from the Arrow project root directory:
```sh
@@ -101,193 +99,3 @@ Once generated, the flatbuffers format code needs to be adjusted for our TS and
```
1. Add `/* tslint:disable:class-name */` to the top of `Schema.ts`
1. Execute `npm run lint` to fix all the linting errors
## JavaScript (for Google Closure Compiler builds)
wesm (Member):

Is this fixed now in upstream Flatbuffers?

trxcllnt (Contributor, Author), May 23, 2018:

@wesm I was able to update the version of google-closure-compiler we use in commit 2df1a4a, which allowed me to remove a number of hacks we had in the build to create the ES5/UMD bundle. One of those hacks was generating two versions of the FlatBuffers code: a legacy ES5 JS version that closure could use, while everything else used the TypeScript version. With the new update, closure-compiler doesn't choke on the compiled TS output anymore.

1. Generate the flatbuffers JS source from the Arrow project root directory
```sh
cd $ARROW_HOME
flatc --js --no-js-exports -o ./js/src/format ./format/*.fbs
cd ./js/src/format
# Delete Tensor_generated.js (skip this when we support Tensors)
rm Tensor_generated.js
# append an ES6 export to Schema_generated.js
echo "$(cat Schema_generated.js)
export { org };
" > Schema_generated.js
# import Schema's "org" namespace and
# append an ES6 export to File_generated.js
echo "import { org } from './Schema';
$(cat File_generated.js)
export { org };
" > File_generated.js
# import Schema's "org" namespace and
# append an ES6 export to Message_generated.js
echo "import { org } from './Schema';
$(cat Message_generated.js)
export { org };
" > Message_generated.js
```
1. Fixup the generated JS enums with the reverse value-to-key mappings to match TypeScript
`Message_generated.js`
```js
// Replace this
org.apache.arrow.flatbuf.MessageHeader = {
NONE: 0,
Schema: 1,
DictionaryBatch: 2,
RecordBatch: 3,
Tensor: 4
};
// With this
org.apache.arrow.flatbuf.MessageHeader = {
NONE: 0, 0: 'NONE',
Schema: 1, 1: 'Schema',
DictionaryBatch: 2, 2: 'DictionaryBatch',
RecordBatch: 3, 3: 'RecordBatch',
Tensor: 4, 4: 'Tensor'
};
```
`Schema_generated.js`
```js
/**
* @enum
*/
org.apache.arrow.flatbuf.MetadataVersion = {
/**
* 0.1.0
*/
V1: 0, 0: 'V1',
/**
* 0.2.0
*/
V2: 1, 1: 'V2',
/**
* 0.3.0 -> 0.7.1
*/
V3: 2, 2: 'V3',
/**
* >= 0.8.0
*/
V4: 3, 3: 'V4'
};
/**
* @enum
*/
org.apache.arrow.flatbuf.UnionMode = {
Sparse: 0, 0: 'Sparse',
Dense: 1, 1: 'Dense',
};
/**
* @enum
*/
org.apache.arrow.flatbuf.Precision = {
HALF: 0, 0: 'HALF',
SINGLE: 1, 1: 'SINGLE',
DOUBLE: 2, 2: 'DOUBLE',
};
/**
* @enum
*/
org.apache.arrow.flatbuf.DateUnit = {
DAY: 0, 0: 'DAY',
MILLISECOND: 1, 1: 'MILLISECOND',
};
/**
* @enum
*/
org.apache.arrow.flatbuf.TimeUnit = {
SECOND: 0, 0: 'SECOND',
MILLISECOND: 1, 1: 'MILLISECOND',
MICROSECOND: 2, 2: 'MICROSECOND',
NANOSECOND: 3, 3: 'NANOSECOND',
};
/**
* @enum
*/
org.apache.arrow.flatbuf.IntervalUnit = {
YEAR_MONTH: 0, 0: 'YEAR_MONTH',
DAY_TIME: 1, 1: 'DAY_TIME',
};
/**
* ----------------------------------------------------------------------
* Top-level Type value, enabling extensible type-specific metadata. We can
* add new logical types to Type without breaking backwards compatibility
*
* @enum
*/
org.apache.arrow.flatbuf.Type = {
NONE: 0, 0: 'NONE',
Null: 1, 1: 'Null',
Int: 2, 2: 'Int',
FloatingPoint: 3, 3: 'FloatingPoint',
Binary: 4, 4: 'Binary',
Utf8: 5, 5: 'Utf8',
Bool: 6, 6: 'Bool',
Decimal: 7, 7: 'Decimal',
Date: 8, 8: 'Date',
Time: 9, 9: 'Time',
Timestamp: 10, 10: 'Timestamp',
Interval: 11, 11: 'Interval',
List: 12, 12: 'List',
Struct_: 13, 13: 'Struct_',
Union: 14, 14: 'Union',
FixedSizeBinary: 15, 15: 'FixedSizeBinary',
FixedSizeList: 16, 16: 'FixedSizeList',
Map: 17, 17: 'Map'
};
/**
* ----------------------------------------------------------------------
* The possible types of a vector
*
* @enum
*/
org.apache.arrow.flatbuf.VectorType = {
/**
* used in List type, Dense Union and variable length primitive types (String, Binary)
*/
OFFSET: 0, 0: 'OFFSET',
/**
* actual data, either fixed width primitive types in slots or variable width delimited by an OFFSET vector
*/
DATA: 1, 1: 'DATA',
/**
* Bit vector indicating if each value is null
*/
VALIDITY: 2, 2: 'VALIDITY',
/**
* Type vector used in Union type
*/
TYPE: 3, 3: 'TYPE'
};
/**
* ----------------------------------------------------------------------
* Endianness of the platform producing the data
*
* @enum
*/
org.apache.arrow.flatbuf.Endianness = {
Little: 0, 0: 'Little',
Big: 1, 1: 'Big',
};
```
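The reverse value-to-key entries above mirror what TypeScript itself emits for numeric enums, so the plain-JS output behaves the same way. A minimal illustration:

```ts
// TypeScript numeric enums compile to objects that map in both
// directions: key-to-value and value-to-key.
enum Precision { HALF = 0, SINGLE = 1, DOUBLE = 2 }

console.log(Precision.SINGLE); // 1
console.log(Precision[1]);     // 'SINGLE'
```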
37 changes: 37 additions & 0 deletions js/bin/file-to-stream.js
@@ -0,0 +1,37 @@
#! /usr/bin/env node

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

const fs = require('fs');
const path = require('path');

const encoding = 'binary';
const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : '';
const { util: { PipeIterator } } = require(`../index${ext}`);
const { Table, serializeStream, fromReadableStream } = require(`../index${ext}`);

(async () => {
    // Todo (ptaylor): implement `serializeStreamAsync` that accepts an
    // AsyncIterable<Buffer>, rather than aggregating into a Table first
    const in_ = process.argv.length < 3
        ? process.stdin : fs.createReadStream(path.resolve(process.argv[2]));
    const out = process.argv.length < 4
        ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3]));
    new PipeIterator(serializeStream(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out);
TheNeuralBit (Member):
Would it be possible to wrap this logic into a method on Table? Something like

public serializeTo (out) {
    new PipeIterator(serializeStream(this, 'binary')).pipe(out)
}

trxcllnt (Contributor, Author):

@TheNeuralBit yeah, that's a bit of what the comment above is about.

Ideally we wouldn't have to aggregate the input into a Table; instead we'd just stream the fromReadableStream() values through a hypothetical writeStreamAsync method:

writeStreamAsync: (input: AsyncIterable<Schema | RecordBatch>) => AsyncIterable<Uint8Array>;

That said, the table should also have a serialize() method that returns a PipeIterator instead of a concatenated buffer, like we do with rowsToString()

TheNeuralBit (Member), May 18, 2018:

> That said, the table should also have a serialize() method that returns a PipeIterator instead of a concatenated buffer, like we do with rowsToString()

Yeah, that's all I was getting at.
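
A minimal sketch of that hypothetical writeStreamAsync, assuming a per-message serializer exists (serializeMessage below is a placeholder, not an actual export of this library):

```ts
import { Schema, RecordBatch } from 'apache-arrow';

// Placeholder for a per-message serializer; assumed here, not a real export.
declare function serializeMessage(message: Schema | RecordBatch): Uint8Array;

// Serialize each Schema or RecordBatch as it arrives, so no
// intermediate Table aggregation is needed.
async function* writeStreamAsync(
    input: AsyncIterable<Schema | RecordBatch>
): AsyncIterable<Uint8Array> {
    for await (const message of input) {
        yield serializeMessage(message);
    }
}
```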


})().catch((e) => { console.error(e); process.exit(1); });