From bdcb92089f0d96ebdbbe35cee9f9de2ffc4ba436 Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Wed, 2 Aug 2017 22:58:47 +0800 Subject: [PATCH 1/5] feat: improve putRawString performance skip the Buffer.byteLength(str) call and trade extra memory for better performance: 10000 * 10 went from 869ms down to 489ms --- benchmark/putRawString.js | 40 +++++++++++++++++++++++++++++++++++++++ lib/byte.js | 23 ++++++---------------- package.json | 1 + 3 files changed, 47 insertions(+), 17 deletions(-) create mode 100644 benchmark/putRawString.js diff --git a/benchmark/putRawString.js b/benchmark/putRawString.js new file mode 100644 index 0000000..b8a93a4 --- /dev/null +++ b/benchmark/putRawString.js @@ -0,0 +1,40 @@ +'use strict'; + +var bench = require('fastbench'); +var ByteBuffer = require('..'); + +var bb = ByteBuffer.allocate(1024); +var max = 10; + +bb.putRawString(makeStr('a', 200), '2.0'); +console.log('bytes %s', bb.array().length); +bb.reset(); + +var run = bench([ + function putRawString(cb) { + for (var i = 0; i < max; i++) { + bb.putRawString(makeStr('a', 200), '2.0'); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, +], 10000); + +run(run); + +function makeStr(str, concats) { + var s = '' + while (concats--) { + s += str + } + return s +} + +// before: +// putRawString*10000: 912.743ms +// putRawString*10000: 869.517ms + +// after: +// putRawString*10000: 502.805ms +// putRawString*10000: 489.996ms diff --git a/lib/byte.js b/lib/byte.js index 9bc6bfc..a9c3ac4 100644 --- a/lib/byte.js +++ b/lib/byte.js @@ -1,19 +1,4 @@ -/*! - * byte - lib/byte.js - * - * Copyright(c) 2013 - 2014 - * MIT Licensed - * - * Authors: - * fengmk2 (http://fengmk2.github.com) - * dead-horse (https://github.com/dead-horse) - */ - -"use strict"; - -/** - * Module dependencies. 
- */ +'use strict'; var Long = require('long'); var debug = require('debug')('byte'); @@ -359,7 +344,11 @@ ByteBuffer.prototype.putRawString = function (index, str) { index = this._offset; // Note that an UTF-8 encoder will encode a character that is outside BMP // as 4 bytes, yet a CESU-8 encoder will encode as 6 bytes, ergo 6 / 4 = 1.5 - this._checkSize(this._offset + Math.ceil(Buffer.byteLength(str) * 1.5)); + // this._checkSize(this._offset + Math.ceil(Buffer.byteLength(str) * 1.5)); + + // use big memory to exchange better performence + // one char => max bytes is 6 + this._checkSize(this._offset + str.length * 6); } if (!str || str.length === 0) { diff --git a/package.json b/package.json index c015212..ce739c7 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,7 @@ "beautify-benchmark": "0", "benchmark": "1", "contributors": "*", + "fastbench": "^1.0.1", "istanbul": "*", "jshint": "*", "mocha": "*", From 5698ebc0886ce4d590359b8298fa800ed793b313 Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Wed, 2 Aug 2017 23:24:43 +0800 Subject: [PATCH 2/5] f --- benchmark/putRawString.js | 32 ++++++++++++++++++++++++-------- lib/byte.js | 14 ++++++++++++-- test/byte.test.js | 15 ++------------- 3 files changed, 38 insertions(+), 23 deletions(-) diff --git a/benchmark/putRawString.js b/benchmark/putRawString.js index b8a93a4..9c328de 100644 --- a/benchmark/putRawString.js +++ b/benchmark/putRawString.js @@ -3,22 +3,38 @@ var bench = require('fastbench'); var ByteBuffer = require('..'); +var largeStr = JSON.stringify(require('../package.json')) +largeStr += largeStr +largeStr += largeStr + var bb = ByteBuffer.allocate(1024); var max = 10; -bb.putRawString(makeStr('a', 200), '2.0'); -console.log('bytes %s', bb.array().length); +bb.putRawString(makeStr('a', 200)); +console.log('small bytes %s', bb.array().length); +bb.reset(); + +bb.putRawString(makeStr(largeStr, 10)); +console.log('large bytes %s', bb.array().length); bb.reset(); var run = bench([ - function 
putRawString(cb) { + function putRawStringSmall(cb) { for (var i = 0; i < max; i++) { - bb.putRawString(makeStr('a', 200), '2.0'); + bb.putRawString(makeStr('a', 200)); } bb.array(); bb.reset(); setImmediate(cb); }, + // function putRawStringLarge(cb) { + // for (var i = 0; i < max; i++) { + // bb.putRawString(makeStr(largeStr, 10)); + // } + // bb.array(); + // bb.reset(); + // setImmediate(cb); + // }, ], 10000); run(run); @@ -32,9 +48,9 @@ function makeStr(str, concats) { } // before: -// putRawString*10000: 912.743ms -// putRawString*10000: 869.517ms +// putRawStringSmall*10000: 912.743ms +// putRawStringSmall*10000: 869.517ms // after: -// putRawString*10000: 502.805ms -// putRawString*10000: 489.996ms +// putRawStringSmall*10000: 502.805ms +// putRawStringSmall*10000: 489.996ms diff --git a/lib/byte.js b/lib/byte.js index a9c3ac4..22b6908 100644 --- a/lib/byte.js +++ b/lib/byte.js @@ -9,6 +9,7 @@ var DEFAULT_SIZE = 1024; var BIG_ENDIAN = 1; var LITTLE_ENDIAN = 2; var MAX_INT_31 = Math.pow(2, 31); +var ONE_HUNDRED_MB = 100 * 1024 * 1024; function ByteBuffer(options) { options = options || {}; @@ -56,7 +57,12 @@ ByteBuffer.prototype._checkSize = function (afterSize) { this._size = afterSize * 2; this._limit = this._size; debug('allocate new Buffer: from %d to %d bytes', old, this._size); - var bytes = new Buffer(this._size); + var bytes; + if (Buffer.allocUnsafe) { + bytes = Buffer.allocUnsafe(this._size); + } else { + bytes = new Buffer(this._size); + } this._bytes.copy(bytes, 0); this._bytes = bytes; }; @@ -348,7 +354,11 @@ ByteBuffer.prototype.putRawString = function (index, str) { // use big memory to exchange better performence // one char => max bytes is 6 - this._checkSize(this._offset + str.length * 6); + var maxSize = str.length * 6; + if (maxSize > ONE_HUNDRED_MB) { + maxSize = Math.ceil(Buffer.byteLength(str) * 1.5); + } + this._checkSize(this._offset + maxSize); } if (!str || str.length === 0) { diff --git a/test/byte.test.js b/test/byte.test.js 
index e63572b..402cff2 100644 --- a/test/byte.test.js +++ b/test/byte.test.js @@ -1,15 +1,4 @@ -/*! - * byte - test/byte.test.js - * - * Copyright(c) 2013 - 2014 - * MIT Licensed - * - * Authors: - * fengmk2 (http://fengmk2.github.com) - * dead-horse (https://github.com/dead-horse) - */ - -"use strict"; +'use strict'; var Long = require('long'); var assert = require('assert'); @@ -514,7 +503,7 @@ describe('byte.test.js', function () { bytes.putRawString(str); assert(bytes.toString() === ''); assert.deepEqual(bytes.getRawString(0, 12), str); - + // Construction of a special test case which triggers the bug // of allocating insufficient space via _checkSize var bytes = ByteBuffer.allocate(4); From bfcda063ed95072f08f386b0f531ec51a60986c7 Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Thu, 3 Aug 2017 06:43:13 +0800 Subject: [PATCH 3/5] f --- lib/byte.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/byte.js b/lib/byte.js index 22b6908..b084557 100644 --- a/lib/byte.js +++ b/lib/byte.js @@ -354,11 +354,11 @@ ByteBuffer.prototype.putRawString = function (index, str) { // use big memory to exchange better performence // one char => max bytes is 6 - var maxSize = str.length * 6; - if (maxSize > ONE_HUNDRED_MB) { - maxSize = Math.ceil(Buffer.byteLength(str) * 1.5); + var maxIncreaseSize = str.length * 6; + if (maxIncreaseSize > ONE_HUNDRED_MB) { + maxIncreaseSize = Math.ceil(Buffer.byteLength(str) * 1.5); } - this._checkSize(this._offset + maxSize); + this._checkSize(this._offset + maxIncreaseSize); } if (!str || str.length === 0) { From 49296baefc71471fe6a037fa53f67ca612ca4825 Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Thu, 3 Aug 2017 07:15:22 +0800 Subject: [PATCH 4/5] f --- lib/byte.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/byte.js b/lib/byte.js index b084557..f8cbea2 100644 --- a/lib/byte.js +++ b/lib/byte.js @@ -350,21 +350,23 @@ ByteBuffer.prototype.putRawString = function (index, str) { index = 
this._offset; // Note that an UTF-8 encoder will encode a character that is outside BMP // as 4 bytes, yet a CESU-8 encoder will encode as 6 bytes, ergo 6 / 4 = 1.5 + // @see https://en.wikipedia.org/wiki/CESU-8 // this._checkSize(this._offset + Math.ceil(Buffer.byteLength(str) * 1.5)); // use big memory to exchange better performence - // one char => max bytes is 6 - var maxIncreaseSize = str.length * 6; + // one char => max bytes is 3 + var maxIncreaseSize = str.length * 3; if (maxIncreaseSize > ONE_HUNDRED_MB) { maxIncreaseSize = Math.ceil(Buffer.byteLength(str) * 1.5); } this._checkSize(this._offset + maxIncreaseSize); } - if (!str || str.length === 0) { + var len = str && str.length; + if (!len) { return this; } - for (var i = 0, len = str.length; i < len; i++) { + for (var i = 0; i < len; i++) { var ch = str.charCodeAt(i); if (ch < 0x80) { this._bytes[index++] = ch >>> 32; From 590feb8b017155e6f200c023c28253d56f53dadb Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Thu, 3 Aug 2017 08:54:55 +0800 Subject: [PATCH 5/5] f --- lib/byte.js | 12 +++- package.json | 8 ++- test/byte.test.js | 143 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+), 4 deletions(-) diff --git a/lib/byte.js b/lib/byte.js index f8cbea2..f29e6cf 100644 --- a/lib/byte.js +++ b/lib/byte.js @@ -362,15 +362,25 @@ ByteBuffer.prototype.putRawString = function (index, str) { this._checkSize(this._offset + maxIncreaseSize); } + // CESU-8 Bit Distribution + // @see http://www.unicode.org/reports/tr26/ + // + // UTF-16 Code Unit | 1st Byte | 2nd Byte | 3rd Byte + // 000000000xxxxxxx (0x0000 ~ 0x007f) | 0xxxxxxx (0x00 ~ 0x7f) | | + // 00000yyyyyxxxxxx (0x0080 ~ 0x07ff) | 110yyyyy (0xc0 ~ 0xdf) | 10xxxxxx (0x80 ~ 0xbf) | + // zzzzyyyyyyxxxxxx (0x0800 ~ 0xffff) | 1110zzzz (0xe0 ~ 0xef) | 10yyyyyy (0x80 ~ 0xbf) | 10xxxxxx (0x80 ~ 0xbf) + var len = str && str.length; if (!len) { return this; } for (var i = 0; i < len; i++) { var ch = str.charCodeAt(i); + // 0x80: 128 if (ch < 
0x80) { - this._bytes[index++] = ch >>> 32; + this._bytes[index++] = ch; } else if (ch < 0x800) { + // 0x800: 2048 this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32; this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32; } else { diff --git a/package.json b/package.json index ce739c7..9d96280 100644 --- a/package.json +++ b/package.json @@ -4,10 +4,10 @@ "description": "Input Buffer and Output Buffer, just like Java ByteBuffer", "main": "lib/byte.js", "files": [ - "lib/" + "lib" ], "scripts": { - "test": "mocha -R spec -t 5000 test/*.test.js", + "test": "mocha --require intelli-espower-loader -R spec -t 5000 test/*.test.js", "test-cov": "node node_modules/.bin/istanbul cover node_modules/.bin/_mocha -- -t 5000 test/*.test.js", "test-travis": "node node_modules/.bin/istanbul cover node_modules/.bin/_mocha --report lcovonly -- -t 5000 test/*.test.js", "jshint": "jshint .", @@ -27,10 +27,12 @@ "benchmark": "1", "contributors": "*", "fastbench": "^1.0.1", + "intelli-espower-loader": "^1.0.1", "istanbul": "*", "jshint": "*", "mocha": "*", - "optimized": "^1.2.0" + "optimized": "^1.2.0", + "power-assert": "^1.4.4" }, "homepage": "https://github.com/node-modules/byte", "repository": { diff --git a/test/byte.test.js b/test/byte.test.js index 402cff2..d3cb1ba 100644 --- a/test/byte.test.js +++ b/test/byte.test.js @@ -483,6 +483,149 @@ describe('byte.test.js', function () { assert(bytes.toString() === ''); }); + it('should 000000000xxxxxxx (0x0000 ~ 0x007f) => 0xxxxxxx (0x00 ~ 0x7f)', function() { + // UTF-8 + var bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x0000)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x0000)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x0001)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + 
bytes.putRawString(String.fromCharCode(0x0001)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString('E'); // 0x45 + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString('E'); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x7F)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x7F)); + assert(bytes.toString() === ''); + }); + + it('should 00000yyyyyxxxxxx (0x0080 ~ 0x07ff) => 110yyyyy (0xc0 ~ 0xdf) | 10xxxxxx (0x80 ~ 0xbf)', function() { + // UTF-8 + var bytes = ByteBuffer.allocate(1); + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x80)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x80)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString('ȅ'); // 0x0205: 517 + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString('ȅ'); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x81)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x81)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x7FE)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x7FE)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x7FF)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x7FF)); + assert(bytes.toString() === ''); + }); + + 
it('should zzzzyyyyyyxxxxxx (0x0800 ~ 0xffff) => 1110zzzz (0xe0 ~ 0xef) | 10yyyyyy (0x80 ~ 0xbf) | 10xxxxxx (0x80 ~ 0xbf)', function() { + // UTF-8 + var bytes = ByteBuffer.allocate(1); + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x800)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x800)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0x801)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0x801)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString('𐐀'); // 0xD801 0xDC00 + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString('𐐀'); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString('\ud801\udc01'); // 0xD801 0xDC01 + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString('\ud801\udc01'); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0xFFFE)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0xFFFE)); + assert(bytes.toString() === ''); + + // UTF-8 + bytes = ByteBuffer.allocate(1); + bytes.putString(String.fromCharCode(0xFFFF)); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString(String.fromCharCode(0xFFFF)); + assert(bytes.toString() === ''); + }); + it('should put emoji', function () { // utf8 var bytes = ByteBuffer.allocate(1);