From cd7adabc46465ed525d3d2a071241a6c437e95b3 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Mon, 2 May 2016 05:31:48 +0200 Subject: [PATCH 1/5] buffer: fix needle length misestimation for UCS2 Use `StringBytes::Size` to determine the needle string length instead of assuming latin-1 or UTF-8. Previously, `Buffer.indexOf` could fail with an assertion failure when the needle's byte length, but not its character count, exceeded the haystack's byte length. --- src/node_buffer.cc | 4 ++-- test/parallel/test-buffer-indexof.js | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 67b469511b0f2d..30618719d27db4 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -995,9 +995,9 @@ void IndexOfString(const FunctionCallbackInfo& args) { const char* haystack = ts_obj_data; const size_t haystack_length = ts_obj_length; - // Extended latin-1 characters are 2 bytes in Utf8. + const size_t needle_length = - enc == BINARY ? needle->Length() : needle->Utf8Length(); + StringBytes::Size(args.GetIsolate(), needle, enc); if (needle_length == 0 || haystack_length == 0) { return args.GetReturnValue().Set(-1); diff --git a/test/parallel/test-buffer-indexof.js b/test/parallel/test-buffer-indexof.js index 0a20d2ce9af021..0aeb066f21ebbc 100644 --- a/test/parallel/test-buffer-indexof.js +++ b/test/parallel/test-buffer-indexof.js @@ -222,6 +222,12 @@ var allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2'); assert.equal(-1, allCharsBufferUtf8.indexOf('notfound')); assert.equal(-1, allCharsBufferUcs2.indexOf('notfound')); +// Needle is longer than haystack, but only because it's encoded as UTF-16 +assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1); + +assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0); +assert.strictEqual(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1); + { // Find substrings in Utf8. const lengths = [1, 3, 15]; // Single char, simple and complex. From ed3852093dbe0f654493970156e8abc01f8c89a4 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Mon, 2 May 2016 07:51:33 +0200 Subject: [PATCH 2/5] buffer: fix UCS2 indexOf for odd buffer length Fix `buffer.indexOf` for the case that the haystack has odd length and the needle is not found in it. `StringSearch()` would return the length of the buffer in multiples of `sizeof(uint16_t)`, but checking that against `haystack_length` would not work if the latter one was odd. --- src/node_buffer.cc | 4 +++- test/parallel/test-buffer-indexof.js | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 30618719d27db4..44020d91a16cc4 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -994,7 +994,9 @@ void IndexOfString(const FunctionCallbackInfo& args) { bool is_forward = args[4]->IsTrue(); const char* haystack = ts_obj_data; - const size_t haystack_length = ts_obj_length; + // Round down to the nearest multiple of 2 in case of UCS2. + const size_t haystack_length = (enc == UCS2) ? + ts_obj_length &~ 1 : ts_obj_length; // NOLINT(whitespace/operators) const size_t needle_length = StringBytes::Size(args.GetIsolate(), needle, enc); diff --git a/test/parallel/test-buffer-indexof.js b/test/parallel/test-buffer-indexof.js index 0aeb066f21ebbc..0cdce9cf258c77 100644 --- a/test/parallel/test-buffer-indexof.js +++ b/test/parallel/test-buffer-indexof.js @@ -228,6 +228,9 @@ assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1); assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0); assert.strictEqual(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1); +// Haystack has odd length, but the needle is UCS2. +assert.strictEqual(Buffer.from('aaaaa').indexOf('b', 'ucs2'), -1); + { // Find substrings in Utf8. const lengths = [1, 3, 15]; // Single char, simple and complex. From 8ec2062099f1c2848d0d92ef05c371aaf7af6029 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Tue, 3 May 2016 03:51:52 +0200 Subject: [PATCH 3/5] src: fix FindFirstCharacter argument alignment --- src/string_search.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/string_search.h b/src/string_search.h index bf246702d7e75e..a9345eb8029278 100644 --- a/src/string_search.h +++ b/src/string_search.h @@ -252,7 +252,7 @@ inline const void* MemrchrFill(const void* haystack, uint8_t needle, // `subject`. Does not check that the whole pattern matches. template inline size_t FindFirstCharacter(Vector pattern, - Vector subject, size_t index) { + Vector subject, size_t index) { const Char pattern_first_char = pattern[0]; const size_t max_n = (subject.length() - pattern.length() + 1); From 4220cb929cc0d9cc57f6d8503e1481f0275f502d Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Mon, 2 May 2016 03:52:46 +0200 Subject: [PATCH 4/5] buffer: fix lastIndexOf crash for overlong needle Return -1 in `Buffer.lastIndexOf` if the needle is longer than the haystack. The previous check only tested the corresponding condition for forward searches. This applies only to Node.js v6, as `lastIndexOf` was added in it. Fixes: https://github.com/nodejs/node/issues/6510 --- src/node_buffer.cc | 6 ++++-- test/parallel/test-buffer-indexof.js | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 44020d91a16cc4..bb77387e8f87b7 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1011,7 +1011,8 @@ void IndexOfString(const FunctionCallbackInfo& args) { } size_t offset = static_cast(opt_offset); CHECK_LT(offset, haystack_length); - if (is_forward && needle_length + offset > haystack_length) { + if ((is_forward && needle_length + offset > haystack_length) || + needle_length > haystack_length) { return args.GetReturnValue().Set(-1); } @@ -1113,7 +1114,8 @@ void IndexOfBuffer(const FunctionCallbackInfo& args) { } size_t offset = static_cast(opt_offset); CHECK_LT(offset, haystack_length); - if (is_forward && needle_length + offset > haystack_length) { + if ((is_forward && needle_length + offset > haystack_length) || + needle_length > haystack_length) { return args.GetReturnValue().Set(-1); } diff --git a/test/parallel/test-buffer-indexof.js b/test/parallel/test-buffer-indexof.js index 0cdce9cf258c77..205a8dfa6a5c6c 100644 --- a/test/parallel/test-buffer-indexof.js +++ b/test/parallel/test-buffer-indexof.js @@ -354,12 +354,35 @@ assert.equal(b.lastIndexOf('b', {}), 1); assert.equal(b.lastIndexOf('b', []), -1); assert.equal(b.lastIndexOf('b', [2]), 1); +// Test needles longer than the haystack. +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'ucs2'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'utf8'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'binary'), -1); +assert.strictEqual(b.lastIndexOf(Buffer.from('aaaaaaaaaaaaaaa')), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 2, 'ucs2'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 3, 'utf8'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 5, 'binary'), -1); +assert.strictEqual(b.lastIndexOf(Buffer.from('aaaaaaaaaaaaaaa'), 7), -1); + +// 你好 expands to a total of 6 bytes using UTF-8 and 4 bytes using UTF-16 +assert.strictEqual(buf_bc.lastIndexOf('你好', 'ucs2'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 'utf8'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 'binary'), -1); +assert.strictEqual(buf_bc.lastIndexOf(Buffer.from('你好')), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 2, 'ucs2'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 3, 'utf8'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 5, 'binary'), -1); +assert.strictEqual(buf_bc.lastIndexOf(Buffer.from('你好'), 7), -1); + // Test lastIndexOf on a longer buffer: var bufferString = new Buffer('a man a plan a canal panama'); assert.equal(15, bufferString.lastIndexOf('canal')); assert.equal(21, bufferString.lastIndexOf('panama')); assert.equal(0, bufferString.lastIndexOf('a man a plan a canal panama')); assert.equal(-1, bufferString.lastIndexOf('a man a plan a canal mexico')); +assert.equal(-1, bufferString.lastIndexOf('a man a plan a canal mexico city')); +assert.equal(-1, bufferString.lastIndexOf(Buffer.from('a'.repeat(1000)))); +assert.equal(0, bufferString.lastIndexOf('a man a plan', 4)); assert.equal(13, bufferString.lastIndexOf('a ')); assert.equal(13, bufferString.lastIndexOf('a ', 13)); assert.equal(6, bufferString.lastIndexOf('a ', 12)); From b8c7a016a26f8a63eb48e636a3e322ee1d75b834 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Tue, 3 May 2016 03:22:47 +0200 Subject: [PATCH 5/5] buffer: fix lastIndexOf index underflow issue Fix `buffer.lastIndexOf()` for the case that the first character of the needle is contained in the haystack, but in a location that makes it impossible to be part of a full match. For example, when searching for 'abc' in 'abcdef', only the 'cdef' part needs to be searched for 'c', because earlier locations can be excluded by index calculations alone. Previously, such a search would result in an assertion failure. This applies only to Node.js v6, as `lastIndexOf` was added in it. --- src/string_search.h | 12 +++++++----- test/parallel/test-buffer-indexof.js | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/string_search.h b/src/string_search.h index a9345eb8029278..0426fd840c0bd1 100644 --- a/src/string_search.h +++ b/src/string_search.h @@ -261,19 +261,19 @@ inline size_t FindFirstCharacter(Vector pattern, const uint8_t search_byte = GetHighestValueByte(pattern_first_char); size_t pos = index; do { - size_t bytes_to_search; + const size_t bytes_to_search = (max_n - pos) * sizeof(Char); const void* void_pos; if (subject.forward()) { // Assert that bytes_to_search won't overflow CHECK_LE(pos, max_n); CHECK_LE(max_n - pos, SIZE_MAX / sizeof(Char)); - bytes_to_search = (max_n - pos) * sizeof(Char); void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search); } else { CHECK_LE(pos, subject.length()); CHECK_LE(subject.length() - pos, SIZE_MAX / sizeof(Char)); - bytes_to_search = (subject.length() - pos) * sizeof(Char); - void_pos = MemrchrFill(subject.start(), search_byte, bytes_to_search); + void_pos = MemrchrFill(subject.start() + pattern.length() - 1, + search_byte, + bytes_to_search); } const Char* char_pos = static_cast(void_pos); if (char_pos == nullptr) @@ -308,7 +308,9 @@ inline size_t FindFirstCharacter(Vector pattern, if (subject.forward()) { pos = memchr(subject.start() + index, pattern_first_char, max_n - index); } else { - pos = MemrchrFill(subject.start(), pattern_first_char, subj_len - index); + pos = MemrchrFill(subject.start() + pattern.length() - 1, + pattern_first_char, + max_n - index); } const uint8_t* char_pos = static_cast(pos); if (char_pos == nullptr) { diff --git a/test/parallel/test-buffer-indexof.js b/test/parallel/test-buffer-indexof.js index 205a8dfa6a5c6c..7d54ea0d47d0a1 100644 --- a/test/parallel/test-buffer-indexof.js +++ b/test/parallel/test-buffer-indexof.js @@ -391,6 +391,23 @@ assert.equal(13, bufferString.lastIndexOf('a ', -1)); assert.equal(0, bufferString.lastIndexOf('a ', -27)); assert.equal(-1, bufferString.lastIndexOf('a ', -28)); +// Test lastIndexOf for the case that the first character can be found, +// but in a part of the buffer that does not make search to search +// due do length constraints. +const abInUCS2 = Buffer.from('ab', 'ucs2'); +assert.strictEqual(-1, Buffer.from('µaaaa¶bbbb', 'binary').lastIndexOf('µ')); +assert.strictEqual(-1, Buffer.from('bc').lastIndexOf('ab')); +assert.strictEqual(-1, Buffer.from('abc').lastIndexOf('qa')); +assert.strictEqual(-1, Buffer.from('abcdef').lastIndexOf('qabc')); +assert.strictEqual(-1, Buffer.from('bc').lastIndexOf(Buffer.from('ab'))); +assert.strictEqual(-1, Buffer.from('bc', 'ucs2').lastIndexOf('ab', 'ucs2')); +assert.strictEqual(-1, Buffer.from('bc', 'ucs2').lastIndexOf(abInUCS2)); + +assert.strictEqual(0, Buffer.from('abc').lastIndexOf('ab')); +assert.strictEqual(0, Buffer.from('abc').lastIndexOf('ab', 1)); +assert.strictEqual(0, Buffer.from('abc').lastIndexOf('ab', 2)); +assert.strictEqual(0, Buffer.from('abc').lastIndexOf('ab', 3)); + // The above tests test the LINEAR and SINGLE-CHAR strategies. // Now, we test the BOYER-MOORE-HORSPOOL strategy. // Test lastIndexOf on a long buffer w multiple matches: