From 086cd5b5cef718743a0b77cc268cedf5e98ff166 Mon Sep 17 00:00:00 2001 From: Moshe Atlow Date: Sun, 9 Apr 2023 09:18:48 +0300 Subject: [PATCH 1/3] path: add `path.glob` --- LICENSE | 34 ++++++ lib/path.js | 10 ++ node.gyp | 1 + src/node_binding.cc | 1 + src/node_external_reference.h | 7 ++ src/node_path.cc | 131 ++++++++++++++++++++++++ test/parallel/test-bootstrap-modules.js | 1 + test/parallel/test-path-glob.mjs | 96 +++++++++++++++++ tools/license-builder.sh | 3 + 9 files changed, 284 insertions(+) create mode 100644 src/node_path.cc create mode 100644 test/parallel/test-path-glob.mjs diff --git a/LICENSE b/LICENSE index 26221cb042bdd1..2db83ba39778d8 100644 --- a/LICENSE +++ b/LICENSE @@ -2162,3 +2162,37 @@ The externally maintained libraries used by Node.js are: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ + +- glob(7), located at src/node_path.cc, is licensed as follows: + """ + Valid-License-Identifier: MIT + SPDX-URL: https://spdx.org/licenses/MIT.html + Usage-Guide: + To use the MIT License put the following SPDX tag/value pair into a + comment according to the placement guidelines in the licensing rules + documentation: + SPDX-License-Identifier: MIT + License-Text: + + MIT License + + Copyright (c) + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + """ diff --git a/lib/path.js b/lib/path.js index 625b6261042940..1878d0d37f9cca 100644 --- a/lib/path.js +++ b/lib/path.js @@ -31,6 +31,8 @@ const { StringPrototypeToLowerCase, } = primordials; +const { glob: _glob } = internalBinding('path'); + const { CHAR_UPPERCASE_A, CHAR_LOWERCASE_A, @@ -153,6 +155,12 @@ function _format(sep, pathObject) { return dir === pathObject.root ? `${dir}${base}` : `${dir}${sep}${base}`; } +function glob(pattern, name) { + validateString(pattern, 'pattern'); + validateString(name, 'name'); + return _glob(pattern, name); +} + const win32 = { /** * path.resolve([from ...], to) @@ -1064,6 +1072,7 @@ const win32 = { return ret; }, + glob, sep: '\\', delimiter: ';', @@ -1530,6 +1539,7 @@ const posix = { return ret; }, + glob, sep: '/', delimiter: ':', diff --git a/node.gyp b/node.gyp index 601c458421628a..09890c8fc2fdb5 100644 --- a/node.gyp +++ b/node.gyp @@ -106,6 +106,7 @@ 'src/node_metadata.cc', 'src/node_options.cc', 'src/node_os.cc', + 'src/node_path.cc', 'src/node_perf.cc', 'src/node_platform.cc', 'src/node_postmortem_metadata.cc', diff --git a/src/node_binding.cc b/src/node_binding.cc index 90855aada5dab9..93be229d779173 100644 --- a/src/node_binding.cc +++ b/src/node_binding.cc @@ -52,6 +52,7 @@ V(mksnapshot) \ V(options) \ V(os) \ + V(path) \ V(performance) \ V(permission) \ V(pipe_wrap) \ diff --git a/src/node_external_reference.h b/src/node_external_reference.h index b2a90ba5194316..bda4a53de1f3a6 100644 --- a/src/node_external_reference.h +++ 
b/src/node_external_reference.h @@ -13,6 +13,11 @@ namespace node { using CFunctionCallbackWithOneByteString = uint32_t (*)(v8::Local, const v8::FastOneByteString&); using CFunctionCallback = void (*)(v8::Local receiver); +using CFunctionCallbackWithTwoOneByteStringsReturningBool = + bool (*)(v8::Local, + const v8::FastOneByteString&, + const v8::FastOneByteString&); +using CFunctionCallback = void (*)(v8::Local receiver); using CFunctionCallbackReturnDouble = double (*)(v8::Local receiver); using CFunctionCallbackWithInt64 = void (*)(v8::Local receiver, @@ -29,6 +34,7 @@ class ExternalReferenceRegistry { #define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \ V(CFunctionCallback) \ V(CFunctionCallbackWithOneByteString) \ + V(CFunctionCallbackWithTwoOneByteStringsReturningBool) \ V(CFunctionCallbackReturnDouble) \ V(CFunctionCallbackWithInt64) \ V(CFunctionCallbackWithBool) \ @@ -90,6 +96,7 @@ class ExternalReferenceRegistry { V(module_wrap) \ V(options) \ V(os) \ + V(path) \ V(performance) \ V(permission) \ V(process_methods) \ diff --git a/src/node_path.cc b/src/node_path.cc new file mode 100644 index 00000000000000..dacdf8bc528529 --- /dev/null +++ b/src/node_path.cc @@ -0,0 +1,131 @@ +#include "env-inl.h" +#include "node_errors.h" +#include "node_external_reference.h" +#include "util-inl.h" +#include "v8-fast-api-calls.h" + +namespace node { + +namespace path { +using v8::Context; +using v8::FunctionCallbackInfo; +using v8::Local; +using v8::Object; +using v8::Value; + +// extracted from +// https://github.com/torvalds/linux/blob/cdc9718d5e590d6905361800b938b93f2b66818e/lib/glob.c +bool glob(char const* pat, char const* str) { + /* + * Backtrack to previous * on mismatch and retry starting one + * character later in the string. Because * matches all characters + * (no exception for /), it can be easily proved that there's + * never a need to backtrack multiple levels. 
+ */ + char const* back_pat = nullptr; + char const* back_str = nullptr; + + /* + * Loop over each token (character or class) in pat, matching + * it against the remaining unmatched tail of str. Return false + * on mismatch, or true after matching the trailing nul bytes. + */ + for (;;) { + unsigned char c = *str++; + unsigned char d = *pat++; + + switch (d) { + case '?': /* Wildcard: anything but nul */ + if (c == '\0') return false; + break; + case '*': /* Any-length wildcard */ + if (*pat == '\0') /* Optimize trailing * case */ + return true; + back_pat = pat; + back_str = --str; /* Allow zero-length match */ + break; + case '[': { /* Character class */ + bool match = false, inverted = (*pat == '!'); + char const* cls = pat + inverted; + unsigned char a = *cls++; + + /* + * Iterate over each span in the character class. + * A span is either a single character a, or a + * range a-b. The first span may begin with ']'. + */ + do { + unsigned char b = a; + + if (a == '\0') /* Malformed */ + goto literal; + + if (cls[0] == '-' && cls[1] != ']') { + b = cls[1]; + + if (b == '\0') goto literal; + + cls += 2; + /* Any special action if a > b? */ + } + match |= (a <= c && c <= b); + } while ((a = *cls++) != ']'); + + if (match == inverted) goto backtrack; + pat = cls; + } break; + case '\\': + d = *pat++; + [[fallthrough]]; + default: /* Literal character */ + literal: + if (c == d) { + if (d == '\0') return true; + break; + } + backtrack: + if (c == '\0' || !back_pat) return false; /* No point continuing */ + /* Try again from last *, one character later in str. 
*/ + pat = back_pat; + str = ++back_str; + break; + } + } +} + +void SlowGlob(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 2); + CHECK(args[0]->IsString()); + CHECK(args[1]->IsString()); + + std::string pattern = Utf8Value(env->isolate(), args[0]).ToString(); + std::string str = Utf8Value(env->isolate(), args[1]).ToString(); + args.GetReturnValue().Set(glob(pattern.c_str(), str.c_str())); +} +bool FastGlob(Local receiver, + const v8::FastOneByteString& pattern, + const v8::FastOneByteString& str) { + return glob(pattern.data, str.data); +} + +v8::CFunction fast_glob_(v8::CFunction::Make(FastGlob)); + +void Initialize(Local target, + Local unused, + Local context, + void* priv) { + SetFastMethod(context, target, "glob", SlowGlob, &fast_glob_); +} + +void RegisterExternalReferences(ExternalReferenceRegistry* registry) { + registry->Register(SlowGlob); + registry->Register(FastGlob); + registry->Register(fast_glob_.GetTypeInfo()); +} +} // namespace path + +} // namespace node + +NODE_BINDING_CONTEXT_AWARE_INTERNAL(path, node::path::Initialize) +NODE_BINDING_EXTERNAL_REFERENCE(path, node::path::RegisterExternalReferences) diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js index 3a8cbfd0b69d28..c3517e4663db5c 100644 --- a/test/parallel/test-bootstrap-modules.js +++ b/test/parallel/test-bootstrap-modules.js @@ -47,6 +47,7 @@ const expectedModules = new Set([ 'NativeModule internal/process/task_queues', 'NativeModule timers', 'Internal Binding trace_events', + 'Internal Binding path', 'NativeModule internal/constants', 'NativeModule path', 'NativeModule internal/process/execution', diff --git a/test/parallel/test-path-glob.mjs b/test/parallel/test-path-glob.mjs new file mode 100644 index 00000000000000..952dcad1f71a21 --- /dev/null +++ b/test/parallel/test-path-glob.mjs @@ -0,0 +1,96 @@ +import '../common/index.mjs'; +import { describe, it } from 'node:test'; 
+import * as assert from 'node:assert'; +import * as path from 'node:path'; + + +// https://github.com/torvalds/linux/blob/cdc9718d5e590d6905361800b938b93f2b66818e/lib/globtest.c +const patterns = [ + { expected: true, pattern: 'a', name: 'a' }, + { expected: false, pattern: 'a', name: 'b' }, + { expected: false, pattern: 'a', name: 'aa' }, + { expected: false, pattern: 'a', name: '' }, + { expected: true, pattern: '', name: '' }, + { expected: false, pattern: '', name: 'a' }, + /* Simple character class tests */ + { expected: true, pattern: '[a]', name: 'a' }, + { expected: false, pattern: '[a]', name: 'b' }, + { expected: false, pattern: '[!a]', name: 'a' }, + { expected: true, pattern: '[!a]', name: 'b' }, + { expected: true, pattern: '[ab]', name: 'a' }, + { expected: true, pattern: '[ab]', name: 'b' }, + { expected: false, pattern: '[ab]', name: 'c' }, + { expected: true, pattern: '[!ab]', name: 'c' }, + { expected: true, pattern: '[a-c]', name: 'b' }, + { expected: false, pattern: '[a-c]', name: 'd' }, + /* Corner cases in character class parsing */ + { expected: true, pattern: '[a-c-e-g]', name: '-' }, + { expected: false, pattern: '[a-c-e-g]', name: 'd' }, + { expected: true, pattern: '[a-c-e-g]', name: 'f' }, + { expected: true, pattern: '[]a-ceg-ik[]', name: 'a' }, + { expected: true, pattern: '[]a-ceg-ik[]', name: ']' }, + { expected: true, pattern: '[]a-ceg-ik[]', name: '[' }, + { expected: true, pattern: '[]a-ceg-ik[]', name: 'h' }, + { expected: false, pattern: '[]a-ceg-ik[]', name: 'f' }, + { expected: false, pattern: '[!]a-ceg-ik[]', name: 'h' }, + { expected: false, pattern: '[!]a-ceg-ik[]', name: ']' }, + { expected: true, pattern: '[!]a-ceg-ik[]', name: 'f' }, + /* Simple wild cards */ + { expected: true, pattern: '?', name: 'a' }, + { expected: false, pattern: '?', name: 'aa' }, + { expected: false, pattern: '??', name: 'a' }, + { expected: true, pattern: '?x?', name: 'axb' }, + { expected: false, pattern: '?x?', name: 'abx' }, + { expected: 
false, pattern: '?x?', name: 'xab' }, + /* Asterisk wild cards (backtracking) */ + { expected: false, pattern: '*??', name: 'a' }, + { expected: true, pattern: '*??', name: 'ab' }, + { expected: true, pattern: '*??', name: 'abc' }, + { expected: true, pattern: '*??', name: 'abcd' }, + { expected: false, pattern: '??*', name: 'a' }, + { expected: true, pattern: '??*', name: 'ab' }, + { expected: true, pattern: '??*', name: 'abc' }, + { expected: true, pattern: '??*', name: 'abcd' }, + { expected: false, pattern: '?*?', name: 'a' }, + { expected: true, pattern: '?*?', name: 'ab' }, + { expected: true, pattern: '?*?', name: 'abc' }, + { expected: true, pattern: '?*?', name: 'abcd' }, + { expected: true, pattern: '*b', name: 'b' }, + { expected: true, pattern: '*b', name: 'ab' }, + { expected: false, pattern: '*b', name: 'ba' }, + { expected: true, pattern: '*b', name: 'bb' }, + { expected: true, pattern: '*b', name: 'abb' }, + { expected: true, pattern: '*b', name: 'bab' }, + { expected: true, pattern: '*bc', name: 'abbc' }, + { expected: true, pattern: '*bc', name: 'bc' }, + { expected: true, pattern: '*bc', name: 'bbc' }, + { expected: true, pattern: '*bc', name: 'bcbc' }, + /* Multiple asterisks (complex backtracking) */ + { expected: true, pattern: '*ac*', name: 'abacadaeafag' }, + { expected: true, pattern: '*ac*ae*ag*', name: 'abacadaeafag' }, + { expected: true, pattern: '*a*b*[bc]*[ef]*g*', name: 'abacadaeafag' }, + { expected: false, pattern: '*a*b*[ef]*[cd]*g*', name: 'abacadaeafag' }, + { expected: true, pattern: '*abcd*', name: 'abcabcabcabcdefg' }, + { expected: true, pattern: '*ab*cd*', name: 'abcabcabcabcdefg' }, + { expected: true, pattern: '*abcd*abcdef*', name: 'abcabcdabcdeabcdefg' }, + { expected: false, pattern: '*abcd*', name: 'abcabcabcabcefg' }, + { expected: false, pattern: '*ab*cd*', name: 'abcabcabcabcefg' }, +]; + +const invalid = [null, undefined, 1, Number.MAX_SAFE_INTEGER, true, false, Symbol(), {}, [], () => {}]; + 
+describe('path.glob', () => { + for (const { expected, pattern, name } of patterns) { + it(`pattern "${pattern}" should ${expected ? '' : 'not '}match "${name}"`, () => { + assert.strictEqual(path.glob(pattern, name), expected); + }); + } + + for (const x of invalid) { + const name = typeof x === 'symbol' ? 'Symbol()' : x; + it(`${name} should throw as a parameter`, () => { + assert.throws(() => path.glob(x, ''), { code: 'ERR_INVALID_ARG_TYPE' }); + assert.throws(() => path.glob('', x), { code: 'ERR_INVALID_ARG_TYPE' }); + }); + } +}); diff --git a/tools/license-builder.sh b/tools/license-builder.sh index 1b52a473a15bf2..112be4a51e2958 100755 --- a/tools/license-builder.sh +++ b/tools/license-builder.sh @@ -146,4 +146,7 @@ addlicense "node-fs-extra" "lib/internal/fs/cp" "$licenseText" addlicense "base64" "deps/base64/base64/" "$(cat "${rootdir}/deps/base64/base64/LICENSE" || true)" +licenseText="$(curl -sL https://raw.githubusercontent.com/torvalds/linux/09a9639e56c01c7a00d6c0ca63f4c7c41abe075d/LICENSES/preferred/MIT)" +addlicense "glob(7)" "src/node_path.cc" "$licenseText" + mv "$tmplicense" "$licensefile" From 577b74fa8a5632635b6c8cbbd6f2f248969538c0 Mon Sep 17 00:00:00 2001 From: Moshe Atlow Date: Mon, 10 Apr 2023 00:15:02 +0300 Subject: [PATCH 2/3] docs --- doc/api/path.md | 23 +++++++++++++++++++++++ lib/path.js | 6 +++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/api/path.md b/doc/api/path.md index d58fc044476bb9..e27c4a7f3c0bc4 100644 --- a/doc/api/path.md +++ b/doc/api/path.md @@ -279,6 +279,29 @@ path.format({ // Returns: 'C:\\path\\dir\\file.txt' ``` +## `path.glob(pattern, path)` + + + +> Stability: 1 - Experimental + +* `pattern` {string} The glob pattern to match against. +* `path` {string} The path to test against the glob pattern. +* Returns: {boolean} + +The `path.glob()` method returns `true` if the `path` matches the glob +`pattern`, otherwise `false`. +Refer to the POSIX glob(7) documentation for more detail. 
+ +```js +path.glob('*.js', 'foo.js'); +// Returns: true +path.glob('*.js', 'foo.json'); +// Returns: false +``` + ## `path.isAbsolute(path)`