From 1aa688fb421fd243ac2786f286ba303f9e73d60a Mon Sep 17 00:00:00 2001 From: Superchupu <53496941+SuperchupuDev@users.noreply.github.com> Date: Fri, 17 Jan 2025 23:12:42 +0100 Subject: [PATCH 1/5] improve exclude optimizer Co-authored-by: Joachim Viide --- src/index.ts | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/index.ts b/src/index.ts index a9c63da..d7262d4 100644 --- a/src/index.ts +++ b/src/index.ts @@ -116,26 +116,17 @@ function processPatterns( const newPattern = normalizePattern(pattern, expandDirectories, cwd, properties, false); matchPatterns.push(newPattern); const split = newPattern.split('/'); - if (split[split.length - 1] === '**') { - if (split[split.length - 2] !== '..') { - split[split.length - 2] = '**'; - split.pop(); - } - transformed.push(split.length ? split.join('/') : '*'); - } else { - transformed.push(split.length > 1 ? split.slice(0, -1).join('/') : split.join('/')); - } - for (let i = split.length - 2; i > 0; i--) { - const part = split.slice(0, i); - if (part[part.length - 1] === '**') { - part.pop(); - if (part.length > 1) { - part.pop(); - } - } - transformed.push(part.join('/')); - } + transformed.push( + split + .map((part, index) => { + if (index === 0) { + return part; + } + return `?(/${part}`; + }) + .join('') + ')'.repeat(Math.max(split.length - 1, 0)) + ); } else if (pattern[1] !== '!' || pattern[2] === '(') { const newPattern = normalizePattern(pattern.slice(1), expandDirectories, cwd, properties, true); ignorePatterns.push(newPattern); @@ -147,7 +138,7 @@ function processPatterns( // TODO: this is slow, find a better way to do this function getRelativePath(path: string, cwd: string, root: string) { - return posix.relative(cwd, `${root}/${path}`); + return posix.relative(cwd, `${root}/${path}`) || '.'; } function processPath(path: string, cwd: string, root: string, isDirectory: boolean, absolute?: boolean) { From 9edfa0f7c36c5026870b76a3cb7d23462d2c5cc7 Mon Sep 17 00:00:00 2001 From: Superchupu <53496941+SuperchupuDev@users.noreply.github.com> Date: Sat, 18 Jan 2025 00:13:26 +0100 Subject: [PATCH 2/5] fix pattern splitting --- src/index.ts | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/index.ts b/src/index.ts index d7262d4..5a93ef6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -54,20 +54,20 @@ function normalizePattern( properties.depthOffset = -(parentDirectoryMatch[0].length + 1) / 3; } } else if (!isIgnore && properties.depthOffset >= 0) { - const current = result.split('/'); - properties.commonPath ??= current; + const parts = splitPattern(result); + properties.commonPath ??= parts; const newCommonPath = []; - for (let i = 0; i < Math.min(properties.commonPath.length, current.length); i++) { - const part = current[i]; + for (let i = 0; i < Math.min(properties.commonPath.length, parts.length); i++) { + const part = parts[i]; - if (part === '**' && !current[i + 1]) { + if (part === '**' && !parts[i + 1]) { newCommonPath.pop(); break; } - if (part !== properties.commonPath[i] || isDynamicPattern(part) || i === current.length - 1) { + if (part !== properties.commonPath[i] || isDynamicPattern(part) || i === parts.length - 1) { break; } @@ -115,7 +115,7 @@ function processPatterns( if (!pattern.startsWith('!') || pattern[1] === '(') { const newPattern = normalizePattern(pattern, expandDirectories, cwd, properties, false); matchPatterns.push(newPattern); - const split = newPattern.split('/'); + const split = splitPattern(newPattern); transformed.push( split @@ -136,6 +136,12 @@ function processPatterns( return { match: matchPatterns, ignore: ignorePatterns, transformed }; } +// if a pattern has no slashes outside glob symbols, results.parts is [] +function splitPattern(path: string) { + const result = picomatch.scan(path, { parts: true }); + return result.parts?.length ? result.parts : [path]; +} + // TODO: this is slow, find a better way to do this function getRelativePath(path: string, cwd: string, root: string) { return posix.relative(cwd, `${root}/${path}`) || '.'; From fe3b3428fadaf6d91017e8239be68c6fd58990bb Mon Sep 17 00:00:00 2001 From: Superchupu <53496941+SuperchupuDev@users.noreply.github.com> Date: Wed, 29 Jan 2025 21:03:37 +0100 Subject: [PATCH 3/5] fix slashes inside parts by just not optimizing them didn't want to do this, but this pr is important and a release is being delayed too much. thankfully, it seems like almost no one uses patterns like that. any optimizations here are welcome --- src/index.ts | 33 +++++++++++++++++++++++---------- test/index.test.ts | 5 +++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/index.ts b/src/index.ts index 5a93ef6..8eba5fc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -117,16 +117,29 @@ function processPatterns( matchPatterns.push(newPattern); const split = splitPattern(newPattern); - transformed.push( - split - .map((part, index) => { - if (index === 0) { - return part; - } - return `?(/${part}`; - }) - .join('') + ')'.repeat(Math.max(split.length - 1, 0)) - ); + let brackets = 0; + let finalPattern = ''; + for (let i = 0; i < split.length; i++) { + const part = split[i]; + // we can't easily optimize patterns that contain parts with slashes such as `*(a/b)` + // so we just convert them to `**` for now. any optimizations here would are welcome + if (part.includes('/')) { + if (i > 0) { + brackets++; + } + finalPattern += '**'; + break; + } + + if (i === 0) { + finalPattern += part; + continue; + } + + brackets++; + finalPattern += `?(/${part}`; + } + transformed.push(finalPattern + ')'.repeat(Math.max(brackets, 0))); } else if (pattern[1] !== '!' || pattern[2] === '(') { const newPattern = normalizePattern(pattern.slice(1), expandDirectories, cwd, properties, true); ignorePatterns.push(newPattern); diff --git a/test/index.test.ts b/test/index.test.ts index bbe30bd..5f9e5a4 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -307,6 +307,11 @@ test('matching files with specific naming pattern', async () => { assert.deepEqual(files.sort(), ['a/a.txt', 'a/b.txt', 'b/a.txt', 'b/b.txt']); }); +test('dynamic patterns that include slashes inside parts', async () => { + const files = await glob({ patterns: ['{.a/a,a}/a.txt'], cwd }); + assert.deepEqual(files.sort(), ['.a/a/a.txt', 'a/a.txt']); +}); + test('using extglob patterns', async () => { const files = await glob({ patterns: ['a/*(a|b).txt'], cwd }); assert.deepEqual(files.sort(), ['a/a.txt', 'a/b.txt']); From 04356bff2e411c320414b8d376b5d44431274186 Mon Sep 17 00:00:00 2001 From: Superchupu <53496941+SuperchupuDev@users.noreply.github.com> Date: Fri, 14 Feb 2025 20:48:25 +0100 Subject: [PATCH 4/5] complete rework from the ground up --- src/index.ts | 47 +++------------- src/utils.ts | 61 +++++++++++++++++++- test/utils/partial-matcher.test.ts | 90 ++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 41 deletions(-) create mode 100644 test/utils/partial-matcher.test.ts diff --git a/src/index.ts b/src/index.ts index 8eba5fc..1fa2502 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,7 @@ import path, { posix } from 'node:path'; import { type Options as FdirOptions, fdir } from 'fdir'; import picomatch from 'picomatch'; -import { escapePath, isDynamicPattern } from './utils.ts'; +import { escapePath, getPartialMatcher, isDynamicPattern, splitPattern } from './utils.ts'; export interface GlobOptions { absolute?: boolean; @@ -110,49 +110,17 @@ function processPatterns( } } - const transformed: string[] = []; for (const pattern of patterns) { if (!pattern.startsWith('!') || pattern[1] === '(') { const newPattern = normalizePattern(pattern, expandDirectories, cwd, properties, false); matchPatterns.push(newPattern); - const split = splitPattern(newPattern); - - let brackets = 0; - let finalPattern = ''; - for (let i = 0; i < split.length; i++) { - const part = split[i]; - // we can't easily optimize patterns that contain parts with slashes such as `*(a/b)` - // so we just convert them to `**` for now. any optimizations here would are welcome - if (part.includes('/')) { - if (i > 0) { - brackets++; - } - finalPattern += '**'; - break; - } - - if (i === 0) { - finalPattern += part; - continue; - } - - brackets++; - finalPattern += `?(/${part}`; - } - transformed.push(finalPattern + ')'.repeat(Math.max(brackets, 0))); } else if (pattern[1] !== '!' || pattern[2] === '(') { const newPattern = normalizePattern(pattern.slice(1), expandDirectories, cwd, properties, true); ignorePatterns.push(newPattern); } } - return { match: matchPatterns, ignore: ignorePatterns, transformed }; -} - -// if a pattern has no slashes outside glob symbols, results.parts is [] -function splitPattern(path: string) { - const result = picomatch.scan(path, { parts: true }); - return result.parts?.length ? result.parts : [path]; + return { match: matchPatterns, ignore: ignorePatterns }; } // TODO: this is slow, find a better way to do this @@ -196,10 +164,9 @@ function crawl(options: GlobOptions, cwd: string, sync: boolean) { nocase: options.caseSensitiveMatch === false }); - const exclude = picomatch('*(../)**', { - dot: true, - nocase: options.caseSensitiveMatch === false, - ignore: processed.transformed + const partialMatcher = getPartialMatcher(processed.match, { + dot: options.dot, + nocase: options.caseSensitiveMatch === false }); if (process.env.TINYGLOBBY_DEBUG) { @@ -225,7 +192,7 @@ function crawl(options: GlobOptions, cwd: string, sync: boolean) { exclude: options.debug ? (_, p) => { const relativePath = processPath(p, cwd, properties.root, true, true); - const skipped = ignore(relativePath) || exclude(relativePath); + const skipped = (relativePath !== '.' && !partialMatcher(relativePath)) || ignore(relativePath); if (!skipped) { console.log(`[tinyglobby ${new Date().toLocaleTimeString('es')}] crawling ${p}`); @@ -235,7 +202,7 @@ function crawl(options: GlobOptions, cwd: string, sync: boolean) { } : (_, p) => { const relativePath = processPath(p, cwd, properties.root, true, true); - return ignore(relativePath) || exclude(relativePath); + return (relativePath !== '.' && !partialMatcher(relativePath)) || ignore(relativePath); }, pathSeparator: '/', relativePaths: true, diff --git a/src/utils.ts b/src/utils.ts index 4e6b345..11b2076 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,63 @@ -import picomatch from 'picomatch'; +import picomatch, { type Matcher } from 'picomatch'; + +// #region PARTIAL MATCHER +export interface PartialMatcherOptions { + dot?: boolean; + nocase?: boolean; +} + +// the result of over 4 months of figuring stuff out and a LOT of help +export function getPartialMatcher(patterns: string[], options?: PartialMatcherOptions): Matcher { + const regexes = patterns.map(pattern => splitPattern(pattern).map(part => picomatch.makeRe(part, options))); + return (input: string) => { + // no need to `splitPattern` as this is indeed not a pattern + const inputParts = input.split('/'); + for (let i = 0; i < patterns.length; i++) { + const patternParts = splitPattern(patterns[i]); + const regex = regexes[i]; + let j = 0; + while (j < inputParts.length) { + const part = patternParts[j]; + + // handling slashes in parts is very hard, not even fast-glob does it + // unlike fast-glob we should return true in this case + // for us, better to have a false positive than a false negative here + if (part.includes('/')) { + return true; + } + + const match = regex[j].test(inputParts[j]); + + if (!match) { + break; + } + + // unlike popular belief, `**` doesn't return true in *all* cases + // some examples are when matching it to `.a` with dot: false or `..` + // so it needs to match to return early + if (part === '**' && match) { + return true; + } + + j++; + } + if (j === inputParts.length) { + return true; + } + } + + return false; + }; +} +// #endregion + +// #region splitPattern +// if a pattern has no slashes outside glob symbols, results.parts is [] +export function splitPattern(path: string): string[] { + const result = picomatch.scan(path, { parts: true }); + return result.parts?.length ? result.parts : [path]; +} +// #endregion // #region convertPathToPattern const ESCAPED_WIN32_BACKSLASHES = /\\(?![()[\]{}!+@])/g; diff --git a/test/utils/partial-matcher.test.ts b/test/utils/partial-matcher.test.ts new file mode 100644 index 0000000..bb1bbfc --- /dev/null +++ b/test/utils/partial-matcher.test.ts @@ -0,0 +1,90 @@ +import assert from 'node:assert/strict'; +import test, { describe } from 'node:test'; +import { getPartialMatcher } from '../../src/utils.ts'; + +describe('getPartialMatcher', () => { + test('works with exact path', () => { + const matcher = getPartialMatcher(['test/utils/a']); + assert.ok(matcher('test/utils/a')); + }); + + test('works with partial path', () => { + const matcher = getPartialMatcher(['test/utils/a']); + assert.ok(matcher('test/utils')); + }); + + test("static pattern doesn't give false positives", () => { + const matcher = getPartialMatcher(['test/utils/a']); + assert.ok(!matcher('test/utils/b')); + assert.ok(!matcher('test/tests')); + assert.ok(!matcher('src')); + }); + + test('works with dynamic pattern', () => { + const matcher = getPartialMatcher(['test/util?/a']); + assert.ok(matcher('test/utils')); + }); + + test('works with brace expansion', () => { + const matcher = getPartialMatcher(['test/{utils,tests}/a']); + assert.ok(matcher('test/utils/a')); + assert.ok(matcher('test/tests/a')); + assert.ok(matcher('test/utils')); + assert.ok(matcher('test/tests')); + + assert.ok(!matcher('test/other/a')); + assert.ok(!matcher('test/other')); + }); + + test('works with **', () => { + const matcher = getPartialMatcher(['test/utils/**']); + assert.ok(matcher('test')); + assert.ok(matcher('test/utils')); + assert.ok(matcher('test/utils/a')); + assert.ok(!matcher('test/tests/a')); + }); + + test("** doesn't match ..", () => { + const matcher = getPartialMatcher(['**']); + assert.ok(!matcher('..')); + }); + + test('for now treats parts with / as **', () => { + const matcher = getPartialMatcher(['test/{utils/a,b}']); + assert.ok(matcher('test')); + assert.ok(matcher('test/utils')); + assert.ok(matcher('test/utils/a')); + + // only happens when treating it as ** + assert.ok(matcher('test/notutils')); + assert.ok(matcher('test/notutils/a')); + }); + + test('works with weird parentheses combinations', () => { + const matcher = getPartialMatcher(['test/utils/(a)']); + assert.ok(matcher('test/utils/a')); + assert.ok(matcher('test/utils')); + assert.ok(!matcher('test/utils/c')); + }); + + test('dot: true', () => { + const matcher = getPartialMatcher(['test/utils/*/c'], { dot: true }); + assert.ok(matcher('test/utils/a/c')); + assert.ok(matcher('test/utils/.a/c')); + assert.ok(matcher('test/utils')); + }); + + test('dot: false', () => { + const matcher = getPartialMatcher(['test/utils/*/c']); + assert.ok(matcher('test/utils/a/c')); + assert.ok(!matcher('test/utils/.a/c')); + assert.ok(matcher('test/utils')); + }); + + test('dot: false and **', () => { + const matcher = getPartialMatcher(['test/utils/**/c']); + assert.ok(matcher('test/utils/a/c')); + assert.ok(!matcher('test/utils/.a/c')); + assert.ok(matcher('test/utils')); + }); +}); From 6d5b0bb37377ffebeae59239e458097a1982ecbc Mon Sep 17 00:00:00 2001 From: Superchupu <53496941+SuperchupuDev@users.noreply.github.com> Date: Sat, 15 Feb 2025 00:07:49 +0100 Subject: [PATCH 5/5] do not overflow `patternParts` also add more partial tests --- src/utils.ts | 3 ++- test/utils/partial-matcher.test.ts | 33 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/utils.ts b/src/utils.ts index 11b2076..36297e8 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -15,8 +15,9 @@ export function getPartialMatcher(patterns: string[], options?: PartialMatcherOp for (let i = 0; i < patterns.length; i++) { const patternParts = splitPattern(patterns[i]); const regex = regexes[i]; + const minParts = Math.min(inputParts.length, patternParts.length); let j = 0; - while (j < inputParts.length) { + while (j < minParts) { const part = patternParts[j]; // handling slashes in parts is very hard, not even fast-glob does it diff --git a/test/utils/partial-matcher.test.ts b/test/utils/partial-matcher.test.ts index bb1bbfc..bd0357e 100644 --- a/test/utils/partial-matcher.test.ts +++ b/test/utils/partial-matcher.test.ts @@ -87,4 +87,37 @@ describe('getPartialMatcher', () => { assert.ok(!matcher('test/utils/.a/c')); assert.ok(matcher('test/utils')); }); + + test('path initially matching pattern but more input than pattern parts', () => { + const matcher = getPartialMatcher(['test/utils/a']); + assert.ok(!matcher('test/utils/a/c')); + }); + + test('multiple patterns', () => { + const matcher = getPartialMatcher(['test/util?/a', 'test/utils/a/c']); + assert.ok(matcher('test/utils/a/c')); + assert.ok(matcher('test/utilg/a')); + assert.ok(matcher('test/utilg')); + assert.ok(!matcher('test/utilg/a/c')); + }); + + test('..', () => { + const matcher = getPartialMatcher(['../test/util?/a']); + assert.ok(matcher('..')); + assert.ok(matcher('../test/utilg/a')); + assert.ok(!matcher('a/test/utilg/a')); + assert.ok(!matcher('test/utilg/a')); + }); + + test('.. mixed with normal pattern', () => { + const matcher = getPartialMatcher(['../test/util?/a', 'src/utils/a']); + assert.ok(matcher('..')); + assert.ok(matcher('../test/utilg/a')); + assert.ok(!matcher('a/test/utilg/a')); + assert.ok(!matcher('test/utilg/a')); + + assert.ok(matcher('src')); + assert.ok(matcher('src/utils')); + assert.ok(!matcher('src/gaming')); + }); });