diff --git a/src/index.ts b/src/index.ts index a9c63da..1fa2502 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,7 @@ import path, { posix } from 'node:path'; import { type Options as FdirOptions, fdir } from 'fdir'; import picomatch from 'picomatch'; -import { escapePath, isDynamicPattern } from './utils.ts'; +import { escapePath, getPartialMatcher, isDynamicPattern, splitPattern } from './utils.ts'; export interface GlobOptions { absolute?: boolean; @@ -54,20 +54,20 @@ function normalizePattern( properties.depthOffset = -(parentDirectoryMatch[0].length + 1) / 3; } } else if (!isIgnore && properties.depthOffset >= 0) { - const current = result.split('/'); - properties.commonPath ??= current; + const parts = splitPattern(result); + properties.commonPath ??= parts; const newCommonPath = []; - for (let i = 0; i < Math.min(properties.commonPath.length, current.length); i++) { - const part = current[i]; + for (let i = 0; i < Math.min(properties.commonPath.length, parts.length); i++) { + const part = parts[i]; - if (part === '**' && !current[i + 1]) { + if (part === '**' && !parts[i + 1]) { newCommonPath.pop(); break; } - if (part !== properties.commonPath[i] || isDynamicPattern(part) || i === current.length - 1) { + if (part !== properties.commonPath[i] || isDynamicPattern(part) || i === parts.length - 1) { break; } @@ -110,44 +110,22 @@ function processPatterns( } } - const transformed: string[] = []; for (const pattern of patterns) { if (!pattern.startsWith('!') || pattern[1] === '(') { const newPattern = normalizePattern(pattern, expandDirectories, cwd, properties, false); matchPatterns.push(newPattern); - const split = newPattern.split('/'); - if (split[split.length - 1] === '**') { - if (split[split.length - 2] !== '..') { - split[split.length - 2] = '**'; - split.pop(); - } - transformed.push(split.length ? split.join('/') : '*'); - } else { - transformed.push(split.length > 1 ? 
split.slice(0, -1).join('/') : split.join('/')); - } - - for (let i = split.length - 2; i > 0; i--) { - const part = split.slice(0, i); - if (part[part.length - 1] === '**') { - part.pop(); - if (part.length > 1) { - part.pop(); - } - } - transformed.push(part.join('/')); - } } else if (pattern[1] !== '!' || pattern[2] === '(') { const newPattern = normalizePattern(pattern.slice(1), expandDirectories, cwd, properties, true); ignorePatterns.push(newPattern); } } - return { match: matchPatterns, ignore: ignorePatterns, transformed }; + return { match: matchPatterns, ignore: ignorePatterns }; } // TODO: this is slow, find a better way to do this function getRelativePath(path: string, cwd: string, root: string) { - return posix.relative(cwd, `${root}/${path}`); + return posix.relative(cwd, `${root}/${path}`) || '.'; } function processPath(path: string, cwd: string, root: string, isDirectory: boolean, absolute?: boolean) { @@ -186,10 +164,9 @@ function crawl(options: GlobOptions, cwd: string, sync: boolean) { nocase: options.caseSensitiveMatch === false }); - const exclude = picomatch('*(../)**', { - dot: true, - nocase: options.caseSensitiveMatch === false, - ignore: processed.transformed + const partialMatcher = getPartialMatcher(processed.match, { + dot: options.dot, + nocase: options.caseSensitiveMatch === false }); if (process.env.TINYGLOBBY_DEBUG) { @@ -215,7 +192,7 @@ function crawl(options: GlobOptions, cwd: string, sync: boolean) { exclude: options.debug ? (_, p) => { const relativePath = processPath(p, cwd, properties.root, true, true); - const skipped = ignore(relativePath) || exclude(relativePath); + const skipped = (relativePath !== '.' 
&& !partialMatcher(relativePath)) || ignore(relativePath); if (!skipped) { console.log(`[tinyglobby ${new Date().toLocaleTimeString('es')}] crawling ${p}`); @@ -225,7 +202,7 @@ function crawl(options: GlobOptions, cwd: string, sync: boolean) { } : (_, p) => { const relativePath = processPath(p, cwd, properties.root, true, true); - return ignore(relativePath) || exclude(relativePath); + return (relativePath !== '.' && !partialMatcher(relativePath)) || ignore(relativePath); }, pathSeparator: '/', relativePaths: true, diff --git a/src/utils.ts b/src/utils.ts index 4e6b345..36297e8 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,64 @@ -import picomatch from 'picomatch'; +import picomatch, { type Matcher } from 'picomatch'; + +// #region PARTIAL MATCHER +export interface PartialMatcherOptions { + dot?: boolean; + nocase?: boolean; +} + +// the result of over 4 months of figuring stuff out and a LOT of help +export function getPartialMatcher(patterns: string[], options?: PartialMatcherOptions): Matcher { + const regexes = patterns.map(pattern => splitPattern(pattern).map(part => picomatch.makeRe(part, options))); + return (input: string) => { + // no need to `splitPattern` as this is indeed not a pattern + const inputParts = input.split('/'); + for (let i = 0; i < patterns.length; i++) { + const patternParts = splitPattern(patterns[i]); + const regex = regexes[i]; + const minParts = Math.min(inputParts.length, patternParts.length); + let j = 0; + while (j < minParts) { + const part = patternParts[j]; + + // handling slashes in parts is very hard, not even fast-glob does it + // unlike fast-glob we should return true in this case + // for us, better to have a false positive than a false negative here + if (part.includes('/')) { + return true; + } + + const match = regex[j].test(inputParts[j]); + + if (!match) { + break; + } + + // contrary to popular belief, `**` doesn't return true in *all* cases + // some examples are when matching it to `.a` with dot: false or `..` 
+ // so it needs to match to return early + if (part === '**' && match) { + return true; + } + + j++; + } + if (j === inputParts.length) { + return true; + } + } + + return false; + }; +} +// #endregion + +// #region splitPattern +// if a pattern has no slashes outside glob symbols, result.parts is [] +export function splitPattern(path: string): string[] { + const result = picomatch.scan(path, { parts: true }); + return result.parts?.length ? result.parts : [path]; +} +// #endregion // #region convertPathToPattern const ESCAPED_WIN32_BACKSLASHES = /\\(?![()[\]{}!+@])/g; diff --git a/test/index.test.ts b/test/index.test.ts index bbe30bd..5f9e5a4 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -307,6 +307,11 @@ test('matching files with specific naming pattern', async () => { assert.deepEqual(files.sort(), ['a/a.txt', 'a/b.txt', 'b/a.txt', 'b/b.txt']); }); +test('dynamic patterns that include slashes inside parts', async () => { + const files = await glob({ patterns: ['{.a/a,a}/a.txt'], cwd }); + assert.deepEqual(files.sort(), ['.a/a/a.txt', 'a/a.txt']); +}); + test('using extglob patterns', async () => { const files = await glob({ patterns: ['a/*(a|b).txt'], cwd }); assert.deepEqual(files.sort(), ['a/a.txt', 'a/b.txt']); diff --git a/test/utils/partial-matcher.test.ts b/test/utils/partial-matcher.test.ts new file mode 100644 index 0000000..bd0357e --- /dev/null +++ b/test/utils/partial-matcher.test.ts @@ -0,0 +1,123 @@ +import assert from 'node:assert/strict'; +import test, { describe } from 'node:test'; +import { getPartialMatcher } from '../../src/utils.ts'; + +describe('getPartialMatcher', () => { + test('works with exact path', () => { + const matcher = getPartialMatcher(['test/utils/a']); + assert.ok(matcher('test/utils/a')); + }); + + test('works with partial path', () => { + const matcher = getPartialMatcher(['test/utils/a']); + assert.ok(matcher('test/utils')); + }); + + test("static pattern doesn't give false positives", () => { + const matcher 
= getPartialMatcher(['test/utils/a']); + assert.ok(!matcher('test/utils/b')); + assert.ok(!matcher('test/tests')); + assert.ok(!matcher('src')); + }); + + test('works with dynamic pattern', () => { + const matcher = getPartialMatcher(['test/util?/a']); + assert.ok(matcher('test/utils')); + }); + + test('works with brace expansion', () => { + const matcher = getPartialMatcher(['test/{utils,tests}/a']); + assert.ok(matcher('test/utils/a')); + assert.ok(matcher('test/tests/a')); + assert.ok(matcher('test/utils')); + assert.ok(matcher('test/tests')); + + assert.ok(!matcher('test/other/a')); + assert.ok(!matcher('test/other')); + }); + + test('works with **', () => { + const matcher = getPartialMatcher(['test/utils/**']); + assert.ok(matcher('test')); + assert.ok(matcher('test/utils')); + assert.ok(matcher('test/utils/a')); + assert.ok(!matcher('test/tests/a')); + }); + + test("** doesn't match ..", () => { + const matcher = getPartialMatcher(['**']); + assert.ok(!matcher('..')); + }); + + test('for now treats parts with / as **', () => { + const matcher = getPartialMatcher(['test/{utils/a,b}']); + assert.ok(matcher('test')); + assert.ok(matcher('test/utils')); + assert.ok(matcher('test/utils/a')); + + // only happens when treating it as ** + assert.ok(matcher('test/notutils')); + assert.ok(matcher('test/notutils/a')); + }); + + test('works with weird parentheses combinations', () => { + const matcher = getPartialMatcher(['test/utils/(a)']); + assert.ok(matcher('test/utils/a')); + assert.ok(matcher('test/utils')); + assert.ok(!matcher('test/utils/c')); + }); + + test('dot: true', () => { + const matcher = getPartialMatcher(['test/utils/*/c'], { dot: true }); + assert.ok(matcher('test/utils/a/c')); + assert.ok(matcher('test/utils/.a/c')); + assert.ok(matcher('test/utils')); + }); + + test('dot: false', () => { + const matcher = getPartialMatcher(['test/utils/*/c']); + assert.ok(matcher('test/utils/a/c')); + assert.ok(!matcher('test/utils/.a/c')); + 
assert.ok(matcher('test/utils')); + }); + + test('dot: false and **', () => { + const matcher = getPartialMatcher(['test/utils/**/c']); + assert.ok(matcher('test/utils/a/c')); + assert.ok(!matcher('test/utils/.a/c')); + assert.ok(matcher('test/utils')); + }); + + test('path initially matching pattern but more input than pattern parts', () => { + const matcher = getPartialMatcher(['test/utils/a']); + assert.ok(!matcher('test/utils/a/c')); + }); + + test('multiple patterns', () => { + const matcher = getPartialMatcher(['test/util?/a', 'test/utils/a/c']); + assert.ok(matcher('test/utils/a/c')); + assert.ok(matcher('test/utilg/a')); + assert.ok(matcher('test/utilg')); + assert.ok(!matcher('test/utilg/a/c')); + }); + + test('..', () => { + const matcher = getPartialMatcher(['../test/util?/a']); + assert.ok(matcher('..')); + assert.ok(matcher('../test/utilg/a')); + assert.ok(!matcher('a/test/utilg/a')); + assert.ok(!matcher('test/utilg/a')); + }); + + test('.. mixed with normal pattern', () => { + const matcher = getPartialMatcher(['../test/util?/a', 'src/utils/a']); + assert.ok(matcher('..')); + assert.ok(matcher('../test/utilg/a')); + assert.ok(!matcher('a/test/utilg/a')); + assert.ok(!matcher('test/utilg/a')); + + assert.ok(matcher('src')); + assert.ok(matcher('src/utils')); + assert.ok(!matcher('src/gaming')); + }); +});