From 99a807bf024d168f9d8f26939887ea2650502f35 Mon Sep 17 00:00:00 2001 From: axb Date: Tue, 1 Jul 2025 14:06:46 +0800 Subject: [PATCH 1/4] list-files must include at least the first-level directory contents --- src/services/glob/list-files.ts | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index 3164ed1eb01..cf3d85ca504 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -39,8 +39,26 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb const ignoreInstance = await createIgnoreInstance(dirPath) const directories = await listFilteredDirectories(dirPath, recursive, ignoreInstance) + let allFiles = files + let allDirectories = directories + const limitReached = files.length + directories.length >= limit + + // If limit is reached in recursive mode, fetch one more layer non-recursively and merge results + if (recursive && limitReached) { + // Get files and directories in one layer (non-recursive) + const filesNonRecursive = await listFilesWithRipgrep(rgPath, dirPath, false, limit) + const directoriesNonRecursive = await listFilteredDirectories(dirPath, false, []) + + // Merge recursive and non-recursive results, deduplicate + const filesSet = new Set([...files, ...filesNonRecursive]) + const directoriesSet = new Set([...directories, ...directoriesNonRecursive]) + + allFiles = Array.from(filesSet) + allDirectories = Array.from(directoriesSet) + } + // Combine and format the results - return formatAndCombineResults(files, directories, limit) + return formatAndCombineResults(allFiles, allDirectories, limit) } /** @@ -326,6 +344,16 @@ function formatAndCombineResults(files: string[], directories: string[], limit: // Sort to ensure directories come first, followed by files uniquePaths.sort((a: string, b: string) => { + // Remove trailing slash for depth calculation + const aPath = a.endsWith("/") ? a.slice(0, -1) : a + const bPath = b.endsWith("/") ? b.slice(0, -1) : b + const aDepth = aPath.split("/").length + const bDepth = bPath.split("/").length + + if (aDepth !== bDepth) { + return aDepth - bDepth + } + const aIsDir = a.endsWith("/") const bIsDir = b.endsWith("/") From 8cc571d26b5025b5994d402d6c665917beaea879 Mon Sep 17 00:00:00 2001 From: axb Date: Tue, 8 Jul 2025 00:45:54 +0800 Subject: [PATCH 2/4] feat: implement breadth-first file listing to include first-level directories --- .../glob/__tests__/list-files.spec.ts | 212 +++++++++++++++++- 1 file changed, 211 insertions(+), 1 deletion(-) diff --git a/src/services/glob/__tests__/list-files.spec.ts b/src/services/glob/__tests__/list-files.spec.ts index 2a154248307..805df89edad 100644 --- a/src/services/glob/__tests__/list-files.spec.ts +++ b/src/services/glob/__tests__/list-files.spec.ts @@ -1,5 +1,7 @@ -import { describe, it, expect, vi } from "vitest" +import { vi, describe, it, expect, beforeEach } from "vitest" +import * as path from "path" import { listFiles } from "../list-files" +import * as childProcess from "child_process" vi.mock("../list-files", async () => { const actual = await vi.importActual("../list-files") @@ -16,3 +18,211 @@ describe("listFiles", () => { expect(result).toEqual([[], false]) }) }) + +// Mock ripgrep to avoid filesystem dependencies +vi.mock("../../ripgrep", () => ({ + getBinPath: vi.fn().mockResolvedValue("/mock/path/to/rg"), +})) + +// Mock vscode +vi.mock("vscode", () => ({ + env: { + appRoot: "/mock/app/root", + }, +})) + +// Mock filesystem operations +vi.mock("fs", () => ({ + promises: { + access: vi.fn().mockRejectedValue(new Error("Not found")), + readFile: vi.fn().mockResolvedValue(""), + readdir: vi.fn().mockResolvedValue([]), + }, +})) + +// Import fs to set up mocks +import * as fs from "fs" + +vi.mock("child_process", () => ({ + spawn: vi.fn(), +})) + +vi.mock("../../path", () => ({ + arePathsEqual: vi.fn().mockReturnValue(false), +})) + +describe("list-files symlink support", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it("should include --follow flag in ripgrep arguments", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + // Simulate some output to complete the process + setTimeout(() => callback("test-file.txt\n"), 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + if (event === "error") { + // No error simulation + } + }), + kill: vi.fn(), + } + + mockSpawn.mockReturnValue(mockProcess as any) + + // Call listFiles to trigger ripgrep execution + await listFiles("/test/dir", false, 100) + + // Verify that spawn was called with --follow flag (the critical fix) + const [rgPath, args] = mockSpawn.mock.calls[0] + expect(rgPath).toBe("/mock/path/to/rg") + expect(args).toContain("--files") + expect(args).toContain("--hidden") + expect(args).toContain("--follow") // This is the critical assertion - the fix should add this flag + + // Platform-agnostic path check - verify the last argument is the resolved path + const expectedPath = path.resolve("/test/dir") + expect(args[args.length - 1]).toBe(expectedPath) + }) + + it("should include --follow flag for recursive listings too", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + setTimeout(() => callback("test-file.txt\n"), 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + if (event === "error") { + // No error simulation + } + }), + kill: vi.fn(), + } + + mockSpawn.mockReturnValue(mockProcess as any) + + // Mock readdir to return some test entries + vi.mocked(fs.promises.readdir).mockResolvedValue([ + { name: "test-file.txt", isDirectory: () => false, isSymbolicLink: () => false, isFile: () => true } as any, + { name: "test-dir", isDirectory: () => true, isSymbolicLink: () => false, isFile: () => false } as any, + ]) + + // Call listFiles with recursive=true + await listFiles("/test/dir", true, 100) + + // For breadth-first implementation, ripgrep is called for filtering files + // It should still include the --follow flag + expect(mockSpawn).toHaveBeenCalled() + const calls = mockSpawn.mock.calls + + // Find a call that includes --follow flag + const hasFollowFlag = calls.some((call) => { + const [, args] = call + return args && args.includes("--follow") + }) + + expect(hasFollowFlag).toBe(true) + }) + + it("should ensure first-level directories are included when limit is reached", async () => { + // Mock fs.promises.readdir to simulate a directory structure + const mockReaddir = vi.mocked(fs.promises.readdir) + + // Root directory with many items + mockReaddir.mockResolvedValueOnce([ + { name: "a_dir", isDirectory: () => true, isSymbolicLink: () => false, isFile: () => false } as any, + { name: "b_dir", isDirectory: () => true, isSymbolicLink: () => false, isFile: () => false } as any, + { name: "c_dir", isDirectory: () => true, isSymbolicLink: () => false, isFile: () => false } as any, + { name: "file1.txt", isDirectory: () => false, isSymbolicLink: () => false, isFile: () => true } as any, + { name: "file2.txt", isDirectory: () => false, isSymbolicLink: () => false, isFile: () => true } as any, + ]) + + // a_dir contents (many files to trigger limit) + const manyFiles = Array.from( + { length: 50 }, + (_, i) => + ({ + name: `file_a_${i}.txt`, + isDirectory: () => false, + isSymbolicLink: () => false, + isFile: () => true, + }) as any, + ) + mockReaddir.mockResolvedValueOnce(manyFiles) + + // b_dir and c_dir won't be read due to limit + + // Mock ripgrep to approve all files + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + // Return many file names + const fileNames = + Array.from({ length: 52 }, (_, i) => + i < 2 ? `file${i + 1}.txt` : `file_a_${i - 2}.txt`, + ).join("\n") + "\n" + setTimeout(() => callback(fileNames), 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + }), + kill: vi.fn(), + } + mockSpawn.mockReturnValue(mockProcess as any) + + // Call listFiles with recursive=true and a small limit + const [results, limitReached] = await listFiles("/test/dir", true, 10) + + // Verify that we got results and hit the limit + expect(results.length).toBe(10) + expect(limitReached).toBe(true) + + // Count directories in results + const directories = results.filter((r) => r.endsWith("/")) + + // With breadth-first, we should have at least the 3 first-level directories + // even if we hit the limit while processing subdirectories + expect(directories.length).toBeGreaterThanOrEqual(3) + + // Verify all first-level directories are included + const hasADir = results.some((r) => r.endsWith("a_dir/")) + const hasBDir = results.some((r) => r.endsWith("b_dir/")) + const hasCDir = results.some((r) => r.endsWith("c_dir/")) + + expect(hasADir).toBe(true) + expect(hasBDir).toBe(true) + expect(hasCDir).toBe(true) + }) +}) From 07610dca70b01216234c172ab35b4a1806326e34 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 1 Jul 2025 12:34:06 -0500 Subject: [PATCH 3/4] fix: ensure first-level directories are always included when file limit is reached - Modified list-files to check if limit was reached and ensure all first-level directories are included - Added helper functions to get first-level directories and adjust results - Updated tests to properly verify the new behavior - Removed the breadth-first implementation in favor of a simpler approach that modifies results after the fact --- .../glob/__tests__/list-files.spec.ts | 74 +++++------- src/services/glob/list-files.ts | 110 +++++++++++++++--- 2 files changed, 123 insertions(+), 61 deletions(-) diff --git a/src/services/glob/__tests__/list-files.spec.ts b/src/services/glob/__tests__/list-files.spec.ts index 805df89edad..6c133a732a3 100644 --- a/src/services/glob/__tests__/list-files.spec.ts +++ b/src/services/glob/__tests__/list-files.spec.ts @@ -124,34 +124,26 @@ describe("list-files symlink support", () => { mockSpawn.mockReturnValue(mockProcess as any) - // Mock readdir to return some test entries - vi.mocked(fs.promises.readdir).mockResolvedValue([ - { name: "test-file.txt", isDirectory: () => false, isSymbolicLink: () => false, isFile: () => true } as any, - { name: "test-dir", isDirectory: () => true, isSymbolicLink: () => false, isFile: () => false } as any, - ]) - // Call listFiles with recursive=true await listFiles("/test/dir", true, 100) - // For breadth-first implementation, ripgrep is called for filtering files - // It should still include the --follow flag - expect(mockSpawn).toHaveBeenCalled() - const calls = mockSpawn.mock.calls - - // Find a call that includes --follow flag - const hasFollowFlag = calls.some((call) => { - const [, args] = call - return args && args.includes("--follow") - }) + // Verify that spawn was called with --follow flag (the critical fix) + const [rgPath, args] = mockSpawn.mock.calls[0] + expect(rgPath).toBe("/mock/path/to/rg") + expect(args).toContain("--files") + expect(args).toContain("--hidden") + expect(args).toContain("--follow") // This should be present in recursive mode too - expect(hasFollowFlag).toBe(true) + // Platform-agnostic path check - verify the last argument is the resolved path + const expectedPath = path.resolve("/test/dir") + expect(args[args.length - 1]).toBe(expectedPath) }) it("should ensure first-level directories are included when limit is reached", async () => { // Mock fs.promises.readdir to simulate a directory structure const mockReaddir = vi.mocked(fs.promises.readdir) - // Root directory with many items + // Root directory with first-level directories mockReaddir.mockResolvedValueOnce([ { name: "a_dir", isDirectory: () => true, isSymbolicLink: () => false, isFile: () => false } as any, { name: "b_dir", isDirectory: () => true, isSymbolicLink: () => false, isFile: () => false } as any, @@ -160,33 +152,28 @@ describe("list-files symlink support", () => { { name: "file2.txt", isDirectory: () => false, isSymbolicLink: () => false, isFile: () => true } as any, ]) - // a_dir contents (many files to trigger limit) - const manyFiles = Array.from( - { length: 50 }, - (_, i) => - ({ - name: `file_a_${i}.txt`, - isDirectory: () => false, - isSymbolicLink: () => false, - isFile: () => true, - }) as any, - ) - mockReaddir.mockResolvedValueOnce(manyFiles) - - // b_dir and c_dir won't be read due to limit - - // Mock ripgrep to approve all files + // Mock ripgrep to return many files (simulating hitting the limit) const mockSpawn = vi.mocked(childProcess.spawn) const mockProcess = { stdout: { on: vi.fn((event, callback) => { if (event === "data") { - // Return many file names - const fileNames = - Array.from({ length: 52 }, (_, i) => - i < 2 ? `file${i + 1}.txt` : `file_a_${i - 2}.txt`, - ).join("\n") + "\n" - setTimeout(() => callback(fileNames), 10) + // Return many file paths to trigger the limit + const paths = + [ + "/test/dir/a_dir/", + "/test/dir/a_dir/subdir1/", + "/test/dir/a_dir/subdir1/file1.txt", + "/test/dir/a_dir/subdir1/file2.txt", + "/test/dir/a_dir/subdir2/", + "/test/dir/a_dir/subdir2/file3.txt", + "/test/dir/a_dir/file4.txt", + "/test/dir/a_dir/file5.txt", + "/test/dir/file1.txt", + "/test/dir/file2.txt", + // Note: b_dir and c_dir are missing from ripgrep output + ].join("\n") + "\n" + setTimeout(() => callback(paths), 10) } }), }, @@ -202,6 +189,9 @@ describe("list-files symlink support", () => { } mockSpawn.mockReturnValue(mockProcess as any) + // Mock fs.promises.access to simulate .gitignore doesn't exist + vi.mocked(fs.promises.access).mockRejectedValue(new Error("File not found")) + // Call listFiles with recursive=true and a small limit const [results, limitReached] = await listFiles("/test/dir", true, 10) @@ -212,8 +202,8 @@ describe("list-files symlink support", () => { // Count directories in results const directories = results.filter((r) => r.endsWith("/")) - // With breadth-first, we should have at least the 3 first-level directories - // even if we hit the limit while processing subdirectories + // We should have at least the 3 first-level directories + // even if ripgrep didn't return all of them expect(directories.length).toBeGreaterThanOrEqual(3) // Verify all first-level directories are included diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index cf3d85ca504..22e21838c16 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -32,33 +32,105 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb // Get ripgrep path const rgPath = await getRipgrepPath() - // Get files using ripgrep - const files = await listFilesWithRipgrep(rgPath, dirPath, recursive, limit) + if (!recursive) { + // For non-recursive, use the existing approach + const files = await listFilesWithRipgrep(rgPath, dirPath, false, limit) + const ignoreInstance = await createIgnoreInstance(dirPath) + const directories = await listFilteredDirectories(dirPath, false, ignoreInstance) + return formatAndCombineResults(files, directories, limit) + } - // Get directories with proper filtering using ignore library + // For recursive mode, use the original approach but ensure first-level directories are included + const files = await listFilesWithRipgrep(rgPath, dirPath, true, limit) const ignoreInstance = await createIgnoreInstance(dirPath) - const directories = await listFilteredDirectories(dirPath, recursive, ignoreInstance) + const directories = await listFilteredDirectories(dirPath, true, ignoreInstance) + + // Combine and check if we hit the limit + const [results, limitReached] = formatAndCombineResults(files, directories, limit) + + // If we hit the limit, ensure all first-level directories are included + if (limitReached) { + const firstLevelDirs = await getFirstLevelDirectories(dirPath, ignoreInstance) + return ensureFirstLevelDirectoriesIncluded(results, firstLevelDirs, limit) + } - let allFiles = files - let allDirectories = directories - const limitReached = files.length + directories.length >= limit + return [results, limitReached] +} - // If limit is reached in recursive mode, fetch one more layer non-recursively and merge results - if (recursive && limitReached) { - // Get files and directories in one layer (non-recursive) - const filesNonRecursive = await listFilesWithRipgrep(rgPath, dirPath, false, limit) - const directoriesNonRecursive = await listFilteredDirectories(dirPath, false, []) +/** + * Get only the first-level directories in a path + */ +async function getFirstLevelDirectories(dirPath: string, ignoreInstance: ReturnType): Promise { + const absolutePath = path.resolve(dirPath) + const directories: string[] = [] - // Merge recursive and non-recursive results, deduplicate - const filesSet = new Set([...files, ...filesNonRecursive]) - const directoriesSet = new Set([...directories, ...directoriesNonRecursive]) + try { + const entries = await fs.promises.readdir(absolutePath, { withFileTypes: true }) - allFiles = Array.from(filesSet) - allDirectories = Array.from(directoriesSet) + for (const entry of entries) { + if (entry.isDirectory() && !entry.isSymbolicLink()) { + const fullDirPath = path.join(absolutePath, entry.name) + if (shouldIncludeDirectory(entry.name, fullDirPath, dirPath, ignoreInstance)) { + const formattedPath = fullDirPath.endsWith("/") ? fullDirPath : `${fullDirPath}/` + directories.push(formattedPath) + } + } + } + } catch (err) { + console.warn(`Could not read directory ${absolutePath}: ${err}`) } - // Combine and format the results - return formatAndCombineResults(allFiles, allDirectories, limit) + return directories +} + +/** + * Ensure all first-level directories are included in the results + */ +function ensureFirstLevelDirectoriesIncluded( + results: string[], + firstLevelDirs: string[], + limit: number, +): [string[], boolean] { + // Create a set of existing paths for quick lookup + const existingPaths = new Set(results) + + // Find missing first-level directories + const missingDirs = firstLevelDirs.filter((dir) => !existingPaths.has(dir)) + + if (missingDirs.length === 0) { + // All first-level directories are already included + return [results, true] + } + + // We need to make room for the missing directories + // Remove items from the end (which are likely deeper in the tree) + const itemsToRemove = Math.min(missingDirs.length, results.length) + const adjustedResults = results.slice(0, results.length - itemsToRemove) + + // Add the missing directories at the beginning (after any existing first-level dirs) + // First, separate existing results into first-level and others + const resultPaths = adjustedResults.map((r) => path.resolve(r)) + const basePath = path.resolve(firstLevelDirs[0]).split(path.sep).slice(0, -1).join(path.sep) + + const firstLevelResults: string[] = [] + const otherResults: string[] = [] + + for (let i = 0; i < adjustedResults.length; i++) { + const resolvedPath = resultPaths[i] + const relativePath = path.relative(basePath, resolvedPath) + const depth = relativePath.split(path.sep).length + + if (depth === 1) { + firstLevelResults.push(adjustedResults[i]) + } else { + otherResults.push(adjustedResults[i]) + } + } + + // Combine: existing first-level dirs + missing first-level dirs + other results + const finalResults = [...firstLevelResults, ...missingDirs, ...otherResults].slice(0, limit) + + return [finalResults, true] } /** From 8118c0709c6e31b9bd8465daf5f9b3223a6ceb0e Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 15 Jul 2025 16:00:52 -0500 Subject: [PATCH 4/4] fix: restore original sorting behavior while keeping first-level directory guarantee --- src/services/glob/list-files.ts | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index 22e21838c16..05fa8a1d7b4 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -402,7 +402,6 @@ function isDirectoryExplicitlyIgnored(dirName: string): boolean { return false } - /** * Combine file and directory results and format them properly */ @@ -416,16 +415,6 @@ function formatAndCombineResults(files: string[], directories: string[], limit: // Sort to ensure directories come first, followed by files uniquePaths.sort((a: string, b: string) => { - // Remove trailing slash for depth calculation - const aPath = a.endsWith("/") ? a.slice(0, -1) : a - const bPath = b.endsWith("/") ? b.slice(0, -1) : b - const aDepth = aPath.split("/").length - const bDepth = bPath.split("/").length - - if (aDepth !== bDepth) { - return aDepth - bDepth - } - const aIsDir = a.endsWith("/") const bIsDir = b.endsWith("/")