diff --git a/actions/setup/js/qmd_index.cjs b/actions/setup/js/qmd_index.cjs index 6c4a239982b..816d02c3d53 100644 --- a/actions/setup/js/qmd_index.cjs +++ b/actions/setup/js/qmd_index.cjs @@ -22,6 +22,30 @@ function resolveEnvVars(p) { return p.replace(/\$\{([^}]+)\}/g, (_, name) => process.env[name] || ""); } +/** + * Counts files matching a glob pattern in basePath using the Node.js 22+ built-in + * `fs.globSync`. Returns null when the API is unavailable (older Node.js) or an error + * occurs, so callers can treat the count as unknown and log accordingly. + * @param {string} basePath + * @param {string} pattern + * @returns {number | null} + */ +function countGlobMatches(basePath, pattern) { + try { + const { globSync } = require("node:fs"); + if (typeof globSync !== "function") return null; + const files = globSync(pattern, { cwd: basePath }); + return Array.isArray(files) ? files.length : null; + } catch (/** @type {any} */ err) { + // Log at debug level: the count is best-effort and a failure here (e.g. invalid + // pattern syntax, unsupported Node.js version) should not block indexing. + if (typeof core !== "undefined" && typeof core.debug === "function") { + core.debug(`countGlobMatches("${basePath}", "${pattern}") failed: ${err.message}`); + } + return null; + } +} + /** * Returns an Octokit client for the given token env var, or the default github client. * @param {string | undefined} tokenEnvVar @@ -139,22 +163,36 @@ async function main() { const rawPath = checkout.path; const resolvedPath = resolveEnvVars(rawPath); const patterns = checkout.patterns || ["**/*.md"]; - const pattern = patterns.join(","); - core.info(`Collection "${checkout.name}": path="${rawPath}" -> "${resolvedPath}" pattern="${pattern}"`); + core.info(`Checkout "${checkout.name}": path="${rawPath}" -> "${resolvedPath}" (${patterns.length} pattern(s))`); const pathExists = fs.existsSync(resolvedPath); if (!pathExists) { - core.warning(`Collection "${checkout.name}": path "${resolvedPath}" does not exist — no files will be indexed`); - } else { - core.info(`Collection "${checkout.name}": path exists`); + core.warning(`Checkout "${checkout.name}": path "${resolvedPath}" does not exist — no files will be indexed`); } - collections[checkout.name] = { - path: resolvedPath, - pattern, - ...(checkout.context ? { context: { "/": checkout.context } } : {}), - }; + // Create one collection per pattern so the qmd SDK receives a single unambiguous + // glob expression per collection. Joining multiple patterns with commas does NOT work — + // the SDK treats the combined string as one literal pattern and matches nothing. + for (let pi = 0; pi < patterns.length; pi++) { + const pattern = patterns[pi]; + // For single-pattern checkouts keep the original name; for multi-pattern checkouts + // append "-0", "-1", … to ensure collection names are unique in the qmd store. + const colName = patterns.length === 1 ? checkout.name : `${checkout.name}-${pi}`; + + let hitInfo = ""; + if (pathExists) { + const count = countGlobMatches(resolvedPath, pattern); + hitInfo = count !== null ? ` (${count} file(s) matched)` : ""; + } + core.info(` -> collection "${colName}": pattern="${pattern}"${hitInfo}`); + + collections[colName] = { + path: resolvedPath, + pattern, + ...(checkout.context ? { context: { "/": checkout.context } } : {}), + }; + } } // ── Process search entries ─────────────────────────────────────────────── @@ -274,10 +312,10 @@ async function main() { core.warning( "No files were indexed. Possible causes:\n" + " - The checkout path does not exist or was not checked out\n" + - " - The glob patterns do not match any files (check for dotfile exclusions in patterns starting with '.')\n" + - " - The pattern uses a comma-separated list that the qmd SDK does not support\n" + + " - The glob patterns do not match any files in the configured path\n" + + " - Patterns starting with '.' require the path to exist (e.g., '.github/agents/**')\n" + " - The checkout path resolves to an empty string (check ${ENV_VAR} placeholders in 'path')\n" + - "Review the collection log lines above for the resolved path and pattern." + "Review the collection log lines above for the resolved path, pattern, and file count." ); } diff --git a/actions/setup/js/qmd_index.test.cjs b/actions/setup/js/qmd_index.test.cjs index 848ae8afe41..a5a7c323e7b 100644 --- a/actions/setup/js/qmd_index.test.cjs +++ b/actions/setup/js/qmd_index.test.cjs @@ -153,7 +153,7 @@ describe("qmd_index.cjs", () => { // ── Error path: missing config ───────────────────────────────────────────── it("fails when QMD_CONFIG_JSON is not set", async () => { await runMain(undefined); - expect(mockCore.setFailed).toHaveBeenCalledWith("QMD_CONFIG_JSON environment variable not set"); + expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("QMD_CONFIG_JSON environment variable not set")); expect(mockStore.update).not.toHaveBeenCalled(); }); @@ -419,6 +419,77 @@ describe("qmd_index.cjs", () => { expect(mockStore.close).toHaveBeenCalledOnce(); }); + // ── Checkout collection: multiple patterns create sub-collections ───────── + it("creates separate collections for each pattern in a multi-pattern checkout", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + fs.writeFileSync(path.join(docsDir, "readme.md"), "# README"); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "gh-aw", path: docsDir, patterns: ["**/*.md", "**/*.mdx", "**/*.txt"] }], + }); + + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + + // Each pattern must produce a separate sub-collection; commas in pattern strings + // are not supported by the qmd SDK and would result in 0 files being indexed. + const infoCalls = mockCore.info.mock.calls.flat().join("\n"); + expect(infoCalls).toContain('collection "gh-aw-0": pattern="**/*.md"'); + expect(infoCalls).toContain('collection "gh-aw-1": pattern="**/*.mdx"'); + expect(infoCalls).toContain('collection "gh-aw-2": pattern="**/*.txt"'); + // The combined comma form must not appear in any collection registration line + expect(infoCalls).not.toContain("**/*.md,**/*.mdx"); + expect(infoCalls).not.toContain("**/*.md, **/*.mdx"); + }); + + // ── Checkout collection: single pattern keeps original name ─────────────── + it("keeps the original collection name when checkout has exactly one pattern", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir, patterns: ["**/*.md"] }], + }); + + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + + // Single-pattern checkout: use the plain name "docs", not "docs-0" + const infoCalls = mockCore.info.mock.calls.flat().join("\n"); + expect(infoCalls).toContain('collection "docs": pattern="**/*.md"'); + expect(infoCalls).not.toContain('"docs-0"'); + }); + + // ── Checkout collection: dotfile patterns work as sub-collections ───────── + it("registers dotfile patterns (e.g. .github/**) as individual collections", async () => { + const wsDir = path.join(tmpDir, "workspace"); + const ghDir = path.join(wsDir, ".github", "agents"); + fs.mkdirSync(ghDir, { recursive: true }); + fs.writeFileSync(path.join(ghDir, "agent.md"), "# Agent"); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [ + { + name: "repo", + path: wsDir, + patterns: ["docs/**", ".github/agents/**", ".github/aw/**"], + }, + ], + }); + + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + + const infoCalls = mockCore.info.mock.calls.flat().join("\n"); + expect(infoCalls).toContain('collection "repo-0": pattern="docs/**"'); + expect(infoCalls).toContain('collection "repo-1": pattern=".github/agents/**"'); + expect(infoCalls).toContain('collection "repo-2": pattern=".github/aw/**"'); + }); + // ── writeSummary: checkouts section ────────────────────────────────────── it("writes step summary with a collections table for checkouts", async () => { const docsDir = path.join(tmpDir, "docs"); diff --git a/pkg/workflow/qmd.go b/pkg/workflow/qmd.go index 8afa5043fd4..55f5df96242 100644 --- a/pkg/workflow/qmd.go +++ b/pkg/workflow/qmd.go @@ -68,9 +68,11 @@ package workflow import ( "encoding/json" "fmt" + "os" "strconv" "strings" + "github.com/github/gh-aw/pkg/console" "github.com/github/gh-aw/pkg/constants" "github.com/github/gh-aw/pkg/logger" ) @@ -381,6 +383,41 @@ func buildQmdConfig(qmdConfig *QmdToolConfig) qmdBuildConfig { return cfg } +// validateQmdPatterns performs compile-time validation of the glob patterns declared +// in each qmd checkout collection and emits warnings to stderr for common mistakes: +// +// - Empty patterns are useless and will not match any files. +// - Patterns containing a comma suggest the user intended comma-separated globs, +// which the qmd SDK does not support; each pattern must be listed separately. +// +// It also logs each checkout and its patterns at debug level so users can verify +// the compiler saw the expected configuration. +func validateQmdPatterns(qmdConfig *QmdToolConfig) { + for _, col := range qmdConfig.Checkouts { + name := col.Name + if name == "" { + name = "docs" + } + // Only validate explicitly-specified patterns; the runtime default ("**/*.md") + // is always valid and does not need compile-time checking. + if len(col.Paths) == 0 { + qmdLog.Printf("checkout %q: no paths specified, runtime will default to **/*.md", name) + continue + } + qmdLog.Printf("checkout %q: %d pattern(s): %v", name, len(col.Paths), col.Paths) + for i, p := range col.Paths { + if p == "" { + msg := fmt.Sprintf("qmd checkout %q: pattern[%d] is empty — it will not match any files; remove it or replace it with a valid glob (e.g. \"**/*.md\")", name, i) + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(msg)) + } + if strings.Contains(p, ",") { + msg := fmt.Sprintf("qmd checkout %q: pattern[%d] %q contains a comma — list each glob pattern separately under 'paths' instead of combining them with commas", name, i, p) + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(msg)) + } + } + } +} + // generateQmdCollectionCheckoutStep generates a checkout step YAML string for a qmd // collection that targets a non-default repository. Returns an empty string when the // collection uses the current repository (no checkout needed). @@ -456,6 +493,10 @@ func generateQmdIndexSteps(qmdConfig *QmdToolConfig) []string { qmdLog.Printf("Generating qmd index steps: checkouts=%d searches=%d cacheKey=%q cacheOnly=%v", len(qmdConfig.Checkouts), len(qmdConfig.Searches), cacheKey, isCacheOnlyMode) + // Validate patterns at compile time — warn about empty or comma-containing patterns + // that would silently produce an empty index at runtime. + validateQmdPatterns(qmdConfig) + version := string(constants.DefaultQmdVersion) var steps []string