Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 51 additions & 13 deletions actions/setup/js/qmd_index.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,30 @@ function resolveEnvVars(p) {
return p.replace(/\$\{([^}]+)\}/g, (_, name) => process.env[name] || "");
}

/**
* Counts files matching a glob pattern in basePath using the Node.js 22+ built-in
* `fs.globSync`. Returns null when the API is unavailable (older Node.js) or an error
* occurs, so callers can treat the count as unknown and log accordingly.
* @param {string} basePath
* @param {string} pattern
* @returns {number | null}
*/
function countGlobMatches(basePath, pattern) {
try {
const { globSync } = require("node:fs");
if (typeof globSync !== "function") return null;
const files = globSync(pattern, { cwd: basePath });
return Array.isArray(files) ? files.length : null;
Comment on lines +37 to +38
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

countGlobMatches() logs "file(s) matched" but the current globSync() call will also return directory matches for patterns like docs/**, so the count can be misleading. Consider calling globSync(pattern, { cwd: basePath, withFileTypes: true }) and counting only entries where dirent.isFile() (or otherwise filtering out directories) so the logged number reflects files.

Suggested change
const files = globSync(pattern, { cwd: basePath });
return Array.isArray(files) ? files.length : null;
const entries = globSync(pattern, { cwd: basePath, withFileTypes: true });
if (!Array.isArray(entries)) return null;
const fileCount = entries.filter((entry) =>
entry && typeof entry.isFile === "function" ? entry.isFile() : true
).length;
return fileCount;

Copilot uses AI. Check for mistakes.
} catch (/** @type {any} */ err) {
// Log at debug level: the count is best-effort and a failure here (e.g. invalid
// pattern syntax, unsupported Node.js version) should not block indexing.
if (typeof core !== "undefined" && typeof core.debug === "function") {
core.debug(`countGlobMatches("${basePath}", "${pattern}") failed: ${err.message}`);
}
return null;
}
}

/**
* Returns an Octokit client for the given token env var, or the default github client.
* @param {string | undefined} tokenEnvVar
Expand Down Expand Up @@ -139,22 +163,36 @@ async function main() {
const rawPath = checkout.path;
const resolvedPath = resolveEnvVars(rawPath);
const patterns = checkout.patterns || ["**/*.md"];
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const patterns = checkout.patterns || ["**/*.md"]; will not apply the default when checkout.patterns is an empty array (which is truthy). That results in zero collections being registered for that checkout. Consider defaulting when the array is missing or empty (e.g., Array.isArray(checkout.patterns) && checkout.patterns.length > 0 ? checkout.patterns : ["**/*.md"]).

Suggested change
const patterns = checkout.patterns || ["**/*.md"];
const patterns =
Array.isArray(checkout.patterns) && checkout.patterns.length > 0
? checkout.patterns
: ["**/*.md"];

Copilot uses AI. Check for mistakes.
const pattern = patterns.join(",");

core.info(`Collection "${checkout.name}": path="${rawPath}" -> "${resolvedPath}" pattern="${pattern}"`);
core.info(`Checkout "${checkout.name}": path="${rawPath}" -> "${resolvedPath}" (${patterns.length} pattern(s))`);

const pathExists = fs.existsSync(resolvedPath);
if (!pathExists) {
core.warning(`Collection "${checkout.name}": path "${resolvedPath}" does not exist — no files will be indexed`);
} else {
core.info(`Collection "${checkout.name}": path exists`);
core.warning(`Checkout "${checkout.name}": path "${resolvedPath}" does not exist — no files will be indexed`);
}

collections[checkout.name] = {
path: resolvedPath,
pattern,
...(checkout.context ? { context: { "/": checkout.context } } : {}),
};
// Create one collection per pattern so the qmd SDK receives a single unambiguous
// glob expression per collection. Joining multiple patterns with commas does NOT work —
// the SDK treats the combined string as one literal pattern and matches nothing.
for (let pi = 0; pi < patterns.length; pi++) {
const pattern = patterns[pi];
// For single-pattern checkouts keep the original name; for multi-pattern checkouts
// append "-0", "-1", … to ensure collection names are unique in the qmd store.
const colName = patterns.length === 1 ? checkout.name : `${checkout.name}-${pi}`;

let hitInfo = "";
if (pathExists) {
const count = countGlobMatches(resolvedPath, pattern);
hitInfo = count !== null ? ` (${count} file(s) matched)` : "";
}
core.info(` -> collection "${colName}": pattern="${pattern}"${hitInfo}`);

collections[colName] = {
path: resolvedPath,
pattern,
...(checkout.context ? { context: { "/": checkout.context } } : {}),
};
}
Comment on lines +179 to +195
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When deriving colName for multi-pattern checkouts, the code can silently overwrite an existing entry in collections if another checkout already uses the same name (e.g. a separate checkout named repo-0 collides with repo pattern index 0). Consider detecting collisions before assignment and either failing fast with a clear error or generating a guaranteed-unique name (e.g. include the checkout index in the suffix).

Copilot uses AI. Check for mistakes.
}

// ── Process search entries ───────────────────────────────────────────────
Expand Down Expand Up @@ -274,10 +312,10 @@ async function main() {
core.warning(
"No files were indexed. Possible causes:\n" +
" - The checkout path does not exist or was not checked out\n" +
" - The glob patterns do not match any files (check for dotfile exclusions in patterns starting with '.')\n" +
" - The pattern uses a comma-separated list that the qmd SDK does not support\n" +
" - The glob patterns do not match any files in the configured path\n" +
" - Patterns starting with '.' require the path to exist (e.g., '.github/agents/**')\n" +
" - The checkout path resolves to an empty string (check ${ENV_VAR} placeholders in 'path')\n" +
"Review the collection log lines above for the resolved path and pattern."
"Review the collection log lines above for the resolved path, pattern, and file count."
);
}

Expand Down
73 changes: 72 additions & 1 deletion actions/setup/js/qmd_index.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ describe("qmd_index.cjs", () => {
// ── Error path: missing config ─────────────────────────────────────────────
it("fails when QMD_CONFIG_JSON is not set", async () => {
await runMain(undefined);
expect(mockCore.setFailed).toHaveBeenCalledWith("QMD_CONFIG_JSON environment variable not set");
expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("QMD_CONFIG_JSON environment variable not set"));
expect(mockStore.update).not.toHaveBeenCalled();
});

Expand Down Expand Up @@ -419,6 +419,77 @@ describe("qmd_index.cjs", () => {
expect(mockStore.close).toHaveBeenCalledOnce();
});

// ── Checkout collection: multiple patterns create sub-collections ─────────
it("creates separate collections for each pattern in a multi-pattern checkout", async () => {
const docsDir = path.join(tmpDir, "docs");
fs.mkdirSync(docsDir);
fs.writeFileSync(path.join(docsDir, "readme.md"), "# README");

await runMain({
dbPath: path.join(tmpDir, "index"),
checkouts: [{ name: "gh-aw", path: docsDir, patterns: ["**/*.md", "**/*.mdx", "**/*.txt"] }],
});

expect(mockCore.setFailed).not.toHaveBeenCalled();
expect(mockStore.update).toHaveBeenCalledOnce();

// Each pattern must produce a separate sub-collection; commas in pattern strings
// are not supported by the qmd SDK and would result in 0 files being indexed.
const infoCalls = mockCore.info.mock.calls.flat().join("\n");
expect(infoCalls).toContain('collection "gh-aw-0": pattern="**/*.md"');
expect(infoCalls).toContain('collection "gh-aw-1": pattern="**/*.mdx"');
expect(infoCalls).toContain('collection "gh-aw-2": pattern="**/*.txt"');
// The combined comma form must not appear in any collection registration line
expect(infoCalls).not.toContain("**/*.md,**/*.mdx");
expect(infoCalls).not.toContain("**/*.md, **/*.mdx");
});

// ── Checkout collection: single pattern keeps original name ───────────────
it("keeps the original collection name when checkout has exactly one pattern", async () => {
const docsDir = path.join(tmpDir, "docs");
fs.mkdirSync(docsDir);

await runMain({
dbPath: path.join(tmpDir, "index"),
checkouts: [{ name: "docs", path: docsDir, patterns: ["**/*.md"] }],
});

expect(mockCore.setFailed).not.toHaveBeenCalled();
expect(mockStore.update).toHaveBeenCalledOnce();

// Single-pattern checkout: use the plain name "docs", not "docs-0"
const infoCalls = mockCore.info.mock.calls.flat().join("\n");
expect(infoCalls).toContain('collection "docs": pattern="**/*.md"');
expect(infoCalls).not.toContain('"docs-0"');
});

// ── Checkout collection: dotfile patterns work as sub-collections ─────────
it("registers dotfile patterns (e.g. .github/**) as individual collections", async () => {
const wsDir = path.join(tmpDir, "workspace");
const ghDir = path.join(wsDir, ".github", "agents");
fs.mkdirSync(ghDir, { recursive: true });
fs.writeFileSync(path.join(ghDir, "agent.md"), "# Agent");

await runMain({
dbPath: path.join(tmpDir, "index"),
checkouts: [
{
name: "repo",
path: wsDir,
patterns: ["docs/**", ".github/agents/**", ".github/aw/**"],
},
],
});

expect(mockCore.setFailed).not.toHaveBeenCalled();
expect(mockStore.update).toHaveBeenCalledOnce();

const infoCalls = mockCore.info.mock.calls.flat().join("\n");
expect(infoCalls).toContain('collection "repo-0": pattern="docs/**"');
expect(infoCalls).toContain('collection "repo-1": pattern=".github/agents/**"');
expect(infoCalls).toContain('collection "repo-2": pattern=".github/aw/**"');
});

// ── writeSummary: checkouts section ──────────────────────────────────────
it("writes step summary with a collections table for checkouts", async () => {
const docsDir = path.join(tmpDir, "docs");
Expand Down
41 changes: 41 additions & 0 deletions pkg/workflow/qmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,11 @@ package workflow
import (
"encoding/json"
"fmt"
"os"
"strconv"
"strings"

"github.com/github/gh-aw/pkg/console"
"github.com/github/gh-aw/pkg/constants"
"github.com/github/gh-aw/pkg/logger"
)
Expand Down Expand Up @@ -381,6 +383,41 @@ func buildQmdConfig(qmdConfig *QmdToolConfig) qmdBuildConfig {
return cfg
}

// validateQmdPatterns performs compile-time validation of the glob patterns declared
// in each qmd checkout collection and emits warnings to stderr for common mistakes:
//
// - Empty patterns are useless and will not match any files.
// - Patterns containing a comma suggest the user intended comma-separated globs,
// which the qmd SDK does not support; each pattern must be listed separately.
//
// It also logs each checkout and its patterns at debug level so users can verify
// the compiler saw the expected configuration.
func validateQmdPatterns(qmdConfig *QmdToolConfig) {
for _, col := range qmdConfig.Checkouts {
name := col.Name
if name == "" {
name = "docs"
}
// Only validate explicitly-specified patterns; the runtime default ("**/*.md")
// is always valid and does not need compile-time checking.
if len(col.Paths) == 0 {
qmdLog.Printf("checkout %q: no paths specified, runtime will default to **/*.md", name)
continue
}
qmdLog.Printf("checkout %q: %d pattern(s): %v", name, len(col.Paths), col.Paths)
for i, p := range col.Paths {
if p == "" {
msg := fmt.Sprintf("qmd checkout %q: pattern[%d] is empty — it will not match any files; remove it or replace it with a valid glob (e.g. \"**/*.md\")", name, i)
fmt.Fprintln(os.Stderr, console.FormatWarningMessage(msg))
}
if strings.Contains(p, ",") {
msg := fmt.Sprintf("qmd checkout %q: pattern[%d] %q contains a comma — list each glob pattern separately under 'paths' instead of combining them with commas", name, i, p)
fmt.Fprintln(os.Stderr, console.FormatWarningMessage(msg))
}
}
}
}

// generateQmdCollectionCheckoutStep generates a checkout step YAML string for a qmd
// collection that targets a non-default repository. Returns an empty string when the
// collection uses the current repository (no checkout needed).
Expand Down Expand Up @@ -456,6 +493,10 @@ func generateQmdIndexSteps(qmdConfig *QmdToolConfig) []string {
qmdLog.Printf("Generating qmd index steps: checkouts=%d searches=%d cacheKey=%q cacheOnly=%v",
len(qmdConfig.Checkouts), len(qmdConfig.Searches), cacheKey, isCacheOnlyMode)

// Validate patterns at compile time — warn about empty or comma-containing patterns
// that would silently produce an empty index at runtime.
validateQmdPatterns(qmdConfig)

version := string(constants.DefaultQmdVersion)
var steps []string

Expand Down
Loading