Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions actions/setup/js/sanitize_content.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -1663,6 +1663,51 @@ describe("sanitize_content.cjs", () => {
const result = sanitizeContent("@author is allowed", { allowedAliases: ["author"] });
expect(result).toBe("@author is allowed");
});

it("should decode &gt; entity to > to prevent literal &gt; in output", () => {
  // The input has to carry the encoded entity: a raw ">" would make this test
  // pass even if the sanitizer performed no decoding at all.
  const result = sanitizeContent("value &gt; threshold");
  expect(result).not.toContain("&gt;");
  expect(result).toBe("value > threshold");
});

it("should decode double-encoded &amp;gt; entity to >", () => {
  // Double-encoded form: the leading & of &gt; was itself encoded as &amp;.
  // A raw ">" input would not exercise the double-encoded decoding path.
  const result = sanitizeContent("value &amp;gt; threshold");
  expect(result).not.toContain("&gt;");
  expect(result).toBe("value > threshold");
});

it("should decode &lt; entity to < and then neutralize resulting tags", () => {
  // Decoding turns &lt;script&gt; into <script>; convertXmlTags then rewrites it as (script).
  const encodedTag = "&lt;script&gt; injection";
  expect(sanitizeContent(encodedTag)).toBe("(script) injection");
});

it("should decode &amp; entity to &", () => {
  // A single-encoded ampersand must come back as a plain "&".
  const encoded = "cats &amp; dogs";
  expect(sanitizeContent(encoded)).toBe("cats & dogs");
});

it("should decode double-encoded &amp;amp; entity to &", () => {
  // &amp;amp; is the ampersand encoded twice; it must collapse to a single "&".
  const doubleEncoded = "cats &amp;amp; dogs";
  expect(sanitizeContent(doubleEncoded)).toBe("cats & dogs");
});
Comment on lines +1667 to +1691
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new entity-decoding tests cover &amp;gt; and &amp;amp; but not cases where the entire entity is double-encoded (e.g. &amp;amp;gt;, &amp;amp;lt;...&amp;amp;gt;, &amp;amp;commat;user). Adding assertions for these would both prevent regressions and catch the partial-decoding/ordering issue in decodeHtmlEntities.

Copilot uses AI. Check for mistakes.

it("should be idempotent - applying sanitizeContent twice gives same result for > character", () => {
  // A literal ">" should pass through unchanged, and re-sanitizing must not alter it.
  const expected = "value > threshold";
  const firstPass = sanitizeContent(expected);
  const secondPass = sanitizeContent(firstPass);

  expect(firstPass).toBe("value > threshold");
  expect(secondPass).toBe(firstPass);
});

it("should be idempotent - sanitizing &gt; twice should not produce &gt; in output", () => {
  // An agent may echo &gt; because its context contained the encoded form;
  // the sanitizer is expected to decode it rather than pass it through.
  const firstPass = sanitizeContent("value &gt; threshold");
  const secondPass = sanitizeContent(firstPass);

  expect(firstPass).not.toContain("&gt;");
  expect(firstPass).toBe("value > threshold");
  // Running the sanitizer again on decoded text must leave it unchanged.
  expect(secondPass).toBe(firstPass);
});
});

describe("template delimiter neutralization (T24)", () => {
Expand Down
15 changes: 13 additions & 2 deletions actions/setup/js/sanitize_content_core.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -643,8 +643,9 @@ function applyTruncation(content, maxLength) {
}

/**
* Decodes HTML entities to prevent bypass of @mention detection.
* Handles named entities (e.g., &commat;), decimal entities (e.g., &#64;),
* Decodes HTML entities to prevent bypass of @mention detection and to ensure
* HTML-encoded characters do not persist in sanitized output (e.g. &gt; in titles).
* Handles named entities (e.g., &commat;, &gt;, &lt;, &amp;), decimal entities (e.g., &#64;),
* and hex entities (e.g., &#x40;), including double-encoded variants (e.g., &amp;commat;).
*
* @param {string} text - Input text that may contain HTML entities
Expand All @@ -661,6 +662,16 @@ function decodeHtmlEntities(text) {
// &commat; and &amp;commat; → @
result = result.replace(/&(?:amp;)?commat;/gi, "@");

// Decode common named HTML entities (including double-encoded variants)
// These prevent HTML-encoded characters from persisting as literal entities
// in sanitized output (e.g. a title containing &gt; instead of >).
// &gt; and &amp;gt; → >
result = result.replace(/&(?:amp;)?gt;/gi, ">");
// &lt; and &amp;lt; → < (convertXmlTags will then neutralise any resulting tags)
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo/inconsistency in comment: the codebase uses American spelling "neutralize" throughout (e.g., function name neutralizeBotTriggers and other comments), but this comment says "neutralise". Consider updating to "neutralize" for consistency.

Suggested change
// &lt; and &amp;lt; → < (convertXmlTags will then neutralise any resulting tags)
// &lt; and &amp;lt; → < (convertXmlTags will then neutralize any resulting tags)

Copilot uses AI. Check for mistakes.
result = result.replace(/&(?:amp;)?lt;/gi, "<");
// &amp; and &amp;amp; → & (decoded after gt/lt so &amp;gt; is already handled above)
result = result.replace(/&(?:amp;)?amp;/gi, "&");

// Decode decimal entities (including double-encoded variants)
// &#64; and &amp;#64; → @
// &#NNN; and &amp;#NNN; → corresponding character
Expand Down
38 changes: 38 additions & 0 deletions actions/setup/js/sanitize_title.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,43 @@ describe("sanitize_title", () => {
const sanitized = sanitizeTitle(title);
expect(sanitized).toBe("`@user` Testtitle with (redacted)");
});

it("should decode &gt; in title to prevent literal &gt; appearing in issues", () => {
  // Agents may emit &gt; in a title when their prompt context contained it;
  // the sanitized title must show ">" rather than the literal entity.
  const encodedTitle = "value &gt; threshold";
  const sanitized = sanitizeTitle(encodedTitle);
  expect(sanitized).toBe("value > threshold");
});

it("should decode double-encoded &amp;gt; in title to >", () => {
  // Double-encoded variant: the & of &gt; is itself encoded as &amp;.
  const doubleEncodedTitle = "value &amp;gt; threshold";
  const sanitized = sanitizeTitle(doubleEncodedTitle);
  expect(sanitized).toBe("value > threshold");
});

it("should decode &lt; in title and neutralize any resulting HTML tags", () => {
  // Pipeline under test: &lt;tag&gt; is decoded to <tag>, then convertXmlTags yields (tag).
  const encodedTagTitle = "&lt;script&gt; injection";
  const sanitized = sanitizeTitle(encodedTagTitle);
  expect(sanitized).toBe("(script) injection");
});

it("should decode &amp; in title to &", () => {
  // An encoded ampersand in a title must be rendered as a plain "&".
  const encodedTitle = "cats &amp; dogs";
  const sanitized = sanitizeTitle(encodedTitle);
  expect(sanitized).toBe("cats & dogs");
});

Comment on lines +244 to +262
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding title-focused coverage for fully double-encoded entities like &amp;amp;gt; / &amp;amp;lt;...&amp;amp;gt; and &amp;amp;commat;user. These inputs are plausible after multiple encoding passes and can reveal ordering issues in the underlying HTML-entity decoding.

Copilot uses AI. Check for mistakes.
it("should be idempotent - sanitizing a title with > twice gives same result", () => {
  // A title with a literal ">" should be unchanged by one pass and stable across two.
  const firstPass = sanitizeTitle("Fix bug: value > 5");
  const secondPass = sanitizeTitle(firstPass);

  expect(firstPass).toBe("Fix bug: value > 5");
  expect(secondPass).toBe(firstPass);
});

it("should be idempotent - sanitizing &gt; title twice should not produce &gt;", () => {
  // Models an agent that emits &gt; in a title because its prompt context
  // held the HTML-encoded form of ">".
  const firstPass = sanitizeTitle("Fix bug: value &gt; 5");
  const secondPass = sanitizeTitle(firstPass);

  expect(firstPass).not.toContain("&gt;");
  expect(firstPass).toBe("Fix bug: value > 5");
  // Sanitizing the decoded title again must not bring the entity back.
  expect(secondPass).toBe(firstPass);
});
});
});
Loading