-
Notifications
You must be signed in to change notification settings - Fork 296
[WIP] Fix double escaping of text in titles #19545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -643,8 +643,9 @@ function applyTruncation(content, maxLength) { | |||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Decodes HTML entities to prevent bypass of @mention detection. | ||||||
| * Handles named entities (e.g., @), decimal entities (e.g., @), | ||||||
| * Decodes HTML entities to prevent bypass of @mention detection and to ensure | ||||||
| * HTML-encoded characters do not persist in sanitized output (e.g. > in titles). | ||||||
| * Handles named entities (e.g., @, >, <, &), decimal entities (e.g., @), | ||||||
| * and hex entities (e.g., @), including double-encoded variants (e.g., &commat;). | ||||||
| * | ||||||
| * @param {string} text - Input text that may contain HTML entities | ||||||
|
|
@@ -661,6 +662,16 @@ function decodeHtmlEntities(text) { | |||||
| // @ and &commat; → @ | ||||||
| result = result.replace(/&(?:amp;)?commat;/gi, "@"); | ||||||
|
|
||||||
| // Decode common named HTML entities (including double-encoded variants) | ||||||
| // These prevent HTML-encoded characters from persisting as literal entities | ||||||
| // in sanitized output (e.g. a title containing > instead of >). | ||||||
| // > and &gt; → > | ||||||
| result = result.replace(/&(?:amp;)?gt;/gi, ">"); | ||||||
| // < and &lt; → < (convertXmlTags will then neutralise any resulting tags) | ||||||
|
||||||
| // < and &lt; → < (convertXmlTags will then neutralise any resulting tags) | |
| // < and &lt; → < (convertXmlTags will then neutralize any resulting tags) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -240,5 +240,43 @@ describe("sanitize_title", () => { | |
| const sanitized = sanitizeTitle(title); | ||
| expect(sanitized).toBe("`@user` Testtitle with (redacted)"); | ||
| }); | ||
|
|
||
| it("should decode > in title to prevent literal > appearing in issues", () => { | ||
| // If an agent outputs a title with > (e.g. because the prompt context contained it), | ||
| // the sanitizer must decode it back to > so the issue title is not > in markdown. | ||
| expect(sanitizeTitle("value > threshold")).toBe("value > threshold"); | ||
| }); | ||
|
|
||
| it("should decode double-encoded &gt; in title to >", () => { | ||
| expect(sanitizeTitle("value &gt; threshold")).toBe("value > threshold"); | ||
| }); | ||
|
|
||
| it("should decode < in title and neutralize any resulting HTML tags", () => { | ||
| // <tag> → <tag> → convertXmlTags → (tag) | ||
| expect(sanitizeTitle("<script> injection")).toBe("(script) injection"); | ||
| }); | ||
|
|
||
| it("should decode & in title to &", () => { | ||
| expect(sanitizeTitle("cats & dogs")).toBe("cats & dogs"); | ||
| }); | ||
|
|
||
|
Comment on lines
+244
to
+262
|
||
| it("should be idempotent - sanitizing a title with > twice gives same result", () => { | ||
| const title = "Fix bug: value > 5"; | ||
| const once = sanitizeTitle(title); | ||
| const twice = sanitizeTitle(once); | ||
| expect(once).toBe("Fix bug: value > 5"); | ||
| expect(twice).toBe(once); | ||
| }); | ||
|
|
||
| it("should be idempotent - sanitizing > title twice should not produce >", () => { | ||
| // Simulates agent outputting > in title because prompt context had HTML-encoded > | ||
| const title = "Fix bug: value > 5"; | ||
| const once = sanitizeTitle(title); | ||
| const twice = sanitizeTitle(once); | ||
| expect(once).not.toContain(">"); | ||
| expect(once).toBe("Fix bug: value > 5"); | ||
| // Idempotency: a second pass on the decoded result should not re-introduce > | ||
| expect(twice).toBe(once); | ||
| }); | ||
| }); | ||
| }); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new entity-decoding tests cover
&gt;and&amp;but not cases where the entire entity is double-encoded (e.g.&amp;gt;,&amp;lt;...&amp;gt;,&amp;commat;user). Adding assertions for these would both prevent regressions and catch the partial-decoding/ordering issue indecodeHtmlEntities.