Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions actions/setup/js/sanitize_content.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,88 @@ describe("sanitize_content.cjs", () => {
});
});

describe("XML/HTML tag conversion: dangerous attribute stripping", () => {
it("should strip ontoggle event handler from details tag", () => {
const input = '<details ontoggle="alert(document.cookie)">content</details>';
const result = sanitizeContent(input);
expect(result).toBe("<details>content</details>");
});

it("should strip style attribute enabling CSS overlay attacks from span tag", () => {
const input = '<span style="position:fixed;top:0;left:0;width:100%;height:100%">overlay</span>';
const result = sanitizeContent(input);
expect(result).toBe("<span>overlay</span>");
});

it("should strip onclick event handler from allowed tags", () => {
const result = sanitizeContent('<p onclick="stealData()">text</p>');
expect(result).toBe("<p>text</p>");
});

it("should strip onerror event handler from allowed tags", () => {
const result = sanitizeContent('<strong onerror="bad()">text</strong>');
expect(result).toBe("<strong>text</strong>");
});

it("should strip style attribute with single-quoted value", () => {
const result = sanitizeContent("<span style='color:red'>text</span>");
expect(result).toBe("<span>text</span>");
});

it("should strip style attribute with unquoted value", () => {
const result = sanitizeContent("<span style=color:red>text</span>");
expect(result).toBe("<span>text</span>");
});

it("should strip style attribute with unquoted value (simple, no special chars)", () => {
const result = sanitizeContent("<span style=red>text</span>");
expect(result).toBe("<span>text</span>");
});

it("should strip on* attributes case-insensitively (uppercase)", () => {
const result = sanitizeContent('<span ONCLICK="bad()">text</span>');
expect(result).toBe("<span>text</span>");
});

it("should strip multiple dangerous attributes from a single tag", () => {
const result = sanitizeContent('<span onclick="bad()" style="position:fixed" title="ok">text</span>');
expect(result).toBe('<span title="ok">text</span>');
});
Comment on lines +530 to +533
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests cover whitespace-separated attributes and tag-name/attribute concatenation, but they don't cover two important edge cases for this stripping approach: (1) no whitespace between attributes after a quoted value (e.g. <span title="ok"onclick="bad()">) and (2) safe attribute values containing substrings like " onclick"/" style" (to ensure the stripper doesn’t mutate values). Adding regression tests for these will help prevent bypasses and unintended content changes.

Copilot uses AI. Check for mistakes.

it("should preserve safe attributes (title, class, open) while stripping dangerous ones", () => {
const result = sanitizeContent('<details open onclick="bad()">content</details>');
expect(result).toBe("<details open>content</details>");
});

it("should preserve span title attribute after stripping style", () => {
const result = sanitizeContent('<span title="safe" style="evil">text</span>');
expect(result).toBe('<span title="safe">text</span>');
});

it("should preserve closing tags of allowed elements unchanged", () => {
// Closing tags do not carry attributes in HTML; verify they pass through unmodified
const result = sanitizeContent("<span>text</span>");
expect(result).toBe("<span>text</span>");
});

it("should strip on* attribute with extra whitespace around equals sign", () => {
const result = sanitizeContent('<span onclick = "bad()">text</span>');
expect(result).toBe("<span>text</span>");
});

it("should treat concatenated tag+attribute name as a single unknown tag (not strip attributes)", () => {
// <spanonclick="bad()"> is not a valid <span> tag — the tag name is "spanonclick"
// which is not in the allowlist, so it gets converted to parentheses entirely
const result = sanitizeContent('<spanonclick="bad()">text</spanonclick>');
expect(result).toBe('(spanonclick="bad()")text(/spanonclick)');
});

it("should not affect disallowed tags (still converted to parentheses with attributes)", () => {
const result = sanitizeContent('<div onclick="bad()">content</div>');
expect(result).toBe('(div onclick="bad()")content(/div)');
});
});

describe("XML/HTML tag conversion: code-region awareness", () => {
it("should preserve angle brackets inside fenced code blocks (backticks)", () => {
const input = "Before\n```\nVBuffer<float32> x;\n```\nAfter";
Expand Down
28 changes: 26 additions & 2 deletions actions/setup/js/sanitize_content_core.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -568,17 +568,41 @@ function convertXmlTags(s) {
return `(![CDATA[${convertedContent}]])`;
});

/**
* Strips dangerous HTML attributes from an allowed tag's content string.
* Removes on* event handler attributes (e.g. onclick, ontoggle) and style
* attributes in all quoting forms (double-quoted, single-quoted, unquoted, bare).
* Safe attributes such as title, class, open, lang, id, etc. are preserved.
*
* Note: `\s+` (requiring at least one whitespace before the attribute name) is
* intentional — HTML attributes are always separated from the tag name and from
* each other by at least one whitespace character. Using `\s*` would risk false
* matches inside tag names (e.g. matching "ong" inside "strong").
*
* @param {string} tagContent - Tag content without surrounding angle brackets
* @returns {string} Tag content with dangerous attributes removed
*/
function stripDangerousAttributes(tagContent) {
// Match: one-or-more whitespace + (on* | style) + optional =value
// Value forms: "...", '...', or unquoted (no whitespace / > / quote chars), or bare (no =)
// The unquoted form excludes >, whitespace, and all quote characters (', ", `) so it
// cannot consume the closing > of the tag or straddle other attribute values.
return tagContent.replace(/\s+(?:on\w+|style)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"'`]*))?/gi, "");
Comment on lines +577 to +590
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Relying on \s+ before on*/style assumes attributes are always whitespace-separated, but browsers commonly treat <tag a="1"onclick="..."> (no whitespace between attributes) as having an onclick attribute. This means dangerous attributes may still survive on allowlisted tags. Update the stripping logic to handle attribute boundaries even when whitespace is missing (ideally via a small HTML-attribute tokenizer), and add a regression test for the no-whitespace case.

This issue also appears on line 585 of the same file.

Suggested change
* Note: `\s+` (requiring at least one whitespace before the attribute name) is
* intentional HTML attributes are always separated from the tag name and from
* each other by at least one whitespace character. Using `\s*` would risk false
* matches inside tag names (e.g. matching "ong" inside "strong").
*
* @param {string} tagContent - Tag content without surrounding angle brackets
* @returns {string} Tag content with dangerous attributes removed
*/
function stripDangerousAttributes(tagContent) {
// Match: one-or-more whitespace + (on* | style) + optional =value
// Value forms: "...", '...', or unquoted (no whitespace / > / quote chars), or bare (no =)
// The unquoted form excludes >, whitespace, and all quote characters (', ", `) so it
// cannot consume the closing > of the tag or straddle other attribute values.
return tagContent.replace(/\s+(?:on\w+|style)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"'`]*))?/gi, "");
* This function matches dangerous attributes at HTML attribute boundaries:
* the start of the tag content or after whitespace, quotes, "/", or ">".
* This correctly handles cases like a="1"onclick="..." where there is no
* whitespace between attributes, while avoiding matches inside tag names
* such as "strong".
*
* @param {string} tagContent - Tag content without surrounding angle brackets
* @returns {string} Tag content with dangerous attributes removed
*/
function stripDangerousAttributes(tagContent) {
// Match: attribute boundary + (on* | style) + optional =value
// Boundary: start-of-string or one of whitespace, quotes, "/", or ">"
// Value forms: "...", '...', or unquoted (no whitespace / > / quote chars), or bare (no =)
return tagContent.replace(
/(^|[\s"'`\/>])(?:on\w+|style)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"'`]*))?/gi,
"$1"
);

Copilot uses AI. Check for mistakes.
}

// Convert opening tags: <tag> or <tag attr="value"> to (tag) or (tag attr="value")
// Convert closing tags: </tag> to (/tag)
// Convert self-closing tags: <tag/> or <tag /> to (tag/) or (tag /)
// But preserve allowed safe tags
// But preserve allowed safe tags (with dangerous attributes stripped)
return s.replace(/<(\/?[A-Za-z!][^>]*?)>/g, (match, tagContent) => {
// Extract tag name from the content (handle closing tags and attributes)
const tagNameMatch = tagContent.match(/^\/?\s*([A-Za-z][A-Za-z0-9]*)/);
if (tagNameMatch) {
const tagName = tagNameMatch[1].toLowerCase();
if (allowedTags.includes(tagName)) {
return match; // Preserve allowed tags
// Strip dangerous attributes (on* event handlers and style) before preserving
const sanitizedContent = stripDangerousAttributes(tagContent);
return `<${sanitizedContent}>`;
}
}
return `(${tagContent})`; // Convert other tags to parentheses
Expand Down
Loading