github · dsyme · Mar 21, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 21, 2026
diff --git a/actions/setup/js/sanitize_content.cjs b/actions/setup/js/sanitize_content.cjs
@@ -19,6 +19,7 @@ const {
   neutralizeGitHubReferences,
   removeXmlComments,
   convertXmlTags,
+  applyToNonCodeRegions,
   neutralizeBotTriggers,
   applyTruncation,
   hardenUnicodeText,
@@ -90,11 +91,11 @@ function sanitizeContent(content, maxLengthOrOptions) {
   // Neutralize @mentions with selective filtering (custom logic for allowed aliases)
   sanitized = neutralizeMentions(sanitized, allowedAliasesLowercase);
 
-  // Remove XML comments
-  sanitized = removeXmlComments(sanitized);
+  // Remove XML comments – skip code blocks and inline code
+  sanitized = applyToNonCodeRegions(sanitized, removeXmlComments);
 
-  // Convert XML tags
-  sanitized = convertXmlTags(sanitized);
+  // Convert XML tags – skip code blocks and inline code
+  sanitized = applyToNonCodeRegions(sanitized, convertXmlTags);
 
   // URI filtering (shared with core)
   sanitized = sanitizeUrlProtocols(sanitized);

diff --git a/actions/setup/js/sanitize_content.test.cjs b/actions/setup/js/sanitize_content.test.cjs
@@ -484,6 +484,81 @@ describe("sanitize_content.cjs", () => {
     });
   });
 
+  // Verifies that XML comment removal and XML tag conversion leave markdown code
+  // regions (fenced blocks and inline code spans) untouched while still being
+  // applied to ordinary text.
+  describe("XML/HTML tag conversion: code-region awareness", () => {
+    it("should preserve angle brackets inside fenced code blocks (backticks)", () => {
+      const input = "Before\n```\nVBuffer<float32> x;\n```\nAfter";
+      const result = sanitizeContent(input);
+      expect(result).toContain("VBuffer<float32>");
+      expect(result).not.toContain("VBuffer(float32)");
+    });
+
+    it("should preserve angle brackets inside fenced code blocks (tildes)", () => {
+      const input = "Before\n~~~\nfoo<int> bar;\n~~~\nAfter";
+      const result = sanitizeContent(input);
+      expect(result).toContain("foo<int>");
+      expect(result).not.toContain("foo(int)");
+    });
+
+    it("should preserve angle brackets inside inline code spans", () => {
+      const result = sanitizeContent("Use `VBuffer<float32>` for vectors");
+      expect(result).toContain("`VBuffer<float32>`");
+      expect(result).not.toContain("VBuffer(float32)");
+    });
+
+    it("should still convert angle brackets in regular text", () => {
+      const result = sanitizeContent("Watch out for <script>alert(1)</script> here");
+      expect(result).toContain("(script)");
+      expect(result).not.toContain("<script>");
+    });
+
+    it("should handle mixed content: code block with tags and regular text with tags", () => {
+      const input = "Normal: <div>bad</div>\n```\n<div>safe code</div>\n```\nNormal again: <img src=x>";
+      const result = sanitizeContent(input);
+      // Regular text: tags converted
+      expect(result).toContain("(div)bad(/div)");
+      // Code block: tags preserved
+      expect(result).toContain("<div>safe code</div>");
+      // Regular text after block: tags converted
+      expect(result).toContain("(img src=x)");
+    });
+
+    it("should handle a fenced block with a language specifier", () => {
+      const input = "```typescript\nconst arr: Array<string> = [];\n```";
+      const result = sanitizeContent(input);
+      expect(result).toContain("Array<string>");
+      expect(result).not.toContain("Array(string)");
+    });
+
+    it("should preserve XML comments inside fenced code blocks", () => {
+      const input = "```xml\n<!-- comment -->\n<tag>value</tag>\n```";
+      const result = sanitizeContent(input);
+      expect(result).toContain("<!-- comment -->");
+      expect(result).toContain("<tag>value</tag>");
+    });
+
+    it("should still remove XML comments outside code blocks", () => {
+      const result = sanitizeContent("text <!-- remove me --> end");
+      expect(result).not.toContain("<!-- remove me -->");
+      expect(result).toContain("text");
+      expect(result).toContain("end");
+    });
+
+    it("should preserve inline code with multiple backticks", () => {
+      const result = sanitizeContent("Use ``VBuffer<float32>`` inline");
+      expect(result).toContain("``VBuffer<float32>``");
+      expect(result).not.toContain("VBuffer(float32)");
+    });
+
+    it("should handle issue title example: VBuffer<float32>", () => {
+      // Simulates a title where type parameters are in inline code
+      const result = sanitizeContent("Support for `VBuffer<float32>` and `VBuffer<float>`");
+      expect(result).toContain("`VBuffer<float32>`");
+      expect(result).toContain("`VBuffer<float>`");
+      expect(result).not.toContain("VBuffer(float32)");
+      expect(result).not.toContain("VBuffer(float)");
+    });
+
+    it("should treat fenced code blocks inside blockquotes as code regions", () => {
+      // Each line of the fence carries the "> " blockquote marker.
+      const markdown = [
+        "> ```yaml",
+        "> apiVersion: v1",
+        "> kind: Pod<V1>",
+        "> ```",
+      ].join("\n");
+      const result = sanitizeContent(markdown);
+      expect(result).toContain("kind: Pod<V1>");
+      expect(result).not.toContain("kind: Pod(V1)");
+    });
+
+    it("should treat fenced code blocks inside list items as code regions", () => {
+      // The opening fence follows the "- " list marker; later lines are indented.
+      const markdown = [
+        "- ```csharp",
+        "  var list = new List<string>();",
+        "  ```",
+      ].join("\n");
+      const result = sanitizeContent(markdown);
+      expect(result).toContain("List<string>");
+      expect(result).not.toContain("List(string)");
+    });
+  });
+
   describe("ANSI escape sequence removal", () => {
     it("should remove ANSI color codes", () => {
       const result = sanitizeContent("\x1b[31mred text\x1b[0m");

diff --git a/actions/setup/js/sanitize_content_core.cjs b/actions/setup/js/sanitize_content_core.cjs
@@ -314,6 +314,186 @@ function neutralizeAllMentions(s) {
   });
 }
 
+/**
+ * Returns the character ranges [start, end) of fenced code blocks in markdown content.
+ *
+ * A fence opens on a line whose trimmed text starts with 3+ backticks or 3+ tildes.
+ * For backtick fences the rest of the line (the info string) must not contain any
+ * backtick: a line such as "```js``` <img ...>" is an inline code span followed by
+ * ordinary text, not a fence, and treating it as one would exempt everything after
+ * it from sanitization. A fence closes on a line whose trimmed text consists only of
+ * the same fence character repeated at least as many times as the opener.
+ *
+ * Each returned range spans from the first character of the opening fence line
+ * through the end of the closing fence line, including that line's trailing newline
+ * when one exists. A fence that is never closed yields a range that runs to the end
+ * of the input.
+ *
+ * @param {string} s - Markdown content to scan
+ * @returns {Array<[number, number]>} Array of [start, end) character positions
+ */
+function getFencedCodeRanges(s) {
+  /** @type {Array<[number, number]>} */
+  const ranges = [];
+  const lines = s.split("\n");
+  let pos = 0;
+  let blockStart = -1;
+  // Non-null exactly while we are inside an open fence; built once per fence
+  // instead of once per scanned line.
+  /** @type {RegExp | null} */
+  let closingFence = null;
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    const trimmed = line.trim();
+    // Offset just past this line, including its "\n" separator (the last line has none).
+    const lineEnd = pos + line.length + (i < lines.length - 1 ? 1 : 0);
+
+    if (closingFence === null) {
+      // Backtick fences: info string may not contain a backtick. Tilde fences: any info string.
+      const m = trimmed.match(/^(?:(`{3,})[^`]*$|(~{3,}))/);
+      const fence = m ? m[1] || m[2] : "";
+      if (fence) {
+        blockStart = pos;
+        // "`" and "~" are not regex metacharacters, so no escaping is required.
+        closingFence = new RegExp(`^${fence[0]}{${fence.length},}\\s*$`);
+      }
+    } else if (closingFence.test(trimmed)) {
+      ranges.push([blockStart, lineEnd]);
+      blockStart = -1;
+      closingFence = null;
+    }
+
+    pos = lineEnd;
+  }
+
+  // Unclosed fence – the remainder of the input is treated as code.
+  if (closingFence !== null) {
+    ranges.push([blockStart, s.length]);
+  }
+
+  return ranges;
+}
+
+
+/**
+ * Applies a transformation function to a text segment while skipping inline code spans
+ * (backtick-delimited sequences). The transformation is applied to each run of
+ * non-code text; inline code spans are preserved verbatim.
+ *
+ * Pairing rule: scanning left to right, a run of N backticks opens a span that is
+ * closed by the next run of exactly N backticks; runs of other lengths in between
+ * are part of the span's content. Scanning resumes after the closing run. A run with
+ * no later run of the same length is left in place as ordinary text.
+ *
+ * The scan is linear in the length of the input: backtick runs are collected once,
+ * grouped by length, and each group is walked with a cursor that only moves forward.
+ *
+ * @param {string} text - The text to process (should not contain fenced code blocks)
+ * @param {function(string): string} fn - Transformation to apply to non-code portions
+ * @returns {string} The processed text
+ */
+function applyFnOutsideInlineCode(text, fn) {
+  if (!text) return fn(text || "");
+
+  // Pass 1: record every maximal run of backticks.
+  /** @type {{ start: number, length: number }[]} */
+  const runs = [];
+  for (let idx = 0; idx < text.length; ) {
+    if (text[idx] !== "`") {
+      idx++;
+      continue;
+    }
+    const start = idx;
+    while (idx < text.length && text[idx] === "`") {
+      idx++;
+    }
+    runs.push({ start, length: idx - start });
+  }
+
+  // Pass 2: for each run length, the ascending list of indices into `runs`, plus a
+  // forward-only cursor used to find "the next run of this length after run k".
+  /** @type {Map<number, { indices: number[], cursor: number }>} */
+  const byLength = new Map();
+  runs.forEach((run, k) => {
+    let bucket = byLength.get(run.length);
+    if (bucket === undefined) {
+      bucket = { indices: [], cursor: 0 };
+      byLength.set(run.length, bucket);
+    }
+    bucket.indices.push(k);
+  });
+
+  // Pass 3: walk the runs, emitting transformed text and verbatim code spans.
+  const parts = [];
+  let textStart = 0;
+  let k = 0;
+  while (k < runs.length) {
+    const opener = runs[k];
+    const bucket = byLength.get(opener.length);
+
+    // k only increases, so entries at or before k can never be a closer again.
+    while (bucket.cursor < bucket.indices.length && bucket.indices[bucket.cursor] <= k) {
+      bucket.cursor++;
+    }
+
+    if (bucket.cursor === bucket.indices.length) {
+      // No later run of the same length: these backticks stay as regular text.
+      k++;
+      continue;
+    }
+
+    const closerIndex = bucket.indices[bucket.cursor];
+    const closer = runs[closerIndex];
+    const spanEnd = closer.start + closer.length;
+
+    if (textStart < opener.start) {
+      parts.push(fn(text.slice(textStart, opener.start)));
+    }
+    // Preserve the code span (including its delimiters) unchanged.
+    parts.push(text.slice(opener.start, spanEnd));
+    textStart = spanEnd;
+
+    // Runs inside the span are content, not delimiters: resume after the closer.
+    k = closerIndex + 1;
+  }
+
+  // Apply fn to any remaining non-code text
+  if (textStart < text.length) {
+    parts.push(fn(text.slice(textStart)));
+  }
+
+  return parts.join("");
+}
+
+
+/**
+ * Runs a transformation over markdown content while leaving code untouched.
+ *
+ * Fenced code blocks (as located by getFencedCodeRanges) are copied through verbatim.
+ * Every stretch of text between, before, or after them is handed to
+ * applyFnOutsideInlineCode, which in turn shields inline code spans from fn.
+ *
+ * Empty or non-string input is returned as-is (falsy values become "") without
+ * calling fn. If anything throws while splitting the content, fn is applied to
+ * the whole string instead.
+ *
+ * NOTE(review): fn receives each non-code stretch separately, so a construct that
+ * straddles a code span is never presented to fn as a whole – confirm this is
+ * acceptable for the sanitizers passed in.
+ *
+ * @param {string} s - Markdown content to process
+ * @param {function(string): string} fn - Transformation to apply outside code regions
+ * @returns {string} The content with the transformation applied only outside code regions
+ */
+function applyToNonCodeRegions(s, fn) {
+  if (typeof s !== "string" || s === "") {
+    return s || "";
+  }
+
+  try {
+    const pieces = [];
+    let cursor = 0;
+
+    getFencedCodeRanges(s).forEach(([blockStart, blockEnd]) => {
+      // Text ahead of this fenced block still needs inline-code protection.
+      if (blockStart > cursor) {
+        pieces.push(applyFnOutsideInlineCode(s.slice(cursor, blockStart), fn));
+      }
+      // The fenced block itself passes through unchanged.
+      pieces.push(s.slice(blockStart, blockEnd));
+      cursor = blockEnd;
+    });
+
+    // Trailing text after the final fenced block (or the whole input when there are none).
+    if (cursor < s.length) {
+      pieces.push(applyFnOutsideInlineCode(s.slice(cursor), fn));
+    }
+
+    return pieces.join("");
+  } catch (_e) {
+    // Fallback: apply fn to the entire string (conservative – redacts more, never less)
+    return fn(s);
+  }
+}
+
+
 /**
  * Removes XML comments from content
  * @param {string} s - The string to process
@@ -783,11 +963,12 @@ function sanitizeContentCore(content, maxLength, maxBotMentions) {
   // Neutralize ALL @mentions (no filtering in core version)
   sanitized = neutralizeAllMentions(sanitized);
 
-  // Remove XML comments first
-  sanitized = removeXmlComments(sanitized);
+  // Remove XML comments – skip code blocks and inline code to avoid altering code content
+  sanitized = applyToNonCodeRegions(sanitized, removeXmlComments);
 
-  // Convert XML tags to parentheses format to prevent injection
-  sanitized = convertXmlTags(sanitized);
+  // Convert XML tags to parentheses format – skip code blocks and inline code so that
+  // type parameters (e.g. VBuffer<float32>) and code containing angle brackets are preserved
+  sanitized = applyToNonCodeRegions(sanitized, convertXmlTags);
 
   // URI filtering - replace non-https protocols with "(redacted)"
   sanitized = sanitizeUrlProtocols(sanitized);
@@ -834,6 +1015,7 @@ module.exports = {
   neutralizeGitHubReferences,
   removeXmlComments,
   convertXmlTags,
+  applyToNonCodeRegions,
   neutralizeBotTriggers,
   MAX_BOT_TRIGGER_REFERENCES,
   neutralizeTemplateDelimiters,