From 639a28df8d7f4b7a895899cec497eb177af57000 Mon Sep 17 00:00:00 2001
From: shenxianpeng <xianpeng.shen@gmail.com>
Date: Mon, 27 Apr 2026 19:12:09 +0300
Subject: [PATCH] fix: handle AssertionError from html.parser on edge-case
 markup

Python 3.13's html.parser raises AssertionError when it encounters
certain edge-case markup, such as `<<>>`, in the content. This happens in
_RawHTMLPreprocessor, which feeds raw Markdown into HTMLParser to extract
anchor IDs.

Wrap the parser.feed() and parser.close() calls in a try/except that
catches AssertionError and RuntimeError, so the build continues with
whatever anchor IDs the parser collected before the failure.

Fixes #4001
---
 mkdocs/structure/pages.py            | 11 +++++++++--
 mkdocs/tests/structure/page_tests.py | 12 ++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/mkdocs/structure/pages.py b/mkdocs/structure/pages.py
index 8fd47dd7..74fe872a 100644
--- a/mkdocs/structure/pages.py
+++ b/mkdocs/structure/pages.py
@@ -572,8 +572,15 @@ def __init__(self) -> None:
 
     def run(self, lines: list[str]) -> list[str]:
         parser = _HTMLHandler()
-        parser.feed("\n".join(lines))
-        parser.close()
+        try:
+            parser.feed("\n".join(lines))
+            parser.close()
+        except (AssertionError, RuntimeError):
+            # Python's html.parser can raise AssertionError on edge-case
+            # markup such as "<<>>" (Python 3.13+ regression).
+            # RuntimeError is raised when the parser encounters deeply
+            # nested or otherwise problematic input.
+            pass
         self.present_anchor_ids = parser.present_anchor_ids
         return lines
 
diff --git a/mkdocs/tests/structure/page_tests.py b/mkdocs/tests/structure/page_tests.py
index 9754cde9..53af9c75 100644
--- a/mkdocs/tests/structure/page_tests.py
+++ b/mkdocs/tests/structure/page_tests.py
@@ -401,6 +401,18 @@ def test_page_title_from_markdown_html_entity(self):
     def test_page_title_from_markdown_strip_raw_html(self):
         self._test_extract_title("""# Hello <b>world</b>""", expected="Hello world")
 
+    def test_raw_html_preprocessor_edge_case_markup(self):
+        # Regression test for https://github.com/mkdocs/mkdocs/issues/4001
+        # Python 3.13's html.parser raises AssertionError on "<<>>" markup.
+        from mkdocs.structure.pages import _RawHTMLPreprocessor as RHP
+
+        proc = RHP()
+        # The preprocessor should handle edge-case markup without raising.
+        lines = ["# Title", "", "The PDF object as an `obj<</>>endobj` text block."]
+        result = proc.run(lines)
+        self.assertEqual(result, lines)
+        self.assertEqual(proc.present_anchor_ids, set())
+
     def test_page_title_from_markdown_strip_comments(self):
         self._test_extract_title(
             """# foo <!-- comment with <em> --> bar""", expected="foo bar"