From 639a28df8d7f4b7a895899cec497eb177af57000 Mon Sep 17 00:00:00 2001 From: shenxianpeng Date: Mon, 27 Apr 2026 19:12:09 +0300 Subject: [PATCH] fix: handle AssertionError from html.parser on edge-case markup Python 3.13's html.parser raises AssertionError when encountering certain edge-case markup like `<<>>` in the content. This occurs in _RawHTMLPreprocessor, which feeds raw markdown into HTMLParser to extract anchor IDs. Wrap the parser.feed() and parser.close() calls in a try/except to catch AssertionError and RuntimeError, allowing the build to continue gracefully. Fixes #4001 --- mkdocs/structure/pages.py | 11 +++++++++-- mkdocs/tests/structure/page_tests.py | 12 ++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/mkdocs/structure/pages.py b/mkdocs/structure/pages.py index 8fd47dd7..74fe872a 100644 --- a/mkdocs/structure/pages.py +++ b/mkdocs/structure/pages.py @@ -572,8 +572,15 @@ def __init__(self) -> None: def run(self, lines: list[str]) -> list[str]: parser = _HTMLHandler() - parser.feed("\n".join(lines)) - parser.close() + try: + parser.feed("\n".join(lines)) + parser.close() + except (AssertionError, RuntimeError): + # Python's html.parser can throw AssertionError on edge-case + # markup such as "<<>>" (Python 3.13+ regression). + # RuntimeError is raised when the parser encounters deeply + # nested or otherwise problematic input. 
+ pass self.present_anchor_ids = parser.present_anchor_ids return lines diff --git a/mkdocs/tests/structure/page_tests.py b/mkdocs/tests/structure/page_tests.py index 9754cde9..53af9c75 100644 --- a/mkdocs/tests/structure/page_tests.py +++ b/mkdocs/tests/structure/page_tests.py @@ -401,6 +401,18 @@ def test_page_title_from_markdown_html_entity(self): def test_page_title_from_markdown_strip_raw_html(self): self._test_extract_title("""# Hello world""", expected="Hello world") + def test_raw_html_preprocessor_edge_case_markup(self): + # Regression test for https://github.com/mkdocs/mkdocs/issues/4001 + # Python 3.13's html.parser throws AssertionError on "<<>>" markup. + from mkdocs.structure.pages import _RawHTMLPreprocessor as RHP + + proc = RHP() + # The preprocessor should handle edge-case markup without raising. + lines = ["# Title", "", "The PDF object as an `obj<>endobj` text block."] + result = proc.run(lines) + self.assertEqual(result, lines) + self.assertEqual(proc.present_anchor_ids, set()) + def test_page_title_from_markdown_strip_comments(self): self._test_extract_title( """# foo bar""", expected="foo bar"