From 13ffc4eedd4e807e39492f6caa276d458fda6af4 Mon Sep 17 00:00:00 2001 From: shenxianpeng Date: Tue, 28 Apr 2026 03:15:32 +0300 Subject: [PATCH] Strip HTML tags from search entry titles Sanitize page and section titles in the search index by stripping any HTML tags using the existing _strip_tags utility. This prevents raw HTML from appearing in search results, which is both a UI issue and a potential XSS vector when page titles contain inline HTML from Markdown rendering. Fixes #3560 --- mkdocs/contrib/search/search_index.py | 7 +++++++ mkdocs/tests/search_tests.py | 29 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/mkdocs/contrib/search/search_index.py b/mkdocs/contrib/search/search_index.py index 62d09d87..be24c1f9 100644 --- a/mkdocs/contrib/search/search_index.py +++ b/mkdocs/contrib/search/search_index.py @@ -8,6 +8,8 @@ from html.parser import HTMLParser from typing import TYPE_CHECKING +from mkdocs.utils.rendering import _strip_tags + if TYPE_CHECKING: from mkdocs.structure.pages import Page from mkdocs.structure.toc import AnchorLink, TableOfContents @@ -50,6 +52,11 @@ def _add_entry(self, title: str | None, text: str, loc: str) -> None: text = text.replace("\u00a0", " ") text = re.sub(r"[ \t\n\r\f\v]+", " ", text.strip()) + # Strip HTML tags from the title to prevent raw HTML from appearing + # in search results (which could also be an XSS vector). 
+ if title is not None: + title = _strip_tags(title) + self._entries.append({"title": title, "text": text, "location": loc}) def add_entry_from_context(self, page: Page) -> None: diff --git a/mkdocs/tests/search_tests.py b/mkdocs/tests/search_tests.py index 946e5e9c..7e9986e4 100644 --- a/mkdocs/tests/search_tests.py +++ b/mkdocs/tests/search_tests.py @@ -651,3 +651,32 @@ def test_prebuild_index_node(self, mock_popen): self.assertEqual(mock_popen.call_count, 1) self.assertEqual(mock_popen_obj.communicate.call_count, 1) self.assertEqual(result, expected) + + def test_html_stripped_from_titles(self): + """HTML tags in page and section titles are stripped from search entries.""" + plugin = search.SearchPlugin() + errors, warnings = plugin.load_config({}) + self.assertEqual(errors, []) + self.assertEqual(warnings, []) + + config = load_config(plugins=["search"]) + # A page title with inline HTML from Markdown (e.g. `foo`) + page = Page( + "The mkdocs Project", + File( + "index.md", config.docs_dir, config.site_dir, config.use_directory_urls + ), + config, + ) + page.content = """ +

<h1 id="heading-1">Heading one</h1>

+

<p>Content</p>

""" + page.markdown = "# Heading 1\n\nContent" + page.toc = get_toc(get_markdown_toc(page.markdown)) + + index = search_index.SearchIndex(**plugin.config) + index.add_entry_from_context(page) + + self.assertEqual(len(index._entries), 2) + self.assertEqual(index._entries[0]["title"], "The mkdocs Project") + self.assertEqual(index._entries[1]["title"], "Heading 1")