From 13ffc4eedd4e807e39492f6caa276d458fda6af4 Mon Sep 17 00:00:00 2001
From: shenxianpeng <xianpeng.shen@gmail.com>
Date: Tue, 28 Apr 2026 03:15:32 +0300
Subject: [PATCH] Strip HTML tags from search entry titles

Sanitize page and section titles in the search index by stripping
HTML tags with the existing _strip_tags utility from
mkdocs.utils.rendering. Without this, inline HTML that Markdown
rendering leaves in a title (e.g. <code>) appears as raw markup in
search results, which is both a UI issue and a potential XSS vector.

Fixes #3560
---
 mkdocs/contrib/search/search_index.py |  7 +++++++
 mkdocs/tests/search_tests.py          | 29 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
diff --git a/mkdocs/contrib/search/search_index.py b/mkdocs/contrib/search/search_index.py
index 62d09d87..be24c1f9 100644
--- a/mkdocs/contrib/search/search_index.py
+++ b/mkdocs/contrib/search/search_index.py
@@ -8,6 +8,8 @@
 from html.parser import HTMLParser
 from typing import TYPE_CHECKING
 
+from mkdocs.utils.rendering import _strip_tags
+
 if TYPE_CHECKING:
     from mkdocs.structure.pages import Page
     from mkdocs.structure.toc import AnchorLink, TableOfContents
@@ -50,6 +52,11 @@ def _add_entry(self, title: str | None, text: str, loc: str) -> None:
         text = text.replace("\u00a0", " ")
         text = re.sub(r"[ \t\n\r\f\v]+", " ", text.strip())
 
+        # Strip HTML tags from the title to prevent raw HTML from appearing
+        # in search results (which could also be an XSS vector).
+        if title is not None:
+            title = _strip_tags(title)
+
         self._entries.append({"title": title, "text": text, "location": loc})
 
     def add_entry_from_context(self, page: Page) -> None:
diff --git a/mkdocs/tests/search_tests.py b/mkdocs/tests/search_tests.py
index 946e5e9c..7e9986e4 100644
--- a/mkdocs/tests/search_tests.py
+++ b/mkdocs/tests/search_tests.py
@@ -651,3 +651,32 @@ def test_prebuild_index_node(self, mock_popen):
         self.assertEqual(mock_popen.call_count, 1)
         self.assertEqual(mock_popen_obj.communicate.call_count, 1)
         self.assertEqual(result, expected)
+
+    def test_html_stripped_from_titles(self):
+        """HTML tags in page and section titles are stripped from search entries."""
+        plugin = search.SearchPlugin()
+        errors, warnings = plugin.load_config({})
+        self.assertEqual(errors, [])
+        self.assertEqual(warnings, [])
+
+        config = load_config(plugins=["search"])
+        # A page title with inline HTML from Markdown (e.g. `<code>foo</code>`)
+        page = Page(
+            "The <code>mkdocs</code> Project",
+            File(
+                "index.md", config.docs_dir, config.site_dir, config.use_directory_urls
+            ),
+            config,
+        )
+        page.content = '<h1 id="heading-1">Heading 1</h1>\n<p>Content</p>'
+        page.markdown = "# Heading 1\n\nContent"
+        page.toc = get_toc(get_markdown_toc(page.markdown))
+
+        index = search_index.SearchIndex(**plugin.config)
+        index.add_entry_from_context(page)
+        # The TOC above is built from plain "# Heading 1", so its title has no tags;
+        # add a tagged section title directly to exercise stripping in _add_entry.
+        index._add_entry("Heading <em>one</em>", "Content", "#heading-one")
+
+        titles = [entry["title"] for entry in index._entries]
+        self.assertEqual(titles, ["The mkdocs Project", "Heading 1", "Heading one"])