diff --git a/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts b/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts index 929058980f75..58ee24c26edd 100644 --- a/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts +++ b/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts @@ -9,19 +9,18 @@ import u from 'unist-builder'; import {removePosition} from 'unist-util-remove-position'; -import {toString} from 'mdast-util-to-string'; import {visit} from 'unist-util-visit'; import {escapeMarkdownHeadingIds} from '@docusaurus/utils'; import plugin from '../index'; import type {PluginOptions} from '../index'; import type {Plugin} from 'unified'; import type {Parent} from 'unist'; -import type {Root} from 'mdast'; +import type {Heading, Root} from 'mdast'; async function process( input: string, plugins: Plugin[] = [], - options: PluginOptions = {anchorsMaintainCase: false}, + options: Partial = {anchorsMaintainCase: false}, format: 'md' | 'mdx' = 'mdx', ): Promise { const {remark} = await import('remark'); @@ -46,11 +45,11 @@ async function process( return result as unknown as Root; } -function heading(label: string | null, id: string) { +function h(text: string | null, depth: number, id: string) { return u( 'heading', - {depth: 2, data: {id, hProperties: {id}}}, - label ? [u('text', label)] : [], + {depth, data: {id, hProperties: {id}}}, + text ? [u('text', text)] : [], ); } @@ -58,11 +57,7 @@ describe('headings remark plugin', () => { it('patches `id`s and `data.hProperties.id', async () => { const result = await process('# Normal\n\n## Table of Contents\n\n# Baz\n'); const expected = u('root', [ - u( - 'heading', - {depth: 1, data: {hProperties: {id: 'normal'}, id: 'normal'}}, - [u('text', 'Normal')], - ), + h('Normal', 1, 'normal'), u( 'heading', { @@ -133,9 +128,13 @@ describe('headings remark plugin', () => { '## Something also', ].join('\n\n'), [ - () => (root) => { - (root as Parent).children[1]!.data = {hProperties: {id: 'here'}}; - (root as Parent).children[3]!.data = {hProperties: {id: 'something'}}; + function customIdPlugin() { + return (root) => { + (root as Parent).children[1]!.data = {hProperties: {id: 'here'}}; + (root as Parent).children[3]!.data = { + hProperties: {id: 'something'}, + }; + }; }, ], ); @@ -216,6 +215,15 @@ describe('headings remark plugin', () => { '', ].join('\n'), ); + + function heading(label: string | null, id: string) { + return u( + 'heading', + {depth: 2, data: {id, hProperties: {id}}}, + label ? [u('text', label)] : [], + ); + } + const expected = u('root', [ heading('I ♥ unicode', 'i--unicode'), heading('Dash-dash', 'dash-dash'), @@ -278,23 +286,26 @@ describe('headings remark plugin', () => { expect(result).toEqual(expected); }); - describe('creates custom headings ids', () => { - async function headingIdFor(input: string, format: 'md' | 'mdx' = 'mdx') { - const result = await process( - input, - [], - {anchorsMaintainCase: false}, - format, - ); - const headers: {text: string; id: string}[] = []; + describe('headings ids', () => { + async function processHeading( + input: string, + format: 'md' | 'mdx' = 'mdx', + ): Promise { + const result = await process(input, [], {}, format); + const headings: Heading[] = []; visit(result, 'heading', (node) => { - headers.push({ - text: toString(node), - id: (node.data! as {id: string}).id, - }); + headings.push(node); }); - expect(headers).toHaveLength(1); - return headers[0]!.id; + expect(headings).toHaveLength(1); + return headings[0]!; + } + + async function headingIdFor( + input: string, + format: 'md' | 'mdx' = 'mdx', + ): Promise { + const {data} = await processHeading(input, format); + return (data! as {id: string}).id; } describe('historical syntax', () => { @@ -347,6 +358,181 @@ describe('headings remark plugin', () => { await testHeadingIds('mdx'); }); }); + + describe('comment syntax', () => { + describe('works for format CommonMark', () => { + it('extracts id from HTML comment with # prefix at end of heading', async () => { + await expect( + headingIdFor('# Heading One ', 'md'), + ).resolves.toEqual('custom_h1'); + + await expect( + headingIdFor('## Heading Two ', 'md'), + ).resolves.toEqual('custom-heading-two'); + + await expect( + headingIdFor('# Snake-cased ', 'md'), + ).resolves.toEqual('this_is_custom_id'); + }); + + it('extracts id when comment is the only heading content', async () => { + await expect( + headingIdFor('# ', 'md'), + ).resolves.toEqual('id-only'); + }); + + it('extracts id when heading has inline markup before comment', async () => { + await expect( + headingIdFor('# With *Bold* ', 'md'), + ).resolves.toEqual('custom-with-bold'); + }); + + it('does NOT extract id when HTML comment is not the last node', async () => { + await expect( + headingIdFor('# some text', 'md'), + ).resolves.not.toEqual('custom-id'); + }); + + it('does NOT extract id when HTML comment has no # prefix', async () => { + const id = await headingIdFor('# Heading ', 'md'); + expect(id).not.toEqual('my-id'); + expect(id).toMatchInlineSnapshot(`"heading-"`); + }); + + it('does NOT extract id when HTML comment is just #', async () => { + const id = await headingIdFor('## Heading ', 'md'); + expect(id).not.toEqual(''); + expect(id).toMatchInlineSnapshot(`"heading-"`); + }); + + it('extracts id when MDX comment has spaces', async () => { + const id = await headingIdFor( + '## Heading ', + 'md', + ); + expect(id).toEqual('id1'); + }); + + it('removes the comment node from heading AST', async () => { + const heading = await processHeading( + '## Heading ', + 'md', + ); + expect(heading).toEqual(h('Heading', 2, 'my-id')); + }); + + it('removes the comment node when it is the only heading content', async () => { + const heading = await processHeading('## ', 'md'); + expect(heading).toEqual(h(null, 2, 'id-only')); + }); + + it('does NOT support MDX comment syntax {/* #id */} in CommonMark', async () => { + // In CommonMark (no remark-mdx), {/* #id */} is regular text + const id = await headingIdFor('# Heading {/* #my-id */}', 'md'); + expect(id).not.toEqual('my-id'); + }); + }); + + describe('works for format MDX', () => { + it('extracts id from MDX comment with # prefix at end of heading', async () => { + await expect( + headingIdFor('# Heading One {/* #custom_h1 */}', 'mdx'), + ).resolves.toEqual('custom_h1'); + + await expect( + headingIdFor('## Heading Two {/* #custom-heading-two */}', 'mdx'), + ).resolves.toEqual('custom-heading-two'); + + await expect( + headingIdFor('# Snake-cased {/* #this_is_custom_id */}', 'mdx'), + ).resolves.toEqual('this_is_custom_id'); + }); + + it('extracts id when comment is the only heading content', async () => { + await expect( + headingIdFor('# {/* #id-only */}', 'mdx'), + ).resolves.toEqual('id-only'); + }); + + it('extracts id when heading has inline markup before comment', async () => { + await expect( + headingIdFor('# With *Bold* {/* #custom-with-bold */}', 'mdx'), + ).resolves.toEqual('custom-with-bold'); + }); + + it('does NOT extract id when MDX comment is not the last node', async () => { + const id = await headingIdFor( + '# {/* #custom-id */} some text', + 'mdx', + ); + expect(id).not.toEqual('custom-id'); + expect(id).toMatchInlineSnapshot(`"-custom-id--some-text"`); + }); + + it('does NOT extract id when MDX comment is not the only part of the expression', async () => { + const id = await headingIdFor( + '# some text {someExpression /* #custom-id */}', + 'mdx', + ); + expect(id).not.toEqual('custom-id'); + expect(id).toMatchInlineSnapshot( + `"some-text-someexpression--custom-id-"`, + ); + }); + + it('does NOT extract id when MDX expression has multiple comments', async () => { + const id = await headingIdFor( + '# some text {/* #id1 *//* #id2 */}', + 'mdx', + ); + expect(id).not.toEqual('id1'); + expect(id).not.toEqual('id2'); + expect(id).toMatchInlineSnapshot(`"some-text--id1--id2-"`); + }); + + it('does NOT extract id when MDX comment has no # prefix', async () => { + const id = await headingIdFor('## Heading {/* my-id */}', 'mdx'); + expect(id).not.toEqual('my-id'); + expect(id).toMatchInlineSnapshot(`"heading--my-id-"`); + }); + + it('does NOT extract id when MDX comment is just #', async () => { + const id = await headingIdFor('## Heading {/* # */}', 'mdx'); + expect(id).not.toEqual(''); + expect(id).toMatchInlineSnapshot(`"heading---"`); + }); + + it('extracts id when MDX comment has spaces', async () => { + const id = await headingIdFor( + '## Heading {/* #id1 whatever comment #id2 */}', + 'mdx', + ); + expect(id).toEqual('id1'); + }); + + it('removes the comment node from heading AST', async () => { + const heading = await processHeading( + '## Heading {/* #my-id */}', + 'mdx', + ); + expect(heading).toEqual(h('Heading', 2, 'my-id')); + }); + + it('removes the comment node when it is the only heading content', async () => { + const heading = await processHeading('## {/* #id-only */}', 'mdx'); + expect(heading).toEqual(h(null, 2, 'id-only')); + }); + + it('does NOT support HTML comment syntax in MDX', async () => { + // MDX throws a parse error for HTML comments inside headings + await expect( + processHeading('## Heading ', 'mdx'), + ).rejects.toThrowErrorMatchingInlineSnapshot( + `"Unexpected character \`!\` (U+0021) before name, expected a character that can start a name, such as a letter, \`$\`, or \`_\` (note: to create a comment in MDX, use \`{/* text */}\`)"`, + ); + }); + }); + }); }); it('preserve anchors case then "anchorsMaintainCase" option is set', async () => { diff --git a/packages/docusaurus-mdx-loader/src/remark/headings/index.ts b/packages/docusaurus-mdx-loader/src/remark/headings/index.ts index 64d1b8123371..ad5a3db0c47f 100644 --- a/packages/docusaurus-mdx-loader/src/remark/headings/index.ts +++ b/packages/docusaurus-mdx-loader/src/remark/headings/index.ts @@ -9,12 +9,100 @@ import {parseMarkdownHeadingId, createSlugger} from '@docusaurus/utils'; import type {Plugin, Transformer} from 'unified'; -import type {Root, Text} from 'mdast'; +import type {Heading, Root, Text} from 'mdast'; export interface PluginOptions { anchorsMaintainCase: boolean; } +function getCommentContentHeadingId(comment: string): string | undefined { + // If the comment has spaces, we only consider the first part + const firstPart = comment.trim().split(' ')[0]; + // We ignore comments that don't start with # on purpose + // Forcing users to use a leading # is more explicit + // In the future it's possible we'd want to allow other types of comments + // For example class comments like {/* .my-class */} + if (firstPart?.startsWith('#')) { + return firstPart.slice(1) || undefined; + } + return undefined; +} + +function getCommentHeadingId(heading: Heading): string | undefined { + const lastChild = heading.children.at(-1); + + // MDX comment: {/* my-id */} or {/* #my-id */} + if ( + lastChild && + lastChild.type === 'mdxTextExpression' && + lastChild.data?.estree + ) { + const program = lastChild.data.estree; + // We only extract the id from single-comment MDX expressions + // ✅ {/* #my-id */} + // ❌ {/* #my-id */ /* #my-id2 */} + // ❌ {someExpression /* #my-id */} + if (program.body.length === 0 && program.comments?.length === 1) { + const commentContent = program.comments[0]!.value; + return getCommentContentHeadingId(commentContent); + } + } + + // HTML comment: or + if (lastChild?.type === 'html') { + const match = /^$/.exec( + (lastChild as unknown as {value: string}).value, + ); + if (match?.groups?.comment) { + const commentContent = match.groups.comment; + return getCommentContentHeadingId(commentContent); + } + } + + return undefined; +} + +// Try to find an explicit id in MD/MDX comments +function extractCommentId(heading: Heading) { + const commentId = getCommentHeadingId(heading); + if (commentId) { + // Remove the last comment node + heading.children.pop(); + // Trim the trailing space from the last text node ("text " → "text") + const newLast = heading.children.at(-1); + if (newLast?.type === 'text') { + newLast.value = newLast.value.trimEnd(); + } + return commentId; + } + return undefined; +} + +// Try to find an explicit id in the heading text (legacy {#id} syntax) +function extractLegacySyntaxId(heading: Heading, headingText: string) { + const parsedHeading = parseMarkdownHeadingId(headingText); + // Remove the heading text from its id (legacy syntax) + if (parsedHeading.id) { + // When there's an id, it is always in the last child node + const lastNode = heading.children.at(-1) as Text; + if (heading.children.length > 1) { + const lastNodeText = parseMarkdownHeadingId(lastNode.value).text; + // When the last part contains text + id, remove the id + if (lastNodeText) { + lastNode.value = lastNodeText; + } + // When last part contains only the id: completely remove that node + else { + heading.children.pop(); + } + } else { + lastNode.value = parsedHeading.text; + } + return parsedHeading.id; + } + return undefined; +} + const plugin: Plugin = function plugin({ anchorsMaintainCase, }): Transformer { @@ -22,56 +110,43 @@ const plugin: Plugin = function plugin({ const {toString} = await import('mdast-util-to-string'); const {visit} = await import('unist-util-visit'); + function getHeadingText(heading: Heading) { + const headingTextNodes = heading.children.filter( + ({type}) => !['html', 'jsx'].includes(type), + ); + return toString(headingTextNodes.length > 0 ? headingTextNodes : heading); + } + const slugs = createSlugger(); - visit(root, 'heading', (headingNode) => { - const data = headingNode.data ?? (headingNode.data = {}); - const properties = (data.hProperties || (data.hProperties = {})) as { - id: string; - }; - let {id} = properties; - - if (id) { - id = slugs.slug(id, {maintainCase: true}); - } else { - const headingTextNodes = headingNode.children.filter( - ({type}) => !['html', 'jsx'].includes(type), - ); - const heading = toString( - headingTextNodes.length > 0 ? headingTextNodes : headingNode, - ); + visit(root, 'heading', (heading) => { + const data = heading.data ?? (heading.data = {}); + const properties = data.hProperties ?? (data.hProperties = {}); - // Support explicit heading IDs - const parsedHeading = parseMarkdownHeadingId(heading); - - id = - parsedHeading.id ?? - slugs.slug(heading, {maintainCase: anchorsMaintainCase}); - - if (parsedHeading.id) { - // When there's an id, it is always in the last child node - // Sometimes heading is in multiple "parts" (** syntax creates a child - // node): - // ## part1 *part2* part3 {#id} - const lastNode = headingNode.children[ - headingNode.children.length - 1 - ] as Text; - - if (headingNode.children.length > 1) { - const lastNodeText = parseMarkdownHeadingId(lastNode.value).text; - // When last part contains test+id, remove the id - if (lastNodeText) { - lastNode.value = lastNodeText; - } - // When last part contains only the id: completely remove that node - else { - headingNode.children.pop(); - } - } else { - lastNode.value = parsedHeading.text; - } + // Gives the ability to provide/write a remark plugin that sets an id + // When an id is already set, we use it instead of running our own plugin + function extractAlreadyExistingId() { + if (properties.id) { + // Not sure why we need to slugify here, historical code + return slugs.slug(properties.id, {maintainCase: true}); } + return undefined; } + function extractIdFromText() { + const headingText = getHeadingText(heading); + return ( + extractLegacySyntaxId(heading, headingText) ?? + slugs.slug(headingText, {maintainCase: anchorsMaintainCase}) + ); + } + + // All the ways we can extract an id, ordered by priority + // /!\ the extraction methods can perform AST cleanup side effects + const id = + extractAlreadyExistingId() ?? + extractCommentId(heading) ?? + extractIdFromText(); + data.id = id; properties.id = id; }); diff --git a/packages/docusaurus-mdx-loader/src/types.d.mts b/packages/docusaurus-mdx-loader/src/types.d.mts index 84d1c03cef60..012c32ec73ef 100644 --- a/packages/docusaurus-mdx-loader/src/types.d.mts +++ b/packages/docusaurus-mdx-loader/src/types.d.mts @@ -33,4 +33,8 @@ declare module 'mdast' { hName?: string; hProperties?: Record; } + + interface HeadingData { + hProperties?: {id?: string}; + } } diff --git a/project-words.txt b/project-words.txt index d72ecff0aca2..a72e9528bc69 100644 --- a/project-words.txt +++ b/project-words.txt @@ -297,6 +297,7 @@ sluggify Smoosh Solana solana +someexpression spâce stackblitz stackoverflow diff --git a/website/docs/guides/markdown-features/markdown-features-toc.mdx b/website/docs/guides/markdown-features/markdown-features-toc.mdx index 8b73297a9077..58ab892a79fb 100644 --- a/website/docs/guides/markdown-features/markdown-features-toc.mdx +++ b/website/docs/guides/markdown-features/markdown-features-toc.mdx @@ -5,6 +5,8 @@ slug: /markdown-features/toc --- import BrowserWindow from '@site/src/components/BrowserWindow'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; # Headings and Table of contents @@ -39,14 +41,41 @@ By default, Docusaurus will generate heading IDs for you, based on the heading t Generated IDs have **some limitations**: - The ID might not look good -- You might want to **change or translate** the text without updating the existing ID +- You might want to **change or translate** the text without updating the existing ID to avoid breaking links -A special Markdown syntax lets you set an **explicit heading id**: +A special syntax lets you set an **explicit heading id**. + + + + +```mdx-code-block +{ + '### Hello World {/* #my-explicit-id */}\n\n' + + '### Hello World \u007B#my-explicit-id}\n' +} +``` + + + ```mdx-code-block -{'### Hello World \u007B#my-explicit-id}\n'} +{ + '### Hello World \n\n' + + '### Hello World \u007B#my-explicit-id}\n' +} ``` + + + +The heading id comment must start with `#`, be placed at the **end** of the heading and will be stripped from the rendered output. + +:::warning Legacy `{#id}` syntax for MDX files + +For MDX files, the `{#id}` syntax should be avoided. Since Docusaurus v3 and MDX v2, it is **not valid MDX syntax anymore**. It can break external tools that support MDX (IDEs and linters). It is only supported in Docusaurus for backward compatibility, thanks to the `markdown.mdx1Compat.headingIds` config option. The comment-based syntax should be preferred for MDX documents. + +::: + :::tip Use the **[`write-heading-ids`](../../cli.mdx#docusaurus-write-heading-ids-sitedir)** CLI command to add explicit IDs to all your Markdown documents.