-
Notifications
You must be signed in to change notification settings - Fork 132
fix(toc): inject _Toc bookmarks so exported DOCX TOC links work without manual Update Table #2431
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
harbournick
merged 1 commit into
main
from
nick/sd-2184-bug-exported-toc-requires-manual-word-update-before-section
Mar 18, 2026
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
59 changes: 59 additions & 0 deletions
59
packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.test.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| import { describe, expect, it } from 'vitest'; | ||
| import { generateTocBookmarkName } from './toc-bookmark-sync.js'; | ||
|
|
||
describe('generateTocBookmarkName', () => {
  /** Asserts that two block IDs are mapped to two different bookmark names. */
  const expectDistinctNames = (firstId: string, secondId: string): void => {
    const first = generateTocBookmarkName(firstId);
    const second = generateTocBookmarkName(secondId);
    expect(first).not.toBe(second);
  };

  it('produces a _Toc-prefixed name with only valid bookmark characters', () => {
    expect(generateTocBookmarkName('some-block-id')).toMatch(/^_Toc[a-zA-Z0-9_]+$/);
  });

  it('escapes hyphens in UUID-style block IDs', () => {
    const encoded = generateTocBookmarkName('ba2b746a-930a-4baf-93d2-4d65637194d1');
    expect(encoded).toBe('_Tocba2b746a_2d930a_2d4baf_2d93d2_2d4d65637194d1');
  });

  it('passes through pure alphanumeric paraId inputs unchanged', () => {
    const encoded = generateTocBookmarkName('41964671');
    expect(encoded).toBe('_Toc41964671');
  });

  it('escapes literal underscores to prevent ambiguity', () => {
    const encoded = generateTocBookmarkName('a_b');
    expect(encoded).toBe('_Toca__b');
  });

  it('is deterministic for the same input', () => {
    const firstCall = generateTocBookmarkName('abc-123');
    const secondCall = generateTocBookmarkName('abc-123');
    expect(firstCall).toBe(secondCall);
  });

  it('produces different names for different inputs', () => {
    expectDistinctNames('heading-1', 'heading-2');
  });

  it('does not collide for punctuation-folding pairs like p-1 vs p1', () => {
    expectDistinctNames('p-1', 'p1');
  });

  it('does not collide for underscore vs hyphen pairs like a_b vs a-b', () => {
    expectDistinctNames('a_b', 'a-b');
  });

  it('does not collide for inputs that collided under the old FNV-1a hash', () => {
    expectDistinctNames('id-u4-ehdfkc7l', 'id-f6q-l70lxz94');
  });

  it('does not collide for hyphenated paragraph IDs like P-ABCDEF01 vs PABCDEF01', () => {
    expectDistinctNames('P-ABCDEF01', 'PABCDEF01');
  });
});
232 changes: 232 additions & 0 deletions
232
packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,232 @@ | ||
| /** | ||
| * TOC bookmark synchronization — ensures `_Toc` bookmarks exist around | ||
| * headings referenced by TOC entry hyperlinks. | ||
| * | ||
| * Word's TOC `<w:hyperlink w:anchor="...">` elements require matching | ||
| * `<w:bookmarkStart w:name="...">` / `<w:bookmarkEnd>` pairs around | ||
| * the target heading. Without them, TOC links in the exported DOCX | ||
| * are broken until the user manually runs "Update Table" in Word. | ||
| * | ||
| * This module generates deterministic `_Toc`-prefixed bookmark names | ||
| * and injects the bookmark nodes after TOC content is materialized. | ||
| */ | ||
|
|
||
| import type { Node as ProseMirrorNode } from 'prosemirror-model'; | ||
| import type { Editor } from '../../core/Editor.js'; | ||
|
|
||
| const TOC_BOOKMARK_PREFIX = '_Toc'; | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Bookmark name generation | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
/**
 * Builds the bookmark name used for a heading: the `_Toc` prefix followed by
 * the escaped form of `blockId` produced by `encodeBlockId`.
 *
 * The result is deterministic — the same block ID always yields the same name.
 * For ASCII block IDs the escaping works as follows:
 * - ASCII letters and digits are copied unchanged
 * - `_` becomes `__`
 * - every other character becomes `_xx` (two lowercase hex digits)
 *
 * Examples:
 * - `ba2b746a-930a-...` → `_Tocba2b746a_2d930a_2d...`
 * - `p-1` → `_Tocp_2d1`
 * - `p1` → `_Tocp1` (distinct from the name for `p-1`)
 *
 * @param blockId - Identifier of the heading block to derive the name from.
 * @returns The `_Toc`-prefixed bookmark name.
 */
export function generateTocBookmarkName(blockId: string): string {
  const encodedId = encodeBlockId(blockId);
  return TOC_BOOKMARK_PREFIX + encodedId;
}
|
|
||
/**
 * Injective encoding of a block ID into the characters `[A-Za-z0-9_]`,
 * using `_` as the escape character.
 *
 * Each UTF-16 code unit of `input` is encoded independently:
 * - ASCII letters and digits pass through unchanged
 * - `_` becomes `__`
 * - any other code unit up to U+00FF becomes `_xx` (two lowercase hex digits)
 * - any code unit above U+00FF becomes `_uxxxx` (four lowercase hex digits)
 *
 * Every escape has a fixed width that is determined by the character right
 * after the `_` (`_`, `u`, or a hex digit), so the output can be decoded
 * unambiguously and two distinct inputs can never share an encoding. A
 * variable-width hex escape would not have this property: `'\u2dab'` and
 * `'-ab'` would both encode to `_2dab`.
 *
 * Output for inputs made only of code units up to U+00FF is identical to the
 * plain `_xx` scheme, so names generated for ASCII IDs stay stable.
 *
 * @param input - Raw block ID; may be empty, in which case `''` is returned.
 * @returns The escaped ID, containing only `[A-Za-z0-9_]`.
 */
function encodeBlockId(input: string): string {
  let encoded = '';
  for (let i = 0; i < input.length; i++) {
    const unit = input.charCodeAt(i);
    const isDigit = unit >= 0x30 && unit <= 0x39;
    const isUpper = unit >= 0x41 && unit <= 0x5a;
    const isLower = unit >= 0x61 && unit <= 0x7a;

    if (unit === 0x5f) {
      // Literal underscore: escape the escape character itself.
      encoded += '__';
    } else if (isDigit || isUpper || isLower) {
      encoded += input.charAt(i);
    } else if (unit <= 0xff) {
      encoded += `_${unit.toString(16).padStart(2, '0')}`;
    } else {
      // Wider code units get a distinct, fixed-width escape; `u` is not a hex
      // digit, so this form cannot be confused with `_xx`.
      encoded += `_u${unit.toString(16).padStart(4, '0')}`;
    }
  }
  return encoded;
}
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Bookmark synchronization | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
/**
 * Adds any missing `_Toc` bookmark pairs around the paragraphs identified by
 * `sources`, so TOC entry hyperlinks have a matching anchor.
 *
 * Intended to run once the TOC content is already part of the editor state:
 * it reads the current document, builds one follow-up transaction containing
 * every missing `bookmarkStart` / `bookmarkEnd` pair, and dispatches it.
 *
 * Returns without dispatching when:
 * - the schema has no `bookmarkStart` or `bookmarkEnd` node type
 * - every required bookmark name is already present in the document
 * - none of the missing targets can be resolved to a paragraph
 *
 * @param editor - Editor whose current state is inspected and updated.
 * @param sources - Blocks referenced by the TOC, identified by `sdBlockId`.
 */
export function syncTocBookmarks(editor: Editor, sources: Array<{ sdBlockId: string }>): void {
  const { schema, doc } = editor.state;
  const startType = schema.nodes.bookmarkStart;
  const endType = schema.nodes.bookmarkEnd;
  if (!startType || !endType) return;

  const presentNames = collectExistingTocBookmarkNames(doc);
  const absentTargets = deduplicateByBlockId(sources).filter(
    (target) => !presentNames.has(target.bookmarkName),
  );
  if (absentTargets.length === 0) return;

  const pending = resolveInsertionTargets(absentTargets, buildBlockIdPositionMap(doc), doc);
  if (pending.length === 0) return;

  const transaction = editor.state.tr;
  // New bookmark IDs continue after the highest numeric ID already in use.
  let upcomingId = findMaxBookmarkId(doc) + 1;

  for (const insertion of pending) {
    const id = String(upcomingId);
    upcomingId += 1;

    const opening = startType.create({ name: insertion.bookmarkName, id });
    const closing = endType.create({ id });

    // The start node must go in before the end node. In an empty paragraph
    // both original positions are equal; Mapping.map() is right-biased, so
    // once the start node is inserted the end position maps to just after it.
    // Positions are expressed against the original doc, and mapping them
    // through the transaction accounts for insertions made earlier in this loop.
    transaction.insert(transaction.mapping.map(insertion.contentStart), opening);
    transaction.insert(transaction.mapping.map(insertion.contentEnd), closing);
  }

  if (!transaction.docChanged) return;
  dispatchTransaction(editor, transaction);
}
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Internal helpers | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
/** A block that needs a bookmark, paired with the bookmark name derived from its ID. */
interface TocBookmarkTarget {
  blockId: string;
  bookmarkName: string;
}

/**
 * Collapses `sources` to one target per distinct `sdBlockId`, keeping the
 * order of first appearance.
 *
 * As a defensive guard, a block whose generated bookmark name has already
 * been claimed by a different block ID is dropped, so no bookmark name is
 * ever returned twice.
 *
 * @param sources - Blocks referenced by the TOC; may contain repeated IDs.
 * @returns One `{ blockId, bookmarkName }` target per retained block.
 */
function deduplicateByBlockId(sources: Array<{ sdBlockId: string }>): TocBookmarkTarget[] {
  const visitedIds = new Set<string>();
  // bookmarkName → block ID that first claimed it
  const nameOwners = new Map<string, string>();
  const uniqueTargets: TocBookmarkTarget[] = [];

  sources.forEach(({ sdBlockId: blockId }) => {
    if (visitedIds.has(blockId)) return;
    visitedIds.add(blockId);

    const bookmarkName = generateTocBookmarkName(blockId);
    const owner = nameOwners.get(bookmarkName);
    const claimedByAnotherBlock = owner !== undefined && owner !== blockId;
    if (claimedByAnotherBlock) return;

    nameOwners.set(bookmarkName, blockId);
    uniqueTargets.push({ blockId, bookmarkName });
  });

  return uniqueTargets;
}
|
|
||
/**
 * Walks the whole document and gathers the `name` attribute of every
 * `bookmarkStart` node whose name begins with the `_Toc` prefix.
 *
 * @param doc - Document to scan.
 * @returns The set of `_Toc`-prefixed bookmark names found.
 */
function collectExistingTocBookmarkNames(doc: ProseMirrorNode): Set<string> {
  const found = new Set<string>();
  doc.descendants((child) => {
    if (child.type.name !== 'bookmarkStart') return true;

    const candidate = child.attrs?.name as string | undefined;
    if (candidate?.startsWith(TOC_BOOKMARK_PREFIX)) {
      found.add(candidate);
    }
    return true;
  });
  return found;
}
|
|
||
/**
 * Indexes paragraph positions by block ID.
 *
 * Each paragraph contributes a single key: its `sdBlockId` attribute, or its
 * `paraId` attribute when `sdBlockId` is null or undefined. Paragraphs with
 * no usable ID are skipped, and when an ID occurs more than once the first
 * paragraph in document order keeps the entry.
 *
 * @param doc - Document to scan.
 * @returns Map from block ID to the position of the paragraph node.
 */
function buildBlockIdPositionMap(doc: ProseMirrorNode): Map<string, number> {
  const positionsById = new Map<string, number>();
  doc.descendants((child, offset) => {
    if (child.type.name !== 'paragraph') return true;

    const blockId = (child.attrs?.sdBlockId ?? child.attrs?.paraId) as string | undefined;
    if (!blockId || positionsById.has(blockId)) return true;

    positionsById.set(blockId, offset);
    return true;
  });
  return positionsById;
}
|
|
||
/** Where the two nodes of one bookmark pair go, in original-document positions. */
interface BookmarkInsertion {
  bookmarkName: string;
  /** Position of the first inline content inside the paragraph (paragraphPos + 1). */
  contentStart: number;
  /** Position just before the paragraph's closing boundary (paragraphPos + nodeSize - 1). */
  contentEnd: number;
}

/**
 * Converts the missing bookmark targets into concrete insertion positions.
 *
 * Targets whose block ID has no recorded position, or whose position does not
 * hold a paragraph node, are left out. The result is ordered from the highest
 * `contentStart` to the lowest so callers can work back-to-front through the
 * document.
 *
 * @param missing - Targets that still need a bookmark.
 * @param positions - Block ID → paragraph position lookup.
 * @param doc - Document the positions refer to.
 * @returns Insertion descriptors sorted by descending `contentStart`.
 */
function resolveInsertionTargets(
  missing: TocBookmarkTarget[],
  positions: Map<string, number>,
  doc: ProseMirrorNode,
): BookmarkInsertion[] {
  const insertions: BookmarkInsertion[] = [];

  missing.forEach((target) => {
    const paragraphPos = positions.get(target.blockId);
    if (paragraphPos === undefined) return;

    const paragraph = doc.nodeAt(paragraphPos);
    const isParagraph = paragraph !== null && paragraph !== undefined && paragraph.type.name === 'paragraph';
    if (!paragraph || !isParagraph) return;

    const contentStart = paragraphPos + 1;
    const contentEnd = paragraphPos + paragraph.nodeSize - 1;
    insertions.push({ bookmarkName: target.bookmarkName, contentStart, contentEnd });
  });

  // Highest position first: inserting there only shifts content that has
  // already been handled, so positions still to be processed stay valid.
  insertions.sort((left, right) => right.contentStart - left.contentStart);
  return insertions;
}
|
|
||
/**
 * Finds the largest numeric `id` attribute among all `bookmarkStart` and
 * `bookmarkEnd` nodes in the document.
 *
 * String IDs are parsed as base-10 integers; IDs that are neither strings nor
 * numbers, or that do not parse to a number, are ignored.
 *
 * @param doc - Document to scan.
 * @returns The highest ID found, or `-1` when no bookmark has a numeric ID.
 */
function findMaxBookmarkId(doc: ProseMirrorNode): number {
  let highest = -1;
  doc.descendants((child) => {
    const typeName = child.type.name;
    if (typeName === 'bookmarkStart' || typeName === 'bookmarkEnd') {
      const rawId = child.attrs?.id;
      let numericId = NaN;
      if (typeof rawId === 'string') {
        numericId = parseInt(rawId, 10);
      } else if (typeof rawId === 'number') {
        numericId = rawId;
      }
      if (!Number.isNaN(numericId) && numericId > highest) {
        highest = numericId;
      }
    }
    return true;
  });
  return highest;
}
|
|
||
/**
 * Sends `tr` through whichever dispatch entry point the editor exposes.
 *
 * `editor.dispatch` is used when it is a function; otherwise
 * `editor.view.dispatch` is used when a view with a dispatch function exists.
 * When neither is available the transaction is not applied and nothing is
 * reported.
 *
 * @param editor - Editor that should receive the transaction.
 * @param tr - Transaction to dispatch; cast to the parameter type of the chosen dispatch function.
 */
function dispatchTransaction(editor: Editor, tr: unknown): void {
  if (typeof editor.dispatch === 'function') {
    editor.dispatch(tr as Parameters<Editor['dispatch']>[0]);
    return;
  }

  if (typeof editor.view?.dispatch === 'function') {
    editor.view.dispatch(tr as Parameters<NonNullable<Editor['view']>['dispatch']>[0]);
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.