From dcd6b4d1de6571c6bc61ff3b8cbdd8987e1af0a9 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Tue, 17 Mar 2026 19:46:52 -0700 Subject: [PATCH] fix(toc): inject _Toc bookmarks so exported DOCX TOC links work without manual Update Table --- .../helpers/toc-bookmark-sync.test.ts | 59 +++++ .../helpers/toc-bookmark-sync.ts | 232 ++++++++++++++++++ .../helpers/toc-entry-builder.test.ts | 32 +++ .../helpers/toc-entry-builder.ts | 5 +- .../plan-engine/toc-wrappers.ts | 38 ++- 5 files changed, 357 insertions(+), 9 deletions(-) create mode 100644 packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.test.ts create mode 100644 packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.ts diff --git a/packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.test.ts b/packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.test.ts new file mode 100644 index 0000000000..719f275249 --- /dev/null +++ b/packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it } from 'vitest'; +import { generateTocBookmarkName } from './toc-bookmark-sync.js'; + +describe('generateTocBookmarkName', () => { + it('produces a _Toc-prefixed name with only valid bookmark characters', () => { + const name = generateTocBookmarkName('some-block-id'); + expect(name).toMatch(/^_Toc[a-zA-Z0-9_]+$/); + }); + + it('escapes hyphens in UUID-style block IDs', () => { + expect(generateTocBookmarkName('ba2b746a-930a-4baf-93d2-4d65637194d1')).toBe( + '_Tocba2b746a_2d930a_2d4baf_2d93d2_2d4d65637194d1', + ); + }); + + it('passes through pure alphanumeric paraId inputs unchanged', () => { + expect(generateTocBookmarkName('41964671')).toBe('_Toc41964671'); + }); + + it('escapes literal underscores to prevent ambiguity', () => { + expect(generateTocBookmarkName('a_b')).toBe('_Toca__b'); + }); + + it('is deterministic for the same input', () => { + const a = 
generateTocBookmarkName('abc-123'); + const b = generateTocBookmarkName('abc-123'); + expect(a).toBe(b); + }); + + it('produces different names for different inputs', () => { + const a = generateTocBookmarkName('heading-1'); + const b = generateTocBookmarkName('heading-2'); + expect(a).not.toBe(b); + }); + + it('does not collide for punctuation-folding pairs like p-1 vs p1', () => { + const a = generateTocBookmarkName('p-1'); + const b = generateTocBookmarkName('p1'); + expect(a).not.toBe(b); + }); + + it('does not collide for underscore vs hyphen pairs like a_b vs a-b', () => { + const a = generateTocBookmarkName('a_b'); + const b = generateTocBookmarkName('a-b'); + expect(a).not.toBe(b); + }); + + it('does not collide for inputs that collided under the old FNV-1a hash', () => { + const a = generateTocBookmarkName('id-u4-ehdfkc7l'); + const b = generateTocBookmarkName('id-f6q-l70lxz94'); + expect(a).not.toBe(b); + }); + + it('does not collide for hyphenated paragraph IDs like P-ABCDEF01 vs PABCDEF01', () => { + const a = generateTocBookmarkName('P-ABCDEF01'); + const b = generateTocBookmarkName('PABCDEF01'); + expect(a).not.toBe(b); + }); +}); diff --git a/packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.ts b/packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.ts new file mode 100644 index 0000000000..e57bac0055 --- /dev/null +++ b/packages/super-editor/src/document-api-adapters/helpers/toc-bookmark-sync.ts @@ -0,0 +1,232 @@ +/** + * TOC bookmark synchronization — ensures `_Toc` bookmarks exist around + * headings referenced by TOC entry hyperlinks. + * + * Word's TOC `<w:hyperlink w:anchor="_Toc...">` elements require matching + * `<w:bookmarkStart w:name="_Toc...">` / `<w:bookmarkEnd>` pairs around + * the target heading. Without them, TOC links in the exported DOCX + * are broken until the user manually runs "Update Table" in Word. + * + * This module generates deterministic `_Toc`-prefixed bookmark names + * and injects the bookmark nodes after TOC content is materialized.
+ */ + +import type { Node as ProseMirrorNode } from 'prosemirror-model'; +import type { Editor } from '../../core/Editor.js'; + +const TOC_BOOKMARK_PREFIX = '_Toc'; + +// --------------------------------------------------------------------------- +// Bookmark name generation +// --------------------------------------------------------------------------- + +/** + * Generates a deterministic `_Toc`-prefixed bookmark name from a block ID. + * + * Uses percent-style encoding to produce a valid OOXML bookmark name that is + * **injective** — no two distinct block IDs can produce the same output. + * + * Encoding rules (using `_` as escape character, applied per UTF-16 code unit): + * - ASCII letters and digits pass through unchanged + * - `_` is escaped as `__` (escape-the-escape) + * - Any other code unit is escaped as `_xx` (two-digit lowercase hex) up to U+00FF, or `_uxxxx` (four digits) above it + * + * Examples: + * - `ba2b746a-930a-...` → `_Tocba2b746a_2d930a_2d...` + * - `p-1` → `_Tocp_2d1` + * - `p1` → `_Tocp1` (no collision with `p-1`) + */ +export function generateTocBookmarkName(blockId: string): string { + return `${TOC_BOOKMARK_PREFIX}${encodeBlockId(blockId)}`; +} + +/** + * Injective encoding of a block ID into valid bookmark name characters. + * Uses `_` as the escape character: literal `_` → `__`, other code units → fixed-width `_xx` / `_uxxxx`. + */ +function encodeBlockId(input: string): string { + let result = ''; + for (let i = 0; i < input.length; i++) { + const ch = input[i]!; + if (ch === '_') { + result += '__'; + } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')) { + result += ch; + } else { // Fixed-width escapes: a variable-width `_101` could mean U+0101 or U+0010 followed by "1". + result += (ch > '\xff' ? '_u' : '_') + ch.charCodeAt(0).toString(16).padStart(ch > '\xff' ? 4 : 2, '0'); + } + } + return result; +} + +// --------------------------------------------------------------------------- +// Bookmark synchronization +// --------------------------------------------------------------------------- + +/** + * Ensures `_Toc` bookmarks exist around heading paragraphs referenced by + * TOC entry hyperlinks.
+ * + * Call after the TOC content has been committed to the editor state. This + * builds and dispatches a follow-up transaction that inserts any missing + * `bookmarkStart` / `bookmarkEnd` pairs. + * + * Skips silently when: + * - No sources require bookmarks + * - All required bookmarks already exist + * - The schema lacks bookmark node types (headless/test environments) + */ +export function syncTocBookmarks(editor: Editor, sources: Array<{ sdBlockId: string }>): void { + const { schema, doc } = editor.state; + if (!schema.nodes.bookmarkStart || !schema.nodes.bookmarkEnd) return; + + const needed = deduplicateByBlockId(sources); + const existing = collectExistingTocBookmarkNames(doc); + const missing = needed.filter((t) => !existing.has(t.bookmarkName)); + if (missing.length === 0) return; + + const paragraphPositions = buildBlockIdPositionMap(doc); + const insertions = resolveInsertionTargets(missing, paragraphPositions, doc); + if (insertions.length === 0) return; + + const { tr } = editor.state; + let nextId = findMaxBookmarkId(doc) + 1; + + for (const { bookmarkName, contentStart, contentEnd } of insertions) { + const bookmarkId = String(nextId++); + const endNode = schema.nodes.bookmarkEnd.create({ id: bookmarkId }); + const startNode = schema.nodes.bookmarkStart.create({ name: bookmarkName, id: bookmarkId }); + + // Insert bookmarkStart first, then bookmarkEnd. This ordering is critical + // for empty paragraphs where contentStart === contentEnd: Mapping.map() is + // right-biased, so inserting start first guarantees end maps to after start. + // tr.mapping.map() converts original-doc positions to current-transaction + // positions, accounting for earlier insertions in this loop. 
+ tr.insert(tr.mapping.map(contentStart), startNode); + tr.insert(tr.mapping.map(contentEnd), endNode); + } + + if (tr.docChanged) { + dispatchTransaction(editor, tr); + } +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +interface TocBookmarkTarget { + blockId: string; + bookmarkName: string; +} + +/** + * Deduplicates sources by blockId — each heading needs at most one bookmark. + * The injective encoding in `encodeBlockId` guarantees unique names, but the + * collision guard is retained as defense-in-depth. + */ +function deduplicateByBlockId(sources: Array<{ sdBlockId: string }>): TocBookmarkTarget[] { + const seenBlockIds = new Set<string>(); + const claimedNames = new Map<string, string>(); // bookmarkName → first blockId + const targets: TocBookmarkTarget[] = []; + + for (const { sdBlockId } of sources) { + if (seenBlockIds.has(sdBlockId)) continue; + seenBlockIds.add(sdBlockId); + + const bookmarkName = generateTocBookmarkName(sdBlockId); + const existingOwner = claimedNames.get(bookmarkName); + if (existingOwner !== undefined && existingOwner !== sdBlockId) continue; + + claimedNames.set(bookmarkName, sdBlockId); + targets.push({ blockId: sdBlockId, bookmarkName }); + } + + return targets; +} + +/** Collects names of all existing `_Toc`-prefixed bookmarks in the document. */ +function collectExistingTocBookmarkNames(doc: ProseMirrorNode): Set<string> { + const names = new Set<string>(); + doc.descendants((node) => { + if (node.type.name === 'bookmarkStart') { + const name = node.attrs?.name as string | undefined; + if (name?.startsWith(TOC_BOOKMARK_PREFIX)) names.add(name); + } + return true; + }); + return names; +} + +/** Maps block IDs (sdBlockId or paraId) to paragraph positions.
*/ +function buildBlockIdPositionMap(doc: ProseMirrorNode): Map<string, number> { + const map = new Map<string, number>(); + doc.descendants((node, pos) => { + if (node.type.name === 'paragraph') { + const id = (node.attrs?.sdBlockId ?? node.attrs?.paraId) as string | undefined; + if (id && !map.has(id)) map.set(id, pos); + } + return true; + }); + return map; +} + +interface BookmarkInsertion { + bookmarkName: string; + /** Position of the first inline content inside the paragraph (paragraphPos + 1). */ + contentStart: number; + /** Position just before the paragraph's closing boundary (paragraphPos + nodeSize - 1). */ + contentEnd: number; +} + +/** + * Resolves which paragraphs need bookmark insertions and sorts them + * descending by position for safe back-to-front processing. + */ +function resolveInsertionTargets( + missing: TocBookmarkTarget[], + positions: Map<string, number>, + doc: ProseMirrorNode, +): BookmarkInsertion[] { + const result: BookmarkInsertion[] = []; + + for (const { blockId, bookmarkName } of missing) { + const pos = positions.get(blockId); + if (pos === undefined) continue; + + const node = doc.nodeAt(pos); + if (!node || node.type.name !== 'paragraph') continue; + + result.push({ + bookmarkName, + contentStart: pos + 1, + contentEnd: pos + node.nodeSize - 1, + }); + } + + // Descending position order so each insertion only shifts positions we've + // already processed, keeping earlier mapped positions correct. + result.sort((a, b) => b.contentStart - a.contentStart); + return result; +} + +/** Scans the document for the highest existing bookmark numeric ID. */ +function findMaxBookmarkId(doc: ProseMirrorNode): number { + let maxId = -1; + doc.descendants((node) => { + if (node.type.name !== 'bookmarkStart' && node.type.name !== 'bookmarkEnd') return true; + const raw = node.attrs?.id; + const id = typeof raw === 'string' ? parseInt(raw, 10) : typeof raw === 'number' ?
raw : NaN; + if (!isNaN(id) && id > maxId) maxId = id; + return true; + }); + return maxId; +} + +function dispatchTransaction(editor: Editor, tr: unknown): void { + if (typeof editor.dispatch === 'function') { + editor.dispatch(tr as Parameters<Editor['dispatch']>[0]); + } else if (typeof editor.view?.dispatch === 'function') { + editor.view.dispatch(tr as Parameters<NonNullable<Editor['view']>['dispatch']>[0]); + } +} diff --git a/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.test.ts b/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.test.ts index 733deeb8a5..b2cb18ec79 100644 --- a/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.test.ts +++ b/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from 'vitest'; import { buildTocEntryParagraphs, type TocSource } from './toc-entry-builder.js'; +import { generateTocBookmarkName } from './toc-bookmark-sync.js'; import type { TocSwitchConfig } from '@superdoc/document-api'; const BASE_SOURCE: TocSource = { @@ -18,6 +19,37 @@ function makeConfig(display: TocSwitchConfig['display'] = {}): TocSwitchConfig { } describe('buildTocEntryParagraphs', () => { + describe('hyperlink anchors', () => { + it('uses a _Toc bookmark name as the hyperlink anchor, not the raw sdBlockId', () => { + const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true })); + const textNode = paragraphs[0]!.content[0] as { marks?: Array<{ type: string; attrs: Record<string, unknown> }> }; + const linkMark = textNode.marks?.find((m) => m.type === 'link'); + + expect(linkMark).toBeDefined(); + expect(linkMark!.attrs.anchor).toMatch(/^_Toc[a-zA-Z0-9_]+$/); + expect(linkMark!.attrs.anchor).toBe(generateTocBookmarkName(BASE_SOURCE.sdBlockId)); + expect(linkMark!.attrs.anchor).not.toBe(BASE_SOURCE.sdBlockId); + }); + + it('produces the same anchor for the same sdBlockId across calls', () => { + const first =
buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true })); + const second = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true })); + + const getAnchor = (paragraphs: typeof first) => { + const node = paragraphs[0]!.content[0] as { marks?: Array<{ attrs: Record<string, unknown> }> }; + return node.marks?.[0]?.attrs.anchor; + }; + + expect(getAnchor(first)).toBe(getAnchor(second)); + }); + + it('does not add link mark when hyperlinks display option is false', () => { + const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: false })); + const textNode = paragraphs[0]!.content[0] as { marks?: unknown[] }; + expect(textNode.marks).toBeUndefined(); + }); + }); + describe('rightAlignPageNumbers', () => { it('adds a right-aligned tab stop when rightAlignPageNumbers is true', () => { const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ rightAlignPageNumbers: true })); diff --git a/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.ts b/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.ts index 94a3197a0e..7e19a7162f 100644 --- a/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.ts +++ b/packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.ts @@ -9,6 +9,7 @@ import type { Node as ProseMirrorNode } from 'prosemirror-model'; import type { TocSwitchConfig } from '@superdoc/document-api'; import { parseTcInstruction } from '../../core/super-converter/field-references/shared/tc-switches.js'; import { getHeadingLevel } from './node-address-resolver.js'; +import { generateTocBookmarkName } from './toc-bookmark-sync.js'; // --------------------------------------------------------------------------- // Source types @@ -158,7 +159,7 @@ export interface EntryParagraphJson { * Each entry gets: * - Paragraph style: TOC{level} * - tocSourceId paragraph attribute (source heading/TC field's sdBlockId) - * - Link mark with anchor pointing
to source sdBlockId (when \h is set) + * - Link mark with anchor pointing to a `_Toc`-prefixed bookmark name (when \h is set) * - Page number placeholder "0" with tocPageNumber mark * - Separator: custom (\p switch) or default tab */ @@ -192,7 +193,7 @@ function buildEntryParagraph(source: TocSource, config: TocSwitchConfig): EntryP { type: 'link', attrs: { - anchor: source.sdBlockId, + anchor: generateTocBookmarkName(source.sdBlockId), rId: null, history: true, }, diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/toc-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/toc-wrappers.ts index 1278e1268e..94e18f0883 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/toc-wrappers.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/toc-wrappers.ts @@ -37,7 +37,13 @@ import { extractTocInfo, buildTocDiscoveryItem, } from '../helpers/toc-resolver.js'; -import { collectTocSources, buildTocEntryParagraphs, type EntryParagraphJson } from '../helpers/toc-entry-builder.js'; +import { + collectTocSources, + buildTocEntryParagraphs, + type EntryParagraphJson, + type TocSource, +} from '../helpers/toc-entry-builder.js'; +import { syncTocBookmarks } from '../helpers/toc-bookmark-sync.js'; import { paginate } from '../helpers/adapter-utils.js'; import { getRevision } from './revision-tracker.js'; import { executeDomainCommand } from './plan-wrappers.js'; @@ -246,11 +252,16 @@ function sanitizeTocContentForSchema(content: EntryParagraphJson[], editor: Edit }); } -function materializeTocContent(doc: ProseMirrorNode, config: TocSwitchConfig, editor: Editor): EntryParagraphJson[] { +interface MaterializedToc { + content: EntryParagraphJson[]; + sources: TocSource[]; +} + +function materializeTocContent(doc: ProseMirrorNode, config: TocSwitchConfig, editor: Editor): MaterializedToc { const sources = collectTocSources(doc, config); const entryParagraphs = buildTocEntryParagraphs(sources, config); const 
content = entryParagraphs.length > 0 ? entryParagraphs : NO_ENTRIES_PLACEHOLDER; - return sanitizeTocContentForSchema(content, editor); + return { content: sanitizeTocContentForSchema(content, editor), sources }; } // --------------------------------------------------------------------------- @@ -278,7 +289,11 @@ export function tocConfigureWrapper( // Patch value takes priority; fall back to existing node attr. const effectiveRightAlign = input.patch.rightAlignPageNumbers ?? (resolved.node.attrs?.rightAlignPageNumbers as boolean | undefined); - const nextContent = materializeTocContent(editor.state.doc, withRightAlign(patched, effectiveRightAlign), editor); + const { content: nextContent, sources } = materializeTocContent( + editor.state.doc, + withRightAlign(patched, effectiveRightAlign), + editor, + ); if (areTocConfigsEqual(currentConfig, patched) && !rightAlignChanged) { return tocFailure('NO_OP', 'Configuration patch produced no change.'); @@ -332,6 +347,8 @@ export function tocConfigureWrapper( return tocFailure('NO_OP', 'Configuration change could not be applied.'); } + syncTocBookmarks(editor, sources); + // Re-resolve after mutation to return the current public TOC id. // We look up by sdBlockId because instruction updates may change fallback IDs. const postMutationId = resolvePostMutationTocId(editor.state.doc, commandNodeId); @@ -361,7 +378,7 @@ function tocUpdateAll(editor: Editor, input: TocUpdateInput, options?: MutationO const resolved = resolveTocTarget(editor.state.doc, input.target); const config = parseTocInstruction(resolved.node.attrs?.instruction ?? ''); const rightAlign = resolved.node.attrs?.rightAlignPageNumbers as boolean | undefined; - const content = materializeTocContent(editor.state.doc, withRightAlign(config, rightAlign), editor); + const { content, sources } = materializeTocContent(editor.state.doc, withRightAlign(config, rightAlign), editor); // NO_OP detection: compare new content against existing before executing. 
// The PM command returns "found" (not "content changed"), so receipt-based @@ -403,7 +420,12 @@ function tocUpdateAll(editor: Editor, input: TocUpdateInput, options?: MutationO options?.expectedRevision, ); - return receiptApplied(receipt) ? tocSuccess(resolved.nodeId) : tocFailure('NO_OP', 'TOC update produced no change.'); + if (!receiptApplied(receipt)) { + return tocFailure('NO_OP', 'TOC update produced no change.'); + } + + syncTocBookmarks(editor, sources); + return tocSuccess(resolved.nodeId); } // --------------------------------------------------------------------------- @@ -642,7 +664,7 @@ export function createTableOfContentsWrapper( // Build instruction from config patch or use defaults const config = input.config ? applyTocPatchTyped(DEFAULT_TOC_CONFIG, input.config) : DEFAULT_TOC_CONFIG; const instruction = serializeTocInstruction(config); - const content = materializeTocContent( + const { content, sources } = materializeTocContent( editor.state.doc, withRightAlign(config, input.config?.rightAlignPageNumbers), editor, @@ -716,6 +738,8 @@ export function createTableOfContentsWrapper( }; } + syncTocBookmarks(editor, sources); + // Re-resolve and return the public TOC id exposed by toc.list/toc.get. const postMutationId = resolvePostMutationTocId(editor.state.doc, sdBlockId); return { success: true, toc: buildTocAddress(postMutationId) };