From 3fee96ffa3af61aeacf49927abb9701bea1ca291 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Wed, 25 Feb 2026 20:59:14 -0800 Subject: [PATCH 1/5] feat(document-api): markdown handling --- apps/cli/src/__tests__/cli.test.ts | 132 ++++- apps/cli/src/__tests__/conformance/harness.ts | 2 +- .../src/__tests__/conformance/scenarios.ts | 2 +- apps/cli/src/__tests__/host.test.ts | 2 +- apps/cli/src/__tests__/lib/validate.test.ts | 4 +- .../src/cli/cli-only-operation-definitions.ts | 3 +- apps/cli/src/cli/operation-params.ts | 2 + apps/cli/src/commands/open.ts | 51 +- apps/cli/src/lib/document.ts | 37 ++ apps/cli/src/lib/errors.ts | 1 + apps/cli/src/lib/validate.ts | 8 +- apps/cli/src/types/super-editor-adapters.d.ts | 24 +- apps/cli/tsconfig.json | 1 + apps/docs/document-api/common-workflows.mdx | 4 +- apps/docs/document-api/reference/index.mdx | 2 +- apps/docs/document-api/reference/insert.mdx | 25 +- apps/docs/document-engine/sdks.mdx | 4 +- packages/document-api/src/README.md | 4 +- .../src/contract/contract.test.ts | 6 +- .../src/contract/operation-definitions.ts | 5 +- packages/document-api/src/contract/schemas.ts | 5 +- packages/document-api/src/index.test.ts | 152 ++++- packages/document-api/src/index.ts | 2 +- packages/document-api/src/insert/insert.ts | 54 +- .../document-api/src/invoke/invoke.test.ts | 14 +- .../src/overview-examples.test.ts | 15 +- packages/document-api/src/types/receipt.ts | 1 + packages/document-api/src/write/write.ts | 3 + packages/super-editor/package.json | 3 +- .../src/core/Editor.api-contracts.test.js | 4 +- .../importMarkdown.integration.test.js | 51 +- .../src/core/helpers/importMarkdown.js | 62 ++- .../src/core/helpers/importMarkdown.test.js | 61 +- .../src/core/helpers/markdown/index.ts | 17 + .../helpers/markdown/markdownToPmContent.ts | 102 ++++ .../helpers/markdown/mdastToProseMirror.ts | 525 ++++++++++++++++++ .../core/helpers/markdown/parseMarkdownAst.ts | 22 + .../src/core/helpers/markdown/types.ts | 64 +++ 
.../assemble-adapters.ts | 3 +- .../plan-engine/index.ts | 2 +- .../insert-structured-wrapper.test.ts | 304 ++++++++++ .../plan-engine/plan-wrappers.ts | 223 ++++++++ pnpm-lock.yaml | 25 +- pnpm-workspace.yaml | 2 + tests/behavior/helpers/document-api.ts | 2 +- .../programmatic-tracked-change.spec.ts | 2 +- 46 files changed, 1859 insertions(+), 180 deletions(-) create mode 100644 packages/super-editor/src/core/helpers/markdown/index.ts create mode 100644 packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts create mode 100644 packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts create mode 100644 packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts create mode 100644 packages/super-editor/src/core/helpers/markdown/types.ts create mode 100644 packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts diff --git a/apps/cli/src/__tests__/cli.test.ts b/apps/cli/src/__tests__/cli.test.ts index e2dc905d93..1685a0608e 100644 --- a/apps/cli/src/__tests__/cli.test.ts +++ b/apps/cli/src/__tests__/cli.test.ts @@ -279,11 +279,11 @@ describe('superdoc CLI', () => { expect(result.stdout).not.toContain(' Document path or stdin'); }); - test('describe command doc.insert includes --target and --text flags', async () => { + test('describe command doc.insert includes --target and --value flags', async () => { const result = await runCli(['describe', 'command', 'doc.insert', '--output', 'pretty']); expect(result.code).toBe(0); expect(result.stdout).toContain('--target'); - expect(result.stdout).toContain('--text'); + expect(result.stdout).toContain('--value'); }); test('call executes an operation from canonical input payload', async () => { @@ -501,7 +501,7 @@ describe('superdoc CLI', () => { '--input-json', JSON.stringify({ doc: source, - text: 'CALL_INSERT_TOKEN_1597', + value: 'CALL_INSERT_TOKEN_1597', out, }), ]); @@ -838,7 +838,7 @@ describe('superdoc CLI', () => { insertSource, '--target-json', 
JSON.stringify(collapsedTarget), - '--text', + '--value', 'CLI_INSERT_TOKEN_1597', '--out', insertOut, @@ -861,7 +861,7 @@ describe('superdoc CLI', () => { const insertResult = await runCli([ 'insert', insertSource, - '--text', + '--value', 'CLI_DEFAULT_INSERT_TOKEN_1597', '--out', insertOut, @@ -911,7 +911,7 @@ describe('superdoc CLI', () => { const insertResult = await runCli([ 'insert', blankFirstOut, - '--text', + '--value', 'CLI_BLANK_INSERT_TOKEN_1597', '--out', insertOut, @@ -956,7 +956,7 @@ describe('superdoc CLI', () => { target.blockId, '--offset', '0', - '--text', + '--value', 'CLI_BLOCKID_OFFSET_INSERT_1597', '--out', insertOut, @@ -989,7 +989,7 @@ describe('superdoc CLI', () => { insertSource, '--block-id', target.blockId, - '--text', + '--value', 'CLI_BLOCKID_ONLY_INSERT_1597', '--out', insertOut, @@ -1012,7 +1012,16 @@ describe('superdoc CLI', () => { const insertOut = join(TEST_DIR, 'insert-offset-no-blockid-out.docx'); await copyFile(SAMPLE_DOC, insertSource); - const result = await runCli(['insert', insertSource, '--offset', '5', '--text', 'should-fail', '--out', insertOut]); + const result = await runCli([ + 'insert', + insertSource, + '--offset', + '5', + '--value', + 'should-fail', + '--out', + insertOut, + ]); expect(result.code).toBe(1); const envelope = parseJsonOutput(result); @@ -1208,7 +1217,7 @@ describe('superdoc CLI', () => { deleteSource, '--target-json', JSON.stringify(collapsedTarget), - '--text', + '--value', 'CLI_DELETE_TOKEN_1597', '--out', insertedOut, @@ -1661,7 +1670,7 @@ describe('superdoc CLI', () => { const openResult = await runCli(['open', SAMPLE_DOC]); expect(openResult.code).toBe(0); - const insertResult = await runCli(['insert', '--text', 'STATEFUL_DEFAULT_INSERT_1597']); + const insertResult = await runCli(['insert', '--value', 'STATEFUL_DEFAULT_INSERT_1597']); expect(insertResult.code).toBe(0); const insertEnvelope = parseJsonOutput< @@ -1690,7 +1699,7 @@ describe('superdoc CLI', () => { const insertResult = await 
runCli([ 'insert', - '--text', + '--value', 'STATEFUL_INSERT_EXPORT_FAILURE_1597', '--out', blockedOutPath, @@ -1938,7 +1947,7 @@ describe('superdoc CLI', () => { test('session save persists a specific session and keeps it open', async () => { await runCli(['open', SAMPLE_DOC, '--session', 'alpha']); - const insertResult = await runCli(['insert', '--session', 'alpha', '--text', 'SESSION_SAVE_TOKEN_1597']); + const insertResult = await runCli(['insert', '--session', 'alpha', '--value', 'SESSION_SAVE_TOKEN_1597']); expect(insertResult.code).toBe(0); const savedOut = join(TEST_DIR, 'session-save-alpha.docx'); @@ -1997,4 +2006,101 @@ describe('superdoc CLI', () => { const findEnvelope = parseJsonOutput(findResult); expect(findEnvelope.error.code).toBe('PROJECT_CONTEXT_MISMATCH'); }); + + // -- open --content-override / --override-type validation -- + + test('open rejects --content-override without --override-type', async () => { + const result = await runCli(['open', SAMPLE_DOC, '--content-override', '# Hello']); + expect(result.code).toBe(1); + const envelope = parseJsonOutput(result); + expect(envelope.error.code).toBe('INVALID_ARGUMENT'); + expect(envelope.error.message).toContain('--override-type'); + }); + + test('open rejects --override-type without --content-override', async () => { + const result = await runCli(['open', SAMPLE_DOC, '--override-type', 'markdown']); + expect(result.code).toBe(1); + const envelope = parseJsonOutput(result); + expect(envelope.error.code).toBe('INVALID_ARGUMENT'); + expect(envelope.error.message).toContain('--content-override'); + }); + + test('open rejects invalid --override-type value', async () => { + const result = await runCli(['open', SAMPLE_DOC, '--content-override', 'x', '--override-type', 'xml']); + expect(result.code).toBe(1); + const envelope = parseJsonOutput(result); + expect(envelope.error.code).toBe('INVALID_ARGUMENT'); + expect(envelope.error.message).toContain('markdown, html, text'); + }); + + test('open with 
--override-type text applies content semantically', async () => { + const openResult = await runCli([ + 'open', + SAMPLE_DOC, + '--content-override', + 'Override text content', + '--override-type', + 'text', + ]); + expect(openResult.code).toBe(0); + + // Verify the override text is actually present in the document + const findResult = await runCli(['find', '--type', 'text', '--pattern', 'Override text content']); + expect(findResult.code).toBe(0); + const findEnvelope = parseJsonOutput>(findResult); + expect(findEnvelope.data.result.total).toBeGreaterThan(0); + + const closeResult = await runCli(['close', '--discard']); + expect(closeResult.code).toBe(0); + }); + + test('open with --override-type markdown applies content semantically', async () => { + const openResult = await runCli([ + 'open', + SAMPLE_DOC, + '--content-override', + '# Markdown Override Heading', + '--override-type', + 'markdown', + ]); + expect(openResult.code).toBe(0); + + // Verify the markdown content is present in the document + const findResult = await runCli(['find', '--type', 'text', '--pattern', 'Markdown Override Heading']); + expect(findResult.code).toBe(0); + const findEnvelope = parseJsonOutput>(findResult); + expect(findEnvelope.data.result.total).toBeGreaterThan(0); + + const closeResult = await runCli(['close', '--discard']); + expect(closeResult.code).toBe(0); + }); + + test('open with --override-type html rejects in headless CLI', async () => { + const openResult = await runCli([ + 'open', + SAMPLE_DOC, + '--content-override', + '

HTML Override

', + '--override-type', + 'html', + ]); + expect(openResult.code).toBe(1); + const envelope = parseJsonOutput(openResult); + expect(envelope.error.code).toBe('UNSUPPORTED_FORMAT'); + expect(envelope.error.message).toContain('HTML'); + }); + + test('open with --content-override empty string is accepted (not silently ignored)', async () => { + const openResult = await runCli(['open', SAMPLE_DOC, '--content-override', '', '--override-type', 'text']); + expect(openResult.code).toBe(0); + + // Verify original document content was replaced (find for known original text should fail) + const findOriginal = await runCli(['find', '--type', 'text', '--pattern', 'Wilde']); + expect(findOriginal.code).toBe(0); + const findEnvelope = parseJsonOutput>(findOriginal); + expect(findEnvelope.data.result.total).toBe(0); + + const closeResult = await runCli(['close', '--discard']); + expect(closeResult.code).toBe(0); + }); }); diff --git a/apps/cli/src/__tests__/conformance/harness.ts b/apps/cli/src/__tests__/conformance/harness.ts index aa3fc6f80e..de55364c9c 100644 --- a/apps/cli/src/__tests__/conformance/harness.ts +++ b/apps/cli/src/__tests__/conformance/harness.ts @@ -294,7 +294,7 @@ export class ConformanceHarness { sourceDoc, '--target-json', JSON.stringify(collapsedTarget), - '--text', + '--value', 'TRACKED_CONFORMANCE_TOKEN', '--change-mode', 'tracked', diff --git a/apps/cli/src/__tests__/conformance/scenarios.ts b/apps/cli/src/__tests__/conformance/scenarios.ts index f121023f26..ebdaf3d844 100644 --- a/apps/cli/src/__tests__/conformance/scenarios.ts +++ b/apps/cli/src/__tests__/conformance/scenarios.ts @@ -580,7 +580,7 @@ export const SUCCESS_SCENARIOS = { docPath, '--target-json', JSON.stringify(collapsed), - '--text', + '--value', 'CONFORMANCE_INSERT', '--out', harness.createOutputPath('doc-insert-output'), diff --git a/apps/cli/src/__tests__/host.test.ts b/apps/cli/src/__tests__/host.test.ts index 4a0b268753..ad2b75ede0 100644 --- a/apps/cli/src/__tests__/host.test.ts +++ 
b/apps/cli/src/__tests__/host.test.ts @@ -285,7 +285,7 @@ describe('CLI host mode', () => { docPath, '--target-json', JSON.stringify(collapsedTarget), - '--text', + '--value', 'HOST_CONFORMANCE_INSERT', '--out', path.join(stateDir, 'host-conformance-insert.docx'), diff --git a/apps/cli/src/__tests__/lib/validate.test.ts b/apps/cli/src/__tests__/lib/validate.test.ts index 4c902237b2..9a7aedd699 100644 --- a/apps/cli/src/__tests__/lib/validate.test.ts +++ b/apps/cli/src/__tests__/lib/validate.test.ts @@ -142,7 +142,7 @@ describe('validateCreateParagraphInput', () => { expect(result.at).toEqual({ kind: 'before', - nodeId: 'p1', + target: { kind: 'block', nodeType: 'paragraph', nodeId: 'p1' }, }); }); @@ -156,7 +156,7 @@ describe('validateCreateParagraphInput', () => { expect(result.at).toEqual({ kind: 'after', - nodeId: 'p2', + target: { kind: 'block', nodeType: 'paragraph', nodeId: 'p2' }, }); }); diff --git a/apps/cli/src/cli/cli-only-operation-definitions.ts b/apps/cli/src/cli/cli-only-operation-definitions.ts index 6eb59d1ce6..309a0660ac 100644 --- a/apps/cli/src/cli/cli-only-operation-definitions.ts +++ b/apps/cli/src/cli/cli-only-operation-definitions.ts @@ -40,7 +40,8 @@ export interface CliOnlyOperationDefinition { export const CLI_ONLY_OPERATION_DEFINITIONS: Record = { open: { category: 'lifecycle', - description: 'Open a document and create a persistent editing session.', + description: + 'Open a document and create a persistent editing session. 
Optionally override the document body with contentOverride + overrideType (markdown, html, or text).', requiresDocumentContext: false, intentName: 'open_document', sdkMetadata: { mutates: false, idempotency: 'non-idempotent', supportsTrackedMode: false, supportsDryRun: false }, diff --git a/apps/cli/src/cli/operation-params.ts b/apps/cli/src/cli/operation-params.ts index 38495d73f1..15f615fb99 100644 --- a/apps/cli/src/cli/operation-params.ts +++ b/apps/cli/src/cli/operation-params.ts @@ -417,6 +417,8 @@ const CLI_ONLY_METADATA: Record = { { name: 'collaboration', kind: 'jsonFlag', flag: 'collaboration-json', type: 'json' }, { name: 'collabDocumentId', kind: 'flag', flag: 'collab-document-id', type: 'string' }, { name: 'collabUrl', kind: 'flag', flag: 'collab-url', type: 'string' }, + { name: 'contentOverride', kind: 'flag', flag: 'content-override', type: 'string' }, + { name: 'overrideType', kind: 'flag', flag: 'override-type', type: 'string' }, ], constraints: null, }, diff --git a/apps/cli/src/commands/open.ts b/apps/cli/src/commands/open.ts index 31d69e889e..f3df19e3de 100644 --- a/apps/cli/src/commands/open.ts +++ b/apps/cli/src/commands/open.ts @@ -16,6 +16,13 @@ import { parseOperationArgs } from '../lib/operation-args'; import { generateSessionId } from '../lib/session'; import type { CommandContext, CommandExecution } from '../lib/types'; +const VALID_OVERRIDE_TYPES = new Set(['markdown', 'html', 'text']); + +/** Escape CommonMark special characters so the text is treated as literal. 
*/ +function escapeMarkdown(str: string): string { + return str.replace(/([\\`*_{}[\]()#+\-.!|>~])/g, '\\$1'); +} + export async function runOpen(tokens: string[], context: CommandContext): Promise { const { parsed, help } = parseOperationArgs('doc.open', tokens, { commandName: 'open', @@ -28,12 +35,14 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis data: { usage: [ 'superdoc open [doc] [--session ]', + 'superdoc open [doc] --content-override --override-type ', 'superdoc open [doc] --collaboration-json "{...}" [--session ]', ], }, pretty: [ 'Usage:', ' superdoc open [doc] [--session ]', + ' superdoc open [doc] --content-override --override-type ', ' superdoc open [doc] --collaboration-json "{...}" [--session ]', ].join('\n'), }; @@ -45,6 +54,23 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis const collaborationPayload = await resolveJsonInput(parsed, 'collaboration'); const collabUrl = getStringOption(parsed, 'collab-url'); const collabDocumentId = getStringOption(parsed, 'collab-document-id'); + const contentOverride = getStringOption(parsed, 'content-override'); + const overrideType = getStringOption(parsed, 'override-type'); + + // Validate contentOverride / overrideType co-requirement. + // Use != null checks so that intentional empty-string overrides are honored. + if (contentOverride != null && !overrideType) { + throw new CliError('INVALID_ARGUMENT', 'open: --content-override requires --override-type.'); + } + if (overrideType && contentOverride == null) { + throw new CliError('INVALID_ARGUMENT', 'open: --override-type requires --content-override.'); + } + if (overrideType && !VALID_OVERRIDE_TYPES.has(overrideType)) { + throw new CliError( + 'INVALID_ARGUMENT', + `open: --override-type must be one of: markdown, html, text. 
Got "${overrideType}".`, + ); + } if (collaborationPayload != null && (collabUrl || collabDocumentId)) { throw new CliError( @@ -53,6 +79,14 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis ); } + // Content override is incompatible with collaboration mode + if (contentOverride != null && (collaborationPayload != null || collabUrl)) { + throw new CliError( + 'INVALID_ARGUMENT', + 'open: --content-override is incompatible with collaboration mode. Content override is a template-initialization operation.', + ); + } + let collaborationInput; if (collaborationPayload != null) { collaborationInput = parseCollaborationInput(collaborationPayload); @@ -69,6 +103,21 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis const collaboration = collaborationInput ? resolveCollaborationProfile(collaborationInput, sessionId) : undefined; const sessionType = collaboration ? 'collab' : 'local'; + // Build editor open options from override params + const editorOpenOptions: Record = {}; + if (contentOverride != null && overrideType) { + if (overrideType === 'markdown') { + editorOpenOptions.markdown = contentOverride; + } else if (overrideType === 'html') { + editorOpenOptions.html = contentOverride; + } else if (overrideType === 'text') { + // Route through the markdown pipeline which is DOM-free (AST-based), + // so it works in headless CLI mode. Escape markdown syntax characters + // so the content is treated as literal text, not interpreted as formatting. + editorOpenOptions.markdown = escapeMarkdown(contentOverride); + } + } + return withContextLock( context.io, 'open', @@ -104,7 +153,7 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis const opened = collaboration ? 
await openCollaborativeDocument(doc!, context.io, collaboration) - : await openDocument(doc, context.io); + : await openDocument(doc, context.io, { editorOpenOptions }); let adoptedToHostPool = false; try { const output = await exportToPath(opened.editor, paths.workingDocPath, true); diff --git a/apps/cli/src/lib/document.ts b/apps/cli/src/lib/document.ts index a345ef9213..e92bbcd41a 100644 --- a/apps/cli/src/lib/document.ts +++ b/apps/cli/src/lib/document.ts @@ -3,6 +3,7 @@ import { createHash } from 'node:crypto'; import { Editor } from 'superdoc/super-editor'; import { BLANK_DOCX_BASE64 } from '@superdoc/super-editor/blank-docx'; import { getDocumentApiAdapters } from '@superdoc/super-editor/document-api-adapters'; +import { markdownToPmDoc } from '@superdoc/super-editor/markdown'; import { createDocumentApi, type DocumentApi } from '@superdoc/document-api'; import type { CollaborationProfile } from './collaboration'; @@ -27,6 +28,8 @@ interface OpenDocumentOptions { documentId?: string; ydoc?: unknown; collaborationProvider?: unknown; + /** Options passed through to Editor.open() (e.g., markdown/html for content override). */ + editorOpenOptions?: Record; } export interface FileOutputMeta { @@ -98,6 +101,21 @@ export async function openDocument( meta = { source: 'blank', byteLength: source.byteLength }; } + // Separate content overrides from options passed to Editor.open(). + // The Editor's built-in markdown/html init paths (in the dist bundle) route + // through an HTML-based pipeline that requires DOM. In headless CLI mode + // there is no DOM, so we intercept them here: + // - markdown: applied post-init via the AST-based markdownToPmDoc pipeline (DOM-free) + // - html: rejected with a clear error (no DOM-free HTML pipeline exists) + const { markdown: markdownOverride, html: htmlOverride, ...passThroughEditorOpts } = options.editorOpenOptions ?? 
{}; + + if (htmlOverride != null) { + throw new CliError( + 'UNSUPPORTED_FORMAT', + 'HTML content override is not supported in headless CLI mode (requires DOM). Use --override-type markdown instead.', + ); + } + let editor: Editor; try { const isTest = process.env.NODE_ENV === 'test'; @@ -107,6 +125,7 @@ export async function openDocument( ...(isTest ? { telemetry: { enabled: false } } : {}), ydoc: options.ydoc, ...(options.collaborationProvider != null ? { collaborationProvider: options.collaborationProvider } : {}), + ...passThroughEditorOpts, }); } catch (error) { const message = error instanceof Error ? error.message : String(error); @@ -116,6 +135,24 @@ export async function openDocument( }); } + // Apply markdown content override post-init (DOM-free AST pipeline). + if (markdownOverride != null) { + try { + const { doc: newDoc } = markdownToPmDoc(markdownOverride, editor); + const tr = editor.state.tr; + // The PM Fragment type is opaque at the CLI boundary — cast through unknown. + tr.replaceWith(0, editor.state.doc.content.size, newDoc.content as any); + editor.dispatch(tr); + } catch (error) { + editor.destroy(); + const message = error instanceof Error ? 
error.message : String(error); + throw new CliError('DOCUMENT_OPEN_FAILED', 'Failed to apply content override.', { + message, + source: meta, + }); + } + } + const adapters = getDocumentApiAdapters(editor); const docApi = createDocumentApi(adapters); Object.defineProperty(editor, 'doc', { value: docApi, configurable: true, writable: true }); diff --git a/apps/cli/src/lib/errors.ts b/apps/cli/src/lib/errors.ts index 5a1f4c9f6f..8bc194a12c 100644 --- a/apps/cli/src/lib/errors.ts +++ b/apps/cli/src/lib/errors.ts @@ -25,6 +25,7 @@ export type CliErrorCode = | 'TRACK_CHANGE_COMMAND_UNAVAILABLE' | 'TRACK_CHANGE_CONFLICT' | 'COMMAND_FAILED' + | 'UNSUPPORTED_FORMAT' | 'TIMEOUT' // Plan-engine error codes — passed through from document-api adapters | 'REVISION_CHANGED_SINCE_COMPILE' diff --git a/apps/cli/src/lib/validate.ts b/apps/cli/src/lib/validate.ts index 5d678f8d3d..81cfab8811 100644 --- a/apps/cli/src/lib/validate.ts +++ b/apps/cli/src/lib/validate.ts @@ -280,15 +280,19 @@ function validateCreateParagraphLocation(value: unknown, path: string): NonNulla expectOnlyKeys(obj, ['kind', 'nodeId'], path); const nodeId = expectString(obj.nodeId, `${path}.nodeId`); + // nodeId shorthand: wrap in a BlockNodeAddress with nodeType 'paragraph' + // as a default. The adapter falls back to nodeId-only lookup when the + // full nodeType:nodeId key doesn't match, so this works for any block type. + const target = { kind: 'block' as const, nodeType: 'paragraph' as const, nodeId }; if (kind === 'before') { return { kind: 'before', - nodeId, + target, }; } return { kind: 'after', - nodeId, + target, }; } diff --git a/apps/cli/src/types/super-editor-adapters.d.ts b/apps/cli/src/types/super-editor-adapters.d.ts index 2ef5ddad00..f540a99d69 100644 --- a/apps/cli/src/types/super-editor-adapters.d.ts +++ b/apps/cli/src/types/super-editor-adapters.d.ts @@ -1,8 +1,8 @@ /** - * Ambient module declaration for the super-editor adapter bridge. 
+ * Ambient module declarations for the super-editor bridge. * - * At runtime, bun resolves this via the tsconfig `paths` mapping. - * For typecheck (`tsc --noEmit`), this declaration provides the type + * At runtime, bun resolves these via the tsconfig `paths` mappings. + * For typecheck (`tsc --noEmit`), these declarations provide the type * surface without pulling in the super-editor source tree (which uses * internal path aliases that only its own tsconfig maps). */ @@ -17,3 +17,21 @@ declare module '@superdoc/super-editor/document-api-adapters' { */ export function getDocumentApiAdapters(editor: unknown): DocumentApiAdapters; } + +declare module '@superdoc/super-editor/markdown' { + interface MarkdownConversionResult { + /** ProseMirror doc node (typed minimally to avoid PM dependency at the CLI boundary). */ + doc: { readonly content: unknown }; + diagnostics: Array<{ nodeType: string; message: string }>; + } + + /** + * Parse Markdown to a full ProseMirror document node via the AST pipeline + * (remark-parse → mdast → PM JSON). DOM-free — works in headless environments. 
+ */ + export function markdownToPmDoc( + markdown: string, + editor: unknown, + options?: { dryRun?: boolean }, + ): MarkdownConversionResult; +} diff --git a/apps/cli/tsconfig.json b/apps/cli/tsconfig.json index eaee51654a..270d602c31 100644 --- a/apps/cli/tsconfig.json +++ b/apps/cli/tsconfig.json @@ -10,6 +10,7 @@ "@superdoc/super-editor/document-api-adapters": [ "../../packages/super-editor/src/document-api-adapters/index.ts" ], + "@superdoc/super-editor/markdown": ["../../packages/super-editor/src/core/helpers/markdown/index.ts"], "@superdoc/super-editor/blank-docx": ["../../packages/super-editor/src/core/blank-docx.ts"] } }, diff --git a/apps/docs/document-api/common-workflows.mdx b/apps/docs/document-api/common-workflows.mdx index 4273e389ce..24e5b8cfe6 100644 --- a/apps/docs/document-api/common-workflows.mdx +++ b/apps/docs/document-api/common-workflows.mdx @@ -127,7 +127,7 @@ if (caps.operations['format.apply'].available) { } if (caps.global.trackChanges.enabled) { - editor.doc.insert({ text: 'tracked' }, { changeMode: 'tracked' }); + editor.doc.insert({ value: 'tracked' }, { changeMode: 'tracked' }); } ``` @@ -137,7 +137,7 @@ Pass `dryRun: true` to validate an operation without applying it: ```ts const preview = editor.doc.insert( - { target, text: 'hello' }, + { target, value: 'hello' }, { dryRun: true }, ); // preview.success tells you whether the insert would succeed diff --git a/apps/docs/document-api/reference/index.mdx b/apps/docs/document-api/reference/index.mdx index 2ce3a62fff..fa4205236d 100644 --- a/apps/docs/document-api/reference/index.mdx +++ b/apps/docs/document-api/reference/index.mdx @@ -46,7 +46,7 @@ The tables below are grouped by namespace. | getNodeById | editor.doc.getNodeById(...) | Retrieve a single node by its unique ID. | | getText | editor.doc.getText(...) | Extract the plain-text content of the document. | | info | editor.doc.info(...) | Return document metadata including revision, node count, and capabilities. 
| -| insert | editor.doc.insert(...) | Insert text or inline content at a target position. | +| insert | editor.doc.insert(...) | Insert content at a target position. Supports text (default), markdown, and html content types via the `type` field. | | replace | editor.doc.replace(...) | Replace content at a target position with new text or inline content. | | delete | editor.doc.delete(...) | Delete content at a target position. | diff --git a/apps/docs/document-api/reference/insert.mdx b/apps/docs/document-api/reference/insert.mdx index b4dbeb9ef8..44cf8d9ff5 100644 --- a/apps/docs/document-api/reference/insert.mdx +++ b/apps/docs/document-api/reference/insert.mdx @@ -23,7 +23,8 @@ description: Reference for insert | Field | Type | Required | Description | | --- | --- | --- | --- | | `target` | TextAddress | no | TextAddress | -| `text` | string | yes | | +| `type` | enum | no | `"text"`, `"markdown"`, `"html"` | +| `value` | string | yes | | ### Example request @@ -37,7 +38,8 @@ description: Reference for insert "start": 0 } }, - "text": "Hello, world." 
+ "type": "text", + "value": "example" } ``` @@ -100,6 +102,7 @@ _No fields._ - `INVALID_TARGET` - `NO_OP` +- `CAPABILITY_UNAVAILABLE` ## Raw schemas @@ -111,12 +114,20 @@ _No fields._ "target": { "$ref": "#/$defs/TextAddress" }, - "text": { + "type": { + "enum": [ + "text", + "markdown", + "html" + ], + "type": "string" + }, + "value": { "type": "string" } }, "required": [ - "text" + "value" ], "type": "object" } @@ -139,7 +150,8 @@ _No fields._ "code": { "enum": [ "INVALID_TARGET", - "NO_OP" + "NO_OP", + "CAPABILITY_UNAVAILABLE" ] }, "details": {}, @@ -191,7 +203,8 @@ _No fields._ "code": { "enum": [ "INVALID_TARGET", - "NO_OP" + "NO_OP", + "CAPABILITY_UNAVAILABLE" ] }, "details": {}, diff --git a/apps/docs/document-engine/sdks.mdx b/apps/docs/document-engine/sdks.mdx index e985eadc87..e61c188423 100644 --- a/apps/docs/document-engine/sdks.mdx +++ b/apps/docs/document-engine/sdks.mdx @@ -128,7 +128,7 @@ The SDKs expose all operations from the [Document API](/document-api/overview) p | Operation | CLI command | Description | | --- | --- | --- | -| `doc.open` | `open` | Open a document and create a persistent editing session. | +| `doc.open` | `open` | Open a document and create a persistent editing session. Optionally override the document body with contentOverride + overrideType (markdown, html, or text). | | `doc.save` | `save` | Save the current session to the original file or a new path. | | `doc.close` | `close` | Close the active editing session and clean up resources. | @@ -148,7 +148,7 @@ The SDKs expose all operations from the [Document API](/document-api/overview) p | Operation | CLI command | Description | | --- | --- | --- | -| `doc.insert` | `insert` | Insert text or inline content at a target position. | +| `doc.insert` | `insert` | Insert content at a target position. Supports text (default), markdown, and html content types via the `type` field. | | `doc.replace` | `replace` | Replace content at a target position with new text or inline content. 
| | `doc.delete` | `delete` | Delete content at a target position. | | `doc.mutations.apply` | `mutations apply` | Execute a mutation plan atomically against the document. | diff --git a/packages/document-api/src/README.md b/packages/document-api/src/README.md index 6038033d93..aa79cdfcde 100644 --- a/packages/document-api/src/README.md +++ b/packages/document-api/src/README.md @@ -102,7 +102,7 @@ Insert text as a tracked change so reviewers can accept or reject it: ```ts const receipt = editor.doc.insert( - { text: 'new content' }, + { value: 'new content' }, { changeMode: 'tracked' }, ); // receipt.resolution.target contains the resolved insertion point @@ -147,7 +147,7 @@ if (caps.operations['format.apply'].available) { editor.doc.format.apply({ target, inline: { bold: true } }); } if (caps.global.trackChanges.enabled) { - editor.doc.insert({ text: 'tracked' }, { changeMode: 'tracked' }); + editor.doc.insert({ value: 'tracked' }, { changeMode: 'tracked' }); } if (caps.operations['create.heading'].dryRun) { const preview = editor.doc.create.heading( diff --git a/packages/document-api/src/contract/contract.test.ts b/packages/document-api/src/contract/contract.test.ts index 79852c1519..b96444d8c3 100644 --- a/packages/document-api/src/contract/contract.test.ts +++ b/packages/document-api/src/contract/contract.test.ts @@ -83,10 +83,10 @@ describe('document-api contract catalog', () => { additionalProperties?: boolean; }; - // Simplified schema: target (optional) + text (required), no allOf constraints + // Simplified schema: target (optional) + value (required) + type (optional enum), no allOf constraints expect(insertInputSchema.type).toBe('object'); - expect(Object.keys(insertInputSchema.properties!).sort()).toEqual(['target', 'text']); - expect(insertInputSchema.required).toEqual(['text']); + expect(Object.keys(insertInputSchema.properties!).sort()).toEqual(['target', 'type', 'value']); + expect(insertInputSchema.required).toEqual(['value']); 
expect(insertInputSchema.allOf).toBeUndefined(); expect(insertInputSchema.additionalProperties).toBe(false); }); diff --git a/packages/document-api/src/contract/operation-definitions.ts b/packages/document-api/src/contract/operation-definitions.ts index c3a89b1793..649e8019e2 100644 --- a/packages/document-api/src/contract/operation-definitions.ts +++ b/packages/document-api/src/contract/operation-definitions.ts @@ -201,13 +201,14 @@ export const OPERATION_DEFINITIONS = { insert: { memberPath: 'insert', - description: 'Insert text or inline content at a target position.', + description: + 'Insert content at a target position. Supports text (default), markdown, and html content types via the `type` field.', requiresDocumentContext: true, metadata: mutationOperation({ idempotency: 'non-idempotent', supportsDryRun: true, supportsTrackedMode: true, - possibleFailureCodes: ['INVALID_TARGET', 'NO_OP'], + possibleFailureCodes: ['INVALID_TARGET', 'NO_OP', 'CAPABILITY_UNAVAILABLE'], throws: [...T_NOT_FOUND_CAPABLE, 'INVALID_TARGET'], }), referenceDocPath: 'insert.mdx', diff --git a/packages/document-api/src/contract/schemas.ts b/packages/document-api/src/contract/schemas.ts index 290b2efe55..ca6a913094 100644 --- a/packages/document-api/src/contract/schemas.ts +++ b/packages/document-api/src/contract/schemas.ts @@ -894,9 +894,10 @@ const strictEmptyObjectSchema = objectSchema({}); const insertInputSchema = objectSchema( { target: textAddressSchema, - text: { type: 'string' }, + value: { type: 'string' }, + type: { type: 'string', enum: ['text', 'markdown', 'html'] }, }, - ['text'], + ['value'], ); // --------------------------------------------------------------------------- diff --git a/packages/document-api/src/index.test.ts b/packages/document-api/src/index.test.ts index 348459a138..0a8c9db33e 100644 --- a/packages/document-api/src/index.test.ts +++ b/packages/document-api/src/index.test.ts @@ -92,15 +92,17 @@ function makeCommentsAdapter(): CommentsAdapter { } function 
makeWriteAdapter(): WriteAdapter { + const defaultReceipt = { + success: true as const, + resolution: { + target: { kind: 'text' as const, blockId: 'p1', range: { start: 0, end: 0 } }, + range: { from: 1, to: 1 }, + text: '', + }, + }; return { - write: vi.fn(() => ({ - success: true as const, - resolution: { - target: { kind: 'text' as const, blockId: 'p1', range: { start: 0, end: 0 } }, - range: { from: 1, to: 1 }, - text: '', - }, - })), + write: vi.fn(() => defaultReceipt), + insertStructured: vi.fn(() => defaultReceipt), }; } @@ -537,19 +539,19 @@ describe('createDocumentApi', () => { }); const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 2 } } as const; - api.insert({ text: 'Hi' }); - api.insert({ target, text: 'Yo' }); + api.insert({ value: 'Hi' }); + api.insert({ target, value: 'Yo' }); api.replace({ target, text: 'Hello' }, { changeMode: 'tracked' }); api.delete({ target }); expect(writeAdpt.write).toHaveBeenNthCalledWith( 1, - { kind: 'insert', text: 'Hi' }, + { kind: 'insert', text: 'Hi' }, // write request keeps `text` (internal protocol) { changeMode: 'direct', dryRun: false }, ); expect(writeAdpt.write).toHaveBeenNthCalledWith( 2, - { kind: 'insert', target, text: 'Yo' }, + { kind: 'insert', target, text: 'Yo' }, // write request keeps `text` (internal protocol) { changeMode: 'direct', dryRun: false }, ); expect(writeAdpt.write).toHaveBeenNthCalledWith( @@ -1063,14 +1065,14 @@ describe('createDocumentApi', () => { it('accepts no-target (default insertion point)', () => { const api = makeApi(); - const result = api.insert({ text: 'hello' }); + const result = api.insert({ value: 'hello' }); expect(result.success).toBe(true); }); it('accepts canonical target', () => { const api = makeApi(); const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 0 } } as const; - const result = api.insert({ target, text: 'hello' }); + const result = api.insert({ target, value: 'hello' }); expect(result.success).toBe(true); }); @@ -1079,7 
+1081,7 @@ describe('createDocumentApi', () => { it('rejects null target', () => { const api = makeApi(); expectValidationError( - () => api.insert({ target: null, text: 'hello' } as any), + () => api.insert({ target: null, value: 'hello' } as any), 'target must be a text address object', ); }); @@ -1087,16 +1089,21 @@ describe('createDocumentApi', () => { it('rejects malformed target objects', () => { const api = makeApi(); expectValidationError( - () => api.insert({ target: { kind: 'text', blockId: 'p1' }, text: 'hello' } as any), + () => api.insert({ target: { kind: 'text', blockId: 'p1' }, value: 'hello' } as any), 'target must be a text address object', ); }); // -- Type checks -- - it('rejects non-string text', () => { + it('rejects non-string value', () => { + const api = makeApi(); + expectValidationError(() => api.insert({ value: 42 } as any), 'value must be a string'); + }); + + it('rejects invalid type enum', () => { const api = makeApi(); - expectValidationError(() => api.insert({ text: 42 } as any), 'text must be a string'); + expectValidationError(() => api.insert({ value: 'hi', type: 'xml' } as any), 'type must be one of'); }); // -- Validation error shape -- @@ -1104,7 +1111,7 @@ describe('createDocumentApi', () => { it('throws DocumentApiValidationError (not plain Error)', () => { const api = makeApi(); try { - api.insert({ text: 42 } as any); + api.insert({ value: 42 } as any); expect.fail('Expected error'); } catch (err: unknown) { expect((err as Error).constructor.name).toBe('DocumentApiValidationError'); @@ -1133,27 +1140,27 @@ describe('createDocumentApi', () => { it('rejects unknown top-level fields', () => { const api = makeApi(); - expectValidationError(() => api.insert({ text: 'hi', block_id: 'abc' } as any), 'Unknown field "block_id"'); + expectValidationError(() => api.insert({ value: 'hi', block_id: 'abc' } as any), 'Unknown field "block_id"'); }); it('rejects flat blockId as unknown field', () => { const api = makeApi(); - 
expectValidationError(() => api.insert({ blockId: 'p1', text: 'hello' } as any), 'Unknown field "blockId"'); + expectValidationError(() => api.insert({ blockId: 'p1', value: 'hello' } as any), 'Unknown field "blockId"'); }); it('rejects flat offset as unknown field', () => { const api = makeApi(); - expectValidationError(() => api.insert({ text: 'hello', offset: 5 } as any), 'Unknown field "offset"'); + expectValidationError(() => api.insert({ value: 'hello', offset: 5 } as any), 'Unknown field "offset"'); }); it('rejects pos as unknown field', () => { const api = makeApi(); - expectValidationError(() => api.insert({ text: 'hi', pos: 3 } as any), 'Unknown field "pos"'); + expectValidationError(() => api.insert({ value: 'hi', pos: 3 } as any), 'Unknown field "pos"'); }); // -- Backward compatibility parity -- - it('sends same adapter request for insert({ text }) as before', () => { + it('maps insert({ value }) to internal write request with text field', () => { const writeAdpt = makeWriteAdapter(); const api = createDocumentApi({ find: makeFindAdapter(QUERY_RESULT), @@ -1169,14 +1176,14 @@ describe('createDocumentApi', () => { lists: makeListsAdapter(), }); - api.insert({ text: 'hello' }); + api.insert({ value: 'hello' }); expect(writeAdpt.write).toHaveBeenCalledWith( { kind: 'insert', text: 'hello' }, { changeMode: 'direct', dryRun: false }, ); }); - it('sends same adapter request for insert({ target, text }) as before', () => { + it('maps insert({ target, value }) to internal write request with text field', () => { const writeAdpt = makeWriteAdapter(); const api = createDocumentApi({ find: makeFindAdapter(QUERY_RESULT), @@ -1193,12 +1200,103 @@ describe('createDocumentApi', () => { }); const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 2 } } as const; - api.insert({ target, text: 'hello' }); + api.insert({ target, value: 'hello' }); expect(writeAdpt.write).toHaveBeenCalledWith( { kind: 'insert', target, text: 'hello' }, { changeMode: 'direct', 
dryRun: false }, ); }); + + // -- Structured insert routing (markdown / html) -- + + it('routes type:"markdown" insert to insertStructured instead of write', () => { + const writeAdpt = makeWriteAdapter(); + const api = createDocumentApi({ + find: makeFindAdapter(QUERY_RESULT), + getNode: makeGetNodeAdapter(PARAGRAPH_INFO), + getText: makeGetTextAdapter(), + info: makeInfoAdapter(), + capabilities: makeCapabilitiesAdapter(), + comments: makeCommentsAdapter(), + write: writeAdpt, + format: makeFormatAdapter(), + trackChanges: makeTrackChangesAdapter(), + create: makeCreateAdapter(), + lists: makeListsAdapter(), + }); + + api.insert({ value: '# Heading', type: 'markdown' }); + expect(writeAdpt.insertStructured).toHaveBeenCalledTimes(1); + expect(writeAdpt.insertStructured).toHaveBeenCalledWith({ value: '# Heading', type: 'markdown' }, undefined); + expect(writeAdpt.write).not.toHaveBeenCalled(); + }); + + it('routes type:"html" insert to insertStructured instead of write', () => { + const writeAdpt = makeWriteAdapter(); + const api = createDocumentApi({ + find: makeFindAdapter(QUERY_RESULT), + getNode: makeGetNodeAdapter(PARAGRAPH_INFO), + getText: makeGetTextAdapter(), + info: makeInfoAdapter(), + capabilities: makeCapabilitiesAdapter(), + comments: makeCommentsAdapter(), + write: writeAdpt, + format: makeFormatAdapter(), + trackChanges: makeTrackChangesAdapter(), + create: makeCreateAdapter(), + lists: makeListsAdapter(), + }); + + api.insert({ value: '

Hello

', type: 'html' }); + expect(writeAdpt.insertStructured).toHaveBeenCalledTimes(1); + expect(writeAdpt.insertStructured).toHaveBeenCalledWith({ value: '

Hello

', type: 'html' }, undefined); + expect(writeAdpt.write).not.toHaveBeenCalled(); + }); + + it('routes type:"text" (or unspecified type) insert to write, not insertStructured', () => { + const writeAdpt = makeWriteAdapter(); + const api = createDocumentApi({ + find: makeFindAdapter(QUERY_RESULT), + getNode: makeGetNodeAdapter(PARAGRAPH_INFO), + getText: makeGetTextAdapter(), + info: makeInfoAdapter(), + capabilities: makeCapabilitiesAdapter(), + comments: makeCommentsAdapter(), + write: writeAdpt, + format: makeFormatAdapter(), + trackChanges: makeTrackChangesAdapter(), + create: makeCreateAdapter(), + lists: makeListsAdapter(), + }); + + api.insert({ value: 'plain text', type: 'text' }); + expect(writeAdpt.write).toHaveBeenCalledTimes(1); + expect(writeAdpt.insertStructured).not.toHaveBeenCalled(); + }); + + it('forwards target to insertStructured for markdown insert', () => { + const writeAdpt = makeWriteAdapter(); + const api = createDocumentApi({ + find: makeFindAdapter(QUERY_RESULT), + getNode: makeGetNodeAdapter(PARAGRAPH_INFO), + getText: makeGetTextAdapter(), + info: makeInfoAdapter(), + capabilities: makeCapabilitiesAdapter(), + comments: makeCommentsAdapter(), + write: writeAdpt, + format: makeFormatAdapter(), + trackChanges: makeTrackChangesAdapter(), + create: makeCreateAdapter(), + lists: makeListsAdapter(), + }); + + const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 0 } } as const; + api.insert({ target, value: '**bold**', type: 'markdown' }); + expect(writeAdpt.insertStructured).toHaveBeenCalledWith( + { target, value: '**bold**', type: 'markdown' }, + undefined, + ); + }); }); describe('replace target validation', () => { diff --git a/packages/document-api/src/index.ts b/packages/document-api/src/index.ts index e574c43bc4..0a394af436 100644 --- a/packages/document-api/src/index.ts +++ b/packages/document-api/src/index.ts @@ -262,7 +262,7 @@ export type { } from './comments/comments.js'; export type { CommentInfo, 
CommentsListQuery, CommentsListResult } from './comments/comments.types.js'; export { DocumentApiValidationError } from './errors.js'; -export type { InsertInput } from './insert/insert.js'; +export type { InsertInput, InsertContentType } from './insert/insert.js'; export type { ReplaceInput } from './replace/replace.js'; export type { DeleteInput } from './delete/delete.js'; diff --git a/packages/document-api/src/insert/insert.ts b/packages/document-api/src/insert/insert.ts index bb54330371..994dc7da02 100644 --- a/packages/document-api/src/insert/insert.ts +++ b/packages/document-api/src/insert/insert.ts @@ -3,16 +3,26 @@ import type { TextAddress, TextMutationReceipt } from '../types/index.js'; import { DocumentApiValidationError } from '../errors.js'; import { isRecord, isTextAddress, assertNoUnknownFields } from '../validation-primitives.js'; +/** Content format for the insert operation payload. */ +export type InsertContentType = 'text' | 'markdown' | 'html'; + +/** Input payload for the `doc.insert` operation. */ export interface InsertInput { + /** Optional insertion target. When omitted, adapters resolve a default insertion point. */ target?: TextAddress; - text: string; + /** The content to insert. Interpreted according to {@link InsertInput.type}. */ + value: string; + /** Content format. Defaults to `'text'` when omitted. */ + type?: InsertContentType; } /** * Strict top-level allowlist for InsertInput fields. * Any key not in this list is rejected as an unknown field. */ -const INSERT_INPUT_ALLOWED_KEYS = new Set(['text', 'target']); +const INSERT_INPUT_ALLOWED_KEYS = new Set(['value', 'type', 'target']); + +const VALID_INSERT_TYPES: ReadonlySet = new Set(['text', 'markdown', 'html']); /** * Validates InsertInput and throws DocumentApiValidationError on violations. @@ -21,7 +31,8 @@ const INSERT_INPUT_ALLOWED_KEYS = new Set(['text', 'target']); * 0. Input shape guard (must be non-null plain object) * 1. Unknown field rejection (strict allowlist) * 2. 
Target type check (target shape) - * 3. Text type check + * 3. Value type check (must be non-empty string) + * 4. Type enum check (must be valid content type) */ function validateInsertInput(input: unknown): asserts input is InsertInput { // Step 0: Input shape guard @@ -32,7 +43,7 @@ function validateInsertInput(input: unknown): asserts input is InsertInput { // Step 1: Unknown field rejection (strict allowlist) assertNoUnknownFields(input, INSERT_INPUT_ALLOWED_KEYS, 'insert'); - const { target, text } = input; + const { target, value, type } = input; // Step 2: Target type check if (target !== undefined && !isTextAddress(target)) { @@ -42,13 +53,25 @@ function validateInsertInput(input: unknown): asserts input is InsertInput { }); } - // Step 3: Text type check - if (typeof text !== 'string') { - throw new DocumentApiValidationError('INVALID_TARGET', `text must be a string, got ${typeof text}.`, { - field: 'text', - value: text, + // Step 3: Value type check + if (typeof value !== 'string') { + throw new DocumentApiValidationError('INVALID_TARGET', `value must be a string, got ${typeof value}.`, { + field: 'value', + value, }); } + + // Step 4: Type enum check + if (type !== undefined && (typeof type !== 'string' || !VALID_INSERT_TYPES.has(type))) { + throw new DocumentApiValidationError( + 'INVALID_TARGET', + `type must be one of: text, markdown, html. Got "${type}".`, + { + field: 'type', + value: type, + }, + ); + } } export function executeInsert( @@ -58,10 +81,17 @@ export function executeInsert( ): TextMutationReceipt { validateInsertInput(input); - const { target, text } = input; + const { target, value } = input; + const contentType = input.type ?? 'text'; + + // For non-text content types, delegate to the adapter's structured insert path. + // The adapter (plan-wrappers) handles markdown/html conversion and block insertion. 
+ if (contentType !== 'text') { + return adapter.insertStructured(input, options); + } - // Canonical target or no-target (default insertion point) - const request = target ? { kind: 'insert' as const, target, text } : { kind: 'insert' as const, text }; + // Text path: use the existing write pipeline + const request = target ? { kind: 'insert' as const, target, text: value } : { kind: 'insert' as const, text: value }; return executeWrite(adapter, request, options); } diff --git a/packages/document-api/src/invoke/invoke.test.ts b/packages/document-api/src/invoke/invoke.test.ts index b395f0be5c..08d7f4623a 100644 --- a/packages/document-api/src/invoke/invoke.test.ts +++ b/packages/document-api/src/invoke/invoke.test.ts @@ -75,6 +75,14 @@ function makeAdapters() { text: '', }, })), + insertStructured: vi.fn(() => ({ + success: true as const, + resolution: { + target: { kind: 'text' as const, blockId: 'p1', range: { start: 0, end: 0 } }, + range: { from: 1, to: 1 }, + text: '', + }, + })), }; const formatReceipt = () => ({ success: true as const, @@ -229,7 +237,7 @@ describe('invoke', () => { it('insert: invoke returns same result as direct call', () => { const { adapters } = makeAdapters(); const api = createDocumentApi(adapters); - const input = { text: 'hello' }; + const input = { value: 'hello' }; const direct = api.insert(input); const invoked = api.invoke({ operationId: 'insert', input }); expect(invoked).toEqual(direct); @@ -238,7 +246,7 @@ describe('invoke', () => { it('insert: invoke forwards options through to adapter-backed execution', () => { const { adapters, writeAdapter } = makeAdapters(); const api = createDocumentApi(adapters); - api.invoke({ operationId: 'insert', input: { text: 'hello' }, options: { changeMode: 'tracked' } }); + api.invoke({ operationId: 'insert', input: { value: 'hello' }, options: { changeMode: 'tracked' } }); expect(writeAdapter.write).toHaveBeenCalledWith( { kind: 'insert', text: 'hello' }, { changeMode: 'tracked', dryRun: false 
}, @@ -397,7 +405,7 @@ describe('invoke', () => { it('forwards unknown options through to the handler', () => { const { adapters, writeAdapter } = makeAdapters(); const api = createDocumentApi(adapters); - const input: unknown = { text: 'dynamic' }; + const input: unknown = { value: 'dynamic' }; const options: unknown = { changeMode: 'tracked' }; api.invoke({ operationId: 'insert', input, options }); expect(writeAdapter.write).toHaveBeenCalledWith( diff --git a/packages/document-api/src/overview-examples.test.ts b/packages/document-api/src/overview-examples.test.ts index ede440400c..d94b16994f 100644 --- a/packages/document-api/src/overview-examples.test.ts +++ b/packages/document-api/src/overview-examples.test.ts @@ -69,7 +69,10 @@ function makeInfoAdapter() { } function makeWriteAdapter() { - return { write: vi.fn(() => makeTextMutationReceipt()) }; + return { + write: vi.fn(() => makeTextMutationReceipt()), + insertStructured: vi.fn(() => makeTextMutationReceipt()), + }; } function makeFormatAdapter() { @@ -437,7 +440,7 @@ describe('overview.mdx examples', () => { it('insert text with changeMode tracked', () => { const doc = makeApi(); - const receipt = doc.insert({ text: 'new content' }, { changeMode: 'tracked' }); + const receipt = doc.insert({ value: 'new content' }, { changeMode: 'tracked' }); expect(receipt.resolution).toBeDefined(); expect(receipt.resolution.target).toBeDefined(); @@ -457,7 +460,7 @@ describe('overview.mdx examples', () => { } if (caps.global.trackChanges.enabled) { - doc.insert({ text: 'tracked' }, { changeMode: 'tracked' }); + doc.insert({ value: 'tracked' }, { changeMode: 'tracked' }); } // Both branches should execute with our fully-capable mock @@ -472,7 +475,7 @@ describe('overview.mdx examples', () => { const doc = makeApi(); const target = TEXT_TARGET; - const preview = doc.insert({ target, text: 'hello' }, { dryRun: true }); + const preview = doc.insert({ target, value: 'hello' }, { dryRun: true }); // preview.success tells you 
whether the insert would succeed // preview.resolution shows the resolved target range @@ -509,7 +512,7 @@ describe('src/README.md workflow examples', () => { it('insert in tracked mode and access receipt properties', () => { const doc = makeApi(); - const receipt = doc.insert({ text: 'new content' }, { changeMode: 'tracked' }); + const receipt = doc.insert({ value: 'new content' }, { changeMode: 'tracked' }); // receipt.resolution.target contains the resolved insertion point // receipt.inserted contains TrackedChangeAddress entries for the new change @@ -571,7 +574,7 @@ describe('src/README.md workflow examples', () => { doc.format.apply({ target, inline: { bold: true } }); } if (caps.global.trackChanges.enabled) { - doc.insert({ text: 'tracked' }, { changeMode: 'tracked' }); + doc.insert({ value: 'tracked' }, { changeMode: 'tracked' }); } if (caps.operations['create.heading'].dryRun) { const preview = doc.create.heading({ level: 2, text: 'Preview' }, { dryRun: true }); diff --git a/packages/document-api/src/types/receipt.ts b/packages/document-api/src/types/receipt.ts index beb44d5caa..056a055e84 100644 --- a/packages/document-api/src/types/receipt.ts +++ b/packages/document-api/src/types/receipt.ts @@ -22,6 +22,7 @@ export type ReceiptFailureCode = | 'REVISION_CHANGED_SINCE_COMPILE' | 'INVALID_INSERTION_CONTEXT' | 'DOCUMENT_IDENTITY_CONFLICT' + | 'UNSUPPORTED_ENVIRONMENT' | 'INTERNAL_ERROR'; export type ReceiptFailure = { diff --git a/packages/document-api/src/write/write.ts b/packages/document-api/src/write/write.ts index e071758604..0399a3d5fb 100644 --- a/packages/document-api/src/write/write.ts +++ b/packages/document-api/src/write/write.ts @@ -1,5 +1,6 @@ import type { TextAddress, TextMutationReceipt } from '../types/index.js'; import type { BlockRelativeLocator, BlockRelativeRange } from './locator.js'; +import type { InsertInput } from '../insert/insert.js'; export type ChangeMode = 'direct' | 'tracked'; @@ -49,6 +50,8 @@ export type WriteRequest = 
InsertWriteRequest | ReplaceWriteRequest | DeleteWrit export interface WriteAdapter { write(request: WriteRequest, options?: MutationOptions): TextMutationReceipt; + /** Structured insert for markdown/html content types. */ + insertStructured(input: InsertInput, options?: MutationOptions): TextMutationReceipt; } export function normalizeMutationOptions(options?: MutationOptions): MutationOptions { diff --git a/packages/super-editor/package.json b/packages/super-editor/package.json index 935ab73f81..62aa5a461c 100644 --- a/packages/super-editor/package.json +++ b/packages/super-editor/package.json @@ -82,7 +82,6 @@ "he": "catalog:", "jszip": "catalog:", "lodash": "^4.17.21", - "marked": "catalog:", "prosemirror-commands": "catalog:", "prosemirror-dropcursor": "catalog:", "prosemirror-gapcursor": "catalog:", @@ -99,6 +98,7 @@ "rehype-parse": "catalog:", "rehype-remark": "catalog:", "remark-gfm": "catalog:", + "remark-parse": "catalog:", "remark-stringify": "catalog:", "unified": "catalog:", "uuid": "catalog:", @@ -114,6 +114,7 @@ }, "devDependencies": { "@floating-ui/dom": "catalog:", + "@types/mdast": "catalog:", "@superdoc/common": "workspace:*", "@superdoc/document-api": "workspace:*", "@superdoc/contracts": "workspace:*", diff --git a/packages/super-editor/src/core/Editor.api-contracts.test.js b/packages/super-editor/src/core/Editor.api-contracts.test.js index 6917093303..2332a5a5a3 100644 --- a/packages/super-editor/src/core/Editor.api-contracts.test.js +++ b/packages/super-editor/src/core/Editor.api-contracts.test.js @@ -131,9 +131,7 @@ describe('Editor - API Contracts (Regression Prevention)', () => { return new Promise((resolve) => { setTimeout(() => { expect(onUnsupportedContent).toHaveBeenCalledTimes(1); - expect(onUnsupportedContent.mock.calls[0][0]).toEqual([ - expect.objectContaining({ tagName: 'VIDEO', count: 1 }), - ]); + expect(onUnsupportedContent.mock.calls[0][0]).toEqual([expect.objectContaining({ tagName: 'VIDEO' })]); resolve(); }, 10); }); diff 
--git a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js index 6268a36a67..0c6abfdc95 100644 --- a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js +++ b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js @@ -1,5 +1,37 @@ -import { describe, it, expect } from 'vitest'; -import { convertMarkdownToHTML } from './importMarkdown.js'; +import { beforeAll, beforeEach, afterEach, describe, it, expect } from 'vitest'; +import { createDocFromMarkdown } from './importMarkdown.js'; +import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js'; + +let docData; + +beforeAll(async () => { + docData = await loadTestDataForEditorTests('blank-doc.docx'); +}); + +let editor; + +beforeEach(() => { + ({ editor } = initTestEditor({ + content: docData.docx, + media: docData.media, + mediaFiles: docData.mediaFiles, + fonts: docData.fonts, + })); +}); + +afterEach(() => { + editor?.destroy(); + editor = null; +}); + +function collectNodeTypes(doc) { + const types = []; + doc.descendants((node) => { + types.push(node.type.name); + return true; + }); + return types; +} describe('markdown to DOCX integration', () => { it('converts complete markdown document with headings and lists', () => { @@ -17,16 +49,13 @@ More text here. 1. Numbered item 2. Second item`; - const html = convertMarkdownToHTML(markdown); + const doc = createDocFromMarkdown(markdown, editor); - // Verify all elements are converted - expect(html).toContain('

Main Title

'); - expect(html).toContain('

Section 2

'); - expect(html).toContain('
    '); - expect(html).toContain('
      '); + expect(doc).toBeDefined(); + expect(doc.type.name).toBe('doc'); - // Verify spacing is added between paragraphs and lists - expect(html).toContain('

      \n

       

      \n
        '); - expect(html).toContain('

        \n

         

        \n
          '); + const types = collectNodeTypes(doc); + expect(types).toContain('paragraph'); + expect(types).toContain('run'); }); }); diff --git a/packages/super-editor/src/core/helpers/importMarkdown.js b/packages/super-editor/src/core/helpers/importMarkdown.js index 0aef461f39..9244849569 100644 --- a/packages/super-editor/src/core/helpers/importMarkdown.js +++ b/packages/super-editor/src/core/helpers/importMarkdown.js @@ -1,37 +1,45 @@ // @ts-check -import { marked } from 'marked'; -import { createDocFromHTML } from './importHtml.js'; - -// Configure marked once -marked.use({ - breaks: false, // Use proper paragraphs, not
          tags - gfm: true, // GitHub Flavored Markdown support -}); +import { markdownToPmDoc } from './markdown/markdownToPmContent.js'; /** - * Create a ProseMirror document from Markdown content + * Create a ProseMirror document from Markdown content. + * + * Delegates to the AST-based conversion pipeline (remark-parse → mdast → PM JSON). + * The old `marked` → HTML → HTML importer path is no longer used. + * * @param {string} markdown - Markdown content - * @param {Object} editor - Editor instance + * @param {import('../Editor').Editor} editor - Editor instance * @param {Object} [options={}] - Import options - * @returns {Object} Document node + * @param {boolean} [options.isImport] - Whether this is an import operation + * @param {Document | null} [options.document] - Optional DOM document (unused by AST path) + * @param {((items: import('./catchAllSchema.js').UnsupportedContentItem[]) => void) | null} [options.onUnsupportedContent] - Callback for unsupported items + * @param {boolean} [options.warnOnUnsupportedContent] - Emit console.warn for unsupported items + * @returns {import('prosemirror-model').Node} Document node */ export function createDocFromMarkdown(markdown, editor, options = {}) { - const html = convertMarkdownToHTML(markdown); - return createDocFromHTML(html, editor, options); -} + const { doc, diagnostics } = markdownToPmDoc(markdown, editor); -/** - * Convert Markdown to HTML with SuperDoc/DOCX compatibility - * @param {string} markdown - Markdown content - * @returns {string} HTML content - */ -export function convertMarkdownToHTML(markdown) { - let html = marked.parse(markdown, { async: false }); + // Surface diagnostics through the unsupported content callback if provided. + // Aggregate by tag name to match the HTML importer's deduplication behavior. 
+ if (diagnostics.length > 0) { + /** @type {Map} */ + const byTag = new Map(); + for (const d of diagnostics) { + const existing = byTag.get(d.nodeType); + if (existing) { + existing.count += 1; + } else { + byTag.set(d.nodeType, { tagName: d.nodeType, outerHTML: d.message, count: 1 }); + } + } + const items = [...byTag.values()]; + + if (options.onUnsupportedContent) { + options.onUnsupportedContent(items); + } else if (options.warnOnUnsupportedContent) { + console.warn('[super-editor] Unsupported Markdown content during import:', items); + } + } - // Add spacing between paragraphs and lists for proper DOCX rendering - return html - .replace(/<\/p>\n
            /g, '

            \n

             

            \n
              ') - .replace(/<\/p>\n
                /g, '

                \n

                 

                \n
                  ') - .replace(/<\/ul>\n\n

                   

                  \n\n\n

                   

                  \n ({ - createDocFromHTML: vi.fn(), -})); +let docData; -describe('markdown import', () => { - it('converts markdown to HTML with proper spacing', () => { - const markdown = `# Heading +beforeAll(async () => { + docData = await loadTestDataForEditorTests('blank-doc.docx'); +}); -Paragraph text +let editor; -- List item`; +beforeEach(() => { + ({ editor } = initTestEditor({ + content: docData.docx, + media: docData.media, + mediaFiles: docData.mediaFiles, + fonts: docData.fonts, + })); +}); - const html = convertMarkdownToHTML(markdown); +afterEach(() => { + editor?.destroy(); + editor = null; +}); - expect(html).toContain('

                  Heading

                  '); - expect(html).toContain('

                  Paragraph text

                  '); - expect(html).toContain('

                  \n

                   

                  \n
                    '); // Spacing before list +describe('markdown import', () => { + it('creates a ProseMirror doc from markdown headings', () => { + const doc = createDocFromMarkdown('# Hello', editor); + expect(doc).toBeDefined(); + expect(doc.type.name).toBe('doc'); + expect(doc.childCount).toBeGreaterThan(0); }); - it('creates ProseMirror doc from markdown', () => { - const mockSchema = { nodes: {} }; - const mockDoc = { type: 'doc' }; - const mockOptions = { isImport: true }; - createDocFromHTML.mockReturnValue(mockDoc); - - const result = createDocFromMarkdown('# Test', mockSchema, mockOptions); + it('surfaces unsupported content through the callback', () => { + const onUnsupportedContent = vi.fn(); + createDocFromMarkdown('', editor, { + onUnsupportedContent, + }); - expect(createDocFromHTML).toHaveBeenCalledWith( - '

                    Test

                    \n', // Exact string that marked.parse returns - mockSchema, - { isImport: true }, + expect(onUnsupportedContent).toHaveBeenCalled(); + expect(onUnsupportedContent.mock.calls[0][0]).toEqual( + expect.arrayContaining([expect.objectContaining({ tagName: 'VIDEO' })]), ); - expect(result).toBe(mockDoc); }); }); diff --git a/packages/super-editor/src/core/helpers/markdown/index.ts b/packages/super-editor/src/core/helpers/markdown/index.ts new file mode 100644 index 0000000000..922c58f581 --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/index.ts @@ -0,0 +1,17 @@ +/** + * Markdown → ProseMirror conversion module. + * + * Public API: + * - `markdownToPmDoc` — full document conversion (for body replacement) + * - `markdownToPmFragment` — fragment conversion (for insertion) + * - `parseMarkdownToAst` — raw mdast parsing (for advanced use) + */ + +export { markdownToPmDoc, markdownToPmFragment } from './markdownToPmContent.js'; +export { parseMarkdownToAst } from './parseMarkdownAst.js'; +export type { + MarkdownConversionOptions, + MarkdownConversionResult, + MarkdownFragmentResult, + MarkdownDiagnostic, +} from './types.js'; diff --git a/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts new file mode 100644 index 0000000000..13939051e4 --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts @@ -0,0 +1,102 @@ +/** + * High-level entry points for Markdown → ProseMirror conversion. + * + * Exports two functions: + * - `markdownToPmDoc` — full document (for doc.open body replacement) + * - `markdownToPmFragment` — fragment (for doc.insert structured insertion) + * + * Both parse Markdown to mdast, convert to PM JSON, and materialize + * via the editor's schema. The conversion is synchronous and does not + * perform network I/O (image URLs are stored as-is). 
+ */ + +import { Fragment } from 'prosemirror-model'; +import type { Node as PmNode } from 'prosemirror-model'; +import type { Editor } from '../../Editor.js'; +import { parseMarkdownToAst } from './parseMarkdownAst.js'; +import { convertMdastToBlocks } from './mdastToProseMirror.js'; +import { wrapTextsInRuns } from '../../inputRules/docx-paste/docx-paste.js'; +import type { + MarkdownConversionOptions, + MarkdownConversionResult, + MarkdownFragmentResult, + MdastConversionContext, +} from './types.js'; + +// --------------------------------------------------------------------------- +// Full document conversion (for body replacement in doc.open) +// --------------------------------------------------------------------------- + +/** + * Parse Markdown and produce a full ProseMirror document node. + * + * The result replaces the entire document body. Template-level OOXML context + * (styles.xml, settings, numbering infrastructure) is preserved by the caller. + */ +export function markdownToPmDoc( + markdown: string, + editor: Editor, + options: MarkdownConversionOptions = {}, +): MarkdownConversionResult { + const { blocks, diagnostics } = parseAndConvert(markdown, editor, options); + + const docJson = { + type: 'doc', + content: blocks.length > 0 ? blocks : [{ type: 'paragraph' }], + }; + + let doc: PmNode = editor.schema.nodeFromJSON(docJson); + doc = wrapTextsInRuns(doc) as PmNode; + + return { doc, diagnostics }; +} + +// --------------------------------------------------------------------------- +// Fragment conversion (for structured insertion in doc.insert) +// --------------------------------------------------------------------------- + +/** + * Parse Markdown and produce a ProseMirror Fragment for insertion at a position. + * + * The fragment can contain multiple block nodes (paragraphs, tables, lists, etc.) + * and is suitable for `tr.replaceWith(from, to, fragment)`. 
+ */ +export function markdownToPmFragment( + markdown: string, + editor: Editor, + options: MarkdownConversionOptions = {}, +): MarkdownFragmentResult { + const { blocks, diagnostics } = parseAndConvert(markdown, editor, options); + + if (blocks.length === 0) { + return { fragment: Fragment.empty, diagnostics }; + } + + const nodes = blocks.map((json) => editor.schema.nodeFromJSON(json)); + const wrappedNodes = nodes.map((node) => wrapTextsInRuns(node) as PmNode); + const fragment = Fragment.from(wrappedNodes); + + return { fragment, diagnostics }; +} + +// --------------------------------------------------------------------------- +// Shared parse + convert pipeline +// --------------------------------------------------------------------------- + +function parseAndConvert( + markdown: string, + editor: Editor, + options: MarkdownConversionOptions, +): { blocks: ReturnType; diagnostics: MdastConversionContext['diagnostics'] } { + const ast = parseMarkdownToAst(markdown); + + const ctx: MdastConversionContext = { + editor, + schema: editor.schema, + diagnostics: [], + options, + }; + + const blocks = convertMdastToBlocks(ast, ctx); + return { blocks, diagnostics: ctx.diagnostics }; +} diff --git a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts new file mode 100644 index 0000000000..408d18f0cd --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts @@ -0,0 +1,525 @@ +/** + * Convert an mdast AST tree into ProseMirror JSON nodes. + * + * This module walks the mdast tree produced by remark-parse and produces + * ProseMirror-compatible JSON that conforms to the SuperEditor schema. + * + * Key schema facts (SuperEditor/OOXML): + * - Headings are `paragraph` nodes with `paragraphProperties.styleId: 'HeadingN'`. + * - Lists are `paragraph` nodes with `paragraphProperties.numberingProperties`. 
+ * - The `run` node wraps text with run-level properties (bold, italic, etc.). + * - Tables use `table` > `tableRow` > `tableCell` with block content inside cells. + * - There is no dedicated blockquote or horizontal-rule node. + */ + +import type { + Node as MdastNode, + Root, + PhrasingContent, + Paragraph as MdastParagraph, + Heading as MdastHeading, + Blockquote as MdastBlockquote, + Code as MdastCode, + Table as MdastTable, + Image as MdastImage, + Html as MdastHtml, + Text as MdastText, + Strong as MdastStrong, + Emphasis as MdastEmphasis, + Delete as MdastDelete, + Link as MdastLink, + InlineCode as MdastInlineCode, + List as MdastList, + ListItem as MdastListItem, +} from 'mdast'; +import { ListHelpers } from '../list-numbering-helpers.js'; +import type { MdastConversionContext, MarkdownDiagnostic } from './types.js'; + +// --------------------------------------------------------------------------- +// Public entry point +// --------------------------------------------------------------------------- + +/** + * Convert an mdast root node into an array of ProseMirror JSON block nodes + * suitable for constructing a full doc or a fragment. 
+ */ +export function convertMdastToBlocks(root: Root, ctx: MdastConversionContext): JsonNode[] { + return flatMapChildren(root, ctx); +} + +// --------------------------------------------------------------------------- +// JSON node shape (matches ProseMirror nodeFromJSON input) +// --------------------------------------------------------------------------- + +interface JsonNode { + type: string; + attrs?: Record; + content?: JsonNode[]; + marks?: JsonMark[]; + text?: string; +} + +interface JsonMark { + type: string; + attrs?: Record; +} + +// --------------------------------------------------------------------------- +// Block-level converters +// --------------------------------------------------------------------------- + +function flatMapChildren(parent: MdastNode & { children?: MdastNode[] }, ctx: MdastConversionContext): JsonNode[] { + if (!parent.children) return []; + const blocks: JsonNode[] = []; + for (const child of parent.children) { + blocks.push(...convertBlockNode(child, ctx)); + } + return blocks; +} + +function convertBlockNode(node: MdastNode, ctx: MdastConversionContext): JsonNode[] { + switch (node.type) { + case 'paragraph': + return [convertParagraph(node as MdastParagraph, ctx)]; + + case 'heading': + return [convertHeading(node as MdastHeading, ctx)]; + + case 'list': + return convertList(node as MdastList, ctx, 0); + + case 'blockquote': + return convertBlockquote(node as MdastBlockquote, ctx); + + case 'code': + return [convertCodeBlock(node as MdastCode, ctx)]; + + case 'thematicBreak': + return [convertThematicBreak(ctx)]; + + case 'table': + return [convertTable(node as MdastTable, ctx)]; + + case 'image': + return [convertImageBlock(node as MdastImage, ctx)]; + + case 'html': + return convertRawHtml(node as MdastHtml, ctx); + + default: + addDiagnostic(ctx, 'warning', node.type, `Unsupported mdast block node "${node.type}" — skipped.`, node); + return []; + } +} + +// 
--------------------------------------------------------------------------- +// Paragraph +// --------------------------------------------------------------------------- + +function convertParagraph(node: MdastParagraph, ctx: MdastConversionContext): JsonNode { + return makeParagraph(convertInlineChildren(node.children, ctx, [])); +} + +// --------------------------------------------------------------------------- +// Heading (paragraph + styleId) +// --------------------------------------------------------------------------- + +const HEADING_STYLE_MAP: Record = { + 1: 'Heading1', + 2: 'Heading2', + 3: 'Heading3', + 4: 'Heading4', + 5: 'Heading5', + 6: 'Heading6', +}; + +function convertHeading(node: MdastHeading, ctx: MdastConversionContext): JsonNode { + const styleId = HEADING_STYLE_MAP[node.depth] ?? 'Heading1'; + const runs = convertInlineChildren(node.children, ctx, []); + return makeParagraph(runs, { styleId }); +} + +// --------------------------------------------------------------------------- +// List (ordered / bullet → paragraphs with numberingProperties) +// --------------------------------------------------------------------------- + +function convertList(node: MdastList, ctx: MdastConversionContext, depth: number): JsonNode[] { + const listType = node.ordered ? 
'orderedList' : 'bulletList'; + + let numId: number | undefined; + if (!ctx.options.dryRun) { + numId = ListHelpers.getNewListId(ctx.editor); + ListHelpers.generateNewListDefinition({ numId, listType, editor: ctx.editor }); + } else { + // Dry-run: use a placeholder numId (never persisted) + numId = 0; + } + + const blocks: JsonNode[] = []; + for (let i = 0; i < node.children.length; i++) { + const listItem = node.children[i]; + blocks.push(...convertListItem(listItem, ctx, numId, depth, listType)); + } + return blocks; +} + +function convertListItem( + item: MdastListItem, + ctx: MdastConversionContext, + numId: number, + depth: number, + listType: string, +): JsonNode[] { + const blocks: JsonNode[] = []; + + for (const child of item.children) { + if (child.type === 'paragraph') { + const runs = convertInlineChildren((child as MdastParagraph).children, ctx, []); + blocks.push(makeListParagraph(runs, numId, depth)); + } else if (child.type === 'list') { + // Nested list — increase depth, reuse same listType context + blocks.push(...convertList(child as MdastList, ctx, depth + 1)); + } else { + // Other block content inside a list item (e.g., blockquote, code) + blocks.push(...convertBlockNode(child, ctx)); + } + } + + // If the list item had no paragraph children (edge case), emit an empty list paragraph + if (blocks.length === 0) { + blocks.push(makeListParagraph([], numId, depth)); + } + + return blocks; +} + +// --------------------------------------------------------------------------- +// Blockquote (paragraph + Quote style) +// --------------------------------------------------------------------------- + +function convertBlockquote(node: MdastBlockquote, ctx: MdastConversionContext): JsonNode[] { + const blocks: JsonNode[] = []; + for (const child of node.children) { + if (child.type === 'paragraph') { + const runs = convertInlineChildren((child as MdastParagraph).children, ctx, []); + blocks.push(makeParagraph(runs, { styleId: 'Quote' })); + } else { + // 
Nested block inside blockquote — convert normally but could lose quote context + blocks.push(...convertBlockNode(child, ctx)); + } + } + return blocks; +} + +// --------------------------------------------------------------------------- +// Code block (paragraph with monospace run properties) +// --------------------------------------------------------------------------- + +function convertCodeBlock(node: MdastCode, ctx: MdastConversionContext): JsonNode { + const lines = node.value.split('\n'); + const content: JsonNode[] = []; + for (let i = 0; i < lines.length; i++) { + if (i > 0) { + content.push({ type: 'lineBreak' }); + } + if (lines[i].length > 0) { + content.push(makeRun(lines[i], [], { rFonts: { ascii: 'Courier New', hAnsi: 'Courier New' } })); + } + } + return makeParagraph(content); +} + +// --------------------------------------------------------------------------- +// Thematic break (horizontal rule → empty paragraph with border-bottom) +// --------------------------------------------------------------------------- + +function convertThematicBreak(ctx: MdastConversionContext): JsonNode { + // Use the contentBlock node (SuperEditor's inline horizontal rule element) + // if the schema supports it, otherwise fall back to a styled paragraph. 
+ const hasContentBlock = ctx.schema.nodes.contentBlock != null; + if (hasContentBlock) { + return makeParagraph([ + { + type: 'contentBlock', + attrs: { + horizontalRule: true, + size: { width: '100%', height: 2 }, + background: '#e5e7eb', + }, + }, + ]); + } + + return makeParagraph([], { + pBdr: { + bottom: { val: 'single', sz: '6', space: '1', color: 'auto' }, + }, + }); +} + +// --------------------------------------------------------------------------- +// Table +// --------------------------------------------------------------------------- + +function convertTable(node: MdastTable, ctx: MdastConversionContext): JsonNode { + const rows: JsonNode[] = []; + + for (let rowIndex = 0; rowIndex < node.children.length; rowIndex++) { + const mdastRow = node.children[rowIndex]; + const isHeaderRow = rowIndex === 0; + const cells: JsonNode[] = []; + + for (const mdastCell of mdastRow.children) { + const cellContent = convertInlineChildren(mdastCell.children, ctx, []); + const cellParagraph = makeParagraph(cellContent); + const cellType = isHeaderRow ? 'tableHeader' : 'tableCell'; + cells.push({ + type: cellType, + attrs: { + colspan: 1, + rowspan: 1, + colwidth: null, + }, + content: [cellParagraph], + }); + } + + rows.push({ + type: 'tableRow', + content: cells, + }); + } + + return { + type: 'table', + content: rows, + }; +} + +// --------------------------------------------------------------------------- +// Image (block-level — wraps in paragraph if at top level) +// --------------------------------------------------------------------------- + +function convertImageBlock(node: MdastImage, ctx: MdastConversionContext): JsonNode { + if (!node.url) { + addDiagnostic(ctx, 'warning', 'image', 'Image with empty URL — skipped.', node); + return makeParagraph([]); + } + + const imageNode: JsonNode = { + type: 'image', + attrs: { + src: node.url, + alt: node.alt ?? null, + title: node.title ?? 
null, + }, + }; + + // Image must be wrapped in a paragraph for the OOXML content model + return { + type: 'paragraph', + content: [imageNode], + }; +} + +// --------------------------------------------------------------------------- +// Raw HTML fallback +// --------------------------------------------------------------------------- + +function extractHtmlTagName(html: string): string { + const match = html.match(/^<\/?([a-zA-Z][a-zA-Z0-9]*)/); + return match ? match[1].toUpperCase() : 'HTML'; +} + +function convertRawHtml(node: MdastHtml, ctx: MdastConversionContext): JsonNode[] { + const tagName = extractHtmlTagName(node.value); + addDiagnostic( + ctx, + 'warning', + tagName, + `Raw HTML <${tagName.toLowerCase()}> in markdown — converted to plain text.`, + node, + ); + // Fall back to a plain text paragraph + if (node.value.trim().length === 0) return []; + return [makeParagraph([makeRun(node.value, [])])]; +} + +// --------------------------------------------------------------------------- +// Inline-level converters +// --------------------------------------------------------------------------- + +/** + * Convert an array of mdast phrasing (inline) content into PM JSON run nodes. + * `parentMarks` accumulates marks as we recurse into emphasis/strong/etc. 
+ */ +function convertInlineChildren( + children: PhrasingContent[], + ctx: MdastConversionContext, + parentMarks: JsonMark[], +): JsonNode[] { + const nodes: JsonNode[] = []; + for (const child of children) { + nodes.push(...convertInlineNode(child, ctx, parentMarks)); + } + return nodes; +} + +function convertInlineNode(node: PhrasingContent, ctx: MdastConversionContext, parentMarks: JsonMark[]): JsonNode[] { + switch (node.type) { + case 'text': + return [makeRun((node as MdastText).value, parentMarks)]; + + case 'strong': + return convertInlineChildren((node as MdastStrong).children, ctx, [...parentMarks, { type: 'bold' }]); + + case 'emphasis': + return convertInlineChildren((node as MdastEmphasis).children, ctx, [...parentMarks, { type: 'italic' }]); + + case 'delete': + return convertInlineChildren((node as MdastDelete).children, ctx, [...parentMarks, { type: 'strike' }]); + + case 'link': + return convertLink(node as MdastLink, ctx, parentMarks); + + case 'inlineCode': + return [ + makeRun((node as MdastInlineCode).value, [ + ...parentMarks, + { type: 'textStyle', attrs: { fontFamily: 'Courier New' } }, + ]), + ]; + + case 'break': + return [{ type: 'lineBreak' }]; + + case 'image': + return convertInlineImage(node as MdastImage, ctx); + + default: { + const diagNodeType = node.type === 'html' ? extractHtmlTagName((node as MdastHtml).value ?? 
'') : node.type; + addDiagnostic( + ctx, + 'warning', + diagNodeType, + `Unsupported mdast inline node "${node.type}" — converted to text.`, + node, + ); + // Attempt to extract text content as fallback + if ('value' in node && typeof (node as unknown as { value: unknown }).value === 'string') { + return [makeRun((node as unknown as { value: string }).value, parentMarks)]; + } + if ('children' in node && Array.isArray((node as unknown as { children: unknown }).children)) { + return convertInlineChildren((node as unknown as { children: PhrasingContent[] }).children, ctx, parentMarks); + } + return []; + } + } +} + +// --------------------------------------------------------------------------- +// Link +// --------------------------------------------------------------------------- + +function convertLink(node: MdastLink, ctx: MdastConversionContext, parentMarks: JsonMark[]): JsonNode[] { + const linkMark: JsonMark = { + type: 'link', + attrs: { + href: node.url, + target: '_blank', + rel: 'noopener noreferrer nofollow', + ...(node.title ? { tooltip: node.title } : {}), + }, + }; + return convertInlineChildren(node.children, ctx, [...parentMarks, linkMark]); +} + +// --------------------------------------------------------------------------- +// Inline image +// --------------------------------------------------------------------------- + +function convertInlineImage(node: MdastImage, ctx: MdastConversionContext): JsonNode[] { + if (!node.url) { + addDiagnostic(ctx, 'warning', 'image', 'Inline image with empty URL — skipped.', node); + return []; + } + return [ + { + type: 'image', + attrs: { + src: node.url, + alt: node.alt ?? null, + title: node.title ?? 
null, + }, + }, + ]; +} + +// --------------------------------------------------------------------------- +// JSON node builders +// --------------------------------------------------------------------------- + +function makeParagraph(content: JsonNode[], extraParagraphProps?: Record): JsonNode { + const paragraphProperties = extraParagraphProps ? { ...extraParagraphProps } : undefined; + const attrs: Record = {}; + if (paragraphProperties) { + attrs.paragraphProperties = paragraphProperties; + } + return { + type: 'paragraph', + ...(Object.keys(attrs).length > 0 ? { attrs } : {}), + content: content.length > 0 ? content : undefined, + }; +} + +function makeListParagraph(content: JsonNode[], numId: number, ilvl: number): JsonNode { + const numberingProperties = { numId, ilvl }; + return { + type: 'paragraph', + attrs: { + paragraphProperties: { numberingProperties }, + numberingProperties, + }, + content: content.length > 0 ? content : undefined, + }; +} + +/** + * Create a `run` JSON node wrapping a text node with optional marks/run properties. + */ +function makeRun(text: string, marks: JsonMark[], extraRunProperties?: Record): JsonNode { + const textNode: JsonNode = { + type: 'text', + text, + ...(marks.length > 0 ? 
{ marks } : {}), + }; + const runNode: JsonNode = { + type: 'run', + content: [textNode], + }; + if (extraRunProperties) { + runNode.attrs = { runProperties: extraRunProperties }; + } + return runNode; +} + +// --------------------------------------------------------------------------- +// Diagnostics helper +// --------------------------------------------------------------------------- + +function addDiagnostic( + ctx: MdastConversionContext, + severity: MarkdownDiagnostic['severity'], + nodeType: string, + message: string, + node?: MdastNode, +): void { + const diagnostic: MarkdownDiagnostic = { severity, nodeType, message }; + if (node?.position?.start) { + diagnostic.position = { + line: node.position.start.line, + column: node.position.start.column, + }; + } + ctx.diagnostics.push(diagnostic); +} diff --git a/packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts b/packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts new file mode 100644 index 0000000000..00fd19908f --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts @@ -0,0 +1,22 @@ +/** + * Markdown source → mdast AST parsing. + * + * Uses unified + remark-parse + remark-gfm to produce a GFM-aware mdast tree. + * This is the only place in the codebase that touches remark-parse. + */ + +import { unified } from 'unified'; +import remarkParse from 'remark-parse'; +import remarkGfm from 'remark-gfm'; +import type { Root } from 'mdast'; + +/** + * Parse a Markdown string into an mdast AST tree. + * + * Supports GitHub Flavored Markdown (tables, strikethrough, autolinks, task lists). + * This operation is synchronous and side-effect-free. 
+ */ +export function parseMarkdownToAst(markdown: string): Root { + const processor = unified().use(remarkParse).use(remarkGfm); + return processor.parse(markdown) as Root; +} diff --git a/packages/super-editor/src/core/helpers/markdown/types.ts b/packages/super-editor/src/core/helpers/markdown/types.ts new file mode 100644 index 0000000000..890a1e58fb --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/types.ts @@ -0,0 +1,64 @@ +/** + * Types for the Markdown → ProseMirror AST conversion pipeline. + */ + +import type { Node as PmNode, Fragment, Schema } from 'prosemirror-model'; +import type { Editor } from '../../Editor.js'; + +// --------------------------------------------------------------------------- +// Conversion options +// --------------------------------------------------------------------------- + +export interface MarkdownConversionOptions { + /** When true, skip side-effects like numbering allocation (for dry-run validation). */ + dryRun?: boolean; +} + +// --------------------------------------------------------------------------- +// Conversion results +// --------------------------------------------------------------------------- + +export interface MarkdownConversionResult { + /** The converted ProseMirror document node. */ + doc: PmNode; + /** Diagnostics for unsupported or problematic mdast nodes. */ + diagnostics: MarkdownDiagnostic[]; +} + +export interface MarkdownFragmentResult { + /** The converted ProseMirror fragment (for insertion, not full doc). */ + fragment: Fragment; + /** Diagnostics for unsupported or problematic mdast nodes. 
*/ + diagnostics: MarkdownDiagnostic[]; +} + +// --------------------------------------------------------------------------- +// Diagnostics +// --------------------------------------------------------------------------- + +export type DiagnosticSeverity = 'warning' | 'error'; + +export interface MarkdownDiagnostic { + severity: DiagnosticSeverity; + /** The mdast node type that triggered the diagnostic. */ + nodeType: string; + /** Human-readable explanation. */ + message: string; + /** Line/column in the source markdown (if available from mdast position). */ + position?: { line: number; column: number }; +} + +// --------------------------------------------------------------------------- +// mdast-to-PM mapper context +// --------------------------------------------------------------------------- + +/** + * Shared context threaded through the mdast → ProseMirror mapping walk. + * Carries the editor, schema, accumulated diagnostics, and conversion options. + */ +export interface MdastConversionContext { + editor: Editor; + schema: Schema; + diagnostics: MarkdownDiagnostic[]; + options: MarkdownConversionOptions; +} diff --git a/packages/super-editor/src/document-api-adapters/assemble-adapters.ts b/packages/super-editor/src/document-api-adapters/assemble-adapters.ts index 9c33a9fcc1..68d92a23fd 100644 --- a/packages/super-editor/src/document-api-adapters/assemble-adapters.ts +++ b/packages/super-editor/src/document-api-adapters/assemble-adapters.ts @@ -6,7 +6,7 @@ import { getTextAdapter } from './get-text-adapter.js'; import { infoAdapter } from './info-adapter.js'; import { getDocumentApiCapabilities } from './capabilities-adapter.js'; import { createCommentsWrapper } from './plan-engine/comments-wrappers.js'; -import { writeWrapper, styleApplyWrapper } from './plan-engine/plan-wrappers.js'; +import { writeWrapper, insertStructuredWrapper, styleApplyWrapper } from './plan-engine/plan-wrappers.js'; import { stylesApplyAdapter } from './styles-adapter.js'; import { 
formatFontSizeWrapper, @@ -111,6 +111,7 @@ export function assembleDocumentApiAdapters(editor: Editor): DocumentApiAdapters comments: createCommentsWrapper(editor), write: { write: (request, options) => writeWrapper(editor, request, options), + insertStructured: (input, options) => insertStructuredWrapper(editor, input, options), }, format: { apply: (input, options) => styleApplyWrapper(editor, input, options), diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/index.ts b/packages/super-editor/src/document-api-adapters/plan-engine/index.ts index f72d8a9f06..e366fc7613 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/index.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/index.ts @@ -12,4 +12,4 @@ export { planError, PlanError } from './errors.js'; export { captureRunsInRange, resolveInlineStyle } from './style-resolver.js'; export type { CapturedRun, CapturedStyle } from './style-resolver.js'; export type { CompiledTarget, StepExecutor, CompileContext, ExecuteContext } from './executor-registry.types.js'; -export { writeWrapper, styleApplyWrapper } from './plan-wrappers.js'; +export { writeWrapper, insertStructuredWrapper, styleApplyWrapper } from './plan-wrappers.js'; diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts new file mode 100644 index 0000000000..27aefb767e --- /dev/null +++ b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts @@ -0,0 +1,304 @@ +import { beforeAll, beforeEach, afterEach, describe, it, expect, vi } from 'vitest'; +import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js'; +import type { Editor } from '../../core/Editor.js'; +import { insertStructuredWrapper } from './plan-wrappers.js'; +import { registerBuiltInExecutors } from './register-executors.js'; 
+import { clearExecutorRegistry } from './executor-registry.js'; +import { resolveTextTarget } from '../helpers/adapter-utils.js'; + +let docData: Awaited>; + +beforeAll(async () => { + docData = await loadTestDataForEditorTests('blank-doc.docx'); + clearExecutorRegistry(); + registerBuiltInExecutors(); +}); + +let editor: Editor; + +beforeEach(() => { + ({ editor } = initTestEditor({ + content: docData.docx, + media: docData.media, + mediaFiles: docData.mediaFiles, + fonts: docData.fonts, + })); +}); + +afterEach(() => { + editor?.destroy(); + // @ts-expect-error cleanup + editor = null; +}); + +function getDocTextContent(ed: Editor): string { + return ed.state.doc.textContent; +} + +/** Requires prior seeded content — a blank doc has no text offsets to span. */ +function findResolvableNonCollapsedTarget(ed: Editor): { blockId: string; range: { start: number; end: number } } { + const candidateIds = new Set(); + const identityKeys = ['sdBlockId', 'blockId', 'paraId', 'id', 'uuid'] as const; + + ed.state.doc.descendants((node) => { + const attrs = node.attrs as Record | undefined; + if (!attrs) return true; + + for (const key of identityKeys) { + const value = attrs[key]; + if (typeof value === 'string' && value.length > 0) candidateIds.add(value); + } + return true; + }); + + for (const blockId of candidateIds) { + const target = { + kind: 'text' as const, + blockId, + range: { start: 0, end: 1 }, + }; + const resolved = resolveTextTarget(ed, target); + if (resolved && resolved.from !== resolved.to) { + return { blockId, range: { start: 0, end: 1 } }; + } + } + + throw new Error('Expected at least one resolvable non-collapsed text target.'); +} + +describe('insertStructuredWrapper — markdown', () => { + it('inserts markdown paragraph content into the document', () => { + const result = insertStructuredWrapper(editor, { + value: 'Hello from markdown', + type: 'markdown', + }); + + expect(result.success).toBe(true); + 
expect(getDocTextContent(editor)).toContain('Hello from markdown'); + }); + + it('inserts markdown heading as a styled paragraph', () => { + const result = insertStructuredWrapper(editor, { + value: '# My Heading', + type: 'markdown', + }); + + expect(result.success).toBe(true); + expect(getDocTextContent(editor)).toContain('My Heading'); + + // Verify heading is represented as a paragraph with Heading1 style + let foundHeading = false; + editor.state.doc.descendants((node) => { + if (node.type.name === 'paragraph' && node.attrs?.paragraphProperties?.styleId === 'Heading1') { + foundHeading = true; + } + return true; + }); + expect(foundHeading).toBe(true); + }); + + it('inserts markdown with multiple blocks', () => { + const result = insertStructuredWrapper(editor, { + value: '# Title\n\nFirst paragraph.\n\nSecond paragraph.', + type: 'markdown', + }); + + expect(result.success).toBe(true); + expect(getDocTextContent(editor)).toContain('Title'); + expect(getDocTextContent(editor)).toContain('First paragraph.'); + expect(getDocTextContent(editor)).toContain('Second paragraph.'); + }); + + it('inserts markdown list content', () => { + const result = insertStructuredWrapper(editor, { + value: '- Item one\n- Item two\n- Item three', + type: 'markdown', + }); + + expect(result.success).toBe(true); + expect(getDocTextContent(editor)).toContain('Item one'); + expect(getDocTextContent(editor)).toContain('Item two'); + expect(getDocTextContent(editor)).toContain('Item three'); + }); + + it('returns NO_OP for empty markdown', () => { + const result = insertStructuredWrapper(editor, { + value: '', + type: 'markdown', + }); + + expect(result.success).toBe(false); + expect(result.failure?.code).toBe('NO_OP'); + }); + + it('returns INVALID_TARGET for non-collapsed targets instead of replacing selected text', () => { + const seed = insertStructuredWrapper(editor, { + value: 'abcdef', + type: 'markdown', + }); + expect(seed.success).toBe(true); + + const textBefore = 
getDocTextContent(editor); + const target = findResolvableNonCollapsedTarget(editor); + + const result = insertStructuredWrapper(editor, { + value: 'X', + type: 'markdown', + target: { kind: 'text', ...target }, + }); + + expect(result.success).toBe(false); + expect(result.failure?.code).toBe('INVALID_TARGET'); + expect(getDocTextContent(editor)).toBe(textBefore); + }); +}); + +describe('insertStructuredWrapper — list numbering rollback', () => { + it('rolls back numbering allocations when insertContentAt fails after markdown parsing', () => { + // This test exercises the actual rollback branch: markdown with list + // syntax is parsed (allocating numbering IDs on editor.converter), then + // insertContentAt is forced to fail, and we verify the snapshot/restore + // reverts numbering state to its pre-insert value. + const converter = (editor as any).converter; + + // Capture numbering state before the insert attempt. + const numberingBefore = JSON.stringify(converter?.numbering ?? {}); + const translatedBefore = JSON.stringify(converter?.translatedNumbering ?? {}); + + // Shadow both view.dispatch and editor.dispatch with undefined so that + // CommandService's #dispatchWithFallback returns false (no dispatch + // method available). This causes insertContentAt to return false AFTER + // markdown parsing has already allocated numbering IDs on the converter. + const view = (editor as any).view; + if (view) { + Object.defineProperty(view, 'dispatch', { value: undefined, configurable: true }); + } + Object.defineProperty(editor, 'dispatch', { value: undefined, configurable: true }); + + try { + const result = insertStructuredWrapper(editor, { + value: '- List item that allocates numbering', + type: 'markdown', + }); + + expect(result.success).toBe(false); + expect(result.failure?.code).toBe('INVALID_TARGET'); + + // The markdown parsing allocated numbering IDs, but rollback should + // have restored converter state to the pre-insert snapshot. 
+ expect(JSON.stringify(converter?.numbering ?? {})).toBe(numberingBefore); + expect(JSON.stringify(converter?.translatedNumbering ?? {})).toBe(translatedBefore); + } finally { + // Remove own-property shadows to restore prototype methods. + if (view) delete view.dispatch; + delete (editor as any).dispatch; + } + }); + + it('does not roll back numbering on successful list insert', () => { + const converter = (editor as any).converter; + + const numberingBefore = JSON.stringify(converter?.numbering ?? {}); + + const result = insertStructuredWrapper(editor, { + value: '- Successfully inserted list item', + type: 'markdown', + }); + + expect(result.success).toBe(true); + // Numbering state should have changed (new list ID allocated). + expect(JSON.stringify(converter?.numbering ?? {})).not.toBe(numberingBefore); + }); +}); + +describe('insertStructuredWrapper — html', () => { + it('does not throw for HTML insert (gracefully succeeds or returns failure)', () => { + // The test editor in vitest (happy-dom) may or may not have DOM support. + // The key assertion is that this never throws an unhandled error. + expect(() => { + const result = insertStructuredWrapper(editor, { + value: '

                    Hello from HTML

                    ', + type: 'html', + }); + + // In a DOM environment it should succeed; in headless it fails gracefully + if (result.success) { + expect(getDocTextContent(editor)).toContain('Hello from HTML'); + } else { + expect(result.failure).toBeDefined(); + expect(['UNSUPPORTED_ENVIRONMENT', 'INVALID_TARGET']).toContain(result.failure?.code); + } + }).not.toThrow(); + }); +}); + +describe('insertStructuredWrapper — dry-run', () => { + it('does not mutate document on dry-run markdown insert', () => { + const textBefore = getDocTextContent(editor); + + const result = insertStructuredWrapper( + editor, + { value: '# Should Not Appear', type: 'markdown' }, + { dryRun: true }, + ); + + expect(result.success).toBe(true); + expect(getDocTextContent(editor)).toBe(textBefore); + }); + + it('mirrors runtime failure for empty markdown in dry-run mode', () => { + const runtime = insertStructuredWrapper(editor, { + value: '', + type: 'markdown', + }); + expect(runtime.success).toBe(false); + expect(runtime.failure?.code).toBe('NO_OP'); + + const dryRun = insertStructuredWrapper( + editor, + { + value: '', + type: 'markdown', + }, + { dryRun: true }, + ); + + expect(dryRun.success).toBe(false); + expect(dryRun.failure?.code).toBe('NO_OP'); + }); + + it('mirrors runtime environment failure for html in dry-run mode', () => { + const opts = (editor as any).options ?? ((editor as any).options = {}); + const prevDocument = opts.document; + const prevMockDocument = opts.mockDocument; + + opts.document = undefined; + opts.mockDocument = undefined; + vi.stubGlobal('document', undefined as any); + + try { + const runtime = insertStructuredWrapper(editor, { + value: '

                    Hello from HTML

                    ', + type: 'html', + }); + expect(runtime.success).toBe(false); + expect(runtime.failure?.code).toBe('UNSUPPORTED_ENVIRONMENT'); + + const dryRun = insertStructuredWrapper( + editor, + { + value: '

                    Hello from HTML

                    ', + type: 'html', + }, + { dryRun: true }, + ); + + expect(dryRun.success).toBe(false); + expect(dryRun.failure?.code).toBe('UNSUPPORTED_ENVIRONMENT'); + } finally { + vi.unstubAllGlobals(); + opts.document = prevDocument; + opts.mockDocument = prevMockDocument; + } + }); +}); diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts index 7c16bd47cf..989684657f 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts @@ -10,6 +10,7 @@ import { v4 as uuidv4 } from 'uuid'; import type { MutationOptions, MutationStep, + InsertInput, TextAddress, TextMutationReceipt, TextMutationResolution, @@ -29,6 +30,8 @@ import { resolveDefaultInsertTarget, resolveTextTarget, type ResolvedTextTarget import { buildTextMutationResolution, readTextAtResolvedRange } from '../helpers/text-mutation-resolution.js'; import { ensureTrackedCapability, requireSchemaMark } from '../helpers/mutation-helpers.js'; import { TrackFormatMarkName } from '../../extensions/track-changes/constants.js'; +import { markdownToPmFragment } from '../../core/helpers/markdown/markdownToPmContent.js'; +import { processContent } from '../../core/helpers/contentProcessor.js'; // --------------------------------------------------------------------------- // Locator normalization (same validation as the old adapters) @@ -447,3 +450,223 @@ export function styleApplyWrapper( return mapPlanReceiptToTextReceipt(receipt, resolution); } + +// --------------------------------------------------------------------------- +// Structured content insertion (markdown / html) +// --------------------------------------------------------------------------- + +/** + * Insert structured content (markdown or html) at a target position. 
+ * + * Routes through `executeDomainCommand` to enforce the revision guard. + * Conversion (markdown → AST → PM, or html → processContent → PM) happens + * inside the handler, so list-definition side effects only occur after the + * revision check passes. HTML content goes through the canonical + * `processContent` pipeline, matching the `insertContent` command path. + * + * Tracked mode is explicitly rejected for structured content in this implementation. + */ +export function insertStructuredWrapper( + editor: Editor, + input: InsertInput, + options?: MutationOptions, +): TextMutationReceipt { + const contentType = input.type ?? 'text'; + const { value, target } = input; + + // Tracked mode not supported for structured content + const mode = options?.changeMode ?? 'direct'; + if (mode === 'tracked') { + throw new DocumentApiAdapterError( + 'CAPABILITY_UNAVAILABLE', + `Tracked mode is not supported for type: '${contentType}' insert operations.`, + ); + } + + // Resolve target position + let resolvedRange: ResolvedTextTarget; + let effectiveTarget: TextAddress; + + if (target) { + const range = resolveTextTarget(editor, target); + if (!range) { + throw new DocumentApiAdapterError('TARGET_NOT_FOUND', 'Structured insert target could not be resolved.', { + target, + }); + } + resolvedRange = range; + effectiveTarget = target; + } else { + const fallback = resolveDefaultInsertTarget(editor); + if (!fallback) { + throw new DocumentApiAdapterError('TARGET_NOT_FOUND', 'No default insertion point available.'); + } + resolvedRange = fallback.range; + effectiveTarget = fallback.target; + } + + const resolution = buildTextMutationResolution({ + requestedTarget: target, + target: effectiveTarget, + range: resolvedRange, + text: readTextAtResolvedRange(editor, resolvedRange), + }); + + const { from, to } = resolvedRange; + + // Insert semantics are point-only for doc.insert, regardless of content type. 
+ if (from !== to) { + return { + success: false, + resolution, + failure: { code: 'INVALID_TARGET', message: 'Insert operations require a collapsed target range.' }, + }; + } + + // Dry-run: parse + validate but do not mutate + if (options?.dryRun) { + if (contentType === 'markdown') { + // Parse to validate structure (side-effect-free with dryRun: true) + const { fragment } = markdownToPmFragment(value, editor, { dryRun: true }); + if (fragment.childCount === 0) { + return { + success: false, + resolution, + failure: { code: 'NO_OP', message: 'Markdown produced no content to insert.' }, + }; + } + } else if (contentType === 'html') { + // NOTE: processContent has no dryRun flag — this runs the full HTML + // pipeline (DOM creation, wrapTextsInRuns) minus the final insertContentAt. + // Acceptable for catching UNSUPPORTED_ENVIRONMENT / INVALID_TARGET early. + try { + const processedDoc = processContent({ content: value, type: 'html', editor }); + if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') { + return { + success: false, + resolution, + failure: { + code: 'INVALID_TARGET', + message: 'HTML processing did not produce a valid document node.', + }, + }; + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return { + success: false, + resolution, + failure: { + code: 'UNSUPPORTED_ENVIRONMENT', + message: `HTML structured insert requires a DOM environment. ${message}`, + }, + }; + } + } + return { success: true, resolution }; + } + + // Convert and insert inside executeDomainCommand so the revision guard + // runs before any conversion side effects (e.g. list numbering allocation). + let insertFailure: ReceiptFailure | undefined; + + // Snapshot numbering state so we can roll back if the insert fails. + // List conversion allocates IDs and definitions on editor.converter — these + // mutations sit outside the ProseMirror transaction and aren't auto-reverted. 
+ const converter = (editor as any).converter; + const numberingSnapshot = converter?.numbering ? JSON.parse(JSON.stringify(converter.numbering)) : undefined; + const translatedNumberingSnapshot = converter?.translatedNumbering + ? JSON.parse(JSON.stringify(converter.translatedNumbering)) + : undefined; + + const receipt = executeDomainCommand( + editor, + (): boolean => { + if (contentType === 'markdown') { + const { fragment } = markdownToPmFragment(value, editor); + + if (fragment.childCount === 0) { + insertFailure = { code: 'NO_OP', message: 'Markdown produced no content to insert.' }; + return false; + } + + // Convert Fragment to a JSON array — insertContentAt routes arrays + // through Fragment.fromArray(content.map(schema.nodeFromJSON)), which + // correctly materializes the nodes. Passing a Fragment directly fails + // because createNodeFromContent treats it as a single JSON object. + const jsonNodes: Record[] = []; + fragment.forEach((node) => jsonNodes.push(node.toJSON())); + + const ok = Boolean(editor.commands.insertContentAt({ from, to }, jsonNodes)); + if (!ok) { + insertFailure = { + code: 'INVALID_TARGET', + message: 'Structured content could not be inserted at the target position.', + }; + } + return ok; + } else if (contentType === 'html') { + // Route through processContent for the canonical HTML pipeline + // (createDocFromHTML + wrapTextsInRuns), matching insertContent command behavior. + // processContent requires a DOM; in headless environments this will throw. 
+ try { + const processedDoc = processContent({ content: value, type: 'html', editor }); + if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') { + insertFailure = { + code: 'INVALID_TARGET', + message: 'HTML processing did not produce a valid document node.', + }; + return false; + } + const jsonContent = (processedDoc as { toJSON(): Record }).toJSON(); + + const ok = Boolean(editor.commands.insertContentAt({ from, to }, jsonContent)); + if (!ok) { + insertFailure = { + code: 'INVALID_TARGET', + message: 'HTML content could not be inserted at the target position.', + }; + } + return ok; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + insertFailure = { + code: 'UNSUPPORTED_ENVIRONMENT', + message: `HTML structured insert requires a DOM environment. ${message}`, + }; + return false; + } + } + return false; + }, + { expectedRevision: options?.expectedRevision }, + ); + + const commandSucceeded = receipt.steps[0]?.effect === 'changed'; + + // Roll back numbering side effects if the insert failed. + // The ProseMirror transaction is only dispatched on success, but list ID + // allocations mutate converter state directly and need manual rollback. + if (!commandSucceeded && converter) { + if (numberingSnapshot !== undefined) converter.numbering = numberingSnapshot; + if (translatedNumberingSnapshot !== undefined) converter.translatedNumbering = translatedNumberingSnapshot; + } + + // Schedule list migration after successful html/markdown insert, + // matching the insertContent command's post-insert hook. + if (commandSucceeded) { + Promise.resolve() + .then(() => (editor as any).migrateListsToV2?.()) + .catch(() => {}); + } + + if (!commandSucceeded) { + return { + success: false, + resolution, + failure: insertFailure ?? { code: 'INVALID_TARGET', message: 'Structured insert failed.' 
}, + }; + } + + return { success: true, resolution }; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ba8899b39b..96b0afe633 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -48,6 +48,9 @@ catalogs: '@types/bun': specifier: ^1.3.8 version: 1.3.8 + '@types/mdast': + specifier: ^4.0.4 + version: 4.0.4 '@types/node': specifier: 22.19.2 version: 22.19.2 @@ -129,9 +132,6 @@ catalogs: lib0: specifier: ^0.2.114 version: 0.2.117 - marked: - specifier: ^16.2.0 - version: 16.4.2 naive-ui: specifier: ^2.43.1 version: 2.43.2 @@ -213,6 +213,9 @@ catalogs: remark-gfm: specifier: ^4.0.1 version: 4.0.1 + remark-parse: + specifier: ^11.0.0 + version: 11.0.0 remark-stringify: specifier: ^11.0.0 version: 11.0.0 @@ -1039,9 +1042,6 @@ importers: lodash: specifier: ^4.17.21 version: 4.17.23 - marked: - specifier: 'catalog:' - version: 16.4.2 naive-ui: specifier: ^2.38.2 version: 2.43.2(vue@3.5.25(typescript@5.9.3)) @@ -1093,6 +1093,9 @@ importers: remark-gfm: specifier: 'catalog:' version: 4.0.1 + remark-parse: + specifier: 'catalog:' + version: 11.0.0 remark-stringify: specifier: 'catalog:' version: 11.0.0 @@ -1151,6 +1154,9 @@ importers: '@superdoc/word-layout': specifier: workspace:* version: link:../word-layout + '@types/mdast': + specifier: 'catalog:' + version: 4.0.4 '@vitejs/plugin-vue': specifier: 'catalog:' version: 6.0.2(rolldown-vite@7.3.1(@types/node@22.19.8)(esbuild@0.27.2)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2))(vue@3.5.25(typescript@5.9.3)) @@ -7825,11 +7831,6 @@ packages: engines: {node: '>= 18'} hasBin: true - marked@16.4.2: - resolution: {integrity: sha512-TI3V8YYWvkVf3KJe1dRkpnjs68JUPyEa5vjKrp1XEEJUAOaQc+Qj+L1qWbPd0SJuAdQkFU0h73sXXqwDYxsiDA==} - engines: {node: '>= 20'} - hasBin: true - math-intrinsics@1.1.0: resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} engines: {node: '>= 0.4'} @@ -19399,8 +19400,6 @@ snapshots: marked@15.0.12: {} - marked@16.4.2: {} - math-intrinsics@1.1.0: {} 
md5.js@1.3.5: diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 0d5d3fe214..f2793f6785 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -27,6 +27,7 @@ catalog: '@testing-library/react': ^16.3.0 '@testing-library/user-event': ^14.6.1 '@types/bun': ^1.3.8 + '@types/mdast': ^4.0.4 '@types/node': 22.19.2 '@types/react': ^19.2.6 '@types/react-dom': ^19.2.3 @@ -89,6 +90,7 @@ catalog: rehype-parse: ^9.0.1 rehype-remark: ^10.0.1 remark-gfm: ^4.0.1 + remark-parse: ^11.0.0 remark-stringify: ^11.0.0 rollup-plugin-copy: ^3.5.0 rollup-plugin-visualizer: ^5.12.0 diff --git a/tests/behavior/helpers/document-api.ts b/tests/behavior/helpers/document-api.ts index 9aca57c9c2..036923e674 100644 --- a/tests/behavior/helpers/document-api.ts +++ b/tests/behavior/helpers/document-api.ts @@ -173,7 +173,7 @@ export async function listComments( export async function insertText( page: Page, - input: { text: string; target?: TextAddress }, + input: { value: string; target?: TextAddress; type?: 'text' | 'markdown' | 'html' }, options: { changeMode?: ChangeMode; dryRun?: boolean } = {}, ): Promise { return page.evaluate(({ payload, opts }) => (window as any).editor.doc.insert(payload, opts), { diff --git a/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts b/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts index 36f80b8383..4471b75f06 100644 --- a/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts +++ b/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts @@ -96,7 +96,7 @@ test('direct insert via document-api', async ({ superdoc }) => { }, }; - const receipt = await insertText(superdoc.page, { text: 'Beautiful ', target: insertionTarget }); + const receipt = await insertText(superdoc.page, { value: 'Beautiful ', target: insertionTarget }); assertMutationSucceeded('insertText', receipt); await superdoc.waitForStable(); From f0f445ce25b212e125d732b41e201f907876f30c Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: 
Wed, 25 Feb 2026 21:35:17 -0800 Subject: [PATCH 2/5] fix(document-api): dry-run numbering mutation, insert contract, and text override whitespace --- apps/cli/src/__tests__/cli.test.ts | 15 +++++++++ apps/cli/src/commands/open.ts | 12 ++----- apps/cli/src/lib/document.ts | 32 +++++++++++++++++-- .../reference/_generated-manifest.json | 2 +- apps/docs/document-api/reference/insert.mdx | 7 ++-- .../src/contract/contract.test.ts | 18 +++++++++++ .../src/contract/operation-definitions.ts | 2 +- .../insert-structured-wrapper.test.ts | 21 ++++++++++++ .../plan-engine/plan-wrappers.ts | 16 +++++++++- 9 files changed, 108 insertions(+), 17 deletions(-) diff --git a/apps/cli/src/__tests__/cli.test.ts b/apps/cli/src/__tests__/cli.test.ts index 1685a0608e..b04f77b4c7 100644 --- a/apps/cli/src/__tests__/cli.test.ts +++ b/apps/cli/src/__tests__/cli.test.ts @@ -2054,6 +2054,21 @@ describe('superdoc CLI', () => { expect(closeResult.code).toBe(0); }); + test('open with --override-type text preserves leading whitespace literally', async () => { + const literalText = ' foo'; + + const openResult = await runCli(['open', SAMPLE_DOC, '--content-override', literalText, '--override-type', 'text']); + expect(openResult.code).toBe(0); + + const findResult = await runCli(['find', '--type', 'text', '--pattern', literalText]); + expect(findResult.code).toBe(0); + const findEnvelope = parseJsonOutput>(findResult); + expect(findEnvelope.data.result.total).toBeGreaterThan(0); + + const closeResult = await runCli(['close', '--discard']); + expect(closeResult.code).toBe(0); + }); + test('open with --override-type markdown applies content semantically', async () => { const openResult = await runCli([ 'open', diff --git a/apps/cli/src/commands/open.ts b/apps/cli/src/commands/open.ts index f3df19e3de..3b5d8ecc2f 100644 --- a/apps/cli/src/commands/open.ts +++ b/apps/cli/src/commands/open.ts @@ -18,11 +18,6 @@ import type { CommandContext, CommandExecution } from '../lib/types'; const 
VALID_OVERRIDE_TYPES = new Set(['markdown', 'html', 'text']); -/** Escape CommonMark special characters so the text is treated as literal. */ -function escapeMarkdown(str: string): string { - return str.replace(/([\\`*_{}[\]()#+\-.!|>~])/g, '\\$1'); -} - export async function runOpen(tokens: string[], context: CommandContext): Promise { const { parsed, help } = parseOperationArgs('doc.open', tokens, { commandName: 'open', @@ -111,10 +106,9 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis } else if (overrideType === 'html') { editorOpenOptions.html = contentOverride; } else if (overrideType === 'text') { - // Route through the markdown pipeline which is DOM-free (AST-based), - // so it works in headless CLI mode. Escape markdown syntax characters - // so the content is treated as literal text, not interpreted as formatting. - editorOpenOptions.markdown = escapeMarkdown(contentOverride); + // Plain text bypass — handed off to document.ts which builds PM + // paragraphs directly, preserving all whitespace without markdown parsing. + editorOpenOptions.plainText = contentOverride; } } diff --git a/apps/cli/src/lib/document.ts b/apps/cli/src/lib/document.ts index e92bbcd41a..707587a1a1 100644 --- a/apps/cli/src/lib/document.ts +++ b/apps/cli/src/lib/document.ts @@ -28,7 +28,7 @@ interface OpenDocumentOptions { documentId?: string; ydoc?: unknown; collaborationProvider?: unknown; - /** Options passed through to Editor.open() (e.g., markdown/html for content override). */ + /** Options passed through to Editor.open() (e.g., markdown/html/plainText for content override). 
*/ editorOpenOptions?: Record; } @@ -107,7 +107,12 @@ export async function openDocument( // there is no DOM, so we intercept them here: // - markdown: applied post-init via the AST-based markdownToPmDoc pipeline (DOM-free) // - html: rejected with a clear error (no DOM-free HTML pipeline exists) - const { markdown: markdownOverride, html: htmlOverride, ...passThroughEditorOpts } = options.editorOpenOptions ?? {}; + const { + markdown: markdownOverride, + html: htmlOverride, + plainText: plainTextOverride, + ...passThroughEditorOpts + } = options.editorOpenOptions ?? {}; if (htmlOverride != null) { throw new CliError( @@ -135,7 +140,9 @@ export async function openDocument( }); } - // Apply markdown content override post-init (DOM-free AST pipeline). + // Apply content override post-init. + // - markdown: DOM-free AST pipeline + // - plainText: builds PM paragraphs directly, preserving all whitespace if (markdownOverride != null) { try { const { doc: newDoc } = markdownToPmDoc(markdownOverride, editor); @@ -151,6 +158,25 @@ export async function openDocument( source: meta, }); } + } else if (plainTextOverride != null) { + try { + const schema = editor.state.schema; + const lines = plainTextOverride.split('\n'); + const paragraphs = lines.map((line) => { + const content = line.length > 0 ? [schema.text(line)] : undefined; + return schema.nodes.paragraph.create(null, content); + }); + const tr = editor.state.tr; + tr.replaceWith(0, editor.state.doc.content.size, paragraphs); + editor.dispatch(tr); + } catch (error) { + editor.destroy(); + const message = error instanceof Error ? 
error.message : String(error); + throw new CliError('DOCUMENT_OPEN_FAILED', 'Failed to apply text content override.', { + message, + source: meta, + }); + } } const adapters = getDocumentApiAdapters(editor); diff --git a/apps/docs/document-api/reference/_generated-manifest.json b/apps/docs/document-api/reference/_generated-manifest.json index 28a7e3fb77..a8a7c128b7 100644 --- a/apps/docs/document-api/reference/_generated-manifest.json +++ b/apps/docs/document-api/reference/_generated-manifest.json @@ -229,5 +229,5 @@ } ], "marker": "{/* GENERATED FILE: DO NOT EDIT. Regenerate via `pnpm run docapi:sync`. */}", - "sourceHash": "876ac06afd1496519bea3238baa8738286b264af5f1714b259b41a528d8043ff" + "sourceHash": "722ce545fc7c5373e23246fa7bbbc68b381e30bd8e2bc6c21d1616e6c5395ea9" } diff --git a/apps/docs/document-api/reference/insert.mdx b/apps/docs/document-api/reference/insert.mdx index 44cf8d9ff5..e70e395eb9 100644 --- a/apps/docs/document-api/reference/insert.mdx +++ b/apps/docs/document-api/reference/insert.mdx @@ -103,6 +103,7 @@ _No fields._ - `INVALID_TARGET` - `NO_OP` - `CAPABILITY_UNAVAILABLE` +- `UNSUPPORTED_ENVIRONMENT` ## Raw schemas @@ -151,7 +152,8 @@ _No fields._ "enum": [ "INVALID_TARGET", "NO_OP", - "CAPABILITY_UNAVAILABLE" + "CAPABILITY_UNAVAILABLE", + "UNSUPPORTED_ENVIRONMENT" ] }, "details": {}, @@ -204,7 +206,8 @@ _No fields._ "enum": [ "INVALID_TARGET", "NO_OP", - "CAPABILITY_UNAVAILABLE" + "CAPABILITY_UNAVAILABLE", + "UNSUPPORTED_ENVIRONMENT" ] }, "details": {}, diff --git a/packages/document-api/src/contract/contract.test.ts b/packages/document-api/src/contract/contract.test.ts index b96444d8c3..61f02bb9c3 100644 --- a/packages/document-api/src/contract/contract.test.ts +++ b/packages/document-api/src/contract/contract.test.ts @@ -91,6 +91,24 @@ describe('document-api contract catalog', () => { expect(insertInputSchema.additionalProperties).toBe(false); }); + it('declares UNSUPPORTED_ENVIRONMENT for insert metadata and generated failure schema', () 
=> { + const schemas = buildInternalContractSchemas(); + const insertFailureSchema = schemas.operations.insert.failure as { + properties?: { + failure?: { + properties?: { + code?: { + enum?: string[]; + }; + }; + }; + }; + }; + + expect(COMMAND_CATALOG.insert.possibleFailureCodes).toContain('UNSUPPORTED_ENVIRONMENT'); + expect(insertFailureSchema.properties?.failure?.properties?.code?.enum).toContain('UNSUPPORTED_ENVIRONMENT'); + }); + it('derives OPERATION_IDS from OPERATION_DEFINITIONS keys', () => { const definitionKeys = Object.keys(OPERATION_DEFINITIONS).sort(); const operationIds = [...OPERATION_IDS].sort(); diff --git a/packages/document-api/src/contract/operation-definitions.ts b/packages/document-api/src/contract/operation-definitions.ts index 649e8019e2..b545323458 100644 --- a/packages/document-api/src/contract/operation-definitions.ts +++ b/packages/document-api/src/contract/operation-definitions.ts @@ -208,7 +208,7 @@ export const OPERATION_DEFINITIONS = { idempotency: 'non-idempotent', supportsDryRun: true, supportsTrackedMode: true, - possibleFailureCodes: ['INVALID_TARGET', 'NO_OP', 'CAPABILITY_UNAVAILABLE'], + possibleFailureCodes: ['INVALID_TARGET', 'NO_OP', 'CAPABILITY_UNAVAILABLE', 'UNSUPPORTED_ENVIRONMENT'], throws: [...T_NOT_FOUND_CAPABLE, 'INVALID_TARGET'], }), referenceDocPath: 'insert.mdx', diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts index 27aefb767e..7d3d2222f4 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts @@ -267,6 +267,27 @@ describe('insertStructuredWrapper — dry-run', () => { expect(dryRun.failure?.code).toBe('NO_OP'); }); + it('does not mutate numbering state on dry-run html list insert', () => { + const converter = 
(editor as any).converter; + expect(converter).toBeDefined(); + + const numberingBefore = JSON.stringify(converter?.numbering ?? {}); + const translatedBefore = JSON.stringify(converter?.translatedNumbering ?? {}); + + const dryRun = insertStructuredWrapper( + editor, + { + value: '
                    1. Dry run list item
                    ', + type: 'html', + }, + { dryRun: true }, + ); + + expect(dryRun.success).toBe(true); + expect(JSON.stringify(converter?.numbering ?? {})).toBe(numberingBefore); + expect(JSON.stringify(converter?.translatedNumbering ?? {})).toBe(translatedBefore); + }); + it('mirrors runtime environment failure for html in dry-run mode', () => { const opts = (editor as any).options ?? ((editor as any).options = {}); const prevDocument = opts.document; diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts index 989684657f..2b07ad565f 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts @@ -538,7 +538,13 @@ export function insertStructuredWrapper( } else if (contentType === 'html') { // NOTE: processContent has no dryRun flag — this runs the full HTML // pipeline (DOM creation, wrapTextsInRuns) minus the final insertContentAt. - // Acceptable for catching UNSUPPORTED_ENVIRONMENT / INVALID_TARGET early. + // Snapshot numbering state so we can roll back after the dry-run, since + // HTML list parsing allocates IDs/definitions on editor.converter. + const converter = (editor as any).converter; + const numberingSnapshot = converter?.numbering ? JSON.parse(JSON.stringify(converter.numbering)) : undefined; + const translatedNumberingSnapshot = converter?.translatedNumbering + ? JSON.parse(JSON.stringify(converter.translatedNumbering)) + : undefined; try { const processedDoc = processContent({ content: value, type: 'html', editor }); if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') { @@ -561,6 +567,14 @@ export function insertStructuredWrapper( message: `HTML structured insert requires a DOM environment. 
${message}`, }, }; + } finally { + // Roll back numbering mutations from the dry-run HTML pipeline. + if (converter && numberingSnapshot !== undefined) { + converter.numbering = numberingSnapshot; + } + if (converter && translatedNumberingSnapshot !== undefined) { + converter.translatedNumbering = translatedNumberingSnapshot; + } } } return { success: true, resolution }; From 96a0b13b1484f2803b5341bf22eae1b27608dd78 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Wed, 25 Feb 2026 21:55:28 -0800 Subject: [PATCH 3/5] fix(markdown): preserve multi-paragraph list items as single entries --- .../importMarkdown.integration.test.js | 68 +++++++++++++++++++ .../helpers/markdown/mdastToProseMirror.ts | 9 ++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js index 0c6abfdc95..54f7f0ce3c 100644 --- a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js +++ b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js @@ -33,6 +33,24 @@ function collectNodeTypes(doc) { return types; } +function collectTopLevelParagraphs(doc) { + const paragraphs = []; + doc.forEach((node) => { + if (node.type.name === 'paragraph') { + paragraphs.push(node); + } + }); + return paragraphs; +} + +function hasNumbering(node) { + return Boolean(node.attrs?.paragraphProperties?.numberingProperties); +} + +function paragraphByText(paragraphs, expectedText) { + return paragraphs.find((node) => node.textContent.trim() === expectedText); +} + describe('markdown to DOCX integration', () => { it('converts complete markdown document with headings and lists', () => { const markdown = `# Main Title @@ -58,4 +76,54 @@ More text here. 
expect(types).toContain('paragraph'); expect(types).toContain('run'); }); + + it('keeps a multi-paragraph bullet item as one logical list entry', () => { + const markdown = `- first paragraph + + continuation paragraph +- second bullet`; + + const doc = createDocFromMarkdown(markdown, editor); + const paragraphs = collectTopLevelParagraphs(doc); + + const first = paragraphByText(paragraphs, 'first paragraph'); + const continuation = paragraphByText(paragraphs, 'continuation paragraph'); + const second = paragraphByText(paragraphs, 'second bullet'); + + expect(first).toBeTruthy(); + expect(continuation).toBeTruthy(); + expect(second).toBeTruthy(); + + expect(hasNumbering(first)).toBe(true); + expect(hasNumbering(continuation)).toBe(false); + expect(hasNumbering(second)).toBe(true); + + const numberedParagraphs = paragraphs.filter(hasNumbering); + expect(numberedParagraphs).toHaveLength(2); + }); + + it('keeps a multi-paragraph ordered item as one numbered entry', () => { + const markdown = `1. first numbered paragraph + + continuation paragraph +2. 
second numbered item`; + + const doc = createDocFromMarkdown(markdown, editor); + const paragraphs = collectTopLevelParagraphs(doc); + + const first = paragraphByText(paragraphs, 'first numbered paragraph'); + const continuation = paragraphByText(paragraphs, 'continuation paragraph'); + const second = paragraphByText(paragraphs, 'second numbered item'); + + expect(first).toBeTruthy(); + expect(continuation).toBeTruthy(); + expect(second).toBeTruthy(); + + expect(hasNumbering(first)).toBe(true); + expect(hasNumbering(continuation)).toBe(false); + expect(hasNumbering(second)).toBe(true); + + const numberedParagraphs = paragraphs.filter(hasNumbering); + expect(numberedParagraphs).toHaveLength(2); + }); }); diff --git a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts index 408d18f0cd..1883c3624c 100644 --- a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts +++ b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts @@ -171,11 +171,18 @@ function convertListItem( listType: string, ): JsonNode[] { const blocks: JsonNode[] = []; + let firstParagraphEmitted = false; for (const child of item.children) { if (child.type === 'paragraph') { const runs = convertInlineChildren((child as MdastParagraph).children, ctx, []); - blocks.push(makeListParagraph(runs, numId, depth)); + if (!firstParagraphEmitted) { + blocks.push(makeListParagraph(runs, numId, depth)); + firstParagraphEmitted = true; + } else { + // Continuation paragraph within the same list item — no list marker + blocks.push(makeParagraph(runs)); + } } else if (child.type === 'list') { // Nested list — increase depth, reuse same listType context blocks.push(...convertList(child as MdastList, ctx, depth + 1)); From bb66f5258bc613dcad00abc3a13f933a487fcd44 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Thu, 26 Feb 2026 15:01:05 -0800 Subject: [PATCH 4/5] feat(markdown): normalize fixed-width 
ASCII tables to GFM pipe tables --- .../src/core/helpers/markdown/index.ts | 1 + .../helpers/markdown/markdownToPmContent.ts | 4 +- ...malizeFixedWidthTables.integration.test.ts | 227 ++++++++ .../normalizeFixedWidthTables.test.ts | 532 ++++++++++++++++++ .../markdown/normalizeFixedWidthTables.ts | 463 +++++++++++++++ .../src/core/helpers/markdown/types.ts | 6 + .../tests/formatting/inline-formatting.ts | 2 +- .../markdown/markdown-override-roundtrip.ts | 288 ++++++++++ .../markdown/multi-page-nda-test-document.md | 220 ++++++++ .../tests/styles/doc-defaults.ts | 4 +- .../tests/tables/all-commands.ts | 8 +- 11 files changed, 1747 insertions(+), 8 deletions(-) create mode 100644 packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts create mode 100644 packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts create mode 100644 packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts create mode 100644 tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts create mode 100644 tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md diff --git a/packages/super-editor/src/core/helpers/markdown/index.ts b/packages/super-editor/src/core/helpers/markdown/index.ts index 922c58f581..e2caaf1a3f 100644 --- a/packages/super-editor/src/core/helpers/markdown/index.ts +++ b/packages/super-editor/src/core/helpers/markdown/index.ts @@ -9,6 +9,7 @@ export { markdownToPmDoc, markdownToPmFragment } from './markdownToPmContent.js'; export { parseMarkdownToAst } from './parseMarkdownAst.js'; +export { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js'; export type { MarkdownConversionOptions, MarkdownConversionResult, diff --git a/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts index 13939051e4..f2cbcddee1 100644 --- 
a/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts +++ b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts @@ -15,6 +15,7 @@ import type { Node as PmNode } from 'prosemirror-model'; import type { Editor } from '../../Editor.js'; import { parseMarkdownToAst } from './parseMarkdownAst.js'; import { convertMdastToBlocks } from './mdastToProseMirror.js'; +import { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js'; import { wrapTextsInRuns } from '../../inputRules/docx-paste/docx-paste.js'; import type { MarkdownConversionOptions, @@ -88,7 +89,8 @@ function parseAndConvert( editor: Editor, options: MarkdownConversionOptions, ): { blocks: ReturnType; diagnostics: MdastConversionContext['diagnostics'] } { - const ast = parseMarkdownToAst(markdown); + const source = options.normalizeFixedWidthTables === false ? markdown : normalizeFixedWidthTables(markdown); + const ast = parseMarkdownToAst(source); const ctx: MdastConversionContext = { editor, diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts new file mode 100644 index 0000000000..d4b978621b --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts @@ -0,0 +1,227 @@ +/** + * Integration test: normalizeFixedWidthTables → remark-gfm AST parsing. + * + * Verifies that the normalizer's GFM output is correctly parsed into mdast + * table nodes by the same remark pipeline used in production. 
+ */ +import { describe, expect, it } from 'vitest'; +import { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js'; +import { parseMarkdownToAst } from './parseMarkdownAst.js'; +import type { Root, Table, TableRow, TableCell } from 'mdast'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function findTables(tree: Root): Table[] { + const tables: Table[] = []; + function walk(node: any) { + if (node.type === 'table') tables.push(node); + if (node.children) node.children.forEach(walk); + } + walk(tree); + return tables; +} + +function tableDimensions(table: Table): { rows: number; cols: number } { + const rows = table.children.length; + const cols = rows > 0 ? table.children[0].children.length : 0; + return { rows, cols }; +} + +function cellText(cell: TableCell): string { + return cell.children + .map((c: any) => { + if (c.type === 'text') return c.value; + if (c.children) return c.children.map((cc: any) => cc.value ?? 
'').join(''); + return ''; + }) + .join(''); +} + +function rowTexts(row: TableRow): string[] { + return row.children.map(cellText); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('normalizer → remark-gfm AST integration', () => { + it('produces zero tables from raw ASCII input (baseline)', () => { + const raw = [' Clause Description', ' ---------- -----------', ' Term Protection'].join('\n'); + + const ast = parseMarkdownToAst(raw); + expect(findTables(ast)).toHaveLength(0); + }); + + it('produces a valid mdast table after normalization', () => { + const raw = [' Clause Description', ' ---------- -----------', ' Term Protection'].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = parseMarkdownToAst(normalized); + const tables = findTables(ast); + + expect(tables).toHaveLength(1); + expect(tableDimensions(tables[0])).toEqual({ rows: 2, cols: 2 }); // header + 1 data + expect(rowTexts(tables[0].children[0])).toEqual(['Clause', 'Description']); + expect(rowTexts(tables[0].children[1])).toEqual(['Term', 'Protection']); + }); + + it('Section 5 table (no borders): 3 columns, 3 data rows', () => { + const raw = [ + ' Clause Description Duration', + ' ---------------------- --------------------------------- -----------', + ' Confidentiality Term Protection of confidential info 5 years', + ' Evaluation Period Business evaluation timeline 12 months', + ' Survival Clause Survives termination Yes', + ].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = parseMarkdownToAst(normalized); + const tables = findTables(ast); + + expect(tables).toHaveLength(1); + expect(tableDimensions(tables[0])).toEqual({ rows: 4, cols: 3 }); // header + 3 data + expect(rowTexts(tables[0].children[0])).toEqual(['Clause', 'Description', 'Duration']); + 
expect(rowTexts(tables[0].children[3])).toEqual(['Survival Clause', 'Survives termination', 'Yes']); + }); + + it('Appendix A table (bordered, continuations): 4 columns, 4 data rows', () => { + const raw = [ + ' -------------------------------------------------------------------------', + ' Classification Description Example Required Controls', + ' ------------------- --------------- ------------ ------------------------', + ' Public No restrictions Press None', + ' release ', + '', + ' Internal Limited Internal Access controls', + ' distribution memo ', + '', + ' Confidential Sensitive Financial Encryption + MFA', + ' business data reports ', + '', + ' Restricted Highly Source code Strict access + logging', + ' sensitive ', + ' -------------------------------------------------------------------------', + ].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = parseMarkdownToAst(normalized); + const tables = findTables(ast); + + expect(tables).toHaveLength(1); + expect(tableDimensions(tables[0])).toEqual({ rows: 5, cols: 4 }); // header + 4 data + + // Verify continuation lines merged correctly + expect(rowTexts(tables[0].children[1])).toEqual(['Public', 'No restrictions', 'Press release', 'None']); + expect(rowTexts(tables[0].children[4])).toEqual([ + 'Restricted', + 'Highly sensitive', + 'Source code', + 'Strict access + logging', + ]); + }); + + it('Signatures table (no borders, form fields): 2 columns, 4 data rows', () => { + const raw = [ + ' Disclosing Party Receiving Party', + ' ----------------------------- -----------------------------', + ' Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ' Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ' Signature: \\_\\_\\_\\_\\_\\_ Signature: \\_\\_\\_\\_\\_\\_', + ' Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = 
parseMarkdownToAst(normalized); + const tables = findTables(ast); + + expect(tables).toHaveLength(1); + expect(tableDimensions(tables[0])).toEqual({ rows: 5, cols: 2 }); // header + 4 data + expect(rowTexts(tables[0].children[0])).toEqual(['Disclosing Party', 'Receiving Party']); + }); + + it('does not produce table from indent-mismatched lines', () => { + const raw = ['Header1 Header2', ' -------- --------', ' Data1 Data2'].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = parseMarkdownToAst(normalized); + expect(findTables(ast)).toHaveLength(0); + }); + + it('preserves prose when false bottom border is rejected', () => { + const raw = [ + ' -------------------------------------------------------------------------', + ' Classification Description Example', + ' ------------------- --------------- ------------', + ' Public No restrictions Press', + '', + 'This should not be a table row.', + '', + '---', + ].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = parseMarkdownToAst(normalized); + const tables = findTables(ast); + + // Should produce exactly 1 table (the real one), not swallow the prose. + expect(tables).toHaveLength(1); + expect(tableDimensions(tables[0])).toEqual({ rows: 2, cols: 3 }); // header + 1 data row + // The prose and thematic break should remain as non-table content. 
+ expect(normalized).toContain('This should not be a table row.'); + }); + + it('full NDA fixture produces 3 mdast tables', () => { + const raw = [ + '## Term and Termination', + '', + ' Clause Description Duration', + ' ---------------------- --------------------------------- -----------', + ' Confidentiality Term Protection of confidential info 5 years', + ' Evaluation Period Business evaluation timeline 12 months', + ' Survival Clause Survives termination Yes', + '', + '---', + '', + '## Appendix A', + '', + ' -------------------------------------------------------------------------', + ' Classification Description Example Required Controls', + ' ------------------- --------------- ------------ ------------------------', + ' Public No restrictions Press None', + ' release ', + '', + ' Internal Limited Internal Access controls', + ' distribution memo ', + '', + ' Confidential Sensitive Financial Encryption + MFA', + ' business data reports ', + '', + ' Restricted Highly Source code Strict access + logging', + ' sensitive ', + ' -------------------------------------------------------------------------', + '', + '---', + '', + '## Signatures', + '', + ' Disclosing Party Receiving Party', + ' ----------------------------- -----------------------------', + ' Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ' Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ' Signature: \\_\\_\\_\\_\\_\\_ Signature: \\_\\_\\_\\_\\_\\_', + ' Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = parseMarkdownToAst(normalized); + const tables = findTables(ast); + + expect(tables).toHaveLength(3); + expect(tableDimensions(tables[0])).toEqual({ rows: 4, cols: 3 }); // Section 5 + expect(tableDimensions(tables[1])).toEqual({ rows: 5, cols: 4 }); // Appendix A + expect(tableDimensions(tables[2])).toEqual({ rows: 5, cols: 2 }); 
// Signatures + }); +}); diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts new file mode 100644 index 0000000000..50318ae4c5 --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts @@ -0,0 +1,532 @@ +import { describe, expect, it } from 'vitest'; +import { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Trim shared leading indentation from a template literal. */ +function dedent(s: string): string { + const lines = s.split('\n'); + // Drop leading/trailing empty lines from the template literal + if (lines[0].trim() === '') lines.shift(); + if (lines.length > 0 && lines[lines.length - 1].trim() === '') lines.pop(); + + const indent = Math.min(...lines.filter((l) => l.trim().length > 0).map((l) => l.match(/^(\s*)/)![1].length)); + return lines.map((l) => l.slice(indent)).join('\n'); +} + +/** Extract lines matching `| ... |` from the output. 
*/ +function extractPipeTable(output: string): string[] { + return output.split('\n').filter((l) => l.startsWith('|')); +} + +// --------------------------------------------------------------------------- +// Section 5 style: unbounded table (no top/bottom borders) +// --------------------------------------------------------------------------- + +describe('normalizeFixedWidthTables', () => { + describe('unbounded tables (no borders)', () => { + it('converts a simple 3-column table without borders', () => { + const input = dedent(` + Clause Description Duration + ---------------------- --------------------------------- ----------- + Confidentiality Term Protection of confidential info 5 years + Evaluation Period Business evaluation timeline 12 months + Survival Clause Survives termination Yes + `); + + const output = normalizeFixedWidthTables(input); + const table = extractPipeTable(output); + + expect(table).toEqual([ + '| Clause | Description | Duration |', + '| --- | --- | --- |', + '| Confidentiality Term | Protection of confidential info | 5 years |', + '| Evaluation Period | Business evaluation timeline | 12 months |', + '| Survival Clause | Survives termination | Yes |', + ]); + }); + + it('stops at first blank line for unbounded tables', () => { + const input = dedent(` + Name Age + ------ --- + Alice 30 + + This is a regular paragraph. 
+ `); + + const output = normalizeFixedWidthTables(input); + + expect(output).toContain('| Alice | 30 |'); + expect(output).toContain('This is a regular paragraph.'); + }); + }); + + // --------------------------------------------------------------------------- + // Appendix A style: bordered table with continuation lines + blank separators + // --------------------------------------------------------------------------- + + describe('bordered tables (top + bottom borders)', () => { + it('converts Appendix A with wrapped continuation lines and blank separators', () => { + const input = dedent(` + ------------------------------------------------------------------------- + Classification Description Example Required Controls + ------------------- --------------- ------------ ------------------------ + Public No restrictions Press None + release + + Internal Limited Internal Access controls + distribution memo + + Confidential Sensitive Financial Encryption + MFA + business data reports + + Restricted Highly Source code Strict access + logging + sensitive + ------------------------------------------------------------------------- + `); + + const output = normalizeFixedWidthTables(input); + const table = extractPipeTable(output); + + expect(table).toEqual([ + '| Classification | Description | Example | Required Controls |', + '| --- | --- | --- | --- |', + '| Public | No restrictions | Press release | None |', + '| Internal | Limited distribution | Internal memo | Access controls |', + '| Confidential | Sensitive business data | Financial reports | Encryption + MFA |', + '| Restricted | Highly sensitive | Source code | Strict access + logging |', + ]); + }); + + it('consumes top and bottom border lines', () => { + const input = dedent(` + before + -------------------- + A B + ----- ----- + 1 2 + -------------------- + after + `); + + const output = normalizeFixedWidthTables(input); + + expect(output).not.toContain('----'); + expect(output).toContain('before'); + 
expect(output).toContain('after'); + expect(output).toContain('| A | B |'); + expect(output).toContain('| 1 | 2 |'); + }); + }); + + // --------------------------------------------------------------------------- + // Signatures style: form-field content with escaped underscores + // --------------------------------------------------------------------------- + + describe('signature/form tables', () => { + it('converts a two-column table with escaped underscores', () => { + const input = dedent(` + Disclosing Party Receiving Party + ----------------------------- ----------------------------- + Name: \\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_ + Title: \\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_ + Signature: \\_\\_\\_ Signature: \\_\\_\\_ + Date: \\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_ + `); + + const output = normalizeFixedWidthTables(input); + const table = extractPipeTable(output); + + expect(table[0]).toBe('| Disclosing Party | Receiving Party |'); + expect(table[1]).toBe('| --- | --- |'); + expect(table).toHaveLength(6); // header + separator + 4 data rows + }); + }); + + // --------------------------------------------------------------------------- + // Leading indentation + // --------------------------------------------------------------------------- + + describe('indentation handling', () => { + it('handles 2-space indented tables (as in the NDA fixture)', () => { + const input = [ + ' Clause Description', + ' ---------- -----------', + ' Term Protection', + ' Period Evaluation', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + + expect(output).toContain('| Clause | Description |'); + expect(output).toContain('| Term | Protection |'); + expect(output).toContain('| Period | Evaluation |'); + }); + }); + + // --------------------------------------------------------------------------- + // Edge cases and safety + // --------------------------------------------------------------------------- + + describe('pass-through (no transformation)', () => { + 
it('passes through text with no tables', () => { + const input = '# Hello\n\nThis is a paragraph.\n\n- item 1\n- item 2'; + expect(normalizeFixedWidthTables(input)).toBe(input); + }); + + it('passes through GFM pipe tables unchanged', () => { + const input = '| A | B |\n| --- | --- |\n| 1 | 2 |'; + expect(normalizeFixedWidthTables(input)).toBe(input); + }); + + it('passes through thematic breaks (--- lines) without matching', () => { + const input = dedent(` + # Section 1 + + Some text. + + --- + + # Section 2 + + More text. + + --- + `); + + const output = normalizeFixedWidthTables(input); + + // No pipe tables should be produced + expect(extractPipeTable(output)).toHaveLength(0); + // Content preserved + expect(output).toContain('# Section 1'); + expect(output).toContain('# Section 2'); + }); + + it('does not match a single dash group (thematic break)', () => { + const input = '------------------------------------------------------------------------'; + expect(normalizeFixedWidthTables(input)).toBe(input); + }); + + it('preserves malformed partial table structures', () => { + const input = dedent(` + Header only, no data + ------ ------ + `); + + // Guide row exists but no data rows → no transformation + const output = normalizeFixedWidthTables(input); + expect(extractPipeTable(output)).toHaveLength(0); + }); + }); + + describe('fenced code blocks', () => { + it('does not transform tables inside fenced code blocks', () => { + const input = dedent(` + \`\`\` + Name Age + ------ --- + Alice 30 + \`\`\` + `); + + const output = normalizeFixedWidthTables(input); + + // Should NOT produce pipe table + expect(extractPipeTable(output)).toHaveLength(0); + // Original content preserved + expect(output).toContain('Alice 30'); + }); + + it('does not transform tables inside tilde-fenced code blocks', () => { + const input = dedent(` + ~~~ + Name Age + ------ --- + Alice 30 + ~~~ + `); + + const output = normalizeFixedWidthTables(input); + 
expect(extractPipeTable(output)).toHaveLength(0); + }); + + it('transforms tables before and after fenced code blocks', () => { + const input = dedent(` + A B + ----- ----- + 1 2 + + \`\`\` + C D + ----- ----- + 3 4 + \`\`\` + + E F + ----- ----- + 5 6 + `); + + const output = normalizeFixedWidthTables(input); + const tables = extractPipeTable(output); + + // Two tables converted (before and after fence), one preserved inside fence + expect(tables.filter((l) => l.startsWith('| A'))).toHaveLength(1); + expect(tables.filter((l) => l.startsWith('| E'))).toHaveLength(1); + expect(output).toContain('C D'); + }); + }); + + // --------------------------------------------------------------------------- + // Regression: indent mismatch must not corrupt cell text (Bug #1) + // --------------------------------------------------------------------------- + + describe('indent mismatch rejection', () => { + it('rejects table when header indent differs from guide indent', () => { + // Header at column 0, guide indented 2 spaces → mismatch → no table. + const input = ['Header1 Header2', ' -------- --------', ' Data1 Data2'].join('\n'); + + const output = normalizeFixedWidthTables(input); + + // No pipe table should be produced — the candidate is rejected. 
+ expect(extractPipeTable(output)).toHaveLength(0); + expect(output).toContain('Header1 Header2'); + }); + + it('rejects bordered table when header indent differs from guide indent', () => { + const input = [ + ' --------------------', + 'Header1 Header2', + ' -------- --------', + ' Data1 Data2', + ' --------------------', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + expect(extractPipeTable(output)).toHaveLength(0); + }); + }); + + // --------------------------------------------------------------------------- + // Regression: non-final column overflow must not corrupt cells + // --------------------------------------------------------------------------- + + describe('non-final column overflow rejection', () => { + it('rejects table when first data row overflows a non-final column', () => { + const input = ['A B C', '----- ----- -----', '1stcol_is_very_long bbb ccc'].join('\n'); + + const output = normalizeFixedWidthTables(input); + + // No pipe table — the guide doesn't match the data layout. + expect(extractPipeTable(output)).toHaveLength(0); + expect(output).toContain('1stcol_is_very_long'); + }); + + it('skips a later overflowing row and its continuation lines', () => { + const input = [ + 'A B C', + '----- ----- -----', + 'aaa bbb ccc', + 'overflow_row_here long zzz', + ' cont yyy', + 'ddd eee fff', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + const table = extractPipeTable(output); + + // Valid rows preserved, overflowing row + its continuation skipped. 
+ expect(table).toContainEqual('| aaa | bbb | ccc |'); + expect(table).toContainEqual('| ddd | eee | fff |'); + expect(table).not.toContainEqual(expect.stringContaining('overflow')); + expect(table).not.toContainEqual(expect.stringContaining('cont')); + expect(table).not.toContainEqual(expect.stringContaining('yyy')); + }); + + it('rejects multi-word overflow in a non-final column', () => { + const input = ['A B', '----- -----', 'ab cdef zzz'].join('\n'); + + const output = normalizeFixedWidthTables(input); + + // "ab cdef" crosses column A boundary → reject entire table. + expect(extractPipeTable(output)).toHaveLength(0); + expect(output).toContain('ab cdef'); + }); + + it('rejects overflow with leading padding in a non-final column', () => { + const input = ['A B', '----- -----', ' abcdef zzz'].join('\n'); + + const output = normalizeFixedWidthTables(input); + + // Leading space + overflowing value → reject entire table. + expect(extractPipeTable(output)).toHaveLength(0); + expect(output).toContain('abcdef'); + }); + + it('allows overflow in the last column (reads to end of line)', () => { + const input = ['A B', '----- -----', 'aaa this value is very long and exceeds the column width'].join('\n'); + + const output = normalizeFixedWidthTables(input); + const table = extractPipeTable(output); + + expect(table).toContainEqual('| aaa | this value is very long and exceeds the column width |'); + }); + }); + + // --------------------------------------------------------------------------- + // Regression: false bottom border must not swallow prose (Bug #2) + // --------------------------------------------------------------------------- + + describe('false bottom border rejection', () => { + it('does not swallow prose between table and unrelated thematic break', () => { + const input = [ + ' -------------------------------------------------------------------------', + ' Classification Description Example', + ' ------------------- --------------- ------------', + ' 
Public No restrictions Press', + '', + 'This should not be a table row.', + 'Some other content that has nothing to do with the table.', + '', + '---', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + + // The table should still be converted (1 data row). + expect(output).toContain('| Classification | Description | Example |'); + expect(output).toContain('| Public | No restrictions | Press |'); + + // Prose must be preserved as-is, not absorbed into the table. + expect(output).toContain('This should not be a table row.'); + expect(output).toContain('Some other content that has nothing to do with the table.'); + + // The thematic break must be preserved. + expect(output.split('\n').filter((l) => l.trim() === '---')).toHaveLength(1); + }); + + it('accepts bordered table when cell text exceeds guide width', () => { + const input = [ + ' -----------------------', + ' A B', + ' ----- -----', + ' 1 This is a very very long value that exceeds column width', + '', + ' 2 ok', + ' -----------------------', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + const table = extractPipeTable(output); + + // Both data rows must be present with full cell text preserved. 
+ expect(table).toEqual([ + '| A | B |', + '| --- | --- |', + '| 1 | This is a very very long value that exceeds column width |', + '| 2 | ok |', + ]); + }); + + it('does not swallow prose when bottom border is missing entirely', () => { + const input = [ + ' -------------------------------------------------------------------------', + ' A B', + ' ----- -----', + ' 1 2', + '', + 'Regular paragraph here.', + '', + '## Next Section', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + + expect(output).toContain('| A | B |'); + expect(output).toContain('| 1 | 2 |'); + expect(output).toContain('Regular paragraph here.'); + expect(output).toContain('## Next Section'); + }); + }); + + // --------------------------------------------------------------------------- + // Full NDA fixture regression + // --------------------------------------------------------------------------- + + describe('full NDA fixture', () => { + it('converts all three tables from the NDA fixture', () => { + // Exact content from multi-page-nda-test-document.md (relevant sections) + const input = [ + '## Term and Termination', + '', + ' Clause Description Duration', + ' ---------------------- --------------------------------- -----------', + ' Confidentiality Term Protection of confidential info 5 years', + ' Evaluation Period Business evaluation timeline 12 months', + ' Survival Clause Survives termination Yes', + '', + 'This Agreement remains in effect.', + '', + '---', + '', + '## Appendix A -- Data Classification Table', + '', + ' -------------------------------------------------------------------------', + ' Classification Description Example Required Controls', + ' ------------------- --------------- ------------ ------------------------', + ' Public No restrictions Press None', + ' release ', + '', + ' Internal Limited Internal Access controls', + ' distribution memo ', + '', + ' Confidential Sensitive Financial Encryption + MFA', + ' business data reports ', + '', + ' 
Restricted Highly Source code Strict access + logging', + ' sensitive ', + ' -------------------------------------------------------------------------', + '', + '---', + '', + '## Signatures', + '', + ' Disclosing Party Receiving Party', + ' ----------------------------- -----------------------------', + ' Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ' Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ' Signature: \\_\\_\\_\\_\\_\\_ Signature: \\_\\_\\_\\_\\_\\_', + ' Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + const tables = extractPipeTable(output); + + // Table 1: Term and Termination (3 cols, 3 data rows) + expect(tables).toContainEqual('| Clause | Description | Duration |'); + expect(tables).toContainEqual('| Confidentiality Term | Protection of confidential info | 5 years |'); + expect(tables).toContainEqual('| Evaluation Period | Business evaluation timeline | 12 months |'); + + // Table 2: Appendix A (4 cols, 4 data rows with merged continuations) + expect(tables).toContainEqual('| Classification | Description | Example | Required Controls |'); + expect(tables).toContainEqual('| Public | No restrictions | Press release | None |'); + expect(tables).toContainEqual('| Restricted | Highly sensitive | Source code | Strict access + logging |'); + + // Table 3: Signatures (2 cols) + expect(tables).toContainEqual('| Disclosing Party | Receiving Party |'); + + // Non-table content preserved + expect(output).toContain('## Term and Termination'); + expect(output).toContain('This Agreement remains in effect.'); + expect(output).toContain('## Appendix A -- Data Classification Table'); + expect(output).toContain('## Signatures'); + + // Thematic breaks preserved + expect(output.split('\n').filter((l) => l.trim() === '---')).toHaveLength(2); + }); + }); +}); diff --git 
a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts new file mode 100644 index 0000000000..8851108c83 --- /dev/null +++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts @@ -0,0 +1,463 @@ +/** + * Pre-AST normalizer that converts fixed-width ASCII tables to GFM pipe tables. + * + * LLMs commonly produce pandoc-style fixed-width tables in markdown: + * + * Name Age City + * ----------- ------ -------- + * Alice 30 Seattle + * Bob 25 Portland + * + * remark-gfm only recognizes GFM pipe-table syntax, so these become paragraphs + * instead of table nodes in the AST. This module detects fixed-width tables and + * rewrites them before AST parsing: + * + * | Name | Age | City | + * | --- | --- | --- | + * | Alice | 30 | Seattle | + * | Bob | 25 | Portland | + * + * Supported layouts: + * (A) border → header → guide → data... → border (bordered) + * (B) header → guide → data... (unbounded) + * + * Continuation lines (empty first column) are merged into the preceding row. + * In bordered tables, blank lines between data rows are treated as row separators. + * In unbounded tables, a blank line terminates the table. + * + * Fenced code blocks are skipped entirely. + */ + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Detect pandoc-style fixed-width ASCII tables in a markdown string and + * rewrite them as GFM pipe tables that remark-gfm can parse. + * + * Fenced code blocks are skipped. Bordered (top/bottom border) and + * unbounded (header + guide only) layouts are both supported, including + * continuation lines that wrap across multiple rows. + * + * @param markdown - Raw markdown source, possibly containing fixed-width tables. 
+ * @returns The markdown with fixed-width tables replaced by GFM pipe-table syntax. + * Returns the input unchanged if no fixed-width tables are detected. + */ +export function normalizeFixedWidthTables(markdown: string): string { + const lines = markdown.split('\n'); + const output: string[] = []; + let i = 0; + + while (i < lines.length) { + if (isFenceOpener(lines[i])) { + const closeIdx = findFenceClose(lines, i); + for (let j = i; j <= closeIdx; j++) output.push(lines[j]); + i = closeIdx + 1; + continue; + } + + const table = tryParseTableAt(lines, i); + if (table) { + output.push(...toGfmPipeTable(table)); + i = table.endLine + 1; + continue; + } + + output.push(lines[i]); + i++; + } + + return output.join('\n'); +} + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Character span of a single column, relative to content start (after indent). */ +interface ColumnSpan { + /** Start character index (inclusive), relative to content start. */ + start: number; + /** End character index (inclusive), relative to content start. */ + end: number; +} + +/** Result of parsing a column guide row. */ +interface GuideInfo { + /** Column spans, relative to content start (after stripping leading indent). */ + spans: ColumnSpan[]; + /** Number of leading whitespace characters on the guide row. */ + indent: number; +} + +interface ParsedTable { + /** First line of the table block (top border or header). */ + startLine: number; + /** Last line of the table block (bottom border or last data row). 
*/ + endLine: number; + headers: string[]; + rows: string[][]; +} + +interface TableAnchors { + topBorderIdx?: number; + headerIdx: number; + guideIdx: number; +} + +// --------------------------------------------------------------------------- +// Fenced code block handling +// --------------------------------------------------------------------------- + +const FENCE_OPEN_RE = /^( {0,3})(`{3,}|~{3,})/; + +function isFenceOpener(line: string): boolean { + return FENCE_OPEN_RE.test(line); +} + +function findFenceClose(lines: string[], openIdx: number): number { + const match = lines[openIdx].match(FENCE_OPEN_RE); + if (!match) return openIdx; + + const char = match[2][0] === '`' ? '`' : '~'; + const minLen = match[2].length; + const closeRe = new RegExp(`^( {0,3})${char}{${minLen},}\\s*$`); + + for (let i = openIdx + 1; i < lines.length; i++) { + if (closeRe.test(lines[i])) return i; + } + return lines.length - 1; // unclosed fence: consume to end +} + +// --------------------------------------------------------------------------- +// Line classification +// --------------------------------------------------------------------------- + +/** Count leading whitespace characters on a line. */ +function leadingIndent(line: string): number { + const match = line.match(/^(\s*)/); + return match ? match[1].length : 0; +} + +/** + * Parse a column guide row: 2+ groups of consecutive dashes (≥3 each), + * separated by whitespace. Returns column spans (relative to content start + * after stripping indent) and the indent amount, or null. 
+ */ +function parseColumnGuide(line: string): GuideInfo | null { + const trimmed = line.trimEnd(); + if (!trimmed) return null; + + // Must contain only dashes and spaces + if (!/^[\s-]+$/.test(trimmed)) return null; + + const indent = leadingIndent(trimmed); + const content = trimmed.slice(indent); + + const spans: ColumnSpan[] = []; + let j = 0; + + while (j < content.length) { + if (content[j] === '-') { + const start = j; + while (j < content.length && content[j] === '-') j++; + if (j - start >= 3) { + spans.push({ start, end: j - 1 }); + } + } else { + j++; + } + } + + return spans.length >= 2 ? { spans, indent } : null; +} + +/** A solid border line is a single unbroken run of dashes (optional indent). */ +function isSolidBorder(line: string): boolean { + const trimmed = line.trim(); + return trimmed.length >= 3 && /^-+$/.test(trimmed); +} + +function isBlank(line: string): boolean { + return line.trim().length === 0; +} + +// --------------------------------------------------------------------------- +// Cell extraction +// --------------------------------------------------------------------------- + +/** + * Extract cell values from a line using column spans. + * Strips `indent` characters from the line before slicing so that + * spans (which are relative to content start) align correctly. + */ +function extractCells(line: string, spans: ColumnSpan[], indent: number): string[] { + const content = line.length > indent ? line.slice(indent) : ''; + const lastIdx = spans.length - 1; + return spans.map(({ start, end }, i) => { + if (start >= content.length) return ''; + // Last column reads to end of line so overflow text isn't truncated. + const stop = i === lastIdx ? 
content.length : Math.min(end + 1, content.length); + return content.slice(start, stop).trim(); + }); +} + +function hasAlphanumericContent(cells: string[]): boolean { + return cells.some((cell) => /[a-zA-Z0-9]/.test(cell)); +} + +/** + * Detect whether any non-final column has a value that overflows past its + * span boundary. + * + * Finds the continuous non-space run that crosses the boundary and measures + * the space gap immediately before it. A wide gap (≥2 spaces) is a column + * separator — the boundary text is the next column starting early due to + * short padding. A narrow gap (0–1 spaces) is a word break within the same + * cell value, so the value genuinely overflows. + * + * `ab cdef` in a 5-char span → 1-space word break → overflow + * ` abcdef` in a 5-char span → leading pad, no content before → overflow + * `val X` in a 29-char span → wide gap → early column start, allowed + */ +function cellsOverflow(line: string, spans: ColumnSpan[], indent: number): boolean { + const content = line.length > indent ? line.slice(indent) : ''; + for (let i = 0; i < spans.length - 1; i++) { + const { start, end } = spans[i]; + const afterCol = end + 1; + if (afterCol >= content.length || content[afterCol] === ' ') continue; + if (content[end] === ' ') continue; + // Find where the non-space run crossing the boundary starts. + let runStart = end; + while (runStart > start && content[runStart - 1] !== ' ') runStart--; + // Count consecutive spaces immediately before the run. + let gapWidth = 0; + for (let j = runStart - 1; j >= start; j--) { + if (content[j] === ' ') gapWidth++; + else break; + } + // A wide gap (≥2 spaces) separates the column's value from the next + // column's early-start text. A narrow gap is a word break within the + // same value — the cell genuinely overflows. 
+ if (gapWidth < 2) return true; + } + return false; +} + +// --------------------------------------------------------------------------- +// Table recognition +// --------------------------------------------------------------------------- + +/** + * Try to recognize a fixed-width table starting at line `i`. + * + * Layout A: border at i, header at i+1, guide at i+2. + * Layout B: header at i, guide at i+1. + * + * Rejects candidates where the header indent differs from the guide indent, + * which prevents cell text corruption from misaligned column slicing. + */ +function tryParseTableAt(lines: string[], i: number): ParsedTable | null { + // Layout A: top border → header → guide + if (i + 2 < lines.length && isSolidBorder(lines[i])) { + const guide = parseColumnGuide(lines[i + 2]); + if (guide && leadingIndent(lines[i + 1]) === guide.indent) { + if (hasAlphanumericContent(extractCells(lines[i + 1], guide.spans, guide.indent))) { + return buildTable(lines, guide, { topBorderIdx: i, headerIdx: i + 1, guideIdx: i + 2 }); + } + } + } + + // Layout B: header → guide + if (i + 1 < lines.length) { + const guide = parseColumnGuide(lines[i + 1]); + if (guide && leadingIndent(lines[i]) === guide.indent) { + if (hasAlphanumericContent(extractCells(lines[i], guide.spans, guide.indent))) { + return buildTable(lines, guide, { headerIdx: i, guideIdx: i + 1 }); + } + } + } + + return null; +} + +// --------------------------------------------------------------------------- +// Table building +// --------------------------------------------------------------------------- + +/** Max lines to scan forward when searching for a bottom border. */ +const MAX_BORDER_SCAN = 100; + +/** Bottom border must be at least this fraction of top border width to match. 
*/ +const MIN_BORDER_WIDTH_RATIO = 0.4; + +function buildTable(lines: string[], guide: GuideInfo, anchors: TableAnchors): ParsedTable | null { + const { topBorderIdx, headerIdx, guideIdx } = anchors; + const { spans, indent } = guide; + const startLine = topBorderIdx ?? headerIdx; + const headers = extractCells(lines[headerIdx], spans, indent); + + // Only look for a bottom border when a top border is present. + let bottomBorderIdx = topBorderIdx !== undefined ? findBottomBorder(lines, guideIdx + 1) : undefined; + + // Validate the candidate bottom border against two structural signals: + // 1. Border shape: the candidate must resemble the top border (similar width/indent). + // This prevents short thematic breaks (---) from being mistaken for table borders. + // 2. Intermediate indent: all non-blank lines between the guide and the candidate + // must have at least the table's leading indent, ruling out unrelated prose. + if (bottomBorderIdx !== undefined && topBorderIdx !== undefined) { + const borderOk = bordersMatch(lines[topBorderIdx], lines[bottomBorderIdx]); + const contentOk = intermediateLinesMeetIndent(lines, guideIdx + 1, bottomBorderIdx, indent); + if (!borderOk || !contentOk) { + bottomBorderIdx = undefined; // discard false bottom border → fall back to unbounded + } + } + + const { rows, lastConsumedIdx } = parseDataRows(lines, spans, indent, guideIdx + 1, bottomBorderIdx); + if (rows.length === 0) return null; + + const endLine = bottomBorderIdx ?? lastConsumedIdx; + return { startLine, endLine, headers, rows }; +} + +/** + * Check whether two border lines are structurally similar enough to be a + * matching top/bottom pair. A short thematic break (`---`, 3 chars) will + * not match a full-width table border (`---...---`, 70+ chars). 
+ */ +function bordersMatch(topBorder: string, candidateBottom: string): boolean { + const topLen = topBorder.trim().length; + const bottomLen = candidateBottom.trim().length; + if (bottomLen < topLen * MIN_BORDER_WIDTH_RATIO) return false; + // Indent should be similar (within 2 characters). + if (Math.abs(leadingIndent(topBorder) - leadingIndent(candidateBottom)) > 2) return false; + return true; +} + +/** + * Check that all non-blank, non-border lines between fromIdx and toIdx + * (exclusive) have at least the table's leading indent. + * Lines with less indent are unrelated prose that wandered between the + * top border and a candidate bottom border. + */ +function intermediateLinesMeetIndent(lines: string[], fromIdx: number, toIdx: number, indent: number): boolean { + for (let k = fromIdx; k < toIdx; k++) { + const line = lines[k]; + if (isBlank(line) || isSolidBorder(line)) continue; + if (leadingIndent(line) < indent) return false; + } + return true; +} + +/** + * Scan forward from `fromIdx` for a solid border line. + * Skips fenced code blocks and stops at markdown headings. + */ +function findBottomBorder(lines: string[], fromIdx: number): number | undefined { + const limit = Math.min(fromIdx + MAX_BORDER_SCAN, lines.length); + + for (let i = fromIdx; i < limit; i++) { + if (isFenceOpener(lines[i])) { + i = findFenceClose(lines, i); + continue; + } + if (isSolidBorder(lines[i])) return i; + // A markdown heading means we've left the table's section. + if (/^#{1,6}\s/.test(lines[i].trimStart())) return undefined; + } + + return undefined; +} + +/** + * Parse data rows from lines after the column guide. + * + * When `boundaryIdx` is defined (bordered table), lines up to but not including + * the boundary are processed, and blank lines are treated as row separators. + * + * When `boundaryIdx` is undefined (unbounded table), the first blank line + * terminates the table. + * + * Continuation lines (first column empty) are merged into the preceding row. 
+ */ +function parseDataRows( + lines: string[], + spans: ColumnSpan[], + indent: number, + startIdx: number, + boundaryIdx: number | undefined, +): { rows: string[][]; lastConsumedIdx: number } { + const rows: string[][] = []; + const bounded = boundaryIdx !== undefined; + const limit = boundaryIdx ?? lines.length; + let lastConsumed = startIdx - 1; + let skippingOverflow = false; + + for (let i = startIdx; i < limit; i++) { + if (isBlank(lines[i])) { + skippingOverflow = false; + if (bounded) { + lastConsumed = i; + continue; // row separator inside bordered table + } + break; // unbounded: blank = end of table + } + + if (isSolidBorder(lines[i])) break; + + // Reject rows where non-final columns overflow their span boundaries. + // First data row overflow → guide doesn't match data → reject entire table. + // Later row overflow → skip that row and its continuation lines. + if (cellsOverflow(lines[i], spans, indent)) { + if (rows.length === 0) break; + skippingOverflow = true; + lastConsumed = i; + continue; + } + + const cells = extractCells(lines[i], spans, indent); + if (!hasAlphanumericContent(cells) && rows.length === 0) break; + + if (cells[0].length === 0 && rows.length > 0) { + // Continuation line: skip if the parent row was skipped due to overflow. + if (skippingOverflow) { + lastConsumed = i; + continue; + } + // Merge non-empty cells into previous row + const prev = rows[rows.length - 1]; + for (let c = 0; c < spans.length; c++) { + if (cells[c]) { + prev[c] = prev[c] ? 
`${prev[c]} ${cells[c]}` : cells[c]; + } + } + } else if (hasAlphanumericContent(cells)) { + skippingOverflow = false; + rows.push(cells); + } + + lastConsumed = i; + } + + return { rows, lastConsumedIdx: lastConsumed }; +} + +// --------------------------------------------------------------------------- +// GFM pipe-table output +// --------------------------------------------------------------------------- + +function escapePipe(text: string): string { + return text.replace(/\|/g, '\\|'); +} + +function toGfmPipeTable({ headers, rows }: ParsedTable): string[] { + const n = headers.length; + const cell = (row: string[], i: number) => escapePipe(row[i] ?? ''); + + const headerLine = `| ${headers.map(escapePipe).join(' | ')} |`; + const separator = `| ${Array.from({ length: n }, () => '---').join(' | ')} |`; + const dataLines = rows.map((row) => `| ${Array.from({ length: n }, (_, i) => cell(row, i)).join(' | ')} |`); + + return [headerLine, separator, ...dataLines]; +} diff --git a/packages/super-editor/src/core/helpers/markdown/types.ts b/packages/super-editor/src/core/helpers/markdown/types.ts index 890a1e58fb..2a8a58d851 100644 --- a/packages/super-editor/src/core/helpers/markdown/types.ts +++ b/packages/super-editor/src/core/helpers/markdown/types.ts @@ -12,6 +12,12 @@ import type { Editor } from '../../Editor.js'; export interface MarkdownConversionOptions { /** When true, skip side-effects like numbering allocation (for dry-run validation). */ dryRun?: boolean; + /** + * When true (default), detect pandoc-style fixed-width ASCII tables in the + * markdown source and rewrite them as GFM pipe tables before AST parsing. + * Set to `false` to skip this normalization step. 
+ */ + normalizeFixedWidthTables?: boolean; } // --------------------------------------------------------------------------- diff --git a/tests/doc-api-stories/tests/formatting/inline-formatting.ts b/tests/doc-api-stories/tests/formatting/inline-formatting.ts index 2a86d33968..701d4f3229 100644 --- a/tests/doc-api-stories/tests/formatting/inline-formatting.ts +++ b/tests/doc-api-stories/tests/formatting/inline-formatting.ts @@ -39,7 +39,7 @@ describe('document-api story: inline formatting', () => { // Insert text into the blank doc's single paragraph. // Without an explicit target, insert uses the first paragraph. - const insertResult = unwrap(await client.doc.insert({ sessionId, text })); + const insertResult = unwrap(await client.doc.insert({ sessionId, value: text })); expect(insertResult.receipt?.success).toBe(true); // The receipt's hoisted target contains the paragraph's stable blockId. diff --git a/tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts b/tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts new file mode 100644 index 0000000000..7e327f9601 --- /dev/null +++ b/tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts @@ -0,0 +1,288 @@ +import { readFile } from 'node:fs/promises'; +import path from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { unwrap, useStoryHarness } from '../harness'; + +type MarkdownStructureMetrics = { + headingsTotal: number; + listsTotal: number; + bulletsTotal: number; + orderedTotal: number; + hasPurposeItem: boolean; + hasNdaSignedItem: boolean; + hasNestedLevel3: boolean; + endOfAgreementMatches: number; + tablesTotal: number; + /** Sentinel text patterns that confirm table cell content was parsed correctly. 
 */
+    tableContentSignals: {
+      hasConfidentialityTerm: boolean;
+      hasPressRelease: boolean;
+      hasSignatureField: boolean;
+    };
+};
+
+function sid(label: string): string {
+  return `${label}-${Date.now()}-${Math.floor(Math.random() * 1_000_000)}`;
+}
+
+describe('document-api story: markdown override roundtrip', () => {
+  const { client, outPath, runCli } = useStoryHarness('markdown/override-roundtrip', {
+    preserveResults: true,
+  });
+
+  const fixturePath = path.resolve(import.meta.dirname, 'multi-page-nda-test-document.md');
+
+  async function collectStructure(sessionId: string): Promise<MarkdownStructureMetrics> {
+    const headingResult = unwrap(
+      await client.doc.find({
+        sessionId,
+        type: 'node',
+        nodeType: 'heading',
+        limit: 200,
+      }),
+    );
+
+    const listAll = unwrap(
+      await client.doc.lists.list({
+        sessionId,
+        limit: 200,
+      }),
+    );
+
+    const listBullets = unwrap(
+      await client.doc.lists.list({
+        sessionId,
+        kind: 'bullet',
+        limit: 200,
+      }),
+    );
+
+    const listOrdered = unwrap(
+      await client.doc.lists.list({
+        sessionId,
+        kind: 'ordered',
+        limit: 200,
+      }),
+    );
+
+    const endOfAgreement = unwrap(
+      await client.doc.find({
+        sessionId,
+        type: 'text',
+        pattern: 'END OF AGREEMENT',
+      }),
+    );
+
+    const tablesResult = unwrap(
+      await client.doc.find({
+        sessionId,
+        type: 'node',
+        nodeType: 'table',
+        limit: 100,
+      }),
+    );
+
+    const listItems: any[] = Array.isArray(listAll.items) ? listAll.items : [];
+    const hasPurposeItem = listItems.some((item) => item.text === 'Purpose');
+    const hasNdaSignedItem = listItems.some((item) => item.text === 'NDA signed');
+    const hasNestedLevel3 = listItems.some(
+      (item) => item.text === 'Level 3' && item.level === 2 && Array.isArray(item.path) && item.path.length === 3,
+    );
+
+    // Verify table cell content was parsed correctly via text search.
+    // These sentinels confirm specific tables exist with correct content:
+    // - "Confidentiality Term": Section 5 data row (proves table has ≥1 data row)
+    // - "Press release": Appendix A continuation-merged cell (proves multi-line merge works)
+    // - "Signature:": Signatures table cell (unique to table rows, not in fixture header)
+    const [confidentialityTerm, pressRelease, signatureField] = await Promise.all([
+      client.doc.find({ sessionId, type: 'text', pattern: 'Confidentiality Term' }),
+      client.doc.find({ sessionId, type: 'text', pattern: 'Press release' }),
+      client.doc.find({ sessionId, type: 'text', pattern: 'Signature:' }),
+    ]);
+    const tableContentSignals = {
+      hasConfidentialityTerm: unwrap(confidentialityTerm).total >= 1,
+      hasPressRelease: unwrap(pressRelease).total >= 1,
+      hasSignatureField: unwrap(signatureField).total >= 1,
+    };
+
+    return {
+      headingsTotal: headingResult.total,
+      listsTotal: listAll.total,
+      bulletsTotal: listBullets.total,
+      orderedTotal: listOrdered.total,
+      hasPurposeItem,
+      hasNdaSignedItem,
+      hasNestedLevel3,
+      endOfAgreementMatches: endOfAgreement.total,
+      tablesTotal: tablesResult.total,
+      tableContentSignals,
+    };
+  }
+
+  async function applyStylesPatch(
+    doc: string,
+    channel: 'run' | 'paragraph',
+    patch: Record<string, unknown>,
+    out: string,
+  ): Promise<any> {
+    const envelope = await runCli([
+      'styles',
+      'apply',
+      doc,
+      '--target-json',
+      JSON.stringify({ scope: 'docDefaults', channel }),
+      '--patch-json',
+      JSON.stringify(patch),
+      '--out',
+      out,
+    ]);
+
+    const payload = envelope?.data ?? envelope;
+    const receipt = payload?.receipt ?? 
payload; + expect(receipt).toBeDefined(); + return receipt; + } + + it('initializes from markdown override and preserves structure after save + reopen', async () => { + const markdown = await readFile(fixturePath, 'utf8'); + const sourceSessionId = sid('markdown-source'); + const roundtripSessionId = sid('markdown-roundtrip'); + const outputDocPath = outPath('nda-markdown-override.docx'); + + await client.doc.open({ + sessionId: sourceSessionId, + contentOverride: markdown, + overrideType: 'markdown', + }); + + const before = await collectStructure(sourceSessionId); + + // Sanity checks ensure this story fails loudly if markdown parsing regresses. + expect(before.headingsTotal).toBeGreaterThanOrEqual(20); + expect(before.listsTotal).toBeGreaterThanOrEqual(50); + expect(before.bulletsTotal).toBeGreaterThanOrEqual(30); + expect(before.orderedTotal).toBeGreaterThanOrEqual(15); + expect(before.hasPurposeItem).toBe(true); + expect(before.hasNdaSignedItem).toBe(true); + expect(before.hasNestedLevel3).toBe(true); + expect(before.endOfAgreementMatches).toBe(1); + + // Fixed-width ASCII tables must normalize to real table nodes. + // The NDA fixture contains 3 tables: Section 5 (3col), Appendix A (4col), Signatures (2col). + expect(before.tablesTotal).toBe(3); + + // Verify table cell content confirms correct parsing — not just node count. 
+ // "Confidentiality Term" = Section 5 data row exists + // "Press release" = Appendix A continuation-merged cell was joined correctly + // "Signature:" = Signatures table cell (unique to table, not in fixture header) + expect(before.tableContentSignals.hasConfidentialityTerm).toBe(true); + expect(before.tableContentSignals.hasPressRelease).toBe(true); + expect(before.tableContentSignals.hasSignatureField).toBe(true); + + await client.doc.save({ + sessionId: sourceSessionId, + out: outputDocPath, + }); + + await client.doc.close({ + sessionId: sourceSessionId, + discard: true, + }); + + await client.doc.open({ + doc: outputDocPath, + sessionId: roundtripSessionId, + }); + + const after = await collectStructure(roundtripSessionId); + + // Roundtrip invariant: structure metrics should remain identical after DOCX save/reopen. + expect(after).toEqual(before); + }); + + it('applies visible docDefaults styles to markdown-seeded content before final export', async () => { + const markdown = await readFile(fixturePath, 'utf8'); + const sourceSessionId = sid('markdown-styled-source'); + const styledSessionId = sid('markdown-styled'); + const verifySessionId = sid('markdown-styled-verify'); + const markdownSeedDoc = outPath('nda-markdown-seeded.docx'); + const runStyledDoc = outPath('nda-markdown-styled-run.docx'); + const styledTemplateDoc = outPath('nda-markdown-styled-template.docx'); + const exportedDoc = outPath('nda-markdown-on-styled-template-export.docx'); + + await client.doc.open({ + sessionId: sourceSessionId, + contentOverride: markdown, + overrideType: 'markdown', + }); + + const before = await collectStructure(sourceSessionId); + expect(before.headingsTotal).toBeGreaterThanOrEqual(20); + expect(before.listsTotal).toBeGreaterThanOrEqual(50); + expect(before.endOfAgreementMatches).toBe(1); + + await client.doc.save({ + sessionId: sourceSessionId, + out: markdownSeedDoc, + }); + + await client.doc.close({ + sessionId: sourceSessionId, + discard: true, + }); + + 
const runPatch = { + bold: true, + italic: true, + fontSize: 30, + letterSpacing: 24, + color: { val: 'C00000' }, + fontFamily: { ascii: 'Courier New', hAnsi: 'Courier New' }, + }; + + const runReceipt = await applyStylesPatch(markdownSeedDoc, 'run', runPatch, runStyledDoc); + expect(runReceipt.success).toBe(true); + expect(runReceipt.changed).toBe(true); + expect(runReceipt.after.bold).toBe('on'); + expect(runReceipt.after.italic).toBe('on'); + expect(runReceipt.after.fontSize).toBe(30); + expect(runReceipt.after.letterSpacing).toBe(24); + expect(runReceipt.after.color).toEqual({ val: 'C00000' }); + + const paragraphPatch = { + justification: 'justify', + spacing: { before: 240, after: 240, line: 420, lineRule: 'auto' }, + indent: { left: 720, firstLine: 360 }, + }; + + const paragraphReceipt = await applyStylesPatch(runStyledDoc, 'paragraph', paragraphPatch, styledTemplateDoc); + expect(paragraphReceipt.success).toBe(true); + expect(paragraphReceipt.changed).toBe(true); + expect(paragraphReceipt.after.justification).toBe('justify'); + expect(paragraphReceipt.after.spacing).toEqual(paragraphPatch.spacing); + expect(paragraphReceipt.after.indent).toEqual(paragraphPatch.indent); + + await client.doc.open({ + doc: styledTemplateDoc, + sessionId: styledSessionId, + }); + + await client.doc.save({ + sessionId: styledSessionId, + out: exportedDoc, + }); + + await client.doc.close({ + sessionId: styledSessionId, + discard: true, + }); + + await client.doc.open({ + doc: exportedDoc, + sessionId: verifySessionId, + }); + + const after = await collectStructure(verifySessionId); + expect(after).toEqual(before); + }); +}); diff --git a/tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md b/tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md new file mode 100644 index 0000000000..e149e32f40 --- /dev/null +++ b/tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md @@ -0,0 +1,220 @@ +# NON-DISCLOSURE AGREEMENT (NDA) + +**Effective 
Date:** {{Effective Date}}\ +**Disclosing Party:** {{Disclosing Party Name}}\ +**Receiving Party:** {{Receiving Party Name}} + +------------------------------------------------------------------------ + +## TABLE OF CONTENTS + +1. [Purpose](#purpose) +2. [Definition of Confidential + Information](#definition-of-confidential-information) +3. [Obligations of Receiving Party](#obligations-of-receiving-party) +4. [Exclusions](#exclusions) +5. [Term and Termination](#term-and-termination) +6. [Remedies](#remedies) +7. [Miscellaneous](#miscellaneous) +8. [Signatures](#signatures) +9. [Appendix A -- Data Classification + Table](#appendix-a--data-classification-table) +10. [Appendix B -- Security Controls + Checklist](#appendix-b--security-controls-checklist) + +------------------------------------------------------------------------ + +## Purpose + +This Non-Disclosure Agreement ("Agreement") is entered into as of the +**Effective Date** by and between the parties listed above. + +The purpose of this Agreement is to: + +- Protect confidential and proprietary information. +- Establish permitted uses of disclosed information. +- Define responsibilities and limitations. +- Enable evaluation of a potential business relationship. + +For more information about NDAs, see:\ +- [Wikipedia -- Non-disclosure +agreement](https://en.wikipedia.org/wiki/Non-disclosure_agreement)\ +- [Cornell Law -- NDA +Overview](https://www.law.cornell.edu/wex/non-disclosure_agreement) + +------------------------------------------------------------------------ + +## Definition of Confidential Information + +"Confidential Information" includes, but is not limited to: + +1. Technical data +2. Trade secrets +3. Product designs +4. Financial information +5. Customer lists +6. Business strategies +7. Source code and documentation + +### Examples + +> Confidential Information may be disclosed in written, oral, +> electronic, or other tangible form. 
+ +``` text +Example: Internal API keys, architecture diagrams, pricing models. +``` + +------------------------------------------------------------------------ + +## Obligations of Receiving Party + +The Receiving Party agrees to: + +- Maintain strict confidentiality. +- Use the information solely for evaluation purposes. +- Limit disclosure to authorized representatives. +- Implement reasonable security measures. + +### Required Security Measures + +- Encryption at rest and in transit +- Multi-factor authentication (MFA) +- Access control based on least privilege +- Secure backup procedures + +------------------------------------------------------------------------ + +## Exclusions + +Confidential Information does **NOT** include information that: + +- Is or becomes publicly available. +- Was already known prior to disclosure. +- Is independently developed. +- Is disclosed by court order (with prompt notice). + +------------------------------------------------------------------------ + +## Term and Termination + + Clause Description Duration + ---------------------- --------------------------------- ----------- + Confidentiality Term Protection of confidential info 5 years + Evaluation Period Business evaluation timeline 12 months + Survival Clause Survives termination Yes + +This Agreement remains in effect until terminated in writing by either +party. + +------------------------------------------------------------------------ + +## Remedies + +The Receiving Party acknowledges that: + +- Unauthorized disclosure may cause irreparable harm. +- Monetary damages may be insufficient. +- Injunctive relief may be appropriate. + +------------------------------------------------------------------------ + +## Miscellaneous + +### Governing Law + +This Agreement shall be governed by the laws of: + +- [ ] California\ +- [ ] New York\ +- [ ] Texas\ +- [ ] Other: {{Specify}} + +### Notices + +All notices shall be delivered via: + +1. Certified mail\ +2. 
Email with confirmation\ +3. Recognized courier service + +------------------------------------------------------------------------ + +## Appendix A -- Data Classification Table + + ------------------------------------------------------------------------- + Classification Description Example Required Controls + ------------------- --------------- ------------ ------------------------ + Public No restrictions Press None + release + + Internal Limited Internal Access controls + distribution memo + + Confidential Sensitive Financial Encryption + MFA + business data reports + + Restricted Highly Source code Strict access + logging + sensitive + ------------------------------------------------------------------------- + +------------------------------------------------------------------------ + +## Appendix B -- Security Controls Checklist + +### Administrative Controls + +- [x] NDA signed +- [ ] Background checks completed +- [ ] Security training conducted + +### Technical Controls + +- [x] Encrypted storage +- [x] Firewall enabled +- [ ] Intrusion detection system + +### Physical Controls + +- [ ] Secure office access +- [ ] Locked filing cabinets +- [ ] Visitor sign-in logs + +------------------------------------------------------------------------ + +## Signatures + + Disclosing Party Receiving Party + ----------------------------- ----------------------------- + Name: \_\_\_\_\_\_\_\_\_\_ Name: \_\_\_\_\_\_\_\_\_\_ + Title: \_\_\_\_\_\_\_\_\_\_ Title: \_\_\_\_\_\_\_\_\_\_ + Signature: \_\_\_\_\_\_ Signature: \_\_\_\_\_\_ + Date: \_\_\_\_\_\_\_\_\_\_ Date: \_\_\_\_\_\_\_\_\_\_ + +------------------------------------------------------------------------ + +## Additional Formatting Examples + +### Nested Lists + +- Level 1 + - Level 2 + - Level 3 + +### Horizontal Rule + +------------------------------------------------------------------------ + +### Emphasis Examples + +- *Italic text* +- **Bold text** +- ~~Strikethrough~~ + +### Inline Code + +Use `CONFIDENTIAL` 
header in all documents. + +------------------------------------------------------------------------ + +**END OF AGREEMENT** diff --git a/tests/doc-api-stories/tests/styles/doc-defaults.ts b/tests/doc-api-stories/tests/styles/doc-defaults.ts index 8bb41e1545..59aa4567e5 100644 --- a/tests/doc-api-stories/tests/styles/doc-defaults.ts +++ b/tests/doc-api-stories/tests/styles/doc-defaults.ts @@ -20,7 +20,7 @@ describe('document-api story: styles.apply docDefaults', () => { async function seedBlankDoc(sessionId: string, text: string, docName: string): Promise { await client.doc.open({ sessionId }); - const insertResult = unwrap(await client.doc.insert({ sessionId, text })); + const insertResult = unwrap(await client.doc.insert({ sessionId, value: text })); expect(insertResult.receipt?.success).toBe(true); const sourceDoc = outPath(docName); await client.doc.save({ sessionId, out: sourceDoc }); @@ -33,7 +33,7 @@ describe('document-api story: styles.apply docDefaults', () => { } await client.doc.open({ sessionId }); - const firstInsert = unwrap(await client.doc.insert({ sessionId, text: paragraphs[0] })); + const firstInsert = unwrap(await client.doc.insert({ sessionId, value: paragraphs[0] })); expect(firstInsert.receipt?.success).toBe(true); for (const paragraphText of paragraphs.slice(1)) { diff --git a/tests/doc-api-stories/tests/tables/all-commands.ts b/tests/doc-api-stories/tests/tables/all-commands.ts index 159740072f..31920ba34a 100644 --- a/tests/doc-api-stories/tests/tables/all-commands.ts +++ b/tests/doc-api-stories/tests/tables/all-commands.ts @@ -206,7 +206,7 @@ describe('document-api story: all table commands', () => { operationId: 'tables.convertFromText', setup: 'blank', prepare: async (sessionId) => { - await api.doc.insert({ sessionId, text: 'A\tB\tC' }); + await api.doc.insert({ sessionId, value: 'A\tB\tC' }); }, run: async (sessionId) => { const paragraphNodeId = await firstNodeId(sessionId, 'paragraph'); @@ -233,7 +233,7 @@ describe('document-api 
story: all table commands', () => { prepare: async (sessionId) => { await api.doc.insert({ sessionId, - text: 'Alpha\tBeta\tGamma', + value: 'Alpha\tBeta\tGamma', }); const secondParagraphResult = unwrap( @@ -345,7 +345,7 @@ describe('document-api story: all table commands', () => { prepare: async (sessionId) => { await api.doc.insert({ sessionId, - text: 'Alpha\tBeta\tGamma', + value: 'Alpha\tBeta\tGamma', }); const secondParagraphResult = unwrap( @@ -567,7 +567,7 @@ describe('document-api story: all table commands', () => { operationId: 'tables.deleteCell', setup: 'blank', prepare: async (sessionId) => { - await api.doc.insert({ sessionId, text: 'A1\tB1\tC1' }); + await api.doc.insert({ sessionId, value: 'A1\tB1\tC1' }); for (const rowText of ['A2\tB2\tC2', 'A3\tB3\tC3']) { const createRowResult = unwrap( From 37436a7587709ba071477c48cd8c6a32ca7943eb Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Thu, 26 Feb 2026 15:26:17 -0800 Subject: [PATCH 5/5] fix(markdown): skip indented code blocks in fixed-width table normalization --- ...malizeFixedWidthTables.integration.test.ts | 9 +++++ .../normalizeFixedWidthTables.test.ts | 34 +++++++++++++++++++ .../markdown/normalizeFixedWidthTables.ts | 30 ++++++++++++++-- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts index d4b978621b..65a5a316ea 100644 --- a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts +++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts @@ -68,6 +68,15 @@ describe('normalizer → remark-gfm AST integration', () => { expect(rowTexts(tables[0].children[1])).toEqual(['Term', 'Protection']); }); + it('does not produce tables from 4-space indented code blocks', () => { + const raw = [' Clause Description', ' ---------- 
-----------', ' Term Protection'].join('\n'); + + const normalized = normalizeFixedWidthTables(raw); + const ast = parseMarkdownToAst(normalized); + expect(findTables(ast)).toHaveLength(0); + expect(normalized).toBe(raw); + }); + it('Section 5 table (no borders): 3 columns, 3 data rows', () => { const raw = [ ' Clause Description Duration', diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts index 50318ae4c5..4ab2447a61 100644 --- a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts +++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts @@ -281,6 +281,40 @@ describe('normalizeFixedWidthTables', () => { }); }); + describe('indented code blocks', () => { + it('does not transform tables inside 4-space indented code blocks', () => { + const input = [' Name Age', ' ------ ---', ' Alice 30'].join('\n'); + + const output = normalizeFixedWidthTables(input); + + expect(extractPipeTable(output)).toHaveLength(0); + expect(output).toBe(input); + }); + + it('transforms tables before and after 4-space indented code blocks', () => { + const input = [ + 'A B', + '----- -----', + '1 2', + '', + ' C D', + ' ----- -----', + ' 3 4', + '', + 'E F', + '----- -----', + '5 6', + ].join('\n'); + + const output = normalizeFixedWidthTables(input); + const tables = extractPipeTable(output); + + expect(tables.filter((l) => l.startsWith('| A'))).toHaveLength(1); + expect(tables.filter((l) => l.startsWith('| E'))).toHaveLength(1); + expect(output).toContain(' C D'); + }); + }); + // --------------------------------------------------------------------------- // Regression: indent mismatch must not corrupt cell text (Bug #1) // --------------------------------------------------------------------------- diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts 
b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts index 8851108c83..9fd8890a8e 100644 --- a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts +++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts @@ -25,7 +25,7 @@ * In bordered tables, blank lines between data rows are treated as row separators. * In unbounded tables, a blank line terminates the table. * - * Fenced code blocks are skipped entirely. + * Code blocks are skipped entirely (fenced and 4-space/tab-indented). */ // --------------------------------------------------------------------------- @@ -36,7 +36,7 @@ * Detect pandoc-style fixed-width ASCII tables in a markdown string and * rewrite them as GFM pipe tables that remark-gfm can parse. * - * Fenced code blocks are skipped. Bordered (top/bottom border) and + * Code blocks are skipped (fenced and 4-space/tab-indented). Bordered (top/bottom border) and * unbounded (header + guide only) layouts are both supported, including * continuation lines that wrap across multiple rows. 
* @@ -57,6 +57,13 @@ export function normalizeFixedWidthTables(markdown: string): string { continue; } + if (isIndentedCodeOpener(lines[i])) { + const closeIdx = findIndentedCodeClose(lines, i); + for (let j = i; j <= closeIdx; j++) output.push(lines[j]); + i = closeIdx + 1; + continue; + } + const table = tryParseTableAt(lines, i); if (table) { output.push(...toGfmPipeTable(table)); @@ -107,15 +114,20 @@ interface TableAnchors { } // --------------------------------------------------------------------------- -// Fenced code block handling +// Code block handling // --------------------------------------------------------------------------- const FENCE_OPEN_RE = /^( {0,3})(`{3,}|~{3,})/; +const INDENTED_CODE_RE = /^(?: {4,}|\t)/; function isFenceOpener(line: string): boolean { return FENCE_OPEN_RE.test(line); } +function isIndentedCodeOpener(line: string): boolean { + return INDENTED_CODE_RE.test(line); +} + function findFenceClose(lines: string[], openIdx: number): number { const match = lines[openIdx].match(FENCE_OPEN_RE); if (!match) return openIdx; @@ -130,6 +142,14 @@ function findFenceClose(lines: string[], openIdx: number): number { return lines.length - 1; // unclosed fence: consume to end } +function findIndentedCodeClose(lines: string[], openIdx: number): number { + for (let i = openIdx + 1; i < lines.length; i++) { + if (isBlank(lines[i])) continue; + if (!isIndentedCodeOpener(lines[i])) return i - 1; + } + return lines.length - 1; +} + // --------------------------------------------------------------------------- // Line classification // --------------------------------------------------------------------------- @@ -362,6 +382,10 @@ function findBottomBorder(lines: string[], fromIdx: number): number | undefined i = findFenceClose(lines, i); continue; } + if (isIndentedCodeOpener(lines[i])) { + i = findIndentedCodeClose(lines, i); + continue; + } if (isSolidBorder(lines[i])) return i; // A markdown heading means we've left the table's section. 
if (/^#{1,6}\s/.test(lines[i].trimStart())) return undefined;