From 3fee96ffa3af61aeacf49927abb9701bea1ca291 Mon Sep 17 00:00:00 2001
From: Nick Bernal
Date: Wed, 25 Feb 2026 20:59:14 -0800
Subject: [PATCH 1/5] feat(document-api): markdown handling
---
apps/cli/src/__tests__/cli.test.ts | 132 ++++-
apps/cli/src/__tests__/conformance/harness.ts | 2 +-
.../src/__tests__/conformance/scenarios.ts | 2 +-
apps/cli/src/__tests__/host.test.ts | 2 +-
apps/cli/src/__tests__/lib/validate.test.ts | 4 +-
.../src/cli/cli-only-operation-definitions.ts | 3 +-
apps/cli/src/cli/operation-params.ts | 2 +
apps/cli/src/commands/open.ts | 51 +-
apps/cli/src/lib/document.ts | 37 ++
apps/cli/src/lib/errors.ts | 1 +
apps/cli/src/lib/validate.ts | 8 +-
apps/cli/src/types/super-editor-adapters.d.ts | 24 +-
apps/cli/tsconfig.json | 1 +
apps/docs/document-api/common-workflows.mdx | 4 +-
apps/docs/document-api/reference/index.mdx | 2 +-
apps/docs/document-api/reference/insert.mdx | 25 +-
apps/docs/document-engine/sdks.mdx | 4 +-
packages/document-api/src/README.md | 4 +-
.../src/contract/contract.test.ts | 6 +-
.../src/contract/operation-definitions.ts | 5 +-
packages/document-api/src/contract/schemas.ts | 5 +-
packages/document-api/src/index.test.ts | 152 ++++-
packages/document-api/src/index.ts | 2 +-
packages/document-api/src/insert/insert.ts | 54 +-
.../document-api/src/invoke/invoke.test.ts | 14 +-
.../src/overview-examples.test.ts | 15 +-
packages/document-api/src/types/receipt.ts | 1 +
packages/document-api/src/write/write.ts | 3 +
packages/super-editor/package.json | 3 +-
.../src/core/Editor.api-contracts.test.js | 4 +-
.../importMarkdown.integration.test.js | 51 +-
.../src/core/helpers/importMarkdown.js | 62 ++-
.../src/core/helpers/importMarkdown.test.js | 61 +-
.../src/core/helpers/markdown/index.ts | 17 +
.../helpers/markdown/markdownToPmContent.ts | 102 ++++
.../helpers/markdown/mdastToProseMirror.ts | 525 ++++++++++++++++++
.../core/helpers/markdown/parseMarkdownAst.ts | 22 +
.../src/core/helpers/markdown/types.ts | 64 +++
.../assemble-adapters.ts | 3 +-
.../plan-engine/index.ts | 2 +-
.../insert-structured-wrapper.test.ts | 304 ++++++++++
.../plan-engine/plan-wrappers.ts | 223 ++++++++
pnpm-lock.yaml | 25 +-
pnpm-workspace.yaml | 2 +
tests/behavior/helpers/document-api.ts | 2 +-
.../programmatic-tracked-change.spec.ts | 2 +-
46 files changed, 1859 insertions(+), 180 deletions(-)
create mode 100644 packages/super-editor/src/core/helpers/markdown/index.ts
create mode 100644 packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts
create mode 100644 packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts
create mode 100644 packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts
create mode 100644 packages/super-editor/src/core/helpers/markdown/types.ts
create mode 100644 packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts
diff --git a/apps/cli/src/__tests__/cli.test.ts b/apps/cli/src/__tests__/cli.test.ts
index e2dc905d93..1685a0608e 100644
--- a/apps/cli/src/__tests__/cli.test.ts
+++ b/apps/cli/src/__tests__/cli.test.ts
@@ -279,11 +279,11 @@ describe('superdoc CLI', () => {
expect(result.stdout).not.toContain(' Document path or stdin');
});
- test('describe command doc.insert includes --target and --text flags', async () => {
+ test('describe command doc.insert includes --target and --value flags', async () => {
const result = await runCli(['describe', 'command', 'doc.insert', '--output', 'pretty']);
expect(result.code).toBe(0);
expect(result.stdout).toContain('--target');
- expect(result.stdout).toContain('--text');
+ expect(result.stdout).toContain('--value');
});
test('call executes an operation from canonical input payload', async () => {
@@ -501,7 +501,7 @@ describe('superdoc CLI', () => {
'--input-json',
JSON.stringify({
doc: source,
- text: 'CALL_INSERT_TOKEN_1597',
+ value: 'CALL_INSERT_TOKEN_1597',
out,
}),
]);
@@ -838,7 +838,7 @@ describe('superdoc CLI', () => {
insertSource,
'--target-json',
JSON.stringify(collapsedTarget),
- '--text',
+ '--value',
'CLI_INSERT_TOKEN_1597',
'--out',
insertOut,
@@ -861,7 +861,7 @@ describe('superdoc CLI', () => {
const insertResult = await runCli([
'insert',
insertSource,
- '--text',
+ '--value',
'CLI_DEFAULT_INSERT_TOKEN_1597',
'--out',
insertOut,
@@ -911,7 +911,7 @@ describe('superdoc CLI', () => {
const insertResult = await runCli([
'insert',
blankFirstOut,
- '--text',
+ '--value',
'CLI_BLANK_INSERT_TOKEN_1597',
'--out',
insertOut,
@@ -956,7 +956,7 @@ describe('superdoc CLI', () => {
target.blockId,
'--offset',
'0',
- '--text',
+ '--value',
'CLI_BLOCKID_OFFSET_INSERT_1597',
'--out',
insertOut,
@@ -989,7 +989,7 @@ describe('superdoc CLI', () => {
insertSource,
'--block-id',
target.blockId,
- '--text',
+ '--value',
'CLI_BLOCKID_ONLY_INSERT_1597',
'--out',
insertOut,
@@ -1012,7 +1012,16 @@ describe('superdoc CLI', () => {
const insertOut = join(TEST_DIR, 'insert-offset-no-blockid-out.docx');
await copyFile(SAMPLE_DOC, insertSource);
- const result = await runCli(['insert', insertSource, '--offset', '5', '--text', 'should-fail', '--out', insertOut]);
+ const result = await runCli([
+ 'insert',
+ insertSource,
+ '--offset',
+ '5',
+ '--value',
+ 'should-fail',
+ '--out',
+ insertOut,
+ ]);
expect(result.code).toBe(1);
const envelope = parseJsonOutput(result);
@@ -1208,7 +1217,7 @@ describe('superdoc CLI', () => {
deleteSource,
'--target-json',
JSON.stringify(collapsedTarget),
- '--text',
+ '--value',
'CLI_DELETE_TOKEN_1597',
'--out',
insertedOut,
@@ -1661,7 +1670,7 @@ describe('superdoc CLI', () => {
const openResult = await runCli(['open', SAMPLE_DOC]);
expect(openResult.code).toBe(0);
- const insertResult = await runCli(['insert', '--text', 'STATEFUL_DEFAULT_INSERT_1597']);
+ const insertResult = await runCli(['insert', '--value', 'STATEFUL_DEFAULT_INSERT_1597']);
expect(insertResult.code).toBe(0);
const insertEnvelope = parseJsonOutput<
@@ -1690,7 +1699,7 @@ describe('superdoc CLI', () => {
const insertResult = await runCli([
'insert',
- '--text',
+ '--value',
'STATEFUL_INSERT_EXPORT_FAILURE_1597',
'--out',
blockedOutPath,
@@ -1938,7 +1947,7 @@ describe('superdoc CLI', () => {
test('session save persists a specific session and keeps it open', async () => {
await runCli(['open', SAMPLE_DOC, '--session', 'alpha']);
- const insertResult = await runCli(['insert', '--session', 'alpha', '--text', 'SESSION_SAVE_TOKEN_1597']);
+ const insertResult = await runCli(['insert', '--session', 'alpha', '--value', 'SESSION_SAVE_TOKEN_1597']);
expect(insertResult.code).toBe(0);
const savedOut = join(TEST_DIR, 'session-save-alpha.docx');
@@ -1997,4 +2006,101 @@ describe('superdoc CLI', () => {
const findEnvelope = parseJsonOutput(findResult);
expect(findEnvelope.error.code).toBe('PROJECT_CONTEXT_MISMATCH');
});
+
+ // -- open --content-override / --override-type validation --
+
+ test('open rejects --content-override without --override-type', async () => {
+ const result = await runCli(['open', SAMPLE_DOC, '--content-override', '# Hello']);
+ expect(result.code).toBe(1);
+ const envelope = parseJsonOutput(result);
+ expect(envelope.error.code).toBe('INVALID_ARGUMENT');
+ expect(envelope.error.message).toContain('--override-type');
+ });
+
+ test('open rejects --override-type without --content-override', async () => {
+ const result = await runCli(['open', SAMPLE_DOC, '--override-type', 'markdown']);
+ expect(result.code).toBe(1);
+ const envelope = parseJsonOutput(result);
+ expect(envelope.error.code).toBe('INVALID_ARGUMENT');
+ expect(envelope.error.message).toContain('--content-override');
+ });
+
+ test('open rejects invalid --override-type value', async () => {
+ const result = await runCli(['open', SAMPLE_DOC, '--content-override', 'x', '--override-type', 'xml']);
+ expect(result.code).toBe(1);
+ const envelope = parseJsonOutput(result);
+ expect(envelope.error.code).toBe('INVALID_ARGUMENT');
+ expect(envelope.error.message).toContain('markdown, html, text');
+ });
+
+ test('open with --override-type text applies content semantically', async () => {
+ const openResult = await runCli([
+ 'open',
+ SAMPLE_DOC,
+ '--content-override',
+ 'Override text content',
+ '--override-type',
+ 'text',
+ ]);
+ expect(openResult.code).toBe(0);
+
+ // Verify the override text is actually present in the document
+ const findResult = await runCli(['find', '--type', 'text', '--pattern', 'Override text content']);
+ expect(findResult.code).toBe(0);
+ const findEnvelope = parseJsonOutput<{ data: { result: { total: number } } }>(findResult);
+ expect(findEnvelope.data.result.total).toBeGreaterThan(0);
+
+ const closeResult = await runCli(['close', '--discard']);
+ expect(closeResult.code).toBe(0);
+ });
+
+ test('open with --override-type markdown applies content semantically', async () => {
+ const openResult = await runCli([
+ 'open',
+ SAMPLE_DOC,
+ '--content-override',
+ '# Markdown Override Heading',
+ '--override-type',
+ 'markdown',
+ ]);
+ expect(openResult.code).toBe(0);
+
+ // Verify the markdown content is present in the document
+ const findResult = await runCli(['find', '--type', 'text', '--pattern', 'Markdown Override Heading']);
+ expect(findResult.code).toBe(0);
+ const findEnvelope = parseJsonOutput<{ data: { result: { total: number } } }>(findResult);
+ expect(findEnvelope.data.result.total).toBeGreaterThan(0);
+
+ const closeResult = await runCli(['close', '--discard']);
+ expect(closeResult.code).toBe(0);
+ });
+
+ test('open with --override-type html rejects in headless CLI', async () => {
+ const openResult = await runCli([
+ 'open',
+ SAMPLE_DOC,
+ '--content-override',
+ '<p>HTML Override</p>',
+ '--override-type',
+ 'html',
+ ]);
+ expect(openResult.code).toBe(1);
+ const envelope = parseJsonOutput(openResult);
+ expect(envelope.error.code).toBe('UNSUPPORTED_FORMAT');
+ expect(envelope.error.message).toContain('HTML');
+ });
+
+ test('open with --content-override empty string is accepted (not silently ignored)', async () => {
+ const openResult = await runCli(['open', SAMPLE_DOC, '--content-override', '', '--override-type', 'text']);
+ expect(openResult.code).toBe(0);
+
+ // Verify original document content was replaced (find for known original text should fail)
+ const findOriginal = await runCli(['find', '--type', 'text', '--pattern', 'Wilde']);
+ expect(findOriginal.code).toBe(0);
+ const findEnvelope = parseJsonOutput<{ data: { result: { total: number } } }>(findOriginal);
+ expect(findEnvelope.data.result.total).toBe(0);
+
+ const closeResult = await runCli(['close', '--discard']);
+ expect(closeResult.code).toBe(0);
+ });
});
diff --git a/apps/cli/src/__tests__/conformance/harness.ts b/apps/cli/src/__tests__/conformance/harness.ts
index aa3fc6f80e..de55364c9c 100644
--- a/apps/cli/src/__tests__/conformance/harness.ts
+++ b/apps/cli/src/__tests__/conformance/harness.ts
@@ -294,7 +294,7 @@ export class ConformanceHarness {
sourceDoc,
'--target-json',
JSON.stringify(collapsedTarget),
- '--text',
+ '--value',
'TRACKED_CONFORMANCE_TOKEN',
'--change-mode',
'tracked',
diff --git a/apps/cli/src/__tests__/conformance/scenarios.ts b/apps/cli/src/__tests__/conformance/scenarios.ts
index f121023f26..ebdaf3d844 100644
--- a/apps/cli/src/__tests__/conformance/scenarios.ts
+++ b/apps/cli/src/__tests__/conformance/scenarios.ts
@@ -580,7 +580,7 @@ export const SUCCESS_SCENARIOS = {
docPath,
'--target-json',
JSON.stringify(collapsed),
- '--text',
+ '--value',
'CONFORMANCE_INSERT',
'--out',
harness.createOutputPath('doc-insert-output'),
diff --git a/apps/cli/src/__tests__/host.test.ts b/apps/cli/src/__tests__/host.test.ts
index 4a0b268753..ad2b75ede0 100644
--- a/apps/cli/src/__tests__/host.test.ts
+++ b/apps/cli/src/__tests__/host.test.ts
@@ -285,7 +285,7 @@ describe('CLI host mode', () => {
docPath,
'--target-json',
JSON.stringify(collapsedTarget),
- '--text',
+ '--value',
'HOST_CONFORMANCE_INSERT',
'--out',
path.join(stateDir, 'host-conformance-insert.docx'),
diff --git a/apps/cli/src/__tests__/lib/validate.test.ts b/apps/cli/src/__tests__/lib/validate.test.ts
index 4c902237b2..9a7aedd699 100644
--- a/apps/cli/src/__tests__/lib/validate.test.ts
+++ b/apps/cli/src/__tests__/lib/validate.test.ts
@@ -142,7 +142,7 @@ describe('validateCreateParagraphInput', () => {
expect(result.at).toEqual({
kind: 'before',
- nodeId: 'p1',
+ target: { kind: 'block', nodeType: 'paragraph', nodeId: 'p1' },
});
});
@@ -156,7 +156,7 @@ describe('validateCreateParagraphInput', () => {
expect(result.at).toEqual({
kind: 'after',
- nodeId: 'p2',
+ target: { kind: 'block', nodeType: 'paragraph', nodeId: 'p2' },
});
});
diff --git a/apps/cli/src/cli/cli-only-operation-definitions.ts b/apps/cli/src/cli/cli-only-operation-definitions.ts
index 6eb59d1ce6..309a0660ac 100644
--- a/apps/cli/src/cli/cli-only-operation-definitions.ts
+++ b/apps/cli/src/cli/cli-only-operation-definitions.ts
@@ -40,7 +40,8 @@ export interface CliOnlyOperationDefinition {
export const CLI_ONLY_OPERATION_DEFINITIONS: Record<string, CliOnlyOperationDefinition> = {
open: {
category: 'lifecycle',
- description: 'Open a document and create a persistent editing session.',
+ description:
+ 'Open a document and create a persistent editing session. Optionally override the document body with contentOverride + overrideType (markdown, html, or text).',
requiresDocumentContext: false,
intentName: 'open_document',
sdkMetadata: { mutates: false, idempotency: 'non-idempotent', supportsTrackedMode: false, supportsDryRun: false },
diff --git a/apps/cli/src/cli/operation-params.ts b/apps/cli/src/cli/operation-params.ts
index 38495d73f1..15f615fb99 100644
--- a/apps/cli/src/cli/operation-params.ts
+++ b/apps/cli/src/cli/operation-params.ts
@@ -417,6 +417,8 @@ const CLI_ONLY_METADATA: Record = {
{ name: 'collaboration', kind: 'jsonFlag', flag: 'collaboration-json', type: 'json' },
{ name: 'collabDocumentId', kind: 'flag', flag: 'collab-document-id', type: 'string' },
{ name: 'collabUrl', kind: 'flag', flag: 'collab-url', type: 'string' },
+ { name: 'contentOverride', kind: 'flag', flag: 'content-override', type: 'string' },
+ { name: 'overrideType', kind: 'flag', flag: 'override-type', type: 'string' },
],
constraints: null,
},
diff --git a/apps/cli/src/commands/open.ts b/apps/cli/src/commands/open.ts
index 31d69e889e..f3df19e3de 100644
--- a/apps/cli/src/commands/open.ts
+++ b/apps/cli/src/commands/open.ts
@@ -16,6 +16,13 @@ import { parseOperationArgs } from '../lib/operation-args';
import { generateSessionId } from '../lib/session';
import type { CommandContext, CommandExecution } from '../lib/types';
+const VALID_OVERRIDE_TYPES = new Set(['markdown', 'html', 'text']);
+
+/** Escape CommonMark special characters so the text is treated as literal. */
+function escapeMarkdown(str: string): string {
+ return str.replace(/([\\`*_{}[\]()#+\-.!|>~])/g, '\\$1');
+}
+
export async function runOpen(tokens: string[], context: CommandContext): Promise<CommandExecution> {
const { parsed, help } = parseOperationArgs('doc.open', tokens, {
commandName: 'open',
@@ -28,12 +35,14 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis
data: {
usage: [
'superdoc open [doc] [--session <id>]',
+ 'superdoc open [doc] --content-override <content> --override-type <markdown|html|text>',
'superdoc open [doc] --collaboration-json "{...}" [--session <id>]',
],
},
pretty: [
'Usage:',
' superdoc open [doc] [--session <id>]',
+ ' superdoc open [doc] --content-override <content> --override-type <markdown|html|text>',
' superdoc open [doc] --collaboration-json "{...}" [--session <id>]',
].join('\n'),
};
@@ -45,6 +54,23 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis
const collaborationPayload = await resolveJsonInput(parsed, 'collaboration');
const collabUrl = getStringOption(parsed, 'collab-url');
const collabDocumentId = getStringOption(parsed, 'collab-document-id');
+ const contentOverride = getStringOption(parsed, 'content-override');
+ const overrideType = getStringOption(parsed, 'override-type');
+
+ // Validate contentOverride / overrideType co-requirement.
+ // Use != null checks so that intentional empty-string overrides are honored.
+ if (contentOverride != null && !overrideType) {
+ throw new CliError('INVALID_ARGUMENT', 'open: --content-override requires --override-type.');
+ }
+ if (overrideType && contentOverride == null) {
+ throw new CliError('INVALID_ARGUMENT', 'open: --override-type requires --content-override.');
+ }
+ if (overrideType && !VALID_OVERRIDE_TYPES.has(overrideType)) {
+ throw new CliError(
+ 'INVALID_ARGUMENT',
+ `open: --override-type must be one of: markdown, html, text. Got "${overrideType}".`,
+ );
+ }
if (collaborationPayload != null && (collabUrl || collabDocumentId)) {
throw new CliError(
@@ -53,6 +79,14 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis
);
}
+ // Content override is incompatible with collaboration mode
+ if (contentOverride != null && (collaborationPayload != null || collabUrl)) {
+ throw new CliError(
+ 'INVALID_ARGUMENT',
+ 'open: --content-override is incompatible with collaboration mode. Content override is a template-initialization operation.',
+ );
+ }
+
let collaborationInput;
if (collaborationPayload != null) {
collaborationInput = parseCollaborationInput(collaborationPayload);
@@ -69,6 +103,21 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis
const collaboration = collaborationInput ? resolveCollaborationProfile(collaborationInput, sessionId) : undefined;
const sessionType = collaboration ? 'collab' : 'local';
+ // Build editor open options from override params
+ const editorOpenOptions: Record<string, unknown> = {};
+ if (contentOverride != null && overrideType) {
+ if (overrideType === 'markdown') {
+ editorOpenOptions.markdown = contentOverride;
+ } else if (overrideType === 'html') {
+ editorOpenOptions.html = contentOverride;
+ } else if (overrideType === 'text') {
+ // Route through the markdown pipeline which is DOM-free (AST-based),
+ // so it works in headless CLI mode. Escape markdown syntax characters
+ // so the content is treated as literal text, not interpreted as formatting.
+ editorOpenOptions.markdown = escapeMarkdown(contentOverride);
+ }
+ }
+
return withContextLock(
context.io,
'open',
@@ -104,7 +153,7 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis
const opened = collaboration
? await openCollaborativeDocument(doc!, context.io, collaboration)
- : await openDocument(doc, context.io);
+ : await openDocument(doc, context.io, { editorOpenOptions });
let adoptedToHostPool = false;
try {
const output = await exportToPath(opened.editor, paths.workingDocPath, true);
diff --git a/apps/cli/src/lib/document.ts b/apps/cli/src/lib/document.ts
index a345ef9213..e92bbcd41a 100644
--- a/apps/cli/src/lib/document.ts
+++ b/apps/cli/src/lib/document.ts
@@ -3,6 +3,7 @@ import { createHash } from 'node:crypto';
import { Editor } from 'superdoc/super-editor';
import { BLANK_DOCX_BASE64 } from '@superdoc/super-editor/blank-docx';
import { getDocumentApiAdapters } from '@superdoc/super-editor/document-api-adapters';
+import { markdownToPmDoc } from '@superdoc/super-editor/markdown';
import { createDocumentApi, type DocumentApi } from '@superdoc/document-api';
import type { CollaborationProfile } from './collaboration';
@@ -27,6 +28,8 @@ interface OpenDocumentOptions {
documentId?: string;
ydoc?: unknown;
collaborationProvider?: unknown;
+ /** Options passed through to Editor.open() (e.g., markdown/html for content override). */
+ editorOpenOptions?: Record<string, unknown>;
}
export interface FileOutputMeta {
@@ -98,6 +101,21 @@ export async function openDocument(
meta = { source: 'blank', byteLength: source.byteLength };
}
+ // Separate content overrides from options passed to Editor.open().
+ // The Editor's built-in markdown/html init paths (in the dist bundle) route
+ // through an HTML-based pipeline that requires DOM. In headless CLI mode
+ // there is no DOM, so we intercept them here:
+ // - markdown: applied post-init via the AST-based markdownToPmDoc pipeline (DOM-free)
+ // - html: rejected with a clear error (no DOM-free HTML pipeline exists)
+ const { markdown: markdownOverride, html: htmlOverride, ...passThroughEditorOpts } = options.editorOpenOptions ?? {};
+
+ if (htmlOverride != null) {
+ throw new CliError(
+ 'UNSUPPORTED_FORMAT',
+ 'HTML content override is not supported in headless CLI mode (requires DOM). Use --override-type markdown instead.',
+ );
+ }
+
let editor: Editor;
try {
const isTest = process.env.NODE_ENV === 'test';
@@ -107,6 +125,7 @@ export async function openDocument(
...(isTest ? { telemetry: { enabled: false } } : {}),
ydoc: options.ydoc,
...(options.collaborationProvider != null ? { collaborationProvider: options.collaborationProvider } : {}),
+ ...passThroughEditorOpts,
});
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
@@ -116,6 +135,24 @@ export async function openDocument(
});
}
+ // Apply markdown content override post-init (DOM-free AST pipeline).
+ if (markdownOverride != null) {
+ try {
+ const { doc: newDoc } = markdownToPmDoc(markdownOverride, editor);
+ const tr = editor.state.tr;
+ // The PM Fragment type is opaque at the CLI boundary — cast through unknown.
+ tr.replaceWith(0, editor.state.doc.content.size, newDoc.content as any);
+ editor.dispatch(tr);
+ } catch (error) {
+ editor.destroy();
+ const message = error instanceof Error ? error.message : String(error);
+ throw new CliError('DOCUMENT_OPEN_FAILED', 'Failed to apply content override.', {
+ message,
+ source: meta,
+ });
+ }
+ }
+
const adapters = getDocumentApiAdapters(editor);
const docApi = createDocumentApi(adapters);
Object.defineProperty(editor, 'doc', { value: docApi, configurable: true, writable: true });
diff --git a/apps/cli/src/lib/errors.ts b/apps/cli/src/lib/errors.ts
index 5a1f4c9f6f..8bc194a12c 100644
--- a/apps/cli/src/lib/errors.ts
+++ b/apps/cli/src/lib/errors.ts
@@ -25,6 +25,7 @@ export type CliErrorCode =
| 'TRACK_CHANGE_COMMAND_UNAVAILABLE'
| 'TRACK_CHANGE_CONFLICT'
| 'COMMAND_FAILED'
+ | 'UNSUPPORTED_FORMAT'
| 'TIMEOUT'
// Plan-engine error codes — passed through from document-api adapters
| 'REVISION_CHANGED_SINCE_COMPILE'
diff --git a/apps/cli/src/lib/validate.ts b/apps/cli/src/lib/validate.ts
index 5d678f8d3d..81cfab8811 100644
--- a/apps/cli/src/lib/validate.ts
+++ b/apps/cli/src/lib/validate.ts
@@ -280,15 +280,19 @@ function validateCreateParagraphLocation(value: unknown, path: string): NonNulla
expectOnlyKeys(obj, ['kind', 'nodeId'], path);
const nodeId = expectString(obj.nodeId, `${path}.nodeId`);
+ // nodeId shorthand: wrap in a BlockNodeAddress with nodeType 'paragraph'
+ // as a default. The adapter falls back to nodeId-only lookup when the
+ // full nodeType:nodeId key doesn't match, so this works for any block type.
+ const target = { kind: 'block' as const, nodeType: 'paragraph' as const, nodeId };
if (kind === 'before') {
return {
kind: 'before',
- nodeId,
+ target,
};
}
return {
kind: 'after',
- nodeId,
+ target,
};
}
diff --git a/apps/cli/src/types/super-editor-adapters.d.ts b/apps/cli/src/types/super-editor-adapters.d.ts
index 2ef5ddad00..f540a99d69 100644
--- a/apps/cli/src/types/super-editor-adapters.d.ts
+++ b/apps/cli/src/types/super-editor-adapters.d.ts
@@ -1,8 +1,8 @@
/**
- * Ambient module declaration for the super-editor adapter bridge.
+ * Ambient module declarations for the super-editor bridge.
*
- * At runtime, bun resolves this via the tsconfig `paths` mapping.
- * For typecheck (`tsc --noEmit`), this declaration provides the type
+ * At runtime, bun resolves these via the tsconfig `paths` mappings.
+ * For typecheck (`tsc --noEmit`), these declarations provide the type
* surface without pulling in the super-editor source tree (which uses
* internal path aliases that only its own tsconfig maps).
*/
@@ -17,3 +17,21 @@ declare module '@superdoc/super-editor/document-api-adapters' {
*/
export function getDocumentApiAdapters(editor: unknown): DocumentApiAdapters;
}
+
+declare module '@superdoc/super-editor/markdown' {
+ interface MarkdownConversionResult {
+ /** ProseMirror doc node (typed minimally to avoid PM dependency at the CLI boundary). */
+ doc: { readonly content: unknown };
+ diagnostics: Array<{ nodeType: string; message: string }>;
+ }
+
+ /**
+ * Parse Markdown to a full ProseMirror document node via the AST pipeline
+ * (remark-parse → mdast → PM JSON). DOM-free — works in headless environments.
+ */
+ export function markdownToPmDoc(
+ markdown: string,
+ editor: unknown,
+ options?: { dryRun?: boolean },
+ ): MarkdownConversionResult;
+}
diff --git a/apps/cli/tsconfig.json b/apps/cli/tsconfig.json
index eaee51654a..270d602c31 100644
--- a/apps/cli/tsconfig.json
+++ b/apps/cli/tsconfig.json
@@ -10,6 +10,7 @@
"@superdoc/super-editor/document-api-adapters": [
"../../packages/super-editor/src/document-api-adapters/index.ts"
],
+ "@superdoc/super-editor/markdown": ["../../packages/super-editor/src/core/helpers/markdown/index.ts"],
"@superdoc/super-editor/blank-docx": ["../../packages/super-editor/src/core/blank-docx.ts"]
}
},
diff --git a/apps/docs/document-api/common-workflows.mdx b/apps/docs/document-api/common-workflows.mdx
index 4273e389ce..24e5b8cfe6 100644
--- a/apps/docs/document-api/common-workflows.mdx
+++ b/apps/docs/document-api/common-workflows.mdx
@@ -127,7 +127,7 @@ if (caps.operations['format.apply'].available) {
}
if (caps.global.trackChanges.enabled) {
- editor.doc.insert({ text: 'tracked' }, { changeMode: 'tracked' });
+ editor.doc.insert({ value: 'tracked' }, { changeMode: 'tracked' });
}
```
@@ -137,7 +137,7 @@ Pass `dryRun: true` to validate an operation without applying it:
```ts
const preview = editor.doc.insert(
- { target, text: 'hello' },
+ { target, value: 'hello' },
{ dryRun: true },
);
// preview.success tells you whether the insert would succeed
diff --git a/apps/docs/document-api/reference/index.mdx b/apps/docs/document-api/reference/index.mdx
index 2ce3a62fff..fa4205236d 100644
--- a/apps/docs/document-api/reference/index.mdx
+++ b/apps/docs/document-api/reference/index.mdx
@@ -46,7 +46,7 @@ The tables below are grouped by namespace.
| getNodeById | editor.doc.getNodeById(...) | Retrieve a single node by its unique ID. |
| getText | editor.doc.getText(...) | Extract the plain-text content of the document. |
| info | editor.doc.info(...) | Return document metadata including revision, node count, and capabilities. |
-| insert | editor.doc.insert(...) | Insert text or inline content at a target position. |
+| insert | editor.doc.insert(...) | Insert content at a target position. Supports text (default), markdown, and html content types via the `type` field. |
| replace | editor.doc.replace(...) | Replace content at a target position with new text or inline content. |
| delete | editor.doc.delete(...) | Delete content at a target position. |
diff --git a/apps/docs/document-api/reference/insert.mdx b/apps/docs/document-api/reference/insert.mdx
index b4dbeb9ef8..44cf8d9ff5 100644
--- a/apps/docs/document-api/reference/insert.mdx
+++ b/apps/docs/document-api/reference/insert.mdx
@@ -23,7 +23,8 @@ description: Reference for insert
| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `target` | TextAddress | no | TextAddress |
-| `text` | string | yes | |
+| `type` | enum | no | `"text"`, `"markdown"`, `"html"` |
+| `value` | string | yes | |
### Example request
@@ -37,7 +38,8 @@ description: Reference for insert
"start": 0
}
},
- "text": "Hello, world."
+ "type": "text",
+ "value": "example"
}
```
@@ -100,6 +102,7 @@ _No fields._
- `INVALID_TARGET`
- `NO_OP`
+- `CAPABILITY_UNAVAILABLE`
## Raw schemas
@@ -111,12 +114,20 @@ _No fields._
"target": {
"$ref": "#/$defs/TextAddress"
},
- "text": {
+ "type": {
+ "enum": [
+ "text",
+ "markdown",
+ "html"
+ ],
+ "type": "string"
+ },
+ "value": {
"type": "string"
}
},
"required": [
- "text"
+ "value"
],
"type": "object"
}
@@ -139,7 +150,8 @@ _No fields._
"code": {
"enum": [
"INVALID_TARGET",
- "NO_OP"
+ "NO_OP",
+ "CAPABILITY_UNAVAILABLE"
]
},
"details": {},
@@ -191,7 +203,8 @@ _No fields._
"code": {
"enum": [
"INVALID_TARGET",
- "NO_OP"
+ "NO_OP",
+ "CAPABILITY_UNAVAILABLE"
]
},
"details": {},
diff --git a/apps/docs/document-engine/sdks.mdx b/apps/docs/document-engine/sdks.mdx
index e985eadc87..e61c188423 100644
--- a/apps/docs/document-engine/sdks.mdx
+++ b/apps/docs/document-engine/sdks.mdx
@@ -128,7 +128,7 @@ The SDKs expose all operations from the [Document API](/document-api/overview) p
| Operation | CLI command | Description |
| --- | --- | --- |
-| `doc.open` | `open` | Open a document and create a persistent editing session. |
+| `doc.open` | `open` | Open a document and create a persistent editing session. Optionally override the document body with contentOverride + overrideType (markdown, html, or text). |
| `doc.save` | `save` | Save the current session to the original file or a new path. |
| `doc.close` | `close` | Close the active editing session and clean up resources. |
@@ -148,7 +148,7 @@ The SDKs expose all operations from the [Document API](/document-api/overview) p
| Operation | CLI command | Description |
| --- | --- | --- |
-| `doc.insert` | `insert` | Insert text or inline content at a target position. |
+| `doc.insert` | `insert` | Insert content at a target position. Supports text (default), markdown, and html content types via the `type` field. |
| `doc.replace` | `replace` | Replace content at a target position with new text or inline content. |
| `doc.delete` | `delete` | Delete content at a target position. |
| `doc.mutations.apply` | `mutations apply` | Execute a mutation plan atomically against the document. |
diff --git a/packages/document-api/src/README.md b/packages/document-api/src/README.md
index 6038033d93..aa79cdfcde 100644
--- a/packages/document-api/src/README.md
+++ b/packages/document-api/src/README.md
@@ -102,7 +102,7 @@ Insert text as a tracked change so reviewers can accept or reject it:
```ts
const receipt = editor.doc.insert(
- { text: 'new content' },
+ { value: 'new content' },
{ changeMode: 'tracked' },
);
// receipt.resolution.target contains the resolved insertion point
@@ -147,7 +147,7 @@ if (caps.operations['format.apply'].available) {
editor.doc.format.apply({ target, inline: { bold: true } });
}
if (caps.global.trackChanges.enabled) {
- editor.doc.insert({ text: 'tracked' }, { changeMode: 'tracked' });
+ editor.doc.insert({ value: 'tracked' }, { changeMode: 'tracked' });
}
if (caps.operations['create.heading'].dryRun) {
const preview = editor.doc.create.heading(
diff --git a/packages/document-api/src/contract/contract.test.ts b/packages/document-api/src/contract/contract.test.ts
index 79852c1519..b96444d8c3 100644
--- a/packages/document-api/src/contract/contract.test.ts
+++ b/packages/document-api/src/contract/contract.test.ts
@@ -83,10 +83,10 @@ describe('document-api contract catalog', () => {
additionalProperties?: boolean;
};
- // Simplified schema: target (optional) + text (required), no allOf constraints
+ // Simplified schema: target (optional) + value (required) + type (optional enum), no allOf constraints
expect(insertInputSchema.type).toBe('object');
- expect(Object.keys(insertInputSchema.properties!).sort()).toEqual(['target', 'text']);
- expect(insertInputSchema.required).toEqual(['text']);
+ expect(Object.keys(insertInputSchema.properties!).sort()).toEqual(['target', 'type', 'value']);
+ expect(insertInputSchema.required).toEqual(['value']);
expect(insertInputSchema.allOf).toBeUndefined();
expect(insertInputSchema.additionalProperties).toBe(false);
});
diff --git a/packages/document-api/src/contract/operation-definitions.ts b/packages/document-api/src/contract/operation-definitions.ts
index c3a89b1793..649e8019e2 100644
--- a/packages/document-api/src/contract/operation-definitions.ts
+++ b/packages/document-api/src/contract/operation-definitions.ts
@@ -201,13 +201,14 @@ export const OPERATION_DEFINITIONS = {
insert: {
memberPath: 'insert',
- description: 'Insert text or inline content at a target position.',
+ description:
+ 'Insert content at a target position. Supports text (default), markdown, and html content types via the `type` field.',
requiresDocumentContext: true,
metadata: mutationOperation({
idempotency: 'non-idempotent',
supportsDryRun: true,
supportsTrackedMode: true,
- possibleFailureCodes: ['INVALID_TARGET', 'NO_OP'],
+ possibleFailureCodes: ['INVALID_TARGET', 'NO_OP', 'CAPABILITY_UNAVAILABLE'],
throws: [...T_NOT_FOUND_CAPABLE, 'INVALID_TARGET'],
}),
referenceDocPath: 'insert.mdx',
diff --git a/packages/document-api/src/contract/schemas.ts b/packages/document-api/src/contract/schemas.ts
index 290b2efe55..ca6a913094 100644
--- a/packages/document-api/src/contract/schemas.ts
+++ b/packages/document-api/src/contract/schemas.ts
@@ -894,9 +894,10 @@ const strictEmptyObjectSchema = objectSchema({});
const insertInputSchema = objectSchema(
{
target: textAddressSchema,
- text: { type: 'string' },
+ value: { type: 'string' },
+ type: { type: 'string', enum: ['text', 'markdown', 'html'] },
},
- ['text'],
+ ['value'],
);
// ---------------------------------------------------------------------------
diff --git a/packages/document-api/src/index.test.ts b/packages/document-api/src/index.test.ts
index 348459a138..0a8c9db33e 100644
--- a/packages/document-api/src/index.test.ts
+++ b/packages/document-api/src/index.test.ts
@@ -92,15 +92,17 @@ function makeCommentsAdapter(): CommentsAdapter {
}
function makeWriteAdapter(): WriteAdapter {
+ const defaultReceipt = {
+ success: true as const,
+ resolution: {
+ target: { kind: 'text' as const, blockId: 'p1', range: { start: 0, end: 0 } },
+ range: { from: 1, to: 1 },
+ text: '',
+ },
+ };
return {
- write: vi.fn(() => ({
- success: true as const,
- resolution: {
- target: { kind: 'text' as const, blockId: 'p1', range: { start: 0, end: 0 } },
- range: { from: 1, to: 1 },
- text: '',
- },
- })),
+ write: vi.fn(() => defaultReceipt),
+ insertStructured: vi.fn(() => defaultReceipt),
};
}
@@ -537,19 +539,19 @@ describe('createDocumentApi', () => {
});
const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 2 } } as const;
- api.insert({ text: 'Hi' });
- api.insert({ target, text: 'Yo' });
+ api.insert({ value: 'Hi' });
+ api.insert({ target, value: 'Yo' });
api.replace({ target, text: 'Hello' }, { changeMode: 'tracked' });
api.delete({ target });
expect(writeAdpt.write).toHaveBeenNthCalledWith(
1,
- { kind: 'insert', text: 'Hi' },
+ { kind: 'insert', text: 'Hi' }, // write request keeps `text` (internal protocol)
{ changeMode: 'direct', dryRun: false },
);
expect(writeAdpt.write).toHaveBeenNthCalledWith(
2,
- { kind: 'insert', target, text: 'Yo' },
+ { kind: 'insert', target, text: 'Yo' }, // write request keeps `text` (internal protocol)
{ changeMode: 'direct', dryRun: false },
);
expect(writeAdpt.write).toHaveBeenNthCalledWith(
@@ -1063,14 +1065,14 @@ describe('createDocumentApi', () => {
it('accepts no-target (default insertion point)', () => {
const api = makeApi();
- const result = api.insert({ text: 'hello' });
+ const result = api.insert({ value: 'hello' });
expect(result.success).toBe(true);
});
it('accepts canonical target', () => {
const api = makeApi();
const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 0 } } as const;
- const result = api.insert({ target, text: 'hello' });
+ const result = api.insert({ target, value: 'hello' });
expect(result.success).toBe(true);
});
@@ -1079,7 +1081,7 @@ describe('createDocumentApi', () => {
it('rejects null target', () => {
const api = makeApi();
expectValidationError(
- () => api.insert({ target: null, text: 'hello' } as any),
+ () => api.insert({ target: null, value: 'hello' } as any),
'target must be a text address object',
);
});
@@ -1087,16 +1089,21 @@ describe('createDocumentApi', () => {
it('rejects malformed target objects', () => {
const api = makeApi();
expectValidationError(
- () => api.insert({ target: { kind: 'text', blockId: 'p1' }, text: 'hello' } as any),
+ () => api.insert({ target: { kind: 'text', blockId: 'p1' }, value: 'hello' } as any),
'target must be a text address object',
);
});
// -- Type checks --
- it('rejects non-string text', () => {
+ it('rejects non-string value', () => {
+ const api = makeApi();
+ expectValidationError(() => api.insert({ value: 42 } as any), 'value must be a string');
+ });
+
+ it('rejects invalid type enum', () => {
const api = makeApi();
- expectValidationError(() => api.insert({ text: 42 } as any), 'text must be a string');
+ expectValidationError(() => api.insert({ value: 'hi', type: 'xml' } as any), 'type must be one of');
});
// -- Validation error shape --
@@ -1104,7 +1111,7 @@ describe('createDocumentApi', () => {
it('throws DocumentApiValidationError (not plain Error)', () => {
const api = makeApi();
try {
- api.insert({ text: 42 } as any);
+ api.insert({ value: 42 } as any);
expect.fail('Expected error');
} catch (err: unknown) {
expect((err as Error).constructor.name).toBe('DocumentApiValidationError');
@@ -1133,27 +1140,27 @@ describe('createDocumentApi', () => {
it('rejects unknown top-level fields', () => {
const api = makeApi();
- expectValidationError(() => api.insert({ text: 'hi', block_id: 'abc' } as any), 'Unknown field "block_id"');
+ expectValidationError(() => api.insert({ value: 'hi', block_id: 'abc' } as any), 'Unknown field "block_id"');
});
it('rejects flat blockId as unknown field', () => {
const api = makeApi();
- expectValidationError(() => api.insert({ blockId: 'p1', text: 'hello' } as any), 'Unknown field "blockId"');
+ expectValidationError(() => api.insert({ blockId: 'p1', value: 'hello' } as any), 'Unknown field "blockId"');
});
it('rejects flat offset as unknown field', () => {
const api = makeApi();
- expectValidationError(() => api.insert({ text: 'hello', offset: 5 } as any), 'Unknown field "offset"');
+ expectValidationError(() => api.insert({ value: 'hello', offset: 5 } as any), 'Unknown field "offset"');
});
it('rejects pos as unknown field', () => {
const api = makeApi();
- expectValidationError(() => api.insert({ text: 'hi', pos: 3 } as any), 'Unknown field "pos"');
+ expectValidationError(() => api.insert({ value: 'hi', pos: 3 } as any), 'Unknown field "pos"');
});
// -- Backward compatibility parity --
- it('sends same adapter request for insert({ text }) as before', () => {
+ it('maps insert({ value }) to internal write request with text field', () => {
const writeAdpt = makeWriteAdapter();
const api = createDocumentApi({
find: makeFindAdapter(QUERY_RESULT),
@@ -1169,14 +1176,14 @@ describe('createDocumentApi', () => {
lists: makeListsAdapter(),
});
- api.insert({ text: 'hello' });
+ api.insert({ value: 'hello' });
expect(writeAdpt.write).toHaveBeenCalledWith(
{ kind: 'insert', text: 'hello' },
{ changeMode: 'direct', dryRun: false },
);
});
- it('sends same adapter request for insert({ target, text }) as before', () => {
+ it('maps insert({ target, value }) to internal write request with text field', () => {
const writeAdpt = makeWriteAdapter();
const api = createDocumentApi({
find: makeFindAdapter(QUERY_RESULT),
@@ -1193,12 +1200,103 @@ describe('createDocumentApi', () => {
});
const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 2 } } as const;
- api.insert({ target, text: 'hello' });
+ api.insert({ target, value: 'hello' });
expect(writeAdpt.write).toHaveBeenCalledWith(
{ kind: 'insert', target, text: 'hello' },
{ changeMode: 'direct', dryRun: false },
);
});
+
+ // -- Structured insert routing (markdown / html) --
+
+ it('routes type:"markdown" insert to insertStructured instead of write', () => {
+ const writeAdpt = makeWriteAdapter();
+ const api = createDocumentApi({
+ find: makeFindAdapter(QUERY_RESULT),
+ getNode: makeGetNodeAdapter(PARAGRAPH_INFO),
+ getText: makeGetTextAdapter(),
+ info: makeInfoAdapter(),
+ capabilities: makeCapabilitiesAdapter(),
+ comments: makeCommentsAdapter(),
+ write: writeAdpt,
+ format: makeFormatAdapter(),
+ trackChanges: makeTrackChangesAdapter(),
+ create: makeCreateAdapter(),
+ lists: makeListsAdapter(),
+ });
+
+ api.insert({ value: '# Heading', type: 'markdown' });
+ expect(writeAdpt.insertStructured).toHaveBeenCalledTimes(1);
+ expect(writeAdpt.insertStructured).toHaveBeenCalledWith({ value: '# Heading', type: 'markdown' }, undefined);
+ expect(writeAdpt.write).not.toHaveBeenCalled();
+ });
+
+ it('routes type:"html" insert to insertStructured instead of write', () => {
+ const writeAdpt = makeWriteAdapter();
+ const api = createDocumentApi({
+ find: makeFindAdapter(QUERY_RESULT),
+ getNode: makeGetNodeAdapter(PARAGRAPH_INFO),
+ getText: makeGetTextAdapter(),
+ info: makeInfoAdapter(),
+ capabilities: makeCapabilitiesAdapter(),
+ comments: makeCommentsAdapter(),
+ write: writeAdpt,
+ format: makeFormatAdapter(),
+ trackChanges: makeTrackChangesAdapter(),
+ create: makeCreateAdapter(),
+ lists: makeListsAdapter(),
+ });
+
+  api.insert({ value: '<p>Hello</p>', type: 'html' });
+ expect(writeAdpt.insertStructured).toHaveBeenCalledTimes(1);
+  expect(writeAdpt.insertStructured).toHaveBeenCalledWith({ value: '<p>Hello</p>', type: 'html' }, undefined);
+ expect(writeAdpt.write).not.toHaveBeenCalled();
+ });
+
+ it('routes type:"text" (or unspecified type) insert to write, not insertStructured', () => {
+ const writeAdpt = makeWriteAdapter();
+ const api = createDocumentApi({
+ find: makeFindAdapter(QUERY_RESULT),
+ getNode: makeGetNodeAdapter(PARAGRAPH_INFO),
+ getText: makeGetTextAdapter(),
+ info: makeInfoAdapter(),
+ capabilities: makeCapabilitiesAdapter(),
+ comments: makeCommentsAdapter(),
+ write: writeAdpt,
+ format: makeFormatAdapter(),
+ trackChanges: makeTrackChangesAdapter(),
+ create: makeCreateAdapter(),
+ lists: makeListsAdapter(),
+ });
+
+ api.insert({ value: 'plain text', type: 'text' });
+ expect(writeAdpt.write).toHaveBeenCalledTimes(1);
+ expect(writeAdpt.insertStructured).not.toHaveBeenCalled();
+ });
+
+ it('forwards target to insertStructured for markdown insert', () => {
+ const writeAdpt = makeWriteAdapter();
+ const api = createDocumentApi({
+ find: makeFindAdapter(QUERY_RESULT),
+ getNode: makeGetNodeAdapter(PARAGRAPH_INFO),
+ getText: makeGetTextAdapter(),
+ info: makeInfoAdapter(),
+ capabilities: makeCapabilitiesAdapter(),
+ comments: makeCommentsAdapter(),
+ write: writeAdpt,
+ format: makeFormatAdapter(),
+ trackChanges: makeTrackChangesAdapter(),
+ create: makeCreateAdapter(),
+ lists: makeListsAdapter(),
+ });
+
+ const target = { kind: 'text', blockId: 'p1', range: { start: 0, end: 0 } } as const;
+ api.insert({ target, value: '**bold**', type: 'markdown' });
+ expect(writeAdpt.insertStructured).toHaveBeenCalledWith(
+ { target, value: '**bold**', type: 'markdown' },
+ undefined,
+ );
+ });
});
describe('replace target validation', () => {
diff --git a/packages/document-api/src/index.ts b/packages/document-api/src/index.ts
index e574c43bc4..0a394af436 100644
--- a/packages/document-api/src/index.ts
+++ b/packages/document-api/src/index.ts
@@ -262,7 +262,7 @@ export type {
} from './comments/comments.js';
export type { CommentInfo, CommentsListQuery, CommentsListResult } from './comments/comments.types.js';
export { DocumentApiValidationError } from './errors.js';
-export type { InsertInput } from './insert/insert.js';
+export type { InsertInput, InsertContentType } from './insert/insert.js';
export type { ReplaceInput } from './replace/replace.js';
export type { DeleteInput } from './delete/delete.js';
diff --git a/packages/document-api/src/insert/insert.ts b/packages/document-api/src/insert/insert.ts
index bb54330371..994dc7da02 100644
--- a/packages/document-api/src/insert/insert.ts
+++ b/packages/document-api/src/insert/insert.ts
@@ -3,16 +3,26 @@ import type { TextAddress, TextMutationReceipt } from '../types/index.js';
import { DocumentApiValidationError } from '../errors.js';
import { isRecord, isTextAddress, assertNoUnknownFields } from '../validation-primitives.js';
+/** Content format for the insert operation payload. */
+export type InsertContentType = 'text' | 'markdown' | 'html';
+
+/** Input payload for the `doc.insert` operation. */
export interface InsertInput {
+ /** Optional insertion target. When omitted, adapters resolve a default insertion point. */
target?: TextAddress;
- text: string;
+ /** The content to insert. Interpreted according to {@link InsertInput.type}. */
+ value: string;
+ /** Content format. Defaults to `'text'` when omitted. */
+ type?: InsertContentType;
}
/**
* Strict top-level allowlist for InsertInput fields.
* Any key not in this list is rejected as an unknown field.
*/
-const INSERT_INPUT_ALLOWED_KEYS = new Set(['text', 'target']);
+const INSERT_INPUT_ALLOWED_KEYS = new Set(['value', 'type', 'target']);
+
+const VALID_INSERT_TYPES: ReadonlySet<InsertContentType> = new Set(['text', 'markdown', 'html']);
/**
* Validates InsertInput and throws DocumentApiValidationError on violations.
@@ -21,7 +31,8 @@ const INSERT_INPUT_ALLOWED_KEYS = new Set(['text', 'target']);
* 0. Input shape guard (must be non-null plain object)
* 1. Unknown field rejection (strict allowlist)
* 2. Target type check (target shape)
- * 3. Text type check
+ * 3. Value type check (must be a string)
+ * 4. Type enum check (must be valid content type)
*/
function validateInsertInput(input: unknown): asserts input is InsertInput {
// Step 0: Input shape guard
@@ -32,7 +43,7 @@ function validateInsertInput(input: unknown): asserts input is InsertInput {
// Step 1: Unknown field rejection (strict allowlist)
assertNoUnknownFields(input, INSERT_INPUT_ALLOWED_KEYS, 'insert');
- const { target, text } = input;
+ const { target, value, type } = input;
// Step 2: Target type check
if (target !== undefined && !isTextAddress(target)) {
@@ -42,13 +53,25 @@ function validateInsertInput(input: unknown): asserts input is InsertInput {
});
}
- // Step 3: Text type check
- if (typeof text !== 'string') {
- throw new DocumentApiValidationError('INVALID_TARGET', `text must be a string, got ${typeof text}.`, {
- field: 'text',
- value: text,
+ // Step 3: Value type check
+ if (typeof value !== 'string') {
+ throw new DocumentApiValidationError('INVALID_TARGET', `value must be a string, got ${typeof value}.`, {
+ field: 'value',
+ value,
});
}
+
+ // Step 4: Type enum check
+ if (type !== undefined && (typeof type !== 'string' || !VALID_INSERT_TYPES.has(type))) {
+ throw new DocumentApiValidationError(
+ 'INVALID_TARGET',
+ `type must be one of: text, markdown, html. Got "${type}".`,
+ {
+ field: 'type',
+ value: type,
+ },
+ );
+ }
}
export function executeInsert(
@@ -58,10 +81,17 @@ export function executeInsert(
): TextMutationReceipt {
validateInsertInput(input);
- const { target, text } = input;
+ const { target, value } = input;
+ const contentType = input.type ?? 'text';
+
+ // For non-text content types, delegate to the adapter's structured insert path.
+ // The adapter (plan-wrappers) handles markdown/html conversion and block insertion.
+ if (contentType !== 'text') {
+ return adapter.insertStructured(input, options);
+ }
- // Canonical target or no-target (default insertion point)
- const request = target ? { kind: 'insert' as const, target, text } : { kind: 'insert' as const, text };
+ // Text path: use the existing write pipeline
+ const request = target ? { kind: 'insert' as const, target, text: value } : { kind: 'insert' as const, text: value };
return executeWrite(adapter, request, options);
}
diff --git a/packages/document-api/src/invoke/invoke.test.ts b/packages/document-api/src/invoke/invoke.test.ts
index b395f0be5c..08d7f4623a 100644
--- a/packages/document-api/src/invoke/invoke.test.ts
+++ b/packages/document-api/src/invoke/invoke.test.ts
@@ -75,6 +75,14 @@ function makeAdapters() {
text: '',
},
})),
+ insertStructured: vi.fn(() => ({
+ success: true as const,
+ resolution: {
+ target: { kind: 'text' as const, blockId: 'p1', range: { start: 0, end: 0 } },
+ range: { from: 1, to: 1 },
+ text: '',
+ },
+ })),
};
const formatReceipt = () => ({
success: true as const,
@@ -229,7 +237,7 @@ describe('invoke', () => {
it('insert: invoke returns same result as direct call', () => {
const { adapters } = makeAdapters();
const api = createDocumentApi(adapters);
- const input = { text: 'hello' };
+ const input = { value: 'hello' };
const direct = api.insert(input);
const invoked = api.invoke({ operationId: 'insert', input });
expect(invoked).toEqual(direct);
@@ -238,7 +246,7 @@ describe('invoke', () => {
it('insert: invoke forwards options through to adapter-backed execution', () => {
const { adapters, writeAdapter } = makeAdapters();
const api = createDocumentApi(adapters);
- api.invoke({ operationId: 'insert', input: { text: 'hello' }, options: { changeMode: 'tracked' } });
+ api.invoke({ operationId: 'insert', input: { value: 'hello' }, options: { changeMode: 'tracked' } });
expect(writeAdapter.write).toHaveBeenCalledWith(
{ kind: 'insert', text: 'hello' },
{ changeMode: 'tracked', dryRun: false },
@@ -397,7 +405,7 @@ describe('invoke', () => {
it('forwards unknown options through to the handler', () => {
const { adapters, writeAdapter } = makeAdapters();
const api = createDocumentApi(adapters);
- const input: unknown = { text: 'dynamic' };
+ const input: unknown = { value: 'dynamic' };
const options: unknown = { changeMode: 'tracked' };
api.invoke({ operationId: 'insert', input, options });
expect(writeAdapter.write).toHaveBeenCalledWith(
diff --git a/packages/document-api/src/overview-examples.test.ts b/packages/document-api/src/overview-examples.test.ts
index ede440400c..d94b16994f 100644
--- a/packages/document-api/src/overview-examples.test.ts
+++ b/packages/document-api/src/overview-examples.test.ts
@@ -69,7 +69,10 @@ function makeInfoAdapter() {
}
function makeWriteAdapter() {
- return { write: vi.fn(() => makeTextMutationReceipt()) };
+ return {
+ write: vi.fn(() => makeTextMutationReceipt()),
+ insertStructured: vi.fn(() => makeTextMutationReceipt()),
+ };
}
function makeFormatAdapter() {
@@ -437,7 +440,7 @@ describe('overview.mdx examples', () => {
it('insert text with changeMode tracked', () => {
const doc = makeApi();
- const receipt = doc.insert({ text: 'new content' }, { changeMode: 'tracked' });
+ const receipt = doc.insert({ value: 'new content' }, { changeMode: 'tracked' });
expect(receipt.resolution).toBeDefined();
expect(receipt.resolution.target).toBeDefined();
@@ -457,7 +460,7 @@ describe('overview.mdx examples', () => {
}
if (caps.global.trackChanges.enabled) {
- doc.insert({ text: 'tracked' }, { changeMode: 'tracked' });
+ doc.insert({ value: 'tracked' }, { changeMode: 'tracked' });
}
// Both branches should execute with our fully-capable mock
@@ -472,7 +475,7 @@ describe('overview.mdx examples', () => {
const doc = makeApi();
const target = TEXT_TARGET;
- const preview = doc.insert({ target, text: 'hello' }, { dryRun: true });
+ const preview = doc.insert({ target, value: 'hello' }, { dryRun: true });
// preview.success tells you whether the insert would succeed
// preview.resolution shows the resolved target range
@@ -509,7 +512,7 @@ describe('src/README.md workflow examples', () => {
it('insert in tracked mode and access receipt properties', () => {
const doc = makeApi();
- const receipt = doc.insert({ text: 'new content' }, { changeMode: 'tracked' });
+ const receipt = doc.insert({ value: 'new content' }, { changeMode: 'tracked' });
// receipt.resolution.target contains the resolved insertion point
// receipt.inserted contains TrackedChangeAddress entries for the new change
@@ -571,7 +574,7 @@ describe('src/README.md workflow examples', () => {
doc.format.apply({ target, inline: { bold: true } });
}
if (caps.global.trackChanges.enabled) {
- doc.insert({ text: 'tracked' }, { changeMode: 'tracked' });
+ doc.insert({ value: 'tracked' }, { changeMode: 'tracked' });
}
if (caps.operations['create.heading'].dryRun) {
const preview = doc.create.heading({ level: 2, text: 'Preview' }, { dryRun: true });
diff --git a/packages/document-api/src/types/receipt.ts b/packages/document-api/src/types/receipt.ts
index beb44d5caa..056a055e84 100644
--- a/packages/document-api/src/types/receipt.ts
+++ b/packages/document-api/src/types/receipt.ts
@@ -22,6 +22,7 @@ export type ReceiptFailureCode =
| 'REVISION_CHANGED_SINCE_COMPILE'
| 'INVALID_INSERTION_CONTEXT'
| 'DOCUMENT_IDENTITY_CONFLICT'
+ | 'UNSUPPORTED_ENVIRONMENT'
| 'INTERNAL_ERROR';
export type ReceiptFailure = {
diff --git a/packages/document-api/src/write/write.ts b/packages/document-api/src/write/write.ts
index e071758604..0399a3d5fb 100644
--- a/packages/document-api/src/write/write.ts
+++ b/packages/document-api/src/write/write.ts
@@ -1,5 +1,6 @@
import type { TextAddress, TextMutationReceipt } from '../types/index.js';
import type { BlockRelativeLocator, BlockRelativeRange } from './locator.js';
+import type { InsertInput } from '../insert/insert.js';
export type ChangeMode = 'direct' | 'tracked';
@@ -49,6 +50,8 @@ export type WriteRequest = InsertWriteRequest | ReplaceWriteRequest | DeleteWrit
export interface WriteAdapter {
write(request: WriteRequest, options?: MutationOptions): TextMutationReceipt;
+ /** Structured insert for markdown/html content types. */
+ insertStructured(input: InsertInput, options?: MutationOptions): TextMutationReceipt;
}
export function normalizeMutationOptions(options?: MutationOptions): MutationOptions {
diff --git a/packages/super-editor/package.json b/packages/super-editor/package.json
index 935ab73f81..62aa5a461c 100644
--- a/packages/super-editor/package.json
+++ b/packages/super-editor/package.json
@@ -82,7 +82,6 @@
"he": "catalog:",
"jszip": "catalog:",
"lodash": "^4.17.21",
- "marked": "catalog:",
"prosemirror-commands": "catalog:",
"prosemirror-dropcursor": "catalog:",
"prosemirror-gapcursor": "catalog:",
@@ -99,6 +98,7 @@
"rehype-parse": "catalog:",
"rehype-remark": "catalog:",
"remark-gfm": "catalog:",
+ "remark-parse": "catalog:",
"remark-stringify": "catalog:",
"unified": "catalog:",
"uuid": "catalog:",
@@ -114,6 +114,7 @@
},
"devDependencies": {
"@floating-ui/dom": "catalog:",
+ "@types/mdast": "catalog:",
"@superdoc/common": "workspace:*",
"@superdoc/document-api": "workspace:*",
"@superdoc/contracts": "workspace:*",
diff --git a/packages/super-editor/src/core/Editor.api-contracts.test.js b/packages/super-editor/src/core/Editor.api-contracts.test.js
index 6917093303..2332a5a5a3 100644
--- a/packages/super-editor/src/core/Editor.api-contracts.test.js
+++ b/packages/super-editor/src/core/Editor.api-contracts.test.js
@@ -131,9 +131,7 @@ describe('Editor - API Contracts (Regression Prevention)', () => {
return new Promise((resolve) => {
setTimeout(() => {
expect(onUnsupportedContent).toHaveBeenCalledTimes(1);
- expect(onUnsupportedContent.mock.calls[0][0]).toEqual([
- expect.objectContaining({ tagName: 'VIDEO', count: 1 }),
- ]);
+ expect(onUnsupportedContent.mock.calls[0][0]).toEqual([expect.objectContaining({ tagName: 'VIDEO' })]);
resolve();
}, 10);
});
diff --git a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js
index 6268a36a67..0c6abfdc95 100644
--- a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js
+++ b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js
@@ -1,5 +1,37 @@
-import { describe, it, expect } from 'vitest';
-import { convertMarkdownToHTML } from './importMarkdown.js';
+import { beforeAll, beforeEach, afterEach, describe, it, expect } from 'vitest';
+import { createDocFromMarkdown } from './importMarkdown.js';
+import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js';
+
+let docData;
+
+beforeAll(async () => {
+ docData = await loadTestDataForEditorTests('blank-doc.docx');
+});
+
+let editor;
+
+beforeEach(() => {
+ ({ editor } = initTestEditor({
+ content: docData.docx,
+ media: docData.media,
+ mediaFiles: docData.mediaFiles,
+ fonts: docData.fonts,
+ }));
+});
+
+afterEach(() => {
+ editor?.destroy();
+ editor = null;
+});
+
+function collectNodeTypes(doc) {
+ const types = [];
+ doc.descendants((node) => {
+ types.push(node.type.name);
+ return true;
+ });
+ return types;
+}
describe('markdown to DOCX integration', () => {
it('converts complete markdown document with headings and lists', () => {
@@ -17,16 +49,13 @@ More text here.
1. Numbered item
2. Second item`;
- const html = convertMarkdownToHTML(markdown);
+ const doc = createDocFromMarkdown(markdown, editor);
- // Verify all elements are converted
- expect(html).toContain('Main Title
');
- expect(html).toContain('Section 2
');
- expect(html).toContain('');
- expect(html).toContain('');
+ expect(doc).toBeDefined();
+ expect(doc.type.name).toBe('doc');
- // Verify spacing is added between paragraphs and lists
- expect(html).toContain('
\n
\n');
- expect(html).toContain('\n
\n');
+ const types = collectNodeTypes(doc);
+ expect(types).toContain('paragraph');
+ expect(types).toContain('run');
});
});
diff --git a/packages/super-editor/src/core/helpers/importMarkdown.js b/packages/super-editor/src/core/helpers/importMarkdown.js
index 0aef461f39..9244849569 100644
--- a/packages/super-editor/src/core/helpers/importMarkdown.js
+++ b/packages/super-editor/src/core/helpers/importMarkdown.js
@@ -1,37 +1,45 @@
// @ts-check
-import { marked } from 'marked';
-import { createDocFromHTML } from './importHtml.js';
-
-// Configure marked once
-marked.use({
- breaks: false, // Use proper paragraphs, not
tags
- gfm: true, // GitHub Flavored Markdown support
-});
+import { markdownToPmDoc } from './markdown/markdownToPmContent.js';
/**
- * Create a ProseMirror document from Markdown content
+ * Create a ProseMirror document from Markdown content.
+ *
+ * Delegates to the AST-based conversion pipeline (remark-parse → mdast → PM JSON).
+ * The old `marked` → HTML → HTML importer path is no longer used.
+ *
* @param {string} markdown - Markdown content
- * @param {Object} editor - Editor instance
+ * @param {import('../Editor').Editor} editor - Editor instance
* @param {Object} [options={}] - Import options
- * @returns {Object} Document node
+ * @param {boolean} [options.isImport] - Whether this is an import operation
+ * @param {Document | null} [options.document] - Optional DOM document (unused by AST path)
+ * @param {((items: import('./catchAllSchema.js').UnsupportedContentItem[]) => void) | null} [options.onUnsupportedContent] - Callback for unsupported items
+ * @param {boolean} [options.warnOnUnsupportedContent] - Emit console.warn for unsupported items
+ * @returns {import('prosemirror-model').Node} Document node
*/
export function createDocFromMarkdown(markdown, editor, options = {}) {
- const html = convertMarkdownToHTML(markdown);
- return createDocFromHTML(html, editor, options);
-}
+ const { doc, diagnostics } = markdownToPmDoc(markdown, editor);
-/**
- * Convert Markdown to HTML with SuperDoc/DOCX compatibility
- * @param {string} markdown - Markdown content
- * @returns {string} HTML content
- */
-export function convertMarkdownToHTML(markdown) {
- let html = marked.parse(markdown, { async: false });
+ // Surface diagnostics through the unsupported content callback if provided.
+ // Aggregate by tag name to match the HTML importer's deduplication behavior.
+ if (diagnostics.length > 0) {
+ /** @type {Map<string, { tagName: string, outerHTML: string, count: number }>} */
+ const byTag = new Map();
+ for (const d of diagnostics) {
+ const existing = byTag.get(d.nodeType);
+ if (existing) {
+ existing.count += 1;
+ } else {
+ byTag.set(d.nodeType, { tagName: d.nodeType, outerHTML: d.message, count: 1 });
+ }
+ }
+ const items = [...byTag.values()];
+
+ if (options.onUnsupportedContent) {
+ options.onUnsupportedContent(items);
+ } else if (options.warnOnUnsupportedContent) {
+ console.warn('[super-editor] Unsupported Markdown content during import:', items);
+ }
+ }
- // Add spacing between paragraphs and lists for proper DOCX rendering
- return html
- .replace(/<\/p>\n/g, '\n
\n')
- .replace(/<\/p>\n/g, '\n
\n')
- .replace(/<\/ul>\n\n
\n\n\n
\n ({
- createDocFromHTML: vi.fn(),
-}));
+let docData;
-describe('markdown import', () => {
- it('converts markdown to HTML with proper spacing', () => {
- const markdown = `# Heading
+beforeAll(async () => {
+ docData = await loadTestDataForEditorTests('blank-doc.docx');
+});
-Paragraph text
+let editor;
-- List item`;
+beforeEach(() => {
+ ({ editor } = initTestEditor({
+ content: docData.docx,
+ media: docData.media,
+ mediaFiles: docData.mediaFiles,
+ fonts: docData.fonts,
+ }));
+});
- const html = convertMarkdownToHTML(markdown);
+afterEach(() => {
+ editor?.destroy();
+ editor = null;
+});
- expect(html).toContain('Heading
');
- expect(html).toContain('Paragraph text
');
- expect(html).toContain('\n
\n'); // Spacing before list
+describe('markdown import', () => {
+ it('creates a ProseMirror doc from markdown headings', () => {
+ const doc = createDocFromMarkdown('# Hello', editor);
+ expect(doc).toBeDefined();
+ expect(doc.type.name).toBe('doc');
+ expect(doc.childCount).toBeGreaterThan(0);
});
- it('creates ProseMirror doc from markdown', () => {
- const mockSchema = { nodes: {} };
- const mockDoc = { type: 'doc' };
- const mockOptions = { isImport: true };
- createDocFromHTML.mockReturnValue(mockDoc);
-
- const result = createDocFromMarkdown('# Test', mockSchema, mockOptions);
+ it('surfaces unsupported content through the callback', () => {
+ const onUnsupportedContent = vi.fn();
+ createDocFromMarkdown('<video src="movie.mp4"></video>', editor, {
+ onUnsupportedContent,
+ });
- expect(createDocFromHTML).toHaveBeenCalledWith(
- 'Test
\n', // Exact string that marked.parse returns
- mockSchema,
- { isImport: true },
+ expect(onUnsupportedContent).toHaveBeenCalled();
+ expect(onUnsupportedContent.mock.calls[0][0]).toEqual(
+ expect.arrayContaining([expect.objectContaining({ tagName: 'VIDEO' })]),
);
- expect(result).toBe(mockDoc);
});
});
diff --git a/packages/super-editor/src/core/helpers/markdown/index.ts b/packages/super-editor/src/core/helpers/markdown/index.ts
new file mode 100644
index 0000000000..922c58f581
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/index.ts
@@ -0,0 +1,17 @@
+/**
+ * Markdown → ProseMirror conversion module.
+ *
+ * Public API:
+ * - `markdownToPmDoc` — full document conversion (for body replacement)
+ * - `markdownToPmFragment` — fragment conversion (for insertion)
+ * - `parseMarkdownToAst` — raw mdast parsing (for advanced use)
+ */
+
+export { markdownToPmDoc, markdownToPmFragment } from './markdownToPmContent.js';
+export { parseMarkdownToAst } from './parseMarkdownAst.js';
+export type {
+ MarkdownConversionOptions,
+ MarkdownConversionResult,
+ MarkdownFragmentResult,
+ MarkdownDiagnostic,
+} from './types.js';
diff --git a/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts
new file mode 100644
index 0000000000..13939051e4
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts
@@ -0,0 +1,102 @@
+/**
+ * High-level entry points for Markdown → ProseMirror conversion.
+ *
+ * Exports two functions:
+ * - `markdownToPmDoc` — full document (for doc.open body replacement)
+ * - `markdownToPmFragment` — fragment (for doc.insert structured insertion)
+ *
+ * Both parse Markdown to mdast, convert to PM JSON, and materialize
+ * via the editor's schema. The conversion is synchronous and does not
+ * perform network I/O (image URLs are stored as-is).
+ */
+
+import { Fragment } from 'prosemirror-model';
+import type { Node as PmNode } from 'prosemirror-model';
+import type { Editor } from '../../Editor.js';
+import { parseMarkdownToAst } from './parseMarkdownAst.js';
+import { convertMdastToBlocks } from './mdastToProseMirror.js';
+import { wrapTextsInRuns } from '../../inputRules/docx-paste/docx-paste.js';
+import type {
+ MarkdownConversionOptions,
+ MarkdownConversionResult,
+ MarkdownFragmentResult,
+ MdastConversionContext,
+} from './types.js';
+
+// ---------------------------------------------------------------------------
+// Full document conversion (for body replacement in doc.open)
+// ---------------------------------------------------------------------------
+
+/**
+ * Parse Markdown and produce a full ProseMirror document node.
+ *
+ * The result replaces the entire document body. Template-level OOXML context
+ * (styles.xml, settings, numbering infrastructure) is preserved by the caller.
+ */
+export function markdownToPmDoc(
+ markdown: string,
+ editor: Editor,
+ options: MarkdownConversionOptions = {},
+): MarkdownConversionResult {
+ const { blocks, diagnostics } = parseAndConvert(markdown, editor, options);
+
+ const docJson = {
+ type: 'doc',
+ content: blocks.length > 0 ? blocks : [{ type: 'paragraph' }],
+ };
+
+ let doc: PmNode = editor.schema.nodeFromJSON(docJson);
+ doc = wrapTextsInRuns(doc) as PmNode;
+
+ return { doc, diagnostics };
+}
+
+// ---------------------------------------------------------------------------
+// Fragment conversion (for structured insertion in doc.insert)
+// ---------------------------------------------------------------------------
+
+/**
+ * Parse Markdown and produce a ProseMirror Fragment for insertion at a position.
+ *
+ * The fragment can contain multiple block nodes (paragraphs, tables, lists, etc.)
+ * and is suitable for `tr.replaceWith(from, to, fragment)`.
+ */
+export function markdownToPmFragment(
+ markdown: string,
+ editor: Editor,
+ options: MarkdownConversionOptions = {},
+): MarkdownFragmentResult {
+ const { blocks, diagnostics } = parseAndConvert(markdown, editor, options);
+
+ if (blocks.length === 0) {
+ return { fragment: Fragment.empty, diagnostics };
+ }
+
+ const nodes = blocks.map((json) => editor.schema.nodeFromJSON(json));
+ const wrappedNodes = nodes.map((node) => wrapTextsInRuns(node) as PmNode);
+ const fragment = Fragment.from(wrappedNodes);
+
+ return { fragment, diagnostics };
+}
+
+// ---------------------------------------------------------------------------
+// Shared parse + convert pipeline
+// ---------------------------------------------------------------------------
+
+function parseAndConvert(
+ markdown: string,
+ editor: Editor,
+ options: MarkdownConversionOptions,
+): { blocks: ReturnType; diagnostics: MdastConversionContext['diagnostics'] } {
+ const ast = parseMarkdownToAst(markdown);
+
+ const ctx: MdastConversionContext = {
+ editor,
+ schema: editor.schema,
+ diagnostics: [],
+ options,
+ };
+
+ const blocks = convertMdastToBlocks(ast, ctx);
+ return { blocks, diagnostics: ctx.diagnostics };
+}
diff --git a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts
new file mode 100644
index 0000000000..408d18f0cd
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts
@@ -0,0 +1,525 @@
+/**
+ * Convert an mdast AST tree into ProseMirror JSON nodes.
+ *
+ * This module walks the mdast tree produced by remark-parse and produces
+ * ProseMirror-compatible JSON that conforms to the SuperEditor schema.
+ *
+ * Key schema facts (SuperEditor/OOXML):
+ * - Headings are `paragraph` nodes with `paragraphProperties.styleId: 'HeadingN'`.
+ * - Lists are `paragraph` nodes with `paragraphProperties.numberingProperties`.
+ * - The `run` node wraps text with run-level properties (bold, italic, etc.).
+ * - Tables use `table` > `tableRow` > `tableCell` with block content inside cells.
+ * - There is no dedicated blockquote or horizontal-rule node.
+ */
+
+import type {
+ Node as MdastNode,
+ Root,
+ PhrasingContent,
+ Paragraph as MdastParagraph,
+ Heading as MdastHeading,
+ Blockquote as MdastBlockquote,
+ Code as MdastCode,
+ Table as MdastTable,
+ Image as MdastImage,
+ Html as MdastHtml,
+ Text as MdastText,
+ Strong as MdastStrong,
+ Emphasis as MdastEmphasis,
+ Delete as MdastDelete,
+ Link as MdastLink,
+ InlineCode as MdastInlineCode,
+ List as MdastList,
+ ListItem as MdastListItem,
+} from 'mdast';
+import { ListHelpers } from '../list-numbering-helpers.js';
+import type { MdastConversionContext, MarkdownDiagnostic } from './types.js';
+
+// ---------------------------------------------------------------------------
+// Public entry point
+// ---------------------------------------------------------------------------
+
+/**
+ * Convert an mdast root node into an array of ProseMirror JSON block nodes
+ * suitable for constructing a full doc or a fragment.
+ */
+export function convertMdastToBlocks(root: Root, ctx: MdastConversionContext): JsonNode[] {
+ return flatMapChildren(root, ctx);
+}
+
+// ---------------------------------------------------------------------------
+// JSON node shape (matches ProseMirror nodeFromJSON input)
+// ---------------------------------------------------------------------------
+
+interface JsonNode {
+ type: string;
+ attrs?: Record;
+ content?: JsonNode[];
+ marks?: JsonMark[];
+ text?: string;
+}
+
+interface JsonMark {
+ type: string;
+ attrs?: Record;
+}
+
+// ---------------------------------------------------------------------------
+// Block-level converters
+// ---------------------------------------------------------------------------
+
+function flatMapChildren(parent: MdastNode & { children?: MdastNode[] }, ctx: MdastConversionContext): JsonNode[] {
+ if (!parent.children) return [];
+ const blocks: JsonNode[] = [];
+ for (const child of parent.children) {
+ blocks.push(...convertBlockNode(child, ctx));
+ }
+ return blocks;
+}
+
+function convertBlockNode(node: MdastNode, ctx: MdastConversionContext): JsonNode[] {
+ switch (node.type) {
+ case 'paragraph':
+ return [convertParagraph(node as MdastParagraph, ctx)];
+
+ case 'heading':
+ return [convertHeading(node as MdastHeading, ctx)];
+
+ case 'list':
+ return convertList(node as MdastList, ctx, 0);
+
+ case 'blockquote':
+ return convertBlockquote(node as MdastBlockquote, ctx);
+
+ case 'code':
+ return [convertCodeBlock(node as MdastCode, ctx)];
+
+ case 'thematicBreak':
+ return [convertThematicBreak(ctx)];
+
+ case 'table':
+ return [convertTable(node as MdastTable, ctx)];
+
+ case 'image':
+ return [convertImageBlock(node as MdastImage, ctx)];
+
+ case 'html':
+ return convertRawHtml(node as MdastHtml, ctx);
+
+ default:
+ addDiagnostic(ctx, 'warning', node.type, `Unsupported mdast block node "${node.type}" — skipped.`, node);
+ return [];
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Paragraph
+// ---------------------------------------------------------------------------
+
+function convertParagraph(node: MdastParagraph, ctx: MdastConversionContext): JsonNode {
+ return makeParagraph(convertInlineChildren(node.children, ctx, []));
+}
+
+// ---------------------------------------------------------------------------
+// Heading (paragraph + styleId)
+// ---------------------------------------------------------------------------
+
+const HEADING_STYLE_MAP: Record = {
+ 1: 'Heading1',
+ 2: 'Heading2',
+ 3: 'Heading3',
+ 4: 'Heading4',
+ 5: 'Heading5',
+ 6: 'Heading6',
+};
+
+function convertHeading(node: MdastHeading, ctx: MdastConversionContext): JsonNode {
+ const styleId = HEADING_STYLE_MAP[node.depth] ?? 'Heading1';
+ const runs = convertInlineChildren(node.children, ctx, []);
+ return makeParagraph(runs, { styleId });
+}
+
+// ---------------------------------------------------------------------------
+// List (ordered / bullet → paragraphs with numberingProperties)
+// ---------------------------------------------------------------------------
+
+function convertList(node: MdastList, ctx: MdastConversionContext, depth: number): JsonNode[] {
+ const listType = node.ordered ? 'orderedList' : 'bulletList';
+
+ let numId: number | undefined;
+ if (!ctx.options.dryRun) {
+ numId = ListHelpers.getNewListId(ctx.editor);
+ ListHelpers.generateNewListDefinition({ numId, listType, editor: ctx.editor });
+ } else {
+ // Dry-run: use a placeholder numId (never persisted)
+ numId = 0;
+ }
+
+ const blocks: JsonNode[] = [];
+ for (let i = 0; i < node.children.length; i++) {
+ const listItem = node.children[i];
+ blocks.push(...convertListItem(listItem, ctx, numId, depth, listType));
+ }
+ return blocks;
+}
+
+function convertListItem(
+ item: MdastListItem,
+ ctx: MdastConversionContext,
+ numId: number,
+ depth: number,
+ listType: string,
+): JsonNode[] {
+ const blocks: JsonNode[] = [];
+
+ for (const child of item.children) {
+ if (child.type === 'paragraph') {
+ const runs = convertInlineChildren((child as MdastParagraph).children, ctx, []);
+ blocks.push(makeListParagraph(runs, numId, depth));
+ } else if (child.type === 'list') {
+ // Nested list — increase depth, reuse same listType context
+ blocks.push(...convertList(child as MdastList, ctx, depth + 1));
+ } else {
+ // Other block content inside a list item (e.g., blockquote, code)
+ blocks.push(...convertBlockNode(child, ctx));
+ }
+ }
+
+ // If the list item had no paragraph children (edge case), emit an empty list paragraph
+ if (blocks.length === 0) {
+ blocks.push(makeListParagraph([], numId, depth));
+ }
+
+ return blocks;
+}
+
+// ---------------------------------------------------------------------------
+// Blockquote (paragraph + Quote style)
+// ---------------------------------------------------------------------------
+
+function convertBlockquote(node: MdastBlockquote, ctx: MdastConversionContext): JsonNode[] {
+ const blocks: JsonNode[] = [];
+ for (const child of node.children) {
+ if (child.type === 'paragraph') {
+ const runs = convertInlineChildren((child as MdastParagraph).children, ctx, []);
+ blocks.push(makeParagraph(runs, { styleId: 'Quote' }));
+ } else {
+ // Nested block inside blockquote — convert normally but could lose quote context
+ blocks.push(...convertBlockNode(child, ctx));
+ }
+ }
+ return blocks;
+}
+
+// ---------------------------------------------------------------------------
+// Code block (paragraph with monospace run properties)
+// ---------------------------------------------------------------------------
+
+function convertCodeBlock(node: MdastCode, ctx: MdastConversionContext): JsonNode {
+ const lines = node.value.split('\n');
+ const content: JsonNode[] = [];
+ for (let i = 0; i < lines.length; i++) {
+ if (i > 0) {
+ content.push({ type: 'lineBreak' });
+ }
+ if (lines[i].length > 0) {
+ content.push(makeRun(lines[i], [], { rFonts: { ascii: 'Courier New', hAnsi: 'Courier New' } }));
+ }
+ }
+ return makeParagraph(content);
+}
+
+// ---------------------------------------------------------------------------
+// Thematic break (horizontal rule → empty paragraph with border-bottom)
+// ---------------------------------------------------------------------------
+
+function convertThematicBreak(ctx: MdastConversionContext): JsonNode {
+ // Use the contentBlock node (SuperEditor's inline horizontal rule element)
+ // if the schema supports it, otherwise fall back to a styled paragraph.
+ const hasContentBlock = ctx.schema.nodes.contentBlock != null;
+ if (hasContentBlock) {
+ return makeParagraph([
+ {
+ type: 'contentBlock',
+ attrs: {
+ horizontalRule: true,
+ size: { width: '100%', height: 2 },
+ background: '#e5e7eb',
+ },
+ },
+ ]);
+ }
+
+ return makeParagraph([], {
+ pBdr: {
+ bottom: { val: 'single', sz: '6', space: '1', color: 'auto' },
+ },
+ });
+}
+
+// ---------------------------------------------------------------------------
+// Table
+// ---------------------------------------------------------------------------
+
+function convertTable(node: MdastTable, ctx: MdastConversionContext): JsonNode {
+ const rows: JsonNode[] = [];
+
+ for (let rowIndex = 0; rowIndex < node.children.length; rowIndex++) {
+ const mdastRow = node.children[rowIndex];
+ const isHeaderRow = rowIndex === 0;
+ const cells: JsonNode[] = [];
+
+ for (const mdastCell of mdastRow.children) {
+ const cellContent = convertInlineChildren(mdastCell.children, ctx, []);
+ const cellParagraph = makeParagraph(cellContent);
+ const cellType = isHeaderRow ? 'tableHeader' : 'tableCell';
+ cells.push({
+ type: cellType,
+ attrs: {
+ colspan: 1,
+ rowspan: 1,
+ colwidth: null,
+ },
+ content: [cellParagraph],
+ });
+ }
+
+ rows.push({
+ type: 'tableRow',
+ content: cells,
+ });
+ }
+
+ return {
+ type: 'table',
+ content: rows,
+ };
+}
+
+// ---------------------------------------------------------------------------
+// Image (block-level — wraps in paragraph if at top level)
+// ---------------------------------------------------------------------------
+
+function convertImageBlock(node: MdastImage, ctx: MdastConversionContext): JsonNode {
+ if (!node.url) {
+ addDiagnostic(ctx, 'warning', 'image', 'Image with empty URL — skipped.', node);
+ return makeParagraph([]);
+ }
+
+ const imageNode: JsonNode = {
+ type: 'image',
+ attrs: {
+ src: node.url,
+ alt: node.alt ?? null,
+ title: node.title ?? null,
+ },
+ };
+
+ // Image must be wrapped in a paragraph for the OOXML content model
+ return {
+ type: 'paragraph',
+ content: [imageNode],
+ };
+}
+
+// ---------------------------------------------------------------------------
+// Raw HTML fallback
+// ---------------------------------------------------------------------------
+
+function extractHtmlTagName(html: string): string {
+ const match = html.match(/^<\/?([a-zA-Z][a-zA-Z0-9]*)/);
+ return match ? match[1].toUpperCase() : 'HTML';
+}
+
+function convertRawHtml(node: MdastHtml, ctx: MdastConversionContext): JsonNode[] {
+ const tagName = extractHtmlTagName(node.value);
+ addDiagnostic(
+ ctx,
+ 'warning',
+ tagName,
+ `Raw HTML <${tagName.toLowerCase()}> in markdown — converted to plain text.`,
+ node,
+ );
+ // Fall back to a plain text paragraph
+ if (node.value.trim().length === 0) return [];
+ return [makeParagraph([makeRun(node.value, [])])];
+}
+
+// ---------------------------------------------------------------------------
+// Inline-level converters
+// ---------------------------------------------------------------------------
+
+/**
+ * Convert an array of mdast phrasing (inline) content into PM JSON run nodes.
+ * `parentMarks` accumulates marks as we recurse into emphasis/strong/etc.
+ */
+function convertInlineChildren(
+ children: PhrasingContent[],
+ ctx: MdastConversionContext,
+ parentMarks: JsonMark[],
+): JsonNode[] {
+ const nodes: JsonNode[] = [];
+ for (const child of children) {
+ nodes.push(...convertInlineNode(child, ctx, parentMarks));
+ }
+ return nodes;
+}
+
/**
 * Dispatch a single inline (phrasing) mdast node.
 *
 * Mark-producing wrappers (strong/emphasis/delete/link) recurse into their
 * children with the corresponding mark appended to `parentMarks`; leaf nodes
 * become `run` nodes carrying the accumulated marks. Unknown node types are
 * reported via a diagnostic and degraded to their text content when possible.
 */
function convertInlineNode(node: PhrasingContent, ctx: MdastConversionContext, parentMarks: JsonMark[]): JsonNode[] {
  switch (node.type) {
    case 'text':
      return [makeRun((node as MdastText).value, parentMarks)];

    // Marks accumulate down the tree; only the leaf text attaches them.
    case 'strong':
      return convertInlineChildren((node as MdastStrong).children, ctx, [...parentMarks, { type: 'bold' }]);

    case 'emphasis':
      return convertInlineChildren((node as MdastEmphasis).children, ctx, [...parentMarks, { type: 'italic' }]);

    case 'delete':
      return convertInlineChildren((node as MdastDelete).children, ctx, [...parentMarks, { type: 'strike' }]);

    case 'link':
      return convertLink(node as MdastLink, ctx, parentMarks);

    // Inline code: monospace via a textStyle mark (not run-level properties).
    case 'inlineCode':
      return [
        makeRun((node as MdastInlineCode).value, [
          ...parentMarks,
          { type: 'textStyle', attrs: { fontFamily: 'Courier New' } },
        ]),
      ];

    case 'break':
      return [{ type: 'lineBreak' }];

    case 'image':
      return convertInlineImage(node as MdastImage, ctx);

    // Inline raw HTML lands here too: report it with the real tag name.
    default: {
      const diagNodeType = node.type === 'html' ? extractHtmlTagName((node as MdastHtml).value ?? '') : node.type;
      addDiagnostic(
        ctx,
        'warning',
        diagNodeType,
        `Unsupported mdast inline node "${node.type}" — converted to text.`,
        node,
      );
      // Attempt to extract text content as fallback
      if ('value' in node && typeof (node as unknown as { value: unknown }).value === 'string') {
        return [makeRun((node as unknown as { value: string }).value, parentMarks)];
      }
      if ('children' in node && Array.isArray((node as unknown as { children: unknown }).children)) {
        return convertInlineChildren((node as unknown as { children: PhrasingContent[] }).children, ctx, parentMarks);
      }
      return [];
    }
  }
}
+
+// ---------------------------------------------------------------------------
+// Link
+// ---------------------------------------------------------------------------
+
+function convertLink(node: MdastLink, ctx: MdastConversionContext, parentMarks: JsonMark[]): JsonNode[] {
+ const linkMark: JsonMark = {
+ type: 'link',
+ attrs: {
+ href: node.url,
+ target: '_blank',
+ rel: 'noopener noreferrer nofollow',
+ ...(node.title ? { tooltip: node.title } : {}),
+ },
+ };
+ return convertInlineChildren(node.children, ctx, [...parentMarks, linkMark]);
+}
+
+// ---------------------------------------------------------------------------
+// Inline image
+// ---------------------------------------------------------------------------
+
+function convertInlineImage(node: MdastImage, ctx: MdastConversionContext): JsonNode[] {
+ if (!node.url) {
+ addDiagnostic(ctx, 'warning', 'image', 'Inline image with empty URL — skipped.', node);
+ return [];
+ }
+ return [
+ {
+ type: 'image',
+ attrs: {
+ src: node.url,
+ alt: node.alt ?? null,
+ title: node.title ?? null,
+ },
+ },
+ ];
+}
+
+// ---------------------------------------------------------------------------
+// JSON node builders
+// ---------------------------------------------------------------------------
+
+function makeParagraph(content: JsonNode[], extraParagraphProps?: Record): JsonNode {
+ const paragraphProperties = extraParagraphProps ? { ...extraParagraphProps } : undefined;
+ const attrs: Record = {};
+ if (paragraphProperties) {
+ attrs.paragraphProperties = paragraphProperties;
+ }
+ return {
+ type: 'paragraph',
+ ...(Object.keys(attrs).length > 0 ? { attrs } : {}),
+ content: content.length > 0 ? content : undefined,
+ };
+}
+
+function makeListParagraph(content: JsonNode[], numId: number, ilvl: number): JsonNode {
+ const numberingProperties = { numId, ilvl };
+ return {
+ type: 'paragraph',
+ attrs: {
+ paragraphProperties: { numberingProperties },
+ numberingProperties,
+ },
+ content: content.length > 0 ? content : undefined,
+ };
+}
+
+/**
+ * Create a `run` JSON node wrapping a text node with optional marks/run properties.
+ */
+function makeRun(text: string, marks: JsonMark[], extraRunProperties?: Record): JsonNode {
+ const textNode: JsonNode = {
+ type: 'text',
+ text,
+ ...(marks.length > 0 ? { marks } : {}),
+ };
+ const runNode: JsonNode = {
+ type: 'run',
+ content: [textNode],
+ };
+ if (extraRunProperties) {
+ runNode.attrs = { runProperties: extraRunProperties };
+ }
+ return runNode;
+}
+
+// ---------------------------------------------------------------------------
+// Diagnostics helper
+// ---------------------------------------------------------------------------
+
+function addDiagnostic(
+ ctx: MdastConversionContext,
+ severity: MarkdownDiagnostic['severity'],
+ nodeType: string,
+ message: string,
+ node?: MdastNode,
+): void {
+ const diagnostic: MarkdownDiagnostic = { severity, nodeType, message };
+ if (node?.position?.start) {
+ diagnostic.position = {
+ line: node.position.start.line,
+ column: node.position.start.column,
+ };
+ }
+ ctx.diagnostics.push(diagnostic);
+}
diff --git a/packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts b/packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts
new file mode 100644
index 0000000000..00fd19908f
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/parseMarkdownAst.ts
@@ -0,0 +1,22 @@
+/**
+ * Markdown source → mdast AST parsing.
+ *
+ * Uses unified + remark-parse + remark-gfm to produce a GFM-aware mdast tree.
+ * This is the only place in the codebase that touches remark-parse.
+ */
+
+import { unified } from 'unified';
+import remarkParse from 'remark-parse';
+import remarkGfm from 'remark-gfm';
+import type { Root } from 'mdast';
+
+/**
+ * Parse a Markdown string into an mdast AST tree.
+ *
+ * Supports GitHub Flavored Markdown (tables, strikethrough, autolinks, task lists).
+ * This operation is synchronous and side-effect-free.
+ */
+export function parseMarkdownToAst(markdown: string): Root {
+ const processor = unified().use(remarkParse).use(remarkGfm);
+ return processor.parse(markdown) as Root;
+}
diff --git a/packages/super-editor/src/core/helpers/markdown/types.ts b/packages/super-editor/src/core/helpers/markdown/types.ts
new file mode 100644
index 0000000000..890a1e58fb
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/types.ts
@@ -0,0 +1,64 @@
+/**
+ * Types for the Markdown → ProseMirror AST conversion pipeline.
+ */
+
+import type { Node as PmNode, Fragment, Schema } from 'prosemirror-model';
+import type { Editor } from '../../Editor.js';
+
+// ---------------------------------------------------------------------------
+// Conversion options
+// ---------------------------------------------------------------------------
+
export interface MarkdownConversionOptions {
  /** When true, skip side-effects like numbering allocation (for dry-run validation). */
  dryRun?: boolean;
}

// ---------------------------------------------------------------------------
// Conversion results
// ---------------------------------------------------------------------------

/** Result of a full-document conversion (`markdownToPmDoc`). */
export interface MarkdownConversionResult {
  /** The converted ProseMirror document node. */
  doc: PmNode;
  /** Diagnostics for unsupported or problematic mdast nodes. */
  diagnostics: MarkdownDiagnostic[];
}

/** Result of a fragment conversion (`markdownToPmFragment`). */
export interface MarkdownFragmentResult {
  /** The converted ProseMirror fragment (for insertion, not full doc). */
  fragment: Fragment;
  /** Diagnostics for unsupported or problematic mdast nodes. */
  diagnostics: MarkdownDiagnostic[];
}

// ---------------------------------------------------------------------------
// Diagnostics
// ---------------------------------------------------------------------------

// The converters currently emit only 'warning'; 'error' appears reserved for
// future fatal cases — TODO confirm before depending on it.
export type DiagnosticSeverity = 'warning' | 'error';

export interface MarkdownDiagnostic {
  severity: DiagnosticSeverity;
  /** The mdast node type that triggered the diagnostic. */
  nodeType: string;
  /** Human-readable explanation. */
  message: string;
  /** Line/column in the source markdown (if available from mdast position). */
  position?: { line: number; column: number };
}

// ---------------------------------------------------------------------------
// mdast-to-PM mapper context
// ---------------------------------------------------------------------------

/**
 * Shared context threaded through the mdast → ProseMirror mapping walk.
 * Carries the editor, schema, accumulated diagnostics, and conversion options.
 */
export interface MdastConversionContext {
  editor: Editor;
  schema: Schema;
  diagnostics: MarkdownDiagnostic[];
  options: MarkdownConversionOptions;
}
diff --git a/packages/super-editor/src/document-api-adapters/assemble-adapters.ts b/packages/super-editor/src/document-api-adapters/assemble-adapters.ts
index 9c33a9fcc1..68d92a23fd 100644
--- a/packages/super-editor/src/document-api-adapters/assemble-adapters.ts
+++ b/packages/super-editor/src/document-api-adapters/assemble-adapters.ts
@@ -6,7 +6,7 @@ import { getTextAdapter } from './get-text-adapter.js';
import { infoAdapter } from './info-adapter.js';
import { getDocumentApiCapabilities } from './capabilities-adapter.js';
import { createCommentsWrapper } from './plan-engine/comments-wrappers.js';
-import { writeWrapper, styleApplyWrapper } from './plan-engine/plan-wrappers.js';
+import { writeWrapper, insertStructuredWrapper, styleApplyWrapper } from './plan-engine/plan-wrappers.js';
import { stylesApplyAdapter } from './styles-adapter.js';
import {
formatFontSizeWrapper,
@@ -111,6 +111,7 @@ export function assembleDocumentApiAdapters(editor: Editor): DocumentApiAdapters
comments: createCommentsWrapper(editor),
write: {
write: (request, options) => writeWrapper(editor, request, options),
+ insertStructured: (input, options) => insertStructuredWrapper(editor, input, options),
},
format: {
apply: (input, options) => styleApplyWrapper(editor, input, options),
diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/index.ts b/packages/super-editor/src/document-api-adapters/plan-engine/index.ts
index f72d8a9f06..e366fc7613 100644
--- a/packages/super-editor/src/document-api-adapters/plan-engine/index.ts
+++ b/packages/super-editor/src/document-api-adapters/plan-engine/index.ts
@@ -12,4 +12,4 @@ export { planError, PlanError } from './errors.js';
export { captureRunsInRange, resolveInlineStyle } from './style-resolver.js';
export type { CapturedRun, CapturedStyle } from './style-resolver.js';
export type { CompiledTarget, StepExecutor, CompileContext, ExecuteContext } from './executor-registry.types.js';
-export { writeWrapper, styleApplyWrapper } from './plan-wrappers.js';
+export { writeWrapper, insertStructuredWrapper, styleApplyWrapper } from './plan-wrappers.js';
diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts
new file mode 100644
index 0000000000..27aefb767e
--- /dev/null
+++ b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts
@@ -0,0 +1,304 @@
+import { beforeAll, beforeEach, afterEach, describe, it, expect, vi } from 'vitest';
+import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js';
+import type { Editor } from '../../core/Editor.js';
+import { insertStructuredWrapper } from './plan-wrappers.js';
+import { registerBuiltInExecutors } from './register-executors.js';
+import { clearExecutorRegistry } from './executor-registry.js';
+import { resolveTextTarget } from '../helpers/adapter-utils.js';
+
+let docData: Awaited>;
+
+beforeAll(async () => {
+ docData = await loadTestDataForEditorTests('blank-doc.docx');
+ clearExecutorRegistry();
+ registerBuiltInExecutors();
+});
+
+let editor: Editor;
+
+beforeEach(() => {
+ ({ editor } = initTestEditor({
+ content: docData.docx,
+ media: docData.media,
+ mediaFiles: docData.mediaFiles,
+ fonts: docData.fonts,
+ }));
+});
+
+afterEach(() => {
+ editor?.destroy();
+ // @ts-expect-error cleanup
+ editor = null;
+});
+
+function getDocTextContent(ed: Editor): string {
+ return ed.state.doc.textContent;
+}
+
+/** Requires prior seeded content — a blank doc has no text offsets to span. */
+function findResolvableNonCollapsedTarget(ed: Editor): { blockId: string; range: { start: number; end: number } } {
+ const candidateIds = new Set();
+ const identityKeys = ['sdBlockId', 'blockId', 'paraId', 'id', 'uuid'] as const;
+
+ ed.state.doc.descendants((node) => {
+ const attrs = node.attrs as Record | undefined;
+ if (!attrs) return true;
+
+ for (const key of identityKeys) {
+ const value = attrs[key];
+ if (typeof value === 'string' && value.length > 0) candidateIds.add(value);
+ }
+ return true;
+ });
+
+ for (const blockId of candidateIds) {
+ const target = {
+ kind: 'text' as const,
+ blockId,
+ range: { start: 0, end: 1 },
+ };
+ const resolved = resolveTextTarget(ed, target);
+ if (resolved && resolved.from !== resolved.to) {
+ return { blockId, range: { start: 0, end: 1 } };
+ }
+ }
+
+ throw new Error('Expected at least one resolvable non-collapsed text target.');
+}
+
describe('insertStructuredWrapper — markdown', () => {
  it('inserts markdown paragraph content into the document', () => {
    const result = insertStructuredWrapper(editor, {
      value: 'Hello from markdown',
      type: 'markdown',
    });

    expect(result.success).toBe(true);
    expect(getDocTextContent(editor)).toContain('Hello from markdown');
  });

  it('inserts markdown heading as a styled paragraph', () => {
    const result = insertStructuredWrapper(editor, {
      value: '# My Heading',
      type: 'markdown',
    });

    expect(result.success).toBe(true);
    expect(getDocTextContent(editor)).toContain('My Heading');

    // Verify heading is represented as a paragraph with Heading1 style
    let foundHeading = false;
    editor.state.doc.descendants((node) => {
      if (node.type.name === 'paragraph' && node.attrs?.paragraphProperties?.styleId === 'Heading1') {
        foundHeading = true;
      }
      return true;
    });
    expect(foundHeading).toBe(true);
  });

  it('inserts markdown with multiple blocks', () => {
    const result = insertStructuredWrapper(editor, {
      value: '# Title\n\nFirst paragraph.\n\nSecond paragraph.',
      type: 'markdown',
    });

    expect(result.success).toBe(true);
    expect(getDocTextContent(editor)).toContain('Title');
    expect(getDocTextContent(editor)).toContain('First paragraph.');
    expect(getDocTextContent(editor)).toContain('Second paragraph.');
  });

  it('inserts markdown list content', () => {
    const result = insertStructuredWrapper(editor, {
      value: '- Item one\n- Item two\n- Item three',
      type: 'markdown',
    });

    expect(result.success).toBe(true);
    expect(getDocTextContent(editor)).toContain('Item one');
    expect(getDocTextContent(editor)).toContain('Item two');
    expect(getDocTextContent(editor)).toContain('Item three');
  });

  // Empty markdown converts to zero blocks — the wrapper must refuse with
  // NO_OP rather than dispatch an empty insert.
  it('returns NO_OP for empty markdown', () => {
    const result = insertStructuredWrapper(editor, {
      value: '',
      type: 'markdown',
    });

    expect(result.success).toBe(false);
    expect(result.failure?.code).toBe('NO_OP');
  });

  // Insertion is strictly additive: a target spanning existing text must be
  // rejected (INVALID_TARGET) and leave the document untouched.
  it('returns INVALID_TARGET for non-collapsed targets instead of replacing selected text', () => {
    const seed = insertStructuredWrapper(editor, {
      value: 'abcdef',
      type: 'markdown',
    });
    expect(seed.success).toBe(true);

    const textBefore = getDocTextContent(editor);
    const target = findResolvableNonCollapsedTarget(editor);

    const result = insertStructuredWrapper(editor, {
      value: 'X',
      type: 'markdown',
      target: { kind: 'text', ...target },
    });

    expect(result.success).toBe(false);
    expect(result.failure?.code).toBe('INVALID_TARGET');
    expect(getDocTextContent(editor)).toBe(textBefore);
  });
});

describe('insertStructuredWrapper — list numbering rollback', () => {
  it('rolls back numbering allocations when insertContentAt fails after markdown parsing', () => {
    // This test exercises the actual rollback branch: markdown with list
    // syntax is parsed (allocating numbering IDs on editor.converter), then
    // insertContentAt is forced to fail, and we verify the snapshot/restore
    // reverts numbering state to its pre-insert value.
    const converter = (editor as any).converter;

    // Capture numbering state before the insert attempt.
    const numberingBefore = JSON.stringify(converter?.numbering ?? {});
    const translatedBefore = JSON.stringify(converter?.translatedNumbering ?? {});

    // Shadow both view.dispatch and editor.dispatch with undefined so that
    // CommandService's #dispatchWithFallback returns false (no dispatch
    // method available). This causes insertContentAt to return false AFTER
    // markdown parsing has already allocated numbering IDs on the converter.
    const view = (editor as any).view;
    if (view) {
      Object.defineProperty(view, 'dispatch', { value: undefined, configurable: true });
    }
    Object.defineProperty(editor, 'dispatch', { value: undefined, configurable: true });

    try {
      const result = insertStructuredWrapper(editor, {
        value: '- List item that allocates numbering',
        type: 'markdown',
      });

      expect(result.success).toBe(false);
      expect(result.failure?.code).toBe('INVALID_TARGET');

      // The markdown parsing allocated numbering IDs, but rollback should
      // have restored converter state to the pre-insert snapshot.
      expect(JSON.stringify(converter?.numbering ?? {})).toBe(numberingBefore);
      expect(JSON.stringify(converter?.translatedNumbering ?? {})).toBe(translatedBefore);
    } finally {
      // Remove own-property shadows to restore prototype methods.
      if (view) delete view.dispatch;
      delete (editor as any).dispatch;
    }
  });

  it('does not roll back numbering on successful list insert', () => {
    const converter = (editor as any).converter;

    const numberingBefore = JSON.stringify(converter?.numbering ?? {});

    const result = insertStructuredWrapper(editor, {
      value: '- Successfully inserted list item',
      type: 'markdown',
    });

    expect(result.success).toBe(true);
    // Numbering state should have changed (new list ID allocated).
    expect(JSON.stringify(converter?.numbering ?? {})).not.toBe(numberingBefore);
  });
});
+
+describe('insertStructuredWrapper — html', () => {
+ it('does not throw for HTML insert (gracefully succeeds or returns failure)', () => {
+ // The test editor in vitest (happy-dom) may or may not have DOM support.
+ // The key assertion is that this never throws an unhandled error.
+ expect(() => {
+ const result = insertStructuredWrapper(editor, {
+        value: '<p>Hello from HTML</p>',
+ type: 'html',
+ });
+
+ // In a DOM environment it should succeed; in headless it fails gracefully
+ if (result.success) {
+ expect(getDocTextContent(editor)).toContain('Hello from HTML');
+ } else {
+ expect(result.failure).toBeDefined();
+ expect(['UNSUPPORTED_ENVIRONMENT', 'INVALID_TARGET']).toContain(result.failure?.code);
+ }
+ }).not.toThrow();
+ });
+});
+
+describe('insertStructuredWrapper — dry-run', () => {
+ it('does not mutate document on dry-run markdown insert', () => {
+ const textBefore = getDocTextContent(editor);
+
+ const result = insertStructuredWrapper(
+ editor,
+ { value: '# Should Not Appear', type: 'markdown' },
+ { dryRun: true },
+ );
+
+ expect(result.success).toBe(true);
+ expect(getDocTextContent(editor)).toBe(textBefore);
+ });
+
+ it('mirrors runtime failure for empty markdown in dry-run mode', () => {
+ const runtime = insertStructuredWrapper(editor, {
+ value: '',
+ type: 'markdown',
+ });
+ expect(runtime.success).toBe(false);
+ expect(runtime.failure?.code).toBe('NO_OP');
+
+ const dryRun = insertStructuredWrapper(
+ editor,
+ {
+ value: '',
+ type: 'markdown',
+ },
+ { dryRun: true },
+ );
+
+ expect(dryRun.success).toBe(false);
+ expect(dryRun.failure?.code).toBe('NO_OP');
+ });
+
+ it('mirrors runtime environment failure for html in dry-run mode', () => {
+ const opts = (editor as any).options ?? ((editor as any).options = {});
+ const prevDocument = opts.document;
+ const prevMockDocument = opts.mockDocument;
+
+ opts.document = undefined;
+ opts.mockDocument = undefined;
+ vi.stubGlobal('document', undefined as any);
+
+ try {
+ const runtime = insertStructuredWrapper(editor, {
+        value: '<p>Hello from HTML</p>',
+ type: 'html',
+ });
+ expect(runtime.success).toBe(false);
+ expect(runtime.failure?.code).toBe('UNSUPPORTED_ENVIRONMENT');
+
+ const dryRun = insertStructuredWrapper(
+ editor,
+ {
+          value: '<p>Hello from HTML</p>',
+ type: 'html',
+ },
+ { dryRun: true },
+ );
+
+ expect(dryRun.success).toBe(false);
+ expect(dryRun.failure?.code).toBe('UNSUPPORTED_ENVIRONMENT');
+ } finally {
+ vi.unstubAllGlobals();
+ opts.document = prevDocument;
+ opts.mockDocument = prevMockDocument;
+ }
+ });
+});
diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts
index 7c16bd47cf..989684657f 100644
--- a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts
+++ b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts
@@ -10,6 +10,7 @@ import { v4 as uuidv4 } from 'uuid';
import type {
MutationOptions,
MutationStep,
+ InsertInput,
TextAddress,
TextMutationReceipt,
TextMutationResolution,
@@ -29,6 +30,8 @@ import { resolveDefaultInsertTarget, resolveTextTarget, type ResolvedTextTarget
import { buildTextMutationResolution, readTextAtResolvedRange } from '../helpers/text-mutation-resolution.js';
import { ensureTrackedCapability, requireSchemaMark } from '../helpers/mutation-helpers.js';
import { TrackFormatMarkName } from '../../extensions/track-changes/constants.js';
+import { markdownToPmFragment } from '../../core/helpers/markdown/markdownToPmContent.js';
+import { processContent } from '../../core/helpers/contentProcessor.js';
// ---------------------------------------------------------------------------
// Locator normalization (same validation as the old adapters)
@@ -447,3 +450,223 @@ export function styleApplyWrapper(
return mapPlanReceiptToTextReceipt(receipt, resolution);
}
+
+// ---------------------------------------------------------------------------
+// Structured content insertion (markdown / html)
+// ---------------------------------------------------------------------------
+
+/**
+ * Insert structured content (markdown or html) at a target position.
+ *
+ * Routes through `executeDomainCommand` to enforce the revision guard.
+ * Conversion (markdown → AST → PM, or html → processContent → PM) happens
+ * inside the handler, so list-definition side effects only occur after the
+ * revision check passes. HTML content goes through the canonical
+ * `processContent` pipeline, matching the `insertContent` command path.
+ *
+ * Tracked mode is explicitly rejected for structured content in this implementation.
+ */
+export function insertStructuredWrapper(
+ editor: Editor,
+ input: InsertInput,
+ options?: MutationOptions,
+): TextMutationReceipt {
+ const contentType = input.type ?? 'text';
+ const { value, target } = input;
+
+ // Tracked mode not supported for structured content
+ const mode = options?.changeMode ?? 'direct';
+ if (mode === 'tracked') {
+ throw new DocumentApiAdapterError(
+ 'CAPABILITY_UNAVAILABLE',
+ `Tracked mode is not supported for type: '${contentType}' insert operations.`,
+ );
+ }
+
+ // Resolve target position
+ let resolvedRange: ResolvedTextTarget;
+ let effectiveTarget: TextAddress;
+
+ if (target) {
+ const range = resolveTextTarget(editor, target);
+ if (!range) {
+ throw new DocumentApiAdapterError('TARGET_NOT_FOUND', 'Structured insert target could not be resolved.', {
+ target,
+ });
+ }
+ resolvedRange = range;
+ effectiveTarget = target;
+ } else {
+ const fallback = resolveDefaultInsertTarget(editor);
+ if (!fallback) {
+ throw new DocumentApiAdapterError('TARGET_NOT_FOUND', 'No default insertion point available.');
+ }
+ resolvedRange = fallback.range;
+ effectiveTarget = fallback.target;
+ }
+
+ const resolution = buildTextMutationResolution({
+ requestedTarget: target,
+ target: effectiveTarget,
+ range: resolvedRange,
+ text: readTextAtResolvedRange(editor, resolvedRange),
+ });
+
+ const { from, to } = resolvedRange;
+
+ // Insert semantics are point-only for doc.insert, regardless of content type.
+ if (from !== to) {
+ return {
+ success: false,
+ resolution,
+ failure: { code: 'INVALID_TARGET', message: 'Insert operations require a collapsed target range.' },
+ };
+ }
+
+ // Dry-run: parse + validate but do not mutate
+ if (options?.dryRun) {
+ if (contentType === 'markdown') {
+ // Parse to validate structure (side-effect-free with dryRun: true)
+ const { fragment } = markdownToPmFragment(value, editor, { dryRun: true });
+ if (fragment.childCount === 0) {
+ return {
+ success: false,
+ resolution,
+ failure: { code: 'NO_OP', message: 'Markdown produced no content to insert.' },
+ };
+ }
+ } else if (contentType === 'html') {
+ // NOTE: processContent has no dryRun flag — this runs the full HTML
+ // pipeline (DOM creation, wrapTextsInRuns) minus the final insertContentAt.
+ // Acceptable for catching UNSUPPORTED_ENVIRONMENT / INVALID_TARGET early.
+ try {
+ const processedDoc = processContent({ content: value, type: 'html', editor });
+ if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') {
+ return {
+ success: false,
+ resolution,
+ failure: {
+ code: 'INVALID_TARGET',
+ message: 'HTML processing did not produce a valid document node.',
+ },
+ };
+ }
+ } catch (err) {
+ const message = err instanceof Error ? err.message : String(err);
+ return {
+ success: false,
+ resolution,
+ failure: {
+ code: 'UNSUPPORTED_ENVIRONMENT',
+ message: `HTML structured insert requires a DOM environment. ${message}`,
+ },
+ };
+ }
+ }
+ return { success: true, resolution };
+ }
+
+ // Convert and insert inside executeDomainCommand so the revision guard
+ // runs before any conversion side effects (e.g. list numbering allocation).
+ let insertFailure: ReceiptFailure | undefined;
+
+ // Snapshot numbering state so we can roll back if the insert fails.
+ // List conversion allocates IDs and definitions on editor.converter — these
+ // mutations sit outside the ProseMirror transaction and aren't auto-reverted.
+ const converter = (editor as any).converter;
+ const numberingSnapshot = converter?.numbering ? JSON.parse(JSON.stringify(converter.numbering)) : undefined;
+ const translatedNumberingSnapshot = converter?.translatedNumbering
+ ? JSON.parse(JSON.stringify(converter.translatedNumbering))
+ : undefined;
+
+ const receipt = executeDomainCommand(
+ editor,
+ (): boolean => {
+ if (contentType === 'markdown') {
+ const { fragment } = markdownToPmFragment(value, editor);
+
+ if (fragment.childCount === 0) {
+ insertFailure = { code: 'NO_OP', message: 'Markdown produced no content to insert.' };
+ return false;
+ }
+
+ // Convert Fragment to a JSON array — insertContentAt routes arrays
+ // through Fragment.fromArray(content.map(schema.nodeFromJSON)), which
+ // correctly materializes the nodes. Passing a Fragment directly fails
+ // because createNodeFromContent treats it as a single JSON object.
+ const jsonNodes: Record<string, unknown>[] = [];
+ fragment.forEach((node) => jsonNodes.push(node.toJSON()));
+
+ const ok = Boolean(editor.commands.insertContentAt({ from, to }, jsonNodes));
+ if (!ok) {
+ insertFailure = {
+ code: 'INVALID_TARGET',
+ message: 'Structured content could not be inserted at the target position.',
+ };
+ }
+ return ok;
+ } else if (contentType === 'html') {
+ // Route through processContent for the canonical HTML pipeline
+ // (createDocFromHTML + wrapTextsInRuns), matching insertContent command behavior.
+ // processContent requires a DOM; in headless environments this will throw.
+ try {
+ const processedDoc = processContent({ content: value, type: 'html', editor });
+ if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') {
+ insertFailure = {
+ code: 'INVALID_TARGET',
+ message: 'HTML processing did not produce a valid document node.',
+ };
+ return false;
+ }
+ const jsonContent = (processedDoc as { toJSON(): Record<string, unknown> }).toJSON();
+
+ const ok = Boolean(editor.commands.insertContentAt({ from, to }, jsonContent));
+ if (!ok) {
+ insertFailure = {
+ code: 'INVALID_TARGET',
+ message: 'HTML content could not be inserted at the target position.',
+ };
+ }
+ return ok;
+ } catch (err) {
+ const message = err instanceof Error ? err.message : String(err);
+ insertFailure = {
+ code: 'UNSUPPORTED_ENVIRONMENT',
+ message: `HTML structured insert requires a DOM environment. ${message}`,
+ };
+ return false;
+ }
+ }
+ return false;
+ },
+ { expectedRevision: options?.expectedRevision },
+ );
+
+ const commandSucceeded = receipt.steps[0]?.effect === 'changed';
+
+ // Roll back numbering side effects if the insert failed.
+ // The ProseMirror transaction is only dispatched on success, but list ID
+ // allocations mutate converter state directly and need manual rollback.
+ if (!commandSucceeded && converter) {
+ if (numberingSnapshot !== undefined) converter.numbering = numberingSnapshot;
+ if (translatedNumberingSnapshot !== undefined) converter.translatedNumbering = translatedNumberingSnapshot;
+ }
+
+ // Schedule list migration after successful html/markdown insert,
+ // matching the insertContent command's post-insert hook.
+ if (commandSucceeded) {
+ Promise.resolve()
+ .then(() => (editor as any).migrateListsToV2?.())
+ .catch(() => {});
+ }
+
+ if (!commandSucceeded) {
+ return {
+ success: false,
+ resolution,
+ failure: insertFailure ?? { code: 'INVALID_TARGET', message: 'Structured insert failed.' },
+ };
+ }
+
+ return { success: true, resolution };
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ba8899b39b..96b0afe633 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -48,6 +48,9 @@ catalogs:
'@types/bun':
specifier: ^1.3.8
version: 1.3.8
+ '@types/mdast':
+ specifier: ^4.0.4
+ version: 4.0.4
'@types/node':
specifier: 22.19.2
version: 22.19.2
@@ -129,9 +132,6 @@ catalogs:
lib0:
specifier: ^0.2.114
version: 0.2.117
- marked:
- specifier: ^16.2.0
- version: 16.4.2
naive-ui:
specifier: ^2.43.1
version: 2.43.2
@@ -213,6 +213,9 @@ catalogs:
remark-gfm:
specifier: ^4.0.1
version: 4.0.1
+ remark-parse:
+ specifier: ^11.0.0
+ version: 11.0.0
remark-stringify:
specifier: ^11.0.0
version: 11.0.0
@@ -1039,9 +1042,6 @@ importers:
lodash:
specifier: ^4.17.21
version: 4.17.23
- marked:
- specifier: 'catalog:'
- version: 16.4.2
naive-ui:
specifier: ^2.38.2
version: 2.43.2(vue@3.5.25(typescript@5.9.3))
@@ -1093,6 +1093,9 @@ importers:
remark-gfm:
specifier: 'catalog:'
version: 4.0.1
+ remark-parse:
+ specifier: 'catalog:'
+ version: 11.0.0
remark-stringify:
specifier: 'catalog:'
version: 11.0.0
@@ -1151,6 +1154,9 @@ importers:
'@superdoc/word-layout':
specifier: workspace:*
version: link:../word-layout
+ '@types/mdast':
+ specifier: 'catalog:'
+ version: 4.0.4
'@vitejs/plugin-vue':
specifier: 'catalog:'
version: 6.0.2(rolldown-vite@7.3.1(@types/node@22.19.8)(esbuild@0.27.2)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2))(vue@3.5.25(typescript@5.9.3))
@@ -7825,11 +7831,6 @@ packages:
engines: {node: '>= 18'}
hasBin: true
- marked@16.4.2:
- resolution: {integrity: sha512-TI3V8YYWvkVf3KJe1dRkpnjs68JUPyEa5vjKrp1XEEJUAOaQc+Qj+L1qWbPd0SJuAdQkFU0h73sXXqwDYxsiDA==}
- engines: {node: '>= 20'}
- hasBin: true
-
math-intrinsics@1.1.0:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'}
@@ -19399,8 +19400,6 @@ snapshots:
marked@15.0.12: {}
- marked@16.4.2: {}
-
math-intrinsics@1.1.0: {}
md5.js@1.3.5:
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
index 0d5d3fe214..f2793f6785 100644
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -27,6 +27,7 @@ catalog:
'@testing-library/react': ^16.3.0
'@testing-library/user-event': ^14.6.1
'@types/bun': ^1.3.8
+ '@types/mdast': ^4.0.4
'@types/node': 22.19.2
'@types/react': ^19.2.6
'@types/react-dom': ^19.2.3
@@ -89,6 +90,7 @@ catalog:
rehype-parse: ^9.0.1
rehype-remark: ^10.0.1
remark-gfm: ^4.0.1
+ remark-parse: ^11.0.0
remark-stringify: ^11.0.0
rollup-plugin-copy: ^3.5.0
rollup-plugin-visualizer: ^5.12.0
diff --git a/tests/behavior/helpers/document-api.ts b/tests/behavior/helpers/document-api.ts
index 9aca57c9c2..036923e674 100644
--- a/tests/behavior/helpers/document-api.ts
+++ b/tests/behavior/helpers/document-api.ts
@@ -173,7 +173,7 @@ export async function listComments(
export async function insertText(
page: Page,
- input: { text: string; target?: TextAddress },
+ input: { value: string; target?: TextAddress; type?: 'text' | 'markdown' | 'html' },
options: { changeMode?: ChangeMode; dryRun?: boolean } = {},
): Promise<unknown> {
return page.evaluate(({ payload, opts }) => (window as any).editor.doc.insert(payload, opts), {
diff --git a/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts b/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts
index 36f80b8383..4471b75f06 100644
--- a/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts
+++ b/tests/behavior/tests/comments/programmatic-tracked-change.spec.ts
@@ -96,7 +96,7 @@ test('direct insert via document-api', async ({ superdoc }) => {
},
};
- const receipt = await insertText(superdoc.page, { text: 'Beautiful ', target: insertionTarget });
+ const receipt = await insertText(superdoc.page, { value: 'Beautiful ', target: insertionTarget });
assertMutationSucceeded('insertText', receipt);
await superdoc.waitForStable();
From f0f445ce25b212e125d732b41e201f907876f30c Mon Sep 17 00:00:00 2001
From: Nick Bernal
Date: Wed, 25 Feb 2026 21:35:17 -0800
Subject: [PATCH 2/5] fix(document-api): dry-run numbering mutation, insert
contract, and text override whitespace
---
apps/cli/src/__tests__/cli.test.ts | 15 +++++++++
apps/cli/src/commands/open.ts | 12 ++-----
apps/cli/src/lib/document.ts | 32 +++++++++++++++++--
.../reference/_generated-manifest.json | 2 +-
apps/docs/document-api/reference/insert.mdx | 7 ++--
.../src/contract/contract.test.ts | 18 +++++++++++
.../src/contract/operation-definitions.ts | 2 +-
.../insert-structured-wrapper.test.ts | 21 ++++++++++++
.../plan-engine/plan-wrappers.ts | 16 +++++++++-
9 files changed, 108 insertions(+), 17 deletions(-)
diff --git a/apps/cli/src/__tests__/cli.test.ts b/apps/cli/src/__tests__/cli.test.ts
index 1685a0608e..b04f77b4c7 100644
--- a/apps/cli/src/__tests__/cli.test.ts
+++ b/apps/cli/src/__tests__/cli.test.ts
@@ -2054,6 +2054,21 @@ describe('superdoc CLI', () => {
expect(closeResult.code).toBe(0);
});
+ test('open with --override-type text preserves leading whitespace literally', async () => {
+ const literalText = ' foo';
+
+ const openResult = await runCli(['open', SAMPLE_DOC, '--content-override', literalText, '--override-type', 'text']);
+ expect(openResult.code).toBe(0);
+
+ const findResult = await runCli(['find', '--type', 'text', '--pattern', literalText]);
+ expect(findResult.code).toBe(0);
+ const findEnvelope = parseJsonOutput<Record<string, any>>(findResult);
+ expect(findEnvelope.data.result.total).toBeGreaterThan(0);
+
+ const closeResult = await runCli(['close', '--discard']);
+ expect(closeResult.code).toBe(0);
+ });
+
test('open with --override-type markdown applies content semantically', async () => {
const openResult = await runCli([
'open',
diff --git a/apps/cli/src/commands/open.ts b/apps/cli/src/commands/open.ts
index f3df19e3de..3b5d8ecc2f 100644
--- a/apps/cli/src/commands/open.ts
+++ b/apps/cli/src/commands/open.ts
@@ -18,11 +18,6 @@ import type { CommandContext, CommandExecution } from '../lib/types';
const VALID_OVERRIDE_TYPES = new Set(['markdown', 'html', 'text']);
-/** Escape CommonMark special characters so the text is treated as literal. */
-function escapeMarkdown(str: string): string {
- return str.replace(/([\\`*_{}[\]()#+\-.!|>~])/g, '\\$1');
-}
-
export async function runOpen(tokens: string[], context: CommandContext): Promise {
const { parsed, help } = parseOperationArgs('doc.open', tokens, {
commandName: 'open',
@@ -111,10 +106,9 @@ export async function runOpen(tokens: string[], context: CommandContext): Promis
} else if (overrideType === 'html') {
editorOpenOptions.html = contentOverride;
} else if (overrideType === 'text') {
- // Route through the markdown pipeline which is DOM-free (AST-based),
- // so it works in headless CLI mode. Escape markdown syntax characters
- // so the content is treated as literal text, not interpreted as formatting.
- editorOpenOptions.markdown = escapeMarkdown(contentOverride);
+ // Plain text bypass — handed off to document.ts which builds PM
+ // paragraphs directly, preserving all whitespace without markdown parsing.
+ editorOpenOptions.plainText = contentOverride;
}
}
diff --git a/apps/cli/src/lib/document.ts b/apps/cli/src/lib/document.ts
index e92bbcd41a..707587a1a1 100644
--- a/apps/cli/src/lib/document.ts
+++ b/apps/cli/src/lib/document.ts
@@ -28,7 +28,7 @@ interface OpenDocumentOptions {
documentId?: string;
ydoc?: unknown;
collaborationProvider?: unknown;
- /** Options passed through to Editor.open() (e.g., markdown/html for content override). */
+ /** Options passed through to Editor.open() (e.g., markdown/html/plainText for content override). */
editorOpenOptions?: Record;
}
@@ -107,7 +107,12 @@ export async function openDocument(
// there is no DOM, so we intercept them here:
// - markdown: applied post-init via the AST-based markdownToPmDoc pipeline (DOM-free)
// - html: rejected with a clear error (no DOM-free HTML pipeline exists)
- const { markdown: markdownOverride, html: htmlOverride, ...passThroughEditorOpts } = options.editorOpenOptions ?? {};
+ const {
+ markdown: markdownOverride,
+ html: htmlOverride,
+ plainText: plainTextOverride,
+ ...passThroughEditorOpts
+ } = options.editorOpenOptions ?? {};
if (htmlOverride != null) {
throw new CliError(
@@ -135,7 +140,9 @@ export async function openDocument(
});
}
- // Apply markdown content override post-init (DOM-free AST pipeline).
+ // Apply content override post-init.
+ // - markdown: DOM-free AST pipeline
+ // - plainText: builds PM paragraphs directly, preserving all whitespace
if (markdownOverride != null) {
try {
const { doc: newDoc } = markdownToPmDoc(markdownOverride, editor);
@@ -151,6 +158,25 @@ export async function openDocument(
source: meta,
});
}
+ } else if (plainTextOverride != null) {
+ try {
+ const schema = editor.state.schema;
+ const lines = plainTextOverride.split('\n');
+ const paragraphs = lines.map((line) => {
+ const content = line.length > 0 ? [schema.text(line)] : undefined;
+ return schema.nodes.paragraph.create(null, content);
+ });
+ const tr = editor.state.tr;
+ tr.replaceWith(0, editor.state.doc.content.size, paragraphs);
+ editor.dispatch(tr);
+ } catch (error) {
+ editor.destroy();
+ const message = error instanceof Error ? error.message : String(error);
+ throw new CliError('DOCUMENT_OPEN_FAILED', 'Failed to apply text content override.', {
+ message,
+ source: meta,
+ });
+ }
}
const adapters = getDocumentApiAdapters(editor);
diff --git a/apps/docs/document-api/reference/_generated-manifest.json b/apps/docs/document-api/reference/_generated-manifest.json
index 28a7e3fb77..a8a7c128b7 100644
--- a/apps/docs/document-api/reference/_generated-manifest.json
+++ b/apps/docs/document-api/reference/_generated-manifest.json
@@ -229,5 +229,5 @@
}
],
"marker": "{/* GENERATED FILE: DO NOT EDIT. Regenerate via `pnpm run docapi:sync`. */}",
- "sourceHash": "876ac06afd1496519bea3238baa8738286b264af5f1714b259b41a528d8043ff"
+ "sourceHash": "722ce545fc7c5373e23246fa7bbbc68b381e30bd8e2bc6c21d1616e6c5395ea9"
}
diff --git a/apps/docs/document-api/reference/insert.mdx b/apps/docs/document-api/reference/insert.mdx
index 44cf8d9ff5..e70e395eb9 100644
--- a/apps/docs/document-api/reference/insert.mdx
+++ b/apps/docs/document-api/reference/insert.mdx
@@ -103,6 +103,7 @@ _No fields._
- `INVALID_TARGET`
- `NO_OP`
- `CAPABILITY_UNAVAILABLE`
+- `UNSUPPORTED_ENVIRONMENT`
## Raw schemas
@@ -151,7 +152,8 @@ _No fields._
"enum": [
"INVALID_TARGET",
"NO_OP",
- "CAPABILITY_UNAVAILABLE"
+ "CAPABILITY_UNAVAILABLE",
+ "UNSUPPORTED_ENVIRONMENT"
]
},
"details": {},
@@ -204,7 +206,8 @@ _No fields._
"enum": [
"INVALID_TARGET",
"NO_OP",
- "CAPABILITY_UNAVAILABLE"
+ "CAPABILITY_UNAVAILABLE",
+ "UNSUPPORTED_ENVIRONMENT"
]
},
"details": {},
diff --git a/packages/document-api/src/contract/contract.test.ts b/packages/document-api/src/contract/contract.test.ts
index b96444d8c3..61f02bb9c3 100644
--- a/packages/document-api/src/contract/contract.test.ts
+++ b/packages/document-api/src/contract/contract.test.ts
@@ -91,6 +91,24 @@ describe('document-api contract catalog', () => {
expect(insertInputSchema.additionalProperties).toBe(false);
});
+ it('declares UNSUPPORTED_ENVIRONMENT for insert metadata and generated failure schema', () => {
+ const schemas = buildInternalContractSchemas();
+ const insertFailureSchema = schemas.operations.insert.failure as {
+ properties?: {
+ failure?: {
+ properties?: {
+ code?: {
+ enum?: string[];
+ };
+ };
+ };
+ };
+ };
+
+ expect(COMMAND_CATALOG.insert.possibleFailureCodes).toContain('UNSUPPORTED_ENVIRONMENT');
+ expect(insertFailureSchema.properties?.failure?.properties?.code?.enum).toContain('UNSUPPORTED_ENVIRONMENT');
+ });
+
it('derives OPERATION_IDS from OPERATION_DEFINITIONS keys', () => {
const definitionKeys = Object.keys(OPERATION_DEFINITIONS).sort();
const operationIds = [...OPERATION_IDS].sort();
diff --git a/packages/document-api/src/contract/operation-definitions.ts b/packages/document-api/src/contract/operation-definitions.ts
index 649e8019e2..b545323458 100644
--- a/packages/document-api/src/contract/operation-definitions.ts
+++ b/packages/document-api/src/contract/operation-definitions.ts
@@ -208,7 +208,7 @@ export const OPERATION_DEFINITIONS = {
idempotency: 'non-idempotent',
supportsDryRun: true,
supportsTrackedMode: true,
- possibleFailureCodes: ['INVALID_TARGET', 'NO_OP', 'CAPABILITY_UNAVAILABLE'],
+ possibleFailureCodes: ['INVALID_TARGET', 'NO_OP', 'CAPABILITY_UNAVAILABLE', 'UNSUPPORTED_ENVIRONMENT'],
throws: [...T_NOT_FOUND_CAPABLE, 'INVALID_TARGET'],
}),
referenceDocPath: 'insert.mdx',
diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts
index 27aefb767e..7d3d2222f4 100644
--- a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts
+++ b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts
@@ -267,6 +267,27 @@ describe('insertStructuredWrapper — dry-run', () => {
expect(dryRun.failure?.code).toBe('NO_OP');
});
+ it('does not mutate numbering state on dry-run html list insert', () => {
+ const converter = (editor as any).converter;
+ expect(converter).toBeDefined();
+
+ const numberingBefore = JSON.stringify(converter?.numbering ?? {});
+ const translatedBefore = JSON.stringify(converter?.translatedNumbering ?? {});
+
+ const dryRun = insertStructuredWrapper(
+ editor,
+ {
+        value: '<ul><li>- Dry run list item</li></ul>',
+ type: 'html',
+ },
+ { dryRun: true },
+ );
+
+ expect(dryRun.success).toBe(true);
+ expect(JSON.stringify(converter?.numbering ?? {})).toBe(numberingBefore);
+ expect(JSON.stringify(converter?.translatedNumbering ?? {})).toBe(translatedBefore);
+ });
+
it('mirrors runtime environment failure for html in dry-run mode', () => {
const opts = (editor as any).options ?? ((editor as any).options = {});
const prevDocument = opts.document;
diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts
index 989684657f..2b07ad565f 100644
--- a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts
+++ b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts
@@ -538,7 +538,13 @@ export function insertStructuredWrapper(
} else if (contentType === 'html') {
// NOTE: processContent has no dryRun flag — this runs the full HTML
// pipeline (DOM creation, wrapTextsInRuns) minus the final insertContentAt.
- // Acceptable for catching UNSUPPORTED_ENVIRONMENT / INVALID_TARGET early.
+ // Snapshot numbering state so we can roll back after the dry-run, since
+ // HTML list parsing allocates IDs/definitions on editor.converter.
+ const converter = (editor as any).converter;
+ const numberingSnapshot = converter?.numbering ? JSON.parse(JSON.stringify(converter.numbering)) : undefined;
+ const translatedNumberingSnapshot = converter?.translatedNumbering
+ ? JSON.parse(JSON.stringify(converter.translatedNumbering))
+ : undefined;
try {
const processedDoc = processContent({ content: value, type: 'html', editor });
if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') {
@@ -561,6 +567,14 @@ export function insertStructuredWrapper(
message: `HTML structured insert requires a DOM environment. ${message}`,
},
};
+ } finally {
+ // Roll back numbering mutations from the dry-run HTML pipeline.
+ if (converter && numberingSnapshot !== undefined) {
+ converter.numbering = numberingSnapshot;
+ }
+ if (converter && translatedNumberingSnapshot !== undefined) {
+ converter.translatedNumbering = translatedNumberingSnapshot;
+ }
}
}
return { success: true, resolution };
From 96a0b13b1484f2803b5341bf22eae1b27608dd78 Mon Sep 17 00:00:00 2001
From: Nick Bernal
Date: Wed, 25 Feb 2026 21:55:28 -0800
Subject: [PATCH 3/5] fix(markdown): preserve multi-paragraph list items as
single entries
---
.../importMarkdown.integration.test.js | 68 +++++++++++++++++++
.../helpers/markdown/mdastToProseMirror.ts | 9 ++-
2 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js
index 0c6abfdc95..54f7f0ce3c 100644
--- a/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js
+++ b/packages/super-editor/src/core/helpers/importMarkdown.integration.test.js
@@ -33,6 +33,24 @@ function collectNodeTypes(doc) {
return types;
}
+function collectTopLevelParagraphs(doc) {
+ const paragraphs = [];
+ doc.forEach((node) => {
+ if (node.type.name === 'paragraph') {
+ paragraphs.push(node);
+ }
+ });
+ return paragraphs;
+}
+
+function hasNumbering(node) {
+ return Boolean(node.attrs?.paragraphProperties?.numberingProperties);
+}
+
+function paragraphByText(paragraphs, expectedText) {
+ return paragraphs.find((node) => node.textContent.trim() === expectedText);
+}
+
describe('markdown to DOCX integration', () => {
it('converts complete markdown document with headings and lists', () => {
const markdown = `# Main Title
@@ -58,4 +76,54 @@ More text here.
expect(types).toContain('paragraph');
expect(types).toContain('run');
});
+
+ it('keeps a multi-paragraph bullet item as one logical list entry', () => {
+ const markdown = `- first paragraph
+
+ continuation paragraph
+- second bullet`;
+
+ const doc = createDocFromMarkdown(markdown, editor);
+ const paragraphs = collectTopLevelParagraphs(doc);
+
+ const first = paragraphByText(paragraphs, 'first paragraph');
+ const continuation = paragraphByText(paragraphs, 'continuation paragraph');
+ const second = paragraphByText(paragraphs, 'second bullet');
+
+ expect(first).toBeTruthy();
+ expect(continuation).toBeTruthy();
+ expect(second).toBeTruthy();
+
+ expect(hasNumbering(first)).toBe(true);
+ expect(hasNumbering(continuation)).toBe(false);
+ expect(hasNumbering(second)).toBe(true);
+
+ const numberedParagraphs = paragraphs.filter(hasNumbering);
+ expect(numberedParagraphs).toHaveLength(2);
+ });
+
+ it('keeps a multi-paragraph ordered item as one numbered entry', () => {
+ const markdown = `1. first numbered paragraph
+
+ continuation paragraph
+2. second numbered item`;
+
+ const doc = createDocFromMarkdown(markdown, editor);
+ const paragraphs = collectTopLevelParagraphs(doc);
+
+ const first = paragraphByText(paragraphs, 'first numbered paragraph');
+ const continuation = paragraphByText(paragraphs, 'continuation paragraph');
+ const second = paragraphByText(paragraphs, 'second numbered item');
+
+ expect(first).toBeTruthy();
+ expect(continuation).toBeTruthy();
+ expect(second).toBeTruthy();
+
+ expect(hasNumbering(first)).toBe(true);
+ expect(hasNumbering(continuation)).toBe(false);
+ expect(hasNumbering(second)).toBe(true);
+
+ const numberedParagraphs = paragraphs.filter(hasNumbering);
+ expect(numberedParagraphs).toHaveLength(2);
+ });
});
diff --git a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts
index 408d18f0cd..1883c3624c 100644
--- a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts
+++ b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts
@@ -171,11 +171,18 @@ function convertListItem(
listType: string,
): JsonNode[] {
const blocks: JsonNode[] = [];
+ let firstParagraphEmitted = false;
for (const child of item.children) {
if (child.type === 'paragraph') {
const runs = convertInlineChildren((child as MdastParagraph).children, ctx, []);
- blocks.push(makeListParagraph(runs, numId, depth));
+ if (!firstParagraphEmitted) {
+ blocks.push(makeListParagraph(runs, numId, depth));
+ firstParagraphEmitted = true;
+ } else {
+ // Continuation paragraph within the same list item — no list marker
+ blocks.push(makeParagraph(runs));
+ }
} else if (child.type === 'list') {
// Nested list — increase depth, reuse same listType context
blocks.push(...convertList(child as MdastList, ctx, depth + 1));
From bb66f5258bc613dcad00abc3a13f933a487fcd44 Mon Sep 17 00:00:00 2001
From: Nick Bernal
Date: Thu, 26 Feb 2026 15:01:05 -0800
Subject: [PATCH 4/5] feat(markdown): normalize fixed-width ASCII tables to GFM
pipe tables
---
.../src/core/helpers/markdown/index.ts | 1 +
.../helpers/markdown/markdownToPmContent.ts | 4 +-
...malizeFixedWidthTables.integration.test.ts | 227 ++++++++
.../normalizeFixedWidthTables.test.ts | 532 ++++++++++++++++++
.../markdown/normalizeFixedWidthTables.ts | 463 +++++++++++++++
.../src/core/helpers/markdown/types.ts | 6 +
.../tests/formatting/inline-formatting.ts | 2 +-
.../markdown/markdown-override-roundtrip.ts | 288 ++++++++++
.../markdown/multi-page-nda-test-document.md | 220 ++++++++
.../tests/styles/doc-defaults.ts | 4 +-
.../tests/tables/all-commands.ts | 8 +-
11 files changed, 1747 insertions(+), 8 deletions(-)
create mode 100644 packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts
create mode 100644 packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts
create mode 100644 packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts
create mode 100644 tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts
create mode 100644 tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md
diff --git a/packages/super-editor/src/core/helpers/markdown/index.ts b/packages/super-editor/src/core/helpers/markdown/index.ts
index 922c58f581..e2caaf1a3f 100644
--- a/packages/super-editor/src/core/helpers/markdown/index.ts
+++ b/packages/super-editor/src/core/helpers/markdown/index.ts
@@ -9,6 +9,7 @@
export { markdownToPmDoc, markdownToPmFragment } from './markdownToPmContent.js';
export { parseMarkdownToAst } from './parseMarkdownAst.js';
+export { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js';
export type {
MarkdownConversionOptions,
MarkdownConversionResult,
diff --git a/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts
index 13939051e4..f2cbcddee1 100644
--- a/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts
+++ b/packages/super-editor/src/core/helpers/markdown/markdownToPmContent.ts
@@ -15,6 +15,7 @@ import type { Node as PmNode } from 'prosemirror-model';
import type { Editor } from '../../Editor.js';
import { parseMarkdownToAst } from './parseMarkdownAst.js';
import { convertMdastToBlocks } from './mdastToProseMirror.js';
+import { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js';
import { wrapTextsInRuns } from '../../inputRules/docx-paste/docx-paste.js';
import type {
MarkdownConversionOptions,
@@ -88,7 +89,8 @@ function parseAndConvert(
editor: Editor,
options: MarkdownConversionOptions,
): { blocks: ReturnType<typeof convertMdastToBlocks>; diagnostics: MdastConversionContext['diagnostics'] } {
- const ast = parseMarkdownToAst(markdown);
+ const source = options.normalizeFixedWidthTables === false ? markdown : normalizeFixedWidthTables(markdown);
+ const ast = parseMarkdownToAst(source);
const ctx: MdastConversionContext = {
editor,
diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts
new file mode 100644
index 0000000000..d4b978621b
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts
@@ -0,0 +1,227 @@
+/**
+ * Integration test: normalizeFixedWidthTables → remark-gfm AST parsing.
+ *
+ * Verifies that the normalizer's GFM output is correctly parsed into mdast
+ * table nodes by the same remark pipeline used in production.
+ */
+import { describe, expect, it } from 'vitest';
+import { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js';
+import { parseMarkdownToAst } from './parseMarkdownAst.js';
+import type { Root, Table, TableRow, TableCell } from 'mdast';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function findTables(tree: Root): Table[] {
+ const tables: Table[] = [];
+ function walk(node: any) {
+ if (node.type === 'table') tables.push(node);
+ if (node.children) node.children.forEach(walk);
+ }
+ walk(tree);
+ return tables;
+}
+
+function tableDimensions(table: Table): { rows: number; cols: number } {
+ const rows = table.children.length;
+ const cols = rows > 0 ? table.children[0].children.length : 0;
+ return { rows, cols };
+}
+
+function cellText(cell: TableCell): string {
+ return cell.children
+ .map((c: any) => {
+ if (c.type === 'text') return c.value;
+ if (c.children) return c.children.map((cc: any) => cc.value ?? '').join('');
+ return '';
+ })
+ .join('');
+}
+
+function rowTexts(row: TableRow): string[] {
+ return row.children.map(cellText);
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe('normalizer → remark-gfm AST integration', () => {
+ it('produces zero tables from raw ASCII input (baseline)', () => {
+ const raw = [' Clause Description', ' ---------- -----------', ' Term Protection'].join('\n');
+
+ const ast = parseMarkdownToAst(raw);
+ expect(findTables(ast)).toHaveLength(0);
+ });
+
+ it('produces a valid mdast table after normalization', () => {
+ const raw = [' Clause Description', ' ---------- -----------', ' Term Protection'].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ const tables = findTables(ast);
+
+ expect(tables).toHaveLength(1);
+ expect(tableDimensions(tables[0])).toEqual({ rows: 2, cols: 2 }); // header + 1 data
+ expect(rowTexts(tables[0].children[0])).toEqual(['Clause', 'Description']);
+ expect(rowTexts(tables[0].children[1])).toEqual(['Term', 'Protection']);
+ });
+
+ it('Section 5 table (no borders): 3 columns, 3 data rows', () => {
+ const raw = [
+ ' Clause Description Duration',
+ ' ---------------------- --------------------------------- -----------',
+ ' Confidentiality Term Protection of confidential info 5 years',
+ ' Evaluation Period Business evaluation timeline 12 months',
+ ' Survival Clause Survives termination Yes',
+ ].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ const tables = findTables(ast);
+
+ expect(tables).toHaveLength(1);
+ expect(tableDimensions(tables[0])).toEqual({ rows: 4, cols: 3 }); // header + 3 data
+ expect(rowTexts(tables[0].children[0])).toEqual(['Clause', 'Description', 'Duration']);
+ expect(rowTexts(tables[0].children[3])).toEqual(['Survival Clause', 'Survives termination', 'Yes']);
+ });
+
+ it('Appendix A table (bordered, continuations): 4 columns, 4 data rows', () => {
+ const raw = [
+ ' -------------------------------------------------------------------------',
+ ' Classification Description Example Required Controls',
+ ' ------------------- --------------- ------------ ------------------------',
+ ' Public No restrictions Press None',
+ ' release ',
+ '',
+ ' Internal Limited Internal Access controls',
+ ' distribution memo ',
+ '',
+ ' Confidential Sensitive Financial Encryption + MFA',
+ ' business data reports ',
+ '',
+ ' Restricted Highly Source code Strict access + logging',
+ ' sensitive ',
+ ' -------------------------------------------------------------------------',
+ ].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ const tables = findTables(ast);
+
+ expect(tables).toHaveLength(1);
+ expect(tableDimensions(tables[0])).toEqual({ rows: 5, cols: 4 }); // header + 4 data
+
+ // Verify continuation lines merged correctly
+ expect(rowTexts(tables[0].children[1])).toEqual(['Public', 'No restrictions', 'Press release', 'None']);
+ expect(rowTexts(tables[0].children[4])).toEqual([
+ 'Restricted',
+ 'Highly sensitive',
+ 'Source code',
+ 'Strict access + logging',
+ ]);
+ });
+
+ it('Signatures table (no borders, form fields): 2 columns, 4 data rows', () => {
+ const raw = [
+ ' Disclosing Party Receiving Party',
+ ' ----------------------------- -----------------------------',
+ ' Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ' Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ' Signature: \\_\\_\\_\\_\\_\\_ Signature: \\_\\_\\_\\_\\_\\_',
+ ' Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ const tables = findTables(ast);
+
+ expect(tables).toHaveLength(1);
+ expect(tableDimensions(tables[0])).toEqual({ rows: 5, cols: 2 }); // header + 4 data
+ expect(rowTexts(tables[0].children[0])).toEqual(['Disclosing Party', 'Receiving Party']);
+ });
+
+ it('does not produce table from indent-mismatched lines', () => {
+ const raw = ['Header1 Header2', ' -------- --------', ' Data1 Data2'].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ expect(findTables(ast)).toHaveLength(0);
+ });
+
+ it('preserves prose when false bottom border is rejected', () => {
+ const raw = [
+ ' -------------------------------------------------------------------------',
+ ' Classification Description Example',
+ ' ------------------- --------------- ------------',
+ ' Public No restrictions Press',
+ '',
+ 'This should not be a table row.',
+ '',
+ '---',
+ ].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ const tables = findTables(ast);
+
+ // Should produce exactly 1 table (the real one), not swallow the prose.
+ expect(tables).toHaveLength(1);
+ expect(tableDimensions(tables[0])).toEqual({ rows: 2, cols: 3 }); // header + 1 data row
+ // The prose and thematic break should remain as non-table content.
+ expect(normalized).toContain('This should not be a table row.');
+ });
+
+ it('full NDA fixture produces 3 mdast tables', () => {
+ const raw = [
+ '## Term and Termination',
+ '',
+ ' Clause Description Duration',
+ ' ---------------------- --------------------------------- -----------',
+ ' Confidentiality Term Protection of confidential info 5 years',
+ ' Evaluation Period Business evaluation timeline 12 months',
+ ' Survival Clause Survives termination Yes',
+ '',
+ '---',
+ '',
+ '## Appendix A',
+ '',
+ ' -------------------------------------------------------------------------',
+ ' Classification Description Example Required Controls',
+ ' ------------------- --------------- ------------ ------------------------',
+ ' Public No restrictions Press None',
+ ' release ',
+ '',
+ ' Internal Limited Internal Access controls',
+ ' distribution memo ',
+ '',
+ ' Confidential Sensitive Financial Encryption + MFA',
+ ' business data reports ',
+ '',
+ ' Restricted Highly Source code Strict access + logging',
+ ' sensitive ',
+ ' -------------------------------------------------------------------------',
+ '',
+ '---',
+ '',
+ '## Signatures',
+ '',
+ ' Disclosing Party Receiving Party',
+ ' ----------------------------- -----------------------------',
+ ' Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ' Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ' Signature: \\_\\_\\_\\_\\_\\_ Signature: \\_\\_\\_\\_\\_\\_',
+ ' Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ const tables = findTables(ast);
+
+ expect(tables).toHaveLength(3);
+ expect(tableDimensions(tables[0])).toEqual({ rows: 4, cols: 3 }); // Section 5
+ expect(tableDimensions(tables[1])).toEqual({ rows: 5, cols: 4 }); // Appendix A
+ expect(tableDimensions(tables[2])).toEqual({ rows: 5, cols: 2 }); // Signatures
+ });
+});
diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts
new file mode 100644
index 0000000000..50318ae4c5
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts
@@ -0,0 +1,532 @@
+import { describe, expect, it } from 'vitest';
+import { normalizeFixedWidthTables } from './normalizeFixedWidthTables.js';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Trim shared leading indentation from a template literal. */
+function dedent(s: string): string {
+ const lines = s.split('\n');
+ // Drop leading/trailing empty lines from the template literal
+ if (lines[0].trim() === '') lines.shift();
+ if (lines.length > 0 && lines[lines.length - 1].trim() === '') lines.pop();
+
+ const indent = Math.min(...lines.filter((l) => l.trim().length > 0).map((l) => l.match(/^(\s*)/)![1].length));
+ return lines.map((l) => l.slice(indent)).join('\n');
+}
+
+/** Extract lines matching `| ... |` from the output. */
+function extractPipeTable(output: string): string[] {
+ return output.split('\n').filter((l) => l.startsWith('|'));
+}
+
+// ---------------------------------------------------------------------------
+// Section 5 style: unbounded table (no top/bottom borders)
+// ---------------------------------------------------------------------------
+
+describe('normalizeFixedWidthTables', () => {
+ describe('unbounded tables (no borders)', () => {
+ it('converts a simple 3-column table without borders', () => {
+ const input = dedent(`
+ Clause Description Duration
+ ---------------------- --------------------------------- -----------
+ Confidentiality Term Protection of confidential info 5 years
+ Evaluation Period Business evaluation timeline 12 months
+ Survival Clause Survives termination Yes
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+ const table = extractPipeTable(output);
+
+ expect(table).toEqual([
+ '| Clause | Description | Duration |',
+ '| --- | --- | --- |',
+ '| Confidentiality Term | Protection of confidential info | 5 years |',
+ '| Evaluation Period | Business evaluation timeline | 12 months |',
+ '| Survival Clause | Survives termination | Yes |',
+ ]);
+ });
+
+ it('stops at first blank line for unbounded tables', () => {
+ const input = dedent(`
+ Name Age
+ ------ ---
+ Alice 30
+
+ This is a regular paragraph.
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+
+ expect(output).toContain('| Alice | 30 |');
+ expect(output).toContain('This is a regular paragraph.');
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Appendix A style: bordered table with continuation lines + blank separators
+ // ---------------------------------------------------------------------------
+
+ describe('bordered tables (top + bottom borders)', () => {
+ it('converts Appendix A with wrapped continuation lines and blank separators', () => {
+ const input = dedent(`
+ -------------------------------------------------------------------------
+ Classification Description Example Required Controls
+ ------------------- --------------- ------------ ------------------------
+ Public No restrictions Press None
+ release
+
+ Internal Limited Internal Access controls
+ distribution memo
+
+ Confidential Sensitive Financial Encryption + MFA
+ business data reports
+
+ Restricted Highly Source code Strict access + logging
+ sensitive
+ -------------------------------------------------------------------------
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+ const table = extractPipeTable(output);
+
+ expect(table).toEqual([
+ '| Classification | Description | Example | Required Controls |',
+ '| --- | --- | --- | --- |',
+ '| Public | No restrictions | Press release | None |',
+ '| Internal | Limited distribution | Internal memo | Access controls |',
+ '| Confidential | Sensitive business data | Financial reports | Encryption + MFA |',
+ '| Restricted | Highly sensitive | Source code | Strict access + logging |',
+ ]);
+ });
+
+ it('consumes top and bottom border lines', () => {
+ const input = dedent(`
+ before
+ --------------------
+ A B
+ ----- -----
+ 1 2
+ --------------------
+ after
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+
+ expect(output).not.toContain('----');
+ expect(output).toContain('before');
+ expect(output).toContain('after');
+ expect(output).toContain('| A | B |');
+ expect(output).toContain('| 1 | 2 |');
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Signatures style: form-field content with escaped underscores
+ // ---------------------------------------------------------------------------
+
+ describe('signature/form tables', () => {
+ it('converts a two-column table with escaped underscores', () => {
+ const input = dedent(`
+ Disclosing Party Receiving Party
+ ----------------------------- -----------------------------
+ Name: \\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_
+ Title: \\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_
+ Signature: \\_\\_\\_ Signature: \\_\\_\\_
+ Date: \\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+ const table = extractPipeTable(output);
+
+ expect(table[0]).toBe('| Disclosing Party | Receiving Party |');
+ expect(table[1]).toBe('| --- | --- |');
+ expect(table).toHaveLength(6); // header + separator + 4 data rows
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Leading indentation
+ // ---------------------------------------------------------------------------
+
+ describe('indentation handling', () => {
+ it('handles 2-space indented tables (as in the NDA fixture)', () => {
+ const input = [
+ ' Clause Description',
+ ' ---------- -----------',
+ ' Term Protection',
+ ' Period Evaluation',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ expect(output).toContain('| Clause | Description |');
+ expect(output).toContain('| Term | Protection |');
+ expect(output).toContain('| Period | Evaluation |');
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Edge cases and safety
+ // ---------------------------------------------------------------------------
+
+ describe('pass-through (no transformation)', () => {
+ it('passes through text with no tables', () => {
+ const input = '# Hello\n\nThis is a paragraph.\n\n- item 1\n- item 2';
+ expect(normalizeFixedWidthTables(input)).toBe(input);
+ });
+
+ it('passes through GFM pipe tables unchanged', () => {
+ const input = '| A | B |\n| --- | --- |\n| 1 | 2 |';
+ expect(normalizeFixedWidthTables(input)).toBe(input);
+ });
+
+ it('passes through thematic breaks (--- lines) without matching', () => {
+ const input = dedent(`
+ # Section 1
+
+ Some text.
+
+ ---
+
+ # Section 2
+
+ More text.
+
+ ---
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+
+ // No pipe tables should be produced
+ expect(extractPipeTable(output)).toHaveLength(0);
+ // Content preserved
+ expect(output).toContain('# Section 1');
+ expect(output).toContain('# Section 2');
+ });
+
+ it('does not match a single dash group (thematic break)', () => {
+ const input = '------------------------------------------------------------------------';
+ expect(normalizeFixedWidthTables(input)).toBe(input);
+ });
+
+ it('preserves malformed partial table structures', () => {
+ const input = dedent(`
+ Header only, no data
+ ------ ------
+ `);
+
+ // Guide row exists but no data rows → no transformation
+ const output = normalizeFixedWidthTables(input);
+ expect(extractPipeTable(output)).toHaveLength(0);
+ });
+ });
+
+ describe('fenced code blocks', () => {
+ it('does not transform tables inside fenced code blocks', () => {
+ const input = dedent(`
+ \`\`\`
+ Name Age
+ ------ ---
+ Alice 30
+ \`\`\`
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+
+ // Should NOT produce pipe table
+ expect(extractPipeTable(output)).toHaveLength(0);
+ // Original content preserved
+ expect(output).toContain('Alice 30');
+ });
+
+ it('does not transform tables inside tilde-fenced code blocks', () => {
+ const input = dedent(`
+ ~~~
+ Name Age
+ ------ ---
+ Alice 30
+ ~~~
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+ expect(extractPipeTable(output)).toHaveLength(0);
+ });
+
+ it('transforms tables before and after fenced code blocks', () => {
+ const input = dedent(`
+ A B
+ ----- -----
+ 1 2
+
+ \`\`\`
+ C D
+ ----- -----
+ 3 4
+ \`\`\`
+
+ E F
+ ----- -----
+ 5 6
+ `);
+
+ const output = normalizeFixedWidthTables(input);
+ const tables = extractPipeTable(output);
+
+ // Two tables converted (before and after fence), one preserved inside fence
+ expect(tables.filter((l) => l.startsWith('| A'))).toHaveLength(1);
+ expect(tables.filter((l) => l.startsWith('| E'))).toHaveLength(1);
+ expect(output).toContain('C D');
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Regression: indent mismatch must not corrupt cell text (Bug #1)
+ // ---------------------------------------------------------------------------
+
+ describe('indent mismatch rejection', () => {
+ it('rejects table when header indent differs from guide indent', () => {
+ // Header at column 0, guide indented 2 spaces → mismatch → no table.
+ const input = ['Header1 Header2', ' -------- --------', ' Data1 Data2'].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ // No pipe table should be produced — the candidate is rejected.
+ expect(extractPipeTable(output)).toHaveLength(0);
+ expect(output).toContain('Header1 Header2');
+ });
+
+ it('rejects bordered table when header indent differs from guide indent', () => {
+ const input = [
+ ' --------------------',
+ 'Header1 Header2',
+ ' -------- --------',
+ ' Data1 Data2',
+ ' --------------------',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+ expect(extractPipeTable(output)).toHaveLength(0);
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Regression: non-final column overflow must not corrupt cells
+ // ---------------------------------------------------------------------------
+
+ describe('non-final column overflow rejection', () => {
+ it('rejects table when first data row overflows a non-final column', () => {
+ const input = ['A B C', '----- ----- -----', '1stcol_is_very_long bbb ccc'].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ // No pipe table — the guide doesn't match the data layout.
+ expect(extractPipeTable(output)).toHaveLength(0);
+ expect(output).toContain('1stcol_is_very_long');
+ });
+
+ it('skips a later overflowing row and its continuation lines', () => {
+ const input = [
+ 'A B C',
+ '----- ----- -----',
+ 'aaa bbb ccc',
+ 'overflow_row_here long zzz',
+ ' cont yyy',
+ 'ddd eee fff',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+ const table = extractPipeTable(output);
+
+ // Valid rows preserved, overflowing row + its continuation skipped.
+ expect(table).toContainEqual('| aaa | bbb | ccc |');
+ expect(table).toContainEqual('| ddd | eee | fff |');
+ expect(table).not.toContainEqual(expect.stringContaining('overflow'));
+ expect(table).not.toContainEqual(expect.stringContaining('cont'));
+ expect(table).not.toContainEqual(expect.stringContaining('yyy'));
+ });
+
+ it('rejects multi-word overflow in a non-final column', () => {
+ const input = ['A B', '----- -----', 'ab cdef zzz'].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ // "ab cdef" crosses column A boundary → reject entire table.
+ expect(extractPipeTable(output)).toHaveLength(0);
+ expect(output).toContain('ab cdef');
+ });
+
+ it('rejects overflow with leading padding in a non-final column', () => {
+ const input = ['A B', '----- -----', ' abcdef zzz'].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ // Leading space + overflowing value → reject entire table.
+ expect(extractPipeTable(output)).toHaveLength(0);
+ expect(output).toContain('abcdef');
+ });
+
+ it('allows overflow in the last column (reads to end of line)', () => {
+ const input = ['A B', '----- -----', 'aaa this value is very long and exceeds the column width'].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+ const table = extractPipeTable(output);
+
+ expect(table).toContainEqual('| aaa | this value is very long and exceeds the column width |');
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Regression: false bottom border must not swallow prose (Bug #2)
+ // ---------------------------------------------------------------------------
+
+ describe('false bottom border rejection', () => {
+ it('does not swallow prose between table and unrelated thematic break', () => {
+ const input = [
+ ' -------------------------------------------------------------------------',
+ ' Classification Description Example',
+ ' ------------------- --------------- ------------',
+ ' Public No restrictions Press',
+ '',
+ 'This should not be a table row.',
+ 'Some other content that has nothing to do with the table.',
+ '',
+ '---',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ // The table should still be converted (1 data row).
+ expect(output).toContain('| Classification | Description | Example |');
+ expect(output).toContain('| Public | No restrictions | Press |');
+
+ // Prose must be preserved as-is, not absorbed into the table.
+ expect(output).toContain('This should not be a table row.');
+ expect(output).toContain('Some other content that has nothing to do with the table.');
+
+ // The thematic break must be preserved.
+ expect(output.split('\n').filter((l) => l.trim() === '---')).toHaveLength(1);
+ });
+
+ it('accepts bordered table when cell text exceeds guide width', () => {
+ const input = [
+ ' -----------------------',
+ ' A B',
+ ' ----- -----',
+ ' 1 This is a very very long value that exceeds column width',
+ '',
+ ' 2 ok',
+ ' -----------------------',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+ const table = extractPipeTable(output);
+
+ // Both data rows must be present with full cell text preserved.
+ expect(table).toEqual([
+ '| A | B |',
+ '| --- | --- |',
+ '| 1 | This is a very very long value that exceeds column width |',
+ '| 2 | ok |',
+ ]);
+ });
+
+ it('does not swallow prose when bottom border is missing entirely', () => {
+ const input = [
+ ' -------------------------------------------------------------------------',
+ ' A B',
+ ' ----- -----',
+ ' 1 2',
+ '',
+ 'Regular paragraph here.',
+ '',
+ '## Next Section',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ expect(output).toContain('| A | B |');
+ expect(output).toContain('| 1 | 2 |');
+ expect(output).toContain('Regular paragraph here.');
+ expect(output).toContain('## Next Section');
+ });
+ });
+
+ // ---------------------------------------------------------------------------
+ // Full NDA fixture regression
+ // ---------------------------------------------------------------------------
+
+ describe('full NDA fixture', () => {
+ it('converts all three tables from the NDA fixture', () => {
+ // Exact content from multi-page-nda-test-document.md (relevant sections)
+ const input = [
+ '## Term and Termination',
+ '',
+ ' Clause Description Duration',
+ ' ---------------------- --------------------------------- -----------',
+ ' Confidentiality Term Protection of confidential info 5 years',
+ ' Evaluation Period Business evaluation timeline 12 months',
+ ' Survival Clause Survives termination Yes',
+ '',
+ 'This Agreement remains in effect.',
+ '',
+ '---',
+ '',
+ '## Appendix A -- Data Classification Table',
+ '',
+ ' -------------------------------------------------------------------------',
+ ' Classification Description Example Required Controls',
+ ' ------------------- --------------- ------------ ------------------------',
+ ' Public No restrictions Press None',
+ ' release ',
+ '',
+ ' Internal Limited Internal Access controls',
+ ' distribution memo ',
+ '',
+ ' Confidential Sensitive Financial Encryption + MFA',
+ ' business data reports ',
+ '',
+ ' Restricted Highly Source code Strict access + logging',
+ ' sensitive ',
+ ' -------------------------------------------------------------------------',
+ '',
+ '---',
+ '',
+ '## Signatures',
+ '',
+ ' Disclosing Party Receiving Party',
+ ' ----------------------------- -----------------------------',
+ ' Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Name: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ' Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Title: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ' Signature: \\_\\_\\_\\_\\_\\_ Signature: \\_\\_\\_\\_\\_\\_',
+ ' Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_ Date: \\_\\_\\_\\_\\_\\_\\_\\_\\_\\_',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+ const tables = extractPipeTable(output);
+
+ // Table 1: Term and Termination (3 cols, 3 data rows)
+ expect(tables).toContainEqual('| Clause | Description | Duration |');
+ expect(tables).toContainEqual('| Confidentiality Term | Protection of confidential info | 5 years |');
+ expect(tables).toContainEqual('| Evaluation Period | Business evaluation timeline | 12 months |');
+
+ // Table 2: Appendix A (4 cols, 4 data rows with merged continuations)
+ expect(tables).toContainEqual('| Classification | Description | Example | Required Controls |');
+ expect(tables).toContainEqual('| Public | No restrictions | Press release | None |');
+ expect(tables).toContainEqual('| Restricted | Highly sensitive | Source code | Strict access + logging |');
+
+ // Table 3: Signatures (2 cols)
+ expect(tables).toContainEqual('| Disclosing Party | Receiving Party |');
+
+ // Non-table content preserved
+ expect(output).toContain('## Term and Termination');
+ expect(output).toContain('This Agreement remains in effect.');
+ expect(output).toContain('## Appendix A -- Data Classification Table');
+ expect(output).toContain('## Signatures');
+
+ // Thematic breaks preserved
+ expect(output.split('\n').filter((l) => l.trim() === '---')).toHaveLength(2);
+ });
+ });
+});
diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts
new file mode 100644
index 0000000000..8851108c83
--- /dev/null
+++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts
@@ -0,0 +1,463 @@
+/**
+ * Pre-AST normalizer that converts fixed-width ASCII tables to GFM pipe tables.
+ *
+ * LLMs commonly produce pandoc-style fixed-width tables in markdown:
+ *
+ * Name Age City
+ * ----------- ------ --------
+ * Alice 30 Seattle
+ * Bob 25 Portland
+ *
+ * remark-gfm only recognizes GFM pipe-table syntax, so these become paragraphs
+ * instead of table nodes in the AST. This module detects fixed-width tables and
+ * rewrites them before AST parsing:
+ *
+ * | Name | Age | City |
+ * | --- | --- | --- |
+ * | Alice | 30 | Seattle |
+ * | Bob | 25 | Portland |
+ *
+ * Supported layouts:
+ * (A) border → header → guide → data... → border (bordered)
+ * (B) header → guide → data... (unbounded)
+ *
+ * Continuation lines (empty first column) are merged into the preceding row.
+ * In bordered tables, blank lines between data rows are treated as row separators.
+ * In unbounded tables, a blank line terminates the table.
+ *
+ * Fenced code blocks are skipped entirely.
+ */
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Detect pandoc-style fixed-width ASCII tables in a markdown string and
+ * rewrite them as GFM pipe tables that remark-gfm can parse.
+ *
+ * Fenced code blocks are skipped. Bordered (top/bottom border) and
+ * unbounded (header + guide only) layouts are both supported, including
+ * continuation lines that wrap across multiple rows.
+ *
+ * @param markdown - Raw markdown source, possibly containing fixed-width tables.
+ * @returns The markdown with fixed-width tables replaced by GFM pipe-table syntax.
+ * Returns the input unchanged if no fixed-width tables are detected.
+ */
+export function normalizeFixedWidthTables(markdown: string): string {
+ const lines = markdown.split('\n');
+ const output: string[] = [];
+ let i = 0;
+
+ while (i < lines.length) {
+ if (isFenceOpener(lines[i])) {
+ const closeIdx = findFenceClose(lines, i);
+ for (let j = i; j <= closeIdx; j++) output.push(lines[j]);
+ i = closeIdx + 1;
+ continue;
+ }
+
+ const table = tryParseTableAt(lines, i);
+ if (table) {
+ output.push(...toGfmPipeTable(table));
+ i = table.endLine + 1;
+ continue;
+ }
+
+ output.push(lines[i]);
+ i++;
+ }
+
+ return output.join('\n');
+}
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/** Character span of a single column, relative to content start (after indent). */
+interface ColumnSpan {
+ /** Start character index (inclusive), relative to content start. */
+ start: number;
+ /** End character index (inclusive), relative to content start. */
+ end: number;
+}
+
+/** Result of parsing a column guide row. */
+interface GuideInfo {
+ /** Column spans, relative to content start (after stripping leading indent). */
+ spans: ColumnSpan[];
+ /** Number of leading whitespace characters on the guide row. */
+ indent: number;
+}
+
+interface ParsedTable {
+ /** First line of the table block (top border or header). */
+ startLine: number;
+ /** Last line of the table block (bottom border or last data row). */
+ endLine: number;
+ headers: string[];
+ rows: string[][];
+}
+
+interface TableAnchors {
+ topBorderIdx?: number;
+ headerIdx: number;
+ guideIdx: number;
+}
+
+// ---------------------------------------------------------------------------
+// Fenced code block handling
+// ---------------------------------------------------------------------------
+
+const FENCE_OPEN_RE = /^( {0,3})(`{3,}|~{3,})/;
+
+function isFenceOpener(line: string): boolean {
+ return FENCE_OPEN_RE.test(line);
+}
+
+function findFenceClose(lines: string[], openIdx: number): number {
+ const match = lines[openIdx].match(FENCE_OPEN_RE);
+ if (!match) return openIdx;
+
+ const char = match[2][0] === '`' ? '`' : '~';
+ const minLen = match[2].length;
+ const closeRe = new RegExp(`^( {0,3})${char}{${minLen},}\\s*$`);
+
+ for (let i = openIdx + 1; i < lines.length; i++) {
+ if (closeRe.test(lines[i])) return i;
+ }
+ return lines.length - 1; // unclosed fence: consume to end
+}
+
+// ---------------------------------------------------------------------------
+// Line classification
+// ---------------------------------------------------------------------------
+
+/** Count leading whitespace characters on a line. */
+function leadingIndent(line: string): number {
+ const match = line.match(/^(\s*)/);
+ return match ? match[1].length : 0;
+}
+
+/**
+ * Parse a column guide row: 2+ groups of consecutive dashes (≥3 each),
+ * separated by whitespace. Returns column spans (relative to content start
+ * after stripping indent) and the indent amount, or null.
+ */
+function parseColumnGuide(line: string): GuideInfo | null {
+ const trimmed = line.trimEnd();
+ if (!trimmed) return null;
+
+ // Must contain only dashes and spaces
+ if (!/^[\s-]+$/.test(trimmed)) return null;
+
+ const indent = leadingIndent(trimmed);
+ const content = trimmed.slice(indent);
+
+ const spans: ColumnSpan[] = [];
+ let j = 0;
+
+ while (j < content.length) {
+ if (content[j] === '-') {
+ const start = j;
+ while (j < content.length && content[j] === '-') j++;
+ if (j - start >= 3) {
+ spans.push({ start, end: j - 1 });
+ }
+ } else {
+ j++;
+ }
+ }
+
+ return spans.length >= 2 ? { spans, indent } : null;
+}
+
+/** A solid border line is a single unbroken run of dashes (optional indent). */
+function isSolidBorder(line: string): boolean {
+ const trimmed = line.trim();
+ return trimmed.length >= 3 && /^-+$/.test(trimmed);
+}
+
+function isBlank(line: string): boolean {
+ return line.trim().length === 0;
+}
+
+// ---------------------------------------------------------------------------
+// Cell extraction
+// ---------------------------------------------------------------------------
+
+/**
+ * Extract cell values from a line using column spans.
+ * Strips `indent` characters from the line before slicing so that
+ * spans (which are relative to content start) align correctly.
+ */
+function extractCells(line: string, spans: ColumnSpan[], indent: number): string[] {
+ const content = line.length > indent ? line.slice(indent) : '';
+ const lastIdx = spans.length - 1;
+ return spans.map(({ start, end }, i) => {
+ if (start >= content.length) return '';
+ // Last column reads to end of line so overflow text isn't truncated.
+ const stop = i === lastIdx ? content.length : Math.min(end + 1, content.length);
+ return content.slice(start, stop).trim();
+ });
+}
+
+function hasAlphanumericContent(cells: string[]): boolean {
+ return cells.some((cell) => /[a-zA-Z0-9]/.test(cell));
+}
+
+/**
+ * Detect whether any non-final column has a value that overflows past its
+ * span boundary.
+ *
+ * Finds the continuous non-space run that crosses the boundary and measures
+ * the space gap immediately before it. A wide gap (≥2 spaces) is a column
+ * separator — the boundary text is the next column starting early due to
+ * short padding. A narrow gap (0–1 spaces) is a word break within the same
+ * cell value, so the value genuinely overflows.
+ *
+ * `ab cdef` in a 5-char span → 1-space word break → overflow
+ * ` abcdef` in a 5-char span → leading pad, no content before → overflow
+ * `val X` in a 29-char span → wide gap → early column start, allowed
+ */
+function cellsOverflow(line: string, spans: ColumnSpan[], indent: number): boolean {
+ const content = line.length > indent ? line.slice(indent) : '';
+ for (let i = 0; i < spans.length - 1; i++) {
+ const { start, end } = spans[i];
+ const afterCol = end + 1;
+ if (afterCol >= content.length || content[afterCol] === ' ') continue;
+ if (content[end] === ' ') continue;
+ // Find where the non-space run crossing the boundary starts.
+ let runStart = end;
+ while (runStart > start && content[runStart - 1] !== ' ') runStart--;
+ // Count consecutive spaces immediately before the run.
+ let gapWidth = 0;
+ for (let j = runStart - 1; j >= start; j--) {
+ if (content[j] === ' ') gapWidth++;
+ else break;
+ }
+ // A wide gap (≥2 spaces) separates the column's value from the next
+ // column's early-start text. A narrow gap is a word break within the
+ // same value — the cell genuinely overflows.
+ if (gapWidth < 2) return true;
+ }
+ return false;
+}
+
+// ---------------------------------------------------------------------------
+// Table recognition
+// ---------------------------------------------------------------------------
+
+/**
+ * Try to recognize a fixed-width table starting at line `i`.
+ *
+ * Layout A: border at i, header at i+1, guide at i+2.
+ * Layout B: header at i, guide at i+1.
+ *
+ * Rejects candidates where the header indent differs from the guide indent,
+ * which prevents cell text corruption from misaligned column slicing.
+ */
+function tryParseTableAt(lines: string[], i: number): ParsedTable | null {
+ // Layout A: top border → header → guide
+ if (i + 2 < lines.length && isSolidBorder(lines[i])) {
+ const guide = parseColumnGuide(lines[i + 2]);
+ if (guide && leadingIndent(lines[i + 1]) === guide.indent) {
+ if (hasAlphanumericContent(extractCells(lines[i + 1], guide.spans, guide.indent))) {
+ return buildTable(lines, guide, { topBorderIdx: i, headerIdx: i + 1, guideIdx: i + 2 });
+ }
+ }
+ }
+
+ // Layout B: header → guide
+ if (i + 1 < lines.length) {
+ const guide = parseColumnGuide(lines[i + 1]);
+ if (guide && leadingIndent(lines[i]) === guide.indent) {
+ if (hasAlphanumericContent(extractCells(lines[i], guide.spans, guide.indent))) {
+ return buildTable(lines, guide, { headerIdx: i, guideIdx: i + 1 });
+ }
+ }
+ }
+
+ return null;
+}
+
+// ---------------------------------------------------------------------------
+// Table building
+// ---------------------------------------------------------------------------
+
+/** Max lines to scan forward when searching for a bottom border. */
+const MAX_BORDER_SCAN = 100;
+
+/** Bottom border must be at least this fraction of top border width to match. */
+const MIN_BORDER_WIDTH_RATIO = 0.4;
+
+function buildTable(lines: string[], guide: GuideInfo, anchors: TableAnchors): ParsedTable | null {
+ const { topBorderIdx, headerIdx, guideIdx } = anchors;
+ const { spans, indent } = guide;
+ const startLine = topBorderIdx ?? headerIdx;
+ const headers = extractCells(lines[headerIdx], spans, indent);
+
+ // Only look for a bottom border when a top border is present.
+ let bottomBorderIdx = topBorderIdx !== undefined ? findBottomBorder(lines, guideIdx + 1) : undefined;
+
+ // Validate the candidate bottom border against two structural signals:
+ // 1. Border shape: the candidate must resemble the top border (similar width/indent).
+ // This prevents short thematic breaks (---) from being mistaken for table borders.
+ // 2. Intermediate indent: all non-blank lines between the guide and the candidate
+ // must have at least the table's leading indent, ruling out unrelated prose.
+ if (bottomBorderIdx !== undefined && topBorderIdx !== undefined) {
+ const borderOk = bordersMatch(lines[topBorderIdx], lines[bottomBorderIdx]);
+ const contentOk = intermediateLinesMeetIndent(lines, guideIdx + 1, bottomBorderIdx, indent);
+ if (!borderOk || !contentOk) {
+ bottomBorderIdx = undefined; // discard false bottom border → fall back to unbounded
+ }
+ }
+
+ const { rows, lastConsumedIdx } = parseDataRows(lines, spans, indent, guideIdx + 1, bottomBorderIdx);
+ if (rows.length === 0) return null;
+
+ const endLine = bottomBorderIdx ?? lastConsumedIdx;
+ return { startLine, endLine, headers, rows };
+}
+
+/**
+ * Check whether two border lines are structurally similar enough to be a
+ * matching top/bottom pair. A short thematic break (`---`, 3 chars) will
+ * not match a full-width table border (`---...---`, 70+ chars).
+ */
+function bordersMatch(topBorder: string, candidateBottom: string): boolean {
+ const topLen = topBorder.trim().length;
+ const bottomLen = candidateBottom.trim().length;
+ if (bottomLen < topLen * MIN_BORDER_WIDTH_RATIO) return false;
+ // Indent should be similar (within 2 characters).
+ if (Math.abs(leadingIndent(topBorder) - leadingIndent(candidateBottom)) > 2) return false;
+ return true;
+}
+
+/**
+ * Check that all non-blank, non-border lines between fromIdx and toIdx
+ * (exclusive) have at least the table's leading indent.
+ * Lines with less indent are unrelated prose that wandered between the
+ * top border and a candidate bottom border.
+ */
+function intermediateLinesMeetIndent(lines: string[], fromIdx: number, toIdx: number, indent: number): boolean {
+ for (let k = fromIdx; k < toIdx; k++) {
+ const line = lines[k];
+ if (isBlank(line) || isSolidBorder(line)) continue;
+ if (leadingIndent(line) < indent) return false;
+ }
+ return true;
+}
+
+/**
+ * Scan forward from `fromIdx` for a solid border line.
+ * Skips fenced code blocks and stops at markdown headings.
+ */
+function findBottomBorder(lines: string[], fromIdx: number): number | undefined {
+ const limit = Math.min(fromIdx + MAX_BORDER_SCAN, lines.length);
+
+ for (let i = fromIdx; i < limit; i++) {
+ if (isFenceOpener(lines[i])) {
+ i = findFenceClose(lines, i);
+ continue;
+ }
+ if (isSolidBorder(lines[i])) return i;
+ // A markdown heading means we've left the table's section.
+ if (/^#{1,6}\s/.test(lines[i].trimStart())) return undefined;
+ }
+
+ return undefined;
+}
+
+/**
+ * Parse data rows from lines after the column guide.
+ *
+ * When `boundaryIdx` is defined (bordered table), lines up to but not including
+ * the boundary are processed, and blank lines are treated as row separators.
+ *
+ * When `boundaryIdx` is undefined (unbounded table), the first blank line
+ * terminates the table.
+ *
+ * Continuation lines (first column empty) are merged into the preceding row.
+ */
+function parseDataRows(
+ lines: string[],
+ spans: ColumnSpan[],
+ indent: number,
+ startIdx: number,
+ boundaryIdx: number | undefined,
+): { rows: string[][]; lastConsumedIdx: number } {
+ const rows: string[][] = [];
+ const bounded = boundaryIdx !== undefined;
+ const limit = boundaryIdx ?? lines.length;
+ let lastConsumed = startIdx - 1;
+ let skippingOverflow = false;
+
+ for (let i = startIdx; i < limit; i++) {
+ if (isBlank(lines[i])) {
+ skippingOverflow = false;
+ if (bounded) {
+ lastConsumed = i;
+ continue; // row separator inside bordered table
+ }
+ break; // unbounded: blank = end of table
+ }
+
+ if (isSolidBorder(lines[i])) break;
+
+ // Reject rows where non-final columns overflow their span boundaries.
+ // First data row overflow → guide doesn't match data → reject entire table.
+ // Later row overflow → skip that row and its continuation lines.
+ if (cellsOverflow(lines[i], spans, indent)) {
+ if (rows.length === 0) break;
+ skippingOverflow = true;
+ lastConsumed = i;
+ continue;
+ }
+
+ const cells = extractCells(lines[i], spans, indent);
+ if (!hasAlphanumericContent(cells) && rows.length === 0) break;
+
+ if (cells[0].length === 0 && rows.length > 0) {
+ // Continuation line: skip if the parent row was skipped due to overflow.
+ if (skippingOverflow) {
+ lastConsumed = i;
+ continue;
+ }
+ // Merge non-empty cells into previous row
+ const prev = rows[rows.length - 1];
+ for (let c = 0; c < spans.length; c++) {
+ if (cells[c]) {
+ prev[c] = prev[c] ? `${prev[c]} ${cells[c]}` : cells[c];
+ }
+ }
+ } else if (hasAlphanumericContent(cells)) {
+ skippingOverflow = false;
+ rows.push(cells);
+ }
+
+ lastConsumed = i;
+ }
+
+ return { rows, lastConsumedIdx: lastConsumed };
+}
+
+// ---------------------------------------------------------------------------
+// GFM pipe-table output
+// ---------------------------------------------------------------------------
+
+function escapePipe(text: string): string {
+ return text.replace(/\|/g, '\\|');
+}
+
+function toGfmPipeTable({ headers, rows }: ParsedTable): string[] {
+ const n = headers.length;
+ const cell = (row: string[], i: number) => escapePipe(row[i] ?? '');
+
+ const headerLine = `| ${headers.map(escapePipe).join(' | ')} |`;
+ const separator = `| ${Array.from({ length: n }, () => '---').join(' | ')} |`;
+ const dataLines = rows.map((row) => `| ${Array.from({ length: n }, (_, i) => cell(row, i)).join(' | ')} |`);
+
+ return [headerLine, separator, ...dataLines];
+}
diff --git a/packages/super-editor/src/core/helpers/markdown/types.ts b/packages/super-editor/src/core/helpers/markdown/types.ts
index 890a1e58fb..2a8a58d851 100644
--- a/packages/super-editor/src/core/helpers/markdown/types.ts
+++ b/packages/super-editor/src/core/helpers/markdown/types.ts
@@ -12,6 +12,12 @@ import type { Editor } from '../../Editor.js';
export interface MarkdownConversionOptions {
/** When true, skip side-effects like numbering allocation (for dry-run validation). */
dryRun?: boolean;
+ /**
+ * When true (default), detect pandoc-style fixed-width ASCII tables in the
+ * markdown source and rewrite them as GFM pipe tables before AST parsing.
+ * Set to `false` to skip this normalization step.
+ */
+ normalizeFixedWidthTables?: boolean;
}
// ---------------------------------------------------------------------------
diff --git a/tests/doc-api-stories/tests/formatting/inline-formatting.ts b/tests/doc-api-stories/tests/formatting/inline-formatting.ts
index 2a86d33968..701d4f3229 100644
--- a/tests/doc-api-stories/tests/formatting/inline-formatting.ts
+++ b/tests/doc-api-stories/tests/formatting/inline-formatting.ts
@@ -39,7 +39,7 @@ describe('document-api story: inline formatting', () => {
// Insert text into the blank doc's single paragraph.
// Without an explicit target, insert uses the first paragraph.
- const insertResult = unwrap(await client.doc.insert({ sessionId, text }));
+ const insertResult = unwrap(await client.doc.insert({ sessionId, value: text }));
expect(insertResult.receipt?.success).toBe(true);
// The receipt's hoisted target contains the paragraph's stable blockId.
diff --git a/tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts b/tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts
new file mode 100644
index 0000000000..7e327f9601
--- /dev/null
+++ b/tests/doc-api-stories/tests/markdown/markdown-override-roundtrip.ts
@@ -0,0 +1,288 @@
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+import { describe, expect, it } from 'vitest';
+import { unwrap, useStoryHarness } from '../harness';
+
+type MarkdownStructureMetrics = {
+ headingsTotal: number;
+ listsTotal: number;
+ bulletsTotal: number;
+ orderedTotal: number;
+ hasPurposeItem: boolean;
+ hasNdaSignedItem: boolean;
+ hasNestedLevel3: boolean;
+ endOfAgreementMatches: number;
+ tablesTotal: number;
+ /** Sentinel text patterns that confirm table cell content was parsed correctly. */
+ tableContentSignals: {
+ hasConfidentialityTerm: boolean;
+ hasPressRelease: boolean;
+ hasSignatureField: boolean;
+ };
+};
+
+function sid(label: string): string {
+ return `${label}-${Date.now()}-${Math.floor(Math.random() * 1_000_000)}`;
+}
+
+describe('document-api story: markdown override roundtrip', () => {
+ const { client, outPath, runCli } = useStoryHarness('markdown/override-roundtrip', {
+ preserveResults: true,
+ });
+
+ const fixturePath = path.resolve(import.meta.dirname, 'multi-page-nda-test-document.md');
+
+  async function collectStructure(sessionId: string): Promise<MarkdownStructureMetrics> {
+ const headingResult = unwrap(
+ await client.doc.find({
+ sessionId,
+ type: 'node',
+ nodeType: 'heading',
+ limit: 200,
+ }),
+ );
+
+ const listAll = unwrap(
+ await client.doc.lists.list({
+ sessionId,
+ limit: 200,
+ }),
+ );
+
+ const listBullets = unwrap(
+ await client.doc.lists.list({
+ sessionId,
+ kind: 'bullet',
+ limit: 200,
+ }),
+ );
+
+ const listOrdered = unwrap(
+ await client.doc.lists.list({
+ sessionId,
+ kind: 'ordered',
+ limit: 200,
+ }),
+ );
+
+ const endOfAgreement = unwrap(
+ await client.doc.find({
+ sessionId,
+ type: 'text',
+ pattern: 'END OF AGREEMENT',
+ }),
+ );
+
+ const tablesResult = unwrap(
+ await client.doc.find({
+ sessionId,
+ type: 'node',
+ nodeType: 'table',
+ limit: 100,
+ }),
+ );
+
+ const listItems: any[] = Array.isArray(listAll.items) ? listAll.items : [];
+ const hasPurposeItem = listItems.some((item) => item.text === 'Purpose');
+ const hasNdaSignedItem = listItems.some((item) => item.text === 'NDA signed');
+ const hasNestedLevel3 = listItems.some(
+ (item) => item.text === 'Level 3' && item.level === 2 && Array.isArray(item.path) && item.path.length === 3,
+ );
+
+ // Verify table cell content was parsed correctly via text search.
+ // These sentinels confirm specific tables exist with correct content:
+ // - "Confidentiality Term": Section 5 data row (proves table has ≥1 data row)
+ // - "Press release": Appendix A continuation-merged cell (proves multi-line merge works)
+ // - "Signature:": Signatures table cell (unique to table rows, not in fixture header)
+ const [confidentialityTerm, pressRelease, signatureField] = await Promise.all([
+ client.doc.find({ sessionId, type: 'text', pattern: 'Confidentiality Term' }),
+ client.doc.find({ sessionId, type: 'text', pattern: 'Press release' }),
+ client.doc.find({ sessionId, type: 'text', pattern: 'Signature:' }),
+ ]);
+ const tableContentSignals = {
+ hasConfidentialityTerm: unwrap(confidentialityTerm).total >= 1,
+ hasPressRelease: unwrap(pressRelease).total >= 1,
+ hasSignatureField: unwrap(signatureField).total >= 1,
+ };
+
+ return {
+ headingsTotal: headingResult.total,
+ listsTotal: listAll.total,
+ bulletsTotal: listBullets.total,
+ orderedTotal: listOrdered.total,
+ hasPurposeItem,
+ hasNdaSignedItem,
+ hasNestedLevel3,
+ endOfAgreementMatches: endOfAgreement.total,
+ tablesTotal: tablesResult.total,
+ tableContentSignals,
+ };
+ }
+
+ async function applyStylesPatch(
+ doc: string,
+ channel: 'run' | 'paragraph',
+    patch: Record<string, unknown>,
+    out: string,
+  ): Promise<any> {
+ const envelope = await runCli([
+ 'styles',
+ 'apply',
+ doc,
+ '--target-json',
+ JSON.stringify({ scope: 'docDefaults', channel }),
+ '--patch-json',
+ JSON.stringify(patch),
+ '--out',
+ out,
+ ]);
+
+ const payload = envelope?.data ?? envelope;
+ const receipt = payload?.receipt ?? payload;
+ expect(receipt).toBeDefined();
+ return receipt;
+ }
+
+ it('initializes from markdown override and preserves structure after save + reopen', async () => {
+ const markdown = await readFile(fixturePath, 'utf8');
+ const sourceSessionId = sid('markdown-source');
+ const roundtripSessionId = sid('markdown-roundtrip');
+ const outputDocPath = outPath('nda-markdown-override.docx');
+
+ await client.doc.open({
+ sessionId: sourceSessionId,
+ contentOverride: markdown,
+ overrideType: 'markdown',
+ });
+
+ const before = await collectStructure(sourceSessionId);
+
+ // Sanity checks ensure this story fails loudly if markdown parsing regresses.
+ expect(before.headingsTotal).toBeGreaterThanOrEqual(20);
+ expect(before.listsTotal).toBeGreaterThanOrEqual(50);
+ expect(before.bulletsTotal).toBeGreaterThanOrEqual(30);
+ expect(before.orderedTotal).toBeGreaterThanOrEqual(15);
+ expect(before.hasPurposeItem).toBe(true);
+ expect(before.hasNdaSignedItem).toBe(true);
+ expect(before.hasNestedLevel3).toBe(true);
+ expect(before.endOfAgreementMatches).toBe(1);
+
+ // Fixed-width ASCII tables must normalize to real table nodes.
+ // The NDA fixture contains 3 tables: Section 5 (3col), Appendix A (4col), Signatures (2col).
+ expect(before.tablesTotal).toBe(3);
+
+ // Verify table cell content confirms correct parsing — not just node count.
+ // "Confidentiality Term" = Section 5 data row exists
+ // "Press release" = Appendix A continuation-merged cell was joined correctly
+ // "Signature:" = Signatures table cell (unique to table, not in fixture header)
+ expect(before.tableContentSignals.hasConfidentialityTerm).toBe(true);
+ expect(before.tableContentSignals.hasPressRelease).toBe(true);
+ expect(before.tableContentSignals.hasSignatureField).toBe(true);
+
+ await client.doc.save({
+ sessionId: sourceSessionId,
+ out: outputDocPath,
+ });
+
+ await client.doc.close({
+ sessionId: sourceSessionId,
+ discard: true,
+ });
+
+ await client.doc.open({
+ doc: outputDocPath,
+ sessionId: roundtripSessionId,
+ });
+
+ const after = await collectStructure(roundtripSessionId);
+
+ // Roundtrip invariant: structure metrics should remain identical after DOCX save/reopen.
+ expect(after).toEqual(before);
+ });
+
+ it('applies visible docDefaults styles to markdown-seeded content before final export', async () => {
+ const markdown = await readFile(fixturePath, 'utf8');
+ const sourceSessionId = sid('markdown-styled-source');
+ const styledSessionId = sid('markdown-styled');
+ const verifySessionId = sid('markdown-styled-verify');
+ const markdownSeedDoc = outPath('nda-markdown-seeded.docx');
+ const runStyledDoc = outPath('nda-markdown-styled-run.docx');
+ const styledTemplateDoc = outPath('nda-markdown-styled-template.docx');
+ const exportedDoc = outPath('nda-markdown-on-styled-template-export.docx');
+
+ await client.doc.open({
+ sessionId: sourceSessionId,
+ contentOverride: markdown,
+ overrideType: 'markdown',
+ });
+
+ const before = await collectStructure(sourceSessionId);
+ expect(before.headingsTotal).toBeGreaterThanOrEqual(20);
+ expect(before.listsTotal).toBeGreaterThanOrEqual(50);
+ expect(before.endOfAgreementMatches).toBe(1);
+
+ await client.doc.save({
+ sessionId: sourceSessionId,
+ out: markdownSeedDoc,
+ });
+
+ await client.doc.close({
+ sessionId: sourceSessionId,
+ discard: true,
+ });
+
+ const runPatch = {
+ bold: true,
+ italic: true,
+ fontSize: 30,
+ letterSpacing: 24,
+ color: { val: 'C00000' },
+ fontFamily: { ascii: 'Courier New', hAnsi: 'Courier New' },
+ };
+
+ const runReceipt = await applyStylesPatch(markdownSeedDoc, 'run', runPatch, runStyledDoc);
+ expect(runReceipt.success).toBe(true);
+ expect(runReceipt.changed).toBe(true);
+ expect(runReceipt.after.bold).toBe('on');
+ expect(runReceipt.after.italic).toBe('on');
+ expect(runReceipt.after.fontSize).toBe(30);
+ expect(runReceipt.after.letterSpacing).toBe(24);
+ expect(runReceipt.after.color).toEqual({ val: 'C00000' });
+
+ const paragraphPatch = {
+ justification: 'justify',
+ spacing: { before: 240, after: 240, line: 420, lineRule: 'auto' },
+ indent: { left: 720, firstLine: 360 },
+ };
+
+ const paragraphReceipt = await applyStylesPatch(runStyledDoc, 'paragraph', paragraphPatch, styledTemplateDoc);
+ expect(paragraphReceipt.success).toBe(true);
+ expect(paragraphReceipt.changed).toBe(true);
+ expect(paragraphReceipt.after.justification).toBe('justify');
+ expect(paragraphReceipt.after.spacing).toEqual(paragraphPatch.spacing);
+ expect(paragraphReceipt.after.indent).toEqual(paragraphPatch.indent);
+
+ await client.doc.open({
+ doc: styledTemplateDoc,
+ sessionId: styledSessionId,
+ });
+
+ await client.doc.save({
+ sessionId: styledSessionId,
+ out: exportedDoc,
+ });
+
+ await client.doc.close({
+ sessionId: styledSessionId,
+ discard: true,
+ });
+
+ await client.doc.open({
+ doc: exportedDoc,
+ sessionId: verifySessionId,
+ });
+
+ const after = await collectStructure(verifySessionId);
+ expect(after).toEqual(before);
+ });
+});
diff --git a/tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md b/tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md
new file mode 100644
index 0000000000..e149e32f40
--- /dev/null
+++ b/tests/doc-api-stories/tests/markdown/multi-page-nda-test-document.md
@@ -0,0 +1,220 @@
+# NON-DISCLOSURE AGREEMENT (NDA)
+
+**Effective Date:** {{Effective Date}}\
+**Disclosing Party:** {{Disclosing Party Name}}\
+**Receiving Party:** {{Receiving Party Name}}
+
+------------------------------------------------------------------------
+
+## TABLE OF CONTENTS
+
+1. [Purpose](#purpose)
+2. [Definition of Confidential
+ Information](#definition-of-confidential-information)
+3. [Obligations of Receiving Party](#obligations-of-receiving-party)
+4. [Exclusions](#exclusions)
+5. [Term and Termination](#term-and-termination)
+6. [Remedies](#remedies)
+7. [Miscellaneous](#miscellaneous)
+8. [Signatures](#signatures)
+9. [Appendix A -- Data Classification
+ Table](#appendix-a--data-classification-table)
+10. [Appendix B -- Security Controls
+ Checklist](#appendix-b--security-controls-checklist)
+
+------------------------------------------------------------------------
+
+## Purpose
+
+This Non-Disclosure Agreement ("Agreement") is entered into as of the
+**Effective Date** by and between the parties listed above.
+
+The purpose of this Agreement is to:
+
+- Protect confidential and proprietary information.
+- Establish permitted uses of disclosed information.
+- Define responsibilities and limitations.
+- Enable evaluation of a potential business relationship.
+
+For more information about NDAs, see:\
+- [Wikipedia -- Non-disclosure
+agreement](https://en.wikipedia.org/wiki/Non-disclosure_agreement)\
+- [Cornell Law -- NDA
+Overview](https://www.law.cornell.edu/wex/non-disclosure_agreement)
+
+------------------------------------------------------------------------
+
+## Definition of Confidential Information
+
+"Confidential Information" includes, but is not limited to:
+
+1. Technical data
+2. Trade secrets
+3. Product designs
+4. Financial information
+5. Customer lists
+6. Business strategies
+7. Source code and documentation
+
+### Examples
+
+> Confidential Information may be disclosed in written, oral,
+> electronic, or other tangible form.
+
+``` text
+Example: Internal API keys, architecture diagrams, pricing models.
+```
+
+------------------------------------------------------------------------
+
+## Obligations of Receiving Party
+
+The Receiving Party agrees to:
+
+- Maintain strict confidentiality.
+- Use the information solely for evaluation purposes.
+- Limit disclosure to authorized representatives.
+- Implement reasonable security measures.
+
+### Required Security Measures
+
+- Encryption at rest and in transit
+- Multi-factor authentication (MFA)
+- Access control based on least privilege
+- Secure backup procedures
+
+------------------------------------------------------------------------
+
+## Exclusions
+
+Confidential Information does **NOT** include information that:
+
+- Is or becomes publicly available.
+- Was already known prior to disclosure.
+- Is independently developed.
+- Is disclosed by court order (with prompt notice).
+
+------------------------------------------------------------------------
+
+## Term and Termination
+
+ Clause Description Duration
+ ---------------------- --------------------------------- -----------
+ Confidentiality Term Protection of confidential info 5 years
+ Evaluation Period Business evaluation timeline 12 months
+ Survival Clause Survives termination Yes
+
+This Agreement remains in effect until terminated in writing by either
+party.
+
+------------------------------------------------------------------------
+
+## Remedies
+
+The Receiving Party acknowledges that:
+
+- Unauthorized disclosure may cause irreparable harm.
+- Monetary damages may be insufficient.
+- Injunctive relief may be appropriate.
+
+------------------------------------------------------------------------
+
+## Miscellaneous
+
+### Governing Law
+
+This Agreement shall be governed by the laws of:
+
+- [ ] California\
+- [ ] New York\
+- [ ] Texas\
+- [ ] Other: {{Specify}}
+
+### Notices
+
+All notices shall be delivered via:
+
+1. Certified mail\
+2. Email with confirmation\
+3. Recognized courier service
+
+------------------------------------------------------------------------
+
+## Appendix A -- Data Classification Table
+
+ -------------------------------------------------------------------------
+ Classification Description Example Required Controls
+ ------------------- --------------- ------------ ------------------------
+ Public No restrictions Press None
+ release
+
+ Internal Limited Internal Access controls
+ distribution memo
+
+ Confidential Sensitive Financial Encryption + MFA
+ business data reports
+
+ Restricted Highly Source code Strict access + logging
+ sensitive
+ -------------------------------------------------------------------------
+
+------------------------------------------------------------------------
+
+## Appendix B -- Security Controls Checklist
+
+### Administrative Controls
+
+- [x] NDA signed
+- [ ] Background checks completed
+- [ ] Security training conducted
+
+### Technical Controls
+
+- [x] Encrypted storage
+- [x] Firewall enabled
+- [ ] Intrusion detection system
+
+### Physical Controls
+
+- [ ] Secure office access
+- [ ] Locked filing cabinets
+- [ ] Visitor sign-in logs
+
+------------------------------------------------------------------------
+
+## Signatures
+
+ Disclosing Party Receiving Party
+ ----------------------------- -----------------------------
+ Name: \_\_\_\_\_\_\_\_\_\_ Name: \_\_\_\_\_\_\_\_\_\_
+ Title: \_\_\_\_\_\_\_\_\_\_ Title: \_\_\_\_\_\_\_\_\_\_
+ Signature: \_\_\_\_\_\_ Signature: \_\_\_\_\_\_
+ Date: \_\_\_\_\_\_\_\_\_\_ Date: \_\_\_\_\_\_\_\_\_\_
+
+------------------------------------------------------------------------
+
+## Additional Formatting Examples
+
+### Nested Lists
+
+- Level 1
+ - Level 2
+ - Level 3
+
+### Horizontal Rule
+
+------------------------------------------------------------------------
+
+### Emphasis Examples
+
+- *Italic text*
+- **Bold text**
+- ~~Strikethrough~~
+
+### Inline Code
+
+Use `CONFIDENTIAL` header in all documents.
+
+------------------------------------------------------------------------
+
+**END OF AGREEMENT**
diff --git a/tests/doc-api-stories/tests/styles/doc-defaults.ts b/tests/doc-api-stories/tests/styles/doc-defaults.ts
index 8bb41e1545..59aa4567e5 100644
--- a/tests/doc-api-stories/tests/styles/doc-defaults.ts
+++ b/tests/doc-api-stories/tests/styles/doc-defaults.ts
@@ -20,7 +20,7 @@ describe('document-api story: styles.apply docDefaults', () => {
async function seedBlankDoc(sessionId: string, text: string, docName: string): Promise {
await client.doc.open({ sessionId });
- const insertResult = unwrap(await client.doc.insert({ sessionId, text }));
+ const insertResult = unwrap(await client.doc.insert({ sessionId, value: text }));
expect(insertResult.receipt?.success).toBe(true);
const sourceDoc = outPath(docName);
await client.doc.save({ sessionId, out: sourceDoc });
@@ -33,7 +33,7 @@ describe('document-api story: styles.apply docDefaults', () => {
}
await client.doc.open({ sessionId });
- const firstInsert = unwrap(await client.doc.insert({ sessionId, text: paragraphs[0] }));
+ const firstInsert = unwrap(await client.doc.insert({ sessionId, value: paragraphs[0] }));
expect(firstInsert.receipt?.success).toBe(true);
for (const paragraphText of paragraphs.slice(1)) {
diff --git a/tests/doc-api-stories/tests/tables/all-commands.ts b/tests/doc-api-stories/tests/tables/all-commands.ts
index 159740072f..31920ba34a 100644
--- a/tests/doc-api-stories/tests/tables/all-commands.ts
+++ b/tests/doc-api-stories/tests/tables/all-commands.ts
@@ -206,7 +206,7 @@ describe('document-api story: all table commands', () => {
operationId: 'tables.convertFromText',
setup: 'blank',
prepare: async (sessionId) => {
- await api.doc.insert({ sessionId, text: 'A\tB\tC' });
+ await api.doc.insert({ sessionId, value: 'A\tB\tC' });
},
run: async (sessionId) => {
const paragraphNodeId = await firstNodeId(sessionId, 'paragraph');
@@ -233,7 +233,7 @@ describe('document-api story: all table commands', () => {
prepare: async (sessionId) => {
await api.doc.insert({
sessionId,
- text: 'Alpha\tBeta\tGamma',
+ value: 'Alpha\tBeta\tGamma',
});
const secondParagraphResult = unwrap(
@@ -345,7 +345,7 @@ describe('document-api story: all table commands', () => {
prepare: async (sessionId) => {
await api.doc.insert({
sessionId,
- text: 'Alpha\tBeta\tGamma',
+ value: 'Alpha\tBeta\tGamma',
});
const secondParagraphResult = unwrap(
@@ -567,7 +567,7 @@ describe('document-api story: all table commands', () => {
operationId: 'tables.deleteCell',
setup: 'blank',
prepare: async (sessionId) => {
- await api.doc.insert({ sessionId, text: 'A1\tB1\tC1' });
+ await api.doc.insert({ sessionId, value: 'A1\tB1\tC1' });
for (const rowText of ['A2\tB2\tC2', 'A3\tB3\tC3']) {
const createRowResult = unwrap(
From 37436a7587709ba071477c48cd8c6a32ca7943eb Mon Sep 17 00:00:00 2001
From: Nick Bernal
Date: Thu, 26 Feb 2026 15:26:17 -0800
Subject: [PATCH 5/5] fix(markdown): skip indented code blocks in fixed-width
table normalization
---
...malizeFixedWidthTables.integration.test.ts | 9 +++++
.../normalizeFixedWidthTables.test.ts | 34 +++++++++++++++++++
.../markdown/normalizeFixedWidthTables.ts | 30 ++++++++++++++--
3 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts
index d4b978621b..65a5a316ea 100644
--- a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts
+++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.integration.test.ts
@@ -68,6 +68,15 @@ describe('normalizer → remark-gfm AST integration', () => {
expect(rowTexts(tables[0].children[1])).toEqual(['Term', 'Protection']);
});
+ it('does not produce tables from 4-space indented code blocks', () => {
+ const raw = [' Clause Description', ' ---------- -----------', ' Term Protection'].join('\n');
+
+ const normalized = normalizeFixedWidthTables(raw);
+ const ast = parseMarkdownToAst(normalized);
+ expect(findTables(ast)).toHaveLength(0);
+ expect(normalized).toBe(raw);
+ });
+
it('Section 5 table (no borders): 3 columns, 3 data rows', () => {
const raw = [
' Clause Description Duration',
diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts
index 50318ae4c5..4ab2447a61 100644
--- a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts
+++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.test.ts
@@ -281,6 +281,40 @@ describe('normalizeFixedWidthTables', () => {
});
});
+ describe('indented code blocks', () => {
+ it('does not transform tables inside 4-space indented code blocks', () => {
+ const input = [' Name Age', ' ------ ---', ' Alice 30'].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+
+ expect(extractPipeTable(output)).toHaveLength(0);
+ expect(output).toBe(input);
+ });
+
+ it('transforms tables before and after 4-space indented code blocks', () => {
+ const input = [
+ 'A B',
+ '----- -----',
+ '1 2',
+ '',
+ ' C D',
+ ' ----- -----',
+ ' 3 4',
+ '',
+ 'E F',
+ '----- -----',
+ '5 6',
+ ].join('\n');
+
+ const output = normalizeFixedWidthTables(input);
+ const tables = extractPipeTable(output);
+
+ expect(tables.filter((l) => l.startsWith('| A'))).toHaveLength(1);
+ expect(tables.filter((l) => l.startsWith('| E'))).toHaveLength(1);
+ expect(output).toContain(' C D');
+ });
+ });
+
// ---------------------------------------------------------------------------
// Regression: indent mismatch must not corrupt cell text (Bug #1)
// ---------------------------------------------------------------------------
diff --git a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts
index 8851108c83..9fd8890a8e 100644
--- a/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts
+++ b/packages/super-editor/src/core/helpers/markdown/normalizeFixedWidthTables.ts
@@ -25,7 +25,7 @@
* In bordered tables, blank lines between data rows are treated as row separators.
* In unbounded tables, a blank line terminates the table.
*
- * Fenced code blocks are skipped entirely.
+ * Code blocks are skipped entirely (fenced and 4-space/tab-indented).
*/
// ---------------------------------------------------------------------------
@@ -36,7 +36,7 @@
* Detect pandoc-style fixed-width ASCII tables in a markdown string and
* rewrite them as GFM pipe tables that remark-gfm can parse.
*
- * Fenced code blocks are skipped. Bordered (top/bottom border) and
+ * Code blocks are skipped (fenced and 4-space/tab-indented). Bordered (top/bottom border) and
* unbounded (header + guide only) layouts are both supported, including
* continuation lines that wrap across multiple rows.
*
@@ -57,6 +57,13 @@ export function normalizeFixedWidthTables(markdown: string): string {
continue;
}
+ if (isIndentedCodeOpener(lines[i])) {
+ const closeIdx = findIndentedCodeClose(lines, i);
+ for (let j = i; j <= closeIdx; j++) output.push(lines[j]);
+ i = closeIdx + 1;
+ continue;
+ }
+
const table = tryParseTableAt(lines, i);
if (table) {
output.push(...toGfmPipeTable(table));
@@ -107,15 +114,20 @@ interface TableAnchors {
}
// ---------------------------------------------------------------------------
-// Fenced code block handling
+// Code block handling
// ---------------------------------------------------------------------------
const FENCE_OPEN_RE = /^( {0,3})(`{3,}|~{3,})/;
+const INDENTED_CODE_RE = /^(?: {4,}|\t)/;
function isFenceOpener(line: string): boolean {
return FENCE_OPEN_RE.test(line);
}
+function isIndentedCodeOpener(line: string): boolean {
+ return INDENTED_CODE_RE.test(line);
+}
+
function findFenceClose(lines: string[], openIdx: number): number {
const match = lines[openIdx].match(FENCE_OPEN_RE);
if (!match) return openIdx;
@@ -130,6 +142,14 @@ function findFenceClose(lines: string[], openIdx: number): number {
return lines.length - 1; // unclosed fence: consume to end
}
+function findIndentedCodeClose(lines: string[], openIdx: number): number {
+ for (let i = openIdx + 1; i < lines.length; i++) {
+ if (isBlank(lines[i])) continue;
+ if (!isIndentedCodeOpener(lines[i])) return i - 1;
+ }
+ return lines.length - 1;
+}
+
// ---------------------------------------------------------------------------
// Line classification
// ---------------------------------------------------------------------------
@@ -362,6 +382,10 @@ function findBottomBorder(lines: string[], fromIdx: number): number | undefined
i = findFenceClose(lines, i);
continue;
}
+ if (isIndentedCodeOpener(lines[i])) {
+ i = findIndentedCodeClose(lines, i);
+ continue;
+ }
if (isSolidBorder(lines[i])) return i;
// A markdown heading means we've left the table's section.
if (/^#{1,6}\s/.test(lines[i].trimStart())) return undefined;