From 95b10c67261c7707be73baaea00178dfab026d16 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Tue, 3 Mar 2026 16:32:26 -0800 Subject: [PATCH] feat(document-api): add getHtml operation, fix HTML insert and SDK tool selection --- apps/cli/package.json | 1 + apps/cli/scripts/export-sdk-contract.ts | 1 + apps/cli/src/__tests__/cli.test.ts | 41 +++++++-- .../src/__tests__/conformance/scenarios.ts | 5 ++ apps/cli/src/cli/operation-hints.ts | 4 + apps/cli/src/lib/document.ts | 37 +++++--- apps/cli/src/lib/dom-environment.test.ts | 62 +++++++++++++ apps/cli/src/lib/dom-environment.ts | 55 ++++++++++++ .../document-api/available-operations.mdx | 3 +- .../reference/_generated-manifest.json | 4 +- .../reference/capabilities/get.mdx | 49 ++++++++++ .../document-api/reference/core/index.mdx | 1 + apps/docs/document-api/reference/get-html.mdx | 81 +++++++++++++++++ apps/docs/document-api/reference/index.mdx | 3 +- apps/docs/document-engine/sdks.mdx | 1 + .../src/contract/operation-definitions.ts | 9 ++ .../src/contract/operation-registry.ts | 2 + packages/document-api/src/contract/schemas.ts | 6 ++ .../src/get-html/get-html.test.ts | 26 ++++++ .../document-api/src/get-html/get-html.ts | 28 ++++++ packages/document-api/src/index.ts | 10 +++ packages/document-api/src/invoke/invoke.ts | 1 + packages/sdk/codegen/src/generate-node.mjs | 26 ++++++ packages/sdk/codegen/src/generate-python.mjs | 1 + packages/sdk/langs/node/src/tools.ts | 18 +++- .../sdk/langs/python/superdoc/tools_api.py | 23 +++-- .../python/tests/test_client_method_naming.py | 16 ++++ .../assemble-adapters.ts | 4 + .../document-api-adapters/get-html-adapter.ts | 21 +++++ .../insert-structured-wrapper.test.ts | 23 ++--- .../plan-engine/plan-wrappers.ts | 90 +++++++++---------- pnpm-lock.yaml | 3 + 32 files changed, 560 insertions(+), 95 deletions(-) create mode 100644 apps/cli/src/lib/dom-environment.test.ts create mode 100644 apps/cli/src/lib/dom-environment.ts create mode 100644 
apps/docs/document-api/reference/get-html.mdx create mode 100644 packages/document-api/src/get-html/get-html.test.ts create mode 100644 packages/document-api/src/get-html/get-html.ts create mode 100644 packages/super-editor/src/document-api-adapters/get-html-adapter.ts diff --git a/apps/cli/package.json b/apps/cli/package.json index ebe12fbc87..8c720b5535 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -32,6 +32,7 @@ "dependencies": { "@hocuspocus/provider": "catalog:", "fast-glob": "catalog:", + "happy-dom": "catalog:", "y-websocket": "catalog:", "yjs": "catalog:" }, diff --git a/apps/cli/scripts/export-sdk-contract.ts b/apps/cli/scripts/export-sdk-contract.ts index cae5fd1e75..62cb23faa1 100644 --- a/apps/cli/scripts/export-sdk-contract.ts +++ b/apps/cli/scripts/export-sdk-contract.ts @@ -54,6 +54,7 @@ const INTENT_NAMES = { 'doc.getNodeById': 'get_node_by_id', 'doc.getText': 'get_document_text', 'doc.getMarkdown': 'get_document_markdown', + 'doc.getHtml': 'get_document_html', 'doc.info': 'get_document_info', 'doc.capabilities.get': 'get_capabilities', 'doc.insert': 'insert_content', diff --git a/apps/cli/src/__tests__/cli.test.ts b/apps/cli/src/__tests__/cli.test.ts index 3f827843a1..4b8632c0a2 100644 --- a/apps/cli/src/__tests__/cli.test.ts +++ b/apps/cli/src/__tests__/cli.test.ts @@ -1031,6 +1031,32 @@ describe('superdoc CLI', () => { expect(envelope.error.message).toContain('Unknown field'); }); + test('insert with --type html inserts HTML content into the document', async () => { + const insertSource = join(TEST_DIR, 'insert-html-source.docx'); + const insertOut = join(TEST_DIR, 'insert-html-out.docx'); + await copyFile(SAMPLE_DOC, insertSource); + + const insertResult = await runCli([ + 'insert', + insertSource, + '--value', + '

<p>CLI_HTML_INSERT_TOKEN</p>

', + '--type', + 'html', + '--out', + insertOut, + ]); + + expect(insertResult.code).toBe(0); + const insertEnvelope = parseJsonOutput>(insertResult); + expect(insertEnvelope.data.receipt.success).toBe(true); + + const verifyResult = await runCli(['find', insertOut, '--type', 'text', '--pattern', 'CLI_HTML_INSERT_TOKEN']); + expect(verifyResult.code).toBe(0); + const verifyEnvelope = parseJsonOutput>(verifyResult); + expect(verifyEnvelope.data.result.total).toBeGreaterThan(0); + }); + test('create paragraph writes output and adds a new paragraph with seed text', async () => { const createSource = join(TEST_DIR, 'create-paragraph-source.docx'); const createOut = join(TEST_DIR, 'create-paragraph-out.docx'); @@ -2123,7 +2149,7 @@ describe('superdoc CLI', () => { expect(closeResult.code).toBe(0); }); - test('open with --override-type html rejects in headless CLI', async () => { + test('open with --override-type html succeeds (happy-dom provides DOM)', async () => { const openResult = await runCli([ 'open', SAMPLE_DOC, @@ -2132,10 +2158,15 @@ describe('superdoc CLI', () => { '--override-type', 'html', ]); - expect(openResult.code).toBe(1); - const envelope = parseJsonOutput(openResult); - expect(envelope.error.code).toBe('UNSUPPORTED_FORMAT'); - expect(envelope.error.message).toContain('HTML'); + expect(openResult.code).toBe(0); + + const textResult = await runCli(['get-text']); + expect(textResult.code).toBe(0); + const textEnvelope = parseJsonOutput<{ data: { text: string } }>(textResult); + expect(textEnvelope.data.text).toContain('HTML Override'); + + const closeResult = await runCli(['close', '--discard']); + expect(closeResult.code).toBe(0); }); test('open with --content-override empty string is accepted (not silently ignored)', async () => { diff --git a/apps/cli/src/__tests__/conformance/scenarios.ts b/apps/cli/src/__tests__/conformance/scenarios.ts index 75bc88aab8..6fecef8c12 100644 --- a/apps/cli/src/__tests__/conformance/scenarios.ts +++ 
b/apps/cli/src/__tests__/conformance/scenarios.ts @@ -703,6 +703,11 @@ export const SUCCESS_SCENARIOS = { const docPath = await harness.copyFixtureDoc('doc-get-text'); return { stateDir, args: ['get-markdown', docPath] }; }, + 'doc.getHtml': async (harness: ConformanceHarness): Promise => { + const stateDir = await harness.createStateDir('doc-get-html-success'); + const docPath = await harness.copyFixtureDoc('doc-get-text'); + return { stateDir, args: ['get-html', docPath] }; + }, 'doc.query.match': async (harness: ConformanceHarness): Promise => { const stateDir = await harness.createStateDir('doc-query-match-success'); const docPath = await harness.copyFixtureDoc('doc-query-match'); diff --git a/apps/cli/src/cli/operation-hints.ts b/apps/cli/src/cli/operation-hints.ts index 558e332f5c..eae120f593 100644 --- a/apps/cli/src/cli/operation-hints.ts +++ b/apps/cli/src/cli/operation-hints.ts @@ -76,6 +76,7 @@ export const SUCCESS_VERB: Record = { getNodeById: 'resolved node', getText: 'extracted text', getMarkdown: 'extracted markdown', + getHtml: 'extracted html', info: 'retrieved info', insert: 'inserted text', replace: 'replaced text', @@ -210,6 +211,7 @@ export const OUTPUT_FORMAT: Record = { getNodeById: 'nodeInfo', getText: 'plain', getMarkdown: 'plain', + getHtml: 'plain', info: 'documentInfo', insert: 'mutationReceipt', replace: 'mutationReceipt', @@ -328,6 +330,7 @@ export const RESPONSE_ENVELOPE_KEY: Record getNodeById: 'node', getText: 'text', getMarkdown: 'markdown', + getHtml: 'html', info: null, insert: null, replace: null, @@ -474,6 +477,7 @@ export const OPERATION_FAMILY: Record = getNodeById: 'query', getText: 'query', getMarkdown: 'query', + getHtml: 'query', info: 'general', insert: 'textMutation', replace: 'textMutation', diff --git a/apps/cli/src/lib/document.ts b/apps/cli/src/lib/document.ts index f2d65d187a..52295f8cfa 100644 --- a/apps/cli/src/lib/document.ts +++ b/apps/cli/src/lib/document.ts @@ -6,6 +6,7 @@ import { getDocumentApiAdapters } 
from '@superdoc/super-editor/document-api-adap import { markdownToPmDoc } from '@superdoc/super-editor/markdown'; import { createDocumentApi, type DocumentApi } from '@superdoc/document-api'; +import { createCliDomEnvironment } from './dom-environment'; import type { CollaborationProfile } from './collaboration'; import { createCollaborationRuntime } from './collaboration'; import { @@ -35,12 +36,22 @@ export interface OpenedDocument { dispose(): void; } +/** Content override options extracted before calling Editor.open(). */ +interface ContentOverrideOptions { + markdown?: string; + html?: string; + plainText?: string; +} + +/** Options passed through to Editor.open() alongside content overrides. */ +type EditorPassThroughOptions = Record; + interface OpenDocumentOptions { documentId?: string; ydoc?: unknown; collaborationProvider?: unknown; /** Options passed through to Editor.open() (e.g., markdown/html/plainText for content override). */ - editorOpenOptions?: Record; + editorOpenOptions?: ContentOverrideOptions & EditorPassThroughOptions; /** When set, overrides Editor's auto-detected isNewFile flag. */ isNewFile?: boolean; /** Optional user identity for attribution (comments, tracked changes, collaboration presence). */ @@ -117,11 +128,9 @@ export async function openDocument( } // Separate content overrides from options passed to Editor.open(). - // The Editor's built-in markdown/html init paths (in the dist bundle) route - // through an HTML-based pipeline that requires DOM. In headless CLI mode - // there is no DOM, so we intercept them here: - // - markdown: applied post-init via the AST-based markdownToPmDoc pipeline (DOM-free) - // - html: rejected with a clear error (no DOM-free HTML pipeline exists) + // Markdown and plainText are applied post-init (DOM-free AST pipelines). + // HTML passes through to Editor.open() directly — the CLI-provided happy-dom + // document enables the Editor's built-in HTML init path. 
const { markdown: markdownOverride, html: htmlOverride, @@ -129,18 +138,16 @@ export async function openDocument( ...passThroughEditorOpts } = options.editorOpenOptions ?? {}; - if (htmlOverride != null) { - throw new CliError( - 'UNSUPPORTED_FORMAT', - 'HTML content override is not supported in headless CLI mode (requires DOM). Use --override-type markdown instead.', - ); - } + // Create a DOM environment for headless HTML support (getHtml, insert HTML, + // HTML content override). Always inject via options.document — never set globals. + const domEnv = createCliDomEnvironment(); let editor: Editor; try { const isTest = process.env.NODE_ENV === 'test'; editor = await Editor.open(Buffer.from(source), { documentId: options.documentId ?? meta.path ?? 'blank.docx', + document: domEnv.document, user: options.user ? { name: options.user.name, email: options.user.email, image: null } : { id: 'cli', name: 'CLI' }, @@ -148,9 +155,12 @@ export async function openDocument( ydoc: options.ydoc, ...(options.collaborationProvider != null ? { collaborationProvider: options.collaborationProvider } : {}), ...(options.isNewFile != null ? { isNewFile: options.isNewFile } : {}), + // Pass through HTML override directly — happy-dom provides DOM support. + ...(htmlOverride != null ? { html: htmlOverride } : {}), ...passThroughEditorOpts, }); } catch (error) { + domEnv.dispose(); const message = error instanceof Error ? error.message : String(error); throw new CliError('DOCUMENT_OPEN_FAILED', 'Failed to open document.', { message, @@ -170,6 +180,7 @@ export async function openDocument( editor.dispatch(tr); } catch (error) { editor.destroy(); + domEnv.dispose(); const message = error instanceof Error ? 
error.message : String(error); throw new CliError('DOCUMENT_OPEN_FAILED', 'Failed to apply content override.', { message, @@ -189,6 +200,7 @@ export async function openDocument( editor.dispatch(tr); } catch (error) { editor.destroy(); + domEnv.dispose(); const message = error instanceof Error ? error.message : String(error); throw new CliError('DOCUMENT_OPEN_FAILED', 'Failed to apply text content override.', { message, @@ -207,6 +219,7 @@ export async function openDocument( meta, dispose() { editor.destroy(); + domEnv.dispose(); }, }; } diff --git a/apps/cli/src/lib/dom-environment.test.ts b/apps/cli/src/lib/dom-environment.test.ts new file mode 100644 index 0000000000..9961cca141 --- /dev/null +++ b/apps/cli/src/lib/dom-environment.test.ts @@ -0,0 +1,62 @@ +import { describe, it, expect } from 'bun:test'; +import { createCliDomEnvironment } from './dom-environment'; + +describe('createCliDomEnvironment', () => { + it('returns a document that supports createElement', () => { + const env = createCliDomEnvironment(); + try { + const div = env.document.createElement('div'); + expect(div.tagName).toBe('DIV'); + } finally { + env.dispose(); + } + }); + + it('supports innerHTML round-trip', () => { + const env = createCliDomEnvironment(); + try { + const div = env.document.createElement('div'); + div.innerHTML = '

hello world

'; + expect(div.innerHTML).toContain('

'); + expect(div.innerHTML).toContain('world'); + } finally { + env.dispose(); + } + }); + + it('exposes DOMParser via document.defaultView', () => { + const env = createCliDomEnvironment(); + try { + const DOMParser = env.document.defaultView?.DOMParser; + expect(DOMParser).toBeDefined(); + + const parser = new DOMParser!(); + const parsed = parser.parseFromString('

test

', 'text/html'); + expect(parsed.body.innerHTML).toContain('test'); + } finally { + env.dispose(); + } + }); + + it('supports element.dataset access', () => { + const env = createCliDomEnvironment(); + try { + const div = env.document.createElement('div'); + div.dataset.testKey = 'value'; + expect(div.dataset.testKey).toBe('value'); + } finally { + env.dispose(); + } + }); + + it('dispose does not throw', () => { + const env = createCliDomEnvironment(); + expect(() => env.dispose()).not.toThrow(); + }); + + it('dispose can be called multiple times safely', () => { + const env = createCliDomEnvironment(); + env.dispose(); + expect(() => env.dispose()).not.toThrow(); + }); +}); diff --git a/apps/cli/src/lib/dom-environment.ts b/apps/cli/src/lib/dom-environment.ts new file mode 100644 index 0000000000..488f1ad247 --- /dev/null +++ b/apps/cli/src/lib/dom-environment.ts @@ -0,0 +1,55 @@ +/** + * CLI DOM environment backed by happy-dom. + * + * Provides a minimal `Document` instance for headless Editor sessions that + * need DOM APIs (HTML import/export, content override, structured insert). + * + * ## Lifecycle + * + * ```ts + * const env = createCliDomEnvironment(); + * const editor = await Editor.open(source, { document: env.document }); + * // ... use editor ... + * env.dispose(); + * ``` + * + * ## DOM injection strategy + * + * Always pass `env.document` via `EditorOptions.document`. This bypasses the + * memoized `canUseDOM()` check in super-editor — no globals are set on + * `globalThis`, so the CLI stays free of side-effects. + * + * ## Known edge: `globalThis.Element` instanceof + * + * `createDocFromHTML` checks `parsedContent instanceof globalThis.Element`. + * Because we inject DOM via `options.document` (not via globals), the happy-dom + * `Element` class may differ from `globalThis.Element`. In current defaults + * this only affects unsupported-content detection, not core HTML parsing. 
+ */ + +import { Window } from 'happy-dom'; + +export interface CliDomEnvironment { + /** The happy-dom `Document` to pass as `EditorOptions.document`. */ + document: Document; + /** Release the happy-dom window and all associated resources. */ + dispose(): void; +} + +/** + * Create an isolated DOM environment for a single CLI document session. + * + * Each call creates a fresh `Window` — callers must call `dispose()` when + * the session is complete to avoid memory leaks in long-lived host processes. + */ +export function createCliDomEnvironment(): CliDomEnvironment { + const window = new Window(); + + return { + document: window.document as unknown as Document, + dispose() { + window.happyDOM.abort(); + window.close(); + }, + }; +} diff --git a/apps/docs/document-api/available-operations.mdx b/apps/docs/document-api/available-operations.mdx index 624eb7f20f..4c9ba72cc0 100644 --- a/apps/docs/document-api/available-operations.mdx +++ b/apps/docs/document-api/available-operations.mdx @@ -17,7 +17,7 @@ Use the tables below to see what operations are available and where each one is | Blocks | 1 | 0 | 1 | [Reference](/document-api/reference/blocks/index) | | Capabilities | 1 | 0 | 1 | [Reference](/document-api/reference/capabilities/index) | | Comments | 5 | 0 | 5 | [Reference](/document-api/reference/comments/index) | -| Core | 9 | 0 | 9 | [Reference](/document-api/reference/core/index) | +| Core | 10 | 0 | 10 | [Reference](/document-api/reference/core/index) | | Create | 5 | 0 | 5 | [Reference](/document-api/reference/create/index) | | Format | 44 | 1 | 45 | [Reference](/document-api/reference/format/index) | | History | 3 | 0 | 3 | [Reference](/document-api/reference/history/index) | @@ -46,6 +46,7 @@ Use the tables below to see what operations are available and where each one is | editor.doc.getNodeById(...) | [`getNodeById`](/document-api/reference/get-node-by-id) | | editor.doc.getText(...) 
| [`getText`](/document-api/reference/get-text) | | editor.doc.getMarkdown(...) | [`getMarkdown`](/document-api/reference/get-markdown) | +| editor.doc.getHtml(...) | [`getHtml`](/document-api/reference/get-html) | | editor.doc.info(...) | [`info`](/document-api/reference/info) | | editor.doc.insert(...) | [`insert`](/document-api/reference/insert) | | editor.doc.replace(...) | [`replace`](/document-api/reference/replace) | diff --git a/apps/docs/document-api/reference/_generated-manifest.json b/apps/docs/document-api/reference/_generated-manifest.json index f7294acd4a..0f7ce05945 100644 --- a/apps/docs/document-api/reference/_generated-manifest.json +++ b/apps/docs/document-api/reference/_generated-manifest.json @@ -83,6 +83,7 @@ "apps/docs/document-api/reference/format/vanish.mdx", "apps/docs/document-api/reference/format/vert-align.mdx", "apps/docs/document-api/reference/format/web-hidden.mdx", + "apps/docs/document-api/reference/get-html.mdx", "apps/docs/document-api/reference/get-markdown.mdx", "apps/docs/document-api/reference/get-node-by-id.mdx", "apps/docs/document-api/reference/get-node.mdx", @@ -212,6 +213,7 @@ "getNodeById", "getText", "getMarkdown", + "getHtml", "info", "insert", "replace", @@ -494,5 +496,5 @@ } ], "marker": "{/* GENERATED FILE: DO NOT EDIT. Regenerate via `pnpm run docapi:sync`. 
*/}", - "sourceHash": "f1dd7d6cb56f926499a024e1bdd02a3540e666cbbd102f025e7edc8ff85b83ac" + "sourceHash": "d63e4c31b2ecb4768b8d7c22f4fca6ec66da0d674ff05b7663fa95db8791754b" } diff --git a/apps/docs/document-api/reference/capabilities/get.mdx b/apps/docs/document-api/reference/capabilities/get.mdx index a00bcb8a2b..18f347e6cc 100644 --- a/apps/docs/document-api/reference/capabilities/get.mdx +++ b/apps/docs/document-api/reference/capabilities/get.mdx @@ -647,6 +647,11 @@ _No fields._ | `operations.format.webHidden.dryRun` | boolean | yes | | | `operations.format.webHidden.reasons` | enum[] | no | | | `operations.format.webHidden.tracked` | boolean | yes | | +| `operations.getHtml` | object | yes | | +| `operations.getHtml.available` | boolean | yes | | +| `operations.getHtml.dryRun` | boolean | yes | | +| `operations.getHtml.reasons` | enum[] | no | | +| `operations.getHtml.tracked` | boolean | yes | | | `operations.getMarkdown` | object | yes | | | `operations.getMarkdown.available` | boolean | yes | | | `operations.getMarkdown.dryRun` | boolean | yes | | @@ -2083,6 +2088,14 @@ _No fields._ ], "tracked": true }, + "getHtml": { + "available": true, + "dryRun": true, + "reasons": [ + "COMMAND_UNAVAILABLE" + ], + "tracked": true + }, "getMarkdown": { "available": true, "dryRun": true, @@ -7229,6 +7242,41 @@ _No fields._ ], "type": "object" }, + "getHtml": { + "additionalProperties": false, + "properties": { + "available": { + "type": "boolean" + }, + "dryRun": { + "type": "boolean" + }, + "reasons": { + "items": { + "enum": [ + "COMMAND_UNAVAILABLE", + "HELPER_UNAVAILABLE", + "OPERATION_UNAVAILABLE", + "TRACKED_MODE_UNAVAILABLE", + "DRY_RUN_UNAVAILABLE", + "NAMESPACE_UNAVAILABLE", + "STYLES_PART_MISSING", + "COLLABORATION_ACTIVE" + ] + }, + "type": "array" + }, + "tracked": { + "type": "boolean" + } + }, + "required": [ + "available", + "tracked", + "dryRun" + ], + "type": "object" + }, "getMarkdown": { "additionalProperties": false, "properties": { @@ -10946,6 
+10994,7 @@ _No fields._ "getNodeById", "getText", "getMarkdown", + "getHtml", "info", "insert", "replace", diff --git a/apps/docs/document-api/reference/core/index.mdx b/apps/docs/document-api/reference/core/index.mdx index 250904a3ca..2bfddcdb2e 100644 --- a/apps/docs/document-api/reference/core/index.mdx +++ b/apps/docs/document-api/reference/core/index.mdx @@ -19,6 +19,7 @@ Primary read and write operations. | getNodeById | `getNodeById` | No | `idempotent` | No | No | | getText | `getText` | No | `idempotent` | No | No | | getMarkdown | `getMarkdown` | No | `idempotent` | No | No | +| getHtml | `getHtml` | No | `idempotent` | No | No | | info | `info` | No | `idempotent` | No | No | | insert | `insert` | Yes | `non-idempotent` | Yes | Yes | | replace | `replace` | Yes | `conditional` | Yes | Yes | diff --git a/apps/docs/document-api/reference/get-html.mdx b/apps/docs/document-api/reference/get-html.mdx new file mode 100644 index 0000000000..a911aed5c4 --- /dev/null +++ b/apps/docs/document-api/reference/get-html.mdx @@ -0,0 +1,81 @@ +--- +title: getHtml +sidebarTitle: getHtml +description: Extract the document content as an HTML string. +--- + +{/* GENERATED FILE: DO NOT EDIT. Regenerate via `pnpm run docapi:sync`. */} + +> Alpha: Document API is currently alpha and subject to breaking changes. + +## Summary + +Extract the document content as an HTML string. + +- Operation ID: `getHtml` +- API member path: `editor.doc.getHtml(...)` +- Mutates document: `no` +- Idempotency: `idempotent` +- Supports tracked mode: `no` +- Supports dry run: `no` +- Deterministic target resolution: `yes` + +## Expected result + +Returns the full document content as an HTML-formatted string. 
+ +## Input fields + +| Field | Type | Required | Description | +| --- | --- | --- | --- | +| `unflattenLists` | boolean | no | | + +### Example request + +```json +{ + "unflattenLists": true +} +``` + +## Output fields + +_No fields._ + +### Example response + +```json +"example" +``` + +## Pre-apply throws + +- None + +## Non-applied failure codes + +- None + +## Raw schemas + + +```json +{ + "additionalProperties": false, + "properties": { + "unflattenLists": { + "type": "boolean" + } + }, + "type": "object" +} +``` + + + +```json +{ + "type": "string" +} +``` + diff --git a/apps/docs/document-api/reference/index.mdx b/apps/docs/document-api/reference/index.mdx index 94d879af2b..d30b11c1ed 100644 --- a/apps/docs/document-api/reference/index.mdx +++ b/apps/docs/document-api/reference/index.mdx @@ -20,7 +20,7 @@ Document API is currently alpha and subject to breaking changes. | Namespace | Canonical ops | Aliases | Total surface | Reference | | --- | --- | --- | --- | --- | -| Core | 9 | 0 | 9 | [Open](/document-api/reference/core/index) | +| Core | 10 | 0 | 10 | [Open](/document-api/reference/core/index) | | Blocks | 1 | 0 | 1 | [Open](/document-api/reference/blocks/index) | | Capabilities | 1 | 0 | 1 | [Open](/document-api/reference/capabilities/index) | | Create | 5 | 0 | 5 | [Open](/document-api/reference/create/index) | @@ -51,6 +51,7 @@ The tables below are grouped by namespace. | getNodeById | editor.doc.getNodeById(...) | Retrieve a single node by its unique ID. | | getText | editor.doc.getText(...) | Extract the plain-text content of the document. | | getMarkdown | editor.doc.getMarkdown(...) | Extract the document content as a Markdown string. | +| getHtml | editor.doc.getHtml(...) | Extract the document content as an HTML string. | | info | editor.doc.info(...) | Return document metadata including revision, node count, and capabilities. | | insert | editor.doc.insert(...) 
| Insert content at a target position, or at the end of the document when target is omitted. Supports text (default), markdown, and html content types via the `type` field. | | replace | editor.doc.replace(...) | Replace content at a target position with new text or inline content. | diff --git a/apps/docs/document-engine/sdks.mdx b/apps/docs/document-engine/sdks.mdx index a31a6f3754..868280cc3d 100644 --- a/apps/docs/document-engine/sdks.mdx +++ b/apps/docs/document-engine/sdks.mdx @@ -364,6 +364,7 @@ The SDKs expose all operations from the [Document API](/document-api/overview) p | `doc.getNodeById` | `get-node-by-id` | Retrieve a single node by its unique ID. | | `doc.getText` | `get-text` | Extract the plain-text content of the document. | | `doc.getMarkdown` | `get-markdown` | Extract the document content as a Markdown string. | +| `doc.getHtml` | `get-html` | Extract the document content as an HTML string. | | `doc.info` | `info` | Return document metadata including revision, node count, and capabilities. | | `doc.query.match` | `query match` | Deterministic selector-based search with cardinality contracts for mutation targeting. | | `doc.mutations.preview` | `mutations preview` | Dry-run a mutation plan, returning resolved targets without applying changes. 
| diff --git a/packages/document-api/src/contract/operation-definitions.ts b/packages/document-api/src/contract/operation-definitions.ts index 9fc6123429..f786cf38fa 100644 --- a/packages/document-api/src/contract/operation-definitions.ts +++ b/packages/document-api/src/contract/operation-definitions.ts @@ -265,6 +265,15 @@ export const OPERATION_DEFINITIONS = { referenceDocPath: 'get-markdown.mdx', referenceGroup: 'core', }, + getHtml: { + memberPath: 'getHtml', + description: 'Extract the document content as an HTML string.', + expectedResult: 'Returns the full document content as an HTML-formatted string.', + requiresDocumentContext: true, + metadata: readOperation(), + referenceDocPath: 'get-html.mdx', + referenceGroup: 'core', + }, info: { memberPath: 'info', description: 'Return document metadata including revision, node count, and capabilities.', diff --git a/packages/document-api/src/contract/operation-registry.ts b/packages/document-api/src/contract/operation-registry.ts index ab7faf913c..f564d98d6c 100644 --- a/packages/document-api/src/contract/operation-registry.ts +++ b/packages/document-api/src/contract/operation-registry.ts @@ -23,6 +23,7 @@ import type { FindOptions } from '../find/find.js'; import type { GetNodeByIdInput } from '../get-node/get-node.js'; import type { GetTextInput } from '../get-text/get-text.js'; import type { GetMarkdownInput } from '../get-markdown/get-markdown.js'; +import type { GetHtmlInput } from '../get-html/get-html.js'; import type { InfoInput } from '../info/info.js'; import type { InsertInput } from '../insert/insert.js'; import type { ReplaceInput } from '../replace/replace.js'; @@ -212,6 +213,7 @@ export interface OperationRegistry extends FormatInlineAliasOperationRegistry { getNodeById: { input: GetNodeByIdInput; options: never; output: NodeInfo }; getText: { input: GetTextInput; options: never; output: string }; getMarkdown: { input: GetMarkdownInput; options: never; output: string }; + getHtml: { input: 
GetHtmlInput; options: never; output: string }; info: { input: InfoInput; options: never; output: DocumentInfo }; // --- Singleton mutations --- diff --git a/packages/document-api/src/contract/schemas.ts b/packages/document-api/src/contract/schemas.ts index 38cdf14ee0..4c536d9e4f 100644 --- a/packages/document-api/src/contract/schemas.ts +++ b/packages/document-api/src/contract/schemas.ts @@ -1556,6 +1556,12 @@ const operationSchemas: Record = { input: strictEmptyObjectSchema, output: { type: 'string' }, }, + getHtml: { + input: objectSchema({ + unflattenLists: { type: 'boolean' }, + }), + output: { type: 'string' }, + }, info: { input: strictEmptyObjectSchema, output: documentInfoSchema, diff --git a/packages/document-api/src/get-html/get-html.test.ts b/packages/document-api/src/get-html/get-html.test.ts new file mode 100644 index 0000000000..c033b8c931 --- /dev/null +++ b/packages/document-api/src/get-html/get-html.test.ts @@ -0,0 +1,26 @@ +import { executeGetHtml } from './get-html.js'; +import type { GetHtmlAdapter } from './get-html.js'; + +describe('executeGetHtml', () => { + it('delegates to adapter.getHtml with the input', () => { + const adapter: GetHtmlAdapter = { + getHtml: vi.fn(() => '

<p>Hello world</p>

'), + }; + + const result = executeGetHtml(adapter, {}); + + expect(result).toBe('

<p>Hello world</p>

'); + expect(adapter.getHtml).toHaveBeenCalledWith({}); + }); + + it('passes unflattenLists option through to the adapter', () => { + const adapter: GetHtmlAdapter = { + getHtml: vi.fn(() => '
<ol><li>item</li></ol>
'), + }; + + const result = executeGetHtml(adapter, { unflattenLists: false }); + + expect(result).toBe('
<ol><li>item</li></ol>
'); + expect(adapter.getHtml).toHaveBeenCalledWith({ unflattenLists: false }); + }); +}); diff --git a/packages/document-api/src/get-html/get-html.ts b/packages/document-api/src/get-html/get-html.ts new file mode 100644 index 0000000000..4af4ed4cdb --- /dev/null +++ b/packages/document-api/src/get-html/get-html.ts @@ -0,0 +1,28 @@ +export interface GetHtmlInput { + /** + * Convert SuperDoc's internal flat-list representation to proper nested + * `
<ol>`/`<ul>
      ` HTML. Defaults to `true`. + */ + unflattenLists?: boolean; +} + +/** + * Engine-specific adapter that the getHtml API delegates to. + */ +export interface GetHtmlAdapter { + /** + * Return the full document content as an HTML string. + */ + getHtml(input: GetHtmlInput): string; +} + +/** + * Execute a getHtml operation via the provided adapter. + * + * @param adapter - Engine-specific getHtml adapter. + * @param input - Canonical getHtml input object. + * @returns The full document content as an HTML string. + */ +export function executeGetHtml(adapter: GetHtmlAdapter, input: GetHtmlInput): string { + return adapter.getHtml(input); +} diff --git a/packages/document-api/src/index.ts b/packages/document-api/src/index.ts index 4324448c7b..c969851ec9 100644 --- a/packages/document-api/src/index.ts +++ b/packages/document-api/src/index.ts @@ -69,6 +69,7 @@ import type { GetNodeAdapter, GetNodeByIdInput } from './get-node/get-node.js'; import { executeGetNode, executeGetNodeById } from './get-node/get-node.js'; import { executeGetText, type GetTextAdapter, type GetTextInput } from './get-text/get-text.js'; import { executeGetMarkdown, type GetMarkdownAdapter, type GetMarkdownInput } from './get-markdown/get-markdown.js'; +import { executeGetHtml, type GetHtmlAdapter, type GetHtmlInput } from './get-html/get-html.js'; import { executeInfo, type InfoAdapter, type InfoInput } from './info/info.js'; import type { InsertInput } from './insert/insert.js'; import { executeDelete } from './delete/delete.js'; @@ -341,6 +342,7 @@ export type { FindAdapter, FindOptions } from './find/find.js'; export type { GetNodeAdapter, GetNodeByIdInput } from './get-node/get-node.js'; export type { GetTextAdapter, GetTextInput } from './get-text/get-text.js'; export type { GetMarkdownAdapter, GetMarkdownInput } from './get-markdown/get-markdown.js'; +export type { GetHtmlAdapter, GetHtmlInput } from './get-html/get-html.js'; export type { InfoAdapter, InfoInput } from './info/info.js'; 
export type { WriteAdapter, WriteRequest } from './write/write.js'; export type { @@ -720,6 +722,10 @@ export interface DocumentApi { * Return the full document content as a Markdown string. */ getMarkdown(input: GetMarkdownInput): string; + /** + * Return the full document content as an HTML string. + */ + getHtml(input: GetHtmlInput): string; /** * Return document summary info used by `doc.info`. */ @@ -816,6 +822,7 @@ export interface DocumentApiAdapters { getNode: GetNodeAdapter; getText: GetTextAdapter; getMarkdown: GetMarkdownAdapter; + getHtml: GetHtmlAdapter; info: InfoAdapter; capabilities: CapabilitiesAdapter; comments: CommentsAdapter; @@ -883,6 +890,9 @@ export function createDocumentApi(adapters: DocumentApiAdapters): DocumentApi { getMarkdown(input: GetMarkdownInput): string { return executeGetMarkdown(adapters.getMarkdown, input); }, + getHtml(input: GetHtmlInput): string { + return executeGetHtml(adapters.getHtml, input); + }, info(input: InfoInput): DocumentInfo { return executeInfo(adapters.info, input); }, diff --git a/packages/document-api/src/invoke/invoke.ts b/packages/document-api/src/invoke/invoke.ts index 0926da1c35..0290c0c169 100644 --- a/packages/document-api/src/invoke/invoke.ts +++ b/packages/document-api/src/invoke/invoke.ts @@ -64,6 +64,7 @@ export function buildDispatchTable(api: DocumentApi): TypedDispatchTable { getNodeById: (input) => api.getNodeById(input), getText: (input) => api.getText(input), getMarkdown: (input) => api.getMarkdown(input), + getHtml: (input) => api.getHtml(input), info: (input) => api.info(input), // --- Singleton mutations --- diff --git a/packages/sdk/codegen/src/generate-node.mjs b/packages/sdk/codegen/src/generate-node.mjs index e2ad7898f5..e555af98d0 100644 --- a/packages/sdk/codegen/src/generate-node.mjs +++ b/packages/sdk/codegen/src/generate-node.mjs @@ -130,6 +130,18 @@ function generateResultType(operationId, operation, $defs) { return { name, source: `export type ${name} = ${body};` }; } +// 
--------------------------------------------------------------------------- +// String-envelope unwrapping +// --------------------------------------------------------------------------- + +// Operations whose CLI response wraps a plain string inside +// `{ document, <key>: "..." }`. The SDK unwraps to return the string directly. +const STRING_ENVELOPE_KEY_BY_OPERATION_ID = { + 'doc.getText': 'text', + 'doc.getMarkdown': 'markdown', + 'doc.getHtml': 'html', +}; + // --------------------------------------------------------------------------- // Client tree rendering // --------------------------------------------------------------------------- @@ -143,6 +155,10 @@ function renderTreeNode(treeNode, paramTypeMap, resultTypeMap, indent = ' ') const resultTypeName = resultTypeMap.get(op.id); const hasRequired = (op.params ?? []).some((p) => p.required); const paramsArg = hasRequired ? `params: ${typeName}` : `params: ${typeName} = {}`; + const envelopeKey = STRING_ENVELOPE_KEY_BY_OPERATION_ID[op.id]; + if (envelopeKey) { + return `${indent}${camelCase(key)}: async (${paramsArg}, options?: InvokeOptions): Promise<${resultTypeName}> => unwrapStringEnvelope(await runtime.invoke(CONTRACT.operations[${JSON.stringify(op.id)}], params as unknown as Record<string, unknown>, options), ${JSON.stringify(envelopeKey)}),`; + } return `${indent}${camelCase(key)}: (${paramsArg}, options?: InvokeOptions) => runtime.invoke<${resultTypeName}>(CONTRACT.operations[${JSON.stringify(op.id)}], params as unknown as Record<string, unknown>, options),`; } @@ -184,6 +200,16 @@ function generateClientTs(contract) { "import { CONTRACT } from './contract.js';", "import type { SuperDocRuntime, InvokeOptions } from '../runtime/process.js';", '', + '/** Extract a string value from a CLI response envelope like `{ document, text: "..." }`. 
*/', + 'function unwrapStringEnvelope(value: unknown, key: string): string {', + ' if (typeof value === "string") return value;', + ' if (typeof value === "object" && value !== null) {', + ' const extracted = (value as Record<string, unknown>)[key];', + ' if (typeof extracted === "string") return extracted;', + ' }', + ' return value as string;', + '}', + '', paramInterfaces.join('\n\n'), '', resultTypes.join('\n\n'), diff --git a/packages/sdk/codegen/src/generate-python.mjs b/packages/sdk/codegen/src/generate-python.mjs index 496c266381..b1d935bc4d 100644 --- a/packages/sdk/codegen/src/generate-python.mjs +++ b/packages/sdk/codegen/src/generate-python.mjs @@ -182,6 +182,7 @@ function classNameFor(pathParts, asyncMode) { const STRING_ENVELOPE_KEY_BY_OPERATION_ID = { 'doc.getText': 'text', 'doc.getMarkdown': 'markdown', + 'doc.getHtml': 'html', }; function snakeCase(value) { diff --git a/packages/sdk/langs/node/src/tools.ts b/packages/sdk/langs/node/src/tools.ts index 7c83149254..2d72e2a9fd 100644 --- a/packages/sdk/langs/node/src/tools.ts +++ b/packages/sdk/langs/node/src/tools.ts @@ -433,27 +433,37 @@ export async function chooseTools(input: ToolChooserInput): Promise<{ return false; }); - for (const forcedToolName of input.policy?.forceInclude ?? []) { + // Resolve forceInclude tools — these are guaranteed slots exempt from budget trimming. + const forcedToolNames = new Set(input.policy?.forceInclude ?? []); + const forcedTools: ToolCatalogEntry[] = []; + for (const forcedToolName of forcedToolNames) { const forced = indexByToolName.get(forcedToolName); if (!forced) { excluded.push({ toolName: forcedToolName, reason: 'not-in-profile' }); continue; } candidates.push(forced); + forcedTools.push(forced); } candidates = [...new Map(candidates.map((tool) => [tool.toolName, tool])).values()]; - const selected: ToolCatalogEntry[] = []; + // Start with forceInclude tools — they always occupy a slot. 
+ const selected: ToolCatalogEntry[] = [...forcedTools]; + const selectedNames = new Set(selected.map((tool) => tool.toolName)); + const foundationalIds = new Set(policy.defaults.foundationalOperationIds); - const foundational = candidates.filter((tool) => foundationalIds.has(tool.operationId)); + const foundational = candidates.filter( + (tool) => foundationalIds.has(tool.operationId) && !selectedNames.has(tool.toolName), + ); for (const tool of foundational) { if (selected.length >= minReadTools || selected.length >= maxTools) break; selected.push(tool); + selectedNames.add(tool.toolName); } const remaining = stableSortByPhasePriority( - candidates.filter((tool) => !selected.some((entry) => entry.toolName === tool.toolName)), + candidates.filter((tool) => !selectedNames.has(tool.toolName)), phasePolicy.priority, ); diff --git a/packages/sdk/langs/python/superdoc/tools_api.py b/packages/sdk/langs/python/superdoc/tools_api.py index 57036e3b03..ff19ed7a59 100644 --- a/packages/sdk/langs/python/superdoc/tools_api.py +++ b/packages/sdk/langs/python/superdoc/tools_api.py @@ -297,28 +297,41 @@ def should_include(tool: Dict[str, Any]) -> bool: continue filtered.append(tool) + # Resolve forceInclude tools — these are guaranteed slots exempt from budget trimming. 
index_by_name = {str(tool.get('toolName')): tool for tool in profile_tools if isinstance(tool, dict)} - for forced_name in policy.get('forceInclude') or []: - forced = index_by_name.get(str(forced_name)) + forced_tool_names_raw: list = list(policy.get('forceInclude') or []) + forced_tool_names_seen: set = set() + forced_tools: List[Dict[str, Any]] = [] + for forced_name in forced_tool_names_raw: + forced_name_key = str(forced_name) + if forced_name_key in forced_tool_names_seen: + continue + forced_tool_names_seen.add(forced_name_key) + forced = index_by_name.get(forced_name_key) if forced is None: excluded.append({'toolName': str(forced_name), 'reason': 'not-in-profile'}) continue filtered.append(forced) + forced_tools.append(forced) deduped: Dict[str, Dict[str, Any]] = {} for tool in filtered: deduped[str(tool.get('toolName'))] = tool candidates = list(deduped.values()) - selected: List[Dict[str, Any]] = [] + # Start with forceInclude tools — they always occupy a slot. + selected: List[Dict[str, Any]] = list(forced_tools) + selected_names: set = {str(tool.get('toolName')) for tool in selected} + foundational_ids = set(defaults.get('foundationalOperationIds', [])) - foundational = [tool for tool in candidates if str(tool.get('operationId')) in foundational_ids] + foundational = [tool for tool in candidates if str(tool.get('operationId')) in foundational_ids and str(tool.get('toolName')) not in selected_names] for tool in foundational: if len(selected) >= min_read_tools or len(selected) >= max_tools: break selected.append(tool) + selected_names.add(str(tool.get('toolName'))) - remaining = [tool for tool in _priority_sort(candidates, phase_policy['priority']) if str(tool.get('toolName')) not in {str(item.get('toolName')) for item in selected}] + remaining = [tool for tool in _priority_sort(candidates, phase_policy['priority']) if str(tool.get('toolName')) not in selected_names] for tool in remaining: if len(selected) >= max_tools: diff --git 
a/packages/sdk/langs/python/tests/test_client_method_naming.py b/packages/sdk/langs/python/tests/test_client_method_naming.py index b9c1965eb5..71913860c6 100644 --- a/packages/sdk/langs/python/tests/test_client_method_naming.py +++ b/packages/sdk/langs/python/tests/test_client_method_naming.py @@ -24,6 +24,8 @@ def invoke(self, operation_id, params, **_kwargs): return {"document": {"path": "x.docx"}, "markdown": "# Hello"} if operation_id == "doc.getText": return {"document": {"path": "x.docx"}, "text": "Hello"} + if operation_id == "doc.getHtml": + return {"document": {"path": "x.docx"}, "html": "

      Hello

      "} return {"operation_id": operation_id, "params": params} @@ -32,6 +34,8 @@ async def _async_invoke_with_envelopes(operation_id, params, **_kwargs): return {"document": {"path": "x.docx"}, "markdown": "# Hello"} if operation_id == "doc.getText": return {"document": {"path": "x.docx"}, "text": "Hello"} + if operation_id == "doc.getHtml": + return {"document": {"path": "x.docx"}, "html": "

      Hello

      "} return {"operation_id": operation_id, "params": params} @@ -49,6 +53,11 @@ def test_sync_doc_api_exposes_snake_case_and_camel_aliases(): assert doc.get_markdown({})["operation_id"] == "doc.getMarkdown" assert doc.getMarkdown({})["operation_id"] == "doc.getMarkdown" + assert hasattr(doc, "get_html") + assert hasattr(doc, "getHtml") + assert doc.get_html({})["operation_id"] == "doc.getHtml" + assert doc.getHtml({})["operation_id"] == "doc.getHtml" + assert hasattr(doc, "track_changes") assert hasattr(doc, "trackChanges") assert doc.track_changes.list({})["operation_id"] == "doc.trackChanges.list" @@ -65,6 +74,11 @@ def test_async_doc_api_exposes_snake_case_and_camel_aliases(): assert asyncio.run(doc.get_markdown({}))["operation_id"] == "doc.getMarkdown" assert asyncio.run(doc.getMarkdown({}))["operation_id"] == "doc.getMarkdown" + assert hasattr(doc, "get_html") + assert hasattr(doc, "getHtml") + assert asyncio.run(doc.get_html({}))["operation_id"] == "doc.getHtml" + assert asyncio.run(doc.getHtml({}))["operation_id"] == "doc.getHtml" + def test_sync_doc_api_unwraps_string_envelopes(): from superdoc.generated.client import _SyncDocApi @@ -72,6 +86,7 @@ def test_sync_doc_api_unwraps_string_envelopes(): doc = _SyncDocApi(_SyncEnvelopeRuntimeStub()) assert doc.get_markdown({}) == "# Hello" assert doc.get_text({}) == "Hello" + assert doc.get_html({}) == "

      Hello

      " def test_async_doc_api_unwraps_string_envelopes(): @@ -80,3 +95,4 @@ def test_async_doc_api_unwraps_string_envelopes(): doc = _AsyncDocApi(_AsyncEnvelopeRuntimeStub()) assert asyncio.run(doc.get_markdown({})) == "# Hello" assert asyncio.run(doc.get_text({})) == "Hello" + assert asyncio.run(doc.get_html({})) == "

      Hello

      " diff --git a/packages/super-editor/src/document-api-adapters/assemble-adapters.ts b/packages/super-editor/src/document-api-adapters/assemble-adapters.ts index f0c10eb41c..68ffed189b 100644 --- a/packages/super-editor/src/document-api-adapters/assemble-adapters.ts +++ b/packages/super-editor/src/document-api-adapters/assemble-adapters.ts @@ -4,6 +4,7 @@ import { findAdapter } from './find-adapter.js'; import { getNodeAdapter, getNodeByIdAdapter } from './get-node-adapter.js'; import { getTextAdapter } from './get-text-adapter.js'; import { getMarkdownAdapter } from './get-markdown-adapter.js'; +import { getHtmlAdapter } from './get-html-adapter.js'; import { infoAdapter } from './info-adapter.js'; import { getDocumentApiCapabilities } from './capabilities-adapter.js'; import { createCommentsWrapper } from './plan-engine/comments-wrappers.js'; @@ -174,6 +175,9 @@ export function assembleDocumentApiAdapters(editor: Editor): DocumentApiAdapters getMarkdown: { getMarkdown: (input) => getMarkdownAdapter(editor, input), }, + getHtml: { + getHtml: (input) => getHtmlAdapter(editor, input), + }, info: { info: (input) => infoAdapter(editor, input), }, diff --git a/packages/super-editor/src/document-api-adapters/get-html-adapter.ts b/packages/super-editor/src/document-api-adapters/get-html-adapter.ts new file mode 100644 index 0000000000..88440ca09f --- /dev/null +++ b/packages/super-editor/src/document-api-adapters/get-html-adapter.ts @@ -0,0 +1,21 @@ +import type { Editor } from '../core/Editor.js'; +import type { GetHtmlInput } from '@superdoc/document-api'; + +const DEFAULT_UNFLATTEN_LISTS = true; + +/** + * Return the full document content as an HTML string. + * + * Unlike the markdown adapter (which uses its own AST pipeline), this delegates + * directly to `editor.getHTML()` because there is no equivalent AST-based HTML + * serialization pipeline. The DOM required by `getHTML()` is provided by the + * CLI-injected `options.document` in headless sessions. 
+ * + * @param editor - The editor instance. + * @param input - Canonical getHtml input. + * @returns HTML string representation of the document. + */ +export function getHtmlAdapter(editor: Editor, input: GetHtmlInput): string { + const unflattenLists = input.unflattenLists ?? DEFAULT_UNFLATTEN_LISTS; + return editor.getHTML({ unflattenLists }); +} diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts index 7d3d2222f4..0a8a04a0ce 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts @@ -212,23 +212,14 @@ describe('insertStructuredWrapper — list numbering rollback', () => { }); describe('insertStructuredWrapper — html', () => { - it('does not throw for HTML insert (gracefully succeeds or returns failure)', () => { - // The test editor in vitest (happy-dom) may or may not have DOM support. - // The key assertion is that this never throws an unhandled error. - expect(() => { - const result = insertStructuredWrapper(editor, { - value: '

      Hello from HTML

      ', - type: 'html', - }); + it('inserts HTML content into the document', () => { + const result = insertStructuredWrapper(editor, { + value: '

      Hello from HTML

      ', + type: 'html', + }); - // In a DOM environment it should succeed; in headless it fails gracefully - if (result.success) { - expect(getDocTextContent(editor)).toContain('Hello from HTML'); - } else { - expect(result.failure).toBeDefined(); - expect(['UNSUPPORTED_ENVIRONMENT', 'INVALID_TARGET']).toContain(result.failure?.code); - } - }).not.toThrow(); + expect(result.success).toBe(true); + expect(getDocTextContent(editor)).toContain('Hello from HTML'); }); }); diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts index 6f8dfed446..1f7271aecd 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts @@ -45,7 +45,16 @@ import { import { TrackFormatMarkName } from '../../extensions/track-changes/constants.js'; import { applyDirectMutationMeta, applyTrackedMutationMeta } from '../helpers/transaction-meta.js'; import { markdownToPmFragment } from '../../core/helpers/markdown/markdownToPmContent.js'; -import { processContent } from '../../core/helpers/contentProcessor.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Check whether the editor has a DOM document available for HTML parsing. */ +function editorHasDom(editor: Editor): boolean { + const opts = (editor as any).options; + return !!(opts?.document ?? opts?.mockDocument ?? (typeof document !== 'undefined' ? document : null)); +} // --------------------------------------------------------------------------- // Locator normalization (same validation as the old adapters) @@ -490,10 +499,11 @@ export function styleApplyWrapper( * Insert structured content (markdown or html) at a target position. 
* * Routes through `executeDomainCommand` to enforce the revision guard. - * Conversion (markdown → AST → PM, or html → processContent → PM) happens + * Conversion (markdown → AST → PM, or html → insertContentAt) happens * inside the handler, so list-definition side effects only occur after the - * revision check passes. HTML content goes through the canonical - * `processContent` pipeline, matching the `insertContent` command path. + * revision check passes. HTML content is passed directly to + * `editor.commands.insertContentAt` to avoid prosemirror-model dual-copy + * issues when the Editor is loaded from a bundled dist. * * Tracked mode is explicitly rejected for structured content in this implementation. */ @@ -576,45 +586,25 @@ export function insertStructuredWrapper( }; } } else if (contentType === 'html') { - // NOTE: processContent has no dryRun flag — this runs the full HTML - // pipeline (DOM creation, wrapTextsInRuns) minus the final insertContentAt. - // Snapshot numbering state so we can roll back after the dry-run, since - // HTML list parsing allocates IDs/definitions on editor.converter. - const converter = (editor as any).converter; - const numberingSnapshot = converter?.numbering ? JSON.parse(JSON.stringify(converter.numbering)) : undefined; - const translatedNumberingSnapshot = converter?.translatedNumbering - ? JSON.parse(JSON.stringify(converter.translatedNumbering)) - : undefined; - try { - const processedDoc = processContent({ content: value, type: 'html', editor }); - if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') { - return { - success: false, - resolution, - failure: { - code: 'INVALID_TARGET', - message: 'HTML processing did not produce a valid document node.', - }, - }; - } - } catch (err) { - const message = err instanceof Error ? err.message : String(err); + // Dry-run for HTML: validate that a DOM is available and input is non-empty. 
+ // Full PM parsing validation happens at insert time via the Editor's + // bundled command infrastructure (see the non-dry-run path below). + if (!value || typeof value !== 'string' || value.trim().length === 0) { + return { + success: false, + resolution, + failure: { code: 'NO_OP', message: 'HTML content is empty.' }, + }; + } + if (!editorHasDom(editor)) { return { success: false, resolution, failure: { code: 'UNSUPPORTED_ENVIRONMENT', - message: `HTML structured insert requires a DOM environment. ${message}`, + message: 'HTML insert requires a DOM environment. Provide { document } in editor options.', }, }; - } finally { - // Roll back numbering mutations from the dry-run HTML pipeline. - if (converter && numberingSnapshot !== undefined) { - converter.numbering = numberingSnapshot; - } - if (converter && translatedNumberingSnapshot !== undefined) { - converter.translatedNumbering = translatedNumberingSnapshot; - } } } return { success: true, resolution }; @@ -660,21 +650,21 @@ export function insertStructuredWrapper( } return ok; } else if (contentType === 'html') { - // Route through processContent for the canonical HTML pipeline - // (createDocFromHTML + wrapTextsInRuns), matching insertContent command behavior. - // processContent requires a DOM; in headless environments this will throw. + // Pass HTML string directly to insertContentAt. This avoids a + // prosemirror-model dual-copy issue: calling processContent from this + // source file imports DOMParser from node_modules, but the Editor's + // schema uses the bundled copy from the superdoc dist. Routing through + // the Editor's command infrastructure uses the same bundled copy for + // both DOMParser and the schema — avoiding the mismatch. + if (!editorHasDom(editor)) { + insertFailure = { + code: 'UNSUPPORTED_ENVIRONMENT', + message: 'HTML insert requires a DOM environment. 
Provide { document } in editor options.', + }; + return false; + } try { - const processedDoc = processContent({ content: value, type: 'html', editor }); - if (!processedDoc || typeof (processedDoc as { toJSON?: unknown }).toJSON !== 'function') { - insertFailure = { - code: 'INVALID_TARGET', - message: 'HTML processing did not produce a valid document node.', - }; - return false; - } - const jsonContent = (processedDoc as { toJSON(): Record<string, unknown> }).toJSON(); - - const ok = Boolean(editor.commands.insertContentAt({ from, to }, jsonContent)); + const ok = Boolean(editor.commands.insertContentAt({ from, to }, value)); if (!ok) { insertFailure = { code: 'INVALID_TARGET', diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5cba8dedaa..4dc8953346 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -415,6 +415,9 @@ importers: fast-glob: specifier: 'catalog:' version: 3.3.3 + happy-dom: + specifier: 20.4.0 + version: 20.4.0 y-websocket: specifier: 'catalog:' version: 3.0.0(yjs@13.6.19)