From ccdb086ff3f2bfc7653c6a05416f2002c4abb140 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Wed, 4 Mar 2026 22:13:50 -0800 Subject: [PATCH] feat(markdown): import images from markdown --- .../helpers/markdown/mdastToProseMirror.ts | 66 ++++++++-- .../insert-structured-wrapper.test.ts | 39 ++++++ .../plan-engine/plan-wrappers.ts | 34 ++++++ .../tests/markdown/markdown-insert-image.ts | 115 ++++++++++++++++++ 4 files changed, 241 insertions(+), 13 deletions(-) create mode 100644 tests/doc-api-stories/tests/markdown/markdown-insert-image.ts diff --git a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts index 7ea8aa3a70..480808b2d1 100644 --- a/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts +++ b/packages/super-editor/src/core/helpers/markdown/mdastToProseMirror.ts @@ -32,7 +32,10 @@ import type { List as MdastList, ListItem as MdastListItem, } from 'mdast'; +import { v4 as uuidv4 } from 'uuid'; import { ListHelpers } from '../list-numbering-helpers.js'; +import { generateDocxRandomId } from '../generateDocxRandomId.js'; +import { readImageDimensionsFromDataUri } from '../../super-converter/image-dimensions.js'; import type { MdastConversionContext, MarkdownDiagnostic } from './types.js'; // --------------------------------------------------------------------------- @@ -67,6 +70,7 @@ interface JsonMark { // OOXML stores percentages in fiftieths of a percent. // 5000 = 100% table width. const FULL_WIDTH_TABLE_PCT = 5000; +const imageDocPrIdsByContext = new WeakMap>(); // --------------------------------------------------------------------------- // Block-level converters @@ -366,14 +370,7 @@ function convertImageBlock(node: MdastImage, ctx: MdastConversionContext): JsonN return makeParagraph([]); } - const imageNode: JsonNode = { - type: 'image', - attrs: { - src: node.url, - alt: node.alt ?? null, - title: node.title ?? null, - }, - }; + const imageNode: JsonNode = { type: 'image', attrs: buildImageAttrs(node, ctx) }; // Image must be wrapped in a paragraph for the OOXML content model return { @@ -506,15 +503,58 @@ function convertInlineImage(node: MdastImage, ctx: MdastConversionContext): Json return [ { type: 'image', - attrs: { - src: node.url, - alt: node.alt ?? null, - title: node.title ?? null, - }, + attrs: buildImageAttrs(node, ctx), }, ]; } +function buildImageAttrs(node: MdastImage, ctx: MdastConversionContext): Record { + const attrs: Record = { + src: node.url, + alt: node.alt ?? null, + title: node.title ?? null, + sdImageId: uuidv4(), + id: generateUniqueImageDocPrId(ctx), + }; + + const dimensions = readImageDimensionsFromDataUri(node.url); + if (dimensions) { + attrs.size = dimensions; + } + + return attrs; +} + +function generateUniqueImageDocPrId(ctx: MdastConversionContext): string { + const existingIds = getOrCreateImageDocPrIdSet(ctx); + let candidate = ''; + + do { + const hex = generateDocxRandomId(); + candidate = String(parseInt(hex, 16)); + } while (!candidate || existingIds.has(candidate)); + + existingIds.add(candidate); + return candidate; +} + +function getOrCreateImageDocPrIdSet(ctx: MdastConversionContext): Set { + const cached = imageDocPrIdsByContext.get(ctx); + if (cached) return cached; + + const existingIds = new Set(); + ctx.editor?.state?.doc?.descendants((node) => { + if (node.type.name !== 'image') return true; + if (node.attrs.id !== undefined && node.attrs.id !== null) { + existingIds.add(String(node.attrs.id)); + } + return true; + }); + + imageDocPrIdsByContext.set(ctx, existingIds); + return existingIds; +} + // --------------------------------------------------------------------------- // JSON node builders // --------------------------------------------------------------------------- diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts index 5b80265b2c..4f509938b7 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/insert-structured-wrapper.test.ts @@ -1,4 +1,5 @@ import { beforeAll, beforeEach, afterEach, describe, it, expect, vi } from 'vitest'; +import type { Node as PmNode } from 'prosemirror-model'; import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js'; import type { Editor } from '../../core/Editor.js'; import { insertStructuredWrapper } from './plan-wrappers.js'; @@ -35,6 +36,18 @@ function getDocTextContent(ed: Editor): string { return ed.state.doc.textContent; } +function getFirstImageNode(ed: Editor): PmNode | null { + let found: PmNode | null = null; + ed.state.doc.descendants((node) => { + if (node.type.name === 'image') { + found = node; + return false; + } + return true; + }); + return found; +} + /** Requires prior seeded content — a blank doc has no text offsets to span. */ function findResolvableNonCollapsedTarget(ed: Editor): { blockId: string; range: { start: number; end: number } } { const candidateIds = new Set(); @@ -67,6 +80,9 @@ function findResolvableNonCollapsedTarget(ed: Editor): { blockId: string; range: } describe('insertStructuredWrapper — markdown', () => { + const oneByOnePngDataUri = + 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO2Z5kYAAAAASUVORK5CYII='; + it('inserts markdown paragraph content into the document', () => { const result = insertStructuredWrapper(editor, { value: 'Hello from markdown', @@ -151,6 +167,29 @@ describe('insertStructuredWrapper — markdown', () => { expect(result.failure?.code).toBe('INVALID_TARGET'); expect(getDocTextContent(editor)).toBe(textBefore); }); + + it('inserts markdown images with stable image metadata', () => { + (editor as any).options.isHeadless = true; + + const result = insertStructuredWrapper(editor, { + value: `![pixel](${oneByOnePngDataUri})`, + type: 'markdown', + }); + + expect(result.success).toBe(true); + + const imageNode = getFirstImageNode(editor); + expect(imageNode).not.toBeNull(); + if (!imageNode) return; // narrow for TS + + expect(String(imageNode.attrs.src)).toMatch(/^word\/media\//); + expect(imageNode.attrs.rId).toEqual(expect.any(String)); + expect(imageNode.attrs.sdImageId).toEqual(expect.any(String)); + expect(imageNode.attrs.sdImageId.length).toBeGreaterThan(0); + expect(imageNode.attrs.id).toEqual(expect.any(String)); + expect(imageNode.attrs.size).toEqual({ width: 1, height: 1 }); + expect((editor as any).storage?.image?.media?.[imageNode.attrs.src]).toBe(oneByOnePngDataUri); + }); }); describe('insertStructuredWrapper — table separators', () => { diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts index 27d8ac358b..0f0b35cfde 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/plan-wrappers.ts @@ -56,6 +56,39 @@ function editorHasDom(editor: Editor): boolean { return !!(opts?.document ?? opts?.mockDocument ?? (typeof document !== 'undefined' ? document : null)); } +function isJsonObject(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +/** + * Ensure every inserted markdown image node has a stable `sdImageId`. + * + * The markdown converter should already provide this, but we enforce it at the + * insert boundary so `images.list/get` remain reliable even if upstream + * conversion changes or misses an edge-case image shape. + */ +function ensureMarkdownImageIds(nodes: Record[]): void { + const visit = (node: Record) => { + if (node.type === 'image') { + const attrs = isJsonObject(node.attrs) ? { ...node.attrs } : {}; + const hasStableId = typeof attrs.sdImageId === 'string' && attrs.sdImageId.length > 0; + if (!hasStableId) { + attrs.sdImageId = uuidv4(); + } + node.attrs = attrs; + } + + if (!Array.isArray(node.content)) return; + for (const child of node.content) { + if (isJsonObject(child)) visit(child); + } + }; + + for (const node of nodes) { + visit(node); + } +} + /** * Mutate `jsonNodes` in place so that consecutive table nodes within the * array are separated by an empty paragraph. Only handles within-fragment @@ -654,6 +687,7 @@ export function insertStructuredWrapper( // because createNodeFromContent treats it as a single JSON object. const jsonNodes: Record[] = []; fragment.forEach((node) => jsonNodes.push(node.toJSON())); + ensureMarkdownImageIds(jsonNodes); // Word always separates adjacent tables with a paragraph. Without a // trailing separator, consecutive markdown inserts produce adjacent diff --git a/tests/doc-api-stories/tests/markdown/markdown-insert-image.ts b/tests/doc-api-stories/tests/markdown/markdown-insert-image.ts new file mode 100644 index 0000000000..016672eae5 --- /dev/null +++ b/tests/doc-api-stories/tests/markdown/markdown-insert-image.ts @@ -0,0 +1,115 @@ +import { describe, expect, it } from 'vitest'; +import { unwrap, useStoryHarness } from '../harness'; + +const ONE_BY_ONE_PNG_DATA_URI = + 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO2Z5kYAAAAASUVORK5CYII='; + +function sid(label: string): string { + return `${label}-${Date.now()}-${Math.floor(Math.random() * 1_000_000)}`; +} + +describe('document-api story: markdown insert image', () => { + const { client, outPath } = useStoryHarness('markdown/insert-image', { + preserveResults: true, + }); + + it('inserts markdown with a base64 image and preserves it after save + reopen', async () => { + const sessionId = sid('markdown-image'); + const reopenSessionId = sid('markdown-image-reopen'); + const outputDoc = outPath('markdown-insert-image.docx'); + const markdown = `![pixel](${ONE_BY_ONE_PNG_DATA_URI})`; + + await client.doc.open({ sessionId }); + + const beforeImageNodes = unwrap( + await client.doc.find({ + sessionId, + type: 'node', + nodeType: 'image', + limit: 100, + }), + ); + const beforeImages = unwrap(await client.doc.images.list({ sessionId })); + const baselineNodeTotal = Number(beforeImageNodes.total ?? 0); + const baselineImageTotal = Number(beforeImages.total ?? 0); + + const insertResult = unwrap(await client.doc.insert({ sessionId, value: markdown, type: 'markdown' })); + expect(insertResult?.receipt?.success ?? insertResult?.success).toBe(true); + + const imageNodesAfterInsert = unwrap( + await client.doc.find({ + sessionId, + type: 'node', + nodeType: 'image', + limit: 100, + }), + ); + expect(imageNodesAfterInsert.total).toBe(baselineNodeTotal + 1); + + const imagesAfterInsert = unwrap(await client.doc.images.list({ sessionId })); + expect(imagesAfterInsert.total).toBe(baselineImageTotal + 1); + + const insertedImageId = imagesAfterInsert.items?.[imagesAfterInsert.items.length - 1]?.sdImageId; + expect(typeof insertedImageId).toBe('string'); + expect(insertedImageId?.length).toBeGreaterThan(0); + + const insertedImage = unwrap( + await client.doc.images.get({ + sessionId, + imageId: insertedImageId, + }), + ); + expect(insertedImage.properties?.alt).toBe('pixel'); + expect(insertedImage.properties?.size).toMatchObject({ width: 1, height: 1 }); + + const markdownAfterInsert = unwrap(await client.doc.getMarkdown({ sessionId })); + expect(typeof markdownAfterInsert).toBe('string'); + expect(markdownAfterInsert).toContain('![pixel]('); + + await client.doc.save({ + sessionId, + out: outputDoc, + force: true, + }); + + await client.doc.close({ + sessionId, + discard: true, + }); + + await client.doc.open({ + sessionId: reopenSessionId, + doc: outputDoc, + }); + + const imageNodesAfterReopen = unwrap( + await client.doc.find({ + sessionId: reopenSessionId, + type: 'node', + nodeType: 'image', + limit: 100, + }), + ); + expect(imageNodesAfterReopen.total).toBe(baselineNodeTotal + 1); + + const imagesAfterReopen = unwrap(await client.doc.images.list({ sessionId: reopenSessionId })); + expect(imagesAfterReopen.total).toBe(baselineImageTotal + 1); + + const reopenedImageId = imagesAfterReopen.items?.[imagesAfterReopen.items.length - 1]?.sdImageId; + expect(typeof reopenedImageId).toBe('string'); + expect(reopenedImageId?.length).toBeGreaterThan(0); + + const reopenedImage = unwrap( + await client.doc.images.get({ + sessionId: reopenSessionId, + imageId: reopenedImageId, + }), + ); + expect(reopenedImage.properties?.alt).toBe('pixel'); + expect(reopenedImage.properties?.size).toMatchObject({ width: 1, height: 1 }); + + const markdownAfterReopen = unwrap(await client.doc.getMarkdown({ sessionId: reopenSessionId })); + expect(typeof markdownAfterReopen).toBe('string'); + expect(markdownAfterReopen).toContain('![pixel]('); + }); +});