From 60e1f9b271a7f542b29520e32bdae787b6f4cd50 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Thu, 19 Feb 2026 13:33:33 -0800 Subject: [PATCH] fix(super-editor): add unsupported-content reporting across HTML/Markdown import paths --- apps/docs/core/superdoc/configuration.mdx | 16 ++ apps/docs/core/supereditor/configuration.mdx | 16 ++ apps/docs/core/supereditor/methods.mdx | 6 + .../src/core/Editor.api-contracts.test.js | 22 ++ packages/super-editor/src/core/Editor.ts | 21 +- .../src/core/commands/insertContent.js | 4 + .../src/core/helpers/catchAllSchema.js | 172 +++++++++++++++ .../src/core/helpers/catchAllSchema.test.js | 201 ++++++++++++++++++ .../src/core/helpers/contentProcessor.js | 22 +- .../src/core/helpers/importHtml.js | 22 ++ .../src/core/types/EditorConfig.ts | 13 ++ packages/super-editor/src/index.d.ts | 10 + packages/superdoc/src/core/types/index.js | 2 + 13 files changed, 521 insertions(+), 6 deletions(-) create mode 100644 packages/super-editor/src/core/helpers/catchAllSchema.js create mode 100644 packages/super-editor/src/core/helpers/catchAllSchema.test.js diff --git a/apps/docs/core/superdoc/configuration.mdx b/apps/docs/core/superdoc/configuration.mdx index f917276af0..3adb469bb0 100644 --- a/apps/docs/core/superdoc/configuration.mdx +++ b/apps/docs/core/superdoc/configuration.mdx @@ -324,6 +324,22 @@ new SuperDoc({ Disable custom context menus + + Callback invoked with HTML elements that were dropped during import because they have no schema representation. Receives an array of `{ tagName, outerHTML, count }` items. When provided, `console.warn` is suppressed. + + ```javascript + onUnsupportedContent: (items) => { + items.forEach(({ tagName, count }) => { + console.log(`Dropped ${count}x <${tagName}>`); + }); + } + ``` + + + + Log a `console.warn` listing HTML elements dropped during import. Ignored when `onUnsupportedContent` is provided. 
+ + Content Security Policy nonce diff --git a/apps/docs/core/supereditor/configuration.mdx b/apps/docs/core/supereditor/configuration.mdx index ab94cf797b..a399ec998e 100644 --- a/apps/docs/core/supereditor/configuration.mdx +++ b/apps/docs/core/supereditor/configuration.mdx @@ -141,6 +141,22 @@ const editor = await Editor.open(file, { Use ProseMirror JSON content instead of DOCX parsing + + Callback invoked with HTML elements that were dropped during import because they have no schema representation. Receives an array of `{ tagName, outerHTML, count }` items. When provided, `console.warn` is suppressed. + + ```javascript + onUnsupportedContent: (items) => { + items.forEach(({ tagName, count }) => { + console.log(`Dropped ${count}x <${tagName}>`); + }); + } + ``` + + + + Log a `console.warn` listing HTML elements dropped during import. Ignored when `onUnsupportedContent` is provided. + + ## Features diff --git a/apps/docs/core/supereditor/methods.mdx b/apps/docs/core/supereditor/methods.mdx index a855e204f0..12bf029bef 100644 --- a/apps/docs/core/supereditor/methods.mdx +++ b/apps/docs/core/supereditor/methods.mdx @@ -542,6 +542,12 @@ Insert content with automatic format detection. Insert position (defaults to cursor) + + Callback for HTML elements dropped during parsing. Receives `{ tagName, outerHTML, count }[]`. Falls back to the editor-level option if not set. + + + Log dropped elements via `console.warn`. Falls back to the editor-level option if not set. 
+ diff --git a/packages/super-editor/src/core/Editor.api-contracts.test.js b/packages/super-editor/src/core/Editor.api-contracts.test.js index 4128ee549f..6917093303 100644 --- a/packages/super-editor/src/core/Editor.api-contracts.test.js +++ b/packages/super-editor/src/core/Editor.api-contracts.test.js @@ -117,6 +117,28 @@ describe('Editor - API Contracts (Regression Prevention)', () => { }); }); + it('docx markdown initialization forwards unsupported-content callback', () => { + const onUnsupportedContent = vi.fn(); + + ({ editor } = initTestEditor({ + mode: 'docx', + content: '

Fallback content

', + markdown: '', + onUnsupportedContent, + useImmediateSetTimeout: false, + })); + + return new Promise((resolve) => { + setTimeout(() => { + expect(onUnsupportedContent).toHaveBeenCalledTimes(1); + expect(onUnsupportedContent.mock.calls[0][0]).toEqual([ + expect.objectContaining({ tagName: 'VIDEO', count: 1 }), + ]); + resolve(); + }, 10); + }); + }); + it('html option should initialize with editor instance', () => { let initCompleted = false; diff --git a/packages/super-editor/src/core/Editor.ts b/packages/super-editor/src/core/Editor.ts index f57ee9930d..7f806c7369 100644 --- a/packages/super-editor/src/core/Editor.ts +++ b/packages/super-editor/src/core/Editor.ts @@ -1888,11 +1888,21 @@ export class Editor extends EventEmitter { // Check for markdown BEFORE html (since markdown gets converted to HTML) if (this.options.markdown) { - doc = createDocFromMarkdown(this.options.markdown, this, { isImport: true, document: domDocument }); + doc = createDocFromMarkdown(this.options.markdown, this, { + isImport: true, + document: domDocument, + onUnsupportedContent: this.options.onUnsupportedContent, + warnOnUnsupportedContent: this.options.warnOnUnsupportedContent, + }); } // If we have a new doc, and have html data, we initialize from html else if (this.options.html) - doc = createDocFromHTML(this.options.html, this, { isImport: true, document: domDocument }); + doc = createDocFromHTML(this.options.html, this, { + isImport: true, + document: domDocument, + onUnsupportedContent: this.options.onUnsupportedContent, + warnOnUnsupportedContent: this.options.warnOnUnsupportedContent, + }); else if (this.options.jsonOverride) doc = this.schema.nodeFromJSON(this.options.jsonOverride); if (fragment) doc = yXmlFragmentToProseMirrorRootNode(fragment, this.schema); @@ -1902,7 +1912,12 @@ export class Editor extends EventEmitter { // If we are in HTML mode, we initialize from either content or html (or blank) else if (mode === 'text' || mode === 'html') { if (loadFromSchema && 
hasJsonContent(content)) doc = this.schema.nodeFromJSON(content); - else if (typeof content === 'string') doc = createDocFromHTML(content, this, { document: domDocument }); + else if (typeof content === 'string') + doc = createDocFromHTML(content, this, { + document: domDocument, + onUnsupportedContent: this.options.onUnsupportedContent, + warnOnUnsupportedContent: this.options.warnOnUnsupportedContent, + }); else doc = this.schema.topNodeType.createAndFill()!; } } catch (err) { diff --git a/packages/super-editor/src/core/commands/insertContent.js b/packages/super-editor/src/core/commands/insertContent.js index 7dd6e5637f..c2db6a23f2 100644 --- a/packages/super-editor/src/core/commands/insertContent.js +++ b/packages/super-editor/src/core/commands/insertContent.js @@ -12,6 +12,8 @@ import { processContent } from '../helpers/contentProcessor.js'; * @param {Object} [options={}] - Options for insertion. * @param {string} [options.contentType] - The type of content being inserted: 'html', 'markdown', 'text', or 'schema'. * @param {boolean} [options.parseOptions] - Additional options for parsing (if applicable). + * @param {((items: Array<{tagName: string, outerHTML: string, count: number}>) => void) | null} [options.onUnsupportedContent] - Callback for unsupported HTML elements. Falls back to editor.options.onUnsupportedContent. + * @param {boolean} [options.warnOnUnsupportedContent] - When true, emits console.warn for unsupported content. Falls back to editor.options.warnOnUnsupportedContent. * @returns {function} A command function that can be executed by the editor. */ export const insertContent = @@ -30,6 +32,8 @@ export const insertContent = content: value, type: options.contentType, editor, + onUnsupportedContent: options.onUnsupportedContent ?? editor.options?.onUnsupportedContent, + warnOnUnsupportedContent: options.warnOnUnsupportedContent ?? 
editor.options?.warnOnUnsupportedContent, }); const jsonContent = processedDoc.toJSON(); diff --git a/packages/super-editor/src/core/helpers/catchAllSchema.js b/packages/super-editor/src/core/helpers/catchAllSchema.js new file mode 100644 index 0000000000..a6f3fa04c9 --- /dev/null +++ b/packages/super-editor/src/core/helpers/catchAllSchema.js @@ -0,0 +1,172 @@ +//@ts-check +import { Schema } from 'prosemirror-model'; + +/** + * @typedef {Object} UnsupportedContentItem + * @property {string} tagName - e.g. "HR", "DETAILS" + * @property {string} outerHTML - truncated to 200 chars max + * @property {number} count - how many instances of this tagName were dropped + */ + +const CATCH_ALL_NODE_NAME = '__supereditor__private__unknown__catch__all__node'; +const MAX_OUTER_HTML_LENGTH = 200; + +/** @type {WeakMap} */ +const catchAllSchemaCache = new WeakMap(); + +/** + * Returns a cached copy of the given schema with a catch-all node appended. + * The catch-all node matches any element not already handled by the real schema, + * allowing detection of unsupported content. + * + * @param {Schema} baseSchema + * @returns {Schema} + */ +export function getCatchAllSchema(baseSchema) { + let cached = catchAllSchemaCache.get(baseSchema); + if (cached) return cached; + + cached = new Schema({ + topNode: baseSchema.spec.topNode, + marks: baseSchema.spec.marks, + nodes: baseSchema.spec.nodes.append({ + [CATCH_ALL_NODE_NAME]: { + content: 'inline*', + group: 'block', + parseDOM: [{ tag: '*' }], + }, + }), + }); + + catchAllSchemaCache.set(baseSchema, cached); + return cached; +} + +/** + * Scans an element's DOM subtree for tags the schema's parseDOM rules do not cover, to detect unsupported content. + * Returns an aggregated list of unsupported items grouped by tagName. 
+ * + * @param {Element} element - The DOM element to parse + * @param {Schema} schema - The real editor schema + * @returns {UnsupportedContentItem[]} + */ +export function detectUnsupportedContent(element, schema) { + /** @type {Map} */ + const itemsByTag = new Map(); + + const knownTags = collectKnownTags(schema); + scanForUnsupported(element, knownTags, itemsByTag); + + return Array.from(itemsByTag.values()); +} + +/** @type {WeakMap>} */ +const knownTagsCache = new WeakMap(); + +/** + * Collect all tag names that the schema knows how to parse (cached per schema). + * @param {Schema} schema + * @returns {Set} + */ +function collectKnownTags(schema) { + const cached = knownTagsCache.get(schema); + if (cached) return cached; + + const tags = new Set(); + + // Collect from nodes + // NOTE: parseDOM may be a function in super-editor extensions (non-standard), + // so we cast to unknown to keep the runtime guard while satisfying TS. + for (const nodeType of Object.values(schema.nodes)) { + const raw = /** @type {unknown} */ (nodeType.spec.parseDOM); + if (!raw) continue; + const rules = typeof raw === 'function' ? raw() : /** @type {any[]} */ (raw); + for (const rule of rules) { + if (rule.tag) { + const match = rule.tag.match(/^([a-zA-Z][a-zA-Z0-9-]*)/); + if (match) tags.add(match[1].toUpperCase()); + } + } + } + + // Collect from marks + for (const markType of Object.values(schema.marks)) { + const raw = /** @type {unknown} */ (markType.spec.parseDOM); + if (!raw) continue; + const rules = typeof raw === 'function' ? 
raw() : /** @type {any[]} */ (raw); + for (const rule of rules) { + if (rule.tag) { + const match = rule.tag.match(/^([a-zA-Z][a-zA-Z0-9-]*)/); + if (match) tags.add(match[1].toUpperCase()); + } + } + } + + // Always consider basic structural tags as known (they wrap content, not dropped) + for (const tag of ['HTML', 'HEAD', 'BODY', 'DIV', 'SPAN']) { + tags.add(tag); + } + + knownTagsCache.set(schema, tags); + return tags; +} + +/** + * Recursively scan DOM for elements whose tag is not in the known set. + * + * When an unknown tag has descendants with known tags (e.g. `` wrapping + * ``), ProseMirror "looks through" the wrapper and parses the children. + * Those transparent wrappers are NOT reported — only elements whose entire + * subtree is also unknown (truly dropped content) are reported. + * + * @param {Element} element + * @param {Set} knownTags + * @param {Map} itemsByTag + */ +function scanForUnsupported(element, knownTags, itemsByTag) { + for (let i = 0; i < element.children.length; i++) { + const child = element.children[i]; + const tag = child.tagName.toUpperCase(); + + if (!knownTags.has(tag)) { + // ProseMirror "looks through" unknown wrappers and parses their + // children — including text nodes and known elements. Only report + // elements whose content is truly lost (no text, no known descendants). 
+ if (hasPreservableContent(child, knownTags)) { + scanForUnsupported(child, knownTags, itemsByTag); + continue; + } + + const existing = itemsByTag.get(tag); + if (existing) { + existing.count++; + } else { + let outerHTML = child.outerHTML; + if (outerHTML.length > MAX_OUTER_HTML_LENGTH) { + outerHTML = outerHTML.slice(0, MAX_OUTER_HTML_LENGTH) + '…'; + } + itemsByTag.set(tag, { tagName: tag, outerHTML, count: 1 }); + } + } else { + // Known tag — recurse into children to find nested unsupported elements + scanForUnsupported(child, knownTags, itemsByTag); + } + } +} + +/** + * Returns true if ProseMirror will preserve content from this element — + * either because it contains non-whitespace text or a known descendant element. + * @param {Element} element + * @param {Set} knownTags + * @returns {boolean} + */ +function hasPreservableContent(element, knownTags) { + if (element.textContent && element.textContent.trim().length > 0) return true; + for (let i = 0; i < element.children.length; i++) { + const child = element.children[i]; + if (knownTags.has(child.tagName.toUpperCase())) return true; + if (hasPreservableContent(child, knownTags)) return true; + } + return false; +} diff --git a/packages/super-editor/src/core/helpers/catchAllSchema.test.js b/packages/super-editor/src/core/helpers/catchAllSchema.test.js new file mode 100644 index 0000000000..274879e56d --- /dev/null +++ b/packages/super-editor/src/core/helpers/catchAllSchema.test.js @@ -0,0 +1,201 @@ +import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest'; +import { Schema } from 'prosemirror-model'; +import { getCatchAllSchema, detectUnsupportedContent } from './catchAllSchema.js'; + +// Build a minimal schema that supports only doc, paragraph, text, blockquote, strong, em. +// Tags like