diff --git a/apps/cli/src/__tests__/cli.test.ts b/apps/cli/src/__tests__/cli.test.ts index 82632ac5f2..3c361d4527 100644 --- a/apps/cli/src/__tests__/cli.test.ts +++ b/apps/cli/src/__tests__/cli.test.ts @@ -3,6 +3,7 @@ import { access, copyFile, mkdir, readFile, rm, writeFile } from 'node:fs/promis import { join } from 'node:path'; import { run } from '../index'; import { resolveListDocFixture, resolveSourceDocFixture } from './fixtures'; +import { writeListDocWithoutParaIds } from './unstable-list-fixture'; type RunResult = { code: number; @@ -1218,6 +1219,55 @@ describe('superdoc CLI', () => { expect(getEnvelope.data.item.address.nodeId).toBe(address.nodeId); }); + test('lists list/get keep list item addresses stable for docs without paraIds in stateless mode', async () => { + const source = join(TEST_DIR, 'lists-no-paraids-stateless.docx'); + await writeListDocWithoutParaIds(source); + + const address = await firstListItemAddress(['lists', 'list', source, '--limit', '1']); + + const getResult = await runCli(['lists', 'get', source, '--address-json', JSON.stringify(address)]); + expect(getResult.code).toBe(0); + + const getEnvelope = parseJsonOutput< + SuccessEnvelope<{ + address: ListItemAddress; + item: { address: ListItemAddress }; + }> + >(getResult); + expect(getEnvelope.data.item.address.nodeId).toBe(address.nodeId); + + const secondAddress = await firstListItemAddress(['lists', 'list', source, '--limit', '1']); + expect(secondAddress.nodeId).toBe(address.nodeId); + }); + + test('lists list/get keep list item addresses stable for docs without paraIds in stateful mode', async () => { + const source = join(TEST_DIR, 'lists-no-paraids-stateful.docx'); + await writeListDocWithoutParaIds(source); + + try { + const openResult = await runCli(['open', source]); + expect(openResult.code).toBe(0); + + const address = await firstListItemAddress(['lists', 'list', '--limit', '1']); + + const getResult = await runCli(['lists', 'get', '--address-json', JSON.stringify(address)]); + expect(getResult.code).toBe(0); + + const getEnvelope = parseJsonOutput< + SuccessEnvelope<{ + address: ListItemAddress; + item: { address: ListItemAddress }; + }> + >(getResult); + expect(getEnvelope.data.item.address.nodeId).toBe(address.nodeId); + + const secondAddress = await firstListItemAddress(['lists', 'list', '--limit', '1']); + expect(secondAddress.nodeId).toBe(address.nodeId); + } finally { + await runCli(['close', '--discard']); + } + }); + test('lists list pretty prints list rows', async () => { const result = await runCli(['lists', 'list', LIST_SAMPLE_DOC, '--limit', '2', '--output', 'pretty']); expect(result.code).toBe(0); diff --git a/apps/cli/src/__tests__/unstable-list-fixture.ts b/apps/cli/src/__tests__/unstable-list-fixture.ts new file mode 100644 index 0000000000..5f9187f3e8 --- /dev/null +++ b/apps/cli/src/__tests__/unstable-list-fixture.ts @@ -0,0 +1,58 @@ +import { createRequire } from 'node:module'; +import { readFile, writeFile } from 'node:fs/promises'; +import path from 'node:path'; +import { pathToFileURL } from 'node:url'; +import { resolveListDocFixture } from './fixtures'; + +/** Minimal type for the subset of the JSZip API used in this fixture. */ +interface JsZipInstance { + file(path: string): { async(type: string): Promise } | null; + file(path: string, data: string): void; + generateAsync(options: { type: string }): Promise; +} + +interface JsZipConstructor { + loadAsync(data: Buffer | Uint8Array): Promise; +} + +const REPO_ROOT = path.resolve(import.meta.dir, '../../../..'); +const require = createRequire(import.meta.url); + +let jsZipPromise: Promise | null = null; + +async function loadJsZip(): Promise { + if (jsZipPromise) return jsZipPromise; + + jsZipPromise = (async () => { + const entry = require.resolve('jszip', { + paths: [path.join(REPO_ROOT, 'packages/super-editor')], + }); + const mod = await import(pathToFileURL(entry).href); + return (mod.default ?? mod) as JsZipConstructor; + })(); + + return jsZipPromise; +} + +export async function writeListDocWithoutParaIds(outputPath: string): Promise { + const sourcePath = await resolveListDocFixture(); + const JSZip = await loadJsZip(); + const sourceBytes = await readFile(sourcePath); + const zip = await JSZip.loadAsync(sourceBytes); + const documentXmlFile = zip.file('word/document.xml'); + if (!documentXmlFile) { + throw new Error(`Fixture doc is missing word/document.xml: ${sourcePath}`); + } + + const documentXml = await documentXmlFile.async('string'); + const updatedXml = documentXml.replace(/\s+w14:paraId="[^"]*"/g, '').replace(/\s+w14:textId="[^"]*"/g, ''); + + if (updatedXml === documentXml) { + throw new Error(`Fixture doc did not contain paragraph ids to strip: ${sourcePath}`); + } + + zip.file('word/document.xml', updatedXml); + const outputBytes = await zip.generateAsync({ type: 'nodebuffer' }); + await writeFile(outputPath, outputBytes); + return outputPath; +} diff --git a/apps/docs/document-api/overview.mdx b/apps/docs/document-api/overview.mdx index 0e8f5df834..f4e68b91b2 100644 --- a/apps/docs/document-api/overview.mdx +++ b/apps/docs/document-api/overview.mdx @@ -73,7 +73,7 @@ For nodes created at runtime (not imported from DOCX), `nodeId` falls back to `s | ID source | Stable across loads? | When used | |-----------|---------------------|-----------| -| `paraId` (from DOCX) | Best effort (usually stable for unchanged DOCX blocks) | Paragraphs, tables, rows, cells imported from DOCX | +| `paraId` (from DOCX) | Best effort (usually stable for unchanged DOCX blocks) | Paragraphs and table rows imported from DOCX | | `sdBlockId` (runtime) | No (session-scoped) | Nodes created programmatically before first export | diff --git a/apps/docs/document-api/reference/_generated-manifest.json b/apps/docs/document-api/reference/_generated-manifest.json index 8d7cdf6f75..61571a90d6 100644 --- a/apps/docs/document-api/reference/_generated-manifest.json +++ b/apps/docs/document-api/reference/_generated-manifest.json @@ -962,5 +962,5 @@ } ], "marker": "{/* GENERATED FILE: DO NOT EDIT. Regenerate via `pnpm run docapi:sync`. */}", - "sourceHash": "9197780d09944c67a656339ee8adc0e2c4473d1dffb09288c9c0b85f68fd34f3" + "sourceHash": "278f3d13c4cb49be6d084639559a5de63cd623606bc046c4abea69815fcbbe1c" } diff --git a/apps/docs/document-api/reference/sections/set-odd-even-headers-footers.mdx b/apps/docs/document-api/reference/sections/set-odd-even-headers-footers.mdx index 2b2a255b14..b5bca4b92a 100644 --- a/apps/docs/document-api/reference/sections/set-odd-even-headers-footers.mdx +++ b/apps/docs/document-api/reference/sections/set-odd-even-headers-footers.mdx @@ -22,7 +22,7 @@ Enable or disable odd/even header-footer mode in document settings. ## Expected result -Returns a DocumentMutationResult receipt; reports NO_OP if the odd/even setting already matches. +Returns a DocumentMutationResult (not SectionMutationResult) because odd/even headers-footers is a document-level setting, not per-section. Reports NO_OP if the setting already matches. ## Input fields diff --git a/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.js b/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.js index 8cc3e90867..ff7e649750 100644 --- a/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.js +++ b/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.js @@ -1,8 +1,9 @@ -import { generateDocxRandomId } from '@core/helpers/generateDocxRandomId.js'; - const PARAGRAPH_IDENTITY_ATTRS = ['sdBlockId', 'paraId']; const TABLE_IDENTITY_ATTRS = ['sdBlockId', 'paraId', 'blockId']; const DEFAULT_BLOCK_IDENTITY_ATTRS = ['sdBlockId', 'blockId', 'paraId']; +const SYNTHETIC_PARA_ID_TYPES = new Set(['paragraph', 'tableRow']); +const DOCX_ID_LENGTH = 8; +const MAX_DOCX_ID = 0xffffffff; /** Maps block node types to safe block-identity attribute lookup order. */ const BLOCK_IDENTITY_ATTRS = { @@ -23,55 +24,134 @@ function toIdentityValue(value) { return undefined; } -function resolvePrimaryBlockIdentity(node) { - if (!node || typeof node !== 'object') return undefined; +function getBlockIdentityAttrs(node) { + if (!node || typeof node !== 'object') return []; + return BLOCK_IDENTITY_ATTRS[node.type] ?? []; +} - const attrPriority = BLOCK_IDENTITY_ATTRS[node.type]; - if (!attrPriority) return undefined; +function getExplicitIdentityEntries(node) { + const attrPriority = getBlockIdentityAttrs(node); + if (attrPriority.length === 0) return []; const attrs = typeof node.attrs === 'object' && node.attrs ? node.attrs : {}; + const identityEntries = []; + for (const attr of attrPriority) { const value = toIdentityValue(attrs[attr]); - if (value) return { id: value, source: attr }; + if (value) { + identityEntries.push({ attr, value }); + } } - return undefined; + + return identityEntries; +} + +function groupIdentityEntriesByValue(identityEntries) { + const groupsByValue = new Map(); + + for (const entry of identityEntries) { + const existingGroup = groupsByValue.get(entry.value); + if (existingGroup) { + existingGroup.attrs.push(entry.attr); + continue; + } + + groupsByValue.set(entry.value, { + value: entry.value, + attrs: [entry.attr], + }); + } + + return [...groupsByValue.values()]; +} + +function shouldSynthesizeParaId(node) { + return Boolean(node && typeof node === 'object' && SYNTHETIC_PARA_ID_TYPES.has(node.type)); } -function nextUniqueDocxId(usedIds) { - let id = generateDocxRandomId(); - while (usedIds.has(id)) { - id = generateDocxRandomId(); +function collectExplicitBlockIdentities(node, reservedIds) { + if (!node || typeof node !== 'object') return; + + const identityEntries = getExplicitIdentityEntries(node); + for (const { value } of groupIdentityEntriesByValue(identityEntries)) { + reservedIds.add(value); + } + + if (Array.isArray(node.content)) { + node.content.forEach((child) => collectExplicitBlockIdentities(child, reservedIds)); } - return id; } -function dedupeBlockIdentitiesInNode(node, usedIds) { +function createDeterministicDocxIdAllocator(reservedIds) { + let nextValue = 1; + + return () => { + while (nextValue <= MAX_DOCX_ID) { + const id = nextValue.toString(16).toUpperCase().padStart(DOCX_ID_LENGTH, '0'); + nextValue += 1; + + if (reservedIds.has(id)) continue; + + reservedIds.add(id); + return id; + } + + throw new Error('Unable to allocate a unique synthetic DOCX block id.'); + }; +} + +function setBlockIdentity(node, attrName, value) { + node.attrs = { ...(node.attrs ?? {}), [attrName]: value }; +} + +function normalizeBlockIdentitiesInNode(node, seenIds, allocateDocxId) { if (!node || typeof node !== 'object') return; - const identity = resolvePrimaryBlockIdentity(node); - if (identity) { - if (usedIds.has(identity.id)) { - const replacementId = nextUniqueDocxId(usedIds); - node.attrs = { ...node.attrs, [identity.source]: replacementId }; - usedIds.add(replacementId); - } else { - usedIds.add(identity.id); + const identityEntries = getExplicitIdentityEntries(node); + const groupedIdentities = groupIdentityEntriesByValue(identityEntries); + + if (groupedIdentities.length > 0) { + for (const identityGroup of groupedIdentities) { + if (seenIds.has(identityGroup.value)) { + const replacementId = allocateDocxId(); + for (const attr of identityGroup.attrs) { + setBlockIdentity(node, attr, replacementId); + } + seenIds.add(replacementId); + } else { + seenIds.add(identityGroup.value); + } } + } else if (shouldSynthesizeParaId(node)) { + const syntheticParaId = allocateDocxId(); + setBlockIdentity(node, 'paraId', syntheticParaId); + seenIds.add(syntheticParaId); } if (Array.isArray(node.content)) { - node.content.forEach((child) => dedupeBlockIdentitiesInNode(child, usedIds)); + node.content.forEach((child) => normalizeBlockIdentitiesInNode(child, seenIds, allocateDocxId)); } } /** - * Deduplicate block identities during import so document-api targeting remains stable. + * Normalize imported block identities so document-api targeting remains stable. * * Word files can occasionally contain duplicate stable block IDs across blocks. - * Since stable IDs are used for deterministic targeting in the adapters, - * duplicates break deterministic targeting and mutations. + * Some exporters also omit `w14:paraId` entirely, leaving imported blocks with + * no stable public identity and forcing the adapter layer to fall back to the + * volatile `sdBlockId` assigned at editor startup. * - * Only safe block identity attributes are rewritten: sdBlockId, paraId, and blockId. + * This pass fixes both cases: + * - rewrites duplicate explicit identity values while preserving the first + * explicit occurrence of each value + * - reserves every explicit identity value up front so synthesized IDs never + * collide with a non-primary but still-public identifier such as paragraph + * `paraId` + * - synthesizes deterministic `paraId` values for schema-valid block types + * that arrive with no stable identity at all + * + * Only block identity attributes are rewritten or synthesized: sdBlockId, + * paraId, and blockId. * * @param {Array<{type?: string, attrs?: Record, content?: unknown[]}>} content * @returns {Array<{type?: string, attrs?: Record, content?: unknown[]}>} @@ -79,8 +159,12 @@ function dedupeBlockIdentitiesInNode(node, usedIds) { export function normalizeDuplicateBlockIdentitiesInContent(content = []) { if (!Array.isArray(content) || content.length === 0) return content; - const usedIds = new Set(); - content.forEach((node) => dedupeBlockIdentitiesInNode(node, usedIds)); + const reservedIds = new Set(); + content.forEach((node) => collectExplicitBlockIdentities(node, reservedIds)); + + const allocateDocxId = createDeterministicDocxIdAllocator(reservedIds); + const seenIds = new Set(); + content.forEach((node) => normalizeBlockIdentitiesInNode(node, seenIds, allocateDocxId)); return content; } diff --git a/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.test.js b/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.test.js index c3608b71f9..b687b5cd99 100644 --- a/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.test.js +++ b/packages/super-editor/src/core/super-converter/v2/importer/normalizeDuplicateBlockIdentitiesInContent.test.js @@ -35,6 +35,43 @@ describe('normalizeDuplicateBlockIdentitiesInContent', () => { expect(content[1].attrs.paraId).toBeUndefined(); }); + it('synthesizes deterministic paraId values for paragraphs missing stable identities', () => { + const firstImport = [paragraph({}, 'A'), paragraph({}, 'B')]; + const secondImport = [paragraph({}, 'A'), paragraph({}, 'B')]; + + normalizeDuplicateBlockIdentitiesInContent(firstImport); + normalizeDuplicateBlockIdentitiesInContent(secondImport); + + const firstIds = firstImport.map((node) => node.attrs.paraId); + const secondIds = secondImport.map((node) => node.attrs.paraId); + + expect(firstIds).toEqual(secondIds); + expect(firstIds[0]).toMatch(/^[0-9A-F]{8}$/); + expect(firstIds[1]).toMatch(/^[0-9A-F]{8}$/); + expect(firstIds[0]).not.toBe(firstIds[1]); + }); + + it('reserves explicit ids before synthesizing paraIds for earlier missing blocks', () => { + const content = [paragraph({}, 'Missing ID'), paragraph({ paraId: '00000001' }, 'Explicit ID')]; + + normalizeDuplicateBlockIdentitiesInContent(content); + + expect(content[0].attrs.paraId).toMatch(/^[0-9A-F]{8}$/); + expect(content[0].attrs.paraId).not.toBe('00000001'); + expect(content[1].attrs.paraId).toBe('00000001'); + }); + + it('reserves explicit paraIds even when sdBlockId is the primary paragraph identity', () => { + const content = [paragraph({}, 'Missing ID'), paragraph({ sdBlockId: 'X', paraId: '00000001' }, 'Explicit ParaId')]; + + normalizeDuplicateBlockIdentitiesInContent(content); + + expect(content[0].attrs.paraId).toMatch(/^[0-9A-F]{8}$/); + expect(content[0].attrs.paraId).not.toBe('00000001'); + expect(content[1].attrs.sdBlockId).toBe('X'); + expect(content[1].attrs.paraId).toBe('00000001'); + }); + it('prioritizes sdBlockId over paraId when both are present on paragraphs', () => { const content = [ paragraph({ paraId: 'P1', sdBlockId: 'SAME' }, 'A'), @@ -50,6 +87,21 @@ describe('normalizeDuplicateBlockIdentitiesInContent', () => { expect(content[1].attrs.paraId).toBe('P2'); }); + it('deduplicates explicit paraIds even when sdBlockIds are distinct', () => { + const content = [ + paragraph({ sdBlockId: 'A', paraId: 'DUPLICATE' }, 'A'), + paragraph({ sdBlockId: 'B', paraId: 'DUPLICATE' }, 'B'), + ]; + + normalizeDuplicateBlockIdentitiesInContent(content); + + expect(content[0].attrs.sdBlockId).toBe('A'); + expect(content[1].attrs.sdBlockId).toBe('B'); + expect(content[0].attrs.paraId).toBe('DUPLICATE'); + expect(content[1].attrs.paraId).not.toBe('DUPLICATE'); + expect(content[1].attrs.paraId).toMatch(/^[0-9A-F]{8}$/); + }); + it('deduplicates table blockId when paraId/sdBlockId are not present', () => { const content = [table([], { blockId: 'TABLE-ID' }), table([], { blockId: 'TABLE-ID' })]; @@ -93,13 +145,12 @@ describe('normalizeDuplicateBlockIdentitiesInContent', () => { const collect = (node) => { if (!node || typeof node !== 'object') return; const attrs = node.attrs ?? {}; - const id = - (typeof attrs.paraId === 'string' && attrs.paraId) || - (typeof attrs.sdBlockId === 'string' && attrs.sdBlockId) || - (typeof attrs.blockId === 'string' && attrs.blockId) || - (typeof attrs.id === 'string' && attrs.id) || - (typeof attrs.uuid === 'string' && attrs.uuid); - if (id) { + const nodeIds = new Set( + [attrs.paraId, attrs.sdBlockId, attrs.blockId, attrs.id, attrs.uuid].filter( + (value) => typeof value === 'string' && value.length > 0, + ), + ); + for (const id of nodeIds) { if (identities.has(id)) duplicates.add(id); identities.add(id); } @@ -109,4 +160,26 @@ describe('normalizeDuplicateBlockIdentitiesInContent', () => { content.forEach(collect); expect(duplicates.size).toBe(0); }); + + it('synthesizes paraIds only for schema-valid paragraph and row nodes', () => { + const content = [table([row([cell([paragraph({}, 'R1C1')], {}), cell([paragraph({}, 'R1C2')], {})])], {})]; + + normalizeDuplicateBlockIdentitiesInContent(content); + + const tableNode = content[0]; + const rowNode = tableNode.content[0]; + const firstCell = rowNode.content[0]; + const secondCell = rowNode.content[1]; + + expect(tableNode.attrs.paraId).toBeUndefined(); + expect(firstCell.attrs.paraId).toBeUndefined(); + expect(secondCell.attrs.paraId).toBeUndefined(); + + expect(rowNode.attrs.paraId).toMatch(/^[0-9A-F]{8}$/); + expect(firstCell.content[0].attrs.paraId).toMatch(/^[0-9A-F]{8}$/); + expect(secondCell.content[0].attrs.paraId).toMatch(/^[0-9A-F]{8}$/); + expect( + new Set([rowNode.attrs.paraId, firstCell.content[0].attrs.paraId, secondCell.content[0].attrs.paraId]).size, + ).toBe(3); + }); }); diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/utils.js b/packages/super-editor/src/core/super-converter/v3/handlers/utils.js index 594e185705..0da20bf8d3 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/utils.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/utils.js @@ -202,6 +202,29 @@ export function createTrackChangesPropertyHandler(xmlName, sdName = null, extraA }; } +const UNSUPPORTED_TABLE_IDENTITY_ATTRS = ['w14:paraId', 'w14:textId']; + +/** + * Removes legacy identity attributes from exported table containers whose OOXML + * schemas do not define `w14:paraId` / `w14:textId`. + * + * We still import these attributes from older SuperDoc-generated files for + * backwards compatibility, but we must not emit them on `` or ``. + * + * @param {Record | undefined} attrs + * @returns {Record} + */ +export function stripUnsupportedTableIdentityAttributes(attrs) { + if (!attrs || typeof attrs !== 'object') return {}; + + const filteredAttrs = { ...attrs }; + for (const attrName of UNSUPPORTED_TABLE_IDENTITY_ATTRS) { + delete filteredAttrs[attrName]; + } + + return filteredAttrs; +} + /** * Parses a measurement value, handling ECMA-376 percentage strings. * diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.js index d8e6611a6a..cd718b4098 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.js @@ -4,14 +4,19 @@ import { preProcessVerticalMergeCells } from '@core/super-converter/export-helpe import { eighthPointsToPixels, halfPointToPoints, twipsToPixels } from '@core/super-converter/helpers.js'; import { buildFallbackGridForTable } from '@core/super-converter/helpers/tableFallbackHelpers.js'; import { translateChildNodes } from '@core/super-converter/v2/exporter/helpers/index.js'; -import { createAttributeHandler } from '@converter/v3/handlers/utils.js'; +import { createAttributeHandler, stripUnsupportedTableIdentityAttributes } from '@converter/v3/handlers/utils.js'; import { NodeTranslator } from '@translator'; import { translator as tblGridTranslator } from '../tblGrid'; import { translator as tblPrTranslator } from '../tblPr'; import { translator as trTranslator } from '../tr'; /** - * Attributes preserved across DOCX roundtrip for table identity. + * Legacy table identity attributes imported from older SuperDoc exports. + * + * WordprocessingML does not define `w14:paraId` / `w14:textId` on ``, + * so decode intentionally strips them before export. We still read them on + * import so previously exported documents remain addressable in-session. + * * @type {import('@translator').AttrConfig[]} */ const validXmlAttributes = ['w14:paraId', 'w14:textId'].map((xmlName) => createAttributeHandler(xmlName)); @@ -321,7 +326,7 @@ const decode = (params, decodedAttrs) => { return { name: 'w:tbl', - attributes: decodedAttrs || {}, + attributes: stripUnsupportedTableIdentityAttributes(decodedAttrs), elements, }; }; diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.test.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.test.js index d69884c782..1979eb9fe5 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.test.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/tbl/tbl-translator.test.js @@ -43,6 +43,11 @@ describe('w:tbl translator', () => { expect(translator.sdNodeOrKeyName).toBe('table'); expect(translator).toBeInstanceOf(NodeTranslator); }); + + it('keeps legacy paraId handlers for import compatibility', () => { + const paraIdHandler = translator.attributes.find((attr) => attr.sdName === 'paraId'); + expect(paraIdHandler?.xmlName).toBe('w14:paraId'); + }); }); describe('encode', () => { @@ -230,6 +235,22 @@ describe('w:tbl translator', () => { }); describe('decode', () => { + it('drops legacy w14 table identity attributes on export', () => { + const result = translator.decode( + { + node: { + type: 'table', + attrs: {}, + content: [], + }, + extraParams: {}, + }, + { 'w14:paraId': 'ABCDEF01', 'w14:textId': 'ABCDEF02' }, + ); + + expect(result.attributes).toEqual({}); + }); + it('should decode a table node with properties and grid', () => { const mockNode = { type: 'table', diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.js index 99881b6966..f635933172 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.js @@ -1,5 +1,5 @@ import { NodeTranslator } from '../../../node-translator/node-translator'; -import { createAttributeHandler } from '@converter/v3/handlers/utils.js'; +import { createAttributeHandler, stripUnsupportedTableIdentityAttributes } from '@converter/v3/handlers/utils.js'; import { handleTableCellNode } from './helpers/legacy-handle-table-cell-node'; import { translateTableCell } from './helpers/translate-table-cell'; @@ -10,7 +10,12 @@ const XML_NODE_NAME = 'w:tc'; const SD_NODE_NAME = 'tableCell'; /** - * Attributes preserved across DOCX roundtrip for cell identity. + * Legacy cell identity attributes imported from older SuperDoc exports. + * + * WordprocessingML does not define `w14:paraId` / `w14:textId` on ``. + * We continue importing them for backwards compatibility, but decode strips + * them so newly exported DOCX files stay schema-valid. + * * @type {import('@translator').AttrConfig[]} */ const validXmlAttributes = ['w14:paraId', 'w14:textId'].map((xmlName) => createAttributeHandler(xmlName)); @@ -60,8 +65,9 @@ function encode(params, encodedAttrs) { */ function decode(params, decodedAttrs) { const translated = translateTableCell(params); - if (decodedAttrs && Object.keys(decodedAttrs).length) { - translated.attributes = { ...(translated.attributes || {}), ...decodedAttrs }; + const filteredDecodedAttrs = stripUnsupportedTableIdentityAttributes(decodedAttrs); + if (Object.keys(filteredDecodedAttrs).length) { + translated.attributes = { ...(translated.attributes || {}), ...filteredDecodedAttrs }; } return translated; } diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.test.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.test.js index 4f6d92aa83..ef72189eb9 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.test.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/tc/tc-translator.test.js @@ -31,6 +31,11 @@ describe('w:tc translator', () => { expect(translator.sdNodeOrKeyName).toBe('tableCell'); }); + it('keeps legacy paraId handlers for import compatibility', () => { + const paraIdHandler = translator.attributes.find((attr) => attr.sdName === 'paraId'); + expect(paraIdHandler?.xmlName).toBe('w14:paraId'); + }); + it('encode calls legacy handler and merges encodedAttrs into result attrs', () => { const params = { extraParams: { node: {}, table: {}, row: {} } }; const res = config.encode(params, { extra: 'ok' }); @@ -48,4 +53,13 @@ describe('w:tc translator', () => { expect(out.name).toBe('w:tc'); expect(out.attributes).toMatchObject({ 'w:foo': 'bar' }); }); + + it('drops legacy w14 cell identity attributes on export', () => { + const params = { node: { type: 'tableCell', attrs: {} } }; + const out = config.decode(params, { 'w14:paraId': 'ABCDEF01', 'w14:textId': 'ABCDEF02' }); + + expect(translateTableCell).toHaveBeenCalledTimes(1); + expect(out.name).toBe('w:tc'); + expect(out.attributes).toBeUndefined(); + }); }); diff --git a/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.test.ts b/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.test.ts index 2ad579ce25..3518455106 100644 --- a/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.test.ts +++ b/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.test.ts @@ -36,9 +36,10 @@ function makeNode( attrs, nodeSize, isBlock, - descendants(callback: (node: ProseMirrorNode, pos: number) => void) { - for (const child of children) { - callback(child.node, child.offset); + descendants(callback: (node: ProseMirrorNode, pos: number, parent?: ProseMirrorNode, index?: number) => void) { + for (let index = 0; index < children.length; index += 1) { + const child = children[index]!; + callback(child.node, child.offset, this as unknown as ProseMirrorNode, index); } }, } as unknown as ProseMirrorNode; @@ -368,7 +369,7 @@ describe('buildBlockIndex', () => { expect(index.byId.get('table:sd-t1')).toBeDefined(); }); - it('does not register alias when sdBlockId is the primary (no paraId)', () => { + it('keeps a descriptive sdBlockId as the primary table id when no better persisted id exists', () => { const index = indexFromNodes({ typeName: 'table', attrs: { sdBlockId: 'sd-t1' }, @@ -377,6 +378,17 @@ describe('buildBlockIndex', () => { expect(index.candidates[0].nodeId).toBe('sd-t1'); expect(index.byId.get('table:sd-t1')).toBeDefined(); }); + + it('registers a UUID-like sdBlockId as an alias when table fallback ids are used', () => { + const index = indexFromNodes({ + typeName: 'table', + attrs: { sdBlockId: '7701a615-4ad8-45b5-922c-2a32114df4c8' }, + offset: 0, + }); + + expect(index.candidates[0].nodeId).toMatch(/^table-auto-/); + expect(index.byId.get('table:7701a615-4ad8-45b5-922c-2a32114df4c8')).toBeDefined(); + }); }); describe('ID resolution — non-paragraph nodes', () => { @@ -389,7 +401,7 @@ describe('buildBlockIndex', () => { expect(index.candidates[0].nodeId).toBe('p1'); }); - it('uses sdBlockId when paraId is absent', () => { + it('uses a descriptive sdBlockId when paraId is absent', () => { const index = indexFromNodes({ typeName: 'table', attrs: { sdBlockId: 'sd1' }, @@ -398,6 +410,24 @@ describe('buildBlockIndex', () => { expect(index.candidates[0].nodeId).toBe('sd1'); }); + it('uses deterministic fallback ids for tables when only a UUID-like sdBlockId exists', () => { + const index = indexFromNodes({ + typeName: 'table', + attrs: { sdBlockId: '7701a615-4ad8-45b5-922c-2a32114df4c8' }, + offset: 0, + }); + expect(index.candidates[0].nodeId).toMatch(/^table-auto-/); + }); + + it('uses deterministic fallback ids for table cells when only a UUID-like sdBlockId exists', () => { + const index = indexFromNodes({ + typeName: 'tableCell', + attrs: { sdBlockId: 'e488acbb-54ee-4ee8-b40b-2b9b9b062e49' }, + offset: 0, + }); + expect(index.candidates[0].nodeId).toMatch(/^cell-auto-/); + }); + it('falls back to blockId when paraId and sdBlockId are absent', () => { const index = indexFromNodes({ typeName: 'table', @@ -434,13 +464,13 @@ describe('buildBlockIndex', () => { expect(index.candidates[0].nodeId).toBe('u1'); }); - it('skips non-paragraph candidates when no id attrs exist', () => { + it('builds deterministic fallback ids for tables with no explicit identity attrs', () => { const index = indexFromNodes({ typeName: 'table', attrs: {}, offset: 3, }); - expect(index.candidates).toHaveLength(0); + expect(index.candidates[0]?.nodeId).toMatch(/^table-auto-/); }); it('ignores empty string attrs', () => { diff --git a/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.ts b/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.ts index ee8a416354..60a300df8d 100644 --- a/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.ts +++ b/packages/super-editor/src/document-api-adapters/helpers/node-address-resolver.ts @@ -5,6 +5,7 @@ import type { BlockNodeAddress, BlockNodeType, NodeAddress, NodeType } from '@su import type { ParagraphAttrs } from '../../extensions/types/node-attributes.js'; import { toId } from './value-utils.js'; import { resolvePublicTocNodeId } from './toc-node-id.js'; +import { buildFallbackTableNodeId, isVolatileRuntimeBlockId } from './table-node-id.js'; import { DocumentApiAdapterError } from '../errors.js'; /** Superset of all possible ID attributes across block node types. */ @@ -36,6 +37,8 @@ export type BlockIndex = { ambiguous: ReadonlySet; }; +type TraversalPath = readonly number[]; + // Keep in sync with BlockNodeType in document-api/types/node.ts const SUPPORTED_BLOCK_NODE_TYPES: ReadonlySet = new Set([ 'paragraph', @@ -110,7 +113,29 @@ export function mapBlockNodeType(node: ProseMirrorNode): BlockNodeType | undefin } } -export function resolveBlockNodeId(node: ProseMirrorNode, pos: number, nodeType: BlockNodeType): string | undefined { +function resolveLegacyTableIdentity(attrs: BlockIdAttrs): string | undefined { + return toId(attrs.paraId) ?? toId(attrs.blockId) ?? toId(attrs.id) ?? toId(attrs.uuid); +} + +function resolveRuntimeTableIdentity( + nodeType: BlockNodeType, + attrs: BlockIdAttrs, + pos: number, + path?: TraversalPath, +): string | undefined { + const sdBlockId = toId(attrs.sdBlockId); + if (sdBlockId && !isVolatileRuntimeBlockId(sdBlockId)) { + return sdBlockId; + } + return buildFallbackTableNodeId(nodeType, pos, path); +} + +export function resolveBlockNodeId( + node: ProseMirrorNode, + pos: number, + nodeType: BlockNodeType, + path?: TraversalPath, +): string | undefined { if (node.type.name === 'paragraph') { const attrs = node.attrs as ParagraphAttrs | undefined; // paraId (imported from DOCX) is the primary identity for paragraphs. This @@ -126,13 +151,20 @@ export function resolveBlockNodeId(node: ProseMirrorNode, pos: number, nodeType: const attrs = (node.attrs ?? {}) as BlockIdAttrs; const typeName = node.type.name; - // Table nodes prefer paraId (preserved across DOCX roundtrips) over - // sdBlockId (regenerated on every document open). sdBlockId is still the - // fallback for programmatically created tables before their first export. - if (typeName === 'table' || typeName === 'tableRow' || typeName === 'tableCell' || typeName === 'tableHeader') { + // Table rows legitimately carry w14:paraId in DOCX, so prefer it when + // present and fall back to sdBlockId for newly created rows. + if (typeName === 'tableRow') { return toId(attrs.paraId) ?? toId(attrs.sdBlockId) ?? toId(attrs.blockId) ?? toId(attrs.id) ?? toId(attrs.uuid); } + // Older SuperDoc exports also stored paraId on tables/cells. Keep honoring + // those legacy IDs when we encounter them. When only a runtime-generated + // UUID sdBlockId exists, expose a deterministic fallback instead so session + // addresses remain reusable across fresh document opens. + if (typeName === 'table' || typeName === 'tableCell' || typeName === 'tableHeader') { + return resolveLegacyTableIdentity(attrs) ?? resolveRuntimeTableIdentity(nodeType, attrs, pos, path); + } + // NOTE: Migration surface for the stable-addresses plan. // Imported IDs currently win over `sdBlockId` to preserve historical // identity during DOCX round-trips. @@ -193,6 +225,9 @@ export function buildBlockIndex(editor: Editor): BlockIndex { const candidates: BlockCandidate[] = []; const byId = new Map(); const ambiguous = new Set(); + const pathByNode = new WeakMap(); + + pathByNode.set(editor.state.doc, []); function registerKey(key: string, candidate: BlockCandidate): void { if (byId.has(key)) { @@ -206,10 +241,18 @@ export function buildBlockIndex(editor: Editor): BlockIndex { // This traversal is a hot path for adapter workflows (for example find -> // getNode). Keep this pure snapshot builder so a transaction-invalidated // cache can be layered on later without API changes. - editor.state.doc.descendants((node, pos) => { + editor.state.doc.descendants((node, pos, parent, index) => { + const parentPath = parent ? (pathByNode.get(parent) ?? []) : []; + const path = + typeof index === 'number' && Number.isInteger(index) && index >= 0 ? [...parentPath, index] : undefined; + + if (path) { + pathByNode.set(node, path); + } + const nodeType = mapBlockNodeType(node); if (!nodeType) return; - const nodeId = resolveBlockNodeId(node, pos, nodeType); + const nodeId = resolveBlockNodeId(node, pos, nodeType, path); if (!nodeId) return; const candidate: BlockCandidate = { diff --git a/packages/super-editor/src/document-api-adapters/helpers/sd-projection.ts b/packages/super-editor/src/document-api-adapters/helpers/sd-projection.ts index 38ac190816..040a06e2e2 100644 --- a/packages/super-editor/src/document-api-adapters/helpers/sd-projection.ts +++ b/packages/super-editor/src/document-api-adapters/helpers/sd-projection.ts @@ -106,9 +106,10 @@ export function projectMarkBasedInline( /** * Matches a PM block node against a blockId by checking all known ID attributes. * - * The block index assigns primary IDs using `resolveBlockNodeId` which prefers - * `paraId` over `sdBlockId` for paragraphs and tables. Inline anchors carry - * this primary ID. To match correctly we must check all candidate attributes. + * The block index assigns primary IDs using `resolveBlockNodeId`, which may + * choose `paraId` or `sdBlockId` depending on node type and document origin. + * Inline anchors carry this primary ID, so matching must check all supported + * identity attributes rather than assuming a single source. */ function blockMatchesId(node: ProseMirrorNode, blockId: string): boolean { const attrs = node.attrs as Record | undefined; diff --git a/packages/super-editor/src/document-api-adapters/helpers/table-node-id.test.ts b/packages/super-editor/src/document-api-adapters/helpers/table-node-id.test.ts new file mode 100644 index 0000000000..4e9c79017f --- /dev/null +++ b/packages/super-editor/src/document-api-adapters/helpers/table-node-id.test.ts @@ -0,0 +1,29 @@ +import { buildFallbackTableNodeId, isVolatileRuntimeBlockId } from './table-node-id.js'; + +describe('table-node-id', () => { + describe('isVolatileRuntimeBlockId', () => { + it('treats UUID-like sdBlockIds as volatile runtime ids', () => { + expect(isVolatileRuntimeBlockId('7701a615-4ad8-45b5-922c-2a32114df4c8')).toBe(true); + }); + + it('does not treat descriptive ids as volatile runtime ids', () => { + expect(isVolatileRuntimeBlockId('table-1')).toBe(false); + }); + }); + + describe('buildFallbackTableNodeId', () => { + it('builds deterministic table fallback ids from traversal paths', () => { + expect(buildFallbackTableNodeId('table', 42, [3])).toBe('table-auto-4fff82cf'); + expect(buildFallbackTableNodeId('table', 42, [3])).toBe('table-auto-4fff82cf'); + }); + + it('builds deterministic table-cell fallback ids from traversal paths', () => { + expect(buildFallbackTableNodeId('tableCell', 88, [3, 1, 2])).toBe('cell-auto-5e34d2b2'); + expect(buildFallbackTableNodeId('tableCell', 88, [3, 1, 2])).toBe('cell-auto-5e34d2b2'); + }); + + it('falls back to position when a traversal path is unavailable', () => { + expect(buildFallbackTableNodeId('table', 12)).toBe('table-auto-c3f1b3e8'); + }); + }); +}); diff --git a/packages/super-editor/src/document-api-adapters/helpers/table-node-id.ts b/packages/super-editor/src/document-api-adapters/helpers/table-node-id.ts new file mode 100644 index 0000000000..81e4977909 --- /dev/null +++ b/packages/super-editor/src/document-api-adapters/helpers/table-node-id.ts @@ -0,0 +1,64 @@ +import type { BlockNodeType } from '@superdoc/document-api'; + +type TableLikeBlockNodeType = 'table' | 'tableCell'; + +const TABLE_LIKE_PREFIX: Readonly> = { + table: 'table-auto', + tableCell: 'cell-auto', +}; + +const UUID_LIKE_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; + +/** Fast deterministic hash for public fallback block IDs. */ +function stableHash(input: string): string { + let hash = 2166136261; + for (let index = 0; index < input.length; index += 1) { + hash ^= input.charCodeAt(index); + hash = Math.imul(hash, 16777619); + } + return (hash >>> 0).toString(16).padStart(8, '0'); +} + +function toTableLikeBlockNodeType(nodeType: BlockNodeType): TableLikeBlockNodeType | undefined { + if (nodeType === 'table') return 'table'; + if (nodeType === 'tableCell') return 'tableCell'; + return undefined; +} + +function serializeTraversalPath(path: readonly number[] | undefined, pos: number): string { + if (Array.isArray(path) && path.length > 0) { + return `path:${path.join('.')}`; + } + return `pos:${pos}`; +} + +/** + * Returns true when an sdBlockId looks like a runtime-generated UUID. + * + * Table and table-cell sdBlockIds are frequently generated at editor startup, + * so UUID-like values are not safe to expose as public document-api node IDs. + */ +export function isVolatileRuntimeBlockId(id: string | undefined): boolean { + return typeof id === 'string' && UUID_LIKE_PATTERN.test(id); +} + +/** + * Builds a deterministic public fallback ID for table-like nodes that lack a + * schema-valid persisted identity. + * + * The traversal path is preferred because it stays stable across reopen of the + * same unchanged document while remaining independent of runtime-generated + * `sdBlockId` UUIDs. + */ +export function buildFallbackTableNodeId( + nodeType: BlockNodeType, + pos: number, + path?: readonly number[], +): string | undefined { + const tableLikeType = toTableLikeBlockNodeType(nodeType); + if (!tableLikeType) return undefined; + + const prefix = TABLE_LIKE_PREFIX[tableLikeType]; + const source = serializeTraversalPath(path, pos); + return `${prefix}-${stableHash(`${tableLikeType}:${source}`)}`; +} diff --git a/packages/super-editor/src/document-api-adapters/plan-engine/blocks-wrappers.ts b/packages/super-editor/src/document-api-adapters/plan-engine/blocks-wrappers.ts index daa247a8cc..97d9c32f20 100644 --- a/packages/super-editor/src/document-api-adapters/plan-engine/blocks-wrappers.ts +++ b/packages/super-editor/src/document-api-adapters/plan-engine/blocks-wrappers.ts @@ -143,7 +143,7 @@ function collectTopLevelBlocks(editor: Editor): BlockCandidate[] { // Delegate to the canonical ID resolver so IDs match the block index. // This ensures blocks.list output is directly usable in blocks.delete // and blocks.deleteRange without ID mismatches. - const nodeId = resolveBlockNodeId(child, pos, nodeType); + const nodeId = resolveBlockNodeId(child, pos, nodeType, [i]); if (nodeId) { results.push({ node: child, pos, end: pos + child.nodeSize, nodeType, nodeId }); diff --git a/packages/super-editor/src/document-api-adapters/tables-adapter.integration.test.ts b/packages/super-editor/src/document-api-adapters/tables-adapter.integration.test.ts index 71ee761beb..94d73dd830 100644 --- a/packages/super-editor/src/document-api-adapters/tables-adapter.integration.test.ts +++ b/packages/super-editor/src/document-api-adapters/tables-adapter.integration.test.ts @@ -122,4 +122,24 @@ describe('tables adapter DOCX integration', () => { expect(documentXml).not.toMatch(/<\/w:tbl>\s*/); expect(documentXml).toMatch(/<\/w:tbl>\s*]*(?:\/>|>[\s\S]*?<\/w:p>)\s*/); }); + + it('exports row paraIds without writing invalid table or cell w14 identity attrs', async () => { + ({ editor } = initTestEditor({ + content: docData.docx, + media: docData.media, + mediaFiles: docData.mediaFiles, + fonts: docData.fonts, + useImmediateSetTimeout: false, + })); + + createTableAdapter(editor, { rows: 2, columns: 2, at: { kind: 'documentEnd' } }, DIRECT_MUTATION_OPTIONS); + + const exportedFiles = await exportDocxFiles(editor); + const documentXml = exportedFiles['word/document.xml']; + + expect(documentXml).toBeTruthy(); + expect(documentXml).toMatch(/]*\bw14:paraId=/); + expect(documentXml).not.toMatch(/]*\bw14:(?:paraId|textId)=/); + expect(documentXml).not.toMatch(/]*\bw14:(?:paraId|textId)=/); + }); }); diff --git a/packages/super-editor/src/document-api-adapters/tables-adapter.ts b/packages/super-editor/src/document-api-adapters/tables-adapter.ts index 34d5d408c8..3c59a0eee1 100644 --- a/packages/super-editor/src/document-api-adapters/tables-adapter.ts +++ b/packages/super-editor/src/document-api-adapters/tables-adapter.ts @@ -75,6 +75,7 @@ import { DocumentApiAdapterError } from './errors.js'; import { toBlockAddress, findBlockById, findBlockByNodeIdOnly } from './helpers/node-address-resolver.js'; import { twipsToPixels } from '../core/super-converter/helpers.js'; import { resolvePreferredNewTableStyleId, isKnownTableStyleId } from '@superdoc/style-engine/ooxml'; +import { generateDocxHexId } from '../utils/generateDocxHexId.js'; import { readSettingsRoot, ensureSettingsRoot, @@ -97,12 +98,6 @@ const PIXELS_TO_TWIPS = 1440 / 96; const DEFAULT_TABLE_GRID_WIDTH_TWIPS = 1500; const SETTINGS_PART: PartId = 'word/settings.xml'; -function generateParaId(): string { - return Array.from({ length: 8 }, () => Math.floor(Math.random() * 16).toString(16)) - .join('') - .toUpperCase(); -} - function createSeparatorParagraph(schema: Editor['state']['schema']): import('prosemirror-model').Node | null { const paragraphType = schema.nodes.paragraph; if (!paragraphType) return null; @@ -110,7 +105,7 @@ function createSeparatorParagraph(schema: Editor['state']['schema']): import('pr // Keep separator paragraphs addressable/stable for downstream DOCX roundtrip. const separatorAttrs = { sdBlockId: uuidv4(), - paraId: generateParaId(), + paraId: generateDocxHexId(), }; return paragraphType.createAndFill(separatorAttrs) ?? paragraphType.createAndFill(); @@ -1750,7 +1745,6 @@ export function tablesConvertFromTextAdapter( schema.nodes.tableCell.createAndFill( { sdBlockId: uuidv4(), - paraId: generateParaId(), }, para, )!, @@ -1760,7 +1754,7 @@ export function tablesConvertFromTextAdapter( schema.nodes.tableRow.createAndFill( { sdBlockId: uuidv4(), - paraId: generateParaId(), + paraId: generateDocxHexId(), }, tableCells, )!, @@ -1768,8 +1762,7 @@ export function tablesConvertFromTextAdapter( } const tableId = uuidv4(); - const tableParaId = generateParaId(); - const tableNode = schema.nodes.table.create({ sdBlockId: tableId, paraId: tableParaId }, tableRows); + const tableNode = schema.nodes.table.create({ sdBlockId: tableId }, tableRows); // Replace the source paragraphs with the new table. const startPos = paragraphs[0].pos; @@ -1845,7 +1838,7 @@ export function tablesSplitAdapter( // Build the new table with the same attributes. const newTableAttrs = { ...(tableNode.attrs as Record) }; delete newTableAttrs.sdBlockId; // Each table needs a unique ID — let PM assign one. - delete newTableAttrs.paraId; // Avoid duplicate w14:paraId after split. + delete newTableAttrs.paraId; // Never duplicate legacy/imported table paraIds across split tables. delete newTableAttrs.textId; // Avoid duplicate w14:textId after split. const newTable = schema.nodes.table.create(newTableAttrs, secondTableRows); const separatorParagraph = createSeparatorParagraph(schema); @@ -1990,7 +1983,6 @@ export function tablesInsertCellAdapter( return ( candidateType.createAndFill({ sdBlockId: uuidv4(), - paraId: generateParaId(), }) ?? candidateType.createAndFill()! ); }; @@ -2008,7 +2000,7 @@ export function tablesInsertCellAdapter( const overflowRowAttrs = { ...templateRowAttrs, sdBlockId: uuidv4(), - paraId: generateParaId(), + paraId: generateDocxHexId(), }; const overflowRow = schema.nodes.tableRow.createAndFill(overflowRowAttrs, overflowRowCells) ?? @@ -3418,15 +3410,11 @@ export function createTableAdapter( } const tableId = uuidv4(); - // Generate a w14:paraId-compatible 8-char uppercase hex string for DOCX roundtrip stability. - const paraId = generateParaId(); - const didApply = insertTableAt({ pos: insertAt, rows: input.rows, columns: input.columns, sdBlockId: tableId, - paraId, tracked: mode === 'tracked', }); diff --git a/packages/super-editor/src/extensions/table-cell/table-cell.js b/packages/super-editor/src/extensions/table-cell/table-cell.js index 97b4b15f01..8fc503d2e7 100644 --- a/packages/super-editor/src/extensions/table-cell/table-cell.js +++ b/packages/super-editor/src/extensions/table-cell/table-cell.js @@ -111,7 +111,7 @@ export const TableCell = Node.create({ }, }, - /** @private - OOXML identifier preserved across DOCX roundtrips */ + /** @private - Legacy imported identity preserved for backwards compatibility */ paraId: { default: null, keepOnSplit: false, @@ -119,7 +119,7 @@ export const TableCell = Node.create({ renderDOM: () => ({}), }, - /** @private - OOXML text identifier preserved across DOCX roundtrips */ + /** @private - Legacy imported text identifier preserved for backwards compatibility */ textId: { default: null, keepOnSplit: false, diff --git a/packages/super-editor/src/extensions/table/table.js b/packages/super-editor/src/extensions/table/table.js index 1b6ee9b840..5ef942c374 100644 --- a/packages/super-editor/src/extensions/table/table.js +++ b/packages/super-editor/src/extensions/table/table.js @@ -170,6 +170,7 @@ */ import { v4 as uuidv4 } from 'uuid'; +import { generateDocxHexId } from '../../utils/generateDocxHexId.js'; import { Fragment } from 'prosemirror-model'; import { Node, Attribute } from '@core/index.js'; import { callOrGet } from '@core/utilities/callOrGet.js'; @@ -377,7 +378,7 @@ export const Table = Node.create({ /** * @private * @category Attribute - * @param {string} [paraId] - OOXML paragraph/element identifier (w14:paraId), preserved across DOCX roundtrips + * @param {string} [paraId] - Legacy imported identity preserved for backwards compatibility */ paraId: { default: null, @@ -389,7 +390,7 @@ export const Table = Node.create({ /** * @private * @category Attribute - * @param {string} [textId] - OOXML text identifier (w14:textId), preserved across DOCX roundtrips + * @param {string} [textId] - Legacy imported text identifier preserved for backwards compatibility */ textId: { default: null, @@ -685,11 +686,10 @@ export const Table = Node.create({ * @param {number} options.rows - Number of rows * @param {number} options.columns - Number of columns * @param {string} [options.sdBlockId] - Stable block ID for the created table - * @param {string} [options.paraId] - OOXML-compatible identifier (w14:paraId) that survives DOCX roundtrips * @param {boolean} [options.tracked] - When true, sets forceTrackChanges meta; when false, sets skipTrackChanges meta */ insertTableAt: - ({ pos, rows, columns, sdBlockId, paraId, tracked } = {}) => + ({ pos, rows, columns, sdBlockId, tracked } = {}) => ({ tr, state, dispatch, editor }) => { const tableType = state.schema.nodes.table; const tableRowType = state.schema.nodes.tableRow; @@ -700,21 +700,17 @@ export const Table = Node.create({ if (!Number.isInteger(columns) || columns < 1) return false; try { - const genParaId = () => - Array.from({ length: 8 }, () => Math.floor(Math.random() * 16).toString(16)) - .join('') - .toUpperCase(); const widths = computeColumnWidths(editor, columns); const rowNodes = []; for (let r = 0; r < rows; r++) { const cellNodes = []; for (let c = 0; c < columns; c++) { - const cellAttrs = { paraId: genParaId(), ...(widths ? { colwidth: [widths[c]] } : {}) }; + const cellAttrs = widths ? { colwidth: [widths[c]] } : {}; const cell = tableCellType.createAndFill(cellAttrs); if (!cell) return false; cellNodes.push(cell); } - const row = tableRowType.createChecked(null, cellNodes); + const row = tableRowType.createChecked({ paraId: generateDocxHexId() }, cellNodes); rowNodes.push(row); } const resolved = normalizeNewTableAttrs(editor); @@ -723,14 +719,13 @@ export const Table = Node.create({ ...(resolved.borders ? { borders: resolved.borders } : {}), ...(resolved.tableProperties ? { tableProperties: resolved.tableProperties } : {}), ...(sdBlockId ? { sdBlockId } : {}), - ...(paraId ? { paraId } : {}), }; const tableNode = tableType.createChecked(tableAttrs, rowNodes); if (dispatch) { const sep = tableSeparatorNeeds(state.doc, pos); const makeSep = () => { - const attrs = { sdBlockId: uuidv4(), paraId: genParaId() }; + const attrs = { sdBlockId: uuidv4(), paraId: generateDocxHexId() }; return state.schema.nodes.paragraph.createAndFill(attrs); }; if (sep.before || sep.after) { diff --git a/packages/super-editor/src/extensions/table/table.test.js b/packages/super-editor/src/extensions/table/table.test.js index 87cf18bb3d..e84f9be1d3 100644 --- a/packages/super-editor/src/extensions/table/table.test.js +++ b/packages/super-editor/src/extensions/table/table.test.js @@ -1150,6 +1150,27 @@ describe('Table commands', async () => { expect(editor.state.doc.toJSON()).toEqual(docBefore.toJSON()); }); + + it('creates row paraIds without assigning legacy table or cell paraIds', async () => { + const { docx, media, mediaFiles, fonts } = cachedBlankDoc; + ({ editor } = initTestEditor({ content: docx, media, mediaFiles, fonts })); + + const pos = editor.state.doc.content.size; + editor.commands.insertTableAt({ pos, rows: 2, columns: 2 }); + + const tablePos = findTablePos(editor.state.doc); + expect(tablePos).not.toBeNull(); + + const table = editor.state.doc.nodeAt(tablePos); + expect(table?.attrs.paraId).toBeNull(); + + table?.forEach((row) => { + expect(row.attrs.paraId).toMatch(/^[0-9A-F]{8}$/); + row.forEach((cell) => { + expect(cell.attrs.paraId).toBeNull(); + }); + }); + }); }); describe('normalizeNewTableAttrs tblLook (SD-2086)', async () => { diff --git a/packages/super-editor/src/extensions/table/tableHelpers/createTable.js b/packages/super-editor/src/extensions/table/tableHelpers/createTable.js index 2f86537489..082ec7a0eb 100644 --- a/packages/super-editor/src/extensions/table/tableHelpers/createTable.js +++ b/packages/super-editor/src/extensions/table/tableHelpers/createTable.js @@ -1,6 +1,7 @@ // @ts-check import { getNodeType } from '@core/helpers/getNodeType.js'; import { createCell } from './createCell.js'; +import { generateDocxHexId } from '../../../utils/generateDocxHexId.js'; /** * Create a new table with specified dimensions @@ -57,7 +58,10 @@ export const createTable = ( for (let index = 0; index < rowsCount; index++) { const isHeader = withHeaderRow && index === 0; const cellsToInsert = isHeader ? headerCells : cells; - const rowAttrs = isHeader ? { tableRowProperties: { repeatHeader: true } } : null; + const rowAttrs = { + ...(isHeader ? { tableRowProperties: { repeatHeader: true } } : {}), + paraId: generateDocxHexId(), + }; rows.push(types.tableRow.createChecked(rowAttrs, cellsToInsert)); } diff --git a/packages/super-editor/src/extensions/types/node-attributes.ts b/packages/super-editor/src/extensions/types/node-attributes.ts index fa14eb46c5..8a07536389 100644 --- a/packages/super-editor/src/extensions/types/node-attributes.ts +++ b/packages/super-editor/src/extensions/types/node-attributes.ts @@ -279,9 +279,9 @@ export interface TableAttrs extends TableNodeAttributes { tableGrid: TableGrid | null; /** Table properties */ tableProperties: TableProperties | null; - /** OOXML paragraph/element identifier (w14:paraId), preserved across DOCX roundtrips */ + /** Legacy imported identity preserved for backwards compatibility */ paraId?: string | null; - /** OOXML text identifier (w14:textId), preserved across DOCX roundtrips */ + /** Legacy imported text identifier preserved for backwards compatibility */ textId?: string | null; } @@ -367,9 +367,9 @@ export interface CellBackground { /** Table cell node attributes */ export interface TableCellAttrs extends TableNodeAttributes { - /** OOXML paragraph/element identifier (w14:paraId), preserved across DOCX roundtrips */ + /** Legacy imported identity preserved for backwards compatibility */ paraId?: string | null; - /** OOXML text identifier (w14:textId), preserved across DOCX roundtrips */ + /** Legacy imported text identifier preserved for backwards compatibility */ textId?: string | null; /** Number of columns this cell spans */ colspan: number; diff --git a/packages/super-editor/src/utils/generateDocxHexId.js b/packages/super-editor/src/utils/generateDocxHexId.js new file mode 100644 index 0000000000..4bbe144d2e --- /dev/null +++ b/packages/super-editor/src/utils/generateDocxHexId.js @@ -0,0 +1,15 @@ +const DOCX_HEX_ID_LENGTH = 8; + +/** + * Generates an uppercase 8-character hexadecimal identifier compatible with + * OOXML attributes such as `w14:paraId` and `w14:textId`. + * + * Callers are responsible for using the value only on schema-valid elements. + * + * @returns {string} + */ +export function generateDocxHexId() { + return Array.from({ length: DOCX_HEX_ID_LENGTH }, () => Math.floor(Math.random() * 16).toString(16)) + .join('') + .toUpperCase(); +} diff --git a/tests/doc-api-stories/tests/lists/missing-paraid-address-stability.ts b/tests/doc-api-stories/tests/lists/missing-paraid-address-stability.ts new file mode 100644 index 0000000000..d788ebea49 --- /dev/null +++ b/tests/doc-api-stories/tests/lists/missing-paraid-address-stability.ts @@ -0,0 +1,262 @@ +import { access, mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { execFile } from 'node:child_process'; +import { createRequire } from 'node:module'; +import path from 'node:path'; +import { pathToFileURL } from 'node:url'; +import { promisify } from 'node:util'; +import { beforeEach, describe, expect, it } from 'vitest'; + +type ListItemAddress = { + kind: 'block'; + nodeType: 'listItem'; + nodeId: string; +}; + +type ListsListResult = { + total: number; + items: Array<{ + address: ListItemAddress; + listId: string; + marker?: string; + ordinal?: number; + level?: number; + kind?: 'ordered' | 'bullet'; + text?: string; + }>; +}; + +type ListsGetEnvelope = { + data?: { + item?: { + address: ListItemAddress; + listId: string; + marker?: string; + ordinal?: number; + level?: number; + kind?: 'ordered' | 'bullet'; + text?: string; + }; + }; +}; + +const REPO_ROOT = path.resolve(import.meta.dirname, '../../../..'); +const RESULTS_DIR = path.resolve(import.meta.dirname, '../../results/lists/missing-paraid-address-stability'); +const CLI_SRC_BIN = path.join(REPO_ROOT, 'apps/cli/src/index.ts'); +const LIST_FIXTURE_CANDIDATES = [ + path.join(REPO_ROOT, 'packages/super-editor/src/tests/data/basic-list.docx'), + path.join(REPO_ROOT, 'packages/super-editor/src/tests/data/list_with_indents.docx'), + path.join(REPO_ROOT, 'devtools/document-api-tests/fixtures/matrix-list.input.docx'), + path.join(REPO_ROOT, 'e2e-tests/test-data/basic-documents/lists-complex-items.docx'), +]; + +const execFileAsync = promisify(execFile); +const require = createRequire(import.meta.url); + +type JsZipConstructor = typeof import('jszip').default; + +let jsZipPromise: Promise | null = null; +let resolvedListFixture: string | null = null; +let stateDir = ''; + +function sid(label: string): string { + return `${label}-${Date.now()}-${Math.floor(Math.random() * 1_000_000)}`; +} + +function outPath(name: string): string { + return path.join(RESULTS_DIR, name); +} + +function parseJsonEnvelope(stdout: string, stderr: string): any { + const sources = [stdout.trim(), stderr.trim()].filter((source) => source.length > 0); + if (sources.length === 0) { + throw new Error('No CLI JSON envelope output found.'); + } + + for (const source of sources) { + try { + return JSON.parse(source); + } catch { + const lines = source.split(/\r?\n/); + for (let index = 0; index < lines.length; index += 1) { + const candidate = lines.slice(index).join('\n').trim(); + if (!candidate.startsWith('{')) continue; + try { + return JSON.parse(candidate); + } catch { + // Keep scanning for the envelope. + } + } + } + } + + throw new Error(`Failed to parse CLI JSON envelope:\n${sources.join('\n')}`); +} + +async function runCli(args: string[], options: { allowError?: boolean } = {}): Promise { + let stdout = ''; + let stderr = ''; + + try { + const executed = await execFileAsync('bun', [CLI_SRC_BIN, ...args, '--output', 'json'], { + cwd: REPO_ROOT, + env: { + ...process.env, + SUPERDOC_CLI_STATE_DIR: stateDir, + }, + }); + stdout = executed.stdout; + stderr = executed.stderr; + } catch (error) { + const failed = error as { stdout?: string; stderr?: string }; + stdout = failed.stdout ?? ''; + stderr = failed.stderr ?? ''; + } + + const envelope = parseJsonEnvelope(stdout, stderr); + if (envelope?.ok === false && options.allowError !== true) { + const code = envelope.error?.code ?? 'UNKNOWN'; + const message = envelope.error?.message ?? 'Unknown CLI error'; + throw new Error(`${code}: ${message}`); + } + return envelope; +} + +async function resolveFixture(candidates: string[], fixtureLabel: string): Promise { + for (const candidate of candidates) { + try { + await access(candidate); + return candidate; + } catch { + // Try the next candidate. + } + } + + throw new Error(`No ${fixtureLabel} fixture found. Tried: ${candidates.join(', ')}`); +} + +async function resolveListFixture(): Promise { + if (resolvedListFixture) return resolvedListFixture; + resolvedListFixture = await resolveFixture(LIST_FIXTURE_CANDIDATES, 'list'); + return resolvedListFixture; +} + +async function loadJsZip(): Promise { + if (jsZipPromise) return jsZipPromise; + + jsZipPromise = (async () => { + const entry = require.resolve('jszip', { + paths: [path.join(REPO_ROOT, 'packages/super-editor')], + }); + const mod = await import(pathToFileURL(entry).href); + return (mod.default ?? mod) as JsZipConstructor; + })(); + + return jsZipPromise; +} + +async function writeListFixtureWithoutParaIds(outputPath: string): Promise { + const sourcePath = await resolveListFixture(); + const JSZip = await loadJsZip(); + const sourceBytes = await readFile(sourcePath); + const zip = await JSZip.loadAsync(sourceBytes); + const documentXmlFile = zip.file('word/document.xml'); + + if (!documentXmlFile) { + throw new Error(`Fixture doc is missing word/document.xml: ${sourcePath}`); + } + + const documentXml = await documentXmlFile.async('string'); + const updatedXml = documentXml.replace(/\s+w14:paraId="[^"]*"/g, '').replace(/\s+w14:textId="[^"]*"/g, ''); + + if (updatedXml === documentXml) { + throw new Error(`Fixture doc did not contain paragraph ids to strip: ${sourcePath}`); + } + + zip.file('word/document.xml', updatedXml); + const outputBytes = await zip.generateAsync({ type: 'nodebuffer' }); + await writeFile(outputPath, outputBytes); + return outputPath; +} + +function expectFirstListItem(result: ListsListResult): ListItemAddress { + expect(result.total).toBeGreaterThan(0); + expect(result.items.length).toBeGreaterThan(0); + + const address = result.items[0]?.address; + expect(address?.nodeType).toBe('listItem'); + expect(typeof address?.nodeId).toBe('string'); + expect(address?.nodeId.length).toBeGreaterThan(0); + + return address as ListItemAddress; +} + +function extractListResult(envelope: any): ListsListResult { + return envelope?.data?.result as ListsListResult; +} + +function extractGetAddress(envelope: ListsGetEnvelope): ListItemAddress { + const address = envelope.data?.item?.address; + expect(address?.nodeType).toBe('listItem'); + expect(typeof address?.nodeId).toBe('string'); + expect(address?.nodeId.length).toBeGreaterThan(0); + return address as ListItemAddress; +} + +beforeEach(async () => { + await rm(RESULTS_DIR, { recursive: true, force: true }); + await mkdir(RESULTS_DIR, { recursive: true }); + stateDir = outPath('.superdoc-cli-state'); +}); + +describe('document-api story: lists missing paraId address stability', () => { + it('keeps list item addresses stable for docs without paraIds across repeated reads and reopen', async () => { + const sourceDoc = await writeListFixtureWithoutParaIds(outPath('lists-without-paraids.docx')); + const firstSessionId = sid('lists-no-paraid-first'); + const reopenedSessionId = sid('lists-no-paraid-reopened'); + + try { + const statelessFirstList = extractListResult(await runCli(['lists', 'list', sourceDoc, '--limit', '20'])); + const firstAddress = expectFirstListItem(statelessFirstList); + + const statelessGetAddress = extractGetAddress( + await runCli(['lists', 'get', sourceDoc, '--address-json', JSON.stringify(firstAddress)]), + ); + expect(statelessGetAddress).toEqual(firstAddress); + + const statelessSecondList = extractListResult(await runCli(['lists', 'list', sourceDoc, '--limit', '20'])); + const secondAddress = expectFirstListItem(statelessSecondList); + expect(secondAddress).toEqual(firstAddress); + + await runCli(['open', sourceDoc, '--session', firstSessionId]); + + const sessionFirstList = extractListResult( + await runCli(['lists', 'list', '--session', firstSessionId, '--limit', '20']), + ); + const sessionFirstAddress = expectFirstListItem(sessionFirstList); + expect(sessionFirstAddress).toEqual(firstAddress); + + const sessionGetAddress = extractGetAddress( + await runCli(['lists', 'get', '--session', firstSessionId, '--address-json', JSON.stringify(firstAddress)]), + ); + expect(sessionGetAddress).toEqual(firstAddress); + + await runCli(['close', '--session', firstSessionId, '--discard']); + + await runCli(['open', sourceDoc, '--session', reopenedSessionId]); + + const reopenedGetAddress = extractGetAddress( + await runCli(['lists', 'get', '--session', reopenedSessionId, '--address-json', JSON.stringify(firstAddress)]), + ); + expect(reopenedGetAddress).toEqual(firstAddress); + + const reopenedList = extractListResult( + await runCli(['lists', 'list', '--session', reopenedSessionId, '--limit', '20']), + ); + const reopenedAddress = expectFirstListItem(reopenedList); + expect(reopenedAddress).toEqual(firstAddress); + } finally { + await runCli(['close', '--session', firstSessionId, '--discard'], { allowError: true }); + await runCli(['close', '--session', reopenedSessionId, '--discard'], { allowError: true }); + } + }, 30_000); +});