Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions apps/cli/src/__tests__/cli.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { access, copyFile, mkdir, readFile, rm, writeFile } from 'node:fs/promis
import { join } from 'node:path';
import { run } from '../index';
import { resolveListDocFixture, resolveSourceDocFixture } from './fixtures';
import { writeListDocWithoutParaIds } from './unstable-list-fixture';

type RunResult = {
code: number;
Expand Down Expand Up @@ -1218,6 +1219,55 @@ describe('superdoc CLI', () => {
expect(getEnvelope.data.item.address.nodeId).toBe(address.nodeId);
});

test('lists list/get keep list item addresses stable for docs without paraIds in stateless mode', async () => {
  // Build a throwaway copy of the list fixture with every w14:paraId stripped.
  const docPath = join(TEST_DIR, 'lists-no-paraids-stateless.docx');
  await writeListDocWithoutParaIds(docPath);

  const listArgs = ['lists', 'list', docPath, '--limit', '1'];
  const initialAddress = await firstListItemAddress(listArgs);

  // `lists get` must be able to resolve the address that `lists list` produced.
  const getResult = await runCli(['lists', 'get', docPath, '--address-json', JSON.stringify(initialAddress)]);
  expect(getResult.code).toBe(0);

  const envelope = parseJsonOutput<
    SuccessEnvelope<{
      address: ListItemAddress;
      item: { address: ListItemAddress };
    }>
  >(getResult);
  expect(envelope.data.item.address.nodeId).toBe(initialAddress.nodeId);

  // A second enumeration of the same document must yield the same synthetic id.
  const repeatAddress = await firstListItemAddress(listArgs);
  expect(repeatAddress.nodeId).toBe(initialAddress.nodeId);
});

test('lists list/get keep list item addresses stable for docs without paraIds in stateful mode', async () => {
  const docPath = join(TEST_DIR, 'lists-no-paraids-stateful.docx');
  await writeListDocWithoutParaIds(docPath);

  try {
    // Stateful mode: open a session once, then run addressing commands against it.
    const openResult = await runCli(['open', docPath]);
    expect(openResult.code).toBe(0);

    const initialAddress = await firstListItemAddress(['lists', 'list', '--limit', '1']);

    const getResult = await runCli(['lists', 'get', '--address-json', JSON.stringify(initialAddress)]);
    expect(getResult.code).toBe(0);

    const envelope = parseJsonOutput<
      SuccessEnvelope<{
        address: ListItemAddress;
        item: { address: ListItemAddress };
      }>
    >(getResult);
    expect(envelope.data.item.address.nodeId).toBe(initialAddress.nodeId);

    // Re-listing inside the same session must reproduce the synthetic id.
    const repeatAddress = await firstListItemAddress(['lists', 'list', '--limit', '1']);
    expect(repeatAddress.nodeId).toBe(initialAddress.nodeId);
  } finally {
    // Always tear the session down so later tests start stateless.
    await runCli(['close', '--discard']);
  }
});

test('lists list pretty prints list rows', async () => {
const result = await runCli(['lists', 'list', LIST_SAMPLE_DOC, '--limit', '2', '--output', 'pretty']);
expect(result.code).toBe(0);
Expand Down
58 changes: 58 additions & 0 deletions apps/cli/src/__tests__/unstable-list-fixture.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { createRequire } from 'node:module';
import { readFile, writeFile } from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { resolveListDocFixture } from './fixtures';

/** Minimal type for the subset of the JSZip API used in this fixture. */
interface JsZipInstance {
  /** Read access: returns a lazily-decoded entry, or null when the path is absent. */
  file(path: string): { async(type: string): Promise<string> } | null;
  /** Write access: replaces (or creates) the entry at `path`. */
  file(path: string, data: string): void;
  generateAsync(options: { type: string }): Promise<Buffer>;
}

interface JsZipConstructor {
  loadAsync(data: Buffer | Uint8Array): Promise<JsZipInstance>;
}

// Resolve the repo root relative to this file (apps/cli/src/__tests__ -> repo root).
// `fileURLToPath(import.meta.url)` works under both Node and Bun, whereas
// `import.meta.dir` is a Bun-only extension and is undefined in Node.
const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../../../..');
const require = createRequire(import.meta.url);

// Memoized JSZip constructor so the module is resolved at most once per process.
let jsZipPromise: Promise<JsZipConstructor> | null = null;

/**
 * Resolve and import the JSZip build used by packages/super-editor.
 * The resulting constructor is cached so repeated fixture writes share one load.
 */
async function loadJsZip(): Promise<JsZipConstructor> {
  jsZipPromise ??= import(
    pathToFileURL(
      require.resolve('jszip', { paths: [path.join(REPO_ROOT, 'packages/super-editor')] }),
    ).href
  ).then((mod) => (mod.default ?? mod) as JsZipConstructor);

  return jsZipPromise;
}

/**
 * Copy the list fixture DOCX to `outputPath` with every `w14:paraId` and
 * `w14:textId` attribute removed from word/document.xml, simulating exporters
 * that omit stable paragraph ids.
 *
 * @param outputPath destination for the rewritten .docx
 * @returns the same `outputPath`, for call-site chaining
 * @throws if the fixture lacks word/document.xml or contains no ids to strip
 */
export async function writeListDocWithoutParaIds(outputPath: string): Promise<string> {
  const sourcePath = await resolveListDocFixture();
  const JSZip = await loadJsZip();
  const archive = await JSZip.loadAsync(await readFile(sourcePath));

  const docEntry = archive.file('word/document.xml');
  if (docEntry === null) {
    throw new Error(`Fixture doc is missing word/document.xml: ${sourcePath}`);
  }

  const originalXml = await docEntry.async('string');
  const strippedXml = originalXml
    .replace(/\s+w14:paraId="[^"]*"/g, '')
    .replace(/\s+w14:textId="[^"]*"/g, '');

  // Guard against the fixture silently changing: the tests rely on ids existing.
  if (strippedXml === originalXml) {
    throw new Error(`Fixture doc did not contain paragraph ids to strip: ${sourcePath}`);
  }

  archive.file('word/document.xml', strippedXml);
  await writeFile(outputPath, await archive.generateAsync({ type: 'nodebuffer' }));
  return outputPath;
}
2 changes: 1 addition & 1 deletion apps/docs/document-api/overview.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ For nodes created at runtime (not imported from DOCX), `nodeId` falls back to `s

| ID source | Stable across loads? | When used |
|-----------|---------------------|-----------|
| `paraId` (from DOCX) | Best effort (usually stable for unchanged DOCX blocks) | Paragraphs, tables, rows, cells imported from DOCX |
| `paraId` (from DOCX) | Best effort (usually stable for unchanged DOCX blocks) | Paragraphs and table rows imported from DOCX |
| `sdBlockId` (runtime) | No (session-scoped) | Nodes created programmatically before first export |

<Tip>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -962,5 +962,5 @@
}
],
"marker": "{/* GENERATED FILE: DO NOT EDIT. Regenerate via `pnpm run docapi:sync`. */}",
"sourceHash": "9197780d09944c67a656339ee8adc0e2c4473d1dffb09288c9c0b85f68fd34f3"
"sourceHash": "278f3d13c4cb49be6d084639559a5de63cd623606bc046c4abea69815fcbbe1c"
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Enable or disable odd/even header-footer mode in document settings.

## Expected result

Returns a DocumentMutationResult receipt; reports NO_OP if the odd/even setting already matches.
Returns a DocumentMutationResult (not SectionMutationResult) because odd/even header-footer mode is a document-level setting, not a per-section one. Reports NO_OP if the setting already matches.

## Input fields

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { generateDocxRandomId } from '@core/helpers/generateDocxRandomId.js';

const PARAGRAPH_IDENTITY_ATTRS = ['sdBlockId', 'paraId'];
const TABLE_IDENTITY_ATTRS = ['sdBlockId', 'paraId', 'blockId'];
const DEFAULT_BLOCK_IDENTITY_ATTRS = ['sdBlockId', 'blockId', 'paraId'];
const SYNTHETIC_PARA_ID_TYPES = new Set(['paragraph', 'tableRow']);
const DOCX_ID_LENGTH = 8;
const MAX_DOCX_ID = 0xffffffff;

/** Maps block node types to safe block-identity attribute lookup order. */
const BLOCK_IDENTITY_ATTRS = {
Expand All @@ -23,64 +24,147 @@ function toIdentityValue(value) {
return undefined;
}

function resolvePrimaryBlockIdentity(node) {
if (!node || typeof node !== 'object') return undefined;
function getBlockIdentityAttrs(node) {
if (!node || typeof node !== 'object') return [];
return BLOCK_IDENTITY_ATTRS[node.type] ?? [];
}

const attrPriority = BLOCK_IDENTITY_ATTRS[node.type];
if (!attrPriority) return undefined;
function getExplicitIdentityEntries(node) {
const attrPriority = getBlockIdentityAttrs(node);
if (attrPriority.length === 0) return [];

const attrs = typeof node.attrs === 'object' && node.attrs ? node.attrs : {};
const identityEntries = [];

for (const attr of attrPriority) {
const value = toIdentityValue(attrs[attr]);
if (value) return { id: value, source: attr };
if (value) {
identityEntries.push({ attr, value });
}
}
return undefined;

return identityEntries;
}

/**
 * Collapse identity entries into one group per distinct id value, preserving
 * first-seen order and recording every attribute that carried each value.
 *
 * @param {Array<{attr: string, value: string}>} identityEntries
 * @returns {Array<{value: string, attrs: string[]}>}
 */
function groupIdentityEntriesByValue(identityEntries) {
  const grouped = new Map();

  identityEntries.forEach(({ attr, value }) => {
    const group = grouped.get(value);
    if (group) {
      group.attrs.push(attr);
    } else {
      grouped.set(value, { value, attrs: [attr] });
    }
  });

  return Array.from(grouped.values());
}

/** True when `node` is a block type that should receive a synthesized paraId. */
function shouldSynthesizeParaId(node) {
  if (!node || typeof node !== 'object') return false;
  return SYNTHETIC_PARA_ID_TYPES.has(node.type);
}

function nextUniqueDocxId(usedIds) {
let id = generateDocxRandomId();
while (usedIds.has(id)) {
id = generateDocxRandomId();
function collectExplicitBlockIdentities(node, reservedIds) {
if (!node || typeof node !== 'object') return;

const identityEntries = getExplicitIdentityEntries(node);
for (const { value } of groupIdentityEntriesByValue(identityEntries)) {
reservedIds.add(value);
}

if (Array.isArray(node.content)) {
node.content.forEach((child) => collectExplicitBlockIdentities(child, reservedIds));
}
return id;
}

function dedupeBlockIdentitiesInNode(node, usedIds) {
/**
 * Build an allocator that hands out 8-hex-digit uppercase ids in ascending
 * order ("00000001", "00000002", ...), skipping anything already present in
 * `reservedIds`. Every id it returns is added to `reservedIds` so repeated
 * calls never produce a duplicate.
 *
 * @param {Set<string>} reservedIds id values the allocator must never emit
 * @returns {() => string} allocator; throws once the 32-bit id space is exhausted
 */
function createDeterministicDocxIdAllocator(reservedIds) {
  let candidate = 1;

  return () => {
    for (; candidate <= MAX_DOCX_ID; candidate += 1) {
      const id = candidate.toString(16).toUpperCase().padStart(DOCX_ID_LENGTH, '0');
      if (!reservedIds.has(id)) {
        reservedIds.add(id);
        return id;
      }
    }

    throw new Error('Unable to allocate a unique synthetic DOCX block id.');
  };
}

function setBlockIdentity(node, attrName, value) {
node.attrs = { ...(node.attrs ?? {}), [attrName]: value };
}

function normalizeBlockIdentitiesInNode(node, seenIds, allocateDocxId) {
if (!node || typeof node !== 'object') return;

const identity = resolvePrimaryBlockIdentity(node);
if (identity) {
if (usedIds.has(identity.id)) {
const replacementId = nextUniqueDocxId(usedIds);
node.attrs = { ...node.attrs, [identity.source]: replacementId };
usedIds.add(replacementId);
} else {
usedIds.add(identity.id);
const identityEntries = getExplicitIdentityEntries(node);
const groupedIdentities = groupIdentityEntriesByValue(identityEntries);

if (groupedIdentities.length > 0) {
for (const identityGroup of groupedIdentities) {
if (seenIds.has(identityGroup.value)) {
const replacementId = allocateDocxId();
for (const attr of identityGroup.attrs) {
setBlockIdentity(node, attr, replacementId);
}
seenIds.add(replacementId);
} else {
seenIds.add(identityGroup.value);
}
}
} else if (shouldSynthesizeParaId(node)) {
const syntheticParaId = allocateDocxId();
setBlockIdentity(node, 'paraId', syntheticParaId);
seenIds.add(syntheticParaId);
}

if (Array.isArray(node.content)) {
node.content.forEach((child) => dedupeBlockIdentitiesInNode(child, usedIds));
node.content.forEach((child) => normalizeBlockIdentitiesInNode(child, seenIds, allocateDocxId));
}
}

/**
* Deduplicate block identities during import so document-api targeting remains stable.
* Normalize imported block identities so document-api targeting remains stable.
*
* Word files can occasionally contain duplicate stable block IDs across blocks.
* Since stable IDs are used for deterministic targeting in the adapters,
* duplicates break deterministic targeting and mutations.
* Some exporters also omit `w14:paraId` entirely, leaving imported blocks with
* no stable public identity and forcing the adapter layer to fall back to the
* volatile `sdBlockId` assigned at editor startup.
*
* Only safe block identity attributes are rewritten: sdBlockId, paraId, and blockId.
* This pass fixes both cases:
* - rewrites duplicate explicit identity values while preserving the first
* explicit occurrence of each value
* - reserves every explicit identity value up front so synthesized IDs never
* collide with a non-primary but still-public identifier such as paragraph
* `paraId`
* - synthesizes deterministic `paraId` values for schema-valid block types
* that arrive with no stable identity at all
*
* Only block identity attributes are rewritten or synthesized: sdBlockId,
* paraId, and blockId.
*
* @param {Array<{type?: string, attrs?: Record<string, unknown>, content?: unknown[]}>} content
* @returns {Array<{type?: string, attrs?: Record<string, unknown>, content?: unknown[]}>}
*/
export function normalizeDuplicateBlockIdentitiesInContent(content = []) {
if (!Array.isArray(content) || content.length === 0) return content;

const usedIds = new Set();
content.forEach((node) => dedupeBlockIdentitiesInNode(node, usedIds));
const reservedIds = new Set();
content.forEach((node) => collectExplicitBlockIdentities(node, reservedIds));

const allocateDocxId = createDeterministicDocxIdAllocator(reservedIds);
const seenIds = new Set();
content.forEach((node) => normalizeBlockIdentitiesInNode(node, seenIds, allocateDocxId));

return content;
}
Loading
Loading