diff --git a/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.test.js b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.test.js
index 91edee6a18..0e73018ea2 100644
--- a/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.test.js
+++ b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.test.js
@@ -64,6 +64,29 @@ const buildInlineNodeToken = (attrs = {}, type = { name: 'link' }, pos = 0) => {
   };
 };
 
+/**
+ * Builds a mock image inline-node token for diff tests; `node.attrs`, `node.toJSON()` and `nodeJSON` all expose the same copied attrs.
+ *
+ * @param {Record<string, unknown>} attrs Image node attributes.
+ * @param {number} pos Position offset for the image node.
+ * @returns {import('./inline-diffing.ts').InlineNodeToken}
+ */
+const buildImageNodeToken = (attrs = {}, pos = 0) => {
+  const nodeAttrs = { ...attrs }; // shallow copy: nested objects stay shared with the caller's attrs
+  const type = { name: 'image' };
+  return {
+    kind: 'inlineNode',
+    nodeType: 'image',
+    node: {
+      type,
+      attrs: nodeAttrs,
+      toJSON: () => ({ type: 'image', attrs: nodeAttrs }),
+    },
+    nodeJSON: { type: 'image', attrs: nodeAttrs },
+    pos,
+  };
+};
+
 /**
  * Builds text tokens without offsets for tokenizer assertions.
  *
@@ -426,3 +449,131 @@ describe('tokenizeInlineContent', () => {
     expect(tokens[5]?.offset).toBe(16);
   });
 });
+
+describe('image semantic normalization in inline diff', () => {
+  it('produces no diff when images differ only in volatile originalAttributes', () => {
+    const baseAttrs = {
+      src: 'image1.png',
+      size: { width: 100, height: 50 },
+      originalAttributes: {
+        'wp14:anchorId': 'AAAA1111',
+        'wp14:editId': 'BBBB2222',
+        cx: '914400',
+      },
+    };
+    const changedAttrs = {
+      src: 'image1.png',
+      size: { width: 100, height: 50 },
+      originalAttributes: {
+        'wp14:anchorId': 'CCCC3333',
+        'wp14:editId': 'DDDD4444',
+        cx: '914400',
+      },
+    };
+
+    const oldToken = buildImageNodeToken(baseAttrs, 5);
+    const newToken = buildImageNodeToken(changedAttrs, 5);
+
+    const diffs = getInlineDiff([oldToken], [newToken], 6);
+    expect(diffs).toEqual([]);
+  });
+
+  it('detects a real image change even when volatile attrs also differ', () => {
+    const oldAttrs = {
+      src: 'old-image.png',
+      originalAttributes: { 'wp14:anchorId': 'A1', cx: '100' },
+    };
+    const newAttrs = {
+      src: 'new-image.png',
+      originalAttributes: { 'wp14:anchorId': 'A2', cx: '100' },
+    };
+
+    const oldToken = buildImageNodeToken(oldAttrs, 3);
+    const newToken = buildImageNodeToken(newAttrs, 3);
+
+    const diffs = getInlineDiff([oldToken], [newToken], 4);
+
+    expect(diffs).toHaveLength(1);
+    expect(diffs[0].action).toBe('modified');
+    expect(diffs[0].kind).toBe('inlineNode');
+    expect(diffs[0].attrsDiff?.modified).toHaveProperty('src');
+  });
+
+  it('handles multiple images in one paragraph using semantic-key pairing', () => {
+    const mkImage = (src, anchorId, pos) =>
+      buildImageNodeToken({ src, originalAttributes: { 'wp14:anchorId': anchorId, cx: '100' } }, pos);
+
+    const oldTokens = [mkImage('a.png', 'ID1', 1), mkImage('b.png', 'ID2', 3)];
+    const newTokens = [mkImage('a.png', 'ID3', 1), mkImage('b.png', 'ID4', 3)];
+    // Same src at each position; only the volatile wp14:anchorId differs, so no diff is expected.
+    const diffs = getInlineDiff(oldTokens, newTokens, 5);
+    expect(diffs).toEqual([]);
+  });
+
+  it('emits a diff when one of multiple images genuinely changes', () => {
+    const mkImage = (src, anchorId, pos) =>
+      buildImageNodeToken({ src, originalAttributes: { 'wp14:anchorId': anchorId } }, pos);
+
+    const oldTokens = [mkImage('a.png', 'ID1', 1), mkImage('b.png', 'ID2', 3)];
+    const newTokens = [mkImage('a.png', 'ID3', 1), mkImage('c.png', 'ID4', 3)];
+
+    const diffs = getInlineDiff(oldTokens, newTokens, 5);
+
+    expect(diffs).toHaveLength(1);
+    expect(diffs[0].action).toBe('modified');
+    expect(diffs[0].attrsDiff?.modified).toHaveProperty('src');
+  });
+
+  it('correctly detects an image insertion when a new image is prepended', () => {
+    const mkImage = (src, pos) => buildImageNodeToken({ src }, pos);
+
+    const oldTokens = [mkImage('a.png', 1), mkImage('b.png', 3)];
+    const newTokens = [mkImage('x.png', 1), mkImage('a.png', 3), mkImage('b.png', 5)];
+
+    const diffs = getInlineDiff(oldTokens, newTokens, 5);
+
+    // Should be a single insertion of x.png, not two modifications + addition
+    expect(diffs).toHaveLength(1);
+    expect(diffs[0].action).toBe('added');
+    expect(diffs[0].kind).toBe('inlineNode');
+    expect(diffs[0].nodeJSON.attrs.src).toBe('x.png');
+  });
+
+  it('correctly detects image reordering as delete + add', () => {
+    const mkImage = (src, pos) => buildImageNodeToken({ src }, pos);
+
+    const oldTokens = [mkImage('a.png', 1), mkImage('b.png', 3)];
+    const newTokens = [mkImage('b.png', 1), mkImage('a.png', 3)];
+
+    const diffs = getInlineDiff(oldTokens, newTokens, 5);
+
+    // Reorder produces diffs — at minimum some combination of added/deleted
+    expect(diffs.length).toBeGreaterThan(0);
+  });
+
+  it('excludes volatile attrs from attrsDiff when a real image change occurs', () => {
+    const oldAttrs = {
+      src: 'v1.png',
+      size: { width: 100 },
+      originalAttributes: { 'wp14:anchorId': 'OLD', 'wp14:editId': 'OLD', cx: '100' },
+    };
+    const newAttrs = {
+      src: 'v2.png',
+      size: { width: 200 },
+      originalAttributes: { 'wp14:anchorId': 'NEW', 'wp14:editId': 'NEW', cx: '100' },
+    };
+
+    const diffs = getInlineDiff([buildImageNodeToken(oldAttrs, 1)], [buildImageNodeToken(newAttrs, 1)], 2);
+
+    expect(diffs).toHaveLength(1);
+    const attrsDiff = diffs[0].attrsDiff;
+
+    // Semantic changes are reported
+    expect(attrsDiff?.modified).toHaveProperty('src');
+    expect(attrsDiff?.modified).toHaveProperty('size.width');
+
+    // Volatile changes are NOT reported
+    expect(attrsDiff?.modified).not.toHaveProperty('originalAttributes.wp14:anchorId');
+    expect(attrsDiff?.modified).not.toHaveProperty('originalAttributes.wp14:editId');
+  });
+});
diff --git a/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.ts b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.ts
index 53adf8ba04..41d57ae175 100644
--- a/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.ts
+++ b/packages/super-editor/src/extensions/diffing/algorithm/inline-diffing.ts
@@ -1,5 +1,6 @@
 import type { Node as PMNode } from 'prosemirror-model';
 import { getAttributesDiff, getMarksDiff, type AttributesDiff, type MarksDiff } from './attributes-diffing';
+import { normalizeInlineNodeJSON, normalizeInlineNodeAttrs, semanticInlineNodeKey } from './semantic-normalization';
 import { diffSequences } from './sequence-diffing';
 
 type NodeJSON = ReturnType<PMNode['toJSON']>;
@@ -237,7 +238,9 @@ export function getInlineDiff(
     buildDeleted: (token, oldIdx) => buildInlineDiff('deleted', token, oldIdx),
     buildModified: (oldToken, newToken, oldIdx) => {
       if (oldToken.kind !== 'text' && newToken.kind !== 'text') {
-        const attrsDiff = getAttributesDiff(oldToken.node.attrs, newToken.node.attrs);
+        const oldNormalized = normalizeInlineNodeAttrs(oldToken.node.type.name, oldToken.node.attrs);
+        const newNormalized = normalizeInlineNodeAttrs(newToken.node.type.name, newToken.node.attrs);
+        const attrsDiff = getAttributesDiff(oldNormalized, newNormalized);
         return {
           action: 'modified',
           idx: oldIdx,
@@ -270,7 +273,8 @@ export function getInlineDiff(
 
 /**
  * Compares two inline tokens to decide if they can be considered equal for the Myers diff.
- * Text tokens compare character equality while inline nodes compare their type.
+ * Text tokens compare character equality. Inline nodes compare by semantic identity
+ * (normalized JSON), not just type name, so that distinct images are not falsely paired.
  */
 function inlineComparator(a: InlineDiffToken, b: InlineDiffToken): boolean {
   if (a.kind !== b.kind) {
@@ -281,7 +285,7 @@ function inlineComparator(a: InlineDiffToken, b: InlineDiffToken): boolean {
     return a.char === b.char;
   }
   if (a.kind === 'inlineNode' && b.kind === 'inlineNode') {
-    return a.node.type.name === b.node.type.name;
+    return semanticInlineNodeKey(a.node) === semanticInlineNodeKey(b.node);
   }
   return false;
 }
@@ -299,8 +303,8 @@ function shouldProcessEqualAsModification(oldToken: InlineDiffToken, newToken: I
   }
 
   if (oldToken.kind === 'inlineNode' && newToken.kind === 'inlineNode') {
-    const oldJSON = oldToken.node.toJSON();
-    const newJSON = newToken.node.toJSON();
+    const oldJSON = normalizeInlineNodeJSON(oldToken.node.toJSON());
+    const newJSON = normalizeInlineNodeJSON(newToken.node.toJSON());
     return JSON.stringify(oldJSON) !== JSON.stringify(newJSON);
   }
 
diff --git a/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.test.js b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.test.js
index b6f5749fe3..38dd460976 100644
--- a/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.test.js
+++ b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.test.js
@@ -7,6 +7,7 @@ import {
   buildModifiedParagraphDiff,
   canTreatAsModification,
 } from './paragraph-diffing.ts';
+import { semanticInlineNodeKey } from './semantic-normalization.ts';
 
 /**
  * Builds text tokens without offsets for paragraph diff tests.
@@ -35,6 +36,43 @@ const buildMarkedRuns = (text, marks, attrs = {}, offsetStart = 0) =>
     offset: offsetStart + index,
   }));
 
+/**
+ * Builds a mock inline image node token for paragraph diff tests.
+ *
+ * @param {Record<string, unknown>} attrs Image node attributes.
+ * @param {number} pos Position offset for the image node.
+ * @returns {Record<string, unknown>}
+ */
+const buildImageToken = (attrs = {}, pos = 0) => {
+  const nodeAttrs = { ...attrs };
+  return {
+    kind: 'inlineNode',
+    nodeType: 'image',
+    node: {
+      type: { name: 'image' },
+      attrs: nodeAttrs,
+      toJSON: () => ({ type: 'image', attrs: nodeAttrs }),
+    },
+    nodeJSON: { type: 'image', attrs: nodeAttrs },
+    pos,
+  };
+};
+
+/**
+ * Derives a content signature from tokens, mirroring buildContentSignature in paragraph-diffing.ts.
+ * Text tokens contribute their char; inline node tokens contribute a NUL-delimited semantic key.
+ */
+const deriveContentSignature = (tokens) =>
+  tokens
+    .map((token) => {
+      if (token.kind === 'text') return token.char;
+      if (token.kind === 'inlineNode' && token.node) {
+        return `\0${semanticInlineNodeKey(token.node)}\0`;
+      }
+      return ''; // any other token shape contributes nothing to the signature
+    })
+    .join('');
+
 /**
  * Creates a mock paragraph node with default attributes.
  *
@@ -79,6 +117,8 @@ const createParagraphInfo = (overrides = {}) => {
     return token;
   });
 
+  const contentSignature = overrides.contentSignature ?? deriveContentSignature(textTokens);
+
   return {
     node: createParagraphNode(overrides.node),
     pos: paragraphPos,
@@ -86,6 +126,7 @@ const createParagraphInfo = (overrides = {}) => {
     fullText,
     text: textTokens,
     endPos: overrides.endPos ?? paragraphPos + 1 + fullText.length,
+    contentSignature,
     ...overrides,
   };
 };
@@ -244,6 +285,183 @@ describe('paragraph diff builders', () => {
   });
 });
 
+describe('image paragraph semantic normalization', () => {
+  it('does not emit a modification when only volatile attrs differ on an image paragraph', () => {
+    const makeImageParagraphNode = (paraId, rsidR, anchorId, editId) =>
+      createParagraphNode({
+        attrs: { paraId, rsidR, align: 'left' },
+        toJSON: () => ({
+          type: 'paragraph',
+          attrs: { paraId, rsidR, align: 'left' },
+          content: [
+            {
+              type: 'run',
+              attrs: {},
+              content: [
+                {
+                  type: 'image',
+                  attrs: {
+                    src: 'photo.png',
+                    originalAttributes: {
+                      'wp14:anchorId': anchorId,
+                      'wp14:editId': editId,
+                      cx: '914400',
+                    },
+                  },
+                },
+              ],
+            },
+          ],
+        }),
+      });
+
+    const oldInfo = createParagraphInfo({
+      node: makeImageParagraphNode('P1', 'R1', 'ANC1', 'EDT1'),
+      fullText: '',
+      text: [],
+    });
+    const newInfo = createParagraphInfo({
+      node: makeImageParagraphNode('P1', 'R2', 'ANC2', 'EDT2'),
+      fullText: '',
+      text: [],
+    });
+
+    expect(shouldProcessEqualAsModification(oldInfo, newInfo)).toBe(false);
+  });
+
+  it('emits a modification when semantic image attrs change alongside volatile attrs', () => {
+    const makeNode = (paraId, rsidR, src, anchorId) =>
+      createParagraphNode({
+        attrs: { paraId, rsidR },
+        toJSON: () => ({
+          type: 'paragraph',
+          attrs: { paraId, rsidR },
+          content: [
+            {
+              type: 'run',
+              attrs: {},
+              content: [
+                {
+                  type: 'image',
+                  attrs: {
+                    src,
+                    originalAttributes: { 'wp14:anchorId': anchorId },
+                  },
+                },
+              ],
+            },
+          ],
+        }),
+      });
+
+    const oldInfo = createParagraphInfo({
+      node: makeNode('P1', 'R1', 'old.png', 'ANC1'),
+      fullText: '',
+      text: [],
+    });
+    const newInfo = createParagraphInfo({
+      node: makeNode('P1', 'R2', 'new.png', 'ANC2'),
+      fullText: '',
+      text: [],
+    });
+
+    expect(shouldProcessEqualAsModification(oldInfo, newInfo)).toBe(true);
+  });
+
+  it('does not report volatile paragraph attrs as a modification diff', () => {
+    const oldParagraph = createParagraphInfo({
+      node: createParagraphNode({ attrs: { paraId: 'A', rsidR: '001', align: 'left' } }),
+      fullText: 'same',
+    });
+    const newParagraph = createParagraphInfo({
+      node: createParagraphNode({ attrs: { paraId: 'B', rsidR: '002', align: 'left' } }),
+      fullText: 'same',
+    });
+
+    const diff = buildModifiedParagraphDiff(oldParagraph, newParagraph);
+    expect(diff).toBeNull();
+  });
+
+  it('still detects semantic paragraph attr changes', () => {
+    const oldParagraph = createParagraphInfo({
+      node: createParagraphNode({ attrs: { paraId: 'A', rsidR: '001', align: 'left' } }),
+    });
+    const newParagraph = createParagraphInfo({
+      node: createParagraphNode({ attrs: { paraId: 'B', rsidR: '002', align: 'center' } }),
+    });
+
+    const diff = buildModifiedParagraphDiff(oldParagraph, newParagraph);
+    expect(diff).not.toBeNull();
+    expect(diff.attrsDiff?.modified).toHaveProperty('align');
+    expect(diff.attrsDiff?.modified).not.toHaveProperty('paraId');
+    expect(diff.attrsDiff?.modified).not.toHaveProperty('rsidR');
+  });
+
+  it('text paragraphs are unaffected by normalization', () => {
+    const oldParagraph = createParagraphInfo({
+      fullText: 'hello',
+      text: buildRuns('hello'),
+      node: createParagraphNode({ attrs: { align: 'left' } }),
+    });
+    const newParagraph = createParagraphInfo({
+      fullText: 'world',
+      text: buildRuns('world'),
+      node: createParagraphNode({ attrs: { align: 'left' } }),
+    });
+
+    const diff = buildModifiedParagraphDiff(oldParagraph, newParagraph);
+    expect(diff).not.toBeNull();
+    expect(diff.contentDiff.length).toBeGreaterThan(0);
+    expect(diff.attrsDiff).toBeNull();
+  });
+});
+
+describe('paragraphComparator with image-only paragraphs', () => {
+  it('distinguishes image-only paragraphs with different images', () => {
+    const imgA = createParagraphInfo({
+      fullText: '',
+      text: [buildImageToken({ src: 'a.png' })],
+      node: createParagraphNode({ attrs: {} }),
+    });
+    const imgB = createParagraphInfo({
+      fullText: '',
+      text: [buildImageToken({ src: 'b.png' })],
+      node: createParagraphNode({ attrs: {} }),
+    });
+
+    expect(paragraphComparator(imgA, imgB)).toBe(false);
+  });
+
+  it('matches image-only paragraphs with the same semantic content', () => {
+    const makeInfo = (anchorId) =>
+      createParagraphInfo({
+        fullText: '',
+        text: [
+          buildImageToken({
+            src: 'same.png',
+            originalAttributes: { 'wp14:anchorId': anchorId, cx: '100' },
+          }),
+        ],
+        node: createParagraphNode({ attrs: {} }),
+      });
+
+    // volatile anchorId differs, but semantic content is the same
+    expect(paragraphComparator(makeInfo('ID1'), makeInfo('ID2'))).toBe(true);
+  });
+
+  it('still matches text paragraphs by fullText', () => {
+    const a = createParagraphInfo({ fullText: 'same text' });
+    const b = createParagraphInfo({ fullText: 'same text' });
+    expect(paragraphComparator(a, b)).toBe(true);
+  });
+
+  it('falls back to fullText when contentSignature is missing', () => {
+    const a = { node: { attrs: {} }, fullText: 'fallback' };
+    const b = { node: { attrs: {} }, fullText: 'fallback' };
+    expect(paragraphComparator(a, b)).toBe(true);
+  });
+});
+
 describe('canTreatAsModification', () => {
   it('returns true when paragraph comparator matches by paraId', () => {
     const buildInfo = (paraId) => ({
diff --git a/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.ts b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.ts
index bf785ab718..d24d3db2e3 100644
--- a/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.ts
+++ b/packages/super-editor/src/extensions/diffing/algorithm/paragraph-diffing.ts
@@ -2,6 +2,7 @@ import type { Node as PMNode } from 'prosemirror-model';
 import { getInlineDiff, tokenizeInlineContent, type InlineDiffToken, type InlineDiffResult } from './inline-diffing';
 import { getAttributesDiff, type AttributesDiff } from './attributes-diffing';
 import { getInsertionPos, type NodePositionInfo } from './diff-utils';
+import { normalizeParagraphAttrs, normalizeParagraphNodeJSON, semanticInlineNodeKey } from './semantic-normalization';
 import { levenshteinDistance } from './similarity';
 
 // Heuristics that prevent unrelated paragraphs from being paired as modifications.
@@ -23,6 +24,8 @@ export interface ParagraphNodeInfo {
   endPos: number;
   /** Plain-text representation of the paragraph content. */
   fullText: string;
+  /** Semantic fingerprint of all inline content (text + nodes), used for identity matching. */
+  contentSignature: string;
 }
 
 /**
@@ -97,9 +100,30 @@ export function createParagraphSnapshot(paragraph: PMNode, paragraphPos: number,
     text,
     endPos: paragraphPos + 1 + paragraph.content.size,
     fullText: text.map((token) => (token.kind === 'text' ? token.char : '')).join(''),
+    contentSignature: buildContentSignature(text),
   };
 }
 
+/**
+ * Builds the semantic fingerprint stored as `contentSignature`: text tokens
+ * contribute their character, every other token a NUL-wrapped `semanticInlineNodeKey`.
+ *
+ * @param tokens Inline tokens of one paragraph.
+ * @returns For text-only input the same string as `fullText`; inline nodes add
+ *   one key each, so image-only paragraphs no longer all collapse to ''.
+ */
+function buildContentSignature(tokens: InlineDiffToken[]): string {
+  return tokens
+    .map((token) => {
+      if (token.kind === 'text') {
+        return token.char;
+      }
+      // NUL wrappers mark node-key boundaries; assumes text tokens never contain a NUL char — TODO confirm
+      return `\0${semanticInlineNodeKey(token.node)}\0`;
+    })
+    .join('');
+}
+
 /**
  * Determines whether equal paragraph nodes should still be marked as modified because their serialized structure differs.
  *
@@ -111,11 +135,16 @@ export function shouldProcessEqualAsModification(
   oldParagraph: ParagraphNodeInfo,
   newParagraph: ParagraphNodeInfo,
 ): boolean {
-  return JSON.stringify(oldParagraph.node.toJSON()) !== JSON.stringify(newParagraph.node.toJSON());
+  const oldNormalized = normalizeParagraphNodeJSON(oldParagraph.node.toJSON());
+  const newNormalized = normalizeParagraphNodeJSON(newParagraph.node.toJSON());
+  return JSON.stringify(oldNormalized) !== JSON.stringify(newNormalized);
 }
 
 /**
- * Compares two paragraphs for identity based on paraId or text content.
+ * Compares two paragraphs for identity based on paraId, then content signature.
+ *
+ * The content signature covers both text and inline nodes (images, etc.),
+ * so image-only paragraphs with different images are not falsely paired.
  */
 export function paragraphComparator(oldParagraph: ParagraphNodeInfo, newParagraph: ParagraphNodeInfo): boolean {
   const oldId = oldParagraph?.node?.attrs?.paraId;
@@ -123,7 +152,11 @@ export function paragraphComparator(oldParagraph: ParagraphNodeInfo, newParagrap
   if (oldId && newId && oldId === newId) {
     return true;
   }
-  return oldParagraph?.fullText === newParagraph?.fullText;
+  // Content signature includes inline node fingerprints, so it distinguishes
+  // image-only paragraphs that would otherwise all have empty fullText.
+  const oldSig = oldParagraph?.contentSignature ?? oldParagraph?.fullText;
+  const newSig = newParagraph?.contentSignature ?? newParagraph?.fullText;
+  return oldSig === newSig;
 }
 
 /**
@@ -165,7 +198,10 @@ export function buildModifiedParagraphDiff(
 ): ModifiedParagraphDiff | null {
   const contentDiff = getInlineDiff(oldParagraph.text, newParagraph.text, oldParagraph.endPos);
 
-  const attrsDiff = getAttributesDiff(oldParagraph.node.attrs, newParagraph.node.attrs);
+  const attrsDiff = getAttributesDiff(
+    normalizeParagraphAttrs(oldParagraph.node.attrs),
+    normalizeParagraphAttrs(newParagraph.node.attrs),
+  );
   if (contentDiff.length === 0 && !attrsDiff) {
     return null;
   }
diff --git a/packages/super-editor/src/extensions/diffing/algorithm/semantic-normalization.test.ts b/packages/super-editor/src/extensions/diffing/algorithm/semantic-normalization.test.ts
new file mode 100644
index 0000000000..f76ae06b90
--- /dev/null
+++ b/packages/super-editor/src/extensions/diffing/algorithm/semantic-normalization.test.ts
@@ -0,0 +1,338 @@
+import { describe, it, expect } from 'vitest';
+import {
+  normalizeParagraphAttrs,
+  normalizeImageNodeJSON,
+  normalizeInlineNodeJSON,
+  normalizeInlineNodeAttrs,
+  normalizeParagraphNodeJSON,
+  normalizeDocJSON,
+  semanticInlineNodeKey,
+} from './semantic-normalization';
+
+describe('normalizeParagraphAttrs', () => {
+  it('strips all volatile paragraph attributes', () => {
+    const attrs = {
+      paraId: '1A2B3C4D',
+      textId: '77777777',
+      rsidR: '00A1B2C3',
+      rsidRDefault: '00D4E5F6',
+      rsidP: '00112233',
+      rsidRPr: '00445566',
+      rsidDel: '00778899',
+      align: 'center',
+      indent: { left: 720 },
+    };
+
+    const result = normalizeParagraphAttrs(attrs);
+
+    expect(result).toEqual({
+      align: 'center',
+      indent: { left: 720 },
+    });
+  });
+
+  it('returns all attributes when none are volatile', () => {
+    const attrs = { align: 'left', spacing: { before: 100 } };
+    const result = normalizeParagraphAttrs(attrs);
+    expect(result).toEqual(attrs);
+  });
+
+  it('returns an empty object for empty input', () => {
+    expect(normalizeParagraphAttrs({})).toEqual({});
+  });
+});
+
+describe('normalizeImageNodeJSON', () => {
+  it('strips volatile keys from originalAttributes', () => {
+    const nodeJSON = {
+      type: 'image',
+      attrs: {
+        src: 'image1.png',
+        size: { width: 100, height: 100 },
+        originalAttributes: {
+          'wp14:anchorId': '4A5B6C7D',
+          'wp14:editId': '8E9F0A1B',
+          cx: '914400',
+          cy: '914400',
+        },
+      },
+    };
+
+    const result = normalizeImageNodeJSON(nodeJSON);
+
+    expect(result.attrs.originalAttributes).toEqual({
+      cx: '914400',
+      cy: '914400',
+    });
+    expect(result.attrs.src).toBe('image1.png');
+    expect(result.attrs.size).toEqual({ width: 100, height: 100 });
+  });
+
+  it('returns the node unchanged when originalAttributes is absent', () => {
+    const nodeJSON = { type: 'image', attrs: { src: 'img.png' } };
+    const result = normalizeImageNodeJSON(nodeJSON);
+    expect(result).toEqual(nodeJSON);
+  });
+
+  it('preserves non-volatile originalAttributes keys', () => {
+    const nodeJSON = {
+      type: 'image',
+      attrs: {
+        originalAttributes: { cx: '100', cy: '200' },
+      },
+    };
+
+    const result = normalizeImageNodeJSON(nodeJSON);
+    expect(result.attrs.originalAttributes).toEqual({ cx: '100', cy: '200' });
+  });
+
+  it('does not mutate the input', () => {
+    const original = {
+      type: 'image',
+      attrs: {
+        originalAttributes: { 'wp14:anchorId': 'AAA', cx: '100' },
+      },
+    };
+    const copy = JSON.parse(JSON.stringify(original));
+
+    normalizeImageNodeJSON(original);
+
+    expect(original).toEqual(copy);
+  });
+});
+
+describe('normalizeInlineNodeJSON', () => {
+  it('normalizes image nodes', () => {
+    const imageJSON = {
+      type: 'image',
+      attrs: {
+        originalAttributes: { 'wp14:anchorId': 'X', keep: 'yes' },
+      },
+    };
+
+    const result = normalizeInlineNodeJSON(imageJSON);
+    expect(result.attrs.originalAttributes).toEqual({ keep: 'yes' });
+  });
+
+  it('passes non-image nodes through unchanged', () => {
+    const linkJSON = { type: 'link', attrs: { href: 'http://example.com' } };
+    const result = normalizeInlineNodeJSON(linkJSON);
+    expect(result).toBe(linkJSON);
+  });
+});
+
+describe('normalizeParagraphNodeJSON', () => {
+  it('strips volatile attrs and normalizes nested image nodes', () => {
+    const paragraphJSON = {
+      type: 'paragraph',
+      attrs: { paraId: 'AABB', rsidR: '0011', align: 'left' },
+      content: [
+        {
+          type: 'run',
+          attrs: {},
+          content: [
+            {
+              type: 'image',
+              attrs: {
+                src: 'photo.png',
+                originalAttributes: {
+                  'wp14:anchorId': 'DEAD',
+                  'wp14:editId': 'BEEF',
+                  cx: '500',
+                },
+              },
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = normalizeParagraphNodeJSON(paragraphJSON) as any;
+
+    expect(result.attrs).toEqual({ align: 'left' });
+    expect(result.content[0].content[0].attrs.originalAttributes).toEqual({ cx: '500' });
+    expect(result.content[0].content[0].attrs.src).toBe('photo.png');
+  });
+
+  it('handles paragraphs with no content', () => {
+    const paragraphJSON = {
+      type: 'paragraph',
+      attrs: { paraId: 'X', align: 'center' },
+    };
+
+    const result = normalizeParagraphNodeJSON(paragraphJSON);
+    expect(result.attrs).toEqual({ align: 'center' });
+    expect(result).not.toHaveProperty('content');
+  });
+
+  it('handles text-only paragraphs without modifying content', () => {
+    const paragraphJSON = {
+      type: 'paragraph',
+      attrs: { rsidR: '00AA' },
+      content: [
+        {
+          type: 'run',
+          attrs: {},
+          content: [{ type: 'text', text: 'hello' }],
+        },
+      ],
+    };
+
+    const result = normalizeParagraphNodeJSON(paragraphJSON) as any;
+    expect(result.content[0].content[0]).toEqual({ type: 'text', text: 'hello' });
+  });
+});
+
+describe('normalizeDocJSON', () => {
+  it('normalizes paragraphs within a document tree', () => {
+    const docJSON = {
+      type: 'doc',
+      content: [
+        {
+          type: 'paragraph',
+          attrs: { paraId: 'P1', rsidR: 'R1', align: 'left' },
+          content: [
+            {
+              type: 'run',
+              attrs: {},
+              content: [
+                {
+                  type: 'image',
+                  attrs: {
+                    src: 'test.png',
+                    originalAttributes: { 'wp14:anchorId': 'A1' },
+                  },
+                },
+              ],
+            },
+          ],
+        },
+        {
+          type: 'paragraph',
+          attrs: { paraId: 'P2', align: 'right' },
+          content: [
+            {
+              type: 'run',
+              attrs: {},
+              content: [{ type: 'text', text: 'world' }],
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = normalizeDocJSON(docJSON) as any;
+
+    // First paragraph: volatile attrs stripped, image normalized
+    expect(result.content[0].attrs).toEqual({ align: 'left' });
+    expect(result.content[0].content[0].content[0].attrs.originalAttributes).toEqual({});
+
+    // Second paragraph: volatile attrs stripped, text untouched
+    expect(result.content[1].attrs).toEqual({ align: 'right' });
+    expect(result.content[1].content[0].content[0]).toEqual({ type: 'text', text: 'world' });
+  });
+
+  it('recurses into structural containers (tables, etc.)', () => {
+    const docJSON = {
+      type: 'doc',
+      content: [
+        {
+          type: 'table',
+          attrs: {},
+          content: [
+            {
+              type: 'tableRow',
+              attrs: {},
+              content: [
+                {
+                  type: 'tableCell',
+                  attrs: {},
+                  content: [
+                    {
+                      type: 'paragraph',
+                      attrs: { paraId: 'TC1', rsidR: 'R9' },
+                    },
+                  ],
+                },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = normalizeDocJSON(docJSON) as any;
+    const cellParagraph = result.content[0].content[0].content[0].content[0];
+    expect(cellParagraph.attrs).toEqual({});
+  });
+
+  it('returns the doc unchanged when there is no content', () => {
+    const docJSON = { type: 'doc' };
+    expect(normalizeDocJSON(docJSON)).toEqual(docJSON);
+  });
+});
+
+describe('normalizeInlineNodeAttrs', () => {
+  it('strips volatile keys from image originalAttributes', () => {
+    const attrs = {
+      src: 'img.png',
+      originalAttributes: {
+        'wp14:anchorId': 'A1',
+        'wp14:editId': 'E1',
+        cx: '100',
+      },
+    };
+
+    const result = normalizeInlineNodeAttrs('image', attrs);
+
+    expect(result.originalAttributes).toEqual({ cx: '100' });
+    expect(result.src).toBe('img.png');
+  });
+
+  it('passes non-image attrs through unchanged', () => {
+    const attrs = { href: 'http://example.com' };
+    const result = normalizeInlineNodeAttrs('link', attrs);
+    expect(result).toBe(attrs);
+  });
+
+  it('passes image attrs through when originalAttributes is absent', () => {
+    const attrs = { src: 'img.png' };
+    const result = normalizeInlineNodeAttrs('image', attrs);
+    expect(result).toBe(attrs);
+  });
+});
+
+describe('semanticInlineNodeKey', () => {
+  it('produces identical keys for images differing only in volatile attrs', () => {
+    const makeNode = (anchorId: string) => ({
+      type: { name: 'image' },
+      toJSON: () => ({
+        type: 'image',
+        attrs: { src: 'same.png', originalAttributes: { 'wp14:anchorId': anchorId, cx: '100' } },
+      }),
+    });
+
+    expect(semanticInlineNodeKey(makeNode('A'))).toBe(semanticInlineNodeKey(makeNode('B')));
+  });
+
+  it('produces different keys for images with different semantic attrs', () => {
+    const makeNode = (src: string) => ({
+      type: { name: 'image' },
+      toJSON: () => ({
+        type: 'image',
+        attrs: { src, originalAttributes: { 'wp14:anchorId': 'same' } },
+      }),
+    });
+
+    expect(semanticInlineNodeKey(makeNode('a.png'))).not.toBe(semanticInlineNodeKey(makeNode('b.png')));
+  });
+
+  it('passes non-image nodes through without normalization', () => {
+    const node = {
+      type: { name: 'link' },
+      toJSON: () => ({ type: 'link', attrs: { href: 'http://example.com' } }),
+    };
+
+    expect(semanticInlineNodeKey(node)).toBe(JSON.stringify({ type: 'link', attrs: { href: 'http://example.com' } }));
+  });
+});
diff --git a/packages/super-editor/src/extensions/diffing/algorithm/semantic-normalization.ts b/packages/super-editor/src/extensions/diffing/algorithm/semantic-normalization.ts
new file mode 100644
index 0000000000..b9fb447eb2
--- /dev/null
+++ b/packages/super-editor/src/extensions/diffing/algorithm/semantic-normalization.ts
@@ -0,0 +1,215 @@
+/**
+ * Semantic normalization for diff comparisons.
+ *
+ * Strips non-semantic OOXML metadata from node JSON before diffing so that
+ * volatile attributes (regenerated by Word on every save) do not produce
+ * false-positive diffs. This module is used exclusively by the diffing
+ * pipeline — it never mutates live ProseMirror nodes or touches
+ * importer/exporter code.
+ */
+
+/**
+ * Paragraph-level attribute names that Word regenerates on every save; they are
+ * removed by normalizeParagraphAttrs() because they carry no meaning for diffing.
+ */
+const VOLATILE_PARAGRAPH_ATTRS = new Set(['paraId', 'textId', 'rsidR', 'rsidRDefault', 'rsidP', 'rsidRPr', 'rsidDel']);
+
+/**
+ * Keys inside an image node's `originalAttributes` that Word regenerates on every
+ * save (drawing-level identifiers, not content); stripped before images are compared.
+ */
+const VOLATILE_IMAGE_ORIGINAL_ATTR_KEYS = new Set(['wp14:anchorId', 'wp14:editId']);
+
+/**
+ * Copies every own enumerable entry of `attrs` whose key is not in `keysToOmit`.
+ * Always returns a new object (shallow: values are shared) and never mutates the input.
+ */
+function omitKeys(attrs: Record<string, unknown>, keysToOmit: Set<string>): Record<string, unknown> {
+  const result: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(attrs)) {
+    if (!keysToOmit.has(key)) {
+      result[key] = value;
+    }
+  }
+  return result;
+}
+
+/**
+ * Drops the volatile keys listed in VOLATILE_PARAGRAPH_ATTRS from paragraph attributes.
+ *
+ * @param attrs Raw paragraph node attributes; not mutated.
+ * @returns A new shallow copy without the volatile keys (remaining values are shared).
+ */
+export function normalizeParagraphAttrs(attrs: Record<string, unknown>): Record<string, unknown> {
+  return omitKeys(attrs, VOLATILE_PARAGRAPH_ATTRS);
+}
+
+/**
+ * Strips volatile OOXML metadata from an image node's JSON representation.
+ *
+ * Only `attrs.originalAttributes` is rewritten; every other attribute is carried
+ * over untouched so that genuine image changes (src, size, wrapping, etc.) still
+ * produce diffs. Input whose `originalAttributes` is missing or falsy is returned as-is.
+ *
+ * @param nodeJSON Serialized image node (from `node.toJSON()`); not mutated.
+ * @returns `nodeJSON` itself if nothing to strip, else a copy with fresh `attrs`/`originalAttributes`.
+ */
+export function normalizeImageNodeJSON(nodeJSON: Record<string, unknown>): Record<string, unknown> {
+  const attrs = nodeJSON.attrs as Record<string, unknown> | undefined;
+  if (!attrs?.originalAttributes) {
+    return nodeJSON;
+  }
+
+  const originalAttributes = attrs.originalAttributes as Record<string, unknown>;
+  const cleanedOriginalAttributes = omitKeys(originalAttributes, VOLATILE_IMAGE_ORIGINAL_ATTR_KEYS);
+
+  return {
+    ...nodeJSON,
+    attrs: {
+      ...attrs,
+      originalAttributes: cleanedOriginalAttributes,
+    },
+  };
+}
+
+/**
+ * Strips volatile metadata from an inline node's JSON based on its type.
+ *
+ * Only nodes whose `type` is 'image' are normalized today; every other type is
+ * returned untouched. Extend the type check below as new volatile-attr patterns emerge.
+ *
+ * @param nodeJSON Serialized inline node.
+ * @returns Normalized copy (or the original object if no normalization is needed).
+ */
+export function normalizeInlineNodeJSON(nodeJSON: Record<string, unknown>): Record<string, unknown> {
+  if (nodeJSON.type === 'image') {
+    return normalizeImageNodeJSON(nodeJSON);
+  }
+  return nodeJSON;
+}
+
+/**
+ * Strips volatile metadata from an inline node's raw attributes.
+ *
+ * Used when computing attrsDiff for modified inline nodes so that
+ * volatile keys don't appear in the diff payload.
+ *
+ * @param typeName The node type name; only 'image' is normalized, other types pass through.
+ * @param attrs Raw node attributes; not mutated.
+ * @returns `attrs` itself for non-image types or missing/falsy `originalAttributes`, else a shallow copy.
+ */
+export function normalizeInlineNodeAttrs(typeName: string, attrs: Record<string, unknown>): Record<string, unknown> {
+  if (typeName !== 'image') {
+    return attrs;
+  }
+
+  const originalAttributes = attrs.originalAttributes as Record<string, unknown> | undefined;
+  if (!originalAttributes) {
+    return attrs;
+  }
+
+  return {
+    ...attrs,
+    originalAttributes: omitKeys(originalAttributes, VOLATILE_IMAGE_ORIGINAL_ATTR_KEYS),
+  };
+}
+
+/**
+ * Produces a semantic key for an inline node by JSON-stringifying its normalized
+ * `toJSON()` output; intended for identity comparison in Myers diff.
+ *
+ * Volatile OOXML metadata is stripped first, so re-imported copies of the same image
+ * yield equal keys. NOTE(review): JSON.stringify follows property insertion order, so
+ * equality presumably relies on both sides serializing keys in the same order — confirm.
+ *
+ * @param node A ProseMirror node (or mock) with `toJSON()`; `type.name` is not read here.
+ * @returns JSON string usable as a comparison key.
+ */
+export function semanticInlineNodeKey(node: { type: { name: string }; toJSON: () => unknown }): string {
+  return JSON.stringify(normalizeInlineNodeJSON(node.toJSON() as Record<string, unknown>));
+}
+
+/**
+ * Strips volatile metadata from a paragraph node's full JSON representation.
+ *
+ * Normalizes the paragraph's own attrs (a missing/nullish `attrs` becomes `{}`) and,
+ * when `content` is present, maps each child through normalizeContentNodeJSON.
+ *
+ * @param nodeJSON Serialized paragraph node (from `node.toJSON()`); not mutated.
+ * @returns Normalized copy suitable for semantic comparison.
+ */
+export function normalizeParagraphNodeJSON(nodeJSON: Record<string, unknown>): Record<string, unknown> {
+  const attrs = (nodeJSON.attrs as Record<string, unknown>) ?? {};
+  const content = nodeJSON.content as Record<string, unknown>[] | undefined;
+
+  return {
+    ...nodeJSON,
+    attrs: normalizeParagraphAttrs(attrs),
+    ...(content ? { content: content.map(normalizeContentNodeJSON) } : {}),
+  };
+}
+
+/**
+ * Recursively normalizes a content node within a paragraph's JSON tree.
+ *
+ * Nodes without `content` go through normalizeInlineNodeJSON (which strips images);
+ * nodes with `content` are shallow-copied and their children normalized recursively.
+ */
+function normalizeContentNodeJSON(nodeJSON: Record<string, unknown>): Record<string, unknown> {
+  const content = nodeJSON.content as Record<string, unknown>[] | undefined;
+
+  // No content: treat as a leaf inline node (image, etc.)
+  if (!content) {
+    return normalizeInlineNodeJSON(nodeJSON);
+  }
+
+  // Has content (run, etc.): copy the node and recurse into its children
+  return {
+    ...nodeJSON,
+    content: content.map(normalizeContentNodeJSON),
+  };
+}
+
+/**
+ * Normalizes an entire document JSON tree for diff fingerprinting.
+ *
+ * Maps each top-level child through normalizeDocNodeJSON, which strips volatile
+ * attributes from paragraphs (and the images inside them), including paragraphs
+ * nested in other containers. Used by `canonicalize.ts` to build the canonical body.
+ *
+ * @param docJSON Serialized document (from `doc.toJSON()`); not mutated.
+ * @returns `docJSON` itself when it has no `content`, otherwise a normalized copy.
+ */
+export function normalizeDocJSON(docJSON: Record<string, unknown>): Record<string, unknown> {
+  const content = docJSON.content as Record<string, unknown>[] | undefined;
+  if (!content) {
+    return docJSON;
+  }
+
+  return {
+    ...docJSON,
+    content: content.map(normalizeDocNodeJSON),
+  };
+}
+
+/**
+ * Normalizes one document-tree node: paragraphs are normalized, containers are recursed, leaves are returned as-is.
+ */
+function normalizeDocNodeJSON(nodeJSON: Record<string, unknown>): Record<string, unknown> {
+  const type = nodeJSON.type as string | undefined;
+
+  if (type === 'paragraph') {
+    return normalizeParagraphNodeJSON(nodeJSON);
+  }
+
+  // Any other node with content is a structural container (table, row, cell, etc.): recurse
+  const content = nodeJSON.content as Record<string, unknown>[] | undefined;
+  if (content) {
+    return {
+      ...nodeJSON,
+      content: content.map(normalizeDocNodeJSON),
+    };
+  }
+
+  return nodeJSON;
+}
diff --git a/packages/super-editor/src/extensions/diffing/computeDiff.test.js b/packages/super-editor/src/extensions/diffing/computeDiff.test.js
index 35fd762082..62bd82da1c 100644
--- a/packages/super-editor/src/extensions/diffing/computeDiff.test.js
+++ b/packages/super-editor/src/extensions/diffing/computeDiff.test.js
@@ -63,11 +63,13 @@ describe('Diff', () => {
     const deletedDiffs = diffs.filter((diff) => diff.action === 'deleted');
     const attrOnlyDiffs = modifiedDiffs.filter((diff) => diff.contentDiff.length === 0);
 
-    expect(diffs).toHaveLength(19);
-    expect(modifiedDiffs).toHaveLength(9);
+    // One volatile-only paragraph diff (paraId/rsidR/textId changes) is now
+    // correctly filtered out by semantic normalization. See semantic-normalization.ts.
+    expect(diffs).toHaveLength(18);
+    expect(modifiedDiffs).toHaveLength(8);
     expect(addedDiffs).toHaveLength(5);
     expect(deletedDiffs).toHaveLength(5);
-    expect(attrOnlyDiffs).toHaveLength(4);
+    expect(attrOnlyDiffs).toHaveLength(3);
 
     // Modified paragraph with multiple text diffs
     let diff = getDiff(
@@ -166,7 +168,6 @@ describe('Diff', () => {
     expect(diff.contentDiff[0].newText).toBe(' ');
     expect(diff.contentDiff[1].text).toBe('NEW');
     expect(diff.contentDiff[2].text).toBe(' ');
-    expect(diff.attrsDiff?.modified?.textId).toBeDefined();
 
     diff = diffs.find((diff) => diff.action === 'deleted' && diff.oldText === 'I deleted this sentence.');
     expect(diff).toBeDefined();
@@ -177,7 +178,6 @@ describe('Diff', () => {
     diff = diffs.find((diff) => diff.action === 'modified' && diff.oldText === 'We are not done yet.');
     expect(diff.newText).toBe('We are done now.');
     expect(diff.contentDiff).toHaveLength(3);
-    expect(diff.attrsDiff?.modified?.textId).toBeDefined();
   });
 
   it('Compare another set of two documents with only formatting changes', async () => {
diff --git a/packages/super-editor/src/extensions/diffing/service/canonicalize.ts b/packages/super-editor/src/extensions/diffing/service/canonicalize.ts
index 454229e415..dfba296d48 100644
--- a/packages/super-editor/src/extensions/diffing/service/canonicalize.ts
+++ b/packages/super-editor/src/extensions/diffing/service/canonicalize.ts
@@ -9,6 +9,7 @@ import type { Node as PMNode } from 'prosemirror-model';
 import type { NumberingProperties, StylesDocumentProperties } from '@superdoc/style-engine/ooxml';
 import type { CommentInput } from '../algorithm/comment-diffing';
 import { COMMENT_ATTRS_DIFF_IGNORED_KEYS } from '../algorithm/comment-diffing';
+import { normalizeDocJSON } from '../algorithm/semantic-normalization';
 
 /** The canonical diffable state of one document. */
 export interface CanonicalDiffableState {
@@ -63,7 +64,7 @@ export function buildCanonicalDiffableState(
   numbering: NumberingProperties | null | undefined,
 ): CanonicalDiffableState {
   return {
-    body: doc.toJSON() as Record<string, unknown>,
+    body: normalizeDocJSON(doc.toJSON() as Record<string, unknown>),
     comments: comments.map(canonicalizeComment),
     styles: styles ? (styles as unknown as Record<string, unknown>) : null,
     numbering: numbering ? (numbering as unknown as Record<string, unknown>) : null,