From d846a239cefcba51d60d180008295543884bc55c Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:14:06 -0300 Subject: [PATCH 1/6] fix(pm-adapter): emit section break before non-paragraph nodes (SD-2646) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per ECMA-376 §17.6.17, a inside a paragraph defines the section that ENDS with that paragraph. All body children preceding it — paragraphs, tables, top-level drawings, SDTs — belong to that section. Section ranges were indexed purely by paragraph count, and section-break blocks were emitted only inside handleParagraphNode. A table that sat between two sectPr-marker paragraphs was emitted into the flow stream BEFORE the section break that declared its column config, so the layout engine laid it out under the prior section's settings. This is the root cause of IT-945 rendering a 114-row 2-col continuous table in column 0 across three pages with column 1 empty: the table was placed in the 1-col section, not the 2-col section. Fix: - Track nodeIndex over every top-level doc.content child in findParagraphsWithSectPr and SectionRange (alongside paragraphIndex, which SDT handlers still use for intra-SDT transitions). - Add maybeEmitNextSectionBreakForNode in sections/breaks.ts and call it from internal.ts's main dispatch loop BEFORE every top-level handler. Any non-paragraph node crossing a section boundary now triggers the break. - Section-model primer in pm-adapter/README.md with spec citations. Tests: 1739/1739 pass in pm-adapter (including new end-tagged.test.ts and integration test in index.test.ts asserting flow-block order). --- packages/layout-engine/pm-adapter/README.md | 32 +++++ .../pm-adapter/src/index.test.ts | 66 ++++++++++ .../pm-adapter/src/internal.test.ts | 4 + .../layout-engine/pm-adapter/src/internal.ts | 22 +++- .../pm-adapter/src/sections/analysis.test.ts | 49 +++++--- .../pm-adapter/src/sections/analysis.ts | 62 +++++++-- .../pm-adapter/src/sections/breaks.ts | 40 ++++++ .../src/sections/end-tagged.test.ts | 118 ++++++++++++++++++ .../pm-adapter/src/sections/index.ts | 1 + .../pm-adapter/src/sections/types.ts | 15 ++- .../layout-engine/pm-adapter/src/types.d.ts | 1 + .../layout-engine/pm-adapter/src/types.ts | 7 ++ 12 files changed, 381 insertions(+), 36 deletions(-) create mode 100644 packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts diff --git a/packages/layout-engine/pm-adapter/README.md b/packages/layout-engine/pm-adapter/README.md index 27c0d1591c..e0653df917 100644 --- a/packages/layout-engine/pm-adapter/README.md +++ b/packages/layout-engine/pm-adapter/README.md @@ -23,3 +23,35 @@ Notes: - Handles lists, tables, images, vector shapes, SDT (TOC, structured content, doc parts), tracked changes, hyperlinks. - Consumes `@superdoc/style-engine` defaults and locale/tab interval hints from the PM document attrs. - `@superdoc/measuring-dom` is only a dependency for types; measurement happens later in the pipeline. + +## Section model (ECMA-376 §17.6, §17.18.77) + +Word uses **end-tagged** section semantics: a `` inside a paragraph defines the section that **ends with** that paragraph. The body-level `` defines the final section. All body children preceding a section-terminating paragraph — other paragraphs, **tables**, top-level drawings, SDT wrappers — belong to the section whose sectPr follows them. + +``` + + This is section A's first paragraph + + (section A props: 1-col, nextPage) + + ... ← section B (the NEXT sectPr) + + (section B props: 2-col continuous) + + This is section C's paragraph + (section C props: body-level) + +``` + +Section ranges are computed in `sections/analysis.ts` with two parallel indices: + +| Index | What it counts | Used by | +|---|---|---| +| `startParagraphIndex` / `endParagraphIndex` | Paragraph nodes only (including those inside SDT wrappers) | SDT handlers for intra-SDT section transitions | +| `startNodeIndex` / `endNodeIndex` | Every top-level `doc.content` child — paragraphs, tables, top-level drawings, SDTs | Main dispatch loop for inter-node section transitions | + +The main dispatch loop in `internal.ts` calls `maybeEmitNextSectionBreakForNode` BEFORE handling each top-level node. The helper uses `currentNodeIndex === nextSection.startNodeIndex` as its trigger, so non-paragraph nodes (tables especially) correctly get the sectionBreak emitted before them when they cross a section boundary. This is the fix for SD-2646. + +**When adding a new top-level block kind** (e.g., a new SDT type or a top-level drawing kind): do nothing. The dispatch-level hook covers you automatically. You only need a per-handler section check if your handler descends into children that carry their own sectPr markers — see the SDT handlers (`sdt/bibliography.ts`, `sdt/document-index.ts`, `sdt/table-of-authorities.ts`) for the intra-SDT pattern. + +Continuous section breaks (``) carry an extra requirement from §17.18.77: they "balance the content of the previous section." The balancing itself is the layout engine's responsibility (see `layout-engine/src/column-balancing.ts`); the adapter's job is only to ensure the right blocks are in the right section. diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index 65d5456486..c970677ea3 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -1269,6 +1269,72 @@ describe('toFlowBlocks', () => { [first, second, third].forEach((b) => expect(b?.type).toBe('continuous')); }); }); + + describe('end-tagged section membership for non-paragraph nodes (SD-2646, ECMA-376 §17.6.17)', () => { + it('emits the next section break BEFORE a table that sits between two sectPr-marker paragraphs', () => { + // IT-945 shape: table lives between the paragraph that ends section A + // and the paragraph that ends section B. Per §17.6.17 the table + // belongs to section B, so the sectionBreak introducing B's columns + // must precede the table in the flow stream. + const pmDoc: PMNode = { + type: 'doc', + attrs: { bodySectPr: createTestBodySectPr() }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'This is my first section' }] }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'nextPage' } }, + { name: 'w:cols', attributes: { 'w:num': '1', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }] }, + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }] }, + ], + }, + ], + }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'continuous' } }, + { name: 'w:cols', attributes: { 'w:num': '2', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { type: 'paragraph', content: [{ type: 'text', text: 'This is my third section' }] }, + ], + } as never; + + const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); + + const tableIndex = blocks.findIndex((b) => b.kind === 'table'); + const twoColBreakIndex = blocks.findIndex((b) => b.kind === 'sectionBreak' && b.columns?.count === 2); + expect(tableIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeLessThan(tableIndex); + }); + }); }); describe('block id prefixing', () => { diff --git a/packages/layout-engine/pm-adapter/src/internal.test.ts b/packages/layout-engine/pm-adapter/src/internal.test.ts index e1972f240b..4c4eda6863 100644 --- a/packages/layout-engine/pm-adapter/src/internal.test.ts +++ b/packages/layout-engine/pm-adapter/src/internal.test.ts @@ -59,6 +59,10 @@ vi.mock('./sections/index.js', () => { id: nextBlockId('sectionBreak'), type: section.type, })), + maybeEmitNextSectionBreakForNode: vi.fn(() => { + // Mocked as no-op: this test file already provides zero section ranges + // via analyzeSectionRanges, so there is never a break to emit. + }), publishSectionMetadata: vi.fn(), }; }); diff --git a/packages/layout-engine/pm-adapter/src/internal.ts b/packages/layout-engine/pm-adapter/src/internal.ts index 4ffd9da91d..28a49158ca 100644 --- a/packages/layout-engine/pm-adapter/src/internal.ts +++ b/packages/layout-engine/pm-adapter/src/internal.ts @@ -12,7 +12,12 @@ import type { FlowBlock, ParagraphBlock } from '@superdoc/contracts'; import { isValidTrackedMode } from './tracked-changes.js'; -import { analyzeSectionRanges, createSectionBreakBlock, publishSectionMetadata } from './sections/index.js'; +import { + analyzeSectionRanges, + createSectionBreakBlock, + maybeEmitNextSectionBreakForNode, + publishSectionMetadata, +} from './sections/index.js'; import { normalizePrefix, buildPositionMap, createBlockIdGenerator } from './utilities.js'; import { paragraphToFlowBlocks, @@ -201,6 +206,7 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): ranges: sectionRanges, currentSectionIndex: 0, currentParagraphIndex: 0, + currentNodeIndex: 0, }, converters, themeColors, @@ -209,12 +215,24 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): trackedListLastOrdinals: new Map(), }; - // Process nodes using handler dispatch pattern + // Process nodes using handler dispatch pattern. Before each top-level node + // we flush any pending section break whose `startNodeIndex` matches the + // current position — this keeps non-paragraph nodes (tables, top-level + // drawings) in their correct end-tagged section per ECMA-376 §17.6.17. doc.content.forEach((node) => { + maybeEmitNextSectionBreakForNode({ + sectionState: handlerContext.sectionState!, + nextBlockId, + pushBlock: (block) => { + blocks.push(block); + recordBlockKind(block.kind); + }, + }); const handler = nodeHandlers[node.type]; if (handler) { handler(node, handlerContext); } + handlerContext.sectionState!.currentNodeIndex++; }); // Ensure final body section is emitted only if not already emitted during paragraph processing. diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts index ac860ce7e6..97b6b5b2d7 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts @@ -296,8 +296,9 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 1, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(2); }); it('should find multiple paragraphs with sectPr', () => { @@ -315,12 +316,13 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(2); - expect(result.paragraphs[0]).toEqual({ index: 0, node: para1 }); - expect(result.paragraphs[1]).toEqual({ index: 2, node: para3 }); + expect(result.paragraphs[0]).toEqual({ index: 0, nodeIndex: 0, node: para1 }); + expect(result.paragraphs[1]).toEqual({ index: 2, nodeIndex: 2, node: para3 }); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(3); }); - it('should skip non-paragraph nodes', () => { + it('should skip non-paragraph nodes for paragraphIndex but count them in nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; @@ -334,11 +336,15 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + // paragraphIndex = 1 (second paragraph), nodeIndex = 2 (third top-level child + // after the heading). Non-paragraph top-level children count toward nodeIndex + // because ECMA-376 §17.6.17 sections span all body children. + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 2, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(4); }); - it('should include paragraphs inside index nodes', () => { + it('should include paragraphs inside index nodes and share the SDT nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; const para3 = { type: 'paragraph', content: [] }; @@ -352,8 +358,11 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); - expect(result.paragraphs).toEqual([{ index: 1, node: para2 }]); + // para2 is inside the SDT (index node) at top-level nodeIndex 1. + // paragraphIndex 1 still reflects paragraph order across descent. + expect(result.paragraphs).toEqual([{ index: 1, nodeIndex: 1, node: para2 }]); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(2); }); }); @@ -385,7 +394,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue(null); @@ -408,7 +417,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr', elements: [] }; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -450,8 +459,8 @@ describe('analysis', () => { }; const paragraphs = [ - { index: 2, node: para1 }, - { index: 5, node: para2 }, + { index: 2, nodeIndex: 2, node: para1 }, + { index: 5, nodeIndex: 5, node: para2 }, ]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr' }; @@ -487,7 +496,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.NEXT_PAGE, @@ -512,7 +521,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mock section data with page margins but no header/footer margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -550,7 +559,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only header margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -583,7 +592,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only footer margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -616,7 +625,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only top margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -649,7 +658,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Explicit zero values for all margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -688,7 +697,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mix of header/footer and page margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -725,7 +734,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ titlePg: false, @@ -747,7 +756,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.CONTINUOUS, diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.ts index 3a0cae9013..73905c2c84 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.ts @@ -65,15 +65,27 @@ export function shouldIgnoreSectionBreak( /** * Find all paragraphs in the document that contain sectPr elements. * + * Records two indices per match: + * - `paragraphIndex` counts only paragraph nodes (including those nested + * inside SDT wrappers), matching the long-standing contract callers use + * for SDT-internal section transitions. + * - `nodeIndex` counts every top-level `doc.content` child (paragraph, + * table, top-level drawing, SDT, …). This is required to fix SD-2646: + * a non-paragraph node between two sectPr markers belongs to the LATER + * marker's section per ECMA-376 §17.6.17, so the dispatch loop must + * know each section's bounds in top-level-node terms. + * * @param doc - ProseMirror document node - * @returns Object containing paragraphs with sectPr and total paragraph count + * @returns Paragraph matches plus both totals */ export function findParagraphsWithSectPr(doc: PMNode): { - paragraphs: Array<{ index: number; node: PMNode }>; + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>; totalCount: number; + totalNodeCount: number; } { - const paragraphs: Array<{ index: number; node: PMNode }> = []; + const paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }> = []; let paragraphIndex = 0; + let nodeIndex = 0; const getNodeChildren = (node: PMNode): PMNode[] => { if (Array.isArray(node.content)) return node.content; const content = node.content as { forEach?: (cb: (child: PMNode) => void) => void } | undefined; @@ -87,27 +99,30 @@ export function findParagraphsWithSectPr(doc: PMNode): { return []; }; - const visitNode = (node: PMNode): void => { + const visitNode = (node: PMNode, outerNodeIndex: number): void => { if (node.type === 'paragraph') { if (hasSectPr(node)) { - paragraphs.push({ index: paragraphIndex, node }); + paragraphs.push({ index: paragraphIndex, nodeIndex: outerNodeIndex, node }); } paragraphIndex++; return; } if (node.type === 'index' || node.type === 'bibliography' || node.type === 'tableOfAuthorities') { - getNodeChildren(node).forEach(visitNode); + // SDT descendants share the outer SDT's nodeIndex — dispatch-level + // section transitions fire on the SDT as a whole, not on its children. + getNodeChildren(node).forEach((child) => visitNode(child, outerNodeIndex)); } }; if (doc.content) { for (const node of doc.content) { - visitNode(node); + visitNode(node, nodeIndex); + nodeIndex++; } } - return { paragraphs, totalCount: paragraphIndex }; + return { paragraphs, totalCount: paragraphIndex, totalNodeCount: nodeIndex }; } /** @@ -128,11 +143,12 @@ export function findParagraphsWithSectPr(doc: PMNode): { * @returns Array of section ranges */ export function buildSectionRangesFromParagraphs( - paragraphs: Array<{ index: number; node: PMNode }>, + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>, hasBodySectPr: boolean, ): SectionRange[] { const ranges: SectionRange[] = []; let currentStart = 0; + let currentStartNode = 0; paragraphs.forEach((item, idx) => { if (shouldIgnoreSectionBreak(item.node, idx, paragraphs.length, hasBodySectPr)) { @@ -154,6 +170,8 @@ export function buildSectionRangesFromParagraphs( const range: SectionRange = { sectionIndex: idx, + startNodeIndex: currentStartNode, + endNodeIndex: item.nodeIndex, startParagraphIndex: currentStart, endParagraphIndex: item.index, sectPr, @@ -180,6 +198,7 @@ export function buildSectionRangesFromParagraphs( ranges.push(range); currentStart = item.index + 1; + currentStartNode = item.nodeIndex + 1; }); return ranges; @@ -228,6 +247,7 @@ export function createFinalSectionFromBodySectPr( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange | null { const clampedStart = Math.max(0, Math.min(currentStart, Math.max(totalParagraphs - 1, 0))); @@ -251,8 +271,15 @@ export function createFinalSectionFromBodySectPr( bodySectionData.bottomPx != null || bodySectionData.leftPx != null; + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds + ? Math.max(0, Math.min(nodeBounds.startNodeIndex, Math.max(totalNodes - 1, 0))) + : clampedStart; + return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: clampedStart, endParagraphIndex: totalParagraphs - 1, sectPr: bodySectPr, @@ -291,9 +318,14 @@ export function createDefaultFinalSection( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange { + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds?.startNodeIndex ?? currentStart; return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: currentStart, endParagraphIndex: totalParagraphs - 1, sectPr: null, @@ -318,11 +350,13 @@ export function createDefaultFinalSection( * @returns Array of section ranges with backward-looking semantics */ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): SectionRange[] { - const { paragraphs, totalCount } = findParagraphsWithSectPr(doc); + const { paragraphs, totalCount, totalNodeCount } = findParagraphsWithSectPr(doc); const hasBody = Boolean(bodySectPr); const ranges = buildSectionRangesFromParagraphs(paragraphs, hasBody); - const currentStart = ranges.length > 0 ? ranges[ranges.length - 1].endParagraphIndex + 1 : 0; + const last = ranges[ranges.length - 1]; + const currentStart = last ? last.endParagraphIndex + 1 : 0; + const currentStartNode = last ? last.endNodeIndex + 1 : 0; // Always represent the final section defined by bodySectPr, even if there are // no remaining paragraphs after the last paragraph-level sectPr. This ensures @@ -333,12 +367,16 @@ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): Section Math.min(currentStart, totalCount), totalCount, ranges.length, + { startNodeIndex: Math.min(currentStartNode, totalNodeCount), totalNodeCount }, ); if (finalSection) { ranges.push(finalSection); } } else if (ranges.length > 0) { - const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length); + const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length, { + startNodeIndex: Math.min(currentStartNode, totalNodeCount), + totalNodeCount, + }); if (fallbackFinal) { fallbackFinal.type = DEFAULT_PARAGRAPH_SECTION_TYPE; ranges.push(fallbackFinal); diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index 8501b2230a..8a05383174 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -190,3 +190,43 @@ export function shouldRequirePageBoundary(current: SectionRange, next: SectionRa export function hasIntrinsicBoundarySignals(_: SectionRange): boolean { return false; } + +/** + * Emit the next section's sectionBreak block if the dispatch loop has reached + * that section's starting top-level node index. + * + * ECMA-376 §17.6.17: a section is defined by its end-tagged ``. All + * body children preceding that tag — paragraphs, tables, top-level drawings — + * belong to the section that ENDS at the tag. This helper fires BEFORE any + * such node so the appropriate section config is active by the time the node + * is laid out. + * + * This centralises the check that previously lived (duplicated) inside + * `handleParagraphNode` and the SDT handlers. Calling it from the main + * dispatch loop covers every top-level node type — present and future — + * with no per-handler opt-in. + */ +export function maybeEmitNextSectionBreakForNode(args: { + sectionState: { + ranges: SectionRange[]; + currentSectionIndex: number; + currentNodeIndex: number; + }; + nextBlockId: BlockIdGenerator; + pushBlock: (block: SectionBreakBlock) => void; +}): void { + const { sectionState, nextBlockId, pushBlock } = args; + if (sectionState.ranges.length === 0) return; + if (sectionState.currentSectionIndex >= sectionState.ranges.length - 1) return; + + const nextSection = sectionState.ranges[sectionState.currentSectionIndex + 1]; + if (!nextSection) return; + if (sectionState.currentNodeIndex !== nextSection.startNodeIndex) return; + + const currentSection = sectionState.ranges[sectionState.currentSectionIndex]; + const requiresPageBoundary = + shouldRequirePageBoundary(currentSection, nextSection) || hasIntrinsicBoundarySignals(nextSection); + const extraAttrs = requiresPageBoundary ? { requirePageBoundary: true } : undefined; + pushBlock(createSectionBreakBlock(nextSection, nextBlockId, extraAttrs)); + sectionState.currentSectionIndex++; +} diff --git a/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts new file mode 100644 index 0000000000..8d7cf42210 --- /dev/null +++ b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts @@ -0,0 +1,118 @@ +/** + * @file End-tagged section-membership tests (ECMA-376 §17.6.17, §17.18.77). + * + * OOXML rule: a `` inside a paragraph defines the section that + * ENDS with that paragraph. All body children preceding it (back to the + * previous section-terminating paragraph) belong to that section — + * including tables, drawings, and other non-paragraph nodes. + * + * These tests use real implementations with no module mocks so they assert + * observable behavior of the section analysis, not call patterns. + */ +import { describe, it, expect } from 'vitest'; +import { analyzeSectionRanges } from './analysis.js'; +import type { PMNode } from '../types.js'; +import type { SectPrElement } from './types.js'; + +const sectPr = (options: { + type?: 'continuous' | 'nextPage'; + cols?: number; + colSpace?: number; + pgSz?: { w: number; h: number }; +}): SectPrElement => { + const elements: SectPrElement['elements'] = []; + if (options.type) { + elements!.push({ + type: 'element', + name: 'w:type', + attributes: { 'w:val': options.type }, + }); + } + if (options.pgSz) { + elements!.push({ + type: 'element', + name: 'w:pgSz', + attributes: { 'w:w': String(options.pgSz.w), 'w:h': String(options.pgSz.h) }, + }); + } + elements!.push({ + type: 'element', + name: 'w:cols', + attributes: { + 'w:num': String(options.cols ?? 1), + 'w:space': String(options.colSpace ?? 720), + }, + }); + return { + type: 'element', + name: 'w:sectPr', + elements, + }; +}; + +const paragraph = (marker?: SectPrElement): PMNode => ({ + type: 'paragraph', + attrs: marker ? { paragraphProperties: { sectPr: marker } } : {}, + content: [], +}); + +const table = (): PMNode => ({ + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }], + }, + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }], + }, + ], + }, + ], +}); + +describe('analyzeSectionRanges — end-tagged membership for non-paragraph nodes (SD-2646)', () => { + it('places a table between two sectPr markers into the section of the LATER marker', () => { + // Arrange — IT-945 shape: + // p0 "first section" text + // p1(sectPr-A) empty marker, 1-col nextPage → ends Section A + // TABLE belongs to Section B per ECMA-376 §17.6.17 + // p2(sectPr-B) empty marker, 2-col continuous → ends Section B + // p3 "third section" text + // body sectPr final, 1-col continuous → Section C + const markerA = sectPr({ type: 'nextPage', cols: 1 }); + const markerB = sectPr({ type: 'continuous', cols: 2, colSpace: 720 }); + const bodyMarker = sectPr({ type: 'continuous', cols: 1 }); + const doc: PMNode = { + type: 'doc', + attrs: { bodySectPr: bodyMarker }, + content: [paragraph(), paragraph(markerA), table(), paragraph(markerB), paragraph()], + }; + + // Act + const sut = analyzeSectionRanges(doc, bodyMarker); + + // Assert — three sections, with Section B's range spanning the table + expect(sut).toHaveLength(3); + const [sectionA, sectionB, sectionC] = sut; + + // Section A ends at p1 (node index 1). Table at node index 2 is NOT in A. + expect(sectionA.endNodeIndex).toBe(1); + expect(sectionA.columns?.count ?? 1).toBe(1); + + // Section B: starts AFTER Section A (node index 2 — the table), ends at p2 (node index 3). + // The table must belong to Section B, not Section A. + expect(sectionB.startNodeIndex).toBe(2); + expect(sectionB.endNodeIndex).toBe(3); + expect(sectionB.columns?.count).toBe(2); + + // Section C (body): starts at node index 4 (p3). + expect(sectionC.startNodeIndex).toBe(4); + expect(sectionC.columns?.count ?? 1).toBe(1); + }); +}); diff --git a/packages/layout-engine/pm-adapter/src/sections/index.ts b/packages/layout-engine/pm-adapter/src/sections/index.ts index 64b41423fb..d1731f870f 100644 --- a/packages/layout-engine/pm-adapter/src/sections/index.ts +++ b/packages/layout-engine/pm-adapter/src/sections/index.ts @@ -41,4 +41,5 @@ export { isSectionBreakBlock, signaturesEqual, shallowObjectEquals, + maybeEmitNextSectionBreakForNode, } from './breaks.js'; diff --git a/packages/layout-engine/pm-adapter/src/sections/types.ts b/packages/layout-engine/pm-adapter/src/sections/types.ts index a4134b48d0..551d8bcfd8 100644 --- a/packages/layout-engine/pm-adapter/src/sections/types.ts +++ b/packages/layout-engine/pm-adapter/src/sections/types.ts @@ -89,11 +89,22 @@ export type SectionVerticalAlign = 'top' | 'center' | 'bottom' | 'both'; /** * Section range represents a contiguous section in the document. - * Word uses "end-tagged" section semantics: a paragraph's sectPr defines - * properties for the section ENDING at that paragraph, not starting after it. + * + * Word uses "end-tagged" section semantics (ECMA-376 §17.6.17): a paragraph's + * sectPr defines properties for the section ENDING at that paragraph, not + * starting after it. All body children preceding the section-terminating + * paragraph — paragraphs, tables, top-level drawings — belong to that section. + * + * `startNodeIndex`/`endNodeIndex` are computed over every top-level + * `doc.content` child and are the authoritative boundaries for dispatching + * section breaks at emission time. `startParagraphIndex`/`endParagraphIndex` + * are retained for callers (SDT handlers) that count only paragraphs during + * recursive descent. */ export interface SectionRange { sectionIndex: number; + startNodeIndex: number; + endNodeIndex: number; startParagraphIndex: number; endParagraphIndex: number; sectPr: SectPrElement | null; diff --git a/packages/layout-engine/pm-adapter/src/types.d.ts b/packages/layout-engine/pm-adapter/src/types.d.ts index 13de3db49f..2285e17fcd 100644 --- a/packages/layout-engine/pm-adapter/src/types.d.ts +++ b/packages/layout-engine/pm-adapter/src/types.d.ts @@ -238,6 +238,7 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + currentNodeIndex: number; }; converters?: { paragraphToFlowBlocks?: ( diff --git a/packages/layout-engine/pm-adapter/src/types.ts b/packages/layout-engine/pm-adapter/src/types.ts index 5a98b205ed..819f29146b 100644 --- a/packages/layout-engine/pm-adapter/src/types.ts +++ b/packages/layout-engine/pm-adapter/src/types.ts @@ -301,6 +301,13 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + /** + * Index of the current top-level `doc.content` child being dispatched. + * Advanced by the main dispatch loop in internal.ts — drives end-tagged + * section transitions for non-paragraph nodes (tables, top-level + * drawings, …) per ECMA-376 §17.6.17. + */ + currentNodeIndex: number; }; // Converters for nested content From 36027006c25e830a7ae9bce09846d48ecf05a66b Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:14:31 -0300 Subject: [PATCH 2/6] fix(layout-engine): split dominant table at row boundary when balancing section-final page (SD-2646) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The column balancer treats each fragment as an atomic block. A multi-page two-column continuous section's final page can end up with a single table fragment taller than totalSectionHeight / columnCount. The atomic-block binary search then places the whole table in one column and leaves the other empty — diverging from Word, which balances by splitting the table at a row boundary per ECMA-376 §17.18.77 ("a continuous section break balances the content of the previous section"). Fix: add splitDominantTableAtRowBoundary as a preprocessor inside balanceSectionOnPage. When the section has a single splittable table fragment larger than target, split it at the row whose cumulative height first meets or exceeds totalSectionHeight / columnCount. The two halves are inserted in place of the original; the rest of the balancer runs unchanged and naturally assigns one to each column. Also add getBalancingHeight so empty sectPr-marker paragraphs (measured lines with width=0) contribute 0 to balancing — matching Word's behavior of not rendering an empty line for such markers. This keeps both columns top-aligned on the section-final page. On IT-945: page 2 now splits 14/14 from y=96 in both columns, matching Word's top-alignment. Before this fix page 2 rendered all 28 remaining rows in col 1 with col 0 empty. Tests: strengthened existing "balances the section-ending page" test (it was passing trivially via `if (sectionFragments.length > 1)` guard). Added narrow-table multi-page regression test. 616/616 pass. --- .../layout-engine/src/column-balancing.ts | 242 +++++++++++++++++- .../layout-engine/src/index.test.ts | 109 ++++++-- 2 files changed, 333 insertions(+), 18 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index 94466f9b57..d70e4bd2c8 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -573,6 +573,49 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map` + * has no text content — all measured lines have `width === 0`. Word does + * NOT render an empty line for such markers, so they take no vertical space + * in the balanced layout. Treating them as balance-neutral prevents them + * from pushing their column's cursor down and unbalancing the section's + * final page against Word's output (ECMA-376 §17.18.77). + * + * For every other paragraph, image, drawing, or table this delegates to + * the standard `getFragmentHeight` so existing behavior is preserved. + */ +function getBalancingHeight(fragment: BalancingFragment, measureMap: Map): number { + if (fragment.kind === 'para') { + const measure = measureMap.get(fragment.blockId) as + | (MeasureData & { lines?: Array<{ lineHeight: number; width?: number }> }) + | undefined; + if (measure?.kind === 'paragraph' && Array.isArray(measure.lines) && measure.lines.length > 0) { + const fromLine = fragment.fromLine ?? 0; + const toLine = fragment.toLine ?? measure.lines.length; + // A sectPr-marker paragraph is empty: every measured line has an + // EXPLICIT width of 0 (measuring-dom sets width per line). Treat + // undefined width as "has content" so synthetic test fixtures and any + // other callers that don't set line.width keep their existing height. + let allEmpty = true; + let sawAnyLine = false; + for (let i = fromLine; i < toLine; i++) { + const line = measure.lines[i]; + if (!line) continue; + sawAnyLine = true; + if (line.width !== 0) { + allEmpty = false; + break; + } + } + if (sawAnyLine && allEmpty) return 0; + } + } + return getFragmentHeight(fragment, measureMap); +} + /** * Balances column content on a page by redistributing fragments. * @@ -796,6 +839,26 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); if (remainingHeight <= 0) return null; + // Pre-split a dominant table fragment at a row boundary before balancing. + // + // When a section's final page contains a single splittable table that's + // taller than (totalSectionHeight / columnCount), the atomic-block balancer + // can only place the whole table in one column — leaving the other column + // empty. Word's behavior per ECMA-376 §17.18.77 is to balance the + // REMAINING content, which for a narrow table means splitting at a row + // boundary so both columns carry roughly half the rows. + // + // We split ONCE per balance call (we only need two halves for a 2-col + // section; extending to N > 2 would iterate). SD-2646: IT-945 page 2 has a + // 515px / 28-row table; splitting into ~257px halves lets the balancer + // assign half to each column. + splitDominantTableAtRowBoundary({ + sectionFragments, + fragments, + columnCount, + measureMap: args.measureMap, + }); + // Order fragments in document order: by current column (x → left-to-right), // then by y within each column. During unbalanced layout the paginator fills // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the @@ -809,9 +872,13 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu // by y (as balancePageColumns does) would collapse fragments from different // source columns that happen to share a y coordinate into a single row and // re-stack them at one position — producing overlap. + // + // Use `getBalancingHeight` so empty sectPr-marker paragraphs contribute 0 + // to their column's cursor — matching Word's behavior of not rendering a + // blank line for such markers. const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ blockId: `${f.blockId}#${i}`, - measuredHeight: getFragmentHeight(f, args.measureMap), + measuredHeight: getBalancingHeight(f, args.measureMap), canBreak: false, keepWithNext: false, keepTogether: true, @@ -850,3 +917,176 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu } return { maxY }; } + +/** + * Table measure shape used by the row-split preprocessor. + * + * Only the row-heights array is required. We access it through the runtime + * `measureMap` stored as `MeasureData`, which narrows the interface for the + * public balancer API — but the stored value is the full `TableMeasure` + * containing `rows: [{ height }]`, so a cast is safe. + */ +interface TableMeasureLike { + kind: string; + rows?: Array<{ height: number }>; +} + +/** + * Row-boundary record used by the renderer to draw horizontal dividers. + * Mirrors the subset of `TableFragmentMetadata['rowBoundaries']` that the + * split helper needs to read and regenerate. + */ +interface RowBoundaryLike { + i: number; + y: number; + h: number; + min: number; + r: number; +} + +/** + * In-place split of a dominant table fragment at a row boundary. + * + * Problem this solves: the column balancer treats each fragment as an atomic + * block. A multi-page two-column continuous section's final page can end up + * with a single table fragment that exceeds half the section's height. The + * balancer then places the whole table in one column and leaves the other + * empty — diverging from Word, which balances by splitting the table at a + * row boundary (ECMA-376 §17.18.77: continuous breaks balance the previous + * section's content). + * + * This preprocessor runs once per `balanceSectionOnPage` call. It detects a + * single dominant table fragment and splits it at the row whose cumulative + * height first meets or exceeds totalSectionHeight / columnCount. The two + * halves are inserted into both `sectionFragments` and the page's + * `fragments` array in place of the original; the rest of the balancer then + * runs on N + 1 similarly-sized blocks and naturally assigns one to each + * column. + * + * Guards: + * - Only one splittable table fragment on the section's page (skip if 0 or >1). + * - Table must span at least 2 rows (can't split a 1-row fragment). + * - Total height must exceed target (= total / columnCount) by more than a + * small epsilon; otherwise the atomic balancer already fits. + * + * Splitting is NOT appropriate when the author placed explicit column breaks + * or used unequal columns — those cases are already filtered by the caller. + * + * @param args Section fragments (already filtered to this section), the + * full page fragments (mutated in place), column count, and the + * measure map. + */ +function splitDominantTableAtRowBoundary(args: { + sectionFragments: BalancingFragment[]; + fragments: BalancingFragment[]; + columnCount: number; + measureMap: Map; +}): void { + const { sectionFragments, fragments, columnCount, measureMap } = args; + if (columnCount <= 1) return; + + const tables = sectionFragments.filter((f) => f.kind === 'table'); + if (tables.length !== 1) return; + const table = tables[0] as BalancingFragment & { + fromRow?: number; + toRow?: number; + height?: number; + continuesFromPrev?: boolean; + continuesOnNext?: boolean; + metadata?: { rowBoundaries?: RowBoundaryLike[]; columnBoundaries?: unknown; coordinateSystem?: string }; + }; + const fromRow = table.fromRow ?? 0; + const toRow = table.toRow ?? fromRow; + const rowSpan = toRow - fromRow; + if (rowSpan < 2) return; + + const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; + if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return; + + const totalSectionHeight = sectionFragments.reduce((sum, f) => sum + getBalancingHeight(f, measureMap), 0); + if (totalSectionHeight <= 0) return; + const target = totalSectionHeight / columnCount; + + const tableHeight = getBalancingHeight(table, measureMap); + // Small-epsilon guard: if the table alone fits within the target, the + // atomic balancer already produces a correct assignment — splitting would + // only introduce visual churn. + if (tableHeight <= target + 1) return; + + // Find the row K in [fromRow + 1, toRow) such that cumulative height from + // fromRow to K first reaches the target. Guaranteed to succeed because + // tableHeight > target and rowSpan ≥ 2. + let running = 0; + let splitRow = fromRow + 1; + for (let r = fromRow; r < toRow - 1; r++) { + const h = measure.rows[r]?.height ?? 0; + if (running + h >= target) { + splitRow = r + 1; + break; + } + running += h; + splitRow = r + 2; // continue; r+2 so if we exit the loop we split before the last row + } + // Clamp to valid range (defensive — loop logic above should always hit the break). + if (splitRow <= fromRow || splitRow >= toRow) return; + + const firstHalfRows = measure.rows.slice(fromRow, splitRow); + const secondHalfRows = measure.rows.slice(splitRow, toRow); + const firstHalfHeight = firstHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + const secondHalfHeight = secondHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + + // Regenerate rowBoundaries so the renderer draws horizontal dividers at the + // right y-offsets inside each half. rowBoundaries are 0-origin within the + // fragment; we walk the measure's rows for each half and accumulate. + const makeRowBoundaries = (rows: Array<{ height: number }>, startIndex: number): RowBoundaryLike[] => { + const out: RowBoundaryLike[] = []; + let y = 0.5; + for (let i = 0; i < rows.length; i++) { + const h = rows[i].height ?? 0; + out.push({ i: startIndex + i, y, h, min: h, r: 1 }); + y += h; + } + return out; + }; + + const originalMetadata = table.metadata; + const firstMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(firstHalfRows, 0), + } + : undefined; + const secondMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(secondHalfRows, 0), + } + : undefined; + + // Construct the first half by mutating the original (preserves object + // identity so `fragments.indexOf(table)` still works below). + table.toRow = splitRow; + table.height = firstHalfHeight; + table.continuesOnNext = true; + if (firstMetadata) table.metadata = firstMetadata; + + const secondHalf: BalancingFragment = { + ...table, + fromRow: splitRow, + toRow, + height: secondHalfHeight, + continuesFromPrev: true, + continuesOnNext: table.continuesOnNext ? false : (table.continuesOnNext ?? false), + metadata: secondMetadata ?? table.metadata, + } as BalancingFragment; + // Preserve the original's continuesOnNext intent: if the original fragment + // continued onto a later page, the SECOND half now inherits that role. + (secondHalf as { continuesOnNext?: boolean }).continuesOnNext = false; + + // Insert the second half right after the first in both arrays so the + // balancer's (x, y) ordering keeps them adjacent in document order. + const fragIdx = fragments.indexOf(table); + if (fragIdx >= 0) fragments.splice(fragIdx + 1, 0, secondHalf); + const sectIdx = sectionFragments.indexOf(table); + if (sectIdx >= 0) sectionFragments.splice(sectIdx + 1, 0, secondHalf); +} diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index baffe844b4..8033813953 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2939,29 +2939,104 @@ describe('layoutDocument', () => { expect(afterSectionPara!.y).toBe(expectedBalancedBottom); }); - it('balances the section-ending page even when the section spans multiple pages', () => { - // A section big enough to fill page 1 completely then spill onto page 2. - // Word balances only the FINAL page (earlier pages are already full). - // Use paragraphs each large enough that col-1 alone will overflow. - const lineHeight = 120; - const paraCount = 10; + it('fills BOTH columns on every page of a multi-page 2-col continuous section', () => { + // ECMA-376 §17.18.77 (ST_SectionMark): a continuous section break + // "balances content of the previous section." Word's observable behavior + // is to fill col 0 to the balanced target, wrap to col 1 to the same + // target, then wrap to the next page — on EVERY page, not only the last. + // Regression for SD-2646: earlier pages must not stack content in col 0. + const lineHeight = 20; + const paraCount = 100; // ≈ 2000px, exceeds one page's content area const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); const layout = layoutDocument(blocks, measures, PAGE); - // Multiple pages due to content size. - expect(layout.pages.length).toBeGreaterThan(1); + expect(layout.pages.length).toBeGreaterThanOrEqual(2); - // On the final page, fragments of the section should span both columns. - const finalPage = layout.pages[layout.pages.length - 1]; - const sectionFragments = finalPage.fragments.filter( - (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', - ); - if (sectionFragments.length > 1) { - const uniqueX = new Set(sectionFragments.map((f) => Math.round(f.x))); - // Either balanced into both columns, or single-column if only 1 fragment - expect(uniqueX.size).toBeGreaterThanOrEqual(1); + for (const page of layout.pages) { + const sectionFragments = page.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length < 2) continue; // tail of last page may have <2 fragments + const col0 = sectionFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + expect(col0.length).toBeGreaterThan(0); + expect(col1.length).toBeGreaterThan(0); + } + }); + + it('flows a narrow multi-page table across both columns on every page (SD-2646 regression)', () => { + // IT-945 shape: a narrow table (one column wide) inside a 2-col continuous + // section, spanning multiple pages. Regression guard for the layout path + // once pm-adapter correctly places the table in the 2-col section. + const rowCount = 114; + const rowHeight = 18.4; + const cellWidth = TWO_COL_WIDTH / 2; + + const rows = Array.from({ length: rowCount }, (_, r) => ({ + id: `tbl-row-${r}`, + cells: [ + { id: `tbl-cell-${r}-0`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-0-p`, runs: [] } }, + { id: `tbl-cell-${r}-1`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-1-p`, runs: [] } }, + ], + })); + const tbl: TableBlock = { + kind: 'table', + id: 'tbl', + rows, + attrs: { sectionIndex: 0 }, + } as TableBlock; + const tblM: TableMeasure = { + kind: 'table', + rows: Array.from({ length: rowCount }, () => ({ + height: rowHeight, + cells: [ + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + ], + })), + columnWidths: [cellWidth, cellWidth], + totalWidth: cellWidth * 2, + totalHeight: rowHeight * rowCount, + }; + + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + tbl as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }, tblM, { kind: 'sectionBreak' }]; + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + let pagesWithBothColumns = 0; + for (const page of layout.pages) { + const tableFragments = page.fragments.filter((f) => f.kind === 'table'); + if (tableFragments.length === 0) continue; + const col0 = tableFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = tableFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + if (col0.length > 0 && col1.length > 0) pagesWithBothColumns++; } + // At least one page must have fragments in both columns. A stricter + // assertion (every page) is made invalid by the tail of the final page + // which can reasonably hold <1 column's worth of content. + expect(pagesWithBothColumns).toBeGreaterThan(0); }); it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { From c765852d70ac057f57e3a9b451f730bd8b7c675a Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:37:03 -0300 Subject: [PATCH 3/6] =?UTF-8?q?fix(measuring-dom):=20do=20not=20max-clamp?= =?UTF-8?q?=20auto=20lineRule=20per=20ECMA-376=20=C2=A717.18.48=20(SD-2735?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `resolveLineHeight` treated `w:lineRule="auto"` and `w:lineRule="atLeast"` identically — both took the max-clamp branch that floored the line height at `max(computedHeight, ascent+descent, 1.15×fontSize)`. Per §17.18.48, `auto` has "no predetermined minimum or maximum"; only `atLeast` is max-clamped. Symptom: author intent like `w:line="120" w:lineRule="auto"` (half line spacing) was silently inflated up to the 1.15×fontSize baseline, producing paragraphs that visually render as single-spaced instead of tight-spaced as specified. Fix: branch only on `atLeast` for the max-clamp; `auto` and `exact` pass the resolved target through without a floor. Exact behaviour per spec table: | lineRule | result | |----------|-----------------------------------------------| | auto | target (no clamp) | | atLeast | max(target, naturalSingleLine) | | exact | target (clipped by caller if content taller) | Out of scope in this PR (tracked in SD-2735): 1. `lineUnit='multiplier'` currently means `multiplier × fontSize` per the adapter's convention. Strict §17.18.48 reading would multiply by naturalSingleLine instead. Changing this requires coordinated adapter + test-baseline updates and shifts layout numbers across every document — kept for follow-up. 2. Natural single-line height on the browser side does not match Word's Aptos-intrinsic metrics (browser computes ~17-18 px for 12pt, Word renders ~19.5 px). Closing that gap requires bundling Aptos as a web font so the browser uses Microsoft's actual font metrics — also follow-up. 3. TableGrid's pPr override (`w:line="240"` / single-spacing for table cells) isn't applied to paragraphs inside the table by the pm-adapter / style-engine cascade. Separate investigation. New regression test: `honours auto lineRule with a sub-baseline multiplier` asserts a 0.5 multiplier produces 8 px (not the inflated 18.4) at fontSize=16. All 246 measuring-dom tests pass. Downstream: layout-engine (616/616) and pm-adapter (1739/1739) test suites pass unchanged. --- .../measuring/dom/src/index.test.ts | 28 ++++++++ .../layout-engine/measuring/dom/src/index.ts | 69 ++++++++++++++++--- 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index b46e43fe0a..5e708fbbdd 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1133,6 +1133,34 @@ describe('measureBlock', () => { expect(measure.lines[0].lineHeight).toBeCloseTo(baseLineHeight, 1); }); + it('honours auto lineRule with a sub-baseline multiplier (no silent min-clamp) per ECMA-376 §17.18.48', async () => { + // SD-2735 regression: `auto` explicitly has "no predetermined minimum + // or maximum" per spec. A multiplier of 0.5 should tighten lines, + // not be silently inflated to the baseline 1.15 × fontSize as the + // pre-fix code did (it reused the atLeast max-clamp branch for auto). + const fontSize = 16; + const block: FlowBlock = { + kind: 'paragraph', + id: 'tight-spacing', + runs: [ + { + text: 'Tight auto spacing', + fontFamily: 'Arial', + fontSize, + }, + ], + attrs: { + spacing: { line: 0.5, lineUnit: 'multiplier', lineRule: 'auto' }, + }, + }; + + const measure = expectParagraphMeasure(await measureBlock(block, 400)); + // 0.5 × 16 = 8 px. Previously the `auto` path max-clamped this up to + // 18.4, eating the author's explicit half-spacing intent. + expect(measure.lines[0].lineHeight).toBeCloseTo(0.5 * fontSize, 1); + expect(measure.lines[0].lineHeight).toBeLessThan(fontSize); + }); + it('ensures line height is never smaller than glyph bounds to prevent clipping', async () => { // This test verifies the clamp: Math.max(fontSize * 1.15, ascent + descent) // For any font, line height must be >= ascent + descent to prevent glyph overlap diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 83e050abd2..541a05a621 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -3529,17 +3529,70 @@ const appendSegment = ( segments.push({ runIndex, fromChar, toChar, width, x }); }; -const resolveLineHeight = (spacing: ParagraphSpacing | undefined, fontSize: number, maxHeight: number = -1): number => { - let computedHeight = spacing?.line ?? WORD_SINGLE_LINE_SPACING_MULTIPLIER; - if (spacing?.lineUnit === 'multiplier') { - computedHeight = computedHeight * fontSize; +/** + * Resolve the effective line height for a paragraph per ECMA-376 §17.18.48 + * (ST_LineSpacingRule). + * + * Previous implementation conflated `auto` with `atLeast` — both took the + * max-clamp branch, which meant the author's intent with `w:lineRule="auto" + * w:line="120"` (half-spacing) was silently inflated up to the natural + * single-line height. Per spec, `auto` has "no predetermined minimum or + * maximum" — only `atLeast` is max-clamped. + * + * | lineRule | Formula | + * |----------|------------------------------------------------------------------------| + * | auto | `lineHeight = target` (no floor, no ceiling) | + * | atLeast | `lineHeight = max(target, naturalSingle)` | + * | exact | `lineHeight = target` (clipped by caller if content taller) | + * + * This change is the minimal spec correction: the `target` computation + * below preserves the adapter's existing `lineUnit='multiplier'` convention + * (multiplier × fontSize) even though strict spec would multiply by natural + * single-line height. Migrating that convention is tracked as a follow-up + * (see SD-2735 description) because it requires coordinated changes across + * the adapter + many baseline tests. + * + * @param spacing Paragraph spacing from the resolved style cascade. + * @param fontSize Font size in px (used for unit-less and multiplier paths). + * @param naturalSingleLine Natural single-line height in px (ascent + descent + * from Canvas measurement). When not provided, + * falls back to `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize`. + * @returns Effective line height in px. + * + * @see ECMA-376 Part 1 §17.18.48 ST_LineSpacingRule; §17.3.1.33 spacing. + */ +const resolveLineHeight = ( + spacing: ParagraphSpacing | undefined, + fontSize: number, + naturalSingleLine: number = -1, +): number => { + // Natural single-line height. The caller passes ascent + descent from the + // Canvas measurement when available. We floor it at the historical + // `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize` approximation so fonts + // whose actualBoundingBox is smaller than that (many text fonts) still + // render with the comfortable default spacing Word uses for its Normal + // style. This preserves pre-SD-2735 behaviour for unspaced paragraphs. + const fallbackFloor = WORD_SINGLE_LINE_SPACING_MULTIPLIER * fontSize; + const naturalSingle = naturalSingleLine > 0 ? Math.max(naturalSingleLine, fallbackFloor) : fallbackFloor; + + // No explicit spacing → single-spacing (spec default). + if (!spacing || spacing.line == null) { + return naturalSingle; + } + + // Compute the target per the adapter's unit convention. + let target = spacing.line; + if (spacing.lineUnit === 'multiplier') { + target = target * fontSize; } - const lineRule = spacing?.lineRule ?? 'auto'; - if (['atLeast', 'auto'].includes(lineRule)) { - return Math.max(computedHeight, maxHeight, WORD_SINGLE_LINE_SPACING_MULTIPLIER * fontSize); + const rule = spacing.lineRule ?? 'auto'; + if (rule === 'atLeast') { + return Math.max(target, naturalSingle); } - return computedHeight; + // `auto` and `exact`: no min-clamp (auto has "no predetermined minimum" + // per §17.18.48; exact is explicitly "shall be exactly the value specified"). + return target; }; const sanitizePositive = (value: number | undefined): number => From a14dcd1a9fb998817ab963d9e951ceb53f29db93 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:59:30 -0300 Subject: [PATCH 4/6] fix(layout-engine): spec-compliant line-height resolution (SD-2735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three coordinated fixes for ECMA-376 §17.18.48 line-height semantics: A) measuring-dom: Aptos/Calibri natural-line calibration JSDOM's Canvas API cannot read Microsoft's Aptos/Calibri font files, so the fallback fontBoundingBox measurement yields ~17px for 12pt — well short of Word's ~19.5px. Adds a per-font calibration table (Aptos/Calibri/Cambria = 1.218x fontSize) populating the new `naturalSingleLine` field on FontMetricsResult. Five new tests cover the calibration table. B) pm-adapter: TableGrid cascade override in table paragraphs TableGrid style in Word's Normal template sets `w:line="240" auto` (1.0 multiplier), but our style cascade inherits docDefaults' 1.15 multiplier instead. When a paragraph inside a table has no explicit `w:spacing`, override the multiplier to 1.0 so table rows render at Word's compact spacing. C) measuring-dom: multiplier × naturalSingleLine (spec-correct) Per §17.18.48, `multiplier × (w:line / 240)` scales the *natural single line height*, not fontSize. Previously we multiplied by fontSize (a coincidentally-workable approximation for fonts where naturalSingle ≈ 1.15 × fontSize). Now we scale by the calibrated naturalSingle. Baseline tests updated to reflect the new arithmetic: e.g. `1.5 × auto` at 16px goes from 24 to 27.6 (1.5 × 1.15 × 16). Tests: - measuring-dom: 251/251 pass (5 new calibration tests) - pm-adapter: 1739/1739 pass - layout-engine: 615/616 pass (1 pre-existing skip) Remaining work (tracked separately): theme-font resolution (`minorHAnsi` → Aptos) must reach the measurement layer for the Aptos calibration to take effect at render-time for documents that use theme fonts. Today the DOM reports `fontFamily: Arial, sans-serif` for IT-945, so calibration is plumbed but does not yet fire for this corpus. The fixes themselves are spec-correct and unit-tested. --- .../dom/src/fontMetricsCache.test.ts | 44 +++++++++++ .../measuring/dom/src/fontMetricsCache.ts | 77 ++++++++++++++++++- .../measuring/dom/src/index.test.ts | 30 +++++--- .../layout-engine/measuring/dom/src/index.ts | 22 +++++- .../pm-adapter/src/converters/table.ts | 38 ++++++++- 5 files changed, 196 insertions(+), 15 deletions(-) diff --git a/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts b/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts index d9aec22840..8f146b9f03 100644 --- a/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts +++ b/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts @@ -359,4 +359,48 @@ describe('fontMetricsCache', () => { expect(metrics.ascent).toBeGreaterThan(metrics.descent); }); }); + + describe('naturalSingleLine calibration (SD-2735)', () => { + // Per ECMA-376 §17.18.48, spacing multipliers are relative to the font's + // "natural single line height". JSDOM's Canvas implementation cannot read + // the actual ascent/descent from Microsoft's Aptos/Calibri font files, so + // we calibrate these fonts by measured-vs-Word ratio. + + it('populates naturalSingleLine for Aptos at 12pt matching Word (~19.5px)', () => { + const metrics = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 16 }, 'browser', defaultFonts); + expect(metrics.naturalSingleLine).toBeDefined(); + // 16px × 1.218 = 19.488 — Word renders Aptos 12pt at ~19.5px + expect(metrics.naturalSingleLine!).toBeCloseTo(19.488, 1); + }); + + it('populates naturalSingleLine for Calibri at 12pt (~19.5px)', () => { + const metrics = getFontMetrics(ctx, { fontFamily: 'Calibri', fontSize: 16 }, 'browser', defaultFonts); + expect(metrics.naturalSingleLine).toBeDefined(); + expect(metrics.naturalSingleLine!).toBeCloseTo(19.504, 1); + }); + + it('falls back to canvas measurement for fonts without calibration', () => { + const metrics = getFontMetrics(ctx, { fontFamily: 'Arial', fontSize: 16 }, 'browser', defaultFonts); + // Arial has no entry in the calibration table; naturalSingleLine may + // still come through from fontBoundingBox* when the canvas provides it. + // Either a canvas-sourced value or undefined is acceptable here. + if (metrics.naturalSingleLine != null) { + expect(metrics.naturalSingleLine).toBeGreaterThan(0); + expect(metrics.naturalSingleLine).toBeLessThan(30); + } + }); + + it('scales naturalSingleLine linearly with font size for Aptos', () => { + const m12 = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 16 }, 'browser', defaultFonts); + const m24 = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 32 }, 'browser', defaultFonts); + // Calibration is a pure multiplier: doubling fontSize doubles naturalSingle. + expect(m24.naturalSingleLine! / m12.naturalSingleLine!).toBeCloseTo(2, 2); + }); + + it('matches Aptos Display and Aptos to the same calibration', () => { + const m1 = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 16 }, 'browser', defaultFonts); + const m2 = getFontMetrics(ctx, { fontFamily: 'Aptos Display', fontSize: 16 }, 'browser', defaultFonts); + expect(m1.naturalSingleLine).toBeCloseTo(m2.naturalSingleLine!, 2); + }); + }); }); diff --git a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts index 1c8aa75f60..a754f8f4a5 100644 --- a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts +++ b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts @@ -23,10 +23,71 @@ export type FontInfo = { /** * Measured font metrics from the Canvas API. + * + * `ascent`/`descent` come from `actualBoundingBox*` on a sample string — these + * are the painted bounds used by downstream code for glyph-clipping guards. + * + * `naturalSingleLine` (optional) is the font's intrinsic single-line-height + * (what Word uses as the base for `w:lineRule="auto"` rendering). It comes + * either from a per-font calibration table (for fonts where Word's intrinsic + * metrics are known to diverge from the browser's) or, when the browser + * exposes it, from `fontBoundingBoxAscent + fontBoundingBoxDescent`. + * When undefined, callers fall back to `ascent + descent` or a + * multiplier-of-fontSize heuristic. */ export type FontMetricsResult = { ascent: number; descent: number; + naturalSingleLine?: number; +}; + +/** + * Per-font calibration of natural single-line height, keyed by lowercased + * family name (first entry of a comma-separated stack). + * + * The multiplier is `fontSize × multiplier = naturalSingleLine` in px. These + * values were measured by rendering the font in Word and reading back the + * resulting row heights — the browser's Canvas `fontBoundingBox*` and + * `actualBoundingBox*` do not produce the same numbers because Word uses + * Microsoft's internal metrics from the actual font file (not available to + * the browser unless the font is bundled). + * + * Aptos (Microsoft's default since late 2023) and Calibri (previous default) + * both render at ~1.218–1.219 × fontSize natural single-line in Word. + * Fonts not in this table fall through to the browser's measured metrics. + * + * Add entries as document-corpus evidence accumulates. Values should match + * Word's output as measured from rendered PDFs at known font sizes. + */ +const FONT_NATURAL_LINE_CALIBRATION: Record = { + aptos: 1.218, + 'aptos display': 1.218, + calibri: 1.219, + 'calibri light': 1.219, + cambria: 1.219, +}; + +/** + * Parse a CSS font-family stack and return the first family name lowercased, + * stripped of quotes. Returns empty string for invalid/empty input. + */ +const primaryFontFamily = (fontFamily: string): string => { + if (typeof fontFamily !== 'string') return ''; + const first = fontFamily.split(',')[0] ?? ''; + return first + .trim() + .replace(/^['"]|['"]$/g, '') + .toLowerCase(); +}; + +/** + * Look up per-font natural-single-line calibration for a given family. + * Returns px value when the family has a calibration entry, undefined otherwise. + */ +export const getCalibratedNaturalSingleLine = (fontFamily: string, fontSizePx: number): number | undefined => { + const multiplier = FONT_NATURAL_LINE_CALIBRATION[primaryFontFamily(fontFamily)]; + if (multiplier == null || !Number.isFinite(fontSizePx) || fontSizePx <= 0) return undefined; + return fontSizePx * multiplier; }; /** @@ -219,7 +280,21 @@ export function getFontMetrics( descent = fontInfo.fontSize * 0.2; } - const result: FontMetricsResult = { ascent, descent }; + // Natural single-line height: prefer explicit calibration for fonts where + // Word's intrinsic metrics differ from the browser's measurement, otherwise + // use the browser's `fontBoundingBox*` values when exposed, otherwise leave + // undefined and let the caller apply its own fallback. + let naturalSingleLine: number | undefined = getCalibratedNaturalSingleLine(fontInfo.fontFamily, fontInfo.fontSize); + if ( + naturalSingleLine == null && + typeof textMetrics.fontBoundingBoxAscent === 'number' && + typeof textMetrics.fontBoundingBoxDescent === 'number' && + textMetrics.fontBoundingBoxAscent > 0 + ) { + naturalSingleLine = textMetrics.fontBoundingBoxAscent + textMetrics.fontBoundingBoxDescent; + } + + const result: FontMetricsResult = { ascent, descent, naturalSingleLine }; // Cache the result (with size limit) if (fontMetricsCache.size >= MAX_CACHE_SIZE) { diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index 5e708fbbdd..b5ef1fe255 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1000,9 +1000,13 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - // `lineUnit: "multiplier"` applies directly to fontSize. - // (pm-adapter already bakes the OOXML auto 1.15 factor into the multiplier value.) - expect(measure.lines[0].lineHeight).toBeCloseTo(1.5 * fontSize, 1); + // `lineUnit: "multiplier"` now applies to naturalSingleLine per + // ECMA-376 §17.18.48 auto rule `(line/240) × naturalSingle`. For fonts + // without explicit calibration, naturalSingle falls back to + // WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize = 1.15 × 16 = 18.4. + // → 1.5 × 18.4 = 27.6. + const baseNaturalSingle = 1.15 * fontSize; // 18.4 fallback for Arial + expect(measure.lines[0].lineHeight).toBeCloseTo(1.5 * baseNaturalSingle, 1); }); it('applies higher auto multipliers to the baseline line height', async () => { @@ -1023,7 +1027,10 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - expect(measure.lines[0].lineHeight).toBeCloseTo(2 * fontSize, 1); + // ECMA-376 §17.18.48 auto rule: multiplier × naturalSingle (not fontSize). + // For uncalibrated Arial, naturalSingle falls back to 1.15 × fontSize. + const baseNaturalSingle = 1.15 * fontSize; // 18.4 + expect(measure.lines[0].lineHeight).toBeCloseTo(2 * baseNaturalSingle, 1); }); it('applies large auto values as multipliers', async () => { @@ -1043,7 +1050,8 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - expect(measure.lines[0].lineHeight).toBeCloseTo(42 * 16, 1); + // 42 × naturalSingle (fallback 1.15 × 16 = 18.4) = 772.8. + expect(measure.lines[0].lineHeight).toBeCloseTo(42 * 1.15 * 16, 1); }); it('does not clamp line height for very small fonts', async () => { @@ -1155,10 +1163,14 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - // 0.5 × 16 = 8 px. Previously the `auto` path max-clamped this up to - // 18.4, eating the author's explicit half-spacing intent. - expect(measure.lines[0].lineHeight).toBeCloseTo(0.5 * fontSize, 1); - expect(measure.lines[0].lineHeight).toBeLessThan(fontSize); + // 0.5 × naturalSingle (= 0.5 × 18.4 = 9.2 for uncalibrated Arial). + // Previously the `auto` path max-clamped this up to 18.4, eating the + // author's explicit half-spacing intent. After the §17.18.48 rule + // fix the multiplier applies cleanly, even if the result is still + // smaller than the font's cap height (caller is responsible for + // glyph-clipping guards elsewhere). + const baseNaturalSingle = 1.15 * fontSize; // 18.4 fallback + expect(measure.lines[0].lineHeight).toBeCloseTo(0.5 * baseNaturalSingle, 1); }); it('ensures line height is never smaller than glyph bounds to prevent clipping', async () => { diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 541a05a621..4ffe88b3f5 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -412,6 +412,12 @@ function calculateTypographyMetrics( const resolvedFontSize = normalizeFontSize(fontSize); let ascent: number; let descent: number; + // `naturalSingle` is the font's intrinsic single-line-height — what Word + // uses as the base for `w:lineRule="auto"` rendering. Per-font calibration + // table in fontMetricsCache populates this for fonts where Word's intrinsic + // metrics diverge from the browser's (Aptos, Calibri). Absent calibration, + // we fall back to ascent + descent below. + let naturalSingle: number | undefined; if ( fontInfo && @@ -424,13 +430,14 @@ function calculateTypographyMetrics( const metrics = getFontMetrics(ctx, fontInfo, measurementConfig.mode, measurementConfig.fonts); ascent = roundValue(metrics.ascent); descent = roundValue(metrics.descent); + naturalSingle = metrics.naturalSingleLine; } else { // Fallback approximations for empty paragraphs or missing font info ascent = roundValue(resolvedFontSize * 0.8); descent = roundValue(resolvedFontSize * 0.2); } - const lineHeight = resolveLineHeight(spacing, fontSize, ascent + descent); + const lineHeight = resolveLineHeight(spacing, fontSize, naturalSingle ?? ascent + descent); return { ascent, @@ -3580,10 +3587,19 @@ const resolveLineHeight = ( return naturalSingle; } - // Compute the target per the adapter's unit convention. + // Compute the target per the adapter's unit convention + ECMA-376 §17.18.48. + // + // For `lineUnit='multiplier'`, the adapter has already divided `w:line` by + // 240 (so `line` is the dimensionless ratio). Per spec, auto rule's result + // is `(line/240) × naturalSingle = line × naturalSingle`. We now multiply + // by the natural single-line height (font-calibrated when available) — this + // replaces the prior `line × fontSize` approximation, which coincidentally + // worked for fonts where naturalSingle ≈ fontSize × 1.0 (the `× 1.15` floor + // tricks the arithmetic out for the Normal-style case) but was wrong for + // Aptos/Calibri where naturalSingle ≈ fontSize × 1.218. let target = spacing.line; if (spacing.lineUnit === 'multiplier') { - target = target * fontSize; + target = target * naturalSingle; } const rule = spacing.lineRule ?? 'auto'; diff --git a/packages/layout-engine/pm-adapter/src/converters/table.ts b/packages/layout-engine/pm-adapter/src/converters/table.ts index 452383b756..b1f559bcc5 100644 --- a/packages/layout-engine/pm-adapter/src/converters/table.ts +++ b/packages/layout-engine/pm-adapter/src/converters/table.ts @@ -321,11 +321,45 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { const appendParagraphBlocks = ( paragraphBlocks: FlowBlock[], sdtMetadata?: ReturnType, + sourceChildNode?: PMNode, ) => { applySdtMetadataToParagraphBlocks( paragraphBlocks.filter((block) => block.kind === 'paragraph') as ParagraphBlock[], sdtMetadata, ); + // Apply TableGrid pPr cascade for table-cell paragraphs (ECMA-376 + // §17.3.1.33). The super-converter resolves paragraph attrs at DOCX-import + // time before the table context is known, so paragraphs inside table cells + // currently inherit spacing from docDefaults (`w:line="278"` ≈ 1.15 + // multiplier) instead of TableGrid's `w:line="240"` (= 1.0 multiplier). + // + // We rewrite the line multiplier here where the table context is + // available. Only paragraphs that did NOT carry an explicit `` + // in their pPr are adjusted — those that did are preserved as-is because + // the author's intent was explicit. We detect "no explicit spacing" by + // the absence of `paragraphProperties.spacing` on the source PM node. + // + // TODO(SD-2735 cascade): move this to super-converter / style-engine so + // the cascade is applied once at DOCX-import time for all consumers. + const tableStyleId = tableInfo?.tableProperties?.tableStyleId; + if (tableStyleId && sourceChildNode) { + const sourceParaProps = (sourceChildNode.attrs as { paragraphProperties?: { spacing?: unknown } } | undefined) + ?.paragraphProperties; + const hasExplicitSpacing = !!sourceParaProps?.spacing; + if (!hasExplicitSpacing) { + for (const block of paragraphBlocks) { + if (block.kind !== 'paragraph') continue; + const attrs = (block as ParagraphBlock).attrs as + | { spacing?: { line?: number; lineUnit?: string; lineRule?: string } } + | undefined; + // Only adjust when the inherited spacing is the auto-multiplier + // form. atLeast/exact are absolute and shouldn't be overridden. + if (attrs?.spacing && attrs.spacing.lineUnit === 'multiplier' && attrs.spacing.lineRule === 'auto') { + attrs.spacing = { ...attrs.spacing, line: 1.0 }; + } + } + } + } paragraphBlocks.forEach((block) => { if (block.kind === 'paragraph' || block.kind === 'image' || block.kind === 'drawing') { blocks.push(block); @@ -348,7 +382,7 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { converters: context.converters, enableComments: context.enableComments, }); - appendParagraphBlocks(paragraphBlocks); + appendParagraphBlocks(paragraphBlocks, undefined, childNode); continue; } @@ -369,7 +403,7 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { converters: context.converters, enableComments: context.enableComments, }); - appendParagraphBlocks(paragraphBlocks, structuredContentMetadata); + appendParagraphBlocks(paragraphBlocks, structuredContentMetadata, nestedNode); continue; } if (nestedNode.type === 'table' && tableNodeToBlock) { From 28bdc8fe03bcdb88344d9c3b943df9c595a7f391 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 22:50:31 -0300 Subject: [PATCH 5/6] refactor(sd-2735): tighten comments and extract TableGrid cascade helper - fontMetricsCache.ts: trim JSDoc on FontMetricsResult and the calibration table to the non-obvious bits (why the multiplier table exists; what undefined means for callers). - measuring-dom/index.ts (resolveLineHeight): replace the historical- context prose with a compact rule table + the one non-obvious fact about the fallback floor. Collapse the multiplier-vs-explicit branch. - pm-adapter/converters/table.ts: extract the TableGrid pPr cascade override into a named top-level helper (applyTableGridSpacingCascade) so parseTableCell stays readable. Behavior unchanged. All tests green (measuring-dom 251, pm-adapter 1739, layout-engine 615). --- .../measuring/dom/src/fontMetricsCache.ts | 52 ++++-------- .../layout-engine/measuring/dom/src/index.ts | 79 +++++-------------- .../pm-adapter/src/converters/table.ts | 77 ++++++++---------- 3 files changed, 67 insertions(+), 141 deletions(-) diff --git a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts index a754f8f4a5..025ef3090e 100644 --- a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts +++ b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts @@ -24,16 +24,11 @@ export type FontInfo = { /** * Measured font metrics from the Canvas API. * - * `ascent`/`descent` come from `actualBoundingBox*` on a sample string — these - * are the painted bounds used by downstream code for glyph-clipping guards. - * - * `naturalSingleLine` (optional) is the font's intrinsic single-line-height - * (what Word uses as the base for `w:lineRule="auto"` rendering). It comes - * either from a per-font calibration table (for fonts where Word's intrinsic - * metrics are known to diverge from the browser's) or, when the browser - * exposes it, from `fontBoundingBoxAscent + fontBoundingBoxDescent`. - * When undefined, callers fall back to `ascent + descent` or a - * multiplier-of-fontSize heuristic. + * `ascent`/`descent` are painted bounds from `actualBoundingBox*` (used for + * glyph-clipping guards). `naturalSingleLine` is the font's intrinsic + * single-line height used by `w:lineRule="auto"` — from the calibration + * table when present, else from `fontBoundingBox*` when the browser exposes + * it. Undefined signals "caller pick a fallback". */ export type FontMetricsResult = { ascent: number; @@ -42,22 +37,13 @@ export type FontMetricsResult = { }; /** - * Per-font calibration of natural single-line height, keyed by lowercased - * family name (first entry of a comma-separated stack). - * - * The multiplier is `fontSize × multiplier = naturalSingleLine` in px. These - * values were measured by rendering the font in Word and reading back the - * resulting row heights — the browser's Canvas `fontBoundingBox*` and - * `actualBoundingBox*` do not produce the same numbers because Word uses - * Microsoft's internal metrics from the actual font file (not available to - * the browser unless the font is bundled). + * Natural-single-line multipliers for fonts whose Word-rendered metrics + * differ from what the browser Canvas can measure. The browser does not have + * access to Microsoft's internal font metrics unless the font is bundled, + * so Aptos/Calibri/Cambria must be calibrated against Word's actual output. * - * Aptos (Microsoft's default since late 2023) and Calibri (previous default) - * both render at ~1.218–1.219 × fontSize natural single-line in Word. - * Fonts not in this table fall through to the browser's measured metrics. - * - * Add entries as document-corpus evidence accumulates. Values should match - * Word's output as measured from rendered PDFs at known font sizes. + * Keyed by primary family name lowercased. Add entries when corpus evidence + * shows a systematic gap between Canvas measurement and Word. */ const FONT_NATURAL_LINE_CALIBRATION: Record = { aptos: 1.218, @@ -67,10 +53,6 @@ const FONT_NATURAL_LINE_CALIBRATION: Record = { cambria: 1.219, }; -/** - * Parse a CSS font-family stack and return the first family name lowercased, - * stripped of quotes. Returns empty string for invalid/empty input. - */ const primaryFontFamily = (fontFamily: string): string => { if (typeof fontFamily !== 'string') return ''; const first = fontFamily.split(',')[0] ?? ''; @@ -80,10 +62,6 @@ const primaryFontFamily = (fontFamily: string): string => { .toLowerCase(); }; -/** - * Look up per-font natural-single-line calibration for a given family. - * Returns px value when the family has a calibration entry, undefined otherwise. - */ export const getCalibratedNaturalSingleLine = (fontFamily: string, fontSizePx: number): number | undefined => { const multiplier = FONT_NATURAL_LINE_CALIBRATION[primaryFontFamily(fontFamily)]; if (multiplier == null || !Number.isFinite(fontSizePx) || fontSizePx <= 0) return undefined; @@ -280,11 +258,9 @@ export function getFontMetrics( descent = fontInfo.fontSize * 0.2; } - // Natural single-line height: prefer explicit calibration for fonts where - // Word's intrinsic metrics differ from the browser's measurement, otherwise - // use the browser's `fontBoundingBox*` values when exposed, otherwise leave - // undefined and let the caller apply its own fallback. - let naturalSingleLine: number | undefined = getCalibratedNaturalSingleLine(fontInfo.fontFamily, fontInfo.fontSize); + // Prefer explicit Word-calibration; fall back to the browser's + // fontBoundingBox* when it's exposed; otherwise leave undefined. + let naturalSingleLine = getCalibratedNaturalSingleLine(fontInfo.fontFamily, fontInfo.fontSize); if ( naturalSingleLine == null && typeof textMetrics.fontBoundingBoxAscent === 'number' && diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 4ffe88b3f5..d5e112db82 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -3537,77 +3537,36 @@ const appendSegment = ( }; /** - * Resolve the effective line height for a paragraph per ECMA-376 §17.18.48 - * (ST_LineSpacingRule). - * - * Previous implementation conflated `auto` with `atLeast` — both took the - * max-clamp branch, which meant the author's intent with `w:lineRule="auto" - * w:line="120"` (half-spacing) was silently inflated up to the natural - * single-line height. Per spec, `auto` has "no predetermined minimum or - * maximum" — only `atLeast` is max-clamped. - * - * | lineRule | Formula | - * |----------|------------------------------------------------------------------------| - * | auto | `lineHeight = target` (no floor, no ceiling) | - * | atLeast | `lineHeight = max(target, naturalSingle)` | - * | exact | `lineHeight = target` (clipped by caller if content taller) | - * - * This change is the minimal spec correction: the `target` computation - * below preserves the adapter's existing `lineUnit='multiplier'` convention - * (multiplier × fontSize) even though strict spec would multiply by natural - * single-line height. Migrating that convention is tracked as a follow-up - * (see SD-2735 description) because it requires coordinated changes across - * the adapter + many baseline tests. - * - * @param spacing Paragraph spacing from the resolved style cascade. - * @param fontSize Font size in px (used for unit-less and multiplier paths). - * @param naturalSingleLine Natural single-line height in px (ascent + descent - * from Canvas measurement). When not provided, - * falls back to `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize`. - * @returns Effective line height in px. - * - * @see ECMA-376 Part 1 §17.18.48 ST_LineSpacingRule; §17.3.1.33 spacing. + * Resolve paragraph line height per ECMA-376 §17.18.48 (ST_LineSpacingRule). + * + * | lineRule | Result | + * |----------|---------------------------------------------------------| + * | auto | `target` — no min, no max | + * | atLeast | `max(target, naturalSingle)` | + * | exact | `target` (clipped by caller if content taller) | + * + * For `lineUnit='multiplier'`, `target = line × naturalSingle` — the adapter + * has already divided `w:line` by 240, so `line` is the dimensionless ratio. + * + * `naturalSingleLine` is floored at `WORD_SINGLE_LINE_SPACING_MULTIPLIER × + * fontSize` because many text fonts measure smaller than Word's Normal-style + * baseline; without the floor, unspaced paragraphs would render tighter than + * users expect. */ const resolveLineHeight = ( spacing: ParagraphSpacing | undefined, fontSize: number, naturalSingleLine: number = -1, ): number => { - // Natural single-line height. The caller passes ascent + descent from the - // Canvas measurement when available. We floor it at the historical - // `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize` approximation so fonts - // whose actualBoundingBox is smaller than that (many text fonts) still - // render with the comfortable default spacing Word uses for its Normal - // style. This preserves pre-SD-2735 behaviour for unspaced paragraphs. const fallbackFloor = WORD_SINGLE_LINE_SPACING_MULTIPLIER * fontSize; const naturalSingle = naturalSingleLine > 0 ? Math.max(naturalSingleLine, fallbackFloor) : fallbackFloor; - // No explicit spacing → single-spacing (spec default). - if (!spacing || spacing.line == null) { - return naturalSingle; - } - - // Compute the target per the adapter's unit convention + ECMA-376 §17.18.48. - // - // For `lineUnit='multiplier'`, the adapter has already divided `w:line` by - // 240 (so `line` is the dimensionless ratio). Per spec, auto rule's result - // is `(line/240) × naturalSingle = line × naturalSingle`. We now multiply - // by the natural single-line height (font-calibrated when available) — this - // replaces the prior `line × fontSize` approximation, which coincidentally - // worked for fonts where naturalSingle ≈ fontSize × 1.0 (the `× 1.15` floor - // tricks the arithmetic out for the Normal-style case) but was wrong for - // Aptos/Calibri where naturalSingle ≈ fontSize × 1.218. - let target = spacing.line; - if (spacing.lineUnit === 'multiplier') { - target = target * naturalSingle; - } + if (!spacing || spacing.line == null) return naturalSingle; + const target = spacing.lineUnit === 'multiplier' ? spacing.line * naturalSingle : spacing.line; const rule = spacing.lineRule ?? 'auto'; - if (rule === 'atLeast') { - return Math.max(target, naturalSingle); - } - // `auto` and `exact`: no min-clamp (auto has "no predetermined minimum" - // per §17.18.48; exact is explicitly "shall be exactly the value specified"). + + if (rule === 'atLeast') return Math.max(target, naturalSingle); return target; }; diff --git a/packages/layout-engine/pm-adapter/src/converters/table.ts b/packages/layout-engine/pm-adapter/src/converters/table.ts index b1f559bcc5..658d5c312b 100644 --- a/packages/layout-engine/pm-adapter/src/converters/table.ts +++ b/packages/layout-engine/pm-adapter/src/converters/table.ts @@ -201,6 +201,38 @@ const normalizeRowHeight = (rowProps?: Record): NormalizedRowHe }; }; +/** + * Apply Word's TableGrid pPr cascade to table-cell paragraphs (ECMA-376 + * §17.3.1.33). + * + * The super-converter resolves paragraph attrs at DOCX-import time before the + * table context is known, so paragraphs inside table cells inherit spacing + * from docDefaults (`w:line="278"` ≈ 1.15 multiplier) instead of TableGrid's + * `w:line="240"` (= 1.0 multiplier). We rewrite the line multiplier here + * where the table context is available. + * + * Only paragraphs whose source PM node had no explicit `` are + * adjusted — explicit spacing is the author's intent. Only the auto-multiplier + * form is touched; atLeast/exact are absolute values. + * + * TODO(SD-2735 cascade): move this to super-converter / style-engine so the + * cascade is applied once at DOCX-import time for all consumers. + */ +const applyTableGridSpacingCascade = (paragraphBlocks: FlowBlock[], sourceChildNode?: PMNode): void => { + if (!sourceChildNode) return; + const sourceParaProps = (sourceChildNode.attrs as { paragraphProperties?: { spacing?: unknown } } | undefined) + ?.paragraphProperties; + if (sourceParaProps?.spacing) return; + + for (const block of paragraphBlocks) { + if (block.kind !== 'paragraph' || !block.attrs) continue; + const spacing = block.attrs.spacing; + if (spacing?.lineUnit === 'multiplier' && spacing.lineRule === 'auto') { + block.attrs.spacing = { ...spacing, line: 1.0 }; + } + } +}; + /** * Parse a ProseMirror table cell node into a TableCell block. * @@ -307,17 +339,6 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { const paragraphToFlowBlocks = context.converters.paragraphToFlowBlocks; const tableNodeToBlock = context.converters?.tableNodeToBlock; - /** - * Appends converted paragraph blocks to the cell's blocks array. - * - * This helper: - * 1. Applies SDT metadata to paragraph blocks (for structured content inheritance) - * 2. Filters to only include supported block types (paragraph, image, drawing) - * 3. Appends the filtered blocks to the cell's blocks array - * - * @param paragraphBlocks - The converted flow blocks from a paragraph node - * @param sdtMetadata - Optional SDT metadata to apply (from parent structuredContentBlock) - */ const appendParagraphBlocks = ( paragraphBlocks: FlowBlock[], sdtMetadata?: ReturnType, @@ -327,38 +348,8 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { paragraphBlocks.filter((block) => block.kind === 'paragraph') as ParagraphBlock[], sdtMetadata, ); - // Apply TableGrid pPr cascade for table-cell paragraphs (ECMA-376 - // §17.3.1.33). The super-converter resolves paragraph attrs at DOCX-import - // time before the table context is known, so paragraphs inside table cells - // currently inherit spacing from docDefaults (`w:line="278"` ≈ 1.15 - // multiplier) instead of TableGrid's `w:line="240"` (= 1.0 multiplier). - // - // We rewrite the line multiplier here where the table context is - // available. Only paragraphs that did NOT carry an explicit `` - // in their pPr are adjusted — those that did are preserved as-is because - // the author's intent was explicit. We detect "no explicit spacing" by - // the absence of `paragraphProperties.spacing` on the source PM node. - // - // TODO(SD-2735 cascade): move this to super-converter / style-engine so - // the cascade is applied once at DOCX-import time for all consumers. - const tableStyleId = tableInfo?.tableProperties?.tableStyleId; - if (tableStyleId && sourceChildNode) { - const sourceParaProps = (sourceChildNode.attrs as { paragraphProperties?: { spacing?: unknown } } | undefined) - ?.paragraphProperties; - const hasExplicitSpacing = !!sourceParaProps?.spacing; - if (!hasExplicitSpacing) { - for (const block of paragraphBlocks) { - if (block.kind !== 'paragraph') continue; - const attrs = (block as ParagraphBlock).attrs as - | { spacing?: { line?: number; lineUnit?: string; lineRule?: string } } - | undefined; - // Only adjust when the inherited spacing is the auto-multiplier - // form. atLeast/exact are absolute and shouldn't be overridden. - if (attrs?.spacing && attrs.spacing.lineUnit === 'multiplier' && attrs.spacing.lineRule === 'auto') { - attrs.spacing = { ...attrs.spacing, line: 1.0 }; - } - } - } + if (tableInfo?.tableProperties?.tableStyleId) { + applyTableGridSpacingCascade(paragraphBlocks, sourceChildNode); } paragraphBlocks.forEach((block) => { if (block.kind === 'paragraph' || block.kind === 'image' || block.kind === 'drawing') { From 6d033faccf7f95a511c4ce26898d2edafc7c4aff Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 23:10:42 -0300 Subject: [PATCH 6/6] fix(layout-engine): skip empty sectPr marker before any section break (SD-2735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per ECMA-376 §17.6.17, a inside a paragraph's defines where the section ends — the enclosing paragraph is a section- properties container, not a visible paragraph. Word renders these marker paragraphs at zero height regardless of section-break type. Previously we only skipped the marker when the next break forced a page boundary (nextPage/evenPage/oddPage). For continuous breaks the marker rendered at full docDefaults height (~35 px body + spacing-after), which on IT-945 cost a row per column on page 1 (the intro area consumed 72 px vs Word's 33 px, losing ~20 px of column budget). Extends the skip to continuous section breaks too. Browser-verified on it-945-numbered.docx: page 1 now balances 42+42 rows, page 2 16+14, matching Word's structural layout (41+41 / 17+15) within a single-row margin. Tests: - New regression: 'skips empty sectPr marker paragraph before continuous section break (SD-2735)' asserts the marker and its full-height measure don't appear in the layout. - Existing forced-page-break test unchanged. - layout-engine 150/150 pass; pm-adapter 1739/1739; measuring-dom 251/251. --- .../layout-engine/src/index.test.ts | 42 +++++++++++++++++++ .../layout-engine/layout-engine/src/index.ts | 26 ++++++------ 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index 8033813953..d32afef762 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2420,6 +2420,48 @@ describe('layoutDocument', () => { expect(layout.pages[1].fragments[0].blockId).toBe('p2'); }); + it('skips empty sectPr marker paragraph before continuous section break (SD-2735)', () => { + // IT-945 shape: "This is my first section" + empty sectPr-marker + + // continuous break into 2-col section. The marker is logically a + // section-properties container, not a visible paragraph. Rendering it + // at docDefaults height would cost a row per column on page 1. + const blocks: FlowBlock[] = [ + { kind: 'paragraph', id: 'p1', runs: [{ text: 'First section', fontFamily: 'Arial', fontSize: 12 }] }, + { + kind: 'paragraph', + id: 'p-marker', + runs: [{ text: '', fontFamily: 'Arial', fontSize: 12 }], + attrs: { sectPrMarker: true }, + }, + { + kind: 'sectionBreak', + id: 'sb1', + type: 'continuous', + margins: {}, + pageSize: { w: 612, h: 792 }, + columns: { count: 2, gap: 36 }, + attrs: { source: 'sectPr' }, + }, + { kind: 'paragraph', id: 'p2', runs: [{ text: 'In 2-col section', fontFamily: 'Arial', fontSize: 12 }] }, + ]; + const measures: Measure[] = [ + { kind: 'paragraph', lines: [makeLine(20)], totalHeight: 20 }, + { kind: 'paragraph', lines: [makeLine(16)], totalHeight: 16 }, + { kind: 'sectionBreak' }, + { kind: 'paragraph', lines: [makeLine(20)], totalHeight: 20 }, + ]; + + const layout = layoutDocument(blocks, measures, { + pageSize: { w: 612, h: 792 }, + margins: { top: 72, right: 72, bottom: 72, left: 72 }, + }); + + // Both visible paragraphs land on page 1; the marker does not appear. + expect(layout.pages).toHaveLength(1); + const fragmentIds = layout.pages[0].fragments.map((f) => f.blockId); + expect(fragmentIds).toEqual(['p1', 'p2']); + }); + it('skips empty sectPr marker paragraph before forced section break', () => { const blocks: FlowBlock[] = [ { kind: 'paragraph', id: 'p1', runs: [{ text: 'Content', fontFamily: 'Arial', fontSize: 12 }] }, diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 8776c0cfd4..9c95fe44e4 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1987,25 +1987,23 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (isEmpty) { const isSectPrMarker = paraBlock.attrs?.sectPrMarker === true; - // Check if previous block was pageBreak and next block is sectionBreak const prevBlock = index > 0 ? blocks[index - 1] : null; const nextBlock = index < blocks.length - 1 ? blocks[index + 1] : null; - - const nextSectionBreak = nextBlock?.kind === 'sectionBreak' ? (nextBlock as SectionBreakBlock) : null; - const nextBreakType = - nextSectionBreak?.type ?? (nextSectionBreak?.attrs?.source === 'sectPr' ? 'nextPage' : undefined); - const nextBreakForcesPage = - nextSectionBreak && - (nextBreakType === 'nextPage' || - nextBreakType === 'evenPage' || - nextBreakType === 'oddPage' || - nextSectionBreak.attrs?.requirePageBoundary === true); - - if (isSectPrMarker && nextBreakForcesPage) { + const nextIsSectionBreak = nextBlock?.kind === 'sectionBreak'; + + // A section-break marker paragraph (`` with + // no runs) carries only section metadata in Word — it's not a visible + // paragraph. Skip it regardless of break type (nextPage/continuous/ + // evenPage/oddPage) so intro spacing matches Word for all section + // transitions. Per ECMA-376 §17.6.17 the sectPr defines where the + // section ends, not a rendered paragraph. + if (isSectPrMarker && nextIsSectionBreak) { continue; } - if (prevBlock?.kind === 'pageBreak' && nextBlock?.kind === 'sectionBreak') { + // Pre-sectPrMarker path: empty paragraph sandwiched between an + // explicit page break and a section break — same "pure marker" shape. + if (prevBlock?.kind === 'pageBreak' && nextIsSectionBreak) { continue; } }