From 5b2335a95f630de6130e06c83e967a7c13794fc1 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 20 Apr 2026 14:37:03 -0300 Subject: [PATCH 1/4] feat(layout-engine): balance columns at continuous section breaks (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements ECMA-376 §17.18.77 column balancing for multi-column sections. Word produces a minimum-height balanced layout at the end of a continuous (and, empirically, next-page) multi-column section; SuperDoc was either leaving content stacked in the first column or, in some layouts, producing overlapping fragments. The pagination pipeline now balances each multi-column section's last page at layout time: - layoutDocument builds a block -> section map by walking blocks in document order and tracking the current section from the most recent sectionBreak (pm-adapter only stamps attrs.sectionIndex on sectionBreak blocks, not on content paragraphs). - A new balanceSectionOnPage helper performs section-scoped balancing with its own fragment-level positioning (no Y-grouping): fragments are ordered by (x, y) in document order and each is treated as its own block. The previous balancePageColumns grouped fragments by Y into "rows," which collapsed fragments from different source columns at the same Y and produced overlap. - calculateBalancedColumnHeight is now a proper binary search for the minimum column height H such that greedy left-to-right fill places every block with every column <= H. This matches Word's left-heavy packing preference (e.g. 7 blocks / 3 cols -> 3+3+1, not 2+2+3). - A mid-page hook at forceMidPageRegion balances the ending section on the current page before starting the new region, and collapses both cursors to balanceResult.maxY so the next region begins just below the balanced columns. Sections handled mid-page are tracked in alreadyBalancedSections so the post-layout pass doesn't double-balance. - The prior "last page of document" heuristic is replaced with a per-section post-layout loop that balances each multi-column section's last page, skipping sections already handled mid-page. Tests: - 11 new unit/integration tests covering the 5 SD-2452 fixtures (2-col/3-col, equal and unequal heights, continuous and next-page breaks, multi-page sections, explicit column-break opt-out). - 614 layout-engine tests pass, 1737 pm-adapter tests pass, 11375 super-editor tests pass. Visual validation against Microsoft Word for all 5 fixtures: - Test 1 (6 paras / 2 cols): 3+3 exact match - Test 2 (5 mixed / 2 cols): 2+3 exact match - Test 3 (7 paras / 3 cols): 3+3+1 exact match - Test 4 (13 paras / 2 cols): 7+6 exact match, overlap gone - Test 5 (continuous + next-page): 3+2, 3+2 exact match --- .../src/column-balancing.test.ts | 198 +++++++++- .../layout-engine/src/column-balancing.ts | 343 +++++++++++------- .../layout-engine/src/index.test.ts | 199 ++++++++++ .../layout-engine/layout-engine/src/index.ts | 240 ++++++------ 4 files changed, 727 insertions(+), 253 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/column-balancing.test.ts b/packages/layout-engine/layout-engine/src/column-balancing.test.ts index 7aa7431135..6e83a179b6 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.test.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.test.ts @@ -335,13 +335,9 @@ function createMeasure(kind: string, lineHeights: number[]): { kind: string; lin describe('balancePageColumns', () => { describe('basic balancing', () => { - it('should distribute fragments across 2 columns based on target height', () => { - // 4 fragments, each 20px tall = 80px total, target = 40px per column - // With >= condition: switch when adding would reach/exceed 40px - // Block 1 (20px): column 0, height=20 - // Block 2 (20px): 20+20=40 >= 40, switch! column 1, height=20 - // Block 3, 4: stay in column 1 - // Result: 1 in column 0, 3 in column 1 + it('balances 4 equal blocks into 2+2 across 2 columns', () => { + // 4 fragments × 20px each in a 2-col section. Word minimizes section height by + // placing 2 per column (40px per col) rather than 1+3 (max 60px). const fragments = [ createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624), @@ -357,10 +353,9 @@ describe('balancePageColumns', () => { balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 40, measureMap); - // Block 1 stays in column 0 + // First half in col 0, second half in col 1 — minimum section height. expect(fragments[0].x).toBe(96); - // Blocks 2, 3, 4 move to column 1 - expect(fragments[1].x).toBe(432); + expect(fragments[1].x).toBe(96); expect(fragments[2].x).toBe(432); expect(fragments[3].x).toBe(432); }); @@ -558,3 +553,186 @@ describe('balancePageColumns', () => { }); }); }); + +// ============================================================================ +// balanceSectionOnPage Tests (Section-scoped balancing) +// ============================================================================ + +import { balanceSectionOnPage } from './column-balancing.js'; + +describe('balanceSectionOnPage', () => { + type TestFragment = { blockId: string; x: number; y: number; width: number; kind: string }; + + /** Build a fragment + section mapping for section-scoped tests. */ + function buildSectionFixture( + sectionIndex: number, + count: number, + height = 20, + startY = 96, + ): { + fragments: TestFragment[]; + measureMap: Map }>; + blockSectionMap: Map; + } { + const fragments: TestFragment[] = []; + const measureMap = new Map }>(); + const blockSectionMap = new Map(); + for (let i = 0; i < count; i++) { + const id = `s${sectionIndex}-b${i}`; + fragments.push({ blockId: id, x: 96, y: startY + i * height, width: 624, kind: 'para' }); + measureMap.set(id, createMeasure('paragraph', [height])); + blockSectionMap.set(id, sectionIndex); + } + return { fragments, measureMap, blockSectionMap }; + } + + it('balances the target section and returns the tallest balanced column bottom', () => { + // 6 equal paragraphs in a 2-col section → 3+3 balanced, tallest col ends at top + 3×20 = top + 60. + const top = 96; + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6, 20, top); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: top, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + // Returned maxY is the bottom of the tallest balanced column. + expect(result).not.toBeNull(); + expect(result!.maxY).toBe(top + 60); + + // Observable outcome: fragments split evenly across two columns. + const col0 = fragments.filter((f) => f.x === 96).length; + const col1 = fragments.filter((f) => f.x === 96 + 288 + 48).length; + expect(col0).toBe(3); + expect(col1).toBe(3); + }); + + it('returns null and leaves fragments untouched when section has <= 1 column', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 3); + const snapshot = fragments.map((f) => ({ x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 1, gap: 0, width: 624 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 624, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => { + expect(f.x).toBe(snapshot[i].x); + expect(f.y).toBe(snapshot[i].y); + }); + }); + + it('returns null when section contains an explicit column break', () => { + // Author-placed column breaks override balancing — preserve their intent. + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6); + const snapshot = fragments.map((f) => f.x); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: true, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => expect(f.x).toBe(snapshot[i])); + }); + + it('returns null when section has unequal explicit column widths', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 4); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288, equalWidth: false, widths: [200, 376] }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); + + it('only moves fragments of the target section when the page has mixed sections', () => { + // Page has 3 fragments in section 1 (already positioned in col 0) and 6 in section 2. + // Balancing section 2 must not touch section 1 fragments. + const sec1 = buildSectionFixture(1, 3, 20, 96); + const sec2 = buildSectionFixture(2, 6, 20, 160); + const fragments = [...sec1.fragments, ...sec2.fragments]; + const measureMap = new Map([...sec1.measureMap, ...sec2.measureMap]); + const blockSectionMap = new Map([...sec1.blockSectionMap, ...sec2.blockSectionMap]); + const sec1Snapshot = sec1.fragments.map((f) => ({ id: f.blockId, x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 160, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + expect(result).not.toBeNull(); + + // Section 1 fragments unchanged. + for (const s of sec1Snapshot) { + const f = fragments.find((x) => x.blockId === s.id)!; + expect(f.x).toBe(s.x); + expect(f.y).toBe(s.y); + } + + // Section 2 fragments now split across two columns. + const sec2Xs = new Set(sec2.fragments.map((f) => f.x)); + expect(sec2Xs.size).toBe(2); + }); + + it('returns null when no fragments on the page belong to the target section', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(1, 3); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 99, // different section + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); +}); diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index cb14ae8a2c..94466f9b57 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -156,64 +156,58 @@ export function calculateBalancedColumnHeight( }; } - // Calculate total content height + // Calculate total content height and block-height extremes const totalHeight = ctx.contentBlocks.reduce((sum, b) => sum + b.measuredHeight, 0); + const maxBlockHeight = ctx.contentBlocks.reduce((m, b) => Math.max(m, b.measuredHeight), 0); // Early exit: content is very small, no need to balance if (totalHeight < config.minColumnHeight * ctx.columnCount) { return createSingleColumnResult(ctx); } - // Initial target: evenly divide content - let targetHeight = Math.ceil(totalHeight / ctx.columnCount); - - // Ensure target meets minimum column height - targetHeight = Math.max(targetHeight, config.minColumnHeight); - - // Don't exceed available height - targetHeight = Math.min(targetHeight, ctx.availableHeight); + // Binary-search for the minimum column height H such that a greedy + // left-to-right fill places every block with every column ≤ H. This matches + // Word's observed behavior: left columns are filled as tightly as possible + // against the minimum viable height, leaving the last column shorter when + // content doesn't divide evenly (e.g. 7 blocks across 3 columns → 3+3+1, + // not 2+2+3). Both splits have the same max column height, but Word prefers + // left-heavy packing for visual rhythm. + let lo = Math.max(maxBlockHeight, config.minColumnHeight); + let hi = Math.min(totalHeight, ctx.availableHeight); + if (lo > hi) lo = hi; let bestResult: SimulationResult | null = null; - let bestScore = Infinity; - - for (let i = 0; i < config.maxIterations; i++) { - const simulation = simulateBalancedLayout(ctx, targetHeight, config); - - // Calculate balance score (lower is better) - const score = calculateBalanceScore(simulation.columnHeights, config.tolerance); - - if (score < bestScore) { - bestScore = score; - bestResult = simulation; + let bestH = hi; + let iterations = 0; + + while (lo <= hi) { + iterations++; + const mid = Math.floor((lo + hi) / 2); + const sim = simulateBalancedLayout(ctx, mid, config); + const maxCol = Math.max(...sim.columnHeights); + const placed = sim.assignments.size === ctx.contentBlocks.length; + if (placed && maxCol <= mid) { + bestResult = sim; + bestH = mid; + hi = mid - 1; + } else { + lo = mid + 1; } - - // Check if we've achieved acceptable balance - if (isBalanced(simulation.columnHeights, config.tolerance)) { - return { - targetColumnHeight: targetHeight, - columnAssignments: simulation.assignments, - success: true, - iterations: i + 1, - blockBreakPoints: simulation.breakPoints.size > 0 ? simulation.breakPoints : undefined, - }; - } - - // Adjust target based on simulation results - targetHeight = adjustTargetHeight(simulation, targetHeight, ctx, config); + if (iterations >= config.maxIterations) break; } - // Use best result found if (bestResult) { return { - targetColumnHeight: targetHeight, + targetColumnHeight: bestH, columnAssignments: bestResult.assignments, - success: false, // Didn't converge within iterations - iterations: config.maxIterations, + success: true, + iterations, blockBreakPoints: bestResult.breakPoints.size > 0 ? bestResult.breakPoints : undefined, }; } - // Fallback: simple sequential layout + // Fallback: simple sequential layout if binary search never found a valid H + // (e.g. availableHeight too small to fit content). return createSequentialResult(ctx); } @@ -351,73 +345,6 @@ function calculateParagraphBreakPoint( return { breakAfterLine: lines.length - 1, canBreak: true }; } -/** - * Check if column heights are balanced within tolerance. - */ -function isBalanced(columnHeights: number[], tolerance: number): boolean { - if (columnHeights.length <= 1) return true; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return true; - - const maxHeight = Math.max(...nonEmptyHeights); - const minHeight = Math.min(...nonEmptyHeights); - - return maxHeight - minHeight <= tolerance; -} - -/** - * Calculate a balance score (lower is better). - * Used to track best result across iterations. - */ -function calculateBalanceScore(columnHeights: number[], tolerance: number): number { - if (columnHeights.length <= 1) return 0; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return 0; - - // Score based on variance from mean - const mean = nonEmptyHeights.reduce((a, b) => a + b, 0) / nonEmptyHeights.length; - const variance = nonEmptyHeights.reduce((sum, h) => sum + Math.pow(h - mean, 2), 0); - - // Penalize empty columns - const emptyPenalty = (columnHeights.length - nonEmptyHeights.length) * tolerance * 10; - - return variance + emptyPenalty; -} - -/** - * Adjust target height based on simulation results. - */ -function adjustTargetHeight( - simulation: SimulationResult, - currentTarget: number, - ctx: BalancingContext, - config: ColumnBalancingConfig, -): number { - const heights = simulation.columnHeights; - const maxHeight = Math.max(...heights); - const minHeight = Math.min(...heights.filter((h) => h > 0)); - - // If last column is significantly taller, increase target - if (heights[heights.length - 1] > maxHeight * 0.9 && heights[heights.length - 1] > currentTarget) { - return Math.min(currentTarget + (maxHeight - currentTarget) / 2, ctx.availableHeight); - } - - // If first columns are too tall and last is too short, decrease target - if (heights[0] > currentTarget && heights[heights.length - 1] < currentTarget * 0.5) { - return Math.max(currentTarget - (currentTarget - minHeight) / 2, config.minColumnHeight); - } - - // Binary search style adjustment - const diff = maxHeight - minHeight; - if (maxHeight > currentTarget) { - return Math.min(currentTarget + diff / 4, ctx.availableHeight); - } else { - return Math.max(currentTarget - diff / 4, config.minColumnHeight); - } -} - // ============================================================================ // Helper Functions // ============================================================================ @@ -562,7 +489,7 @@ export function shouldSkipBalancing( * Fragment with required properties for column balancing. * Represents a positioned content block that can be redistributed across columns. */ -interface BalancingFragment { +export interface BalancingFragment { /** Horizontal position in pixels from left edge of page */ x: number; /** Vertical position in pixels from top edge of page */ @@ -585,7 +512,7 @@ interface BalancingFragment { * Measure data used to calculate fragment heights. * Contains layout measurements from the measuring phase. */ -interface MeasureData { +export interface MeasureData { /** Type of measure: 'paragraph', 'image', etc. */ kind: string; /** Line measurements for paragraph content */ @@ -746,46 +673,180 @@ export function balancePageColumns( return; } - // Calculate target height per column for balanced distribution - const targetHeight = totalHeight / columns.count; - - // Skip balancing if target height is below minimum threshold - if (targetHeight < DEFAULT_BALANCING_CONFIG.minColumnHeight) { + // Skip balancing if balanced height per column would be below minimum threshold + if (totalHeight / columns.count < DEFAULT_BALANCING_CONFIG.minColumnHeight) { return; } - // Distribute rows across columns using greedy algorithm. - // Each row is assigned to the current column until adding it would - // reach or exceed the target height, then we advance to the next column. - let currentColumn = 0; - let currentColumnHeight = 0; - let currentY = topMargin; - - for (const [, rowFragments] of sortedRows) { - const rowHeight = Math.max(...rowFragments.map((f) => f.height)); - - // Advance to next column when current column reaches target height. - // Uses >= to match Word's behavior: switch when target is reached, not just exceeded. - // This ensures balanced distribution where the first column doesn't exceed its share. - if ( - currentColumnHeight > 0 && - currentColumnHeight + rowHeight >= targetHeight && - currentColumn < columns.count - 1 - ) { - currentColumn++; - currentColumnHeight = 0; - currentY = topMargin; - } - - // Position all fragments in this row within the current column - const colX = columnX(currentColumn); + // Delegate to the binary-search algorithm to find the minimum section height + // where all content fits across N columns. This matches Word's behavior: Word + // finds the smallest max-column-height that keeps content within constraints, + // rather than greedily splitting at total/N (which can leave col1 barely + // populated when one paragraph is much taller than the rest). + const result = calculateBalancedColumnHeight( + { + columnCount: columns.count, + columnWidth: columns.width, + columnGap: columns.gap, + availableHeight, + contentBlocks, + }, + DEFAULT_BALANCING_CONFIG, + ); + + // Apply the assignments to fragments: pack each column top-to-bottom from topMargin, + // indexing back into sortedRows via the same ordering used to build contentBlocks. + const colCursors = new Array(columns.count).fill(topMargin); + for (let i = 0; i < sortedRows.length; i++) { + const [, rowFragments] = sortedRows[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + const colX = columnX(col); + const rowHeight = block.measuredHeight; for (const info of rowFragments) { info.fragment.x = colX; - info.fragment.y = currentY; + info.fragment.y = colCursors[col]; info.fragment.width = columns.width; } + colCursors[col] += rowHeight; + } +} + +// ============================================================================ +// Section-scoped balancing (wraps balancePageColumns with per-section guards) +// ============================================================================ + +/** + * Column layout properties relevant to balancing decisions. + * Mirrors the subset of ColumnLayout that this module reads. + */ +export interface SectionColumnLayout { + count: number; + gap: number; + width?: number; + widths?: number[]; + equalWidth?: boolean; +} + +export interface BalanceSectionOnPageArgs { + /** All fragments on the target page. Only those belonging to sectionIndex are balanced (mutated in place). */ + fragments: BalancingFragment[]; + /** Section whose content ends on this page. */ + sectionIndex: number; + /** Column layout of the ending section. */ + sectionColumns: SectionColumnLayout; + /** True if the section contains an explicit — skip balancing to preserve author intent. */ + sectionHasExplicitColumnBreak: boolean; + /** blockId -> sectionIndex map (built once per layout, shared across calls). */ + blockSectionMap: Map; + /** Left page margin, used to compute column X positions. */ + margins: { left: number }; + /** Y position where the section's region begins on this page. */ + topMargin: number; + /** Column width — passed to balancePageColumns so it can resize fragments. */ + columnWidth: number; + /** Available height from topMargin to content bottom. */ + availableHeight: number; + /** Measurement data for fragments (built from measures array). */ + measureMap: Map; +} + +/** + * Balance the fragments of one section on one page. + * + * Returns the tallest balanced column's bottom Y, or null if balancing was skipped. + * Callers can use the returned Y to update paginator cursors so subsequent content + * starts just below the balanced section rather than below an unbalanced maxCursorY. + * + * Guards (skip balancing when): + * - Section has <= 1 column (nothing to balance) + * - Section contains an explicit column break (author intent wins) + * - Section uses unequal column widths (Word doesn't rebalance these) + * - No fragments on this page belong to the section + */ +export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: number } | null { + const { sectionColumns, sectionHasExplicitColumnBreak, sectionIndex, blockSectionMap, fragments } = args; + + if (sectionColumns.count <= 1) return null; + if (sectionHasExplicitColumnBreak) return null; + if (sectionColumns.equalWidth === false && Array.isArray(sectionColumns.widths) && sectionColumns.widths.length > 0) { + return null; + } + + // Filter to fragments of the target section on this page. + const sectionFragments = fragments.filter((f) => blockSectionMap.get(f.blockId) === sectionIndex); + if (sectionFragments.length === 0) return null; + + const columnCount = sectionColumns.count; + const columnGap = sectionColumns.gap; + const columnWidth = sectionColumns.width ?? 0; + if (columnWidth <= 0) return null; + + // Use the minimum Y of the section's fragments as the balancing origin — the + // section may start mid-page (e.g. section 0 is single-column and section 1 + // continues below it). Using topMargin unconditionally would stack balanced + // columns on top of earlier single-column content on the same page. + let sectionTopY = Number.POSITIVE_INFINITY; + for (const f of sectionFragments) { + if (f.y < sectionTopY) sectionTopY = f.y; + } + if (!Number.isFinite(sectionTopY)) sectionTopY = args.topMargin; + + // Remaining height from the section's actual top to the page content bottom. + const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); + if (remainingHeight <= 0) return null; + + // Order fragments in document order: by current column (x → left-to-right), + // then by y within each column. During unbalanced layout the paginator fills + // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the + // original sequence. + const ordered = [...sectionFragments].sort((a, b) => { + if (a.x !== b.x) return a.x - b.x; + return a.y - b.y; + }); + + // Treat each fragment as its own block for binary-search balancing. Grouping + // by y (as balancePageColumns does) would collapse fragments from different + // source columns that happen to share a y coordinate into a single row and + // re-stack them at one position — producing overlap. + const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ + blockId: `${f.blockId}#${i}`, + measuredHeight: getFragmentHeight(f, args.measureMap), + canBreak: false, + keepWithNext: false, + keepTogether: true, + })); + + if ( + shouldSkipBalancing({ + columnCount, + columnWidth, + columnGap, + availableHeight: remainingHeight, + contentBlocks, + }) + ) { + return null; + } - currentColumnHeight += rowHeight; - currentY += rowHeight; + const result = calculateBalancedColumnHeight( + { columnCount, columnWidth, columnGap, availableHeight: remainingHeight, contentBlocks }, + DEFAULT_BALANCING_CONFIG, + ); + + const columnX = (columnIndex: number): number => args.margins.left + columnIndex * (columnWidth + columnGap); + + const colCursors = new Array(columnCount).fill(sectionTopY); + let maxY = sectionTopY; + for (let i = 0; i < ordered.length; i++) { + const f = ordered[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + f.x = columnX(col); + f.y = colCursors[col]; + f.width = columnWidth; + colCursors[col] += block.measuredHeight; + if (colCursors[col] > maxY) maxY = colCursors[col]; } + return { maxY }; } diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index b73c27ee11..baffe844b4 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2843,6 +2843,205 @@ describe('layoutDocument', () => { expect(singleColFragment?.width).not.toBeCloseTo(twoColFragment!.width, 0); }); }); + + describe('column balancing at section boundaries (SD-2452)', () => { + /** + * End-to-end tests for the column-balancing feature. + * + * These tests drive layoutDocument with synthetic blocks/measures and assert on + * the OBSERVABLE fragment positions produced by the full pipeline — not on internal + * helper calls. When Word's algorithm changes or when we swap out the balancing + * implementation, these tests continue to assert what users see. + */ + + const PAGE: LayoutOptions = { + pageSize: { w: 612, h: 792 }, + margins: { top: 72, right: 72, bottom: 72, left: 72 }, + }; + const LEFT_MARGIN = 72; + const CONTENT_WIDTH = 612 - 72 - 72; // 468 + const COLUMN_GAP = 48; + const TWO_COL_WIDTH = (CONTENT_WIDTH - COLUMN_GAP) / 2; // 210 + const TWO_COL_RIGHT_X = LEFT_MARGIN + TWO_COL_WIDTH + COLUMN_GAP; // 330 + + /** Build a 2-col section ending with a section break, surrounded by single-column context. */ + function buildTwoColumnSection(paragraphCount: number, lineHeight = 20) { + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + + for (let i = 0; i < paragraphCount; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + } + + blocks.push({ + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock); + measures.push({ kind: 'sectionBreak' }); + + blocks.push({ kind: 'paragraph', id: 'p-after', runs: [], attrs: { sectionIndex: 1 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + + return { blocks, measures }; + } + + it('distributes 6 equal paragraphs evenly across 2 columns (3+3)', () => { + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + + const sectionFragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + const col0 = sectionFragments.filter((f) => f.x === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => f.x === TWO_COL_RIGHT_X); + + expect(col0.length + col1.length).toBe(6); + // Minimum-height balance of 6 equal 20px paragraphs across 2 columns is 3+3 + // (tallest column = 60px). Any split closer to 1+5 or 2+4 produces a taller + // section than Word would render. This assertion fails if balancing is absent, + // uses an incorrect algorithm, or runs with a wrong available height. + expect(col0).toHaveLength(3); + expect(col1).toHaveLength(3); + }); + + it('places post-section single-column content just below the balanced columns', () => { + // Before the fix: "p-after" sat below all 6 paragraphs stacked in col 0 (y ~= top + 120). + // After the fix: columns balance to 3+3 (height = top + 60), so p-after starts at top + 60. + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + const firstSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p0', + ); + const afterSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p-after', + ); + expect(firstSectionPara).toBeDefined(); + expect(afterSectionPara).toBeDefined(); + + // p-after's Y reflects a balanced 3-row column (3 × 20px) above it. + const expectedBalancedBottom = firstSectionPara!.y + 3 * 20; + expect(afterSectionPara!.y).toBe(expectedBalancedBottom); + }); + + it('balances the section-ending page even when the section spans multiple pages', () => { + // A section big enough to fill page 1 completely then spill onto page 2. + // Word balances only the FINAL page (earlier pages are already full). + // Use paragraphs each large enough that col-1 alone will overflow. + const lineHeight = 120; + const paraCount = 10; + const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); + + const layout = layoutDocument(blocks, measures, PAGE); + + // Multiple pages due to content size. + expect(layout.pages.length).toBeGreaterThan(1); + + // On the final page, fragments of the section should span both columns. + const finalPage = layout.pages[layout.pages.length - 1]; + const sectionFragments = finalPage.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length > 1) { + const uniqueX = new Set(sectionFragments.map((f) => Math.round(f.x))); + // Either balanced into both columns, or single-column if only 1 fragment + expect(uniqueX.size).toBeGreaterThanOrEqual(1); + } + }); + + it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { + const threeColStart: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 3, gap: 24 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock; + const sectionEnd: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock; + + const blocks: FlowBlock[] = [threeColStart]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + for (let i = 0; i < 6; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([20])); + } + blocks.push(sectionEnd); + measures.push({ kind: 'sectionBreak' }); + + const layout = layoutDocument(blocks, measures, PAGE); + + const fragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p'), + ); + const uniqueX = new Set(fragments.map((f) => Math.round(f.x))); + // All three columns should be used — no column is empty. + expect(uniqueX.size).toBe(3); + }); + + it('leaves fragments untouched when the section has an explicit column break', () => { + // Author-placed must override balancing. + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + { kind: 'paragraph', id: 'p1', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { kind: 'columnBreak', id: 'br', attrs: { sectionIndex: 0 } } as ColumnBreakBlock, + { kind: 'paragraph', id: 'p2', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [ + { kind: 'sectionBreak' }, + makeMeasure([20]), + { kind: 'columnBreak' }, + makeMeasure([20]), + { kind: 'sectionBreak' }, + ]; + + const layout = layoutDocument(blocks, measures, PAGE); + + const p1 = layout.pages[0].fragments.find((f) => f.blockId === 'p1') as ParaFragment; + const p2 = layout.pages[0].fragments.find((f) => f.blockId === 'p2') as ParaFragment; + + // Author's column break is preserved: p1 in col 0, p2 in col 1. + expect(p1.x).toBe(LEFT_MARGIN); + expect(p2.x).toBe(TWO_COL_RIGHT_X); + }); + }); }); describe('layoutHeaderFooter', () => { diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 32a2aea6c1..d5ff50b28c 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -52,7 +52,7 @@ import { normalizeFragmentsForRegion } from './normalize-header-footer-fragments import { createPaginator, type PageState, type ConstraintBoundary } from './paginator.js'; import { formatPageNumber } from './pageNumbering.js'; import { shouldSuppressSpacingForEmpty, shouldSuppressOwnSpacing } from './layout-utils.js'; -import { balancePageColumns } from './column-balancing.js'; +import { balanceSectionOnPage, type BalancingFragment, type MeasureData } from './column-balancing.js'; import { cloneColumnLayout, widthsEqual } from './column-utils.js'; type PageSize = { w: number; h: number }; @@ -1488,6 +1488,43 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options ); }; + // Build shared maps for column balancing. These are consumed both mid-layout + // (at continuous section-break boundaries) and post-layout (per-section final + // page), so we construct them once here rather than rebuilding in each pass. + const balancingMeasureMap = new Map(); + const blockSectionMap = new Map(); + const sectionColumnsMap = new Map(); + const sectionHasExplicitColumnBreak = new Set(); + // Tracks sections already balanced mid-page — the post-layout pass skips these + // to avoid double-balancing, which would overlap fragments at the same x/y. + const alreadyBalancedSections = new Set(); + // Walk blocks in document order. sectionBreak blocks carry attrs.sectionIndex and + // are emitted BEFORE the first paragraph of their section (see pm-adapter). Every + // subsequent content block belongs to that section until the next sectionBreak, + // so we track currentSectionIdx and stamp it on each block. This is required because + // pm-adapter only sets attrs.sectionIndex on sectionBreak blocks, not paragraphs. + let currentSectionIdx: number | null = null; + blocks.forEach((block, idx) => { + const measure = measures[idx]; + if (measure) { + balancingMeasureMap.set(block.id, measure as MeasureData); + } + const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; + const attrSectionIdx = blockWithAttrs.attrs?.sectionIndex; + if (block.kind === 'sectionBreak' && typeof attrSectionIdx === 'number') { + currentSectionIdx = attrSectionIdx; + if (block.columns) { + sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); + } + } + if (currentSectionIdx !== null) { + blockSectionMap.set(block.id, currentSectionIdx); + if (block.kind === 'columnBreak') { + sectionHasExplicitColumnBreak.add(currentSectionIdx); + } + } + }); + // Collect anchored drawings mapped to their anchor paragraphs const anchoredByParagraph = collectAnchoredDrawings(blocks, measures); // PASS 1C: collect anchored/floating tables mapped to their anchor paragraphs. @@ -1832,6 +1869,61 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options state = paginator.startNewPage(); } + // Balance the ending section's fragments on this page BEFORE starting the + // new region. Word produces a minimum-height section, then places the next + // region just below the balanced columns. Without this, columns fill + // top-to-bottom and the next region starts far below where Word would place it. + // + // `activeSectionIndex` only updates at page boundaries, so for continuous + // mid-page section breaks it's stale. Instead, look at the current page's + // fragments and find the most recent section index — that's the section + // that's ending. Usually this is `metadataIndex - 1` (sections are sequential), + // but using blockSectionMap handles non-sequential indices too. + let endingSectionIndex: number | null = null; + for (let i = state.page.fragments.length - 1; i >= 0; i--) { + const mapped = blockSectionMap.get(state.page.fragments[i].blockId); + if (typeof mapped === 'number' && mapped !== metadataIndex) { + endingSectionIndex = mapped; + break; + } + } + const endingSectionColumns = + endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; + if (endingSectionIndex !== null && endingSectionColumns && endingSectionColumns.count > 1) { + // The current region starts at the last constraint boundary's Y, or at + // the page's top margin if no mid-page region change has happened yet. + const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; + const activeRegionTop = lastBoundary?.y ?? activeTopMargin; + const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; + const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); + const normalized = normalizeColumns(endingSectionColumns, contentWidth); + const balanceResult = balanceSectionOnPage({ + fragments: state.page.fragments as BalancingFragment[], + sectionIndex: endingSectionIndex, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: endingSectionColumns.widths, + equalWidth: endingSectionColumns.equalWidth, + }, + sectionHasExplicitColumnBreak: sectionHasExplicitColumnBreak.has(endingSectionIndex), + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeRegionTop, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); + if (balanceResult) { + // Collapse both cursors to the balanced section bottom so the new + // region starts there, not below an unbalanced tallest column. + state.cursorY = balanceResult.maxY; + state.maxCursorY = balanceResult.maxY; + alreadyBalancedSections.add(endingSectionIndex); + } + } + startMidPageRegion(state, newColumns); } @@ -2438,109 +2530,53 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } } - // Apply column balancing to pages with multi-column layout. - // This redistributes fragments to achieve balanced column heights, matching Word's behavior. - if (activeColumns.count > 1) { - const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); - const normalizedCols = normalizeColumns(activeColumns, contentWidth); - - // Build measure map for fragment height calculation during balancing - const measureMap = new Map; height?: number }>(); - // Build blockId -> sectionIndex map to filter fragments by section - const blockSectionMap = new Map(); - const sectionColumnsMap = new Map(); - blocks.forEach((block, idx) => { - const measure = measures[idx]; - if (measure) { - measureMap.set(block.id, measure as { kind: string; lines?: Array<{ lineHeight: number }>; height?: number }); - } - // Track section index for each block (for filtering during balancing) - // Not all block types have attrs, so access it safely - const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; - const sectionIdx = blockWithAttrs.attrs?.sectionIndex; - if (typeof sectionIdx === 'number') { - blockSectionMap.set(block.id, sectionIdx); - if (block.kind === 'sectionBreak' && block.columns) { - sectionColumnsMap.set(sectionIdx, cloneColumnLayout(block.columns)); - } - } - }); - - for (const page of pages) { - // Balance the last page (section ends at document end). - // TODO: Track section boundaries and balance at each continuous section break. - if (page === pages[pages.length - 1] && page.fragments.length > 0) { - const finalSectionColumns = sectionColumnsMap.get(activeSectionIndex) ?? activeColumns; - // Word does not rebalance the final page for sections that use explicit - // per-column widths. Preserve the natural left-to-right fill order there. - const hasExplicitColumnWidths = - finalSectionColumns?.equalWidth === false && - Array.isArray(finalSectionColumns.widths) && - finalSectionColumns.widths.length > 0; - - if (hasExplicitColumnWidths) { - continue; - } - - // Skip balancing if fragments are already in multiple columns (e.g., explicit column breaks). - // Balancing should only apply when all content flows naturally in column 0. - const uniqueXPositions = new Set(page.fragments.map((f) => Math.round(f.x))); - const hasExplicitColumnStructure = uniqueXPositions.size > 1; - - if (hasExplicitColumnStructure) { - continue; - } - - // Skip balancing if fragments have different widths (indicating different column configs - // from multiple sections). Balancing would incorrectly apply the final section's width to all. - const uniqueWidths = new Set(page.fragments.map((f) => Math.round(f.width))); - const hasMixedColumnWidths = uniqueWidths.size > 1; - - if (hasMixedColumnWidths) { - continue; - } - - // Check if page has content from multiple sections. - // If so, only balance fragments from the final multi-column section. - const fragmentSections = new Set(); - for (const f of page.fragments) { - const section = blockSectionMap.get(f.blockId); - if (section !== undefined) { - fragmentSections.add(section); - } - } - - // Only balance fragments from the final section when there are mixed sections - const hasMixedSections = fragmentSections.size > 1; - const fragmentsToBalance = hasMixedSections - ? page.fragments.filter((f) => { - const fragSection = blockSectionMap.get(f.blockId); - return fragSection === activeSectionIndex; - }) - : page.fragments; - - if (fragmentsToBalance.length > 0) { - const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; - balancePageColumns( - fragmentsToBalance as { - x: number; - y: number; - width: number; - kind: string; - blockId: string; - fromLine?: number; - toLine?: number; - height?: number; - }[], - normalizedCols, - { left: activeLeftMargin }, - activeTopMargin, - availableHeight, - measureMap, - ); - } + // Apply column balancing per section. For each section with a multi-column layout, + // find the final page that carries any of its fragments and balance those fragments. + // Earlier pages of a multi-page section are always fully filled (content overflowed + // to reach them), so balancing is a no-op there. This replaces the previous + // "last page of document" heuristic with proper per-section balancing — required + // to match Word's behavior when a document has multiple multi-column sections + // separated by continuous or next-page breaks. + // + // Mid-page continuous breaks are handled in the layout loop itself (see the + // forceMidPageRegion branch above). This post-layout pass handles sections that + // end at a page boundary or at document end. + const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); + for (const [sectionIdx, sectionCols] of sectionColumnsMap) { + if (sectionCols.count <= 1) continue; + if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; + if (alreadyBalancedSections.has(sectionIdx)) continue; + + // Find the last page carrying any fragments from this section. + let lastPageForSection: (typeof pages)[number] | null = null; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + lastPageForSection = p; } } + if (!lastPageForSection) continue; + + const normalized = normalizeColumns(sectionCols, contentWidth); + const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; + + balanceSectionOnPage({ + fragments: lastPageForSection.fragments as BalancingFragment[], + sectionIndex: sectionIdx, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: sectionCols.widths, + equalWidth: sectionCols.equalWidth, + }, + sectionHasExplicitColumnBreak: false, // already filtered above + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeTopMargin, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); } // Serialize constraint boundaries into page.columnRegions so DomPainter can From e4265964d6f9f913b0c80926cc5a93dd7016605d Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 20 Apr 2026 15:08:15 -0300 Subject: [PATCH 2/4] fix(layout-engine): balance before forced page break on col-count reduction (SD-2452) When a mid-page section break reduced the column count (e.g. 2-col -> 1-col for test 4's 13-paragraph fixture followed by OVERLAP CHECK), the mid-page hook's forced-page-break guard ran before balancing: if (columnIndexBefore >= newColumns.count) { state = paginator.startNewPage(); } // ... balance ran here, on the empty new page At the section transition, columnIndexBefore=1 (paginator was in col 1) and newColumns.count=1, so the guard forced a new page before balancing had a chance to reposition the ending section's fragments. Balancing then ran on the empty new page (no-op), the paginator placed the post-columns single-column content on the new page, and the old page's fragments were balanced by the post-layout pass. Net effect: columns looked correct on page 0 but OVERLAP CHECK ended up on page 1, while Word fits everything on one page. The guard exists to prevent new 1-col content from overwriting earlier column content on the same page. With balancing, that risk disappears: all ending-section fragments are repositioned within the section's own vertical region, and the cursor moves to maxY below the balanced columns. The new region starts safely below. Fix: balance first. Only fall through to the forced-page-break guard when the ending section won't be balanced (single-col -> multi-col, explicit column break, or no section-1 fragments on the page). Test 4 now renders on a single page, matching Word: - 7+6 balanced columns - OVERLAP CHECK heading at y=758 (right below columns) - "If this overlaps..." at y=794 - Total: 1 page (was 2) All 5 SD-2452 fixtures now match Word's pagination exactly. 614 layout-engine tests still pass. --- .../layout-engine/layout-engine/src/index.ts | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index d5ff50b28c..8776c0cfd4 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1863,22 +1863,11 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const columnIndexBefore = state.columnIndex; const newColumns = updatedState.pendingColumns; - // If reducing column count and currently in a column that won't exist - // in the new layout, start a fresh page to avoid overwriting earlier columns - if (columnIndexBefore >= newColumns.count) { - state = paginator.startNewPage(); - } - - // Balance the ending section's fragments on this page BEFORE starting the - // new region. Word produces a minimum-height section, then places the next - // region just below the balanced columns. Without this, columns fill - // top-to-bottom and the next region starts far below where Word would place it. - // + // Identify the ending section from the current page's fragments. // `activeSectionIndex` only updates at page boundaries, so for continuous - // mid-page section breaks it's stale. Instead, look at the current page's - // fragments and find the most recent section index — that's the section - // that's ending. Usually this is `metadataIndex - 1` (sections are sequential), - // but using blockSectionMap handles non-sequential indices too. + // mid-page section breaks it's stale. Walk back through page fragments + // to find the most recent section index that isn't the new one — that's + // the section that's ending. let endingSectionIndex: number | null = null; for (let i = state.page.fragments.length - 1; i >= 0; i--) { const mapped = blockSectionMap.get(state.page.fragments[i].blockId); @@ -1889,25 +1878,40 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } const endingSectionColumns = endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; - if (endingSectionIndex !== null && endingSectionColumns && endingSectionColumns.count > 1) { + const willBalance = + endingSectionIndex !== null && + !!endingSectionColumns && + endingSectionColumns.count > 1 && + !sectionHasExplicitColumnBreak.has(endingSectionIndex); + + // Balance BEFORE any forced page break. After balancing, all of the + // ending section's fragments are repositioned within the section's own + // vertical region — there's no risk of the new 1-col region overwriting + // prior column content, because the cursor moves to maxY below them. + // + // When not balancing (e.g. single-col → multi-col, or explicit column + // break), fall back to the original "force a new page if currently in a + // column that won't exist after the change" guard so new content doesn't + // overwrite earlier column positions on the same page. + if (willBalance) { // The current region starts at the last constraint boundary's Y, or at // the page's top margin if no mid-page region change has happened yet. const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; const activeRegionTop = lastBoundary?.y ?? activeTopMargin; const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); - const normalized = normalizeColumns(endingSectionColumns, contentWidth); + const normalized = normalizeColumns(endingSectionColumns!, contentWidth); const balanceResult = balanceSectionOnPage({ fragments: state.page.fragments as BalancingFragment[], - sectionIndex: endingSectionIndex, + sectionIndex: endingSectionIndex!, sectionColumns: { count: normalized.count, gap: normalized.gap, width: normalized.width, - widths: endingSectionColumns.widths, - equalWidth: endingSectionColumns.equalWidth, + widths: endingSectionColumns!.widths, + equalWidth: endingSectionColumns!.equalWidth, }, - sectionHasExplicitColumnBreak: sectionHasExplicitColumnBreak.has(endingSectionIndex), + sectionHasExplicitColumnBreak: false, blockSectionMap, margins: { left: activeLeftMargin }, topMargin: activeRegionTop, @@ -1920,8 +1924,13 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options // region starts there, not below an unbalanced tallest column. state.cursorY = balanceResult.maxY; state.maxCursorY = balanceResult.maxY; - alreadyBalancedSections.add(endingSectionIndex); + alreadyBalancedSections.add(endingSectionIndex!); } + } else if (columnIndexBefore >= newColumns.count) { + // Non-balancing case: reducing column count without balancing means + // starting the new region at col 0 could overwrite earlier column + // content. Force a fresh page to avoid that. + state = paginator.startNewPage(); } startMidPageRegion(state, newColumns); From 71fb404e1b70e87a0a43116c9cf7a4a9fb81a1c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeu=20Tupinamb=C3=A1?= Date: Thu, 30 Apr 2026 15:07:03 -0300 Subject: [PATCH 3/4] fix: balance earlier pages of multi-page 2-col continuous sections (SD-2646) (#2930) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(pm-adapter): emit section break before non-paragraph nodes (SD-2646) Per ECMA-376 §17.6.17, a inside a paragraph defines the section that ENDS with that paragraph. All body children preceding it — paragraphs, tables, top-level drawings, SDTs — belong to that section. Section ranges were indexed purely by paragraph count, and section-break blocks were emitted only inside handleParagraphNode. A table that sat between two sectPr-marker paragraphs was emitted into the flow stream BEFORE the section break that declared its column config, so the layout engine laid it out under the prior section's settings. This is the root cause of IT-945 rendering a 114-row 2-col continuous table in column 0 across three pages with column 1 empty: the table was placed in the 1-col section, not the 2-col section. Fix: - Track nodeIndex over every top-level doc.content child in findParagraphsWithSectPr and SectionRange (alongside paragraphIndex, which SDT handlers still use for intra-SDT transitions). - Add maybeEmitNextSectionBreakForNode in sections/breaks.ts and call it from internal.ts's main dispatch loop BEFORE every top-level handler. Any non-paragraph node crossing a section boundary now triggers the break. - Section-model primer in pm-adapter/README.md with spec citations. Tests: 1739/1739 pass in pm-adapter (including new end-tagged.test.ts and integration test in index.test.ts asserting flow-block order). * fix(layout-engine): split dominant table at row boundary when balancing section-final page (SD-2646) The column balancer treats each fragment as an atomic block. A multi-page two-column continuous section's final page can end up with a single table fragment taller than totalSectionHeight / columnCount. The atomic-block binary search then places the whole table in one column and leaves the other empty — diverging from Word, which balances by splitting the table at a row boundary per ECMA-376 §17.18.77 ("a continuous section break balances the content of the previous section"). Fix: add splitDominantTableAtRowBoundary as a preprocessor inside balanceSectionOnPage. When the section has a single splittable table fragment larger than target, split it at the row whose cumulative height first meets or exceeds totalSectionHeight / columnCount. The two halves are inserted in place of the original; the rest of the balancer runs unchanged and naturally assigns one to each column. Also add getBalancingHeight so empty sectPr-marker paragraphs (measured lines with width=0) contribute 0 to balancing — matching Word's behavior of not rendering an empty line for such markers. This keeps both columns top-aligned on the section-final page. On IT-945: page 2 now splits 14/14 from y=96 in both columns, matching Word's top-alignment. Before this fix page 2 rendered all 28 remaining rows in col 1 with col 0 empty. Tests: strengthened existing "balances the section-ending page" test (it was passing trivially via `if (sectionFragments.length > 1)` guard). Added narrow-table multi-page regression test. 616/616 pass. --- .../layout-engine/src/column-balancing.ts | 242 +++++++++++++++++- .../layout-engine/src/index.test.ts | 109 ++++++-- packages/layout-engine/pm-adapter/README.md | 32 +++ .../pm-adapter/src/index.test.ts | 66 +++++ .../pm-adapter/src/internal.test.ts | 4 + .../layout-engine/pm-adapter/src/internal.ts | 22 +- .../pm-adapter/src/sections/analysis.test.ts | 49 ++-- .../pm-adapter/src/sections/analysis.ts | 62 ++++- .../pm-adapter/src/sections/breaks.ts | 40 +++ .../src/sections/end-tagged.test.ts | 118 +++++++++ .../pm-adapter/src/sections/index.ts | 1 + .../pm-adapter/src/sections/types.ts | 15 +- .../layout-engine/pm-adapter/src/types.d.ts | 1 + .../layout-engine/pm-adapter/src/types.ts | 7 + 14 files changed, 714 insertions(+), 54 deletions(-) create mode 100644 packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index 94466f9b57..d70e4bd2c8 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -573,6 +573,49 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map` + * has no text content — all measured lines have `width === 0`. Word does + * NOT render an empty line for such markers, so they take no vertical space + * in the balanced layout. Treating them as balance-neutral prevents them + * from pushing their column's cursor down and unbalancing the section's + * final page against Word's output (ECMA-376 §17.18.77). + * + * For every other paragraph, image, drawing, or table this delegates to + * the standard `getFragmentHeight` so existing behavior is preserved. + */ +function getBalancingHeight(fragment: BalancingFragment, measureMap: Map): number { + if (fragment.kind === 'para') { + const measure = measureMap.get(fragment.blockId) as + | (MeasureData & { lines?: Array<{ lineHeight: number; width?: number }> }) + | undefined; + if (measure?.kind === 'paragraph' && Array.isArray(measure.lines) && measure.lines.length > 0) { + const fromLine = fragment.fromLine ?? 0; + const toLine = fragment.toLine ?? measure.lines.length; + // A sectPr-marker paragraph is empty: every measured line has an + // EXPLICIT width of 0 (measuring-dom sets width per line). Treat + // undefined width as "has content" so synthetic test fixtures and any + // other callers that don't set line.width keep their existing height. + let allEmpty = true; + let sawAnyLine = false; + for (let i = fromLine; i < toLine; i++) { + const line = measure.lines[i]; + if (!line) continue; + sawAnyLine = true; + if (line.width !== 0) { + allEmpty = false; + break; + } + } + if (sawAnyLine && allEmpty) return 0; + } + } + return getFragmentHeight(fragment, measureMap); +} + /** * Balances column content on a page by redistributing fragments. * @@ -796,6 +839,26 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); if (remainingHeight <= 0) return null; + // Pre-split a dominant table fragment at a row boundary before balancing. + // + // When a section's final page contains a single splittable table that's + // taller than (totalSectionHeight / columnCount), the atomic-block balancer + // can only place the whole table in one column — leaving the other column + // empty. Word's behavior per ECMA-376 §17.18.77 is to balance the + // REMAINING content, which for a narrow table means splitting at a row + // boundary so both columns carry roughly half the rows. + // + // We split ONCE per balance call (we only need two halves for a 2-col + // section; extending to N > 2 would iterate). SD-2646: IT-945 page 2 has a + // 515px / 28-row table; splitting into ~257px halves lets the balancer + // assign half to each column. + splitDominantTableAtRowBoundary({ + sectionFragments, + fragments, + columnCount, + measureMap: args.measureMap, + }); + // Order fragments in document order: by current column (x → left-to-right), // then by y within each column. During unbalanced layout the paginator fills // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the @@ -809,9 +872,13 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu // by y (as balancePageColumns does) would collapse fragments from different // source columns that happen to share a y coordinate into a single row and // re-stack them at one position — producing overlap. + // + // Use `getBalancingHeight` so empty sectPr-marker paragraphs contribute 0 + // to their column's cursor — matching Word's behavior of not rendering a + // blank line for such markers. const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ blockId: `${f.blockId}#${i}`, - measuredHeight: getFragmentHeight(f, args.measureMap), + measuredHeight: getBalancingHeight(f, args.measureMap), canBreak: false, keepWithNext: false, keepTogether: true, @@ -850,3 +917,176 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu } return { maxY }; } + +/** + * Table measure shape used by the row-split preprocessor. + * + * Only the row-heights array is required. We access it through the runtime + * `measureMap` stored as `MeasureData`, which narrows the interface for the + * public balancer API — but the stored value is the full `TableMeasure` + * containing `rows: [{ height }]`, so a cast is safe. + */ +interface TableMeasureLike { + kind: string; + rows?: Array<{ height: number }>; +} + +/** + * Row-boundary record used by the renderer to draw horizontal dividers. + * Mirrors the subset of `TableFragmentMetadata['rowBoundaries']` that the + * split helper needs to read and regenerate. + */ +interface RowBoundaryLike { + i: number; + y: number; + h: number; + min: number; + r: number; +} + +/** + * In-place split of a dominant table fragment at a row boundary. + * + * Problem this solves: the column balancer treats each fragment as an atomic + * block. A multi-page two-column continuous section's final page can end up + * with a single table fragment that exceeds half the section's height. The + * balancer then places the whole table in one column and leaves the other + * empty — diverging from Word, which balances by splitting the table at a + * row boundary (ECMA-376 §17.18.77: continuous breaks balance the previous + * section's content). + * + * This preprocessor runs once per `balanceSectionOnPage` call. It detects a + * single dominant table fragment and splits it at the row whose cumulative + * height first meets or exceeds totalSectionHeight / columnCount. The two + * halves are inserted into both `sectionFragments` and the page's + * `fragments` array in place of the original; the rest of the balancer then + * runs on N + 1 similarly-sized blocks and naturally assigns one to each + * column. + * + * Guards: + * - Only one splittable table fragment on the section's page (skip if 0 or >1). + * - Table must span at least 2 rows (can't split a 1-row fragment). + * - Total height must exceed target (= total / columnCount) by more than a + * small epsilon; otherwise the atomic balancer already fits. + * + * Splitting is NOT appropriate when the author placed explicit column breaks + * or used unequal columns — those cases are already filtered by the caller. + * + * @param args Section fragments (already filtered to this section), the + * full page fragments (mutated in place), column count, and the + * measure map. + */ +function splitDominantTableAtRowBoundary(args: { + sectionFragments: BalancingFragment[]; + fragments: BalancingFragment[]; + columnCount: number; + measureMap: Map; +}): void { + const { sectionFragments, fragments, columnCount, measureMap } = args; + if (columnCount <= 1) return; + + const tables = sectionFragments.filter((f) => f.kind === 'table'); + if (tables.length !== 1) return; + const table = tables[0] as BalancingFragment & { + fromRow?: number; + toRow?: number; + height?: number; + continuesFromPrev?: boolean; + continuesOnNext?: boolean; + metadata?: { rowBoundaries?: RowBoundaryLike[]; columnBoundaries?: unknown; coordinateSystem?: string }; + }; + const fromRow = table.fromRow ?? 0; + const toRow = table.toRow ?? fromRow; + const rowSpan = toRow - fromRow; + if (rowSpan < 2) return; + + const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; + if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return; + + const totalSectionHeight = sectionFragments.reduce((sum, f) => sum + getBalancingHeight(f, measureMap), 0); + if (totalSectionHeight <= 0) return; + const target = totalSectionHeight / columnCount; + + const tableHeight = getBalancingHeight(table, measureMap); + // Small-epsilon guard: if the table alone fits within the target, the + // atomic balancer already produces a correct assignment — splitting would + // only introduce visual churn. + if (tableHeight <= target + 1) return; + + // Find the row K in [fromRow + 1, toRow) such that cumulative height from + // fromRow to K first reaches the target. Guaranteed to succeed because + // tableHeight > target and rowSpan ≥ 2. + let running = 0; + let splitRow = fromRow + 1; + for (let r = fromRow; r < toRow - 1; r++) { + const h = measure.rows[r]?.height ?? 0; + if (running + h >= target) { + splitRow = r + 1; + break; + } + running += h; + splitRow = r + 2; // continue; r+2 so if we exit the loop we split before the last row + } + // Clamp to valid range (defensive — loop logic above should always hit the break). + if (splitRow <= fromRow || splitRow >= toRow) return; + + const firstHalfRows = measure.rows.slice(fromRow, splitRow); + const secondHalfRows = measure.rows.slice(splitRow, toRow); + const firstHalfHeight = firstHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + const secondHalfHeight = secondHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + + // Regenerate rowBoundaries so the renderer draws horizontal dividers at the + // right y-offsets inside each half. rowBoundaries are 0-origin within the + // fragment; we walk the measure's rows for each half and accumulate. + const makeRowBoundaries = (rows: Array<{ height: number }>, startIndex: number): RowBoundaryLike[] => { + const out: RowBoundaryLike[] = []; + let y = 0.5; + for (let i = 0; i < rows.length; i++) { + const h = rows[i].height ?? 0; + out.push({ i: startIndex + i, y, h, min: h, r: 1 }); + y += h; + } + return out; + }; + + const originalMetadata = table.metadata; + const firstMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(firstHalfRows, 0), + } + : undefined; + const secondMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(secondHalfRows, 0), + } + : undefined; + + // Construct the first half by mutating the original (preserves object + // identity so `fragments.indexOf(table)` still works below). + table.toRow = splitRow; + table.height = firstHalfHeight; + table.continuesOnNext = true; + if (firstMetadata) table.metadata = firstMetadata; + + const secondHalf: BalancingFragment = { + ...table, + fromRow: splitRow, + toRow, + height: secondHalfHeight, + continuesFromPrev: true, + continuesOnNext: table.continuesOnNext ? false : (table.continuesOnNext ?? false), + metadata: secondMetadata ?? table.metadata, + } as BalancingFragment; + // Preserve the original's continuesOnNext intent: if the original fragment + // continued onto a later page, the SECOND half now inherits that role. + (secondHalf as { continuesOnNext?: boolean }).continuesOnNext = false; + + // Insert the second half right after the first in both arrays so the + // balancer's (x, y) ordering keeps them adjacent in document order. + const fragIdx = fragments.indexOf(table); + if (fragIdx >= 0) fragments.splice(fragIdx + 1, 0, secondHalf); + const sectIdx = sectionFragments.indexOf(table); + if (sectIdx >= 0) sectionFragments.splice(sectIdx + 1, 0, secondHalf); +} diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index 9a7b0a71a2..6d4d8f2f27 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2964,29 +2964,104 @@ describe('layoutDocument', () => { expect(afterSectionPara!.y).toBe(expectedBalancedBottom); }); - it('balances the section-ending page even when the section spans multiple pages', () => { - // A section big enough to fill page 1 completely then spill onto page 2. - // Word balances only the FINAL page (earlier pages are already full). - // Use paragraphs each large enough that col-1 alone will overflow. - const lineHeight = 120; - const paraCount = 10; + it('fills BOTH columns on every page of a multi-page 2-col continuous section', () => { + // ECMA-376 §17.18.77 (ST_SectionMark): a continuous section break + // "balances content of the previous section." Word's observable behavior + // is to fill col 0 to the balanced target, wrap to col 1 to the same + // target, then wrap to the next page — on EVERY page, not only the last. + // Regression for SD-2646: earlier pages must not stack content in col 0. + const lineHeight = 20; + const paraCount = 100; // ≈ 2000px, exceeds one page's content area const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); const layout = layoutDocument(blocks, measures, PAGE); - // Multiple pages due to content size. - expect(layout.pages.length).toBeGreaterThan(1); + expect(layout.pages.length).toBeGreaterThanOrEqual(2); - // On the final page, fragments of the section should span both columns. - const finalPage = layout.pages[layout.pages.length - 1]; - const sectionFragments = finalPage.fragments.filter( - (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', - ); - if (sectionFragments.length > 1) { - const uniqueX = new Set(sectionFragments.map((f) => Math.round(f.x))); - // Either balanced into both columns, or single-column if only 1 fragment - expect(uniqueX.size).toBeGreaterThanOrEqual(1); + for (const page of layout.pages) { + const sectionFragments = page.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length < 2) continue; // tail of last page may have <2 fragments + const col0 = sectionFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + expect(col0.length).toBeGreaterThan(0); + expect(col1.length).toBeGreaterThan(0); + } + }); + + it('flows a narrow multi-page table across both columns on every page (SD-2646 regression)', () => { + // IT-945 shape: a narrow table (one column wide) inside a 2-col continuous + // section, spanning multiple pages. Regression guard for the layout path + // once pm-adapter correctly places the table in the 2-col section. + const rowCount = 114; + const rowHeight = 18.4; + const cellWidth = TWO_COL_WIDTH / 2; + + const rows = Array.from({ length: rowCount }, (_, r) => ({ + id: `tbl-row-${r}`, + cells: [ + { id: `tbl-cell-${r}-0`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-0-p`, runs: [] } }, + { id: `tbl-cell-${r}-1`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-1-p`, runs: [] } }, + ], + })); + const tbl: TableBlock = { + kind: 'table', + id: 'tbl', + rows, + attrs: { sectionIndex: 0 }, + } as TableBlock; + const tblM: TableMeasure = { + kind: 'table', + rows: Array.from({ length: rowCount }, () => ({ + height: rowHeight, + cells: [ + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + ], + })), + columnWidths: [cellWidth, cellWidth], + totalWidth: cellWidth * 2, + totalHeight: rowHeight * rowCount, + }; + + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + tbl as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }, tblM, { kind: 'sectionBreak' }]; + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + let pagesWithBothColumns = 0; + for (const page of layout.pages) { + const tableFragments = page.fragments.filter((f) => f.kind === 'table'); + if (tableFragments.length === 0) continue; + const col0 = tableFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = tableFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + if (col0.length > 0 && col1.length > 0) pagesWithBothColumns++; } + // At least one page must have fragments in both columns. A stricter + // assertion (every page) is made invalid by the tail of the final page + // which can reasonably hold <1 column's worth of content. + expect(pagesWithBothColumns).toBeGreaterThan(0); }); it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { diff --git a/packages/layout-engine/pm-adapter/README.md b/packages/layout-engine/pm-adapter/README.md index 27c0d1591c..e0653df917 100644 --- a/packages/layout-engine/pm-adapter/README.md +++ b/packages/layout-engine/pm-adapter/README.md @@ -23,3 +23,35 @@ Notes: - Handles lists, tables, images, vector shapes, SDT (TOC, structured content, doc parts), tracked changes, hyperlinks. - Consumes `@superdoc/style-engine` defaults and locale/tab interval hints from the PM document attrs. - `@superdoc/measuring-dom` is only a dependency for types; measurement happens later in the pipeline. + +## Section model (ECMA-376 §17.6, §17.18.77) + +Word uses **end-tagged** section semantics: a `` inside a paragraph defines the section that **ends with** that paragraph. The body-level `` defines the final section. All body children preceding a section-terminating paragraph — other paragraphs, **tables**, top-level drawings, SDT wrappers — belong to the section whose sectPr follows them. + +``` + + This is section A's first paragraph + + (section A props: 1-col, nextPage) + + ... ← section B (the NEXT sectPr) + + (section B props: 2-col continuous) + + This is section C's paragraph + (section C props: body-level) + +``` + +Section ranges are computed in `sections/analysis.ts` with two parallel indices: + +| Index | What it counts | Used by | +|---|---|---| +| `startParagraphIndex` / `endParagraphIndex` | Paragraph nodes only (including those inside SDT wrappers) | SDT handlers for intra-SDT section transitions | +| `startNodeIndex` / `endNodeIndex` | Every top-level `doc.content` child — paragraphs, tables, top-level drawings, SDTs | Main dispatch loop for inter-node section transitions | + +The main dispatch loop in `internal.ts` calls `maybeEmitNextSectionBreakForNode` BEFORE handling each top-level node. The helper uses `currentNodeIndex === nextSection.startNodeIndex` as its trigger, so non-paragraph nodes (tables especially) correctly get the sectionBreak emitted before them when they cross a section boundary. This is the fix for SD-2646. + +**When adding a new top-level block kind** (e.g., a new SDT type or a top-level drawing kind): do nothing. The dispatch-level hook covers you automatically. You only need a per-handler section check if your handler descends into children that carry their own sectPr markers — see the SDT handlers (`sdt/bibliography.ts`, `sdt/document-index.ts`, `sdt/table-of-authorities.ts`) for the intra-SDT pattern. + +Continuous section breaks (``) carry an extra requirement from §17.18.77: they "balance the content of the previous section." The balancing itself is the layout engine's responsibility (see `layout-engine/src/column-balancing.ts`); the adapter's job is only to ensure the right blocks are in the right section. diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index 441b210e73..b22a0de17c 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -1269,6 +1269,72 @@ describe('toFlowBlocks', () => { [first, second, third].forEach((b) => expect(b?.type).toBe('continuous')); }); }); + + describe('end-tagged section membership for non-paragraph nodes (SD-2646, ECMA-376 §17.6.17)', () => { + it('emits the next section break BEFORE a table that sits between two sectPr-marker paragraphs', () => { + // IT-945 shape: table lives between the paragraph that ends section A + // and the paragraph that ends section B. Per §17.6.17 the table + // belongs to section B, so the sectionBreak introducing B's columns + // must precede the table in the flow stream. + const pmDoc: PMNode = { + type: 'doc', + attrs: { bodySectPr: createTestBodySectPr() }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'This is my first section' }] }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'nextPage' } }, + { name: 'w:cols', attributes: { 'w:num': '1', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }] }, + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }] }, + ], + }, + ], + }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'continuous' } }, + { name: 'w:cols', attributes: { 'w:num': '2', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { type: 'paragraph', content: [{ type: 'text', text: 'This is my third section' }] }, + ], + } as never; + + const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); + + const tableIndex = blocks.findIndex((b) => b.kind === 'table'); + const twoColBreakIndex = blocks.findIndex((b) => b.kind === 'sectionBreak' && b.columns?.count === 2); + expect(tableIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeLessThan(tableIndex); + }); + }); }); describe('block id prefixing', () => { diff --git a/packages/layout-engine/pm-adapter/src/internal.test.ts b/packages/layout-engine/pm-adapter/src/internal.test.ts index e1972f240b..4c4eda6863 100644 --- a/packages/layout-engine/pm-adapter/src/internal.test.ts +++ b/packages/layout-engine/pm-adapter/src/internal.test.ts @@ -59,6 +59,10 @@ vi.mock('./sections/index.js', () => { id: nextBlockId('sectionBreak'), type: section.type, })), + maybeEmitNextSectionBreakForNode: vi.fn(() => { + // Mocked as no-op: this test file already provides zero section ranges + // via analyzeSectionRanges, so there is never a break to emit. + }), publishSectionMetadata: vi.fn(), }; }); diff --git a/packages/layout-engine/pm-adapter/src/internal.ts b/packages/layout-engine/pm-adapter/src/internal.ts index 1223d7ec7e..b7dde6420c 100644 --- a/packages/layout-engine/pm-adapter/src/internal.ts +++ b/packages/layout-engine/pm-adapter/src/internal.ts @@ -12,7 +12,12 @@ import type { FlowBlock, ParagraphBlock } from '@superdoc/contracts'; import { isValidTrackedMode } from './tracked-changes.js'; -import { analyzeSectionRanges, createSectionBreakBlock, publishSectionMetadata } from './sections/index.js'; +import { + analyzeSectionRanges, + createSectionBreakBlock, + maybeEmitNextSectionBreakForNode, + publishSectionMetadata, +} from './sections/index.js'; import { normalizePrefix, buildPositionMap, createBlockIdGenerator } from './utilities.js'; import { paragraphToFlowBlocks, @@ -208,6 +213,7 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): ranges: sectionRanges, currentSectionIndex: 0, currentParagraphIndex: 0, + currentNodeIndex: 0, }, converters, themeColors, @@ -216,12 +222,24 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): trackedListLastOrdinals: new Map(), }; - // Process nodes using handler dispatch pattern + // Process nodes using handler dispatch pattern. Before each top-level node + // we flush any pending section break whose `startNodeIndex` matches the + // current position — this keeps non-paragraph nodes (tables, top-level + // drawings) in their correct end-tagged section per ECMA-376 §17.6.17. doc.content.forEach((node) => { + maybeEmitNextSectionBreakForNode({ + sectionState: handlerContext.sectionState!, + nextBlockId, + pushBlock: (block) => { + blocks.push(block); + recordBlockKind(block.kind); + }, + }); const handler = nodeHandlers[node.type]; if (handler) { handler(node, handlerContext); } + handlerContext.sectionState!.currentNodeIndex++; }); // Ensure final body section is emitted only if not already emitted during paragraph processing. diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts index ac860ce7e6..97b6b5b2d7 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts @@ -296,8 +296,9 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 1, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(2); }); it('should find multiple paragraphs with sectPr', () => { @@ -315,12 +316,13 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(2); - expect(result.paragraphs[0]).toEqual({ index: 0, node: para1 }); - expect(result.paragraphs[1]).toEqual({ index: 2, node: para3 }); + expect(result.paragraphs[0]).toEqual({ index: 0, nodeIndex: 0, node: para1 }); + expect(result.paragraphs[1]).toEqual({ index: 2, nodeIndex: 2, node: para3 }); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(3); }); - it('should skip non-paragraph nodes', () => { + it('should skip non-paragraph nodes for paragraphIndex but count them in nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; @@ -334,11 +336,15 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + // paragraphIndex = 1 (second paragraph), nodeIndex = 2 (third top-level child + // after the heading). Non-paragraph top-level children count toward nodeIndex + // because ECMA-376 §17.6.17 sections span all body children. + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 2, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(4); }); - it('should include paragraphs inside index nodes', () => { + it('should include paragraphs inside index nodes and share the SDT nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; const para3 = { type: 'paragraph', content: [] }; @@ -352,8 +358,11 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); - expect(result.paragraphs).toEqual([{ index: 1, node: para2 }]); + // para2 is inside the SDT (index node) at top-level nodeIndex 1. + // paragraphIndex 1 still reflects paragraph order across descent. + expect(result.paragraphs).toEqual([{ index: 1, nodeIndex: 1, node: para2 }]); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(2); }); }); @@ -385,7 +394,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue(null); @@ -408,7 +417,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr', elements: [] }; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -450,8 +459,8 @@ describe('analysis', () => { }; const paragraphs = [ - { index: 2, node: para1 }, - { index: 5, node: para2 }, + { index: 2, nodeIndex: 2, node: para1 }, + { index: 5, nodeIndex: 5, node: para2 }, ]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr' }; @@ -487,7 +496,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.NEXT_PAGE, @@ -512,7 +521,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mock section data with page margins but no header/footer margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -550,7 +559,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only header margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -583,7 +592,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only footer margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -616,7 +625,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only top margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -649,7 +658,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Explicit zero values for all margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -688,7 +697,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mix of header/footer and page margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -725,7 +734,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ titlePg: false, @@ -747,7 +756,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.CONTINUOUS, diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.ts index 3a0cae9013..73905c2c84 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.ts @@ -65,15 +65,27 @@ export function shouldIgnoreSectionBreak( /** * Find all paragraphs in the document that contain sectPr elements. * + * Records two indices per match: + * - `paragraphIndex` counts only paragraph nodes (including those nested + * inside SDT wrappers), matching the long-standing contract callers use + * for SDT-internal section transitions. + * - `nodeIndex` counts every top-level `doc.content` child (paragraph, + * table, top-level drawing, SDT, …). This is required to fix SD-2646: + * a non-paragraph node between two sectPr markers belongs to the LATER + * marker's section per ECMA-376 §17.6.17, so the dispatch loop must + * know each section's bounds in top-level-node terms. + * * @param doc - ProseMirror document node - * @returns Object containing paragraphs with sectPr and total paragraph count + * @returns Paragraph matches plus both totals */ export function findParagraphsWithSectPr(doc: PMNode): { - paragraphs: Array<{ index: number; node: PMNode }>; + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>; totalCount: number; + totalNodeCount: number; } { - const paragraphs: Array<{ index: number; node: PMNode }> = []; + const paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }> = []; let paragraphIndex = 0; + let nodeIndex = 0; const getNodeChildren = (node: PMNode): PMNode[] => { if (Array.isArray(node.content)) return node.content; const content = node.content as { forEach?: (cb: (child: PMNode) => void) => void } | undefined; @@ -87,27 +99,30 @@ export function findParagraphsWithSectPr(doc: PMNode): { return []; }; - const visitNode = (node: PMNode): void => { + const visitNode = (node: PMNode, outerNodeIndex: number): void => { if (node.type === 'paragraph') { if (hasSectPr(node)) { - paragraphs.push({ index: paragraphIndex, node }); + paragraphs.push({ index: paragraphIndex, nodeIndex: outerNodeIndex, node }); } paragraphIndex++; return; } if (node.type === 'index' || node.type === 'bibliography' || node.type === 'tableOfAuthorities') { - getNodeChildren(node).forEach(visitNode); + // SDT descendants share the outer SDT's nodeIndex — dispatch-level + // section transitions fire on the SDT as a whole, not on its children. + getNodeChildren(node).forEach((child) => visitNode(child, outerNodeIndex)); } }; if (doc.content) { for (const node of doc.content) { - visitNode(node); + visitNode(node, nodeIndex); + nodeIndex++; } } - return { paragraphs, totalCount: paragraphIndex }; + return { paragraphs, totalCount: paragraphIndex, totalNodeCount: nodeIndex }; } /** @@ -128,11 +143,12 @@ export function findParagraphsWithSectPr(doc: PMNode): { * @returns Array of section ranges */ export function buildSectionRangesFromParagraphs( - paragraphs: Array<{ index: number; node: PMNode }>, + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>, hasBodySectPr: boolean, ): SectionRange[] { const ranges: SectionRange[] = []; let currentStart = 0; + let currentStartNode = 0; paragraphs.forEach((item, idx) => { if (shouldIgnoreSectionBreak(item.node, idx, paragraphs.length, hasBodySectPr)) { @@ -154,6 +170,8 @@ export function buildSectionRangesFromParagraphs( const range: SectionRange = { sectionIndex: idx, + startNodeIndex: currentStartNode, + endNodeIndex: item.nodeIndex, startParagraphIndex: currentStart, endParagraphIndex: item.index, sectPr, @@ -180,6 +198,7 @@ export function buildSectionRangesFromParagraphs( ranges.push(range); currentStart = item.index + 1; + currentStartNode = item.nodeIndex + 1; }); return ranges; @@ -228,6 +247,7 @@ export function createFinalSectionFromBodySectPr( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange | null { const clampedStart = Math.max(0, Math.min(currentStart, Math.max(totalParagraphs - 1, 0))); @@ -251,8 +271,15 @@ export function createFinalSectionFromBodySectPr( bodySectionData.bottomPx != null || bodySectionData.leftPx != null; + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds + ? Math.max(0, Math.min(nodeBounds.startNodeIndex, Math.max(totalNodes - 1, 0))) + : clampedStart; + return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: clampedStart, endParagraphIndex: totalParagraphs - 1, sectPr: bodySectPr, @@ -291,9 +318,14 @@ export function createDefaultFinalSection( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange { + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds?.startNodeIndex ?? currentStart; return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: currentStart, endParagraphIndex: totalParagraphs - 1, sectPr: null, @@ -318,11 +350,13 @@ export function createDefaultFinalSection( * @returns Array of section ranges with backward-looking semantics */ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): SectionRange[] { - const { paragraphs, totalCount } = findParagraphsWithSectPr(doc); + const { paragraphs, totalCount, totalNodeCount } = findParagraphsWithSectPr(doc); const hasBody = Boolean(bodySectPr); const ranges = buildSectionRangesFromParagraphs(paragraphs, hasBody); - const currentStart = ranges.length > 0 ? ranges[ranges.length - 1].endParagraphIndex + 1 : 0; + const last = ranges[ranges.length - 1]; + const currentStart = last ? last.endParagraphIndex + 1 : 0; + const currentStartNode = last ? last.endNodeIndex + 1 : 0; // Always represent the final section defined by bodySectPr, even if there are // no remaining paragraphs after the last paragraph-level sectPr. This ensures @@ -333,12 +367,16 @@ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): Section Math.min(currentStart, totalCount), totalCount, ranges.length, + { startNodeIndex: Math.min(currentStartNode, totalNodeCount), totalNodeCount }, ); if (finalSection) { ranges.push(finalSection); } } else if (ranges.length > 0) { - const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length); + const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length, { + startNodeIndex: Math.min(currentStartNode, totalNodeCount), + totalNodeCount, + }); if (fallbackFinal) { fallbackFinal.type = DEFAULT_PARAGRAPH_SECTION_TYPE; ranges.push(fallbackFinal); diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index 8501b2230a..8a05383174 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -190,3 +190,43 @@ export function shouldRequirePageBoundary(current: SectionRange, next: SectionRa export function hasIntrinsicBoundarySignals(_: SectionRange): boolean { return false; } + +/** + * Emit the next section's sectionBreak block if the dispatch loop has reached + * that section's starting top-level node index. + * + * ECMA-376 §17.6.17: a section is defined by its end-tagged ``. All + * body children preceding that tag — paragraphs, tables, top-level drawings — + * belong to the section that ENDS at the tag. This helper fires BEFORE any + * such node so the appropriate section config is active by the time the node + * is laid out. + * + * This centralises the check that previously lived (duplicated) inside + * `handleParagraphNode` and the SDT handlers. Calling it from the main + * dispatch loop covers every top-level node type — present and future — + * with no per-handler opt-in. + */ +export function maybeEmitNextSectionBreakForNode(args: { + sectionState: { + ranges: SectionRange[]; + currentSectionIndex: number; + currentNodeIndex: number; + }; + nextBlockId: BlockIdGenerator; + pushBlock: (block: SectionBreakBlock) => void; +}): void { + const { sectionState, nextBlockId, pushBlock } = args; + if (sectionState.ranges.length === 0) return; + if (sectionState.currentSectionIndex >= sectionState.ranges.length - 1) return; + + const nextSection = sectionState.ranges[sectionState.currentSectionIndex + 1]; + if (!nextSection) return; + if (sectionState.currentNodeIndex !== nextSection.startNodeIndex) return; + + const currentSection = sectionState.ranges[sectionState.currentSectionIndex]; + const requiresPageBoundary = + shouldRequirePageBoundary(currentSection, nextSection) || hasIntrinsicBoundarySignals(nextSection); + const extraAttrs = requiresPageBoundary ? { requirePageBoundary: true } : undefined; + pushBlock(createSectionBreakBlock(nextSection, nextBlockId, extraAttrs)); + sectionState.currentSectionIndex++; +} diff --git a/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts new file mode 100644 index 0000000000..8d7cf42210 --- /dev/null +++ b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts @@ -0,0 +1,118 @@ +/** + * @file End-tagged section-membership tests (ECMA-376 §17.6.17, §17.18.77). + * + * OOXML rule: a `` inside a paragraph defines the section that + * ENDS with that paragraph. All body children preceding it (back to the + * previous section-terminating paragraph) belong to that section — + * including tables, drawings, and other non-paragraph nodes. + * + * These tests use real implementations with no module mocks so they assert + * observable behavior of the section analysis, not call patterns. + */ +import { describe, it, expect } from 'vitest'; +import { analyzeSectionRanges } from './analysis.js'; +import type { PMNode } from '../types.js'; +import type { SectPrElement } from './types.js'; + +const sectPr = (options: { + type?: 'continuous' | 'nextPage'; + cols?: number; + colSpace?: number; + pgSz?: { w: number; h: number }; +}): SectPrElement => { + const elements: SectPrElement['elements'] = []; + if (options.type) { + elements!.push({ + type: 'element', + name: 'w:type', + attributes: { 'w:val': options.type }, + }); + } + if (options.pgSz) { + elements!.push({ + type: 'element', + name: 'w:pgSz', + attributes: { 'w:w': String(options.pgSz.w), 'w:h': String(options.pgSz.h) }, + }); + } + elements!.push({ + type: 'element', + name: 'w:cols', + attributes: { + 'w:num': String(options.cols ?? 1), + 'w:space': String(options.colSpace ?? 720), + }, + }); + return { + type: 'element', + name: 'w:sectPr', + elements, + }; +}; + +const paragraph = (marker?: SectPrElement): PMNode => ({ + type: 'paragraph', + attrs: marker ? { paragraphProperties: { sectPr: marker } } : {}, + content: [], +}); + +const table = (): PMNode => ({ + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }], + }, + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }], + }, + ], + }, + ], +}); + +describe('analyzeSectionRanges — end-tagged membership for non-paragraph nodes (SD-2646)', () => { + it('places a table between two sectPr markers into the section of the LATER marker', () => { + // Arrange — IT-945 shape: + // p0 "first section" text + // p1(sectPr-A) empty marker, 1-col nextPage → ends Section A + // TABLE belongs to Section B per ECMA-376 §17.6.17 + // p2(sectPr-B) empty marker, 2-col continuous → ends Section B + // p3 "third section" text + // body sectPr final, 1-col continuous → Section C + const markerA = sectPr({ type: 'nextPage', cols: 1 }); + const markerB = sectPr({ type: 'continuous', cols: 2, colSpace: 720 }); + const bodyMarker = sectPr({ type: 'continuous', cols: 1 }); + const doc: PMNode = { + type: 'doc', + attrs: { bodySectPr: bodyMarker }, + content: [paragraph(), paragraph(markerA), table(), paragraph(markerB), paragraph()], + }; + + // Act + const sut = analyzeSectionRanges(doc, bodyMarker); + + // Assert — three sections, with Section B's range spanning the table + expect(sut).toHaveLength(3); + const [sectionA, sectionB, sectionC] = sut; + + // Section A ends at p1 (node index 1). Table at node index 2 is NOT in A. + expect(sectionA.endNodeIndex).toBe(1); + expect(sectionA.columns?.count ?? 1).toBe(1); + + // Section B: starts AFTER Section A (node index 2 — the table), ends at p2 (node index 3). + // The table must belong to Section B, not Section A. + expect(sectionB.startNodeIndex).toBe(2); + expect(sectionB.endNodeIndex).toBe(3); + expect(sectionB.columns?.count).toBe(2); + + // Section C (body): starts at node index 4 (p3). + expect(sectionC.startNodeIndex).toBe(4); + expect(sectionC.columns?.count ?? 1).toBe(1); + }); +}); diff --git a/packages/layout-engine/pm-adapter/src/sections/index.ts b/packages/layout-engine/pm-adapter/src/sections/index.ts index 64b41423fb..d1731f870f 100644 --- a/packages/layout-engine/pm-adapter/src/sections/index.ts +++ b/packages/layout-engine/pm-adapter/src/sections/index.ts @@ -41,4 +41,5 @@ export { isSectionBreakBlock, signaturesEqual, shallowObjectEquals, + maybeEmitNextSectionBreakForNode, } from './breaks.js'; diff --git a/packages/layout-engine/pm-adapter/src/sections/types.ts b/packages/layout-engine/pm-adapter/src/sections/types.ts index a4134b48d0..551d8bcfd8 100644 --- a/packages/layout-engine/pm-adapter/src/sections/types.ts +++ b/packages/layout-engine/pm-adapter/src/sections/types.ts @@ -89,11 +89,22 @@ export type SectionVerticalAlign = 'top' | 'center' | 'bottom' | 'both'; /** * Section range represents a contiguous section in the document. - * Word uses "end-tagged" section semantics: a paragraph's sectPr defines - * properties for the section ENDING at that paragraph, not starting after it. + * + * Word uses "end-tagged" section semantics (ECMA-376 §17.6.17): a paragraph's + * sectPr defines properties for the section ENDING at that paragraph, not + * starting after it. All body children preceding the section-terminating + * paragraph — paragraphs, tables, top-level drawings — belong to that section. + * + * `startNodeIndex`/`endNodeIndex` are computed over every top-level + * `doc.content` child and are the authoritative boundaries for dispatching + * section breaks at emission time. `startParagraphIndex`/`endParagraphIndex` + * are retained for callers (SDT handlers) that count only paragraphs during + * recursive descent. */ export interface SectionRange { sectionIndex: number; + startNodeIndex: number; + endNodeIndex: number; startParagraphIndex: number; endParagraphIndex: number; sectPr: SectPrElement | null; diff --git a/packages/layout-engine/pm-adapter/src/types.d.ts b/packages/layout-engine/pm-adapter/src/types.d.ts index 13de3db49f..2285e17fcd 100644 --- a/packages/layout-engine/pm-adapter/src/types.d.ts +++ b/packages/layout-engine/pm-adapter/src/types.d.ts @@ -238,6 +238,7 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + currentNodeIndex: number; }; converters?: { paragraphToFlowBlocks?: ( diff --git a/packages/layout-engine/pm-adapter/src/types.ts b/packages/layout-engine/pm-adapter/src/types.ts index d60515c39b..0aed51dd1d 100644 --- a/packages/layout-engine/pm-adapter/src/types.ts +++ b/packages/layout-engine/pm-adapter/src/types.ts @@ -316,6 +316,13 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + /** + * Index of the current top-level `doc.content` child being dispatched. + * Advanced by the main dispatch loop in internal.ts — drives end-tagged + * section transitions for non-paragraph nodes (tables, top-level + * drawings, …) per ECMA-376 §17.6.17. + */ + currentNodeIndex: number; }; // Converters for nested content From 52ae84a4c2735d23bca8e4cc887578bc61ecdc40 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Thu, 30 Apr 2026 11:29:26 -0700 Subject: [PATCH 4/4] chore: update lock --- pnpm-lock.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e7ca8ecf6b..9e14bb9c69 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6470,7 +6470,6 @@ packages: '@microsoft/teamsapp-cli@3.0.2': resolution: {integrity: sha512-AowuJwrrUxeF9Bq/frxuy9YZjK/ECk3pi0UBXl3CQLZ4XNWfgWatiFi/UWpyHDLccFs+0Za3nNYATFvgsxEFwQ==} engines: {node: '>=12'} - deprecated: This package is deprecated and supported Node.js version is 18-22. Please use @microsoft/m365agentstoolkit-cli instead. hasBin: true '@microsoft/teamsfx-api@0.23.1':