diff --git a/packages/layout-engine/layout-engine/src/column-balancing.test.ts b/packages/layout-engine/layout-engine/src/column-balancing.test.ts index 7aa7431135..6e83a179b6 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.test.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.test.ts @@ -335,13 +335,9 @@ function createMeasure(kind: string, lineHeights: number[]): { kind: string; lin describe('balancePageColumns', () => { describe('basic balancing', () => { - it('should distribute fragments across 2 columns based on target height', () => { - // 4 fragments, each 20px tall = 80px total, target = 40px per column - // With >= condition: switch when adding would reach/exceed 40px - // Block 1 (20px): column 0, height=20 - // Block 2 (20px): 20+20=40 >= 40, switch! column 1, height=20 - // Block 3, 4: stay in column 1 - // Result: 1 in column 0, 3 in column 1 + it('balances 4 equal blocks into 2+2 across 2 columns', () => { + // 4 fragments × 20px each in a 2-col section. Word minimizes section height by + // placing 2 per column (40px per col) rather than 1+3 (max 60px). const fragments = [ createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624), @@ -357,10 +353,9 @@ describe('balancePageColumns', () => { balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 40, measureMap); - // Block 1 stays in column 0 + // First half in col 0, second half in col 1 — minimum section height. expect(fragments[0].x).toBe(96); - // Blocks 2, 3, 4 move to column 1 - expect(fragments[1].x).toBe(432); + expect(fragments[1].x).toBe(96); expect(fragments[2].x).toBe(432); expect(fragments[3].x).toBe(432); }); @@ -558,3 +553,186 @@ describe('balancePageColumns', () => { }); }); }); + +// ============================================================================ +// balanceSectionOnPage Tests (Section-scoped balancing) +// ============================================================================ + +import { balanceSectionOnPage } from './column-balancing.js'; + +describe('balanceSectionOnPage', () => { + type TestFragment = { blockId: string; x: number; y: number; width: number; kind: string }; + + /** Build a fragment + section mapping for section-scoped tests. */ + function buildSectionFixture( + sectionIndex: number, + count: number, + height = 20, + startY = 96, + ): { + fragments: TestFragment[]; + measureMap: Map }>; + blockSectionMap: Map; + } { + const fragments: TestFragment[] = []; + const measureMap = new Map }>(); + const blockSectionMap = new Map(); + for (let i = 0; i < count; i++) { + const id = `s${sectionIndex}-b${i}`; + fragments.push({ blockId: id, x: 96, y: startY + i * height, width: 624, kind: 'para' }); + measureMap.set(id, createMeasure('paragraph', [height])); + blockSectionMap.set(id, sectionIndex); + } + return { fragments, measureMap, blockSectionMap }; + } + + it('balances the target section and returns the tallest balanced column bottom', () => { + // 6 equal paragraphs in a 2-col section → 3+3 balanced, tallest col ends at top + 3×20 = top + 60. + const top = 96; + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6, 20, top); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: top, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + // Returned maxY is the bottom of the tallest balanced column. + expect(result).not.toBeNull(); + expect(result!.maxY).toBe(top + 60); + + // Observable outcome: fragments split evenly across two columns. + const col0 = fragments.filter((f) => f.x === 96).length; + const col1 = fragments.filter((f) => f.x === 96 + 288 + 48).length; + expect(col0).toBe(3); + expect(col1).toBe(3); + }); + + it('returns null and leaves fragments untouched when section has <= 1 column', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 3); + const snapshot = fragments.map((f) => ({ x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 1, gap: 0, width: 624 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 624, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => { + expect(f.x).toBe(snapshot[i].x); + expect(f.y).toBe(snapshot[i].y); + }); + }); + + it('returns null when section contains an explicit column break', () => { + // Author-placed column breaks override balancing — preserve their intent. + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6); + const snapshot = fragments.map((f) => f.x); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: true, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => expect(f.x).toBe(snapshot[i])); + }); + + it('returns null when section has unequal explicit column widths', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 4); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288, equalWidth: false, widths: [200, 376] }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); + + it('only moves fragments of the target section when the page has mixed sections', () => { + // Page has 3 fragments in section 1 (already positioned in col 0) and 6 in section 2. + // Balancing section 2 must not touch section 1 fragments. + const sec1 = buildSectionFixture(1, 3, 20, 96); + const sec2 = buildSectionFixture(2, 6, 20, 160); + const fragments = [...sec1.fragments, ...sec2.fragments]; + const measureMap = new Map([...sec1.measureMap, ...sec2.measureMap]); + const blockSectionMap = new Map([...sec1.blockSectionMap, ...sec2.blockSectionMap]); + const sec1Snapshot = sec1.fragments.map((f) => ({ id: f.blockId, x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 160, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + expect(result).not.toBeNull(); + + // Section 1 fragments unchanged. + for (const s of sec1Snapshot) { + const f = fragments.find((x) => x.blockId === s.id)!; + expect(f.x).toBe(s.x); + expect(f.y).toBe(s.y); + } + + // Section 2 fragments now split across two columns. + const sec2Xs = new Set(sec2.fragments.map((f) => f.x)); + expect(sec2Xs.size).toBe(2); + }); + + it('returns null when no fragments on the page belong to the target section', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(1, 3); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 99, // different section + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); +}); diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index cb14ae8a2c..d70e4bd2c8 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -156,64 +156,58 @@ export function calculateBalancedColumnHeight( }; } - // Calculate total content height + // Calculate total content height and block-height extremes const totalHeight = ctx.contentBlocks.reduce((sum, b) => sum + b.measuredHeight, 0); + const maxBlockHeight = ctx.contentBlocks.reduce((m, b) => Math.max(m, b.measuredHeight), 0); // Early exit: content is very small, no need to balance if (totalHeight < config.minColumnHeight * ctx.columnCount) { return createSingleColumnResult(ctx); } - // Initial target: evenly divide content - let targetHeight = Math.ceil(totalHeight / ctx.columnCount); - - // Ensure target meets minimum column height - targetHeight = Math.max(targetHeight, config.minColumnHeight); - - // Don't exceed available height - targetHeight = Math.min(targetHeight, ctx.availableHeight); + // Binary-search for the minimum column height H such that a greedy + // left-to-right fill places every block with every column ≤ H. This matches + // Word's observed behavior: left columns are filled as tightly as possible + // against the minimum viable height, leaving the last column shorter when + // content doesn't divide evenly (e.g. 7 blocks across 3 columns → 3+3+1, + // not 2+2+3). Both splits have the same max column height, but Word prefers + // left-heavy packing for visual rhythm. + let lo = Math.max(maxBlockHeight, config.minColumnHeight); + let hi = Math.min(totalHeight, ctx.availableHeight); + if (lo > hi) lo = hi; let bestResult: SimulationResult | null = null; - let bestScore = Infinity; - - for (let i = 0; i < config.maxIterations; i++) { - const simulation = simulateBalancedLayout(ctx, targetHeight, config); - - // Calculate balance score (lower is better) - const score = calculateBalanceScore(simulation.columnHeights, config.tolerance); - - if (score < bestScore) { - bestScore = score; - bestResult = simulation; - } - - // Check if we've achieved acceptable balance - if (isBalanced(simulation.columnHeights, config.tolerance)) { - return { - targetColumnHeight: targetHeight, - columnAssignments: simulation.assignments, - success: true, - iterations: i + 1, - blockBreakPoints: simulation.breakPoints.size > 0 ? simulation.breakPoints : undefined, - }; + let bestH = hi; + let iterations = 0; + + while (lo <= hi) { + iterations++; + const mid = Math.floor((lo + hi) / 2); + const sim = simulateBalancedLayout(ctx, mid, config); + const maxCol = Math.max(...sim.columnHeights); + const placed = sim.assignments.size === ctx.contentBlocks.length; + if (placed && maxCol <= mid) { + bestResult = sim; + bestH = mid; + hi = mid - 1; + } else { + lo = mid + 1; } - - // Adjust target based on simulation results - targetHeight = adjustTargetHeight(simulation, targetHeight, ctx, config); + if (iterations >= config.maxIterations) break; } - // Use best result found if (bestResult) { return { - targetColumnHeight: targetHeight, + targetColumnHeight: bestH, columnAssignments: bestResult.assignments, - success: false, // Didn't converge within iterations - iterations: config.maxIterations, + success: true, + iterations, blockBreakPoints: bestResult.breakPoints.size > 0 ? bestResult.breakPoints : undefined, }; } - // Fallback: simple sequential layout + // Fallback: simple sequential layout if binary search never found a valid H + // (e.g. availableHeight too small to fit content). return createSequentialResult(ctx); } @@ -351,73 +345,6 @@ function calculateParagraphBreakPoint( return { breakAfterLine: lines.length - 1, canBreak: true }; } -/** - * Check if column heights are balanced within tolerance. - */ -function isBalanced(columnHeights: number[], tolerance: number): boolean { - if (columnHeights.length <= 1) return true; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return true; - - const maxHeight = Math.max(...nonEmptyHeights); - const minHeight = Math.min(...nonEmptyHeights); - - return maxHeight - minHeight <= tolerance; -} - -/** - * Calculate a balance score (lower is better). - * Used to track best result across iterations. - */ -function calculateBalanceScore(columnHeights: number[], tolerance: number): number { - if (columnHeights.length <= 1) return 0; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return 0; - - // Score based on variance from mean - const mean = nonEmptyHeights.reduce((a, b) => a + b, 0) / nonEmptyHeights.length; - const variance = nonEmptyHeights.reduce((sum, h) => sum + Math.pow(h - mean, 2), 0); - - // Penalize empty columns - const emptyPenalty = (columnHeights.length - nonEmptyHeights.length) * tolerance * 10; - - return variance + emptyPenalty; -} - -/** - * Adjust target height based on simulation results. - */ -function adjustTargetHeight( - simulation: SimulationResult, - currentTarget: number, - ctx: BalancingContext, - config: ColumnBalancingConfig, -): number { - const heights = simulation.columnHeights; - const maxHeight = Math.max(...heights); - const minHeight = Math.min(...heights.filter((h) => h > 0)); - - // If last column is significantly taller, increase target - if (heights[heights.length - 1] > maxHeight * 0.9 && heights[heights.length - 1] > currentTarget) { - return Math.min(currentTarget + (maxHeight - currentTarget) / 2, ctx.availableHeight); - } - - // If first columns are too tall and last is too short, decrease target - if (heights[0] > currentTarget && heights[heights.length - 1] < currentTarget * 0.5) { - return Math.max(currentTarget - (currentTarget - minHeight) / 2, config.minColumnHeight); - } - - // Binary search style adjustment - const diff = maxHeight - minHeight; - if (maxHeight > currentTarget) { - return Math.min(currentTarget + diff / 4, ctx.availableHeight); - } else { - return Math.max(currentTarget - diff / 4, config.minColumnHeight); - } -} - // ============================================================================ // Helper Functions // ============================================================================ @@ -562,7 +489,7 @@ export function shouldSkipBalancing( * Fragment with required properties for column balancing. * Represents a positioned content block that can be redistributed across columns. */ -interface BalancingFragment { +export interface BalancingFragment { /** Horizontal position in pixels from left edge of page */ x: number; /** Vertical position in pixels from top edge of page */ @@ -585,7 +512,7 @@ interface BalancingFragment { * Measure data used to calculate fragment heights. * Contains layout measurements from the measuring phase. */ -interface MeasureData { +export interface MeasureData { /** Type of measure: 'paragraph', 'image', etc. */ kind: string; /** Line measurements for paragraph content */ @@ -646,6 +573,49 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map` + * has no text content — all measured lines have `width === 0`. Word does + * NOT render an empty line for such markers, so they take no vertical space + * in the balanced layout. Treating them as balance-neutral prevents them + * from pushing their column's cursor down and unbalancing the section's + * final page against Word's output (ECMA-376 §17.18.77). + * + * For every other paragraph, image, drawing, or table this delegates to + * the standard `getFragmentHeight` so existing behavior is preserved. + */ +function getBalancingHeight(fragment: BalancingFragment, measureMap: Map): number { + if (fragment.kind === 'para') { + const measure = measureMap.get(fragment.blockId) as + | (MeasureData & { lines?: Array<{ lineHeight: number; width?: number }> }) + | undefined; + if (measure?.kind === 'paragraph' && Array.isArray(measure.lines) && measure.lines.length > 0) { + const fromLine = fragment.fromLine ?? 0; + const toLine = fragment.toLine ?? measure.lines.length; + // A sectPr-marker paragraph is empty: every measured line has an + // EXPLICIT width of 0 (measuring-dom sets width per line). Treat + // undefined width as "has content" so synthetic test fixtures and any + // other callers that don't set line.width keep their existing height. + let allEmpty = true; + let sawAnyLine = false; + for (let i = fromLine; i < toLine; i++) { + const line = measure.lines[i]; + if (!line) continue; + sawAnyLine = true; + if (line.width !== 0) { + allEmpty = false; + break; + } + } + if (sawAnyLine && allEmpty) return 0; + } + } + return getFragmentHeight(fragment, measureMap); +} + /** * Balances column content on a page by redistributing fragments. * @@ -746,46 +716,377 @@ export function balancePageColumns( return; } - // Calculate target height per column for balanced distribution - const targetHeight = totalHeight / columns.count; - - // Skip balancing if target height is below minimum threshold - if (targetHeight < DEFAULT_BALANCING_CONFIG.minColumnHeight) { + // Skip balancing if balanced height per column would be below minimum threshold + if (totalHeight / columns.count < DEFAULT_BALANCING_CONFIG.minColumnHeight) { return; } - // Distribute rows across columns using greedy algorithm. - // Each row is assigned to the current column until adding it would - // reach or exceed the target height, then we advance to the next column. - let currentColumn = 0; - let currentColumnHeight = 0; - let currentY = topMargin; - - for (const [, rowFragments] of sortedRows) { - const rowHeight = Math.max(...rowFragments.map((f) => f.height)); - - // Advance to next column when current column reaches target height. - // Uses >= to match Word's behavior: switch when target is reached, not just exceeded. - // This ensures balanced distribution where the first column doesn't exceed its share. - if ( - currentColumnHeight > 0 && - currentColumnHeight + rowHeight >= targetHeight && - currentColumn < columns.count - 1 - ) { - currentColumn++; - currentColumnHeight = 0; - currentY = topMargin; - } - - // Position all fragments in this row within the current column - const colX = columnX(currentColumn); + // Delegate to the binary-search algorithm to find the minimum section height + // where all content fits across N columns. This matches Word's behavior: Word + // finds the smallest max-column-height that keeps content within constraints, + // rather than greedily splitting at total/N (which can leave col1 barely + // populated when one paragraph is much taller than the rest). + const result = calculateBalancedColumnHeight( + { + columnCount: columns.count, + columnWidth: columns.width, + columnGap: columns.gap, + availableHeight, + contentBlocks, + }, + DEFAULT_BALANCING_CONFIG, + ); + + // Apply the assignments to fragments: pack each column top-to-bottom from topMargin, + // indexing back into sortedRows via the same ordering used to build contentBlocks. + const colCursors = new Array(columns.count).fill(topMargin); + for (let i = 0; i < sortedRows.length; i++) { + const [, rowFragments] = sortedRows[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + const colX = columnX(col); + const rowHeight = block.measuredHeight; for (const info of rowFragments) { info.fragment.x = colX; - info.fragment.y = currentY; + info.fragment.y = colCursors[col]; info.fragment.width = columns.width; } + colCursors[col] += rowHeight; + } +} + +// ============================================================================ +// Section-scoped balancing (wraps balancePageColumns with per-section guards) +// ============================================================================ + +/** + * Column layout properties relevant to balancing decisions. + * Mirrors the subset of ColumnLayout that this module reads. + */ +export interface SectionColumnLayout { + count: number; + gap: number; + width?: number; + widths?: number[]; + equalWidth?: boolean; +} - currentColumnHeight += rowHeight; - currentY += rowHeight; +export interface BalanceSectionOnPageArgs { + /** All fragments on the target page. Only those belonging to sectionIndex are balanced (mutated in place). */ + fragments: BalancingFragment[]; + /** Section whose content ends on this page. */ + sectionIndex: number; + /** Column layout of the ending section. */ + sectionColumns: SectionColumnLayout; + /** True if the section contains an explicit — skip balancing to preserve author intent. */ + sectionHasExplicitColumnBreak: boolean; + /** blockId -> sectionIndex map (built once per layout, shared across calls). */ + blockSectionMap: Map; + /** Left page margin, used to compute column X positions. */ + margins: { left: number }; + /** Y position where the section's region begins on this page. */ + topMargin: number; + /** Column width — passed to balancePageColumns so it can resize fragments. */ + columnWidth: number; + /** Available height from topMargin to content bottom. */ + availableHeight: number; + /** Measurement data for fragments (built from measures array). */ + measureMap: Map; +} + +/** + * Balance the fragments of one section on one page. + * + * Returns the tallest balanced column's bottom Y, or null if balancing was skipped. + * Callers can use the returned Y to update paginator cursors so subsequent content + * starts just below the balanced section rather than below an unbalanced maxCursorY. + * + * Guards (skip balancing when): + * - Section has <= 1 column (nothing to balance) + * - Section contains an explicit column break (author intent wins) + * - Section uses unequal column widths (Word doesn't rebalance these) + * - No fragments on this page belong to the section + */ +export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: number } | null { + const { sectionColumns, sectionHasExplicitColumnBreak, sectionIndex, blockSectionMap, fragments } = args; + + if (sectionColumns.count <= 1) return null; + if (sectionHasExplicitColumnBreak) return null; + if (sectionColumns.equalWidth === false && Array.isArray(sectionColumns.widths) && sectionColumns.widths.length > 0) { + return null; + } + + // Filter to fragments of the target section on this page. + const sectionFragments = fragments.filter((f) => blockSectionMap.get(f.blockId) === sectionIndex); + if (sectionFragments.length === 0) return null; + + const columnCount = sectionColumns.count; + const columnGap = sectionColumns.gap; + const columnWidth = sectionColumns.width ?? 0; + if (columnWidth <= 0) return null; + + // Use the minimum Y of the section's fragments as the balancing origin — the + // section may start mid-page (e.g. section 0 is single-column and section 1 + // continues below it). Using topMargin unconditionally would stack balanced + // columns on top of earlier single-column content on the same page. + let sectionTopY = Number.POSITIVE_INFINITY; + for (const f of sectionFragments) { + if (f.y < sectionTopY) sectionTopY = f.y; } + if (!Number.isFinite(sectionTopY)) sectionTopY = args.topMargin; + + // Remaining height from the section's actual top to the page content bottom. + const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); + if (remainingHeight <= 0) return null; + + // Pre-split a dominant table fragment at a row boundary before balancing. + // + // When a section's final page contains a single splittable table that's + // taller than (totalSectionHeight / columnCount), the atomic-block balancer + // can only place the whole table in one column — leaving the other column + // empty. Word's behavior per ECMA-376 §17.18.77 is to balance the + // REMAINING content, which for a narrow table means splitting at a row + // boundary so both columns carry roughly half the rows. + // + // We split ONCE per balance call (we only need two halves for a 2-col + // section; extending to N > 2 would iterate). SD-2646: IT-945 page 2 has a + // 515px / 28-row table; splitting into ~257px halves lets the balancer + // assign half to each column. + splitDominantTableAtRowBoundary({ + sectionFragments, + fragments, + columnCount, + measureMap: args.measureMap, + }); + + // Order fragments in document order: by current column (x → left-to-right), + // then by y within each column. During unbalanced layout the paginator fills + // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the + // original sequence. + const ordered = [...sectionFragments].sort((a, b) => { + if (a.x !== b.x) return a.x - b.x; + return a.y - b.y; + }); + + // Treat each fragment as its own block for binary-search balancing. Grouping + // by y (as balancePageColumns does) would collapse fragments from different + // source columns that happen to share a y coordinate into a single row and + // re-stack them at one position — producing overlap. + // + // Use `getBalancingHeight` so empty sectPr-marker paragraphs contribute 0 + // to their column's cursor — matching Word's behavior of not rendering a + // blank line for such markers. + const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ + blockId: `${f.blockId}#${i}`, + measuredHeight: getBalancingHeight(f, args.measureMap), + canBreak: false, + keepWithNext: false, + keepTogether: true, + })); + + if ( + shouldSkipBalancing({ + columnCount, + columnWidth, + columnGap, + availableHeight: remainingHeight, + contentBlocks, + }) + ) { + return null; + } + + const result = calculateBalancedColumnHeight( + { columnCount, columnWidth, columnGap, availableHeight: remainingHeight, contentBlocks }, + DEFAULT_BALANCING_CONFIG, + ); + + const columnX = (columnIndex: number): number => args.margins.left + columnIndex * (columnWidth + columnGap); + + const colCursors = new Array(columnCount).fill(sectionTopY); + let maxY = sectionTopY; + for (let i = 0; i < ordered.length; i++) { + const f = ordered[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + f.x = columnX(col); + f.y = colCursors[col]; + f.width = columnWidth; + colCursors[col] += block.measuredHeight; + if (colCursors[col] > maxY) maxY = colCursors[col]; + } + return { maxY }; +} + +/** + * Table measure shape used by the row-split preprocessor. + * + * Only the row-heights array is required. We access it through the runtime + * `measureMap` stored as `MeasureData`, which narrows the interface for the + * public balancer API — but the stored value is the full `TableMeasure` + * containing `rows: [{ height }]`, so a cast is safe. + */ +interface TableMeasureLike { + kind: string; + rows?: Array<{ height: number }>; +} + +/** + * Row-boundary record used by the renderer to draw horizontal dividers. + * Mirrors the subset of `TableFragmentMetadata['rowBoundaries']` that the + * split helper needs to read and regenerate. + */ +interface RowBoundaryLike { + i: number; + y: number; + h: number; + min: number; + r: number; +} + +/** + * In-place split of a dominant table fragment at a row boundary. + * + * Problem this solves: the column balancer treats each fragment as an atomic + * block. A multi-page two-column continuous section's final page can end up + * with a single table fragment that exceeds half the section's height. The + * balancer then places the whole table in one column and leaves the other + * empty — diverging from Word, which balances by splitting the table at a + * row boundary (ECMA-376 §17.18.77: continuous breaks balance the previous + * section's content). + * + * This preprocessor runs once per `balanceSectionOnPage` call. It detects a + * single dominant table fragment and splits it at the row whose cumulative + * height first meets or exceeds totalSectionHeight / columnCount. The two + * halves are inserted into both `sectionFragments` and the page's + * `fragments` array in place of the original; the rest of the balancer then + * runs on N + 1 similarly-sized blocks and naturally assigns one to each + * column. + * + * Guards: + * - Only one splittable table fragment on the section's page (skip if 0 or >1). + * - Table must span at least 2 rows (can't split a 1-row fragment). + * - Total height must exceed target (= total / columnCount) by more than a + * small epsilon; otherwise the atomic balancer already fits. + * + * Splitting is NOT appropriate when the author placed explicit column breaks + * or used unequal columns — those cases are already filtered by the caller. + * + * @param args Section fragments (already filtered to this section), the + * full page fragments (mutated in place), column count, and the + * measure map. + */ +function splitDominantTableAtRowBoundary(args: { + sectionFragments: BalancingFragment[]; + fragments: BalancingFragment[]; + columnCount: number; + measureMap: Map; +}): void { + const { sectionFragments, fragments, columnCount, measureMap } = args; + if (columnCount <= 1) return; + + const tables = sectionFragments.filter((f) => f.kind === 'table'); + if (tables.length !== 1) return; + const table = tables[0] as BalancingFragment & { + fromRow?: number; + toRow?: number; + height?: number; + continuesFromPrev?: boolean; + continuesOnNext?: boolean; + metadata?: { rowBoundaries?: RowBoundaryLike[]; columnBoundaries?: unknown; coordinateSystem?: string }; + }; + const fromRow = table.fromRow ?? 0; + const toRow = table.toRow ?? fromRow; + const rowSpan = toRow - fromRow; + if (rowSpan < 2) return; + + const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; + if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return; + + const totalSectionHeight = sectionFragments.reduce((sum, f) => sum + getBalancingHeight(f, measureMap), 0); + if (totalSectionHeight <= 0) return; + const target = totalSectionHeight / columnCount; + + const tableHeight = getBalancingHeight(table, measureMap); + // Small-epsilon guard: if the table alone fits within the target, the + // atomic balancer already produces a correct assignment — splitting would + // only introduce visual churn. + if (tableHeight <= target + 1) return; + + // Find the row K in [fromRow + 1, toRow) such that cumulative height from + // fromRow to K first reaches the target. Guaranteed to succeed because + // tableHeight > target and rowSpan ≥ 2. + let running = 0; + let splitRow = fromRow + 1; + for (let r = fromRow; r < toRow - 1; r++) { + const h = measure.rows[r]?.height ?? 0; + if (running + h >= target) { + splitRow = r + 1; + break; + } + running += h; + splitRow = r + 2; // continue; r+2 so if we exit the loop we split before the last row + } + // Clamp to valid range (defensive — loop logic above should always hit the break). + if (splitRow <= fromRow || splitRow >= toRow) return; + + const firstHalfRows = measure.rows.slice(fromRow, splitRow); + const secondHalfRows = measure.rows.slice(splitRow, toRow); + const firstHalfHeight = firstHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + const secondHalfHeight = secondHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + + // Regenerate rowBoundaries so the renderer draws horizontal dividers at the + // right y-offsets inside each half. rowBoundaries are 0-origin within the + // fragment; we walk the measure's rows for each half and accumulate. + const makeRowBoundaries = (rows: Array<{ height: number }>, startIndex: number): RowBoundaryLike[] => { + const out: RowBoundaryLike[] = []; + let y = 0.5; + for (let i = 0; i < rows.length; i++) { + const h = rows[i].height ?? 0; + out.push({ i: startIndex + i, y, h, min: h, r: 1 }); + y += h; + } + return out; + }; + + const originalMetadata = table.metadata; + const firstMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(firstHalfRows, 0), + } + : undefined; + const secondMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(secondHalfRows, 0), + } + : undefined; + + // Construct the first half by mutating the original (preserves object + // identity so `fragments.indexOf(table)` still works below). + table.toRow = splitRow; + table.height = firstHalfHeight; + table.continuesOnNext = true; + if (firstMetadata) table.metadata = firstMetadata; + + const secondHalf: BalancingFragment = { + ...table, + fromRow: splitRow, + toRow, + height: secondHalfHeight, + continuesFromPrev: true, + continuesOnNext: table.continuesOnNext ? false : (table.continuesOnNext ?? false), + metadata: secondMetadata ?? table.metadata, + } as BalancingFragment; + // Preserve the original's continuesOnNext intent: if the original fragment + // continued onto a later page, the SECOND half now inherits that role. + (secondHalf as { continuesOnNext?: boolean }).continuesOnNext = false; + + // Insert the second half right after the first in both arrays so the + // balancer's (x, y) ordering keeps them adjacent in document order. + const fragIdx = fragments.indexOf(table); + if (fragIdx >= 0) fragments.splice(fragIdx + 1, 0, secondHalf); + const sectIdx = sectionFragments.indexOf(table); + if (sectIdx >= 0) sectionFragments.splice(sectIdx + 1, 0, secondHalf); } diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index 6d4dcb7228..3d4c736265 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2868,6 +2868,280 @@ describe('layoutDocument', () => { expect(singleColFragment?.width).not.toBeCloseTo(twoColFragment!.width, 0); }); }); + + describe('column balancing at section boundaries (SD-2452)', () => { + /** + * End-to-end tests for the column-balancing feature. + * + * These tests drive layoutDocument with synthetic blocks/measures and assert on + * the OBSERVABLE fragment positions produced by the full pipeline — not on internal + * helper calls. When Word's algorithm changes or when we swap out the balancing + * implementation, these tests continue to assert what users see. + */ + + const PAGE: LayoutOptions = { + pageSize: { w: 612, h: 792 }, + margins: { top: 72, right: 72, bottom: 72, left: 72 }, + }; + const LEFT_MARGIN = 72; + const CONTENT_WIDTH = 612 - 72 - 72; // 468 + const COLUMN_GAP = 48; + const TWO_COL_WIDTH = (CONTENT_WIDTH - COLUMN_GAP) / 2; // 210 + const TWO_COL_RIGHT_X = LEFT_MARGIN + TWO_COL_WIDTH + COLUMN_GAP; // 330 + + /** Build a 2-col section ending with a section break, surrounded by single-column context. */ + function buildTwoColumnSection(paragraphCount: number, lineHeight = 20) { + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + + for (let i = 0; i < paragraphCount; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + } + + blocks.push({ + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock); + measures.push({ kind: 'sectionBreak' }); + + blocks.push({ kind: 'paragraph', id: 'p-after', runs: [], attrs: { sectionIndex: 1 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + + return { blocks, measures }; + } + + it('distributes 6 equal paragraphs evenly across 2 columns (3+3)', () => { + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + + const sectionFragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + const col0 = sectionFragments.filter((f) => f.x === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => f.x === TWO_COL_RIGHT_X); + + expect(col0.length + col1.length).toBe(6); + // Minimum-height balance of 6 equal 20px paragraphs across 2 columns is 3+3 + // (tallest column = 60px). Any split closer to 1+5 or 2+4 produces a taller + // section than Word would render. This assertion fails if balancing is absent, + // uses an incorrect algorithm, or runs with a wrong available height. + expect(col0).toHaveLength(3); + expect(col1).toHaveLength(3); + }); + + it('places post-section single-column content just below the balanced columns', () => { + // Before the fix: "p-after" sat below all 6 paragraphs stacked in col 0 (y ~= top + 120). + // After the fix: columns balance to 3+3 (height = top + 60), so p-after starts at top + 60. + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + const firstSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p0', + ); + const afterSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p-after', + ); + expect(firstSectionPara).toBeDefined(); + expect(afterSectionPara).toBeDefined(); + + // p-after's Y reflects a balanced 3-row column (3 × 20px) above it. + const expectedBalancedBottom = firstSectionPara!.y + 3 * 20; + expect(afterSectionPara!.y).toBe(expectedBalancedBottom); + }); + + it('fills BOTH columns on every page of a multi-page 2-col continuous section', () => { + // ECMA-376 §17.18.77 (ST_SectionMark): a continuous section break + // "balances content of the previous section." Word's observable behavior + // is to fill col 0 to the balanced target, wrap to col 1 to the same + // target, then wrap to the next page — on EVERY page, not only the last. + // Regression for SD-2646: earlier pages must not stack content in col 0. + const lineHeight = 20; + const paraCount = 100; // ≈ 2000px, exceeds one page's content area + const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + for (const page of layout.pages) { + const sectionFragments = page.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length < 2) continue; // tail of last page may have <2 fragments + const col0 = sectionFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + expect(col0.length).toBeGreaterThan(0); + expect(col1.length).toBeGreaterThan(0); + } + }); + + it('flows a narrow multi-page table across both columns on every page (SD-2646 regression)', () => { + // IT-945 shape: a narrow table (one column wide) inside a 2-col continuous + // section, spanning multiple pages. Regression guard for the layout path + // once pm-adapter correctly places the table in the 2-col section. + const rowCount = 114; + const rowHeight = 18.4; + const cellWidth = TWO_COL_WIDTH / 2; + + const rows = Array.from({ length: rowCount }, (_, r) => ({ + id: `tbl-row-${r}`, + cells: [ + { id: `tbl-cell-${r}-0`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-0-p`, runs: [] } }, + { id: `tbl-cell-${r}-1`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-1-p`, runs: [] } }, + ], + })); + const tbl: TableBlock = { + kind: 'table', + id: 'tbl', + rows, + attrs: { sectionIndex: 0 }, + } as TableBlock; + const tblM: TableMeasure = { + kind: 'table', + rows: Array.from({ length: rowCount }, () => ({ + height: rowHeight, + cells: [ + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + ], + })), + columnWidths: [cellWidth, cellWidth], + totalWidth: cellWidth * 2, + totalHeight: rowHeight * rowCount, + }; + + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + tbl as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }, tblM, { kind: 'sectionBreak' }]; + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + let pagesWithBothColumns = 0; + for (const page of layout.pages) { + const tableFragments = page.fragments.filter((f) => f.kind === 'table'); + if (tableFragments.length === 0) continue; + const col0 = tableFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = tableFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + if (col0.length > 0 && col1.length > 0) pagesWithBothColumns++; + } + // At least one page must have fragments in both columns. A stricter + // assertion (every page) is made invalid by the tail of the final page + // which can reasonably hold <1 column's worth of content. + expect(pagesWithBothColumns).toBeGreaterThan(0); + }); + + it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { + const threeColStart: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 3, gap: 24 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock; + const sectionEnd: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock; + + const blocks: FlowBlock[] = [threeColStart]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + for (let i = 0; i < 6; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([20])); + } + blocks.push(sectionEnd); + measures.push({ kind: 'sectionBreak' }); + + const layout = layoutDocument(blocks, measures, PAGE); + + const fragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p'), + ); + const uniqueX = new Set(fragments.map((f) => Math.round(f.x))); + // All three columns should be used — no column is empty. + expect(uniqueX.size).toBe(3); + }); + + it('leaves fragments untouched when the section has an explicit column break', () => { + // Author-placed must override balancing. + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + { kind: 'paragraph', id: 'p1', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { kind: 'columnBreak', id: 'br', attrs: { sectionIndex: 0 } } as ColumnBreakBlock, + { kind: 'paragraph', id: 'p2', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [ + { kind: 'sectionBreak' }, + makeMeasure([20]), + { kind: 'columnBreak' }, + makeMeasure([20]), + { kind: 'sectionBreak' }, + ]; + + const layout = layoutDocument(blocks, measures, PAGE); + + const p1 = layout.pages[0].fragments.find((f) => f.blockId === 'p1') as ParaFragment; + const p2 = layout.pages[0].fragments.find((f) => f.blockId === 'p2') as ParaFragment; + + // Author's column break is preserved: p1 in col 0, p2 in col 1. + expect(p1.x).toBe(LEFT_MARGIN); + expect(p2.x).toBe(TWO_COL_RIGHT_X); + }); + }); }); describe('layoutHeaderFooter', () => { diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 7277550626..60cdcd3e61 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -52,7 +52,7 @@ import { normalizeFragmentsForRegion } from './normalize-header-footer-fragments import { createPaginator, type PageState, type ConstraintBoundary } from './paginator.js'; import { formatPageNumber } from './pageNumbering.js'; import { shouldSuppressSpacingForEmpty, shouldSuppressOwnSpacing } from './layout-utils.js'; -import { balancePageColumns } from './column-balancing.js'; +import { balanceSectionOnPage, type BalancingFragment, type MeasureData } from './column-balancing.js'; import { cloneColumnLayout, widthsEqual } from './column-utils.js'; type PageSize = { w: number; h: number }; @@ -1566,6 +1566,43 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options ); }; + // Build shared maps for column balancing. These are consumed both mid-layout + // (at continuous section-break boundaries) and post-layout (per-section final + // page), so we construct them once here rather than rebuilding in each pass. + const balancingMeasureMap = new Map(); + const blockSectionMap = new Map(); + const sectionColumnsMap = new Map(); + const sectionHasExplicitColumnBreak = new Set(); + // Tracks sections already balanced mid-page — the post-layout pass skips these + // to avoid double-balancing, which would overlap fragments at the same x/y. + const alreadyBalancedSections = new Set(); + // Walk blocks in document order. sectionBreak blocks carry attrs.sectionIndex and + // are emitted BEFORE the first paragraph of their section (see pm-adapter). Every + // subsequent content block belongs to that section until the next sectionBreak, + // so we track currentSectionIdx and stamp it on each block. This is required because + // pm-adapter only sets attrs.sectionIndex on sectionBreak blocks, not paragraphs. + let currentSectionIdx: number | null = null; + blocks.forEach((block, idx) => { + const measure = measures[idx]; + if (measure) { + balancingMeasureMap.set(block.id, measure as MeasureData); + } + const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; + const attrSectionIdx = blockWithAttrs.attrs?.sectionIndex; + if (block.kind === 'sectionBreak' && typeof attrSectionIdx === 'number') { + currentSectionIdx = attrSectionIdx; + if (block.columns) { + sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); + } + } + if (currentSectionIdx !== null) { + blockSectionMap.set(block.id, currentSectionIdx); + if (block.kind === 'columnBreak') { + sectionHasExplicitColumnBreak.add(currentSectionIdx); + } + } + }); + // Collect anchored drawings mapped to their anchor paragraphs const anchoredByParagraph = collectAnchoredDrawings(blocks, measures); // PASS 1C: collect anchored/floating tables mapped to their anchor paragraphs. @@ -1904,9 +1941,73 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const columnIndexBefore = state.columnIndex; const newColumns = updatedState.pendingColumns; - // If reducing column count and currently in a column that won't exist - // in the new layout, start a fresh page to avoid overwriting earlier columns - if (columnIndexBefore >= newColumns.count) { + // Identify the ending section from the current page's fragments. + // `activeSectionIndex` only updates at page boundaries, so for continuous + // mid-page section breaks it's stale. Walk back through page fragments + // to find the most recent section index that isn't the new one — that's + // the section that's ending. + let endingSectionIndex: number | null = null; + for (let i = state.page.fragments.length - 1; i >= 0; i--) { + const mapped = blockSectionMap.get(state.page.fragments[i].blockId); + if (typeof mapped === 'number' && mapped !== metadataIndex) { + endingSectionIndex = mapped; + break; + } + } + const endingSectionColumns = + endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; + const willBalance = + endingSectionIndex !== null && + !!endingSectionColumns && + endingSectionColumns.count > 1 && + !sectionHasExplicitColumnBreak.has(endingSectionIndex); + + // Balance BEFORE any forced page break. After balancing, all of the + // ending section's fragments are repositioned within the section's own + // vertical region — there's no risk of the new 1-col region overwriting + // prior column content, because the cursor moves to maxY below them. + // + // When not balancing (e.g. single-col → multi-col, or explicit column + // break), fall back to the original "force a new page if currently in a + // column that won't exist after the change" guard so new content doesn't + // overwrite earlier column positions on the same page. + if (willBalance) { + // The current region starts at the last constraint boundary's Y, or at + // the page's top margin if no mid-page region change has happened yet. + const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; + const activeRegionTop = lastBoundary?.y ?? activeTopMargin; + const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; + const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); + const normalized = normalizeColumns(endingSectionColumns!, contentWidth); + const balanceResult = balanceSectionOnPage({ + fragments: state.page.fragments as BalancingFragment[], + sectionIndex: endingSectionIndex!, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: endingSectionColumns!.widths, + equalWidth: endingSectionColumns!.equalWidth, + }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeRegionTop, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); + if (balanceResult) { + // Collapse both cursors to the balanced section bottom so the new + // region starts there, not below an unbalanced tallest column. + state.cursorY = balanceResult.maxY; + state.maxCursorY = balanceResult.maxY; + alreadyBalancedSections.add(endingSectionIndex!); + } + } else if (columnIndexBefore >= newColumns.count) { + // Non-balancing case: reducing column count without balancing means + // starting the new region at col 0 could overwrite earlier column + // content. Force a fresh page to avoid that. state = paginator.startNewPage(); } @@ -2518,109 +2619,53 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } } - // Apply column balancing to pages with multi-column layout. - // This redistributes fragments to achieve balanced column heights, matching Word's behavior. - if (activeColumns.count > 1) { - const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); - const normalizedCols = normalizeColumns(activeColumns, contentWidth); - - // Build measure map for fragment height calculation during balancing - const measureMap = new Map; height?: number }>(); - // Build blockId -> sectionIndex map to filter fragments by section - const blockSectionMap = new Map(); - const sectionColumnsMap = new Map(); - blocks.forEach((block, idx) => { - const measure = measures[idx]; - if (measure) { - measureMap.set(block.id, measure as { kind: string; lines?: Array<{ lineHeight: number }>; height?: number }); - } - // Track section index for each block (for filtering during balancing) - // Not all block types have attrs, so access it safely - const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; - const sectionIdx = blockWithAttrs.attrs?.sectionIndex; - if (typeof sectionIdx === 'number') { - blockSectionMap.set(block.id, sectionIdx); - if (block.kind === 'sectionBreak' && block.columns) { - sectionColumnsMap.set(sectionIdx, cloneColumnLayout(block.columns)); - } - } - }); - - for (const page of pages) { - // Balance the last page (section ends at document end). - // TODO: Track section boundaries and balance at each continuous section break. - if (page === pages[pages.length - 1] && page.fragments.length > 0) { - const finalSectionColumns = sectionColumnsMap.get(activeSectionIndex) ?? activeColumns; - // Word does not rebalance the final page for sections that use explicit - // per-column widths. Preserve the natural left-to-right fill order there. - const hasExplicitColumnWidths = - finalSectionColumns?.equalWidth === false && - Array.isArray(finalSectionColumns.widths) && - finalSectionColumns.widths.length > 0; - - if (hasExplicitColumnWidths) { - continue; - } - - // Skip balancing if fragments are already in multiple columns (e.g., explicit column breaks). - // Balancing should only apply when all content flows naturally in column 0. - const uniqueXPositions = new Set(page.fragments.map((f) => Math.round(f.x))); - const hasExplicitColumnStructure = uniqueXPositions.size > 1; - - if (hasExplicitColumnStructure) { - continue; - } - - // Skip balancing if fragments have different widths (indicating different column configs - // from multiple sections). Balancing would incorrectly apply the final section's width to all. - const uniqueWidths = new Set(page.fragments.map((f) => Math.round(f.width))); - const hasMixedColumnWidths = uniqueWidths.size > 1; - - if (hasMixedColumnWidths) { - continue; - } - - // Check if page has content from multiple sections. - // If so, only balance fragments from the final multi-column section. - const fragmentSections = new Set(); - for (const f of page.fragments) { - const section = blockSectionMap.get(f.blockId); - if (section !== undefined) { - fragmentSections.add(section); - } - } - - // Only balance fragments from the final section when there are mixed sections - const hasMixedSections = fragmentSections.size > 1; - const fragmentsToBalance = hasMixedSections - ? page.fragments.filter((f) => { - const fragSection = blockSectionMap.get(f.blockId); - return fragSection === activeSectionIndex; - }) - : page.fragments; - - if (fragmentsToBalance.length > 0) { - const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; - balancePageColumns( - fragmentsToBalance as { - x: number; - y: number; - width: number; - kind: string; - blockId: string; - fromLine?: number; - toLine?: number; - height?: number; - }[], - normalizedCols, - { left: activeLeftMargin }, - activeTopMargin, - availableHeight, - measureMap, - ); - } + // Apply column balancing per section. For each section with a multi-column layout, + // find the final page that carries any of its fragments and balance those fragments. + // Earlier pages of a multi-page section are always fully filled (content overflowed + // to reach them), so balancing is a no-op there. This replaces the previous + // "last page of document" heuristic with proper per-section balancing — required + // to match Word's behavior when a document has multiple multi-column sections + // separated by continuous or next-page breaks. + // + // Mid-page continuous breaks are handled in the layout loop itself (see the + // forceMidPageRegion branch above). This post-layout pass handles sections that + // end at a page boundary or at document end. + const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); + for (const [sectionIdx, sectionCols] of sectionColumnsMap) { + if (sectionCols.count <= 1) continue; + if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; + if (alreadyBalancedSections.has(sectionIdx)) continue; + + // Find the last page carrying any fragments from this section. + let lastPageForSection: (typeof pages)[number] | null = null; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + lastPageForSection = p; } } + if (!lastPageForSection) continue; + + const normalized = normalizeColumns(sectionCols, contentWidth); + const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; + + balanceSectionOnPage({ + fragments: lastPageForSection.fragments as BalancingFragment[], + sectionIndex: sectionIdx, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: sectionCols.widths, + equalWidth: sectionCols.equalWidth, + }, + sectionHasExplicitColumnBreak: false, // already filtered above + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeTopMargin, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); } // Serialize constraint boundaries into page.columnRegions so DomPainter can diff --git a/packages/layout-engine/pm-adapter/README.md b/packages/layout-engine/pm-adapter/README.md index 27c0d1591c..e0653df917 100644 --- a/packages/layout-engine/pm-adapter/README.md +++ b/packages/layout-engine/pm-adapter/README.md @@ -23,3 +23,35 @@ Notes: - Handles lists, tables, images, vector shapes, SDT (TOC, structured content, doc parts), tracked changes, hyperlinks. - Consumes `@superdoc/style-engine` defaults and locale/tab interval hints from the PM document attrs. - `@superdoc/measuring-dom` is only a dependency for types; measurement happens later in the pipeline. + +## Section model (ECMA-376 §17.6, §17.18.77) + +Word uses **end-tagged** section semantics: a `` inside a paragraph defines the section that **ends with** that paragraph. The body-level `` defines the final section. All body children preceding a section-terminating paragraph — other paragraphs, **tables**, top-level drawings, SDT wrappers — belong to the section whose sectPr follows them. + +``` + + This is section A's first paragraph + + (section A props: 1-col, nextPage) + + ... ← section B (the NEXT sectPr) + + (section B props: 2-col continuous) + + This is section C's paragraph + (section C props: body-level) + +``` + +Section ranges are computed in `sections/analysis.ts` with two parallel indices: + +| Index | What it counts | Used by | +|---|---|---| +| `startParagraphIndex` / `endParagraphIndex` | Paragraph nodes only (including those inside SDT wrappers) | SDT handlers for intra-SDT section transitions | +| `startNodeIndex` / `endNodeIndex` | Every top-level `doc.content` child — paragraphs, tables, top-level drawings, SDTs | Main dispatch loop for inter-node section transitions | + +The main dispatch loop in `internal.ts` calls `maybeEmitNextSectionBreakForNode` BEFORE handling each top-level node. The helper uses `currentNodeIndex === nextSection.startNodeIndex` as its trigger, so non-paragraph nodes (tables especially) correctly get the sectionBreak emitted before them when they cross a section boundary. This is the fix for SD-2646. + +**When adding a new top-level block kind** (e.g., a new SDT type or a top-level drawing kind): do nothing. The dispatch-level hook covers you automatically. You only need a per-handler section check if your handler descends into children that carry their own sectPr markers — see the SDT handlers (`sdt/bibliography.ts`, `sdt/document-index.ts`, `sdt/table-of-authorities.ts`) for the intra-SDT pattern. + +Continuous section breaks (``) carry an extra requirement from §17.18.77: they "balance the content of the previous section." The balancing itself is the layout engine's responsibility (see `layout-engine/src/column-balancing.ts`); the adapter's job is only to ensure the right blocks are in the right section. diff --git a/packages/layout-engine/pm-adapter/src/attributes/borders.ts b/packages/layout-engine/pm-adapter/src/attributes/borders.ts index 3b5960fe9f..c080c0d9b4 100644 --- a/packages/layout-engine/pm-adapter/src/attributes/borders.ts +++ b/packages/layout-engine/pm-adapter/src/attributes/borders.ts @@ -36,7 +36,14 @@ type BorderConversionOptions = { * * Clamps results to reasonable bounds to prevent edge cases. */ -export const borderSizeToPx = (size?: number): number | undefined => eighthPointsToPixels(size, { clamp: true }); +export const borderSizeToPx = (size?: number): number | undefined => { + if (!isFiniteNumber(size)) return undefined; + if (size <= 0) return 0; + + const width = eighthPointsToPixels(size); + if (width == null) return undefined; + return clampPixelBorderWidth(width); +}; const clampPixelBorderWidth = (width: number): number => Math.min(MAX_BORDER_SIZE_PX, Math.max(MIN_BORDER_SIZE_PX, width)); diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index 441b210e73..b22a0de17c 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -1269,6 +1269,72 @@ describe('toFlowBlocks', () => { [first, second, third].forEach((b) => expect(b?.type).toBe('continuous')); }); }); + + describe('end-tagged section membership for non-paragraph nodes (SD-2646, ECMA-376 §17.6.17)', () => { + it('emits the next section break BEFORE a table that sits between two sectPr-marker paragraphs', () => { + // IT-945 shape: table lives between the paragraph that ends section A + // and the paragraph that ends section B. Per §17.6.17 the table + // belongs to section B, so the sectionBreak introducing B's columns + // must precede the table in the flow stream. + const pmDoc: PMNode = { + type: 'doc', + attrs: { bodySectPr: createTestBodySectPr() }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'This is my first section' }] }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'nextPage' } }, + { name: 'w:cols', attributes: { 'w:num': '1', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }] }, + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }] }, + ], + }, + ], + }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'continuous' } }, + { name: 'w:cols', attributes: { 'w:num': '2', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { type: 'paragraph', content: [{ type: 'text', text: 'This is my third section' }] }, + ], + } as never; + + const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); + + const tableIndex = blocks.findIndex((b) => b.kind === 'table'); + const twoColBreakIndex = blocks.findIndex((b) => b.kind === 'sectionBreak' && b.columns?.count === 2); + expect(tableIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeLessThan(tableIndex); + }); + }); }); describe('block id prefixing', () => { diff --git a/packages/layout-engine/pm-adapter/src/internal.test.ts b/packages/layout-engine/pm-adapter/src/internal.test.ts index e1972f240b..4c4eda6863 100644 --- a/packages/layout-engine/pm-adapter/src/internal.test.ts +++ b/packages/layout-engine/pm-adapter/src/internal.test.ts @@ -59,6 +59,10 @@ vi.mock('./sections/index.js', () => { id: nextBlockId('sectionBreak'), type: section.type, })), + maybeEmitNextSectionBreakForNode: vi.fn(() => { + // Mocked as no-op: this test file already provides zero section ranges + // via analyzeSectionRanges, so there is never a break to emit. + }), publishSectionMetadata: vi.fn(), }; }); diff --git a/packages/layout-engine/pm-adapter/src/internal.ts b/packages/layout-engine/pm-adapter/src/internal.ts index 1223d7ec7e..b7dde6420c 100644 --- a/packages/layout-engine/pm-adapter/src/internal.ts +++ b/packages/layout-engine/pm-adapter/src/internal.ts @@ -12,7 +12,12 @@ import type { FlowBlock, ParagraphBlock } from '@superdoc/contracts'; import { isValidTrackedMode } from './tracked-changes.js'; -import { analyzeSectionRanges, createSectionBreakBlock, publishSectionMetadata } from './sections/index.js'; +import { + analyzeSectionRanges, + createSectionBreakBlock, + maybeEmitNextSectionBreakForNode, + publishSectionMetadata, +} from './sections/index.js'; import { normalizePrefix, buildPositionMap, createBlockIdGenerator } from './utilities.js'; import { paragraphToFlowBlocks, @@ -208,6 +213,7 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): ranges: sectionRanges, currentSectionIndex: 0, currentParagraphIndex: 0, + currentNodeIndex: 0, }, converters, themeColors, @@ -216,12 +222,24 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): trackedListLastOrdinals: new Map(), }; - // Process nodes using handler dispatch pattern + // Process nodes using handler dispatch pattern. Before each top-level node + // we flush any pending section break whose `startNodeIndex` matches the + // current position — this keeps non-paragraph nodes (tables, top-level + // drawings) in their correct end-tagged section per ECMA-376 §17.6.17. doc.content.forEach((node) => { + maybeEmitNextSectionBreakForNode({ + sectionState: handlerContext.sectionState!, + nextBlockId, + pushBlock: (block) => { + blocks.push(block); + recordBlockKind(block.kind); + }, + }); const handler = nodeHandlers[node.type]; if (handler) { handler(node, handlerContext); } + handlerContext.sectionState!.currentNodeIndex++; }); // Ensure final body section is emitted only if not already emitted during paragraph processing. diff --git a/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts b/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts index facc5b8ef8..3ab4c4b17c 100644 --- a/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts +++ b/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts @@ -15,13 +15,9 @@ import { processTocChildren } from './toc.js'; * Processes TOC children for Table of Contents galleries. * For other gallery types (page numbers, etc.), processes child paragraphs normally. * - * If a preceding paragraph carried a `w:sectPr` whose next section starts at - * this SDT, emit the pending section break BEFORE processing children so the - * SDT's paragraphs render on the new page (see SD-2557). `findParagraphsWithSectPr` - * doesn't recurse into `documentPartObject`, so its child paragraphs don't bump - * `currentParagraphIndex` — and without this call, the deferred break would only - * fire on the next body paragraph AFTER the SDT, leaving e.g. a TOC on the - * prior page with the cover content. + * If a section transition occurs inside this SDT, child paragraph processing + * emits the pending break before the paragraph that starts the next section and + * advances `currentParagraphIndex` in step with `findParagraphsWithSectPr`. * * @param node - Document part object node to process * @param context - Shared handler context diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts index ac860ce7e6..97b6b5b2d7 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts @@ -296,8 +296,9 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 1, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(2); }); it('should find multiple paragraphs with sectPr', () => { @@ -315,12 +316,13 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(2); - expect(result.paragraphs[0]).toEqual({ index: 0, node: para1 }); - expect(result.paragraphs[1]).toEqual({ index: 2, node: para3 }); + expect(result.paragraphs[0]).toEqual({ index: 0, nodeIndex: 0, node: para1 }); + expect(result.paragraphs[1]).toEqual({ index: 2, nodeIndex: 2, node: para3 }); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(3); }); - it('should skip non-paragraph nodes', () => { + it('should skip non-paragraph nodes for paragraphIndex but count them in nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; @@ -334,11 +336,15 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + // paragraphIndex = 1 (second paragraph), nodeIndex = 2 (third top-level child + // after the heading). Non-paragraph top-level children count toward nodeIndex + // because ECMA-376 §17.6.17 sections span all body children. + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 2, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(4); }); - it('should include paragraphs inside index nodes', () => { + it('should include paragraphs inside index nodes and share the SDT nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; const para3 = { type: 'paragraph', content: [] }; @@ -352,8 +358,11 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); - expect(result.paragraphs).toEqual([{ index: 1, node: para2 }]); + // para2 is inside the SDT (index node) at top-level nodeIndex 1. + // paragraphIndex 1 still reflects paragraph order across descent. + expect(result.paragraphs).toEqual([{ index: 1, nodeIndex: 1, node: para2 }]); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(2); }); }); @@ -385,7 +394,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue(null); @@ -408,7 +417,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr', elements: [] }; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -450,8 +459,8 @@ describe('analysis', () => { }; const paragraphs = [ - { index: 2, node: para1 }, - { index: 5, node: para2 }, + { index: 2, nodeIndex: 2, node: para1 }, + { index: 5, nodeIndex: 5, node: para2 }, ]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr' }; @@ -487,7 +496,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.NEXT_PAGE, @@ -512,7 +521,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mock section data with page margins but no header/footer margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -550,7 +559,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only header margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -583,7 +592,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only footer margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -616,7 +625,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only top margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -649,7 +658,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Explicit zero values for all margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -688,7 +697,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mix of header/footer and page margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -725,7 +734,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ titlePg: false, @@ -747,7 +756,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.CONTINUOUS, diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.ts index 925b0561f5..51ab2e1d98 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.ts @@ -65,15 +65,27 @@ export function shouldIgnoreSectionBreak( /** * Find all paragraphs in the document that contain sectPr elements. * + * Records two indices per match: + * - `paragraphIndex` counts only paragraph nodes (including those nested + * inside SDT wrappers), matching the long-standing contract callers use + * for SDT-internal section transitions. + * - `nodeIndex` counts every top-level `doc.content` child (paragraph, + * table, top-level drawing, SDT, …). This is required to fix SD-2646: + * a non-paragraph node between two sectPr markers belongs to the LATER + * marker's section per ECMA-376 §17.6.17, so the dispatch loop must + * know each section's bounds in top-level-node terms. + * * @param doc - ProseMirror document node - * @returns Object containing paragraphs with sectPr and total paragraph count + * @returns Paragraph matches plus both totals */ export function findParagraphsWithSectPr(doc: PMNode): { - paragraphs: Array<{ index: number; node: PMNode }>; + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>; totalCount: number; + totalNodeCount: number; } { - const paragraphs: Array<{ index: number; node: PMNode }> = []; + const paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }> = []; let paragraphIndex = 0; + let nodeIndex = 0; const getNodeChildren = (node: PMNode): PMNode[] => { if (Array.isArray(node.content)) return node.content; const content = node.content as { forEach?: (cb: (child: PMNode) => void) => void } | undefined; @@ -87,10 +99,10 @@ export function findParagraphsWithSectPr(doc: PMNode): { return []; }; - const visitNode = (node: PMNode): void => { + const visitNode = (node: PMNode, outerNodeIndex: number): void => { if (node.type === 'paragraph') { if (hasSectPr(node)) { - paragraphs.push({ index: paragraphIndex, node }); + paragraphs.push({ index: paragraphIndex, nodeIndex: outerNodeIndex, node }); } paragraphIndex++; return; @@ -101,6 +113,10 @@ export function findParagraphsWithSectPr(doc: PMNode): { // their handlers increment `currentParagraphIndex` + call the section-break // emission helper per child. // + // SDT descendants share the outer SDT's nodeIndex — dispatch-level + // section transitions fire on the SDT as a whole, while child handlers can + // still emit paragraph-index transitions within the SDT. + // // `documentPartObject` / `tableOfContents` are important for SD-2557: // Word stores the closing sectPr of a TOC section on the trailing empty // paragraph INSIDE the SDT. Without recursion, that sectPr is invisible to @@ -113,17 +129,18 @@ export function findParagraphsWithSectPr(doc: PMNode): { node.type === 'documentPartObject' || node.type === 'tableOfContents' ) { - getNodeChildren(node).forEach(visitNode); + getNodeChildren(node).forEach((child) => visitNode(child, outerNodeIndex)); } }; if (doc.content) { for (const node of doc.content) { - visitNode(node); + visitNode(node, nodeIndex); + nodeIndex++; } } - return { paragraphs, totalCount: paragraphIndex }; + return { paragraphs, totalCount: paragraphIndex, totalNodeCount: nodeIndex }; } /** @@ -144,11 +161,12 @@ export function findParagraphsWithSectPr(doc: PMNode): { * @returns Array of section ranges */ export function buildSectionRangesFromParagraphs( - paragraphs: Array<{ index: number; node: PMNode }>, + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>, hasBodySectPr: boolean, ): SectionRange[] { const ranges: SectionRange[] = []; let currentStart = 0; + let currentStartNode = 0; paragraphs.forEach((item, idx) => { if (shouldIgnoreSectionBreak(item.node, idx, paragraphs.length, hasBodySectPr)) { @@ -170,6 +188,8 @@ export function buildSectionRangesFromParagraphs( const range: SectionRange = { sectionIndex: idx, + startNodeIndex: currentStartNode, + endNodeIndex: item.nodeIndex, startParagraphIndex: currentStart, endParagraphIndex: item.index, sectPr, @@ -196,6 +216,7 @@ export function buildSectionRangesFromParagraphs( ranges.push(range); currentStart = item.index + 1; + currentStartNode = item.nodeIndex + 1; }); return ranges; @@ -244,6 +265,7 @@ export function createFinalSectionFromBodySectPr( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange | null { const clampedStart = Math.max(0, Math.min(currentStart, Math.max(totalParagraphs - 1, 0))); @@ -267,8 +289,15 @@ export function createFinalSectionFromBodySectPr( bodySectionData.bottomPx != null || bodySectionData.leftPx != null; + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds + ? Math.max(0, Math.min(nodeBounds.startNodeIndex, Math.max(totalNodes - 1, 0))) + : clampedStart; + return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: clampedStart, endParagraphIndex: totalParagraphs - 1, sectPr: bodySectPr, @@ -307,9 +336,14 @@ export function createDefaultFinalSection( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange { + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds?.startNodeIndex ?? currentStart; return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: currentStart, endParagraphIndex: totalParagraphs - 1, sectPr: null, @@ -334,11 +368,13 @@ export function createDefaultFinalSection( * @returns Array of section ranges with backward-looking semantics */ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): SectionRange[] { - const { paragraphs, totalCount } = findParagraphsWithSectPr(doc); + const { paragraphs, totalCount, totalNodeCount } = findParagraphsWithSectPr(doc); const hasBody = Boolean(bodySectPr); const ranges = buildSectionRangesFromParagraphs(paragraphs, hasBody); - const currentStart = ranges.length > 0 ? ranges[ranges.length - 1].endParagraphIndex + 1 : 0; + const last = ranges[ranges.length - 1]; + const currentStart = last ? last.endParagraphIndex + 1 : 0; + const currentStartNode = last ? last.endNodeIndex + 1 : 0; // Always represent the final section defined by bodySectPr, even if there are // no remaining paragraphs after the last paragraph-level sectPr. This ensures @@ -349,12 +385,16 @@ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): Section Math.min(currentStart, totalCount), totalCount, ranges.length, + { startNodeIndex: Math.min(currentStartNode, totalNodeCount), totalNodeCount }, ); if (finalSection) { ranges.push(finalSection); } } else if (ranges.length > 0) { - const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length); + const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length, { + startNodeIndex: Math.min(currentStartNode, totalNodeCount), + totalNodeCount, + }); if (fallbackFinal) { fallbackFinal.type = DEFAULT_PARAGRAPH_SECTION_TYPE; ranges.push(fallbackFinal); diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index 4b4ed6f030..b7fe08ca74 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -201,16 +201,52 @@ interface SectionStateMutable { currentParagraphIndex: number; } +/** + * Emit the next section's sectionBreak block if the dispatch loop has reached + * that section's starting top-level node index. + * + * ECMA-376 §17.6.17: a section is defined by its end-tagged ``. All + * body children preceding that tag — paragraphs, tables, top-level drawings — + * belong to the section that ENDS at the tag. This helper fires BEFORE any + * such node so the appropriate section config is active by the time the node + * is laid out. + * + * Calling it from the main dispatch loop covers every top-level node type — + * present and future — with no per-handler opt-in. Paragraph-index emission + * still handles transitions inside SDT child content. + */ +export function maybeEmitNextSectionBreakForNode(args: { + sectionState: { + ranges: SectionRange[]; + currentSectionIndex: number; + currentNodeIndex: number; + }; + nextBlockId: BlockIdGenerator; + pushBlock: (block: SectionBreakBlock) => void; +}): void { + const { sectionState, nextBlockId, pushBlock } = args; + if (sectionState.ranges.length === 0) return; + if (sectionState.currentSectionIndex >= sectionState.ranges.length - 1) return; + + const nextSection = sectionState.ranges[sectionState.currentSectionIndex + 1]; + if (!nextSection) return; + if (sectionState.currentNodeIndex !== nextSection.startNodeIndex) return; + + const currentSection = sectionState.ranges[sectionState.currentSectionIndex]; + const requiresPageBoundary = + shouldRequirePageBoundary(currentSection, nextSection) || hasIntrinsicBoundarySignals(nextSection); + const extraAttrs = requiresPageBoundary ? { requirePageBoundary: true } : undefined; + pushBlock(createSectionBreakBlock(nextSection, nextBlockId, extraAttrs)); + sectionState.currentSectionIndex++; +} + /** * Emit a pending section break before a paragraph if the current paragraph * index matches the start of the next section. * - * Centralizes the "check, emit, advance" pattern used by paragraph and SDT - * handlers. SDT handlers that process children as an opaque block (e.g. - * TOC/docPartObj where child paragraphs aren't counted by - * `findParagraphsWithSectPr`) should call this ONCE at the SDT boundary — - * if the SDT sits at a section boundary, this emits the break so the SDT's - * contents render on the new page. + * Centralizes the "check, emit, advance" pattern for handlers that process + * paragraph children directly, including SDT handlers. This keeps nested + * paragraph traversal in sync with `findParagraphsWithSectPr`. * * No-op when: * - sectionState is undefined or has no ranges diff --git a/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts new file mode 100644 index 0000000000..8d7cf42210 --- /dev/null +++ b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts @@ -0,0 +1,118 @@ +/** + * @file End-tagged section-membership tests (ECMA-376 §17.6.17, §17.18.77). + * + * OOXML rule: a `` inside a paragraph defines the section that + * ENDS with that paragraph. All body children preceding it (back to the + * previous section-terminating paragraph) belong to that section — + * including tables, drawings, and other non-paragraph nodes. + * + * These tests use real implementations with no module mocks so they assert + * observable behavior of the section analysis, not call patterns. + */ +import { describe, it, expect } from 'vitest'; +import { analyzeSectionRanges } from './analysis.js'; +import type { PMNode } from '../types.js'; +import type { SectPrElement } from './types.js'; + +const sectPr = (options: { + type?: 'continuous' | 'nextPage'; + cols?: number; + colSpace?: number; + pgSz?: { w: number; h: number }; +}): SectPrElement => { + const elements: SectPrElement['elements'] = []; + if (options.type) { + elements!.push({ + type: 'element', + name: 'w:type', + attributes: { 'w:val': options.type }, + }); + } + if (options.pgSz) { + elements!.push({ + type: 'element', + name: 'w:pgSz', + attributes: { 'w:w': String(options.pgSz.w), 'w:h': String(options.pgSz.h) }, + }); + } + elements!.push({ + type: 'element', + name: 'w:cols', + attributes: { + 'w:num': String(options.cols ?? 1), + 'w:space': String(options.colSpace ?? 720), + }, + }); + return { + type: 'element', + name: 'w:sectPr', + elements, + }; +}; + +const paragraph = (marker?: SectPrElement): PMNode => ({ + type: 'paragraph', + attrs: marker ? { paragraphProperties: { sectPr: marker } } : {}, + content: [], +}); + +const table = (): PMNode => ({ + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }], + }, + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }], + }, + ], + }, + ], +}); + +describe('analyzeSectionRanges — end-tagged membership for non-paragraph nodes (SD-2646)', () => { + it('places a table between two sectPr markers into the section of the LATER marker', () => { + // Arrange — IT-945 shape: + // p0 "first section" text + // p1(sectPr-A) empty marker, 1-col nextPage → ends Section A + // TABLE belongs to Section B per ECMA-376 §17.6.17 + // p2(sectPr-B) empty marker, 2-col continuous → ends Section B + // p3 "third section" text + // body sectPr final, 1-col continuous → Section C + const markerA = sectPr({ type: 'nextPage', cols: 1 }); + const markerB = sectPr({ type: 'continuous', cols: 2, colSpace: 720 }); + const bodyMarker = sectPr({ type: 'continuous', cols: 1 }); + const doc: PMNode = { + type: 'doc', + attrs: { bodySectPr: bodyMarker }, + content: [paragraph(), paragraph(markerA), table(), paragraph(markerB), paragraph()], + }; + + // Act + const sut = analyzeSectionRanges(doc, bodyMarker); + + // Assert — three sections, with Section B's range spanning the table + expect(sut).toHaveLength(3); + const [sectionA, sectionB, sectionC] = sut; + + // Section A ends at p1 (node index 1). Table at node index 2 is NOT in A. + expect(sectionA.endNodeIndex).toBe(1); + expect(sectionA.columns?.count ?? 1).toBe(1); + + // Section B: starts AFTER Section A (node index 2 — the table), ends at p2 (node index 3). + // The table must belong to Section B, not Section A. + expect(sectionB.startNodeIndex).toBe(2); + expect(sectionB.endNodeIndex).toBe(3); + expect(sectionB.columns?.count).toBe(2); + + // Section C (body): starts at node index 4 (p3). + expect(sectionC.startNodeIndex).toBe(4); + expect(sectionC.columns?.count ?? 1).toBe(1); + }); +}); diff --git a/packages/layout-engine/pm-adapter/src/sections/index.ts b/packages/layout-engine/pm-adapter/src/sections/index.ts index 5f293b3d9d..b72280d106 100644 --- a/packages/layout-engine/pm-adapter/src/sections/index.ts +++ b/packages/layout-engine/pm-adapter/src/sections/index.ts @@ -41,5 +41,6 @@ export { isSectionBreakBlock, signaturesEqual, shallowObjectEquals, + maybeEmitNextSectionBreakForNode, emitPendingSectionBreakForParagraph, } from './breaks.js'; diff --git a/packages/layout-engine/pm-adapter/src/sections/types.ts b/packages/layout-engine/pm-adapter/src/sections/types.ts index a4134b48d0..551d8bcfd8 100644 --- a/packages/layout-engine/pm-adapter/src/sections/types.ts +++ b/packages/layout-engine/pm-adapter/src/sections/types.ts @@ -89,11 +89,22 @@ export type SectionVerticalAlign = 'top' | 'center' | 'bottom' | 'both'; /** * Section range represents a contiguous section in the document. - * Word uses "end-tagged" section semantics: a paragraph's sectPr defines - * properties for the section ENDING at that paragraph, not starting after it. + * + * Word uses "end-tagged" section semantics (ECMA-376 §17.6.17): a paragraph's + * sectPr defines properties for the section ENDING at that paragraph, not + * starting after it. All body children preceding the section-terminating + * paragraph — paragraphs, tables, top-level drawings — belong to that section. + * + * `startNodeIndex`/`endNodeIndex` are computed over every top-level + * `doc.content` child and are the authoritative boundaries for dispatching + * section breaks at emission time. `startParagraphIndex`/`endParagraphIndex` + * are retained for callers (SDT handlers) that count only paragraphs during + * recursive descent. */ export interface SectionRange { sectionIndex: number; + startNodeIndex: number; + endNodeIndex: number; startParagraphIndex: number; endParagraphIndex: number; sectPr: SectPrElement | null; diff --git a/packages/layout-engine/pm-adapter/src/types.d.ts b/packages/layout-engine/pm-adapter/src/types.d.ts index 13de3db49f..2285e17fcd 100644 --- a/packages/layout-engine/pm-adapter/src/types.d.ts +++ b/packages/layout-engine/pm-adapter/src/types.d.ts @@ -238,6 +238,7 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + currentNodeIndex: number; }; converters?: { paragraphToFlowBlocks?: ( diff --git a/packages/layout-engine/pm-adapter/src/types.ts b/packages/layout-engine/pm-adapter/src/types.ts index d60515c39b..0aed51dd1d 100644 --- a/packages/layout-engine/pm-adapter/src/types.ts +++ b/packages/layout-engine/pm-adapter/src/types.ts @@ -316,6 +316,13 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + /** + * Index of the current top-level `doc.content` child being dispatched. + * Advanced by the main dispatch loop in internal.ts — drives end-tagged + * section transitions for non-paragraph nodes (tables, top-level + * drawings, …) per ECMA-376 §17.6.17. + */ + currentNodeIndex: number; }; // Converters for nested content diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e7ca8ecf6b..9e14bb9c69 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6470,7 +6470,6 @@ packages: '@microsoft/teamsapp-cli@3.0.2': resolution: {integrity: sha512-AowuJwrrUxeF9Bq/frxuy9YZjK/ECk3pi0UBXl3CQLZ4XNWfgWatiFi/UWpyHDLccFs+0Za3nNYATFvgsxEFwQ==} engines: {node: '>=12'} - deprecated: This package is deprecated and supported Node.js version is 18-22. Please use @microsoft/m365agentstoolkit-cli instead. hasBin: true '@microsoft/teamsfx-api@0.23.1':