diff --git a/.gitignore b/.gitignore index cbff81c71..649fe0a3d 100644 --- a/.gitignore +++ b/.gitignore @@ -33,16 +33,16 @@ coverage/ # Producer regression test failures (generated debugging artifacts) packages/producer/tests/*/failures/ +# HDR smoke-test workdirs (generated by scripts/hdr-smoke.ts) +packages/producer/tests/hdr-regression/_renders/ + +# Local-only HDR demo fixture (large source media; not part of the test suite) +packages/producer/tests/hdr-regression/hdr-full-demo/ + # Rendered output (not test fixtures — those use git LFS) output/ -renders/ !packages/producer/tests/*/output/ -# Composition source media (large binaries) -compositions/**/*.mp4 -compositions/**/*.mov -compositions/**/*.MOV - # npm pack artifacts *.tgz diff --git a/.oxlintrc.json b/.oxlintrc.json index 0fc2d5f8c..933813280 100644 --- a/.oxlintrc.json +++ b/.oxlintrc.json @@ -4,5 +4,5 @@ "correctness": "error" }, "plugins": ["react", "typescript"], - "ignorePatterns": ["dist/", "coverage/", "node_modules/"] + "ignorePatterns": ["dist/", "coverage/", "node_modules/", "**/vendor/"] } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 84398d6fb..08f0f1481 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,6 +28,14 @@ bun run --filter @hyperframes/engine test # Engine unit tests (vitest) bun run --filter @hyperframes/core test:hyperframe-runtime-ci # Runtime contract tests ``` +For HDR-specific changes (BT.2020 PQ encode, sRGB→BT.2020 overlay +conversion, HDR10 metadata, mixed SDR/HDR compositing) the engine unit +tests above cover the building blocks, but end-to-end HDR pipeline +verification is handled by a separate manual smoke script that is **not** +in CI. See +[`packages/producer/tests/hdr-regression/README.md`](packages/producer/tests/hdr-regression/README.md) +for fixtures, the `hdr-smoke` runner, and the current coverage gaps. 
+ ### Linting & Formatting ```bash diff --git a/packages/engine/src/index.ts b/packages/engine/src/index.ts index 5cb608fd9..7ec1c5806 100644 --- a/packages/engine/src/index.ts +++ b/packages/engine/src/index.ts @@ -117,6 +117,8 @@ export { createFrameLookupTable, FrameLookupTable, type VideoElement, + parseImageElements, + type ImageElement, type ExtractedFrames, type ExtractionOptions, type ExtractionResult, @@ -168,7 +170,6 @@ export { blitRgb48leRegion, blitRgb48leAffine, parseTransformMatrix, - getSrgbToHdrLut, roundedRectAlpha, } from "./utils/alphaBlit.js"; @@ -209,8 +210,11 @@ export { detectTransfer, getHdrEncoderColorParams, analyzeCompositionHdr, + DEFAULT_HDR10_MASTERING, type HdrTransfer, type HdrEncoderColorParams, type CompositionHdrInfo, + type HdrMasteringMetadata, } from "./utils/hdr.js"; +export { injectHdrBoxes } from "./utils/mp4HdrBoxes.js"; export type { VideoColorSpace } from "./utils/ffprobe.js"; diff --git a/packages/engine/src/services/frameCapture.ts b/packages/engine/src/services/frameCapture.ts index 25a3c0918..d6243fec3 100644 --- a/packages/engine/src/services/frameCapture.ts +++ b/packages/engine/src/services/frameCapture.ts @@ -220,6 +220,22 @@ export async function initializeSession(session: CaptureSession): Promise } }); + // Install a no-op `__name` shim on every document before any script runs. + // + // tsx/bun's esbuild bundling has `keepNames` enabled, which wraps every + // `function` declaration (including ones nested inside `page.evaluate` + // callbacks) with a call to `__name(fn, "name")` to preserve + // `Function.prototype.name`. That helper is injected in the bundled host + // code but is NOT serialized into the function body Puppeteer ships to + // the browser — so any evaluate callback with nested `function` + // declarations crashes with `ReferenceError: __name is not defined`. 
+ // + // We pass a *string* (not a function) because esbuild does not transform + // string contents, and `evaluateOnNewDocument` runs before the page's + // own scripts so the shim is in place for every subsequent + // `page.evaluate()`. + await page.evaluateOnNewDocument("globalThis.__name = function (fn) { return fn; };"); + // Navigate to the file server const url = `${serverUrl}/index.html`; if (session.captureMode === "screenshot") { diff --git a/packages/engine/src/services/streamingEncoder.ts b/packages/engine/src/services/streamingEncoder.ts index 2d51b4066..660a9af73 100644 --- a/packages/engine/src/services/streamingEncoder.ts +++ b/packages/engine/src/services/streamingEncoder.ts @@ -17,7 +17,8 @@ import { existsSync, mkdirSync, statSync } from "fs"; import { dirname } from "path"; import { type GpuEncoder, getCachedGpuEncoder, getGpuEncoderName } from "../utils/gpuEncoder.js"; -import { getHdrEncoderColorParams } from "../utils/hdr.js"; +import { DEFAULT_HDR10_MASTERING, getHdrEncoderColorParams } from "../utils/hdr.js"; +import { injectHdrBoxes } from "../utils/mp4HdrBoxes.js"; import { type EncoderOptions } from "./chunkEncoder.types.js"; import { DEFAULT_CONFIG, type EngineConfig } from "../config.js"; @@ -358,6 +359,13 @@ export async function spawnStreamingEncoder( exitPromiseResolve?.(); }); + // Prevent unhandled 'error' events on stdin from crashing the process. + // Writes to a pipe that ffmpeg has already closed (e.g. the last frame + // races with ffmpeg's exit) emit EINVAL/EPIPE. The close handler above + // captures the exit status; swallowing the stdin error avoids a crash + // while still reporting the failure via the result object. 
+ ffmpeg.stdin?.on("error", () => {}); + // Handle abort signal const onAbort = () => { if (exitStatus === "running") { @@ -427,10 +435,32 @@ export async function spawnStreamingEncoder( success: false, durationMs, fileSize: 0, - error: `FFmpeg exited with code ${exitCode}`, + error: `FFmpeg exited with code ${exitCode}: ${stderr.slice(-2000)}`, }; } + // Post-encode: inject mdcv/clli MP4 container boxes for HDR10 outputs. + // x265 emits these as in-band SEI messages, but FFmpeg's mov muxer + // doesn't propagate them to the container. YouTube, Apple AirPlay, and + // most HDR TVs need the container-level boxes to recognize the file as + // HDR — without them they tone-map as SDR BT.2020. See mp4HdrBoxes.ts. + // Only relevant for h265 + hdr (the only path that emits the SEI in + // the first place); a no-op otherwise. + if (options.hdr && options.codec === "h265" && existsSync(outputPath)) { + try { + injectHdrBoxes(outputPath, DEFAULT_HDR10_MASTERING); + } catch (err) { + // Best-effort: a malformed MP4 from FFmpeg shouldn't fail the + // whole encode, but we want to know about it in logs. The file + // is still playable; only HDR recognition on strict ingests is + // affected. + // eslint-disable-next-line no-console + console.warn( + `[streamingEncoder] HDR box injection failed for ${outputPath}: ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + const fileSize = existsSync(outputPath) ? 
statSync(outputPath).size : 0; return { success: true, durationMs, fileSize }; diff --git a/packages/engine/src/services/videoFrameExtractor.test.ts b/packages/engine/src/services/videoFrameExtractor.test.ts index fc7ccb21f..49d7d3c18 100644 --- a/packages/engine/src/services/videoFrameExtractor.test.ts +++ b/packages/engine/src/services/videoFrameExtractor.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { parseVideoElements } from "./videoFrameExtractor.js"; +import { parseVideoElements, parseImageElements } from "./videoFrameExtractor.js"; describe("parseVideoElements", () => { it("parses videos without an id or data-start attribute", () => { @@ -32,3 +32,58 @@ describe("parseVideoElements", () => { }); }); }); + +describe("parseImageElements", () => { + it("parses img elements with data-start and data-duration", () => { + const html = `
`; + const images = parseImageElements(html); + expect(images).toHaveLength(1); + expect(images[0]).toEqual({ id: "i1", src: "photo.jpg", start: 2, end: 7 }); + }); + + it("skips img without data-duration", () => { + const html = `
`; + const images = parseImageElements(html); + expect(images).toHaveLength(0); + }); + + it("generates stable IDs for img without id attribute", () => { + const html = `
`; + const images = parseImageElements(html); + expect(images).toHaveLength(2); + expect(images[0].id).toBe("hf-img-0"); + expect(images[1].id).toBe("hf-img-1"); + }); + + it("returns an empty array when no elements are present", () => { + expect(parseImageElements("
")).toEqual([]); + expect(parseImageElements('
')).toEqual([]); + }); + + it("skips images with non-positive or non-finite data-duration", () => { + const html = ` + + + + + `; + expect(parseImageElements(html)).toEqual([]); + }); + + it("defaults data-start to 0 when missing", () => { + const html = ``; + const images = parseImageElements(html); + expect(images).toHaveLength(1); + expect(images[0]).toEqual({ id: "i1", src: "a.jpg", start: 0, end: 4 }); + }); + + it("preserves duplicate ids (current behavior — caller is responsible for uniqueness)", () => { + const html = ` + + + `; + const images = parseImageElements(html); + expect(images).toHaveLength(2); + expect(images.map((i) => i.id)).toEqual(["dup", "dup"]); + }); +}); diff --git a/packages/engine/src/services/videoFrameExtractor.ts b/packages/engine/src/services/videoFrameExtractor.ts index 1252d1ee0..2d43603ba 100644 --- a/packages/engine/src/services/videoFrameExtractor.ts +++ b/packages/engine/src/services/videoFrameExtractor.ts @@ -24,6 +24,13 @@ export interface VideoElement { hasAudio: boolean; } +export interface ImageElement { + id: string; + src: string; + start: number; + end: number; +} + export interface ExtractedFrames { videoId: string; srcPath: string; @@ -97,6 +104,29 @@ export function parseVideoElements(html: string): VideoElement[] { return videos; } +export function parseImageElements(html: string): ImageElement[] { + const images: ImageElement[] = []; + const { document } = parseHTML(html); + + const imgEls = document.querySelectorAll("img[src]"); + let autoIdCounter = 0; + for (const el of imgEls) { + const src = el.getAttribute("src"); + if (!src) continue; + const id = el.getAttribute("id") || `hf-img-${autoIdCounter++}`; + const start = parseFloat(el.getAttribute("data-start") ?? "0"); + const duration = parseFloat(el.getAttribute("data-duration") ?? "0"); + if (!Number.isFinite(duration) || duration <= 0) continue; + images.push({ + id, + src, + start: Number.isFinite(start) ? start : 0, + end: (Number.isFinite(start) ? 
start : 0) + duration, + }); + } + return images; +} + export async function extractVideoFramesRange( videoPath: string, videoId: string, @@ -261,6 +291,11 @@ export async function extractAllVideoFrames( signal?: AbortSignal, config?: Partial>, compiledDir?: string, + /** Skip SDR→HDR conversion. Set true when the HDR compositing path handles + * color conversion in the blit step (sRGB→HLG/PQ LUT). Without this, the + * colorspace filter produces bt2020 pixels that Chrome misinterprets as sRGB, + * making SDR content invisible in HDR renders. */ + skipSdrConversion?: boolean, ): Promise { const startTime = Date.now(); const extracted: ExtractedFrames[] = []; @@ -304,7 +339,7 @@ export async function extractAllVideoFrames( ); const hasAnyHdr = videoColorSpaces.some(isHdrColorSpaceUtil); - if (hasAnyHdr) { + if (hasAnyHdr && !skipSdrConversion) { const convertDir = join(options.outputDir, "_hdr_normalized"); mkdirSync(convertDir, { recursive: true }); diff --git a/packages/engine/src/services/videoFrameInjector.ts b/packages/engine/src/services/videoFrameInjector.ts index 288b8903a..a2dbf485b 100644 --- a/packages/engine/src/services/videoFrameInjector.ts +++ b/packages/engine/src/services/videoFrameInjector.ts @@ -259,6 +259,16 @@ export async function queryElementStacking( ): Promise { const hdrIds = Array.from(nativeHdrVideoIds); return page.evaluate((hdrIdList: string[]): ElementStackingInfo[] => { + // NOTE: esbuild (used by tsx and bun) wraps every nested `function` + // declaration in this callback with a `__name(fn, "name")` helper to + // preserve `Function.prototype.name`. The helper has to be installed in + // the browser context before this callback runs — see + // `initializeSession` in frameCapture.ts, which calls + // `page.evaluateOnNewDocument` with a string (esbuild does not transform + // string contents) to define `globalThis.__name` as a no-op identity. 
+ // Trying to define the shim *inside* this callback fails because esbuild + // also wraps the shim's own assignment with a `__name(...)` call, + // creating a use-before-define cycle. const hdrSet = new Set(hdrIdList); const elements = document.querySelectorAll("[data-start]"); const results: ElementStackingInfo[] = []; diff --git a/packages/engine/src/utils/alphaBlit.test.ts b/packages/engine/src/utils/alphaBlit.test.ts index c5a5961b3..4edbfb7b5 100644 --- a/packages/engine/src/utils/alphaBlit.test.ts +++ b/packages/engine/src/utils/alphaBlit.test.ts @@ -495,18 +495,30 @@ describe("blitRgba8OverRgb48le", () => { expect(canvas.readUInt16LE(4)).toBe(50000); }); - it("fully opaque DOM: sRGB→HLG converted values overwrite canvas", () => { + it("fully opaque DOM: sRGB→BT.2020+HLG converted values overwrite canvas", () => { const canvas = makeHdrFrame(1, 1, 10000, 20000, 30000); - const dom = makeDomRgba(1, 1, 255, 128, 0, 255); // R=255, G=128, B=0, full opaque + const dom = makeDomRgba(1, 1, 255, 128, 0, 255); // sRGB orange blitRgba8OverRgb48le(dom, canvas, 1, 1); - // sRGB 255 → HLG 65535 (white maps to white) - // sRGB 128 → HLG ~46484 (mid-gray maps higher due to HLG OETF) - // sRGB 0 → HLG 0 - expect(canvas.readUInt16LE(0)).toBe(65535); - expect(canvas.readUInt16LE(2)).toBeGreaterThan(40000); // HLG mid-gray > sRGB mid-gray - expect(canvas.readUInt16LE(2)).toBeLessThan(50000); - expect(canvas.readUInt16LE(4)).toBe(0); + // sRGB orange (255, 128, 0) → linear (1.0, 0.216, 0) + // → BT.2020 linear (0.699, 0.268, 0.035) + // → HLG signal ≈ (61220, 49285, 21360) + // Without the primary conversion you'd get (65535, 49285ish, 0) — the + // ~21000 in B is what proves the matrix is being applied. 
+ const r = canvas.readUInt16LE(0); + const g = canvas.readUInt16LE(2); + const b = canvas.readUInt16LE(4); + + expect(r).toBeGreaterThan(58000); // ~61220, no longer pinned at 65535 + expect(r).toBeLessThan(64000); + expect(g).toBeGreaterThan(46000); // ~49285 + expect(g).toBeLessThan(53000); + expect(b).toBeGreaterThan(18000); // ~21360 — was 0 before primary conversion + expect(b).toBeLessThan(25000); + + // Channel ordering still reflects "orange" intent. + expect(r).toBeGreaterThan(g); + expect(g).toBeGreaterThan(b); }); it("sRGB→HLG: black stays black, white stays white", () => { @@ -808,11 +820,11 @@ describe("decodePng + blitRgba8OverRgb48le integration", () => { } }); - it("fully opaque PNG overlay overwrites all canvas pixels (sRGB→HLG)", () => { + it("fully opaque PNG overlay overwrites all canvas pixels (sRGB→BT.2020+HLG)", () => { const width = 2; const height = 2; - // Build a fully opaque blue PNG (sRGB blue = 0,0,255) + // Fully opaque pure-blue sRGB PNG const pixels = Array(width * height) .fill(null) .flatMap(() => [0, 0, 255, 255]); @@ -822,15 +834,122 @@ describe("decodePng + blitRgba8OverRgb48le integration", () => { const canvas = makeHdrFrame(width, height, 50000, 40000, 30000); blitRgba8OverRgb48le(domRgba, canvas, width, height); - // sRGB blue (0,0,255) → HLG (0, 0, 65535) — black/white map identically + // sRGB blue (0,0,255) → linear (0,0,1) → BT.2020 linear (~0.043, ~0.011, ~0.896) + // → HLG signal. After primary conversion the BT.2020 blue primary is + // *much wider* than sRGB blue, so the encoded blue is no longer pinned at + // 65535 — and R/G pick up small signals from the matrix mixing. 
for (let i = 0; i < width * height; i++) { - expect(canvas.readUInt16LE(i * 6 + 0)).toBe(0); - expect(canvas.readUInt16LE(i * 6 + 2)).toBe(0); - expect(canvas.readUInt16LE(i * 6 + 4)).toBe(65535); + const r = canvas.readUInt16LE(i * 6 + 0); + const g = canvas.readUInt16LE(i * 6 + 2); + const b = canvas.readUInt16LE(i * 6 + 4); + + // B should still be the dominant channel and bright (close to but + // distinctly below full HLG white). + expect(b).toBeGreaterThan(60000); + expect(b).toBeLessThan(65535); + + // R and G should pick up small HLG signals from primary mixing. + // (HLG's sqrt response makes even tiny linear values produce + // measurable signal — e.g. linear ~0.04 → signal ~23000.) + expect(r).toBeGreaterThan(15000); + expect(r).toBeLessThan(30000); + expect(g).toBeGreaterThan(8000); + expect(g).toBeLessThan(20000); + + // B should still dominate. + expect(b).toBeGreaterThan(r); + expect(b).toBeGreaterThan(g); } }); }); +// ── BT.709 → BT.2020 primary conversion ────────────────────────────────────── + +describe("blitRgba8OverRgb48le BT.709→BT.2020 primaries", () => { + it("grayscale (R=G=B) is invariant under primary conversion", () => { + // Each row of the BT.709→BT.2020 matrix sums to 1.0, so neutral inputs + // map to neutral outputs — text and UI in pure gray must stay pure gray. + const canvas = makeHdrFrame(1, 1, 0, 0, 0); + const dom = makeDomRgba(1, 1, 200, 200, 200, 255); + blitRgba8OverRgb48le(dom, canvas, 1, 1); + + const r = canvas.readUInt16LE(0); + const g = canvas.readUInt16LE(2); + const b = canvas.readUInt16LE(4); + + // Allow a 1-LSB tolerance for LUT rounding noise. 
+ expect(Math.abs(r - g)).toBeLessThanOrEqual(1); + expect(Math.abs(g - b)).toBeLessThanOrEqual(1); + expect(Math.abs(r - b)).toBeLessThanOrEqual(1); + }); + + it("pure sRGB red (255, 0, 0) leaks measurable G and B (proves matrix is applied)", () => { + // sRGB red (255, 0, 0) → linear (1, 0, 0) + // → BT.2020 linear (0.627, 0.069, 0.016) + // → HLG signal ≈ (59910, 29840, 14530) + // Without the matrix this would be (65535, 0, 0). The non-zero G and B + // are the regression pin. + const canvas = makeHdrFrame(1, 1, 0, 0, 0); + const dom = makeDomRgba(1, 1, 255, 0, 0, 255); + blitRgba8OverRgb48le(dom, canvas, 1, 1); + + const r = canvas.readUInt16LE(0); + const g = canvas.readUInt16LE(2); + const b = canvas.readUInt16LE(4); + + // R is desaturated relative to BT.2020 red primary (no longer 65535). + expect(r).toBeGreaterThan(57000); // ~59910 + expect(r).toBeLessThan(62000); + + // G and B must be strictly positive — this is the regression pin. + // Without primary conversion these would both be exactly 0. + expect(g).toBeGreaterThan(25000); // ~29840 + expect(g).toBeLessThan(35000); + expect(b).toBeGreaterThan(11000); // ~14530 + expect(b).toBeLessThan(18000); + + // R remains the dominant channel. 
+ expect(r).toBeGreaterThan(g); + expect(r).toBeGreaterThan(b); + }); + + it("pure sRGB green (0, 255, 0) leaks measurable R and B", () => { + // sRGB green (0, 255, 0) → linear (0, 1, 0) + // → BT.2020 linear (0.329, 0.920, 0.088) + // → HLG signal ≈ (51925, 64530, 33650) + const canvas = makeHdrFrame(1, 1, 0, 0, 0); + const dom = makeDomRgba(1, 1, 0, 255, 0, 255); + blitRgba8OverRgb48le(dom, canvas, 1, 1); + + const r = canvas.readUInt16LE(0); + const g = canvas.readUInt16LE(2); + const b = canvas.readUInt16LE(4); + + expect(r).toBeGreaterThan(48000); // ~51925, matrix mixes G into R + expect(r).toBeLessThan(56000); + expect(g).toBeGreaterThan(62000); // ~64530, green stays bright but no longer pinned + expect(b).toBeGreaterThan(30000); // ~33650, matrix mixes G into B + expect(b).toBeLessThan(38000); + expect(g).toBeGreaterThan(r); // green still dominant + expect(g).toBeGreaterThan(b); + }); + + it("PQ also performs the BT.709→BT.2020 conversion", () => { + // Same regression-pin as the HLG red test but on the PQ path, so we + // catch a regression that only removes the matrix from one transfer. + const canvas = makeHdrFrame(1, 1, 0, 0, 0); + const dom = makeDomRgba(1, 1, 255, 0, 0, 255); + blitRgba8OverRgb48le(dom, canvas, 1, 1, "pq"); + + const g = canvas.readUInt16LE(2); + const b = canvas.readUInt16LE(4); + + // Without primary conversion both would be 0 in PQ as well. 
+ expect(g).toBeGreaterThan(0); + expect(b).toBeGreaterThan(0); + }); +}); + // ── roundedRectAlpha tests ────────────────────────────────────────────────── describe("roundedRectAlpha", () => { diff --git a/packages/engine/src/utils/alphaBlit.ts b/packages/engine/src/utils/alphaBlit.ts index 22261f9da..b6c78942a 100644 --- a/packages/engine/src/utils/alphaBlit.ts +++ b/packages/engine/src/utils/alphaBlit.ts @@ -224,33 +224,71 @@ export function decodePngToRgb48le(buf: Buffer): { width: number; height: number } // ── sRGB → HDR color conversion ─────────────────────────────────────────────── +// +// Pipeline per pixel: +// sRGB 8-bit → linear BT.709 (sRGB EOTF, 256-entry LUT) +// → linear BT.2020 (3×3 primary matrix) +// → HDR signal 16-bit (HLG/PQ OETF, 4096-entry LUT) +// +// ## Why both transfer AND primaries +// +// HLG and PQ HDR video is encoded in the BT.2020 color volume, which has +// substantially wider primaries than sRGB/BT.709. Skipping the primary +// conversion and treating sRGB R/G/B values as if they were already BT.2020 +// makes saturated colors *look more saturated than the source intended* — +// e.g. sRGB pure blue (0, 0, 255) lands on BT.2020 blue, which is far more +// vivid than what the designer specified. +// +// For grayscale content (R = G = B) the matrix is the identity (each row of +// the BT.709→BT.2020 matrix sums to 1.0), so neutral text/UI is unaffected. +// For chromatic content (icons, progress bars, accent colors) the conversion +// is essential for color-accurate compositing. +// +// ## Conventions +// +// "Linear" means **scene-referred light in [0, 1] relative to SDR reference +// white** (not absolute nits). The HLG branch applies the OETF directly — no +// OOTF (no scene→display 1.2 gamma). DOM overlays are composited ON TOP of +// HLG video pixels which already live in HLG signal space, so we need the +// overlay to live in the same space; applying the OOTF here would +// double-apply it. 
+// +// For PQ, SDR white is placed at 203 nits per ITU-R BT.2408 ("SDR white" +// reference level) and normalized against the 10,000-nit PQ peak. This lets +// SDR text/UI sit at the conventional SDR-white brightness within a PQ frame +// rather than at peak brightness. + +// BT.709 → BT.2020 primary conversion matrix (linear light). +// Source: ITU-R BT.2087-0, Annex 2. Each row sums to 1.0 so neutrals +// (R = G = B) are invariant — only chromatic content gets remapped. +const M709_TO_2020 = [ + [0.6274039, 0.3292832, 0.0433128], + [0.0690973, 0.9195403, 0.0113624], + [0.0163914, 0.088013, 0.8955953], +] as const; + +/** sRGB 8-bit signal → linear scene light in [0, 1]. Exact (256 entries). */ +const SRGB_TO_LINEAR: Float32Array = (() => { + const lut = new Float32Array(256); + for (let i = 0; i < 256; i++) { + const v = i / 255; + lut[i] = v <= 0.04045 ? v / 12.92 : Math.pow((v + 0.055) / 1.055, 2.4); + } + return lut; +})(); -/** - * Build a 256-entry LUT: sRGB 8-bit value → HDR 16-bit signal value. - * - * Pipeline per channel: sRGB EOTF (decode gamma) → linear → HDR OETF → 16-bit. - * - * ## Convention - * - * "Linear" here means **scene light in [0, 1] relative to SDR reference white** - * (not absolute nits). The HLG branch applies the OETF directly — no OOTF (no - * gamma 1.2 scene→display conversion). This is the right choice for DOM - * overlays that will be composited ON TOP of HLG video pixels (which are - * already in HLG signal space); we need the overlay to sit in the same space - * as what it’s blending onto. Applying the OOTF here would double-apply it - * when the HDR video already carries scene-light semantics. - * - * For PQ, SDR white is placed at 203 nits per ITU-R BT.2408 ("SDR white" - * reference level) and normalized against 10,000-nit peak. This lets SDR - * content (text, UI) sit at the conventional SDR-white brightness within a - * PQ frame rather than at peak brightness. 
- * - * Note: converts the transfer function but not the color primaries (bt709 → - * bt2020). For neutral/near-neutral content (text, UI) the gamut difference - * is negligible. - */ -function buildSrgbToHdrLut(transfer: "hlg" | "pq"): Uint16Array { - const lut = new Uint16Array(256); +// Linear-light → HDR signal LUTs. +// +// We use a 4096-entry table indexed by `Math.round(linear * 4095)`. This +// trades a tiny amount of precision in the highlights for a 16× smaller +// memory footprint vs. a 65536-entry LUT. The OETF is steepest near zero +// (where dense sampling matters most), and at the dark end 1 / 4095 ≈ 0.024% +// of full-scale — far below visible threshold for compositing 8-bit overlays. +const HDR_LUT_SIZE = 4096; +const HDR_LUT_MAX = HDR_LUT_SIZE - 1; + +function buildLinearToHdrLut(transfer: "hlg" | "pq"): Uint16Array { + const lut = new Uint16Array(HDR_LUT_SIZE); // HLG OETF constants (Rec. 2100) const hlgA = 0.17883277; @@ -266,34 +304,71 @@ function buildSrgbToHdrLut(transfer: "hlg" | "pq"): Uint16Array { const pqMaxNits = 10000.0; const sdrNits = 203.0; - for (let i = 0; i < 256; i++) { - // sRGB EOTF: signal → linear (range 0–1, relative to SDR white) - const v = i / 255; - const linear = v <= 0.04045 ? v / 12.92 : Math.pow((v + 0.055) / 1.055, 2.4); - + for (let i = 0; i < HDR_LUT_SIZE; i++) { + const linear = i / HDR_LUT_MAX; let signal: number; if (transfer === "hlg") { signal = linear <= 1 / 12 ? Math.sqrt(3 * linear) : hlgA * Math.log(12 * linear - hlgB) + hlgC; } else { - // PQ OETF: linear light (in SDR nits) → PQ signal + // PQ: scale linear (relative to SDR white) into absolute PQ light + // before applying the OETF, so SDR white lands at 203 nits. 
const Lp = Math.max(0, (linear * sdrNits) / pqMaxNits); const Lm1 = Math.pow(Lp, pqM1); signal = Math.pow((pqC1 + pqC2 * Lm1) / (1.0 + pqC3 * Lm1), pqM2); } - lut[i] = Math.min(65535, Math.round(signal * 65535)); } return lut; } -const SRGB_TO_HLG = buildSrgbToHdrLut("hlg"); -const SRGB_TO_PQ = buildSrgbToHdrLut("pq"); +const LINEAR_TO_HLG = buildLinearToHdrLut("hlg"); +const LINEAR_TO_PQ = buildLinearToHdrLut("pq"); -/** Select the correct sRGB→HDR LUT for the given transfer function. */ -export function getSrgbToHdrLut(transfer: "hlg" | "pq"): Uint16Array { - return transfer === "pq" ? SRGB_TO_PQ : SRGB_TO_HLG; +function selectLinearToHdrLut(transfer: "hlg" | "pq"): Uint16Array { + return transfer === "pq" ? LINEAR_TO_PQ : LINEAR_TO_HLG; +} + +/** + * Convert one sRGB 8-bit pixel to an HDR 16-bit pixel via the full pipeline: + * sRGB EOTF → BT.709→BT.2020 primary matrix → HDR OETF. + * + * Writes the result into `out` at offset 0/1/2. + */ +function srgbToHdr16( + r8: number, + g8: number, + b8: number, + hdrLut: Uint16Array, + out: { r: number; g: number; b: number }, +): void { + const lr = SRGB_TO_LINEAR[r8] ?? 0; + const lg = SRGB_TO_LINEAR[g8] ?? 0; + const lb = SRGB_TO_LINEAR[b8] ?? 0; + + // Matrix indices match the ITU-R BT.2087-0 layout above. + const m0 = M709_TO_2020[0]!; + const m1 = M709_TO_2020[1]!; + const m2 = M709_TO_2020[2]!; + + let r2 = m0[0]! * lr + m0[1]! * lg + m0[2]! * lb; + let g2 = m1[0]! * lr + m1[1]! * lg + m1[2]! * lb; + let b2 = m2[0]! * lr + m2[1]! * lg + m2[2]! * lb; + + // For in-gamut sRGB inputs (which is everything we get from an 8-bit + // canvas), each row sums to 1.0 with non-negative coefficients, so outputs + // are guaranteed in [0, 1]. The clamps guard against fp drift only. + if (r2 < 0) r2 = 0; + else if (r2 > 1) r2 = 1; + if (g2 < 0) g2 = 0; + else if (g2 > 1) g2 = 1; + if (b2 < 0) b2 = 0; + else if (b2 > 1) b2 = 1; + + out.r = hdrLut[Math.round(r2 * HDR_LUT_MAX)] ?? 
0; + out.g = hdrLut[Math.round(g2 * HDR_LUT_MAX)] ?? 0; + out.b = hdrLut[Math.round(b2 * HDR_LUT_MAX)] ?? 0; } // ── Alpha compositing ───────────────────────────────────────────────────────── @@ -302,15 +377,22 @@ export function getSrgbToHdrLut(transfer: "hlg" | "pq"): Uint16Array { * Alpha-composite a DOM RGBA overlay (8-bit sRGB) onto an HDR canvas * (rgb48le) in-place. * - * DOM pixels are converted from sRGB to the target HDR signal space (HLG or PQ) - * before blending so the composited output is uniformly encoded. Without this - * conversion, sRGB content appears orange/washed in HDR playback. + * DOM pixels are converted from sRGB to the target HDR signal space (HLG or + * PQ) via a full sRGB EOTF → BT.709→BT.2020 primary matrix → HDR OETF + * pipeline, then alpha-blended against the existing HDR canvas in HDR signal + * space. Without the primary conversion, saturated sRGB colors (UI accents, + * icons) over-saturate when interpreted as BT.2020. + * + * Alpha blending is intentionally performed in HDR signal space (not linear + * light) to match the existing GPU compositing path. Since text/UI overlays + * are usually fully opaque, this only affects soft edges where the difference + * is imperceptible. 
* * @param domRgba Raw RGBA pixel data from decodePng() — width*height*4 bytes * @param canvas HDR canvas in rgb48le format — width*height*6 bytes, mutated in-place * @param width Canvas width in pixels * @param height Canvas height in pixels - * @param transfer HDR transfer function — selects the correct sRGB→HDR LUT + * @param transfer HDR transfer function — selects PQ or HLG OETF */ export function blitRgba8OverRgb48le( domRgba: Uint8Array, @@ -320,20 +402,25 @@ export function blitRgba8OverRgb48le( transfer: "hlg" | "pq" = "hlg", ): void { const pixelCount = width * height; - const lut = getSrgbToHdrLut(transfer); + const hdrLut = selectLinearToHdrLut(transfer); + const out = { r: 0, g: 0, b: 0 }; for (let i = 0; i < pixelCount; i++) { const da = domRgba[i * 4 + 3] ?? 0; + if (da === 0) continue; + + srgbToHdr16( + domRgba[i * 4 + 0] ?? 0, + domRgba[i * 4 + 1] ?? 0, + domRgba[i * 4 + 2] ?? 0, + hdrLut, + out, + ); - if (da === 0) { - continue; - } else if (da === 255) { - const r16 = lut[domRgba[i * 4 + 0] ?? 0] ?? 0; - const g16 = lut[domRgba[i * 4 + 1] ?? 0] ?? 0; - const b16 = lut[domRgba[i * 4 + 2] ?? 0] ?? 0; - canvas.writeUInt16LE(r16, i * 6); - canvas.writeUInt16LE(g16, i * 6 + 2); - canvas.writeUInt16LE(b16, i * 6 + 4); + if (da === 255) { + canvas.writeUInt16LE(out.r, i * 6); + canvas.writeUInt16LE(out.g, i * 6 + 2); + canvas.writeUInt16LE(out.b, i * 6 + 4); } else { const alpha = da / 255; const invAlpha = 1 - alpha; @@ -342,13 +429,9 @@ export function blitRgba8OverRgb48le( const hdrG = (canvas[i * 6 + 2] ?? 0) | ((canvas[i * 6 + 3] ?? 0) << 8); const hdrB = (canvas[i * 6 + 4] ?? 0) | ((canvas[i * 6 + 5] ?? 0) << 8); - const domR = lut[domRgba[i * 4 + 0] ?? 0] ?? 0; - const domG = lut[domRgba[i * 4 + 1] ?? 0] ?? 0; - const domB = lut[domRgba[i * 4 + 2] ?? 0] ?? 
0; - - canvas.writeUInt16LE(Math.round(domR * alpha + hdrR * invAlpha), i * 6); - canvas.writeUInt16LE(Math.round(domG * alpha + hdrG * invAlpha), i * 6 + 2); - canvas.writeUInt16LE(Math.round(domB * alpha + hdrB * invAlpha), i * 6 + 4); + canvas.writeUInt16LE(Math.round(out.r * alpha + hdrR * invAlpha), i * 6); + canvas.writeUInt16LE(Math.round(out.g * alpha + hdrG * invAlpha), i * 6 + 2); + canvas.writeUInt16LE(Math.round(out.b * alpha + hdrB * invAlpha), i * 6 + 4); } } } diff --git a/packages/engine/src/utils/mp4HdrBoxes.test.ts b/packages/engine/src/utils/mp4HdrBoxes.test.ts new file mode 100644 index 000000000..adf709db8 --- /dev/null +++ b/packages/engine/src/utils/mp4HdrBoxes.test.ts @@ -0,0 +1,398 @@ +import { copyFileSync, existsSync, readFileSync, statSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; +import { afterAll, beforeAll, describe, expect, it } from "vitest"; + +import { DEFAULT_HDR10_MASTERING } from "./hdr.js"; +import { + buildClliBox, + buildMdcvBox, + findBox, + injectHdrBoxes, + injectHdrBoxesInBuffer, + parseMasteringDisplayString, + parseMaxCllString, + shiftChunkOffsetsAfter, +} from "./mp4HdrBoxes.js"; + +// --------------------------------------------------------------------------- +// Parsers +// --------------------------------------------------------------------------- + +describe("parseMasteringDisplayString", () => { + it("parses the canonical HDR10 P3-D65 string", () => { + const parsed = parseMasteringDisplayString( + "G(13250,34500)B(7500,3000)R(34000,16000)WP(15635,16450)L(10000000,1)", + ); + expect(parsed).toEqual({ + greenX: 13250, + greenY: 34500, + blueX: 7500, + blueY: 3000, + redX: 34000, + redY: 16000, + whitePointX: 15635, + whitePointY: 16450, + maxLuminance: 10000000, + minLuminance: 1, + }); + }); + + it("parses BT.2020 primaries (full color volume)", () => { + const parsed = parseMasteringDisplayString( + "G(8500,39850)B(6550,2300)R(35400,14600)WP(15635,16450)L(40000000,50)", + ); + 
expect(parsed.greenX).toBe(8500); + expect(parsed.maxLuminance).toBe(40000000); + expect(parsed.minLuminance).toBe(50); + }); + + it("throws on a malformed string", () => { + expect(() => parseMasteringDisplayString("not a real string")).toThrow( + /Invalid mastering-display string/, + ); + }); + + it("throws when a coordinate is missing", () => { + expect(() => + parseMasteringDisplayString("G(13250)B(7500,3000)R(34000,16000)WP(15635,16450)L(10000000,1)"), + ).toThrow(/Invalid mastering-display string/); + }); +}); + +describe("parseMaxCllString", () => { + it("parses MaxCLL,MaxFALL", () => { + expect(parseMaxCllString("1000,400")).toEqual({ maxCll: 1000, maxFall: 400 }); + }); + + it("parses zeros", () => { + expect(parseMaxCllString("0,0")).toEqual({ maxCll: 0, maxFall: 0 }); + }); + + it("throws on missing comma", () => { + expect(() => parseMaxCllString("1000")).toThrow(/Invalid max-cll string/); + }); + + it("throws on non-numeric values", () => { + expect(() => parseMaxCllString("foo,bar")).toThrow(/non-numeric values/); + }); +}); + +// --------------------------------------------------------------------------- +// Box builders — validate exact byte layout against ISO/IEC 23001-8 +// --------------------------------------------------------------------------- + +describe("buildMdcvBox", () => { + it("emits a 32-byte box with G/B/R primary order (NOT R/G/B)", () => { + const box = buildMdcvBox({ + greenX: 13250, + greenY: 34500, + blueX: 7500, + blueY: 3000, + redX: 34000, + redY: 16000, + whitePointX: 15635, + whitePointY: 16450, + maxLuminance: 10000000, + minLuminance: 1, + }); + + expect(box.length).toBe(32); + expect(box.readUInt32BE(0)).toBe(32); + expect(box.toString("ascii", 4, 8)).toBe("mdcv"); + + expect(box.readUInt16BE(8)).toBe(13250); + expect(box.readUInt16BE(10)).toBe(34500); + + expect(box.readUInt16BE(12)).toBe(7500); + expect(box.readUInt16BE(14)).toBe(3000); + + expect(box.readUInt16BE(16)).toBe(34000); + 
expect(box.readUInt16BE(18)).toBe(16000); + + expect(box.readUInt16BE(20)).toBe(15635); + expect(box.readUInt16BE(22)).toBe(16450); + + expect(box.readUInt32BE(24)).toBe(10000000); + expect(box.readUInt32BE(28)).toBe(1); + }); +}); + +describe("buildClliBox", () => { + it("emits a 12-byte box with maxCll then maxFall", () => { + const box = buildClliBox({ maxCll: 1000, maxFall: 400 }); + + expect(box.length).toBe(12); + expect(box.readUInt32BE(0)).toBe(12); + expect(box.toString("ascii", 4, 8)).toBe("clli"); + expect(box.readUInt16BE(8)).toBe(1000); + expect(box.readUInt16BE(10)).toBe(400); + }); + + it("clamps within the uint16 range", () => { + const box = buildClliBox({ maxCll: 65535, maxFall: 65535 }); + expect(box.readUInt16BE(8)).toBe(65535); + expect(box.readUInt16BE(10)).toBe(65535); + }); +}); + +// --------------------------------------------------------------------------- +// Synthetic-MP4 round-trip — exercises the box walker without needing FFmpeg +// --------------------------------------------------------------------------- + +/** + * Build a minimal MP4 buffer that has just enough structure for the injector + * to find an HEVC sample entry. This isn't a playable file — it's the + * smallest tree that exercises every parent-bumping path. + */ +function makeSyntheticHevcMp4(): { + buffer: Buffer; + insertPos: number; + chunkOffsets: number[]; +} { + // Build inside-out so we know each box's size at construction time. + const colr = box("colr", Buffer.from("nclx", "ascii")); + // Minimal hvcC — content doesn't matter for the walker, only the type. + const hvcC = box("hvcC", Buffer.alloc(8)); + // VisualSampleEntry header is 78 bytes after the box header + // (offsets 8..86 inside the box). Zeroes are fine for our purposes. + const sampleEntryBody = Buffer.concat([Buffer.alloc(78), hvcC, colr]); + const hvc1 = box("hvc1", sampleEntryBody); + + // stsd is a FullBox: 4 bytes version+flags + 4 bytes entry_count + entries. 
+ const stsdBody = Buffer.concat([Buffer.from([0, 0, 0, 0]), u32(1), hvc1]); + const stsd = box("stsd", stsdBody); + + // stco with two chunks. Offsets will be patched once we know where mdat lands. + const stcoBody = Buffer.concat([Buffer.from([0, 0, 0, 0]), u32(2), u32(0), u32(0)]); + const stco = box("stco", stcoBody); + + const stbl = box("stbl", Buffer.concat([stsd, stco])); + const minf = box("minf", stbl); + const mdia = box("mdia", minf); + const trak = box("trak", mdia); + const moov = box("moov", trak); + + const ftyp = box("ftyp", Buffer.from("isomavc1\x00\x00\x00\x00", "binary")); + const mdatPayload = Buffer.from("FAKE_VIDEO_DATA"); + const mdat = box("mdat", mdatPayload); + + // ftyp + moov + mdat (faststart layout). + const buffer = Buffer.concat([ftyp, moov, mdat]); + + // Patch stco entries to point at mdat's payload region. We need the file + // offsets (post-concatenation), so locate stco in the final buffer. + const stcoLoc = findBox(buffer, 0, buffer.length, "moov"); + if (!stcoLoc) throw new Error("synthetic moov missing"); + const found = locateAllBoxes(buffer, stcoLoc.offset + 8, stcoLoc.offset + stcoLoc.size); + const stcoFinal = found.find((b) => b.type === "stco"); + if (!stcoFinal) throw new Error("synthetic stco missing"); + + const mdatStart = ftyp.length + moov.length; + const chunk1 = mdatStart + 8; + const chunk2 = mdatStart + 8 + 5; + buffer.writeUInt32BE(chunk1, stcoFinal.offset + 16); + buffer.writeUInt32BE(chunk2, stcoFinal.offset + 20); + + // The insertion site is right after `colr` inside hvc1. 
+ const insertPos = locateColrEnd(buffer); + + return { buffer, insertPos, chunkOffsets: [chunk1, chunk2] }; +} + +function box(type: string, body: Buffer): Buffer { + const size = 8 + body.length; + const header = Buffer.alloc(8); + header.writeUInt32BE(size, 0); + header.write(type, 4, "ascii"); + return Buffer.concat([header, body]); +} + +function u32(n: number): Buffer { + const b = Buffer.alloc(4); + b.writeUInt32BE(n, 0); + return b; +} + +function locateAllBoxes( + buf: Buffer, + start: number, + end: number, +): Array<{ type: string; offset: number; size: number }> { + const out: Array<{ type: string; offset: number; size: number }> = []; + let p = start; + while (p < end - 8) { + const size = buf.readUInt32BE(p); + const type = buf.toString("ascii", p + 4, p + 8); + if (size <= 0 || size > end - p) { + p += 1; + continue; + } + out.push({ type, offset: p, size }); + if ( + type === "trak" || + type === "mdia" || + type === "minf" || + type === "stbl" || + type === "stsd" || + type === "hvc1" + ) { + const childStart = type === "stsd" ? p + 16 : type === "hvc1" ? p + 86 : p + 8; + out.push(...locateAllBoxes(buf, childStart, p + size)); + } + p += size; + } + return out; +} + +function locateColrEnd(buf: Buffer): number { + const colr = findColrInTree(buf); + if (!colr) throw new Error("synthetic colr missing"); + return colr.offset + colr.size; +} + +function findColrInTree(buf: Buffer): { offset: number; size: number } | null { + // Tiny scan — the synthetic file only contains one `colr`. 
+ for (let p = 0; p < buf.length - 4; p++) { + if (buf.toString("ascii", p, p + 4) === "colr") { + const size = buf.readUInt32BE(p - 4); + return { offset: p - 4, size }; + } + } + return null; +} + +describe("injectHdrBoxesInBuffer (synthetic MP4)", () => { + it("inserts mdcv + clli (44 bytes) and bumps every parent box", () => { + const { buffer, insertPos, chunkOffsets } = makeSyntheticHevcMp4(); + + const result = injectHdrBoxesInBuffer(buffer, DEFAULT_HDR10_MASTERING); + + expect(result.injected).toBe(true); + expect(result.addedBytes).toBe(44); + expect(result.buffer.length).toBe(buffer.length + 44); + + // Boxes appear at the expected position with the expected types. + expect(result.buffer.toString("ascii", insertPos + 4, insertPos + 8)).toBe("mdcv"); + expect(result.buffer.toString("ascii", insertPos + 32 + 4, insertPos + 32 + 8)).toBe("clli"); + + // moov size grew by 44. + const oldMoov = findBox(buffer, 0, buffer.length, "moov")!; + const newMoov = findBox(result.buffer, 0, result.buffer.length, "moov")!; + expect(newMoov.size).toBe(oldMoov.size + 44); + + // stco chunk offsets shifted forward by 44 (faststart layout: moov before mdat). 
+ const stco = findBox(result.buffer, newMoov.offset + 8, newMoov.offset + newMoov.size, "trak")!; + const trakChildren = locateAllBoxes(result.buffer, stco.offset + 8, stco.offset + stco.size); + const newStco = trakChildren.find((b) => b.type === "stco")!; + expect(result.buffer.readUInt32BE(newStco.offset + 16)).toBe(chunkOffsets[0] + 44); + expect(result.buffer.readUInt32BE(newStco.offset + 20)).toBe(chunkOffsets[1] + 44); + }); + + it("is idempotent — second call is a no-op", () => { + const { buffer } = makeSyntheticHevcMp4(); + const first = injectHdrBoxesInBuffer(buffer, DEFAULT_HDR10_MASTERING); + expect(first.injected).toBe(true); + + const second = injectHdrBoxesInBuffer(first.buffer, DEFAULT_HDR10_MASTERING); + expect(second.injected).toBe(false); + expect(second.reason).toMatch(/already present/); + expect(second.buffer).toBe(first.buffer); + }); + + it("returns injected=false for a buffer without a moov box", () => { + const buffer = box("ftyp", Buffer.from("isom\x00\x00\x00\x00", "binary")); + const result = injectHdrBoxesInBuffer(buffer, DEFAULT_HDR10_MASTERING); + expect(result.injected).toBe(false); + expect(result.reason).toMatch(/no moov box/); + }); + + it("returns injected=false for a moov without an HEVC sample entry", () => { + // A trak with avc1 (H.264) instead of hvc1 — the injector should not + // accidentally tag SDR or H.264 files as HDR10. 
+ const avc1 = box("avc1", Buffer.alloc(78)); + const stsd = box("stsd", Buffer.concat([Buffer.from([0, 0, 0, 0]), u32(1), avc1])); + const stbl = box("stbl", stsd); + const minf = box("minf", stbl); + const mdia = box("mdia", minf); + const trak = box("trak", mdia); + const moov = box("moov", trak); + const ftyp = box("ftyp", Buffer.from("isomavc1\x00\x00\x00\x00", "binary")); + const buffer = Buffer.concat([ftyp, moov]); + + const result = injectHdrBoxesInBuffer(buffer, DEFAULT_HDR10_MASTERING); + expect(result.injected).toBe(false); + expect(result.reason).toMatch(/HEVC/); + }); +}); + +// --------------------------------------------------------------------------- +// shiftChunkOffsetsAfter — verifies the "only past-insertion-site" rule +// --------------------------------------------------------------------------- + +describe("shiftChunkOffsetsAfter", () => { + it("only shifts offsets >= insertPos (handles mdat-before-moov layouts)", () => { + const { buffer } = makeSyntheticHevcMp4(); + const moov = findBox(buffer, 0, buffer.length, "moov")!; + + const trak = findBox(buffer, moov.offset + 8, moov.offset + moov.size, "trak")!; + const trakChildren = locateAllBoxes(buffer, trak.offset + 8, trak.offset + trak.size); + const stco = trakChildren.find((b) => b.type === "stco")!; + + const before1 = buffer.readUInt32BE(stco.offset + 16); + const before2 = buffer.readUInt32BE(stco.offset + 20); + + // Pretend the insertion site is AFTER all chunk offsets — none should shift. + shiftChunkOffsetsAfter(buffer, moov.offset, buffer.length + 1, 100); + expect(buffer.readUInt32BE(stco.offset + 16)).toBe(before1); + expect(buffer.readUInt32BE(stco.offset + 20)).toBe(before2); + + // Now pretend the insertion site is BEFORE all chunk offsets — both shift. 
+ shiftChunkOffsetsAfter(buffer, moov.offset, 0, 100); + expect(buffer.readUInt32BE(stco.offset + 16)).toBe(before1 + 100); + expect(buffer.readUInt32BE(stco.offset + 20)).toBe(before2 + 100); + }); +}); + +// --------------------------------------------------------------------------- +// Real-file integration — only runs when an HDR10 fixture is present locally. +// Skipped on CI to keep the fixture optional; the synthetic tests above cover +// the byte-level invariants. +// --------------------------------------------------------------------------- + +const FIXTURE = "/tmp/hyperframes-hdr-test/hdr-pq.mp4"; + +describe.skipIf(!existsSync(FIXTURE))("injectHdrBoxes (real HDR10 fixture)", () => { + let workPath: string; + + beforeAll(() => { + workPath = join(tmpdir(), `hdr-inject-test-${Date.now()}.mp4`); + copyFileSync(FIXTURE, workPath); + }); + + afterAll(() => { + // Best-effort cleanup; tmpdir gets pruned anyway. + try { + const { rmSync } = require("fs") as typeof import("fs"); + rmSync(workPath, { force: true }); + } catch { + /* ignore */ + } + }); + + it("injects 44 bytes and produces a still-valid MP4", () => { + const beforeSize = statSync(workPath).size; + const result = injectHdrBoxes(workPath, DEFAULT_HDR10_MASTERING); + + expect(result.injected).toBe(true); + expect(result.addedBytes).toBe(44); + expect(statSync(workPath).size).toBe(beforeSize + 44); + + const data = readFileSync(workPath); + // mdcv and clli should now appear inside the moov tree. 
+ const moov = findBox(data, 0, data.length, "moov")!; + const moovBytes = data.subarray(moov.offset, moov.offset + moov.size); + expect(moovBytes.includes(Buffer.from("mdcv"))).toBe(true); + expect(moovBytes.includes(Buffer.from("clli"))).toBe(true); + }); +}); diff --git a/packages/engine/src/utils/mp4HdrBoxes.ts b/packages/engine/src/utils/mp4HdrBoxes.ts new file mode 100644 index 000000000..db56b0513 --- /dev/null +++ b/packages/engine/src/utils/mp4HdrBoxes.ts @@ -0,0 +1,520 @@ +/** + * MP4 HDR Container Metadata Injection + * + * x265 emits HDR10 mastering display + content light level as in-band SEI + * messages, but FFmpeg's `mov` muxer does not extract those into the + * container-level `mdcv` (Mastering Display Color Volume) and `clli` + * (Content Light Level Info) boxes that ingest pipelines like YouTube, + * Apple AirPlay, and most HDR TVs read. Without those boxes, players see + * stream-level color tagging (`colr` only) and treat the file as SDR + * BT.2020 — see https://support.google.com/youtube/answer/7126552. + * + * This module surgically inserts `mdcv` + `clli` boxes inside the HEVC + * sample entry (`hvc1`/`hev1`), bumps every parent box's size, and + * rewrites every `stco`/`co64` chunk offset that points past the + * insertion site so the file stays decodable. + * + * Reference: ISO/IEC 14496-15 (carriage of NAL-structured video) and + * ISO/IEC 23001-8 (coding-independent code points). + */ + +import { readFileSync, writeFileSync } from "fs"; + +import type { HdrMasteringMetadata } from "./hdr.js"; + +// --------------------------------------------------------------------------- +// Mastering metadata parsers +// --------------------------------------------------------------------------- + +export interface ParsedMasteringDisplay { + /** Green chromaticity (x, y) in units of 0.00002 cd/m². */ + greenX: number; + greenY: number; + /** Blue chromaticity (x, y) in units of 0.00002 cd/m². 
*/
+  blueX: number;
+  blueY: number;
+  /** Red chromaticity (x, y) in units of 0.00002 (CIE 1931 xy, dimensionless). */
+  redX: number;
+  redY: number;
+  /** White point (x, y) in units of 0.00002 (CIE 1931 xy, dimensionless). */
+  whitePointX: number;
+  whitePointY: number;
+  /** Max display luminance in units of 0.0001 cd/m². */
+  maxLuminance: number;
+  /** Min display luminance in units of 0.0001 cd/m². */
+  minLuminance: number;
+}
+
+export interface ParsedMaxCll {
+  /** Maximum content light level (cd/m²). */
+  maxCll: number;
+  /** Maximum frame-average light level (cd/m²). */
+  maxFall: number;
+}
+
+const MASTERING_DISPLAY_RE =
+  /^G\((\d+),(\d+)\)B\((\d+),(\d+)\)R\((\d+),(\d+)\)WP\((\d+),(\d+)\)L\((\d+),(\d+)\)$/;
+
+/**
+ * Parse the x265 mastering-display string format
+ * (`G(Gx,Gy)B(Bx,By)R(Rx,Ry)WP(WPx,WPy)L(Lmax,Lmin)`).
+ *
+ * Throws if the string doesn't match the expected shape — corrupt mastering
+ * metadata is a real bug, not something to silently fall back from.
+ */
+export function parseMasteringDisplayString(s: string): ParsedMasteringDisplay {
+  const match = MASTERING_DISPLAY_RE.exec(s);
+  if (!match) {
+    throw new Error(
+      `Invalid mastering-display string: ${s} (expected G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min))`,
+    );
+  }
+  const [, gx, gy, bx, by, rx, ry, wpx, wpy, lmax, lmin] = match;
+  return {
+    greenX: Number(gx),
+    greenY: Number(gy),
+    blueX: Number(bx),
+    blueY: Number(by),
+    redX: Number(rx),
+    redY: Number(ry),
+    whitePointX: Number(wpx),
+    whitePointY: Number(wpy),
+    maxLuminance: Number(lmax),
+    minLuminance: Number(lmin),
+  };
+}
+
+/**
+ * Parse the x265 max-cll string format (`MaxCLL,MaxFALL`).
+ * Both values are in cd/m² (nits).
+ */
+export function parseMaxCllString(s: string): ParsedMaxCll {
+  const parts = s.split(",");
+  if (parts.length !== 2) {
+    throw new Error(`Invalid max-cll string: ${s} (expected MaxCLL,MaxFALL)`);
+  }
+  const maxCll = Number(parts[0]);
+  const maxFall = Number(parts[1]);
+  if (!Number.isFinite(maxCll) || !Number.isFinite(maxFall)) {
+    throw new Error(`Invalid max-cll string: ${s} (non-numeric values)`);
+  }
+  return { maxCll, maxFall };
+}
+
+// ---------------------------------------------------------------------------
+// Box builders
+// ---------------------------------------------------------------------------
+
+/**
+ * Build an `mdcv` (Mastering Display Color Volume) box.
+ *
+ * Per ISO/IEC 23001-8 §7.5, the payload is 24 bytes:
+ * - display_primaries[3]: (x,y) pairs for G, B, R (uint16 each, 0.00002 CIE-xy increments)
+ * - white_point: (x,y) (uint16 each)
+ * - max_display_mastering_luminance (uint32, 0.0001 cd/m² units)
+ * - min_display_mastering_luminance (uint32)
+ *
+ * Total box size = 8 (header) + 24 (payload) = 32 bytes.
+ *
+ * NOTE: ISO 23001-8 specifies G,B,R order, NOT R,G,B. Getting this wrong
+ * produces visible primary swapping in HDR-aware players.
+ */
+export function buildMdcvBox(parsed: ParsedMasteringDisplay): Buffer {
+  const box = Buffer.alloc(32);
+  box.writeUInt32BE(32, 0);
+  box.write("mdcv", 4, "ascii");
+  box.writeUInt16BE(parsed.greenX, 8);
+  box.writeUInt16BE(parsed.greenY, 10);
+  box.writeUInt16BE(parsed.blueX, 12);
+  box.writeUInt16BE(parsed.blueY, 14);
+  box.writeUInt16BE(parsed.redX, 16);
+  box.writeUInt16BE(parsed.redY, 18);
+  box.writeUInt16BE(parsed.whitePointX, 20);
+  box.writeUInt16BE(parsed.whitePointY, 22);
+  box.writeUInt32BE(parsed.maxLuminance, 24);
+  box.writeUInt32BE(parsed.minLuminance, 28);
+  return box;
+}
+
+/**
+ * Build a `clli` (Content Light Level Information) box.
+ * + * Per ISO/IEC 23001-8 §7.6, the payload is 4 bytes: + * - max_content_light_level (uint16, cd/m²) + * - max_pic_average_light_level (uint16, cd/m²) + * + * Total box size = 8 (header) + 4 (payload) = 12 bytes. + */ +export function buildClliBox(parsed: ParsedMaxCll): Buffer { + const box = Buffer.alloc(12); + box.writeUInt32BE(12, 0); + box.write("clli", 4, "ascii"); + box.writeUInt16BE(parsed.maxCll, 8); + box.writeUInt16BE(parsed.maxFall, 10); + return box; +} + +// --------------------------------------------------------------------------- +// Box walking helpers +// --------------------------------------------------------------------------- + +interface BoxLocation { + /** File offset where the box header begins. */ + offset: number; + /** Total box size including the 8-byte (or 16-byte for size==1) header. */ + size: number; + /** Number of bytes in the header (8 normally, 16 for 64-bit `largesize`). */ + headerSize: number; +} + +function readBoxAt( + buf: Buffer, + offset: number, +): { type: string; size: number; headerSize: number } { + if (offset + 8 > buf.length) { + throw new Error(`Truncated MP4: box header at ${offset} exceeds file length`); + } + let size = buf.readUInt32BE(offset); + const type = buf.toString("ascii", offset + 4, offset + 8); + let headerSize = 8; + if (size === 1) { + if (offset + 16 > buf.length) { + throw new Error(`Truncated MP4: largesize header at ${offset} exceeds file length`); + } + // 64-bit largesize. Node Buffer can't safely read 64-bit unsigned past 2^53, + // but MP4 files capping at 2^53 bytes is a fine practical limit. + const high = buf.readUInt32BE(offset + 8); + const low = buf.readUInt32BE(offset + 12); + size = high * 0x1_0000_0000 + low; + headerSize = 16; + } else if (size === 0) { + // size==0 means "to end of file" — only valid for the last top-level box. + size = buf.length - offset; + } + return { type, size, headerSize }; +} + +/** + * Find the first child box of a given type within `[start, end)`. 
+ * Returns `null` if not found. + */ +export function findBox(buf: Buffer, start: number, end: number, type: string): BoxLocation | null { + let pos = start; + while (pos < end) { + const { type: bt, size, headerSize } = readBoxAt(buf, pos); + if (bt === type) { + return { offset: pos, size, headerSize }; + } + if (size <= 0) { + // Defensive: a malformed box with size==0 or huge would otherwise + // loop forever or bail past the end. + throw new Error(`Invalid box size at offset ${pos}: ${size}`); + } + pos += size; + } + return null; +} + +/** + * Find the first video `trak` box (one containing an `hvc1` or `hev1` sample + * entry). MP4 files commonly contain audio + video traks; we only inject HDR + * metadata into the HEVC video track. + */ +function findVideoHevcTrak(buf: Buffer, moovStart: number, moovEnd: number): BoxLocation | null { + let pos = moovStart + 8; + while (pos < moovEnd) { + const { type, size, headerSize } = readBoxAt(buf, pos); + if (type === "trak") { + const trakEnd = pos + size; + // Cheap substring scan for 'hvc1' or 'hev1' inside the trak. Scoped to + // the trak's bytes so we don't false-positive on neighboring traks. + const slice = buf.subarray(pos, trakEnd); + if (slice.includes(Buffer.from("hvc1")) || slice.includes(Buffer.from("hev1"))) { + return { offset: pos, size, headerSize }; + } + } + pos += size; + } + return null; +} + +// --------------------------------------------------------------------------- +// Injection +// --------------------------------------------------------------------------- + +export interface InjectHdrBoxesResult { + /** Whether boxes were inserted (false = already present or non-HEVC). */ + injected: boolean; + /** Bytes appended to the file (0 when injected=false). */ + addedBytes: number; + /** Reason for skipping when injected=false. */ + reason?: string; +} + +/** + * Inject `mdcv` + `clli` boxes into an HEVC HDR MP4. 
+ * + * Idempotent: if both boxes are already present in the HEVC sample entry, + * the file is left untouched and `injected: false` is returned. + * + * Safe with `moov`-before-`mdat` and `moov`-after-`mdat` layouts: + * only chunk offsets that point past the insertion site are bumped, so a + * faststart-style file (moov first → mdat shifts) and a default file + * (mdat first → mdat doesn't shift) are both handled correctly. + * + * Returns `injected: false` (without throwing) for files without an HEVC + * track — non-HDR encodes can call this safely as a no-op. + */ +export function injectHdrBoxes( + mp4Path: string, + mastering: HdrMasteringMetadata, +): InjectHdrBoxesResult { + const data = readFileSync(mp4Path); + const result = injectHdrBoxesInBuffer(data, mastering); + if (result.injected) { + writeFileSync(mp4Path, result.buffer); + } + return { + injected: result.injected, + addedBytes: result.addedBytes, + reason: result.reason, + }; +} + +interface BufferInjectionResult { + injected: boolean; + addedBytes: number; + reason?: string; + buffer: Buffer; +} + +/** + * In-memory variant of `injectHdrBoxes`. Returned `buffer` is the original + * input when `injected: false`, and a freshly allocated buffer otherwise. + * + * Exported separately so tests can exercise the parser/walker logic without + * touching the filesystem. 
+ */ +export function injectHdrBoxesInBuffer( + data: Buffer, + mastering: HdrMasteringMetadata, +): BufferInjectionResult { + const masteringParsed = parseMasteringDisplayString(mastering.masterDisplay); + const maxCllParsed = parseMaxCllString(mastering.maxCll); + + const moov = findBox(data, 0, data.length, "moov"); + if (!moov) { + return { injected: false, addedBytes: 0, reason: "no moov box", buffer: data }; + } + const moovEnd = moov.offset + moov.size; + + const trak = findVideoHevcTrak(data, moov.offset, moovEnd); + if (!trak) { + return { + injected: false, + addedBytes: 0, + reason: "no HEVC video trak (hvc1/hev1)", + buffer: data, + }; + } + const trakEnd = trak.offset + trak.size; + + const mdia = findBox(data, trak.offset + 8, trakEnd, "mdia"); + if (!mdia) { + return { + injected: false, + addedBytes: 0, + reason: "no mdia box inside trak", + buffer: data, + }; + } + const minf = findBox(data, mdia.offset + 8, mdia.offset + mdia.size, "minf"); + if (!minf) { + return { injected: false, addedBytes: 0, reason: "no minf box", buffer: data }; + } + const stbl = findBox(data, minf.offset + 8, minf.offset + minf.size, "stbl"); + if (!stbl) { + return { injected: false, addedBytes: 0, reason: "no stbl box", buffer: data }; + } + const stsd = findBox(data, stbl.offset + 8, stbl.offset + stbl.size, "stsd"); + if (!stsd) { + return { injected: false, addedBytes: 0, reason: "no stsd box", buffer: data }; + } + + // stsd is a FullBox: 8 (header) + 4 (version+flags) + 4 (entry_count) + // = 16 bytes before the first sample entry. 
+ const stsdEntriesStart = stsd.offset + 16; + const stsdEnd = stsd.offset + stsd.size; + let sampleEntry = findBox(data, stsdEntriesStart, stsdEnd, "hvc1"); + if (!sampleEntry) { + sampleEntry = findBox(data, stsdEntriesStart, stsdEnd, "hev1"); + } + if (!sampleEntry) { + return { + injected: false, + addedBytes: 0, + reason: "no hvc1/hev1 sample entry", + buffer: data, + }; + } + + // VisualSampleEntry: 8 (box header) + 78 bytes (reserved + dataRef + + // pre_defined + reserved + width + height + horiz/vert resolution + + // reserved + frame_count + compressorname + depth + pre_defined). + // Children boxes start at offset 86 inside the sample entry. + const sampleEntryChildrenStart = sampleEntry.offset + 86; + const sampleEntryEnd = sampleEntry.offset + sampleEntry.size; + + // Idempotence — don't double-inject. If both boxes are already present, + // just no-op so calling this on an already-tagged file is safe. + const existingMdcv = findBox(data, sampleEntryChildrenStart, sampleEntryEnd, "mdcv"); + const existingClli = findBox(data, sampleEntryChildrenStart, sampleEntryEnd, "clli"); + if (existingMdcv && existingClli) { + return { + injected: false, + addedBytes: 0, + reason: "mdcv + clli already present", + buffer: data, + }; + } + + // Insert AFTER colr if present (sits with the other color-properties boxes). + // Otherwise after hvcC (the codec config box, always present). + const colr = findBox(data, sampleEntryChildrenStart, sampleEntryEnd, "colr"); + const hvcC = colr ? null : findBox(data, sampleEntryChildrenStart, sampleEntryEnd, "hvcC"); + const anchor = colr ?? hvcC; + if (!anchor) { + return { + injected: false, + addedBytes: 0, + reason: "no colr or hvcC anchor inside sample entry", + buffer: data, + }; + } + const insertPos = anchor.offset + anchor.size; + + const mdcvBox = existingMdcv ? Buffer.alloc(0) : buildMdcvBox(masteringParsed); + const clliBox = existingClli ? 
Buffer.alloc(0) : buildClliBox(maxCllParsed); + const newBoxes = Buffer.concat([mdcvBox, clliBox]); + const delta = newBoxes.length; + if (delta === 0) { + return { + injected: false, + addedBytes: 0, + reason: "boxes already present (partial)", + buffer: data, + }; + } + + const out = Buffer.alloc(data.length + delta); + data.copy(out, 0, 0, insertPos); + newBoxes.copy(out, insertPos); + data.copy(out, insertPos + delta, insertPos); + + // Bump every parent box's size field. Each ancestor wraps the inserted + // bytes, so each grows by exactly `delta`. Order doesn't matter — they're + // all independent uint32 fields. + bumpBoxSize(out, sampleEntry.offset, delta); + bumpBoxSize(out, stsd.offset, delta); + bumpBoxSize(out, stbl.offset, delta); + bumpBoxSize(out, minf.offset, delta); + bumpBoxSize(out, mdia.offset, delta); + bumpBoxSize(out, trak.offset, delta); + bumpBoxSize(out, moov.offset, delta); + + // Bump chunk offsets for every track. Crucial subtlety: only offsets that + // point PAST the insertion site need to shift. With `moov`-before-`mdat` + // (faststart layout), all chunk offsets are >= insertPos and all shift. + // With `mdat`-before-`moov` (default ffmpeg layout), all chunk offsets + // are < insertPos and none shift. Mixed layouts (e.g. some chunks in a + // pre-moov mdat and others in a post-moov mdat) get the right answer + // per-chunk. This is what makes the function safe across muxers. + shiftChunkOffsetsAfter(out, moov.offset, insertPos, delta); + + return { injected: true, addedBytes: delta, buffer: out }; +} + +function bumpBoxSize(buf: Buffer, boxOffset: number, delta: number): void { + const cur = buf.readUInt32BE(boxOffset); + if (cur === 1) { + // 64-bit largesize: increment the low 32 bits, carrying into the high + // word if needed. Practical files won't overflow but we'd rather not + // silently corrupt them if they do. 
+ const high = buf.readUInt32BE(boxOffset + 8); + const low = buf.readUInt32BE(boxOffset + 12); + const total = high * 0x1_0000_0000 + low + delta; + buf.writeUInt32BE(Math.floor(total / 0x1_0000_0000), boxOffset + 8); + buf.writeUInt32BE(total >>> 0, boxOffset + 12); + return; + } + buf.writeUInt32BE(cur + delta, boxOffset); +} + +/** + * Walk every `trak` inside `moov` and shift `stco`/`co64` offsets past + * `insertPos` by `delta`. Exported for tests. + */ +export function shiftChunkOffsetsAfter( + buf: Buffer, + moovStart: number, + insertPos: number, + delta: number, +): void { + const moovSize = readBoxAt(buf, moovStart).size; + const moovEnd = moovStart + moovSize; + + let pos = moovStart + 8; + while (pos < moovEnd) { + const { type, size } = readBoxAt(buf, pos); + if (type === "trak") { + shiftChunkOffsetsInTrak(buf, pos + 8, pos + size, insertPos, delta); + } + pos += size; + } +} + +function shiftChunkOffsetsInTrak( + buf: Buffer, + start: number, + end: number, + insertPos: number, + delta: number, +): void { + // DFS for stco/co64 within mdia → minf → stbl. Recursion via an explicit + // stack avoids exhausting the Node call stack on pathological boxes. + const stack: Array<[number, number]> = [[start, end]]; + while (stack.length > 0) { + const [s, e] = stack.pop()!; + let p = s; + while (p < e) { + const { type, size, headerSize } = readBoxAt(buf, p); + if (type === "stco") { + // FullBox: 4 bytes version+flags, 4 bytes entry_count, then count×u32. 
+        const entryCount = buf.readUInt32BE(p + headerSize + 4);
+        let entryPos = p + headerSize + 8;
+        for (let i = 0; i < entryCount; i++) {
+          const cur = buf.readUInt32BE(entryPos);
+          if (cur >= insertPos) buf.writeUInt32BE(cur + delta, entryPos);
+          entryPos += 4;
+        }
+      } else if (type === "co64") {
+        const entryCount = buf.readUInt32BE(p + headerSize + 4);
+        let entryPos = p + headerSize + 8;
+        for (let i = 0; i < entryCount; i++) {
+          const high = buf.readUInt32BE(entryPos);
+          const low = buf.readUInt32BE(entryPos + 4);
+          const cur = high * 0x1_0000_0000 + low;
+          if (cur >= insertPos) {
+            const next = cur + delta;
+            buf.writeUInt32BE(Math.floor(next / 0x1_0000_0000), entryPos);
+            buf.writeUInt32BE(next >>> 0, entryPos + 4);
+          }
+          entryPos += 8;
+        }
+      } else if (type === "mdia" || type === "minf" || type === "stbl" || type === "edts") {
+        stack.push([p + headerSize, p + size]);
+      }
+      p += size;
+    }
+  }
+}
diff --git a/packages/producer/scripts/hdr-smoke.ts b/packages/producer/scripts/hdr-smoke.ts
new file mode 100644
index 000000000..aa2004192
--- /dev/null
+++ b/packages/producer/scripts/hdr-smoke.ts
@@ -0,0 +1,329 @@
+#!/usr/bin/env tsx
+/**
+ * HDR smoke test — renders the hdr-regression fixtures end-to-end and
+ * verifies the encoded MP4 has the expected color metadata via ffprobe.
+ *
+ * Why this exists:
+ * - The visual regression harness compares against committed goldens, which
+ *   are platform-sensitive (Linux/Docker vs macOS) and don't exist for the
+ *   hdr-regression fixtures yet.
+ * - The harness also doesn't pass `hdr: true` to createRenderJob, so the
+ *   HDR encode path is never explicitly exercised in CI today.
+ * - This script bypasses both problems: it drives the orchestrator directly
+ *   with the right `hdr` flag and asserts on color metadata, not pixels.
+ *   That gives us a portable signal on the encode + side-data path. 
+ * + * Usage: + * bunx tsx packages/producer/scripts/hdr-smoke.ts # render all fixtures + * bunx tsx packages/producer/scripts/hdr-smoke.ts hdr-pq # render one fixture + * KEEP_TEMP=1 bunx tsx packages/producer/scripts/hdr-smoke.ts + * + * Exits 0 when every assertion passes, non-zero on the first failure. + */ +import { cpSync, existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import { spawnSync } from "node:child_process"; +import { createRenderJob, executeRenderJob } from "../src/services/renderOrchestrator.js"; + +interface ExpectedColor { + pixFmt: string; + colorTransfer: string; + colorPrimaries: string; + /** When true, the file MUST carry HDR side data (MaxCLL / MasteringDisplay). */ + requireHdrSideData?: boolean; +} + +interface Fixture { + id: string; + hdr: boolean; + expected: ExpectedColor; +} + +const FIXTURES: Fixture[] = [ + { + id: "sdr-baseline", + hdr: false, + expected: { + pixFmt: "yuv420p", + colorTransfer: "bt709", + colorPrimaries: "bt709", + }, + }, + { + id: "hdr-pq", + hdr: true, + expected: { + pixFmt: "yuv420p10le", + colorTransfer: "smpte2084", + colorPrimaries: "bt2020", + requireHdrSideData: true, + }, + }, + { + id: "mixed-sdr-hdr", + hdr: true, + expected: { + pixFmt: "yuv420p10le", + colorTransfer: "smpte2084", + colorPrimaries: "bt2020", + requireHdrSideData: true, + }, + }, + { + id: "hdr-feature-stack", + hdr: true, + expected: { + pixFmt: "yuv420p10le", + colorTransfer: "smpte2084", + colorPrimaries: "bt2020", + requireHdrSideData: true, + }, + }, + { + id: "opacity-mixed-fade", + hdr: true, + expected: { + pixFmt: "yuv420p10le", + colorTransfer: "smpte2084", + colorPrimaries: "bt2020", + requireHdrSideData: true, + }, + }, +]; + +interface ProbeResult { + pixFmt: string; + colorTransfer: string; + colorPrimaries: string; + colorSpace: string; + sideDataTypes: string[]; +} + +function probe(filePath: string): ProbeResult { + const 
result = spawnSync( + "ffprobe", + ["-v", "quiet", "-print_format", "json", "-show_streams", "-show_format", filePath], + { encoding: "utf-8" }, + ); + if (result.status !== 0) { + throw new Error(`ffprobe failed for ${filePath}: ${result.stderr}`); + } + const json = JSON.parse(result.stdout) as { + streams: Array<{ + codec_type: string; + pix_fmt?: string; + color_transfer?: string; + color_primaries?: string; + color_space?: string; + side_data_list?: Array<{ side_data_type?: string }>; + }>; + }; + const video = json.streams.find((s) => s.codec_type === "video"); + if (!video) throw new Error(`No video stream in ${filePath}`); + // Stream-level side data: surfaces mp4 mdcv/clli boxes when ffmpeg's mp4 + // muxer transcodes the x265 SEI into container metadata. + const streamSide = (video.side_data_list ?? []) + .map((d) => d.side_data_type ?? "") + .filter(Boolean); + // Frame-level side data: surfaces the raw HEVC SEI prefix NAL units that + // x265 emits when --master-display / --max-cll are passed. We probe just + // the first frame to keep this fast — the SEI is on every IDR. + const frameSide = probeFirstFrameSideData(filePath); + const merged = Array.from(new Set([...streamSide, ...frameSide])); + return { + pixFmt: video.pix_fmt ?? "", + colorTransfer: video.color_transfer ?? "", + colorPrimaries: video.color_primaries ?? "", + colorSpace: video.color_space ?? "", + sideDataTypes: merged, + }; +} + +function probeFirstFrameSideData(filePath: string): string[] { + const result = spawnSync( + "ffprobe", + [ + "-v", + "quiet", + "-print_format", + "json", + "-select_streams", + "v:0", + "-read_intervals", + "%+#1", + "-show_frames", + filePath, + ], + { encoding: "utf-8" }, + ); + if (result.status !== 0) return []; + try { + const json = JSON.parse(result.stdout) as { + frames?: Array<{ side_data_list?: Array<{ side_data_type?: string }> }>; + }; + const frame = json.frames?.[0]; + return (frame?.side_data_list ?? []).map((d) => d.side_data_type ?? 
"").filter(Boolean); + } catch { + return []; + } +} + +function checkExpectations( + fixtureId: string, + actual: ProbeResult, + expected: ExpectedColor, +): string[] { + const errors: string[] = []; + if (actual.pixFmt !== expected.pixFmt) { + errors.push(`${fixtureId}: pix_fmt expected ${expected.pixFmt}, got ${actual.pixFmt}`); + } + if (actual.colorTransfer !== expected.colorTransfer) { + errors.push( + `${fixtureId}: color_transfer expected ${expected.colorTransfer}, got ${actual.colorTransfer || "(unset)"}`, + ); + } + if (actual.colorPrimaries !== expected.colorPrimaries) { + errors.push( + `${fixtureId}: color_primaries expected ${expected.colorPrimaries}, got ${actual.colorPrimaries || "(unset)"}`, + ); + } + if (expected.requireHdrSideData) { + const hasMaxCll = actual.sideDataTypes.some((t) => /content light level|maxcll/i.test(t)); + const hasMastering = actual.sideDataTypes.some((t) => /mastering display/i.test(t)); + if (!hasMaxCll && !hasMastering) { + errors.push( + `${fixtureId}: expected HDR side data (MaxCLL or MasteringDisplay), got [${actual.sideDataTypes.join(", ") || "none"}]`, + ); + } + } + return errors; +} + +async function renderFixture( + fixturesRoot: string, + fixture: Fixture, + workDir: string, +): Promise<{ outputPath: string; durationMs: number }> { + const fixtureSrcDir = join(fixturesRoot, fixture.id, "src"); + if (!existsSync(fixtureSrcDir)) { + throw new Error(`Fixture src directory missing: ${fixtureSrcDir}`); + } + + const tempSrcDir = join(workDir, "src"); + cpSync(fixtureSrcDir, tempSrcDir, { recursive: true }); + + const fixtureAssetsDir = join(fixturesRoot, fixture.id, "assets"); + if (existsSync(fixtureAssetsDir)) { + // Mixed-sdr-hdr's assets/ is largely symlinks to sibling fixtures (e.g. + // ../../hdr-pq/assets/...). Dereference them on copy so the workdir is + // self-contained — otherwise the file server 404s on the relative paths. 
+    cpSync(fixtureAssetsDir, join(workDir, "assets"), {
+      recursive: true,
+      dereference: true,
+    });
+  }
+
+  const outputPath = join(workDir, "output.mp4");
+  const job = createRenderJob({
+    fps: 30,
+    quality: "high",
+    format: "mp4",
+    useGpu: false,
+    debug: false,
+    hdr: fixture.hdr,
+  });
+
+  const start = Date.now();
+  await executeRenderJob(job, tempSrcDir, outputPath);
+  const durationMs = Date.now() - start;
+
+  if (!existsSync(outputPath)) {
+    throw new Error(`Render reported success but no output at ${outputPath}`);
+  }
+  return { outputPath, durationMs };
+}
+
+async function main(): Promise<number> {
+  const filterId = process.argv[2];
+  const keepTemp = process.env.KEEP_TEMP === "1";
+
+  const fixturesRoot = resolve(
+    new URL(".", import.meta.url).pathname,
+    "..",
+    "tests",
+    "hdr-regression",
+  );
+  if (!existsSync(fixturesRoot)) {
+    console.error(`hdr-regression fixtures not found at ${fixturesRoot}`);
+    return 1;
+  }
+
+  const targets = filterId ? FIXTURES.filter((f) => f.id === filterId) : FIXTURES;
+  if (targets.length === 0) {
+    console.error(
+      `No fixture matched "${filterId}". 
Available: ${FIXTURES.map((f) => f.id).join(", ")}`, + ); + return 1; + } + + const tempRoot = mkdtempSync(join(tmpdir(), "hf-hdr-smoke-")); + console.log(`workdir: ${tempRoot}`); + + const allErrors: string[] = []; + let firstFailingFixture: string | null = null; + + try { + for (const fixture of targets) { + const fixtureDir = join(tempRoot, fixture.id); + mkdirSync(fixtureDir, { recursive: true }); + + console.log(`\n=== ${fixture.id} (hdr=${fixture.hdr}) ===`); + const { outputPath, durationMs } = await renderFixture(fixturesRoot, fixture, fixtureDir); + console.log(` rendered in ${(durationMs / 1000).toFixed(1)}s`); + + const probed = probe(outputPath); + console.log(` pix_fmt=${probed.pixFmt}`); + console.log(` color_transfer=${probed.colorTransfer || "(unset)"}`); + console.log(` color_primaries=${probed.colorPrimaries || "(unset)"}`); + console.log(` color_space=${probed.colorSpace || "(unset)"}`); + console.log(` side_data=[${probed.sideDataTypes.join(", ") || "none"}]`); + + const errors = checkExpectations(fixture.id, probed, fixture.expected); + if (errors.length === 0) { + console.log(` PASS`); + } else { + console.log(` FAIL:`); + errors.forEach((e) => console.log(` - ${e}`)); + allErrors.push(...errors); + if (!firstFailingFixture) firstFailingFixture = fixture.id; + } + } + } finally { + if (keepTemp) { + console.log(`\nKEEP_TEMP=1 — leaving ${tempRoot} on disk`); + } else { + try { + rmSync(tempRoot, { recursive: true, force: true }); + } catch (e) { + console.warn(`Failed to clean up ${tempRoot}: ${e}`); + } + } + } + + console.log("\n=== summary ==="); + console.log(`fixtures: ${targets.length}`); + console.log(`failures: ${allErrors.length}`); + if (allErrors.length > 0) { + console.log(`first failure: ${firstFailingFixture}`); + return 1; + } + return 0; +} + +main() + .then((code) => process.exit(code)) + .catch((err) => { + console.error("hdr-smoke crashed:", err); + process.exit(2); + }); diff --git 
a/packages/producer/src/services/htmlCompiler.ts b/packages/producer/src/services/htmlCompiler.ts index 9c7ee5a47..356c07673 100644 --- a/packages/producer/src/services/htmlCompiler.ts +++ b/packages/producer/src/services/htmlCompiler.ts @@ -28,6 +28,8 @@ import { isPathInside, toExternalAssetKey } from "../utils/paths.js"; import { parseVideoElements, type VideoElement, + parseImageElements, + type ImageElement, parseAudioElements, type AudioElement, analyzeKeyframeIntervals, @@ -41,6 +43,7 @@ export interface CompiledComposition { subCompositions: Map; videos: VideoElement[]; audios: AudioElement[]; + images?: ImageElement[]; unresolvedCompositions: UnresolvedElement[]; /** Assets that resolve outside projectDir. Keys are the path used in HTML, values are absolute filesystem paths. */ externalAssets: Map; @@ -980,12 +983,14 @@ export async function compileForRender( // Parse main HTML elements const mainVideos = parseVideoElements(html); const mainAudios = parseAudioElements(html); + const mainImages = parseImageElements(html); // Keep inlined sub-composition media authoritative on ID collisions. // inlineSubCompositions() hoists those nodes into the final HTML, so the // producer should follow the same precedence the runtime sees in the merged DOM. const videos = dedupeElementsById([...mainVideos, ...subVideos]); const audios = dedupeElementsById([...mainAudios, ...subAudios]); + const images = dedupeElementsById(mainImages); // Advisory video checks (sparse keyframes, VFR). Fire-and-forget — these spawn // ffprobe subprocesses and should not block compilation since they only produce warnings. 
@@ -1032,6 +1037,7 @@ export async function compileForRender( subCompositions, videos, audios, + images, unresolvedCompositions, externalAssets, width, diff --git a/packages/producer/src/services/renderOrchestrator.test.ts b/packages/producer/src/services/renderOrchestrator.test.ts index 6ec80f584..60ea6bac0 100644 --- a/packages/producer/src/services/renderOrchestrator.test.ts +++ b/packages/producer/src/services/renderOrchestrator.test.ts @@ -1,12 +1,14 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; -import type { EngineConfig } from "@hyperframes/engine"; +import { deflateSync } from "node:zlib"; +import type { ElementStackingInfo, EngineConfig } from "@hyperframes/engine"; import type { CompiledComposition } from "./htmlCompiler.js"; import { applyRenderModeHints, + blitHdrVideoLayer, extractStandaloneEntryFromIndex, writeCompiledArtifacts, } from "./renderOrchestrator.js"; @@ -244,3 +246,221 @@ describe("applyRenderModeHints", () => { expect(log.warn).not.toHaveBeenCalled(); }); }); + +describe("blitHdrVideoLayer", () => { + // Inline 16-bit PNG helpers (mirrors makePng16 / makeChunk in + // packages/engine alphaBlit.test.ts). We tag each frame's first pixel R + // channel with its 1-based index so we can identify which frame the blit + // selected by reading canvas.readUInt16LE(0). + const PNG_SIG = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]); + const CRC_TABLE = (() => { + const table = new Uint32Array(256); + for (let n = 0; n < 256; n++) { + let c = n; + for (let k = 0; k < 8; k++) c = c & 1 ? 
0xedb88320 ^ (c >>> 1) : c >>> 1;
+      table[n] = c;
+    }
+    return table;
+  })();
+  function crc32(buf: Buffer): number {
+    let c = 0xffffffff;
+    for (let i = 0; i < buf.length; i++) c = CRC_TABLE[(c ^ buf[i]!) & 0xff]! ^ (c >>> 8);
+    return (c ^ 0xffffffff) >>> 0;
+  }
+  function uint32BE(n: number): Buffer {
+    const b = Buffer.alloc(4);
+    b.writeUInt32BE(n >>> 0, 0);
+    return b;
+  }
+  function makeChunk(type: string, data: Buffer): Buffer {
+    const typeBuf = Buffer.from(type, "ascii");
+    const crc = crc32(Buffer.concat([typeBuf, data]));
+    return Buffer.concat([uint32BE(data.length), typeBuf, data, uint32BE(crc)]);
+  }
+  /** Produces a width×height PNG with bit depth 16, color type 2 (RGB). */
+  function makePng16(width: number, height: number, fillR: number): Buffer {
+    const ihdr = Buffer.concat([uint32BE(width), uint32BE(height), Buffer.from([16, 2, 0, 0, 0])]);
+    const rowBytes = width * 6;
+    const raw = Buffer.alloc((rowBytes + 1) * height);
+    for (let y = 0; y < height; y++) {
+      raw[y * (rowBytes + 1)] = 0;
+      for (let x = 0; x < width; x++) {
+        const off = y * (rowBytes + 1) + 1 + x * 6;
+        raw.writeUInt16BE(fillR, off);
+        raw.writeUInt16BE(0, off + 2);
+        raw.writeUInt16BE(0, off + 4);
+      }
+    }
+    return Buffer.concat([
+      PNG_SIG,
+      makeChunk("IHDR", ihdr),
+      makeChunk("IDAT", deflateSync(raw)),
+      makeChunk("IEND", Buffer.alloc(0)),
+    ]);
+  }
+
+  function writeFrameSet(dir: string, count: number): void {
+    for (let i = 1; i <= count; i++) {
+      const png = makePng16(8, 8, i);
+      writeFileSync(join(dir, `frame_${String(i).padStart(4, "0")}.png`), png);
+    }
+  }
+
+  function makeElement(overrides: Partial<ElementStackingInfo> = {}): ElementStackingInfo {
+    return {
+      id: "v1",
+      zIndex: 0,
+      x: 0,
+      y: 0,
+      width: 8,
+      height: 8,
+      layoutWidth: 8,
+      layoutHeight: 8,
+      opacity: 1,
+      visible: true,
+      isHdr: true,
+      transform: "none",
+      borderRadius: [0, 0, 0, 0],
+      ...overrides,
+    };
+  }
+
+  let workDir: string;
+  beforeEach(() => {
+    workDir = mkdtempSync(join(tmpdir(), "hdr-blit-"));
+    
writeFrameSet(workDir, 5); + }); + afterEach(() => { + rmSync(workDir, { recursive: true, force: true }); + }); + + it("returns without modifying canvas when element is not in frame-dir map", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + blitHdrVideoLayer(canvas, el, 0, 30, new Map(), new Map(), 8, 8); + expect(canvas.every((b) => b === 0)).toBe(true); + }); + + it("returns without modifying canvas when computed frame index is < 1", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + blitHdrVideoLayer(canvas, el, -0.5, 30, new Map([["v1", workDir]]), new Map([["v1", 0]]), 8, 8); + expect(canvas.every((b) => b === 0)).toBe(true); + }); + + it("blits frame 1 at time = startTime", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + blitHdrVideoLayer(canvas, el, 0, 30, new Map([["v1", workDir]]), new Map([["v1", 0]]), 8, 8); + expect(canvas.readUInt16LE(0)).toBe(1); + }); + + it("computes frame index as round((time - startTime) * fps) + 1", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + blitHdrVideoLayer( + canvas, + el, + 2 / 30, + 30, + new Map([["v1", workDir]]), + new Map([["v1", 0]]), + 8, + 8, + ); + expect(canvas.readUInt16LE(0)).toBe(3); + }); + + it("freezes on the last available frame when time outlives the clip", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + blitHdrVideoLayer( + canvas, + el, + 10, // 10s @ 30fps would request frame 301; we have 5 → clamp to 5 + 30, + new Map([["v1", workDir]]), + new Map([["v1", 0]]), + 8, + 8, + ); + expect(canvas.readUInt16LE(0)).toBe(5); + }); + + it("respects startTime offset", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + blitHdrVideoLayer( + canvas, + el, + 4 / 30, // time + 30, + new Map([["v1", workDir]]), + new Map([["v1", 2 / 30]]), // startTime → effective video frame index = 4-2+1 = 3 + 8, + 8, + ); + 
expect(canvas.readUInt16LE(0)).toBe(3); + }); + + it("uses region blit (placement at el.x,el.y) when transform is 'none'", () => { + const canvas = Buffer.alloc(16 * 16 * 6); + const el = makeElement({ x: 4, y: 0, width: 8, height: 8 }); + blitHdrVideoLayer(canvas, el, 0, 30, new Map([["v1", workDir]]), new Map([["v1", 0]]), 16, 16); + // (0,0) untouched, (4,0) is frame-1 R channel + expect(canvas.readUInt16LE(0)).toBe(0); + expect(canvas.readUInt16LE((0 * 16 + 4) * 6)).toBe(1); + }); + + it("uses affine blit when transform parses to a matrix", () => { + const canvas = Buffer.alloc(16 * 16 * 6); + // matrix(a,b,c,d,e,f) — translate(4,0) + const el = makeElement({ + x: 0, + y: 0, + transform: "matrix(1, 0, 0, 1, 4, 0)", + }); + blitHdrVideoLayer(canvas, el, 0, 30, new Map([["v1", workDir]]), new Map([["v1", 0]]), 16, 16); + expect(canvas.readUInt16LE(0)).toBe(0); + expect(canvas.readUInt16LE((0 * 16 + 4) * 6)).toBe(1); + }); + + it("does not throw when target frame file does not exist", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + rmSync(join(workDir, "frame_0001.png")); + expect(() => + blitHdrVideoLayer(canvas, el, 0, 30, new Map([["v1", workDir]]), new Map([["v1", 0]]), 8, 8), + ).not.toThrow(); + expect(canvas.every((b) => b === 0)).toBe(true); + }); + + it("logs decode errors via the supplied logger and does not throw", () => { + const canvas = Buffer.alloc(8 * 8 * 6); + const el = makeElement(); + // Replace frame_0001.png with garbage so decodePngToRgb48le throws. 
+ writeFileSync(join(workDir, "frame_0001.png"), Buffer.from("not a png")); + const log = { + error: vi.fn(), + warn: vi.fn(), + info: vi.fn(), + debug: vi.fn(), + }; + expect(() => + blitHdrVideoLayer( + canvas, + el, + 0, + 30, + new Map([["v1", workDir]]), + new Map([["v1", 0]]), + 8, + 8, + log, + ), + ).not.toThrow(); + expect(log.warn).toHaveBeenCalledOnce(); + const call = log.warn.mock.calls[0]!; + expect(call[0]).toContain("HDR blit failed for v1"); + }); +}); diff --git a/packages/producer/src/services/renderOrchestrator.ts b/packages/producer/src/services/renderOrchestrator.ts index c8db0e86b..0e44c3d75 100644 --- a/packages/producer/src/services/renderOrchestrator.ts +++ b/packages/producer/src/services/renderOrchestrator.ts @@ -30,6 +30,7 @@ import { extractAllVideoFrames, createFrameLookupTable, type VideoElement, + type ImageElement, FrameLookupTable, type HdrTransfer, detectTransfer, @@ -76,6 +77,8 @@ import { TRANSITIONS, crossfade, convertTransfer, + injectHdrBoxes, + DEFAULT_HDR10_MASTERING, type TransitionFn, type ElementStackingInfo, type HfTransitionMeta, @@ -252,6 +255,7 @@ export interface CompositionMetadata { duration: number; videos: VideoElement[]; audios: AudioElement[]; + images: ImageElement[]; width: number; height: number; } @@ -390,8 +394,14 @@ export function applyRenderModeHints( * Shared between the normal-frame compositing path (compositeToBuffer) * and the transition dual-scene compositing loop to avoid duplicating * the frame lookup, fallback, decode, transform, and blit logic. + * + * Exported for unit testing — owns the time→frame math, last-frame freeze, + * border-radius detection, and affine-vs-region branch. 
 */
-function blitHdrVideoLayer(
+type HdrDecodeCacheEntry = { hdrRgb: Buffer; srcW: number; srcH: number };
+type HdrDecodeCache = Map<string, HdrDecodeCacheEntry>;
+
+export function blitHdrVideoLayer(
   canvas: Buffer,
   el: ElementStackingInfo,
   time: number,
@@ -403,6 +413,7 @@ function blitHdrVideoLayer(
   log?: ProducerLogger,
   sourceTransfer?: HdrTransfer,
   targetTransfer?: HdrTransfer,
+  decodeCache?: HdrDecodeCache,
 ): void {
   const frameDir = hdrFrameDirs.get(el.id);
   const startTime = hdrStartTimes.get(el.id);
@@ -426,11 +437,38 @@ function blitHdrVideoLayer(
   }
 
   try {
-    const { data: hdrRgb, width: srcW, height: srcH } = decodePngToRgb48le(readFileSync(framePath));
+    // Decode-once cache for HDR image layers (still images blitted on every frame
+    // they're visible). Caller passes `decodeCache` only for image layers — video
+    // layers would bloat memory because each frame has a unique path. Cache key
+    // includes both transfers because convertTransfer mutates the decoded buffer
+    // in-place. Cached entries are read-only (blit functions don't mutate source).
+    const cacheKey = decodeCache
+      ? `${framePath}::${sourceTransfer ?? "none"}::${targetTransfer ?? "none"}`
+      : undefined;
+    const cached = cacheKey ? 
decodeCache?.get(cacheKey) : undefined; + + let hdrRgb: Buffer; + let srcW: number; + let srcH: number; + + if (cached) { + hdrRgb = cached.hdrRgb; + srcW = cached.srcW; + srcH = cached.srcH; + } else { + const decoded = decodePngToRgb48le(readFileSync(framePath)); + hdrRgb = decoded.data; + srcW = decoded.width; + srcH = decoded.height; + + // Convert between HDR transfer functions if source doesn't match output + if (sourceTransfer && targetTransfer && sourceTransfer !== targetTransfer) { + convertTransfer(hdrRgb, sourceTransfer, targetTransfer); + } - // Convert between HDR transfer functions if source doesn't match output - if (sourceTransfer && targetTransfer && sourceTransfer !== targetTransfer) { - convertTransfer(hdrRgb, sourceTransfer, targetTransfer); + if (cacheKey && decodeCache) { + decodeCache.set(cacheKey, { hdrRgb, srcW, srcH }); + } } const viewportMatrix = parseTransformMatrix(el.transform); @@ -470,7 +508,7 @@ function blitHdrVideoLayer( } } catch (err) { if (log) { - log.debug(`HDR blit failed for ${el.id}`, { + log.warn(`HDR blit failed for ${el.id}`, { error: err instanceof Error ? err.message : String(err), }); } @@ -648,6 +686,7 @@ export async function executeRenderJob( duration: compiled.staticDuration, videos: compiled.videos, audios: compiled.audios, + images: compiled.images ?? [], width: compiled.width, height: compiled.height, }; @@ -919,6 +958,43 @@ export async function executeRenderJob( ); } + // Probe images for HDR color space (also gated by --hdr). Tolerates per-image + // probe failures (corrupt file, unsupported format) so one bad image doesn't + // abort the render — mirrors the video probe path. + if (job.config.hdr && composition.images.length > 0) { + await Promise.all( + composition.images.map(async (img) => { + let imgPath = img.src; + if (!imgPath.startsWith("/")) { + const fromCompiled = existsSync(join(compiledDir, imgPath)) + ? 
join(compiledDir, imgPath) + : join(projectDir, imgPath); + imgPath = fromCompiled; + } + if (!existsSync(imgPath)) { + log.warn(`HDR probe skipped — image not found: ${img.src} (id=${img.id})`); + return; + } + try { + const meta = await extractVideoMetadata(imgPath); + if (isHdrColorSpace(meta.colorSpace)) { + if (meta.fps > 0 || meta.durationSeconds > 0.05) { + log.warn( + `HDR image ${img.id} appears animated (fps=${meta.fps}, duration=${meta.durationSeconds.toFixed(3)}s). Only the first frame will be used — HDR currently supports still images only.`, + ); + } + nativeHdrVideoIds.add(img.id); + videoTransfers.set(img.id, detectTransfer(meta.colorSpace)); + } + } catch (err) { + log.warn( + `HDR probe failed for ${img.id}: ${err instanceof Error ? err.message : String(err)}`, + ); + } + }), + ); + } + if (composition.videos.length > 0) { extractionResult = await extractAllVideoFrames( composition.videos, @@ -927,6 +1003,10 @@ export async function executeRenderJob( abortSignal, undefined, compiledDir, + // Skip SDR→HDR conversion when HDR compositing will handle it in the blit step. + // convertSdrToHdr produces bt2020 pixels that Chrome misinterprets as sRGB, + // making SDR content invisible. + nativeHdrVideoIds.size > 0, ); assertNotAborted(); @@ -1165,15 +1245,20 @@ export async function executeRenderJob( // visible at t=0 (e.g., data-start > 0) need to be queried at their own // start time so their layout dimensions are available. 
const hdrExtractionDims = new Map(); - const hdrVideoStartTimes = new Map(); + const hdrLayerStartTimes = new Map(); for (const v of composition.videos) { if (hdrVideoIds.includes(v.id)) { - hdrVideoStartTimes.set(v.id, v.start); + hdrLayerStartTimes.set(v.id, v.start); + } + } + for (const img of composition.images) { + if (nativeHdrVideoIds.has(img.id)) { + hdrLayerStartTimes.set(img.id, img.start); } } // Collect unique start times to minimize seek operations - const uniqueStartTimes = [...new Set(hdrVideoStartTimes.values())].sort((a, b) => a - b); + const uniqueStartTimes = [...new Set(hdrLayerStartTimes.values())].sort((a, b) => a - b); for (const seekTime of uniqueStartTimes) { await domSession.page.evaluate((t: number) => { if (window.__hf && typeof window.__hf.seek === "function") window.__hf.seek(t); @@ -1235,6 +1320,42 @@ export async function executeRenderJob( hdrFrameDirs.set(videoId, frameDir); } + // ── Extract HDR images as single-frame 16-bit PNGs ─────────────── + for (const img of composition.images) { + if (!nativeHdrVideoIds.has(img.id)) continue; + let imgPath = img.src; + if (!imgPath.startsWith("/")) { + const fromCompiled = join(compiledDir, imgPath); + imgPath = existsSync(fromCompiled) ? fromCompiled : join(projectDir, imgPath); + } + const frameDir = join(framesDir, `hdr_img_${img.id}`); + mkdirSync(frameDir, { recursive: true }); + const dims = hdrExtractionDims.get(img.id) ?? 
{ width, height }; + const imgResult = await runFfmpeg( + [ + "-i", + imgPath, + "-frames:v", + "1", + "-vf", + `scale=${dims.width}:${dims.height}:force_original_aspect_ratio=increase,crop=${dims.width}:${dims.height}`, + "-pix_fmt", + "rgb48le", + "-c:v", + "png", + "-y", + join(frameDir, "frame_0001.png"), + ], + { signal: abortSignal }, + ); + if (!imgResult.success) { + log.warn(`HDR image extraction failed for ${img.id}: ${imgResult.stderr.slice(-200)}`); + nativeHdrVideoIds.delete(img.id); + continue; + } + hdrFrameDirs.set(img.id, frameDir); + } + assertNotAborted(); try { @@ -1335,17 +1456,18 @@ export async function executeRenderJob( time, job.config.fps, hdrFrameDirs, - hdrVideoStartTimes, + hdrLayerStartTimes, width, height, log, videoTransfers.get(layer.element.id), effectiveHdr?.transfer, + hdrImageIds.has(layer.element.id) ? hdrDecodeCache : undefined, ); if (shouldLog) { const after = countNonZeroRgb48(canvas); const frameDir = hdrFrameDirs.get(layer.element.id); - const startTime = hdrVideoStartTimes.get(layer.element.id) ?? 0; + const startTime = hdrLayerStartTimes.get(layer.element.id) ?? 0; const localTime = time - startTime; const frameNum = Math.floor(localTime * job.config.fps) + 1; const expectedFrame = frameDir @@ -1469,6 +1591,14 @@ export async function executeRenderJob( // to avoid ~37 MB allocation per frame in the hot loop. const normalCanvas = Buffer.alloc(bufSize); + // ── Decode-once cache for HDR image layers ────────────────────────── + // Render-scoped cache for decoded rgb48le buffers. Only image layers use it + // (videos have unique paths per-frame and would bloat memory: 300 frames × + // ~37 MB at 1080p = ~11 GB). Images decode once and are reused for every + // visible frame (~150–1800 hits each). Cleared when the render job ends. 
+ const hdrDecodeCache: HdrDecodeCache = new Map(); + const hdrImageIds = new Set(composition.images.map((i) => i.id)); + for (let i = 0; i < totalFrames; i++) { assertNotAborted(); const time = i / job.config.fps; @@ -1541,12 +1671,13 @@ export async function executeRenderJob( time, job.config.fps, hdrFrameDirs, - hdrVideoStartTimes, + hdrLayerStartTimes, width, height, log, videoTransfers.get(el.id), effectiveHdr?.transfer, + hdrImageIds.has(el.id) ? hdrDecodeCache : undefined, ); } @@ -1972,6 +2103,22 @@ export async function executeRenderJob( } } + // FFmpeg's mp4 muxer rebuilds the container during mux/faststart and + // drops the mdcv/clli boxes we injected into videoOnlyPath. Re-inject + // them into the final outputPath so YouTube and HDR TVs recognize the + // file as HDR10. Frame-level HEVC SEI metadata always survives stream + // copy, but the container-level boxes do not. + if (preset.hdr && preset.codec === "h265" && outputPath.endsWith(".mp4")) { + try { + injectHdrBoxes(outputPath, DEFAULT_HDR10_MASTERING); + log.debug(`Injected HDR10 mdcv/clli boxes into ${outputPath}`); + } catch (err) { + log.warn( + `HDR mdcv/clli injection failed for ${outputPath}: ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + perfStages.assembleMs = Date.now() - stage6Start; // ── Complete ───────────────────────────────────────────────────────── diff --git a/packages/producer/tests/hdr-regression/README.md b/packages/producer/tests/hdr-regression/README.md new file mode 100644 index 000000000..68c7a196e --- /dev/null +++ b/packages/producer/tests/hdr-regression/README.md @@ -0,0 +1,144 @@ +# HDR Regression Fixtures + +End-to-end fixtures that exercise the HDR rendering pipeline (BT.2020 PQ +10-bit encode, sRGB→BT.2020 overlay conversion, HDR10 container metadata, +mixed SDR + HDR source compositing). 
+ +These fixtures live next to the SDR regression tests but are verified by a +different mechanism, and **are not currently part of the CI regression +matrix**. See [Known gaps](#known-gaps) below. + +## Fixtures + +Each fixture is a self-contained composition under +`packages/producer/tests/hdr-regression//`: + +| ID | What it covers | `meta.json` | +| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| `sdr-baseline` | All media types in plain SDR. Confirms the HDR pipeline doesn't regress SDR output. | yes | +| `hdr-pq` | Native HDR composition: BT.2020 PQ video + 16-bit HDR PNG + audio + captions + shader transitions, rendered with `--hdr`. | yes | +| `mixed-sdr-hdr` | SDR (BT.709) and HDR (BT.2020 PQ) sources in the same scene stack. Renders twice (SDR and HDR) to exercise cross-transfer compositing. | yes | +| `hdr-feature-stack` | Six-scene tour through the HDR feature surface (overlays, transforms, mixed sources, white background, etc.). | no | +| `opacity-mixed-fade` | Single scene with one SDR and one HDR clip, both fading in and yo-yoing opacity. Pinned regression for the SDR opacity bug. | no | + +Fixtures with a `meta.json` are discoverable by the regression harness; +fixtures without it are only invoked by the smoke script (and manual renders +through the studio / CLI). + +## How HDR is tested today + +Three layers, in order of strength: + +### 1. Engine unit tests (in CI, vitest) + +Run by `bun run --filter @hyperframes/engine test`: + +- `packages/engine/src/utils/hdr.test.ts` — transfer detection, BT.2020 vs + BT.709 encoder param selection, `analyzeCompositionHdr`. +- `packages/engine/src/services/hdrCapture.test.ts` — float16 → PQ RGB + conversion (the rgb48le capture path). 
+- `packages/engine/src/utils/mp4HdrBoxes.test.ts` — HDR10 mastering display + (`mdcv`) and content light level (`clli`) box construction and post-mux + injection. +- `packages/engine/src/utils/layerCompositor.test.ts`, + `packages/engine/src/utils/alphaBlit.test.ts`, + `packages/engine/src/utils/uint16-alignment-audit.test.ts` — 16-bpc + compositing correctness. + +These cover the building blocks but not the assembled pipeline. + +### 2. `hdr-smoke.ts` (manual, not in CI) + +`packages/producer/scripts/hdr-smoke.ts` renders every fixture in this +directory through the orchestrator with the right `hdr` flag, then asserts +on **color metadata** via `ffprobe`: + +- `pix_fmt` (e.g. `yuv420p10le` for HDR) +- `color_transfer` (`smpte2084` for PQ, `bt709` for SDR) +- `color_primaries` (`bt2020` for HDR, `bt709` for SDR) +- HDR10 side data (MaxCLL / MasteringDisplay) when `requireHdrSideData` is + set on the fixture + +This gives a portable signal that the encode and side-data path are intact, +without requiring committed pixel goldens. **It does not verify visual +correctness** — opacity bugs, layer order regressions, transition glitches, +and similar visual issues will still pass it. Run locally: + +```bash +bunx tsx packages/producer/scripts/hdr-smoke.ts # all fixtures +bunx tsx packages/producer/scripts/hdr-smoke.ts hdr-pq # one fixture +KEEP_TEMP=1 bunx tsx packages/producer/scripts/hdr-smoke.ts # keep workdir for inspection +``` + +Requires a working `ffmpeg` and `ffprobe` on `PATH`. + +### 3. Visual regression harness (partial) + +`packages/producer/src/regression-harness.ts` discovers fixtures by walking +`packages/producer/tests/*` and looking for `meta.json` + `src/index.html`. +It compares rendered output to a committed `output/output.mp4` golden using +PSNR and audio correlation. + +For HDR: + +- The three fixtures with `meta.json` (`sdr-baseline`, `hdr-pq`, + `mixed-sdr-hdr`) are discoverable by the harness. 
+- **No HDR golden MP4s are committed**, so the harness will not actually + validate them today. +- The `regression.yml` shards in `.github/workflows/regression.yml` enumerate + named fixtures explicitly and do not include the HDR ones. +- The harness also doesn't pass `hdr: true` to `createRenderJob`, so even if + goldens existed, the HDR encode path wouldn't be exercised through this + route. + +## Known gaps + +- **No visual goldens.** Pixel-level correctness for HDR is uncovered. This + is how the SDR opacity yoyo bug on `