From d1cd31fc7a02d31c236f5bd5fa0f714891bbf26a Mon Sep 17 00:00:00 2001 From: James Date: Thu, 23 Apr 2026 16:08:38 +0000 Subject: [PATCH 1/3] =?UTF-8?q?perf(engine):=20segment-scope=20SDR?= =?UTF-8?q?=E2=86=92HDR=20preflight?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit convertSdrToHdr now takes startTime + duration and limits the re-encode to the segment the composition actually uses. Mirrors the VFR→CFR fix: a 30-minute SDR source contributing a 2-second clip was transcoded in full pre-fix (a >100× waste); post-fix only the used segment is touched and entry.video.mediaStart is zeroed so downstream extraction seeks from 0. Phase 2 now captures the full VideoMetadata per resolvedVideos entry (previously only colorSpace) so segDuration can fall back to the source's natural duration when video.end is Infinity without an extra ffprobe. Validation: /tmp/hf-fixtures/hdr-sdr-mixed-scope hdrPreflightMs: >1000 → 150 (-87%) videoExtractMs: 1272 → 237 (-82%) tmpPeakBytes: 8.2MB → 4.5MB (-45%) Adds a regression test that synthesizes a 10s SDR + 2s HDR fixture inline and asserts the converted SDR file's duration matches the 2s used segment, not the 10s source. --- .../src/services/videoFrameExtractor.test.ts | 81 +++++++++++++++++++ .../src/services/videoFrameExtractor.ts | 46 +++++++++-- 2 files changed, 120 insertions(+), 7 deletions(-) diff --git a/packages/engine/src/services/videoFrameExtractor.test.ts b/packages/engine/src/services/videoFrameExtractor.test.ts index 84f59cff..c63e0e93 100644 --- a/packages/engine/src/services/videoFrameExtractor.test.ts +++ b/packages/engine/src/services/videoFrameExtractor.test.ts @@ -196,6 +196,87 @@ describe.skipIf(!HAS_FFMPEG)("extractAllVideoFrames on a VFR source", () => { expect(result.phaseBreakdown.vfrPreflightMs).toBeGreaterThan(0); }, 60_000); + // Regression test for the segment-scope HDR preflight fix: pre-fix, + // convertSdrToHdr re-encoded the entire source, so a 30-minute SDR source + // contributing a 2-second clip took ~200× longer than needed. Post-fix the + // converted file's duration matches the used segment. + it("bounds the SDR→HDR preflight re-encode to the used segment", async () => { + const SDR_LONG = join(FIXTURE_DIR, "sdr-long.mp4"); + const HDR_SHORT = join(FIXTURE_DIR, "hdr-short.mp4"); + + const sdrResult = await runFfmpeg([ + "-y", + "-hide_banner", + "-loglevel", + "error", + "-f", + "lavfi", + "-i", + "testsrc2=s=320x180:d=10:rate=30", + "-c:v", + "libx264", + "-preset", + "ultrafast", + "-pix_fmt", + "yuv420p", + SDR_LONG, + ]); + if (!sdrResult.success) { + throw new Error(`SDR fixture synthesis failed: ${sdrResult.stderr.slice(-400)}`); + } + + // Tag as bt2020nc / smpte2084 so the preflight path considers the timeline mixed-HDR. + const hdrResult = await runFfmpeg([ + "-y", + "-hide_banner", + "-loglevel", + "error", + "-f", + "lavfi", + "-i", + "testsrc2=s=320x180:d=2:rate=30", + "-c:v", + "libx264", + "-preset", + "ultrafast", + "-pix_fmt", + "yuv420p", + "-color_primaries", + "bt2020", + "-color_trc", + "smpte2084", + "-colorspace", + "bt2020nc", + HDR_SHORT, + ]); + if (!hdrResult.success) { + throw new Error(`HDR fixture synthesis failed: ${hdrResult.stderr.slice(-400)}`); + } + + const outputDir = join(FIXTURE_DIR, "out-hdr-segment"); + mkdirSync(outputDir, { recursive: true }); + + const videos: VideoElement[] = [ + { id: "sdr", src: SDR_LONG, start: 0, end: 2, mediaStart: 0, hasAudio: false }, + { id: "hdr", src: HDR_SHORT, start: 2, end: 4, mediaStart: 0, hasAudio: false }, + ]; + + const result = await extractAllVideoFrames(videos, FIXTURE_DIR, { + fps: 30, + outputDir, + }); + expect(result.errors).toEqual([]); + expect(result.phaseBreakdown.hdrPreflightCount).toBe(1); + + const convertedPath = join(outputDir, "_hdr_normalized", "sdr_hdr.mp4"); + expect(existsSync(convertedPath)).toBe(true); + const convertedMeta = await extractVideoMetadata(convertedPath); + // Pre-fix duration matched the 10s source; post-fix it matches the 2s segment + // (±0.2s for encoder keyframe/seek alignment). + expect(convertedMeta.durationSeconds).toBeGreaterThan(1.8); + expect(convertedMeta.durationSeconds).toBeLessThan(2.5); + }, 60_000); + // Asserts both frame-count correctness and that we don't emit long runs of // byte-identical "duplicate" frames — the user-visible "frozen screen // recording" symptom. Pre-fix duplicate rate on this fixture is ~38% diff --git a/packages/engine/src/services/videoFrameExtractor.ts b/packages/engine/src/services/videoFrameExtractor.ts index 406d45cf..c83423e0 100644 --- a/packages/engine/src/services/videoFrameExtractor.ts +++ b/packages/engine/src/services/videoFrameExtractor.ts @@ -293,10 +293,17 @@ export async function extractVideoFramesRange( * function (PQ for HDR10, HLG for broadcast HDR). The output transfer must * match the dominant transfer of the surrounding HDR content; otherwise the * downstream encoder will tag the final video with the wrong curve. + * + * `startTime` and `duration` bound the re-encode to the segment the composition + * actually uses. Without them a 30-minute screen recording that contributes a + * 2-second clip was transcoded in full — a >100× waste for long sources. + * Mirrors the segment-scope fix already applied to the VFR→CFR preflight. */ async function convertSdrToHdr( inputPath: string, outputPath: string, + startTime: number, + duration: number, targetTransfer: HdrTransfer, signal?: AbortSignal, config?: Partial>, @@ -307,8 +314,12 @@ async function convertSdrToHdr( const colorTrc = targetTransfer === "pq" ? "smpte2084" : "arib-std-b67"; const args = [ + "-ss", + String(startTime), "-i", inputPath, + "-t", + String(duration), "-vf", "colorspace=all=bt2020:iall=bt709:range=tv", "-color_primaries", @@ -455,12 +466,10 @@ export async function extractAllVideoFrames( // Phase 2: Probe color spaces and normalize if mixed HDR/SDR const phase2ProbeStart = Date.now(); - const videoColorSpaces = await Promise.all( - resolvedVideos.map(async ({ videoPath }) => { - const metadata = await extractMediaMetadata(videoPath); - return metadata.colorSpace; - }), + const videoMetadata = await Promise.all( + resolvedVideos.map(({ videoPath }) => extractMediaMetadata(videoPath)), ); + const videoColorSpaces = videoMetadata.map((m) => m.colorSpace); breakdown.hdrProbeMs = Date.now() - phase2ProbeStart; const hdrPreflightStart = Date.now(); @@ -483,11 +492,34 @@ export async function extractAllVideoFrames( // SDR video in a mixed timeline — convert to the dominant HDR transfer // so the encoder tags the final video correctly (PQ vs HLG). const entry = resolvedVideos[i]; - if (!entry) continue; + const metadata = videoMetadata[i]; + if (!entry || !metadata) continue; + + // Scope the re-encode to the segment the composition actually uses. + // Long sources (e.g. 30-minute screen recordings) contributing short + // clips were transcoded in full pre-fix — a >100× waste. + let segDuration = entry.video.end - entry.video.start; + if (!Number.isFinite(segDuration) || segDuration <= 0) { + const sourceRemaining = metadata.durationSeconds - entry.video.mediaStart; + segDuration = sourceRemaining > 0 ? sourceRemaining : metadata.durationSeconds; + } + const convertedPath = join(convertDir, `${entry.video.id}_hdr.mp4`); try { - await convertSdrToHdr(entry.videoPath, convertedPath, targetTransfer, signal, config); + await convertSdrToHdr( + entry.videoPath, + convertedPath, + entry.video.mediaStart, + segDuration, + targetTransfer, + signal, + config, + ); entry.videoPath = convertedPath; + // Segment-scoped re-encode starts the new file at t=0, so downstream + // extraction must seek from 0, not the original mediaStart. Shallow-copy + // to avoid mutating the caller's VideoElement (mirrors the VFR fix). + entry.video = { ...entry.video, mediaStart: 0 }; breakdown.hdrPreflightCount += 1; } catch (err) { errors.push({ From b66273d14c6971fec7d8890af2ed813dc998a9bd Mon Sep 17 00:00:00 2001 From: James Russo Date: Thu, 23 Apr 2026 22:03:54 +0000 Subject: [PATCH 2/3] =?UTF-8?q?perf(engine):=20guard=20SDR=E2=86=92HDR=20p?= =?UTF-8?q?reflight=20against=20zero-duration=20+=20past-EOF=20mediaStart?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Miguel's review on #445 — harden against two silent-failure modes in convertSdrToHdr that produce 0-byte output files: - duration <= 0: throw explicit Error inside convertSdrToHdr so every caller (current and future) is protected. FFmpeg's `-t 0` silently writes an empty output that the downstream extractor treats as a valid (empty) file; the throw surfaces the misuse. - startTime >= metadata.durationSeconds at the HDR call site: skip the entry and push a descriptive error instead of invoking ffmpeg with `-ss` past EOF (also yields 0-byte output). Matches Miguel's suggested caller-side shape with errors.push + continue. This edge case exists in the VFR path too; kept scope tight here since that's inherited and not a regression. --- .../engine/src/services/videoFrameExtractor.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/engine/src/services/videoFrameExtractor.ts b/packages/engine/src/services/videoFrameExtractor.ts index c83423e0..f9035975 100644 --- a/packages/engine/src/services/videoFrameExtractor.ts +++ b/packages/engine/src/services/videoFrameExtractor.ts @@ -308,6 +308,11 @@ async function convertSdrToHdr( signal?: AbortSignal, config?: Partial>, ): Promise { + // Positive duration is required — FFmpeg's `-t 0` silently produces a 0-byte + // output that the downstream extractor then treats as a valid (empty) file. + if (duration <= 0) { + throw new Error(`convertSdrToHdr: duration must be positive (got ${duration})`); + } const timeout = config?.ffmpegProcessTimeout ?? DEFAULT_CONFIG.ffmpegProcessTimeout; // smpte2084 = PQ (HDR10), arib-std-b67 = HLG. @@ -495,6 +500,17 @@ export async function extractAllVideoFrames( const metadata = videoMetadata[i]; if (!entry || !metadata) continue; + // Guard against mediaStart past EOF — FFmpeg's `-ss` silently produces + // a 0-byte file when seeking beyond the source duration, and the + // downstream extractor then points at a broken input. + if (entry.video.mediaStart >= metadata.durationSeconds) { + errors.push({ + videoId: entry.video.id, + error: `SDR→HDR conversion skipped: mediaStart (${entry.video.mediaStart}s) ≥ source duration (${metadata.durationSeconds}s)`, + }); + continue; + } + // Scope the re-encode to the segment the composition actually uses. // Long sources (e.g. 30-minute screen recordings) contributing short // clips were transcoded in full pre-fix — a >100× waste. From 17d16224cfdeff5da7b5a0efd70d8fd11b3b07b8 Mon Sep 17 00:00:00 2001 From: James Russo Date: Thu, 23 Apr 2026 22:41:55 +0000 Subject: [PATCH 3/3] perf(engine): remove past-EOF entries from all parallel arrays before Phase 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Miguel's P3 on #445 (https://github.com/heygen-com/hyperframes/pull/445#discussion_r3134305555): The previous `errors.push({...}); continue;` in the HDR preflight only short-circuited the normalization step — it did NOT remove the invalid entry from `resolvedVideos`, so Phase 3 still called extractVideoFramesRange with the same past-EOF mediaStart and surfaced a second raw FFmpeg error ("Nothing was written into output file") for the same clip. Miguel reproduced this with a synthetic 2s SDR + 1s HDR case and mediaStart=5. Fix: track the skipped-at-HDR-preflight indices in a set, then splice them out of every parallel array (resolvedVideos, videoMetadata, videoColorSpaces) after the HDR preflight loop completes. Iterate backwards so the indices stay stable while splicing. Phase 2b (VFR preflight) and Phase 3 (extract) now see only validated entries, so there's exactly one descriptive error per bad clip instead of two. Downstack #446 adds a fourth parallel array (cacheKeyInputs) and will extend this splice to cover that array during its rebase. The same inherited bug still exists on the VFR path — leaving that for a follow-up PR since it's out of scope for this HDR-preflight fix and was noted as inherited (not a regression) in the original review. --- .../src/services/videoFrameExtractor.ts | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/packages/engine/src/services/videoFrameExtractor.ts b/packages/engine/src/services/videoFrameExtractor.ts index f9035975..c4ff8253 100644 --- a/packages/engine/src/services/videoFrameExtractor.ts +++ b/packages/engine/src/services/videoFrameExtractor.ts @@ -479,6 +479,13 @@ export async function extractAllVideoFrames( const hdrPreflightStart = Date.now(); const hdrInfo = analyzeCompositionHdr(videoColorSpaces); + // Track entries the HDR preflight validated as non-extractable so they can + // be removed from every parallel array before Phase 2b and Phase 3 see them. + // Without this, `errors.push({...}); continue;` only short-circuits the + // normalization step — the invalid entry stays in `resolvedVideos` and + // Phase 3 still calls `extractVideoFramesRange` on the same past-EOF + // mediaStart, surfacing a second raw FFmpeg error for the same clip. + const hdrSkippedIndices = new Set(); if (hdrInfo.hasHdr && hdrInfo.dominantTransfer) { // dominantTransfer is "majority wins" — if a composition mixes PQ and HLG // sources (rare but legal), the minority transfer's videos get converted @@ -508,6 +515,7 @@ export async function extractAllVideoFrames( videoId: entry.video.id, error: `SDR→HDR conversion skipped: mediaStart (${entry.video.mediaStart}s) ≥ source duration (${metadata.durationSeconds}s)`, }); + hdrSkippedIndices.add(i); continue; } @@ -548,6 +556,19 @@ export async function extractAllVideoFrames( } breakdown.hdrPreflightMs = Date.now() - hdrPreflightStart; + // Remove HDR-preflight-skipped entries from every parallel array so Phase 2b + // (VFR) and Phase 3 (extract) don't re-process them. Iterate backwards to + // keep indices stable while splicing. + if (hdrSkippedIndices.size > 0) { + for (let i = resolvedVideos.length - 1; i >= 0; i--) { + if (hdrSkippedIndices.has(i)) { + resolvedVideos.splice(i, 1); + videoMetadata.splice(i, 1); + videoColorSpaces.splice(i, 1); + } + } + } + // Phase 2b: Re-encode VFR inputs to CFR so the fps filter in Phase 3 produces // the expected frame count. Only the used segment is transcoded. const vfrPreflightStart = Date.now();