Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions packages/engine/src/services/videoFrameExtractor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,125 @@ describe.skipIf(!HAS_FFMPEG)("extractAllVideoFrames on a VFR source", () => {
expect(duplicateRate).toBeLessThan(0.1);
}, 60_000);
});

// Regression test for the segment-scoped SDR→HDR preflight. Before this
// change the preflight re-encoded the full source — for a 4-second window
// of a 60-minute recording this was the difference between seconds and
// minutes of pipeline time. The fix mirrors what convertVfrToCfr already
// does. Validation: the converted file's on-disk duration must match the
// composition window, not the source's natural duration.
describe.skipIf(!HAS_FFMPEG)("extractAllVideoFrames with mixed HDR/SDR segment scoping", () => {
  const fixtureRoot = mkdtempSync(join(tmpdir(), "hf-hdr-scope-test-"));
  const longSdrPath = join(fixtureRoot, "sdr_long.mp4");
  const shortHdrPath = join(fixtureRoot, "hdr_short.mp4");

  // Synthesize one H.264 fixture from an ffmpeg lavfi test pattern.
  // `label` feeds the failure message; `colorArgs` lets the HDR fixture tack
  // on its color-metadata flags while both fixtures share the base encode.
  const synthesizeFixture = async (
    source: string,
    destination: string,
    label: string,
    colorArgs: string[],
  ): Promise<void> => {
    const encodeArgs = [
      "-y",
      "-hide_banner",
      "-loglevel",
      "error",
      "-f",
      "lavfi",
      "-i",
      source,
      "-c:v",
      "libx264",
      "-preset",
      "ultrafast",
      "-pix_fmt",
      "yuv420p",
      ...colorArgs,
      destination,
    ];
    const outcome = await runFfmpeg(encodeArgs);
    if (!outcome.success) {
      throw new Error(`${label} fixture synthesis failed: ${outcome.stderr.slice(-400)}`);
    }
  };

  beforeAll(async () => {
    // 10-second SDR source — the "long recording" we want to AVOID
    // re-encoding in full.
    await synthesizeFixture("testsrc2=s=160x120:d=10:rate=30", longSdrPath, "SDR", []);

    // 2-second HDR-tagged source. ffprobe picks up colorspace/primaries/
    // transfer tags and returns VideoColorSpace; isHdrColorSpace() returns
    // true for colorTransfer === "arib-std-b67" (HLG), which is what the
    // Phase 2a gate checks.
    await synthesizeFixture("testsrc2=s=160x120:d=2:rate=30", shortHdrPath, "HDR", [
      "-color_primaries",
      "bt2020",
      "-color_trc",
      "arib-std-b67",
      "-colorspace",
      "bt2020nc",
    ]);
  }, 30_000);

  afterAll(() => {
    // Best-effort teardown of the temp fixture directory.
    if (existsSync(fixtureRoot)) rmSync(fixtureRoot, { recursive: true, force: true });
  });

  it("re-encodes only the used SDR window, not the full source", async () => {
    const framesDir = join(fixtureRoot, "out-hdr-scope");
    mkdirSync(framesDir, { recursive: true });

    // Compose a 2-second window out of the 10-second SDR source, alongside
    // the 2-second HDR clip. Phase 2a must trigger (mixed timeline) and
    // must re-encode only 2 seconds, not 10.
    const sdrWindow: VideoElement = {
      id: "sdr-segment",
      src: longSdrPath,
      start: 0,
      end: 2,
      mediaStart: 3,
      hasAudio: false,
    };
    const hdrClip: VideoElement = {
      id: "hdr-clip",
      src: shortHdrPath,
      start: 2,
      end: 4,
      mediaStart: 0,
      hasAudio: false,
    };

    const extraction = await extractAllVideoFrames([sdrWindow, hdrClip], fixtureRoot, {
      fps: 30,
      outputDir: framesDir,
    });

    expect(extraction.errors).toEqual([]);
    expect(extraction.phaseBreakdown.hdrPreflightCount).toBe(1);
    expect(extraction.phaseBreakdown.hdrPreflightMs).toBeGreaterThan(0);

    // The converted file lives at {outputDir}/_hdr_normalized/{id}_hdr.mp4.
    // Before this change it would be ~10s (full source). After: ~2s (the
    // composition window).
    const normalizedPath = join(framesDir, "_hdr_normalized", "sdr-segment_hdr.mp4");
    expect(existsSync(normalizedPath)).toBe(true);
    const normalizedMeta = await extractVideoMetadata(normalizedPath);
    // 0.5s slack for codec container overhead and ffmpeg's -ss keyframe snap.
    expect(normalizedMeta.durationSeconds).toBeGreaterThan(1.5);
    expect(normalizedMeta.durationSeconds).toBeLessThan(2.5);

    // Phase 3 extraction still produces the expected number of frames —
    // the composition window is 2s @ 30fps = 60 frames.
    const frameFiles = readdirSync(join(framesDir, "sdr-segment")).filter((f) =>
      f.endsWith(".jpg"),
    );
    expect(frameFiles.length).toBeGreaterThanOrEqual(58);
    expect(frameFiles.length).toBeLessThanOrEqual(62);
  }, 60_000);
});
47 changes: 42 additions & 5 deletions packages/engine/src/services/videoFrameExtractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -273,18 +273,30 @@ export async function extractVideoFramesRange(
* Uses zscale for color space conversion with a nominal peak luminance of
* 600 nits — high enough that SDR content doesn't appear too dark next to
* HDR, matching the approach used by HeyGen's Rio pipeline.
*
* Only the [startTime, startTime+duration] window is re-encoded, matching
* the segment-scoping used by `convertVfrToCfr`. This avoids transcoding
* a full 60-minute source when only a 4-second clip is used in the
* composition — on typical long-form inputs this is the difference
* between minutes of preflight and a second of preflight.
*/
async function convertSdrToHdr(
inputPath: string,
outputPath: string,
startTime: number,
duration: number,
signal?: AbortSignal,
config?: Partial<Pick<EngineConfig, "ffmpegProcessTimeout">>,
): Promise<void> {
const timeout = config?.ffmpegProcessTimeout ?? DEFAULT_CONFIG.ffmpegProcessTimeout;

const args = [
"-ss",
String(startTime),
"-i",
inputPath,
"-t",
String(duration),
"-vf",
"colorspace=all=bt2020:iall=bt709:range=tv",
"-color_primaries",
Expand Down Expand Up @@ -423,31 +435,56 @@ export async function extractAllVideoFrames(

// Phase 2: Probe color spaces and normalize if mixed HDR/SDR
const probeStart = Date.now();
const videoColorSpaces = await Promise.all(
const videoProbes = await Promise.all(
resolvedVideos.map(async ({ videoPath }) => {
const metadata = await extractVideoMetadata(videoPath);
return metadata.colorSpace;
return { colorSpace: metadata.colorSpace, durationSeconds: metadata.durationSeconds };
}),
);
phaseBreakdown.probeMs += Date.now() - probeStart;

const hasAnyHdr = videoColorSpaces.some(isHdrColorSpaceUtil);
const hasAnyHdr = videoProbes.some((p) => isHdrColorSpaceUtil(p.colorSpace));
if (hasAnyHdr) {
const convertDir = join(options.outputDir, "_hdr_normalized");
mkdirSync(convertDir, { recursive: true });

for (let i = 0; i < resolvedVideos.length; i++) {
if (signal?.aborted) break;
const cs = videoColorSpaces[i] ?? null;
const probe = videoProbes[i];
const cs = probe?.colorSpace ?? null;
if (!isHdrColorSpaceUtil(cs)) {
// SDR video in a mixed timeline — convert to HDR color space
const entry = resolvedVideos[i];
if (!entry) continue;

// Segment-scope the re-encode to the used window. For an explicit
// [start, end] pair this is end-start; for unbounded clips fall back
// to the source's natural duration minus mediaStart (same fallback
// used by Phase 3 and Phase 2b).
let segDuration = entry.video.end - entry.video.start;
if (!Number.isFinite(segDuration) || segDuration <= 0) {
const sourceDuration = probe?.durationSeconds ?? 0;
const sourceRemaining = sourceDuration - entry.video.mediaStart;
segDuration = sourceRemaining > 0 ? sourceRemaining : sourceDuration;
}

const convertedPath = join(convertDir, `${entry.video.id}_hdr.mp4`);
const hdrStart = Date.now();
try {
await convertSdrToHdr(entry.videoPath, convertedPath, signal, config);
await convertSdrToHdr(
entry.videoPath,
convertedPath,
entry.video.mediaStart,
segDuration,
signal,
config,
);
entry.videoPath = convertedPath;
// Segment-scoped re-encode starts the new file at t=0, so
// downstream phases (VFR preflight + Phase 3 extraction) must seek
// from 0, not the original mediaStart. Shallow-copy to avoid
// mutating the caller's VideoElement.
entry.video = { ...entry.video, mediaStart: 0 };
phaseBreakdown.hdrPreflightCount += 1;
} catch (err) {
errors.push({
Expand Down
Loading