diff --git a/CLAUDE.md b/CLAUDE.md index b65f7b2a0..e9fc57bc4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -45,6 +45,26 @@ When adding a new CLI command: 5. **Document it** in `docs/packages/cli.mdx` — add a section with usage examples and flags. 6. Validate by running `npx tsx packages/cli/src/cli.ts --help` (command appears in the list) and `npx tsx packages/cli/src/cli.ts <command> --help` (examples appear). +### Regression Test Golden Baselines (producer) + +`packages/producer/tests/<name>/output/output.mp4` baselines MUST be generated +inside `Dockerfile.test`, not on your host. CI renders inside that Docker image +with a specific Chrome + ffmpeg build; pixel-level output drifts across +different host Chrome/ffmpeg versions and will fail PSNR at dozens of +checkpoints even when the code is correct. + +```bash +# Build the test image once: +docker build -t hyperframes-producer:test -f Dockerfile.test . + +# Generate or update a baseline (runs the harness with --update inside Docker): +bun run --cwd packages/producer docker:test:update +``` + +Never run `bun run --cwd packages/producer test:update` directly from the +host to capture a baseline that will be committed — the resulting output.mp4 +will not match CI. Use it only for local-only experimentation. + ## Skills Composition authoring (not repo development) is guided by skills installed via `npx skills add heygen-com/hyperframes`. See `skills/` for source. Invoke `/hyperframes`, `/hyperframes-cli`, `/hyperframes-registry`, or `/gsap` when authoring compositions. When a user provides a website URL and wants a video, invoke `/website-to-hyperframes` — it runs the full 7-step capture-to-video pipeline. 
diff --git a/packages/engine/src/services/videoFrameExtractor.test.ts b/packages/engine/src/services/videoFrameExtractor.test.ts index 5415f1fda..d41ac47fa 100644 --- a/packages/engine/src/services/videoFrameExtractor.test.ts +++ b/packages/engine/src/services/videoFrameExtractor.test.ts @@ -1,5 +1,25 @@ -import { describe, expect, it } from "vitest"; -import { parseVideoElements, parseImageElements } from "./videoFrameExtractor.js"; +import { afterAll, beforeAll, describe, expect, it } from "vitest"; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync } from "node:fs"; +import { createHash } from "node:crypto"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { spawnSync } from "node:child_process"; +import { + parseVideoElements, + parseImageElements, + extractAllVideoFrames, + type VideoElement, +} from "./videoFrameExtractor.js"; +import { extractVideoMetadata } from "../utils/ffprobe.js"; +import { runFfmpeg } from "../utils/runFfmpeg.js"; + +// ffmpeg is not preinstalled on GitHub's ubuntu-24.04 runners. The producer +// regression test at packages/producer/tests/vfr-screen-recording/ runs inside +// Dockerfile.test (which does include ffmpeg) and is the primary CI signal +// for this bug. Locally and in any CI job with ffmpeg on PATH, the tests +// below run too — they exercise the extractor in isolation against a +// synthesized VFR fixture. +const HAS_FFMPEG = spawnSync("ffmpeg", ["-version"]).status === 0; describe("parseVideoElements", () => { it("parses videos without an id or data-start attribute", () => { @@ -81,3 +101,139 @@ describe("parseImageElements", () => { expect(images[0]!.end).toBe(5); }); }); + +// Regression test for the VFR (variable frame rate) freeze bug. +// Screen recordings and phone videos often have irregular timestamps. +// When such inputs hit `extractVideoFramesRange`'s `-ss <start> -i ... -t <duration> +// -vf fps=N` pipeline, the fps filter can emit fewer frames than requested — +// e.g. 
a 4-second segment at 30fps would produce ~90 frames instead of 120. +// FrameLookupTable.getFrameAtTime then returns null for out-of-range indices +// and the compositor holds the last valid frame, which the user perceives as +// the video freezing. extractAllVideoFrames normalizes VFR sources to CFR +// before extraction to fix this. +describe.skipIf(!HAS_FFMPEG)("extractAllVideoFrames on a VFR source", () => { + const FIXTURE_DIR = mkdtempSync(join(tmpdir(), "hf-vfr-test-")); + const VFR_FIXTURE = join(FIXTURE_DIR, "vfr_screen.mp4"); + + beforeAll(async () => { + // 10s testsrc2 at 60fps, ~40% of frames dropped via select filter and + // encoded with -vsync vfr so timestamps are irregular. Declared fps 60, + // actual average ~36 — well over the 10% threshold used by isVFR. + // The select expression drops four 1-second windows (frames 30-89, + // 180-239, 330-389, 480-539) to simulate static segments in a screen + // recording where no pixels changed. + // -g/-keyint_min 600 forces a single keyframe so mid-segment seeks in the + // mediaStart=3 test don't snap to an intermediate IDR and drift the count. 
+ const result = await runFfmpeg([ + "-y", + "-hide_banner", + "-loglevel", + "error", + "-f", + "lavfi", + "-i", + "testsrc2=s=320x180:d=10:rate=60", + "-vf", + "drawtext=text='n=%{n}':fontsize=24:fontcolor=white:x=10:y=10:box=1:boxcolor=black@0.6," + + "select='not(between(n,30,89))*not(between(n,180,239))*not(between(n,330,389))*not(between(n,480,539))'", + "-vsync", + "vfr", + "-c:v", + "libx264", + "-preset", + "ultrafast", + "-pix_fmt", + "yuv420p", + "-g", + "600", + "-keyint_min", + "600", + VFR_FIXTURE, + ]); + if (!result.success) { + throw new Error( + `ffmpeg fixture synthesis failed (${result.exitCode}): ${result.stderr.slice(-400)}`, + ); + } + }, 30_000); + + afterAll(() => { + if (existsSync(FIXTURE_DIR)) rmSync(FIXTURE_DIR, { recursive: true, force: true }); + }); + + it("detects the synthesized fixture as VFR", async () => { + const md = await extractVideoMetadata(VFR_FIXTURE); + expect(md.isVFR).toBe(true); + }); + + it("produces the expected frame count for a mid-file segment", async () => { + const outputDir = join(FIXTURE_DIR, "out-mid-segment"); + mkdirSync(outputDir, { recursive: true }); + + const video: VideoElement = { + id: "v1", + src: VFR_FIXTURE, + start: 0, + end: 4, + mediaStart: 3, + hasAudio: false, + }; + + const result = await extractAllVideoFrames([video], FIXTURE_DIR, { + fps: 30, + outputDir, + }); + + expect(result.errors).toEqual([]); + expect(result.extracted).toHaveLength(1); + const frames = readdirSync(join(outputDir, "v1")).filter((f) => f.endsWith(".jpg")); + // Pre-fix behavior produced ~90 frames (a 25% shortfall). + expect(frames.length).toBeGreaterThanOrEqual(119); + expect(frames.length).toBeLessThanOrEqual(121); + }, 60_000); + + // Asserts both frame-count correctness and that we don't emit long runs of + // byte-identical "duplicate" frames — the user-visible "frozen screen + // recording" symptom. 
Pre-fix duplicate rate on this fixture is ~38% + // (116/300); on the actual reporter's ScreenCaptureKit clip, 18–44% across + // segments. <10% threshold leaves margin across ffmpeg versions without + // letting a regression slip through. + it("produces the full frame count and no duplicate-frame runs on the full VFR file", async () => { + const outputDir = join(FIXTURE_DIR, "out-full"); + mkdirSync(outputDir, { recursive: true }); + + const video: VideoElement = { + id: "vfull", + src: VFR_FIXTURE, + start: 0, + end: 10, + mediaStart: 0, + hasAudio: false, + }; + + const result = await extractAllVideoFrames([video], FIXTURE_DIR, { + fps: 30, + outputDir, + }); + expect(result.errors).toEqual([]); + + const frameDir = join(outputDir, "vfull"); + const frames = readdirSync(frameDir) + .filter((f) => f.endsWith(".jpg")) + .sort(); + expect(frames.length).toBeGreaterThanOrEqual(299); + expect(frames.length).toBeLessThanOrEqual(301); + + let prevHash: string | null = null; + let duplicates = 0; + for (const f of frames) { + const hash = createHash("sha256") + .update(readFileSync(join(frameDir, f))) + .digest("hex"); + if (hash === prevHash) duplicates += 1; + prevHash = hash; + } + const duplicateRate = duplicates / frames.length; + expect(duplicateRate).toBeLessThan(0.1); + }, 60_000); +}); diff --git a/packages/engine/src/services/videoFrameExtractor.ts b/packages/engine/src/services/videoFrameExtractor.ts index b59d7112d..a78ac6408 100644 --- a/packages/engine/src/services/videoFrameExtractor.ts +++ b/packages/engine/src/services/videoFrameExtractor.ts @@ -296,6 +296,62 @@ async function convertSdrToHdr( } } +/** + * Re-encode a VFR (variable frame rate) video segment to CFR so the downstream + * fps filter can extract frames reliably. Screen recordings, phone videos, and + * some webcams emit irregular timestamps that cause two failure modes: + * 1. Output has fewer frames than expected (e.g. -ss 3 -t 4 produces 90 + * frames instead of 120 @ 30fps). 
FrameLookupTable.getFrameAtTime then + * returns null for late timestamps and the caller freezes on the last + * valid frame. + * 2. Large duplicate-frame runs where source PTS don't land on target + * timestamps. + * + * Only the [startTime, startTime+duration] window is re-encoded, so long + * recordings aren't fully transcoded when only a short clip is used. + */ +async function convertVfrToCfr( + inputPath: string, + outputPath: string, + targetFps: number, + startTime: number, + duration: number, + signal?: AbortSignal, + config?: Partial<Pick<typeof DEFAULT_CONFIG, "ffmpegProcessTimeout">>, +): Promise<void> { + const timeout = config?.ffmpegProcessTimeout ?? DEFAULT_CONFIG.ffmpegProcessTimeout; + + const args = [ + "-ss", + String(startTime), + "-i", + inputPath, + "-t", + String(duration), + "-fps_mode", + "cfr", + "-r", + String(targetFps), + "-c:v", + "libx264", + "-preset", + "fast", + "-crf", + "18", + "-c:a", + "copy", + "-y", + outputPath, + ]; + + const result = await runFfmpeg(args, { signal, timeout }); + if (!result.success) { + throw new Error( + `VFR→CFR conversion failed (exit ${result.exitCode}): ${result.stderr.slice(-300)}`, + ); + } +} + export async function extractAllVideoFrames( videos: VideoElement[], baseDir: string, @@ -371,6 +427,47 @@ export async function extractAllVideoFrames( } } + // Phase 2b: Re-encode VFR inputs to CFR so the fps filter in Phase 3 produces + // the expected frame count. Only the used segment is transcoded. + const vfrNormDir = join(options.outputDir, "_vfr_normalized"); + for (let i = 0; i < resolvedVideos.length; i++) { + if (signal?.aborted) break; + const entry = resolvedVideos[i]; + if (!entry) continue; + const metadata = await extractVideoMetadata(entry.videoPath); + if (!metadata.isVFR) continue; + + let segDuration = entry.video.end - entry.video.start; + if (!Number.isFinite(segDuration) || segDuration <= 0) { + const sourceRemaining = metadata.durationSeconds - entry.video.mediaStart; + segDuration = sourceRemaining > 0 ? 
sourceRemaining : metadata.durationSeconds; + } + + mkdirSync(vfrNormDir, { recursive: true }); + const normalizedPath = join(vfrNormDir, `${entry.video.id}_cfr.mp4`); + try { + await convertVfrToCfr( + entry.videoPath, + normalizedPath, + options.fps, + entry.video.mediaStart, + segDuration, + signal, + config, + ); + entry.videoPath = normalizedPath; + // Segment-scoped re-encode starts the new file at t=0, so downstream + // extraction must seek from 0, not the original mediaStart. Shallow-copy + // to avoid mutating the caller's VideoElement. + entry.video = { ...entry.video, mediaStart: 0 }; + } catch (err) { + errors.push({ + videoId: entry.video.id, + error: err instanceof Error ? err.message : String(err), + }); + } + } + // Phase 3: Extract frames (parallel) const results = await Promise.all( resolvedVideos.map(async ({ video, videoPath }) => { diff --git a/packages/producer/src/services/htmlCompiler.ts b/packages/producer/src/services/htmlCompiler.ts index 1cf41cc3d..0750814d1 100644 --- a/packages/producer/src/services/htmlCompiler.ts +++ b/packages/producer/src/services/htmlCompiler.ts @@ -1028,9 +1028,10 @@ export async function compileForRender( ); } if (metadata.isVFR) { - console.warn( - `[Compiler] WARNING: Video "${video.id}" is variable frame rate (VFR). ` + - `Screen recordings and phone videos are often VFR, which causes stuttering and frame skipping in renders. Re-encode with: ${reencode}`, + console.info( + `[Compiler] Video "${video.id}" is variable frame rate (VFR); ` + + `the engine will normalize it to CFR before frame extraction. 
` + + `If rendering feels slow on this video, pre-encode once with: ${reencode}`, ); } }) diff --git a/packages/producer/tests/vfr-screen-recording/NOTICE.md b/packages/producer/tests/vfr-screen-recording/NOTICE.md new file mode 100644 index 000000000..6c2635f73 --- /dev/null +++ b/packages/producer/tests/vfr-screen-recording/NOTICE.md @@ -0,0 +1,22 @@ +# Source attribution + +`src/clip.mp4` is a 5-second excerpt from a macOS ScreenCaptureKit (ReplayKit) +recording, used here as a regression fixture for the VFR (variable-frame-rate) +freeze bug fixed in PR #360. + +- **Original duration**: 21s, recorded via `ReplayKitRecording` (the + `com.apple.quicktime.author` QuickTime tag identifies this). +- **Excerpt**: 16s–21s of the original, downscaled from 2746×1902 to 480×332, + re-encoded with `ffmpeg -fps_mode passthrough -c:v libx264 -preset slow + -crf 28 -an` to preserve the original VFR timestamps. +- **Recorded content**: the public `heygen-com/hyperframes` GitHub repo root + page. No private, proprietary, or user-identifying content. + +## Properties preserved from the original + +- `r_frame_rate`: 120/1 +- `avg_frame_rate`: ~36.1fps (21720/601) +- `isVFR`: true (70% delta vs `r_frame_rate`, well over the 10% threshold in + `ffprobe.ts`) +- Pre-fix duplicate-frame rate: ~34% on a mid-file 3s segment extracted at + 30fps — matches the 18–44% observed across segments of the full recording. diff --git a/packages/producer/tests/vfr-screen-recording/meta.json b/packages/producer/tests/vfr-screen-recording/meta.json new file mode 100644 index 000000000..643dc2e41 --- /dev/null +++ b/packages/producer/tests/vfr-screen-recording/meta.json @@ -0,0 +1,13 @@ +{ + "name": "vfr-screen-recording", + "description": "Regression test for the VFR (variable-frame-rate) screen-recording freeze bug (PR #360). Renders a 3-second composition with a macOS ScreenCaptureKit clip (r_frame_rate=120, avg≈36fps) seeked to mediaStart=1. 
Pre-fix, the fps filter emitted long runs of duplicate frames that the compositor held as a frozen image; post-fix, VFR→CFR normalization keeps frame-accurate timing.", + "tags": ["regression", "video", "vfr"], + "minPsnr": 28, + "maxFrameFailures": 2, + "minAudioCorrelation": 0, + "maxAudioLagWindows": 1, + "renderConfig": { + "fps": 30, + "workers": 1 + } +} diff --git a/packages/producer/tests/vfr-screen-recording/output/compiled.html b/packages/producer/tests/vfr-screen-recording/output/compiled.html new file mode 100644 index 000000000..5d2dcc0c4 --- /dev/null +++ b/packages/producer/tests/vfr-screen-recording/output/compiled.html @@ -0,0 +1,72 @@ + + + + + VFR Screen Recording Regression + + + +
+ + +
VFR
+
+ + + diff --git a/packages/producer/tests/vfr-screen-recording/output/output.mp4 b/packages/producer/tests/vfr-screen-recording/output/output.mp4 new file mode 100644 index 000000000..4e8f676d0 --- /dev/null +++ b/packages/producer/tests/vfr-screen-recording/output/output.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aec7945d08be6b3c69464d9cf89445c44d52c9e0f89bad367592268964b2b22 +size 472149 diff --git a/packages/producer/tests/vfr-screen-recording/src/clip.mp4 b/packages/producer/tests/vfr-screen-recording/src/clip.mp4 new file mode 100644 index 000000000..091ae23d3 --- /dev/null +++ b/packages/producer/tests/vfr-screen-recording/src/clip.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:602459ff96bcd7cd4bf306935b857676434bb65b2a1e395d30ce19a225bbd359 +size 108396 diff --git a/packages/producer/tests/vfr-screen-recording/src/index.html b/packages/producer/tests/vfr-screen-recording/src/index.html new file mode 100644 index 000000000..ea409fdcd --- /dev/null +++ b/packages/producer/tests/vfr-screen-recording/src/index.html @@ -0,0 +1,69 @@ + + + + + VFR Screen Recording Regression + + + +
+ + +
VFR
+
+ + +