diff --git a/packages/engine/src/config.ts b/packages/engine/src/config.ts index b0ea98d9..57ee781b 100644 --- a/packages/engine/src/config.ts +++ b/packages/engine/src/config.ts @@ -61,6 +61,18 @@ export interface EngineConfig { // ── Media ──────────────────────────────────────────────────────────── audioGain: number; frameDataUriCacheLimit: number; + /** + * Directory for the extraction cache. When set, `extractAllVideoFrames` + * reuses previously-extracted frames for `(source, window, fps, format)` + * pairs it has seen before. Cache entries are content-addressed by the + * source file's path + mtime + size, and each entry completes with a + * sentinel file so partial/aborted extractions aren't served as hits. + * + * Disabled by default — users opt in, since the cache grows unbounded + * until eviction lands in a follow-up. HTTP-sourced inputs bypass the + * cache (each download resolves to a fresh tmp path). + */ + extractCacheDir?: string; // ── Timeouts ───────────────────────────────────────────────────────── playerReadyTimeout: number; @@ -203,6 +215,8 @@ export function resolveConfig(overrides?: Partial): EngineConfig { verifyRuntime: env("PRODUCER_VERIFY_HYPERFRAME_RUNTIME") !== "false", runtimeManifestPath: env("PRODUCER_HYPERFRAME_MANIFEST_PATH"), + + extractCacheDir: env("HYPERFRAMES_EXTRACT_CACHE_DIR"), }; // Remove undefined values so they don't override defaults diff --git a/packages/engine/src/index.ts b/packages/engine/src/index.ts index 209a7c60..d02c2773 100644 --- a/packages/engine/src/index.ts +++ b/packages/engine/src/index.ts @@ -131,6 +131,19 @@ export { type InjectorCacheStats, } from "./services/videoFrameInjector.js"; +export { + computeExtractionCacheKey, + lookupCacheEntry, + markCacheEntryComplete, + ensureCacheEntryDir, + probeSourceForCacheKey, + resolveCacheEntryPaths, + CACHE_SENTINEL_FILENAME, + type ExtractionCacheKeyInputs, + type CacheEntryPaths, + type CacheHit, +} from "./services/extractionCache.js"; + export { 
parseAudioElements, processCompositionAudio } from "./services/audioMixer.js"; export type { AudioElement, AudioTrack, MixResult } from "./services/audioMixer.types.js"; diff --git a/packages/engine/src/services/extractionCache.test.ts b/packages/engine/src/services/extractionCache.test.ts new file mode 100644 index 00000000..de327e55 --- /dev/null +++ b/packages/engine/src/services/extractionCache.test.ts @@ -0,0 +1,147 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { existsSync, mkdtempSync, rmSync, statSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { + CACHE_SENTINEL_FILENAME, + computeExtractionCacheKey, + ensureCacheEntryDir, + lookupCacheEntry, + markCacheEntryComplete, + probeSourceForCacheKey, + resolveCacheEntryPaths, +} from "./extractionCache.js"; + +// These tests cover the content-addressable cache that the architecture +// review calls out as the biggest wall-clock win for iteration workflows: +// on a second render of the same composition, every input should be served +// from the cache instead of being re-extracted by ffmpeg. 
+
+describe("computeExtractionCacheKey", () => {
+  const base = {
+    sourcePath: "/videos/clip.mp4",
+    sourceMtimeMs: 1_700_000_000_000,
+    sourceSize: 5_242_880,
+    mediaStart: 0,
+    duration: 4,
+    fps: 30,
+    format: "jpg" as const,
+  };
+
+  it("is deterministic for the same inputs", () => {
+    const a = computeExtractionCacheKey(base);
+    const b = computeExtractionCacheKey(base);
+    expect(a).toBe(b);
+  });
+
+  it("prefixes the key with the schema version so a future on-disk format change cannot collide", () => {
+    const key = computeExtractionCacheKey(base);
+    expect(key.startsWith("v1-")).toBe(true);
+  });
+
+  it.each([
+    ["sourcePath", { sourcePath: "/videos/other.mp4" }],
+    ["sourceMtimeMs", { sourceMtimeMs: 1_700_000_000_001 }],
+    ["sourceSize", { sourceSize: 5_242_881 }],
+    ["mediaStart", { mediaStart: 0.5 }],
+    ["duration", { duration: 4.25 }],
+    ["fps", { fps: 60 }],
+    ["format", { format: "png" as const }],
+  ])("differs when %s changes", (_field, override) => {
+    const base_key = computeExtractionCacheKey(base);
+    const changed = computeExtractionCacheKey({ ...base, ...override });
+    expect(changed).not.toBe(base_key);
+  });
+
+  it("treats float timing values to 6 decimal places (stable keys across equivalent floats)", () => {
+    const a = computeExtractionCacheKey({ ...base, mediaStart: 1.5 });
+    const b = computeExtractionCacheKey({ ...base, mediaStart: 1.5000001 });
+    expect(a).toBe(b);
+  });
+});
+
+describe("probeSourceForCacheKey", () => {
+  const DIR = mkdtempSync(join(tmpdir(), "hf-extcache-probe-"));
+  const VIDEO = join(DIR, "video.mp4");
+
+  beforeEach(() => {
+    writeFileSync(VIDEO, Buffer.from("fake video bytes"));
+  });
+
+  afterEach(() => {
+    // Leave DIR in place: removing it here would break the next
+    // beforeEach write to VIDEO. Note that mkdtemp dirs are NOT removed
+    // automatically on process exit, so DIR outlives the test run.
+ }); + + it("returns path, mtime, and size for existing files", () => { + const probe = probeSourceForCacheKey(VIDEO); + expect(probe).not.toBeNull(); + expect(probe?.sourcePath).toBe(VIDEO); + expect(probe?.sourceSize).toBe(statSync(VIDEO).size); + expect(probe?.sourceMtimeMs).toBeGreaterThan(0); + }); + + it("returns null for a missing file", () => { + expect(probeSourceForCacheKey(join(DIR, "does-not-exist.mp4"))).toBeNull(); + }); +}); + +describe("lookupCacheEntry / markCacheEntryComplete", () => { + let ROOT: string; + + beforeEach(() => { + ROOT = mkdtempSync(join(tmpdir(), "hf-extcache-lookup-")); + }); + + afterEach(() => { + rmSync(ROOT, { recursive: true, force: true }); + }); + + it("returns null when the entry dir does not exist", () => { + expect(lookupCacheEntry(ROOT, "v1-abc", "jpg")).toBeNull(); + }); + + it("returns null when the entry dir exists but has no sentinel (partial extraction)", () => { + const dir = ensureCacheEntryDir(ROOT, "v1-abc"); + // Write a frame but NOT the sentinel — simulates an aborted extraction. + writeFileSync(join(dir, "frame_00001.jpg"), Buffer.from([0xff, 0xd8])); + expect(lookupCacheEntry(ROOT, "v1-abc", "jpg")).toBeNull(); + }); + + it("returns null when the sentinel exists but no frames match the requested format", () => { + const dir = ensureCacheEntryDir(ROOT, "v1-abc"); + writeFileSync(join(dir, "frame_00001.png"), Buffer.from([0x89, 0x50, 0x4e, 0x47])); + markCacheEntryComplete(ROOT, "v1-abc"); + // Cache was built for PNG but we're asking for JPG — no files match, + // treat as a miss so the caller extracts fresh. 
+    expect(lookupCacheEntry(ROOT, "v1-abc", "jpg")).toBeNull();
+  });
+
+  it("returns a hit with frame paths when sentinel + matching frames are present", () => {
+    const dir = ensureCacheEntryDir(ROOT, "v1-abc");
+    for (let i = 1; i <= 3; i++) {
+      writeFileSync(
+        join(dir, `frame_${String(i).padStart(5, "0")}.jpg`),
+        Buffer.from([0xff, 0xd8, i]),
+      );
+    }
+    markCacheEntryComplete(ROOT, "v1-abc");
+
+    const hit = lookupCacheEntry(ROOT, "v1-abc", "jpg");
+    expect(hit).not.toBeNull();
+    expect(hit?.totalFrames).toBe(3);
+    expect(hit?.framePaths.size).toBe(3);
+    // Frame indices are 0-based in the map (matching ExtractedFrames semantics).
+    expect(hit?.framePaths.get(0)).toBe(join(dir, "frame_00001.jpg"));
+    expect(hit?.framePaths.get(2)).toBe(join(dir, "frame_00003.jpg"));
+  });
+
+  it("writes the sentinel at the expected path", () => {
+    ensureCacheEntryDir(ROOT, "v1-xyz");
+    markCacheEntryComplete(ROOT, "v1-xyz");
+    const { sentinel } = resolveCacheEntryPaths(ROOT, "v1-xyz");
+    expect(existsSync(sentinel)).toBe(true);
+    expect(sentinel.endsWith(CACHE_SENTINEL_FILENAME)).toBe(true);
+  });
+});
diff --git a/packages/engine/src/services/extractionCache.ts b/packages/engine/src/services/extractionCache.ts
new file mode 100644
index 00000000..55cf8c79
--- /dev/null
+++ b/packages/engine/src/services/extractionCache.ts
@@ -0,0 +1,197 @@
+/**
+ * Video Frame Extraction Cache
+ *
+ * Content-addressed cache for pre-extracted frames. When enabled, the
+ * extractor checks for a completed cache entry before running FFmpeg, and
+ * writes frames into the cache directory on miss. Purpose: skip Phase 3
+ * entirely on iteration workflows where the same `(source, window, fps,
+ * format)` pair recurs across renders.
+ *
+ * Keying: the cache key is a SHA-256 over a stable string of
+ *   path | mtime-ms | size | mediaStart | duration | fps | format
+ * plus a schema version. File-content hashing is deliberately avoided
+ * because typical video sources are hundreds of MB and hashing them on
+ * every render would defeat the purpose. mtime+size is a good proxy for
+ * "the same file on disk"; users who mutate a file in-place at the exact
+ * same size+mtime are expected to bump the cache or disable it.
+ *
+ * Completeness: each entry directory gets a `.hf-complete` sentinel file
+ * written at the end of a successful extraction. Cache hits require the
+ * sentinel — partial writes from a killed/aborted render are ignored and
+ * overwritten on the next extraction.
+ *
+ * Concurrency: two renders that miss the same key will both extract into
+ * the same cache dir. ffmpeg overwrites its own files, both touch the
+ * sentinel, last-writer-wins on the sentinel timestamp. Correctness is
+ * fine because the key is content-addressed; the only cost is the
+ * duplicated work, which is acceptable for v1.
+ *
+ * Eviction: none yet. The cache grows until the user clears it. A future
+ * PR adds size-capped LRU eviction.
+ */
+
+import { createHash } from "crypto";
+import { existsSync, mkdirSync, readdirSync, statSync, writeFileSync } from "fs";
+import { join } from "path";
+
+/**
+ * Schema version embedded in every cache key. Bump whenever the on-disk
+ * format of extracted frames changes in a way that breaks older entries
+ * (e.g. the upcoming WebP-unified-format PR changes the frame extension
+ * and MIME handling, so it will bump this to 2).
+ */
+const CACHE_SCHEMA_VERSION = 1;
+
+/**
+ * Sentinel filename written inside each completed cache entry directory.
+ * Absence means the entry is partial or never finished — treated as a miss.
+ */
+export const CACHE_SENTINEL_FILENAME = ".hf-complete";
+
+export interface ExtractionCacheKeyInputs {
+  /** Resolved absolute path to the source video file. */
+  sourcePath: string;
+  /** Source file mtime in ms (from statSync). */
+  sourceMtimeMs: number;
+  /** Source file size in bytes. */
+  sourceSize: number;
+  /** Start of the used window in source-media time (seconds). */
+  mediaStart: number;
+  /** Duration of the used window (seconds). */
+  duration: number;
+  /** Target fps for extracted frames. */
+  fps: number;
+  /** Extracted frame format ("jpg" or "png"). */
+  format: "jpg" | "png";
+}
+
+/**
+ * Compute a deterministic cache key for a `(source, window, fps, format)`
+ * tuple. The key encodes the schema version as a prefix so a future change
+ * to the on-disk format invalidates old entries without collision.
+ */
+export function computeExtractionCacheKey(inputs: ExtractionCacheKeyInputs): string {
+  const parts = [
+    `v${CACHE_SCHEMA_VERSION}`,
+    inputs.sourcePath,
+    String(Math.floor(inputs.sourceMtimeMs)),
+    String(inputs.sourceSize),
+    inputs.mediaStart.toFixed(6),
+    inputs.duration.toFixed(6),
+    String(inputs.fps),
+    inputs.format,
+  ];
+  const hash = createHash("sha256").update(parts.join("|")).digest("hex");
+  // 32 hex chars (128 bits) is ample for a local cache and keeps directory
+  // names short enough for every common filesystem.
+  return `v${CACHE_SCHEMA_VERSION}-${hash.slice(0, 32)}`;
+}
+
+/**
+ * Stat a source file and derive the inputs needed to compute a cache key.
+ * Returns null if the file is missing or unreadable — callers treat that
+ * as "no cache" and proceed without it.
+ */
+export function probeSourceForCacheKey(
+  sourcePath: string,
+): Pick<ExtractionCacheKeyInputs, "sourcePath" | "sourceMtimeMs" | "sourceSize"> | null {
+  try {
+    const st = statSync(sourcePath);
+    if (!st.isFile()) return null;
+    return {
+      sourcePath,
+      sourceMtimeMs: st.mtimeMs,
+      sourceSize: st.size,
+    };
+  } catch {
+    return null;
+  }
+}
+
+export interface CacheEntryPaths {
+  /** Absolute path to the cache entry directory. Created lazily by the caller. */
+  dir: string;
+  /** Absolute path to the sentinel file written on successful extraction. */
+  sentinel: string;
+}
+
+/**
+ * Resolve the on-disk paths for a cache entry given a root cache dir and a
+ * computed key. Does not create the directory — that is the caller's job,
+ * typically on miss before handing the path to ffmpeg.
+ */
+export function resolveCacheEntryPaths(cacheRoot: string, key: string): CacheEntryPaths {
+  const dir = join(cacheRoot, key);
+  return { dir, sentinel: join(dir, CACHE_SENTINEL_FILENAME) };
+}
+
+export interface CacheHit {
+  /** Cache entry directory holding `frame_00001.jpg` (or .png) + sentinel. */
+  dir: string;
+  /** Map of 0-based frame index → absolute frame path. */
+  framePaths: Map<number, string>;
+  /** Number of frames discovered. Matches `framePaths.size`. */
+  totalFrames: number;
+}
+
+/**
+ * Look up a cache entry and — if complete — return the frame paths it
+ * contains. Returns null for misses (missing dir, missing sentinel, no
+ * matching frames). Callers should treat a null return as "extract into
+ * this dir and then call `markCacheEntryComplete` on success."
+ */
+export function lookupCacheEntry(
+  cacheRoot: string,
+  key: string,
+  format: "jpg" | "png",
+): CacheHit | null {
+  const { dir, sentinel } = resolveCacheEntryPaths(cacheRoot, key);
+  if (!existsSync(sentinel)) return null;
+
+  let entries: string[];
+  try {
+    entries = readdirSync(dir);
+  } catch {
+    return null;
+  }
+
+  // Match `frame_<digits>.<format>` and order by the numeric part: ffmpeg's
+  // `%05d` widens past 99999 frames, where a plain string sort would put
+  // `frame_100000` ahead of `frame_99999` and scramble the index mapping.
+  const framePattern = new RegExp(`^frame_(\\d+)\\.${format}$`);
+  const numbered: Array<{ n: number; file: string }> = [];
+  for (const file of entries) {
+    const m = framePattern.exec(file);
+    if (m) numbered.push({ n: Number(m[1]), file });
+  }
+  numbered.sort((a, b) => a.n - b.n);
+
+  const framePaths = new Map<number, string>();
+  numbered.forEach(({ file }, index) => {
+    framePaths.set(index, join(dir, file));
+  });
+
+  if (framePaths.size === 0) return null;
+  return { dir, framePaths, totalFrames: framePaths.size };
+}
+
+/**
+ * Mark a cache entry complete by writing the sentinel file. Called only
+ * after ffmpeg has finished writing every frame into the entry directory.
+ */
+export function markCacheEntryComplete(cacheRoot: string, key: string): void {
+  const { sentinel } = resolveCacheEntryPaths(cacheRoot, key);
+  writeFileSync(sentinel, "");
+}
+
+/**
+ * Ensure the cache root and a specific entry directory exist. Returns the
+ * absolute path of the entry directory.
+ */
+export function ensureCacheEntryDir(cacheRoot: string, key: string): string {
+  const { dir } = resolveCacheEntryPaths(cacheRoot, key);
+  // `recursive: true` makes mkdirSync a no-op for an existing directory, so
+  // no existsSync pre-check (and no check-then-create race) is needed.
+  mkdirSync(dir, { recursive: true });
+  return dir;
+}
diff --git a/packages/engine/src/services/videoFrameExtractor.test.ts b/packages/engine/src/services/videoFrameExtractor.test.ts
index 5f41ea6d..a23a1d53 100644
--- a/packages/engine/src/services/videoFrameExtractor.test.ts
+++ b/packages/engine/src/services/videoFrameExtractor.test.ts
@@ -399,3 +399,139 @@ describe.skipIf(!HAS_FFMPEG)("extractAllVideoFrames with mixed HDR/SDR segment s
     expect(frames.length).toBeLessThanOrEqual(62);
   }, 60_000);
 });
+
+// Integration test for the extraction cache. Verifies the first render
+// populates the cache and the second render hits it (no ffmpeg spawn).
+// The indirect but strong signal: phaseBreakdown.extractMs on the second
+// call drops to near-zero, cacheHits goes to 1, and the cache dir
+// contents are reused (not re-created). Regression guard for the
+// architecture review's bottleneck #5 ("Extraction cache does not exist").
+describe.skipIf(!HAS_FFMPEG)("extractAllVideoFrames with extraction cache", () => {
+  const FIXTURE_DIR = mkdtempSync(join(tmpdir(), "hf-extcache-int-"));
+  const SOURCE = join(FIXTURE_DIR, "cache_src.mp4");
+
+  beforeAll(async () => {
+    // 3-second SDR CFR testsrc — the simplest input that exercises the
+    // extractor's Phase 3 without triggering HDR or VFR preflights (which
+    // would bypass the cache, per the cache-miss note in the code).
+ const result = await runFfmpeg([ + "-y", + "-hide_banner", + "-loglevel", + "error", + "-f", + "lavfi", + "-i", + "testsrc2=s=160x120:d=3:rate=30", + "-c:v", + "libx264", + "-preset", + "ultrafast", + "-pix_fmt", + "yuv420p", + SOURCE, + ]); + if (!result.success) { + throw new Error(`cache-test fixture synthesis failed: ${result.stderr.slice(-400)}`); + } + }, 30_000); + + afterAll(() => { + if (existsSync(FIXTURE_DIR)) rmSync(FIXTURE_DIR, { recursive: true, force: true }); + }); + + it("misses on first call, hits on second call with identical inputs", async () => { + const cacheRoot = join(FIXTURE_DIR, "cache-root"); + mkdirSync(cacheRoot, { recursive: true }); + + const video: VideoElement = { + id: "vid", + src: SOURCE, + start: 0, + end: 2, + mediaStart: 0.5, + hasAudio: false, + }; + + const outDir1 = join(FIXTURE_DIR, "run-1"); + mkdirSync(outDir1, { recursive: true }); + const result1 = await extractAllVideoFrames( + [video], + FIXTURE_DIR, + { fps: 30, outputDir: outDir1 }, + undefined, + { extractCacheDir: cacheRoot }, + ); + expect(result1.errors).toEqual([]); + expect(result1.phaseBreakdown.cacheHits).toBe(0); + expect(result1.phaseBreakdown.cacheMisses).toBe(1); + expect(result1.extracted[0]?.ownedByLookup).toBe(false); + // The first call extracted into the cache dir, not the per-render + // outputDir — so outDir1/vid/ does NOT exist (Phase 3 took the cache + // path exclusively). 
+ expect(existsSync(join(outDir1, "vid"))).toBe(false); + + const outDir2 = join(FIXTURE_DIR, "run-2"); + mkdirSync(outDir2, { recursive: true }); + const extractStart = Date.now(); + const result2 = await extractAllVideoFrames( + [video], + FIXTURE_DIR, + { fps: 30, outputDir: outDir2 }, + undefined, + { extractCacheDir: cacheRoot }, + ); + const elapsed = Date.now() - extractStart; + + expect(result2.errors).toEqual([]); + expect(result2.phaseBreakdown.cacheHits).toBe(1); + expect(result2.phaseBreakdown.cacheMisses).toBe(0); + expect(result2.extracted[0]?.ownedByLookup).toBe(false); + // Cache hit path is ffprobe-only — should be under ~500ms even on slow + // CI runners vs. seconds for the ffmpeg extract. This is a soft bound: + // the primary signal is cacheHits=1 above. + expect(elapsed).toBeLessThan(2_000); + // Frame counts match (cache hit returns the same frame map). + expect(result2.extracted[0]?.totalFrames).toBe(result1.extracted[0]?.totalFrames); + }, 60_000); + + it("misses again when fps changes (keyed on fps)", async () => { + const cacheRoot = join(FIXTURE_DIR, "cache-fps"); + mkdirSync(cacheRoot, { recursive: true }); + + const video: VideoElement = { + id: "vid", + src: SOURCE, + start: 0, + end: 2, + mediaStart: 0, + hasAudio: false, + }; + + const out1 = join(FIXTURE_DIR, "fps-1"); + mkdirSync(out1, { recursive: true }); + const r1 = await extractAllVideoFrames( + [video], + FIXTURE_DIR, + { fps: 30, outputDir: out1 }, + undefined, + { extractCacheDir: cacheRoot }, + ); + expect(r1.phaseBreakdown.cacheMisses).toBe(1); + + const out2 = join(FIXTURE_DIR, "fps-2"); + mkdirSync(out2, { recursive: true }); + const r2 = await extractAllVideoFrames( + [video], + FIXTURE_DIR, + { fps: 24, outputDir: out2 }, + undefined, + { extractCacheDir: cacheRoot }, + ); + // Different fps → different key → miss again, new cache entry. 
+ expect(r2.phaseBreakdown.cacheHits).toBe(0); + expect(r2.phaseBreakdown.cacheMisses).toBe(1); + // Frame count differs at different fps (24 vs 30 for the same 2s window). + expect(r2.extracted[0]?.totalFrames).not.toBe(r1.extracted[0]?.totalFrames); + }, 60_000); +}); diff --git a/packages/engine/src/services/videoFrameExtractor.ts b/packages/engine/src/services/videoFrameExtractor.ts index 41a5aa2d..2a26ef1d 100644 --- a/packages/engine/src/services/videoFrameExtractor.ts +++ b/packages/engine/src/services/videoFrameExtractor.ts @@ -14,6 +14,13 @@ import { isHdrColorSpace as isHdrColorSpaceUtil } from "../utils/hdr.js"; import { downloadToTemp, isHttpUrl } from "../utils/urlDownloader.js"; import { runFfmpeg } from "../utils/runFfmpeg.js"; import { DEFAULT_CONFIG, type EngineConfig } from "../config.js"; +import { + computeExtractionCacheKey, + ensureCacheEntryDir, + lookupCacheEntry, + markCacheEntryComplete, + probeSourceForCacheKey, +} from "./extractionCache.js"; export interface VideoElement { id: string; @@ -33,6 +40,13 @@ export interface ExtractedFrames { totalFrames: number; metadata: VideoMetadata; framePaths: Map; + /** + * When true (the default), `FrameLookupTable.cleanup()` may `rmSync` the + * outputDir. When false, the directory is managed by the extraction + * cache and must not be deleted. Set to false on both cache hits and + * cache misses whose extraction writes directly into the cache. + */ + ownedByLookup?: boolean; } export interface ExtractionOptions { @@ -56,6 +70,10 @@ export interface ExtractionPhaseBreakdown { /** Counts of inputs hitting each preflight, for ratio analysis. */ hdrPreflightCount: number; vfrPreflightCount: number; + /** Inputs served from the extraction cache (no ffmpeg spawn). */ + cacheHits: number; + /** Inputs that missed the cache and ran the full extraction. 
*/ + cacheMisses: number; } export interface ExtractionResult { @@ -164,11 +182,17 @@ export async function extractVideoFramesRange( options: ExtractionOptions, signal?: AbortSignal, config?: Partial>, + /** + * When set, write frames into this directory directly instead of the + * conventional `join(options.outputDir, videoId)`. Used by the + * extraction cache so frames land in a keyed cache entry dir. + */ + outputDirOverride?: string, ): Promise { const ffmpegProcessTimeout = config?.ffmpegProcessTimeout ?? DEFAULT_CONFIG.ffmpegProcessTimeout; const { fps, outputDir, quality = 95, format = "jpg" } = options; - const videoOutputDir = join(outputDir, videoId); + const videoOutputDir = outputDirOverride ?? join(outputDir, videoId); if (!existsSync(videoOutputDir)) mkdirSync(videoOutputDir, { recursive: true }); const metadata = await extractVideoMetadata(videoPath); @@ -386,7 +410,7 @@ export async function extractAllVideoFrames( baseDir: string, options: ExtractionOptions, signal?: AbortSignal, - config?: Partial>, + config?: Partial>, compiledDir?: string, ): Promise { const startTime = Date.now(); @@ -401,6 +425,8 @@ export async function extractAllVideoFrames( extractMs: 0, hdrPreflightCount: 0, vfrPreflightCount: 0, + cacheHits: 0, + cacheMisses: 0, }; // Phase 1: Resolve paths and download remote videos @@ -545,8 +571,22 @@ export async function extractAllVideoFrames( } } - // Phase 3: Extract frames (parallel) + // Phase 3: Extract frames (parallel, optionally cache-backed). + // + // When config.extractCacheDir is set, each input is keyed by the resolved + // source's (path, mtime, size, mediaStart, duration, fps, format) tuple. + // Cache hits skip ffmpeg entirely; cache misses extract directly into the + // cache entry dir and write a sentinel file on success. See + // extractionCache.ts for the key/sentinel semantics. 
+  //
+  // Note: inputs that went through the HDR or VFR preflight will have a
+  // per-render converted file path (different path/mtime across renders),
+  // so their cache keys differ across renders — they effectively bypass
+  // the cache. That's intentional for v1; preflight-cache coordination
+  // lives with future work.
   const extractStart = Date.now();
+  const cacheRoot = config?.extractCacheDir;
+  const extractFormat = options.format ?? "jpg";
   const results = await Promise.all(
     resolvedVideos.map(async ({ video, videoPath }) => {
       if (signal?.aborted) {
@@ -564,6 +604,45 @@ export async function extractAllVideoFrames(
           video.end = video.start + videoDuration;
         }
 
+        // ── Cache lookup ────────────────────────────────────────────────
+        // `cacheEntry` remembers the dir *and* the key of a miss, so the
+        // sentinel is later written for exactly the entry ffmpeg filled.
+        let cacheEntry: { dir: string; key: string } | null = null;
+        if (cacheRoot) {
+          const sourceStat = probeSourceForCacheKey(videoPath);
+          if (sourceStat) {
+            const key = computeExtractionCacheKey({
+              ...sourceStat,
+              mediaStart: video.mediaStart,
+              duration: videoDuration,
+              fps: options.fps,
+              format: extractFormat,
+            });
+            const hit = lookupCacheEntry(cacheRoot, key, extractFormat);
+            if (hit) {
+              phaseBreakdown.cacheHits += 1;
+              const metadata = await extractVideoMetadata(videoPath);
+              return {
+                result: {
+                  videoId: video.id,
+                  srcPath: videoPath,
+                  outputDir: hit.dir,
+                  framePattern: `frame_%05d.${extractFormat}`,
+                  fps: options.fps,
+                  totalFrames: hit.totalFrames,
+                  metadata,
+                  framePaths: hit.framePaths,
+                  ownedByLookup: false,
+                } satisfies ExtractedFrames,
+              };
+            }
+            // Cache miss — extract into the cache entry dir so the next
+            // render with the same inputs is a hit.
+            cacheEntry = { dir: ensureCacheEntryDir(cacheRoot, key), key };
+            phaseBreakdown.cacheMisses += 1;
+          }
+        }
+
         const result = await extractVideoFramesRange(
           videoPath,
           video.id,
@@ -572,8 +651,20 @@ export async function extractAllVideoFrames(
           options,
           signal,
           config,
+          cacheEntry?.dir,
         );
 
+        if (cacheRoot && cacheEntry) {
+          // Write the sentinel under the key computed before extraction.
+          // Re-stat'ing the source here could yield a different key if the
+          // file changed mid-extraction; the sentinel write would then hit
+          // an entry dir that was never created and throw ENOENT.
+          markCacheEntryComplete(cacheRoot, cacheEntry.key);
+          // Mark the ExtractedFrames as cache-owned so FrameLookupTable
+          // doesn't rm it at end-of-render.
+          return { result: { ...result, ownedByLookup: false } };
+        }
+
         return { result };
       } catch (err) {
         return {
@@ -730,6 +821,9 @@ export class FrameLookupTable {
 
   cleanup(): void {
     for (const video of this.videos.values()) {
+      // Skip dirs the cache owns — they're meant to survive the render so
+      // the next render can hit instead of re-extracting.
+      if (video.extracted.ownedByLookup === false) continue;
       if (existsSync(video.extracted.outputDir)) {
         rmSync(video.extracted.outputDir, { recursive: true, force: true });
       }
diff --git a/packages/producer/src/services/renderOrchestrator.ts b/packages/producer/src/services/renderOrchestrator.ts
index 003df336..31401b3f 100644
--- a/packages/producer/src/services/renderOrchestrator.ts
+++ b/packages/producer/src/services/renderOrchestrator.ts
@@ -1114,7 +1114,14 @@ export async function executeRenderJob(
         projectDir,
         { fps: job.config.fps, outputDir: join(workDir, "video-frames") },
         abortSignal,
-        undefined,
+        // Forward extractCacheDir (when configured) so repeat renders of
+        // the same source+window+fps+format pair skip Phase 3 entirely.
+        // ffmpegProcessTimeout is harmless to pass; the extractor only
+        // reads the fields it cares about via Pick<…>.
+ { + ffmpegProcessTimeout: cfg.ffmpegProcessTimeout, + extractCacheDir: cfg.extractCacheDir, + }, compiledDir, ); assertNotAborted();