diff --git a/packages/engine/src/config.ts b/packages/engine/src/config.ts index 57ee781b3..e15cc142d 100644 --- a/packages/engine/src/config.ts +++ b/packages/engine/src/config.ts @@ -52,6 +52,28 @@ export interface EngineConfig { /** Timeout for FFmpeg streaming encode (ms). Default: 600_000 */ ffmpegStreamingTimeout: number; + // ── FFmpeg hardware acceleration ───────────────────────────────────── + /** + * Enable `-hwaccel auto` on Phase 3 extraction for SDR, non-alpha + * sources that exceed `hwaccelMinDurationSeconds`. Default: true. + * + * Gating is enforced inside the extractor: + * - HDR sources stay on software decode (on macOS they already use + * VideoToolbox via the existing HDR path — generic hwaccel could + * conflict). + * - Alpha-bearing pixel formats stay on software decode — hwaccel + * decoders generally collapse the alpha plane. + * - Short segments skip hwaccel because decoder init cost typically + * wipes out any decode speedup. + */ + hwaccelSdrDecode: boolean; + /** + * Minimum segment duration (seconds) before `-hwaccel auto` is + * enabled. Default: 2.0. Tune down on platforms where decoder init + * is cheap, or up when profiling shows no win on short clips. + */ + hwaccelMinDurationSeconds: number; + // ── HDR ────────────────────────────────────────────────────────────── /** HDR output transfer function. false = SDR output (default). 
*/ hdr: { transfer: "hlg" | "pq" } | false; @@ -114,6 +136,9 @@ export const DEFAULT_CONFIG: EngineConfig = { ffmpegProcessTimeout: 300_000, ffmpegStreamingTimeout: 600_000, + hwaccelSdrDecode: true, + hwaccelMinDurationSeconds: 2.0, + hdr: false, hdrAutoDetect: true, @@ -191,6 +216,12 @@ export function resolveConfig(overrides?: Partial): EngineConfig { DEFAULT_CONFIG.ffmpegStreamingTimeout, ), + hwaccelSdrDecode: envBool("PRODUCER_HWACCEL_SDR_DECODE", DEFAULT_CONFIG.hwaccelSdrDecode), + hwaccelMinDurationSeconds: envNum( + "PRODUCER_HWACCEL_MIN_DURATION_SECONDS", + DEFAULT_CONFIG.hwaccelMinDurationSeconds, + ), + hdr: (() => { const raw = env("PRODUCER_HDR_TRANSFER"); if (raw === "hlg" || raw === "pq") return { transfer: raw }; diff --git a/packages/engine/src/index.ts b/packages/engine/src/index.ts index d02c27737..7efc75b67 100644 --- a/packages/engine/src/index.ts +++ b/packages/engine/src/index.ts @@ -117,6 +117,7 @@ export { getFrameAtTime, createFrameLookupTable, FrameLookupTable, + shouldEnableHwaccelSdr, type VideoElement, type ImageElement, type ExtractedFrames, @@ -173,6 +174,7 @@ export { extractVideoMetadata, extractAudioMetadata, analyzeKeyframeIntervals, + pixelFormatHasAlpha, type VideoMetadata, type AudioMetadata, type KeyframeAnalysis, diff --git a/packages/engine/src/services/videoFrameExtractor.test.ts b/packages/engine/src/services/videoFrameExtractor.test.ts index 9a8133aab..5cba67f2a 100644 --- a/packages/engine/src/services/videoFrameExtractor.test.ts +++ b/packages/engine/src/services/videoFrameExtractor.test.ts @@ -8,6 +8,7 @@ import { parseVideoElements, parseImageElements, extractAllVideoFrames, + shouldEnableHwaccelSdr, type VideoElement, } from "./videoFrameExtractor.js"; import { extractVideoMetadata } from "../utils/ffprobe.js"; @@ -21,6 +22,94 @@ import { runFfmpeg } from "../utils/runFfmpeg.js"; // synthesized VFR fixture. 
const HAS_FFMPEG = spawnSync("ffmpeg", ["-version"]).status === 0; +// Gating logic that controls whether -hwaccel auto gets added to the +// Phase 3 ffmpeg args. The architecture review explicitly cautions +// against a blanket default; these cases are the fence posts it names. +describe("shouldEnableHwaccelSdr", () => { + const defaults = { hwaccelSdrDecode: true, hwaccelMinDurationSeconds: 2.0 }; + + it("enables hwaccel for a long opaque SDR input", () => { + expect( + shouldEnableHwaccelSdr({ + isHdr: false, + hasAlpha: false, + durationSeconds: 30, + config: defaults, + }), + ).toBe(true); + }); + + it("disables hwaccel when the source has an alpha plane", () => { + // Hardware decoders silently drop alpha — this guard is the whole + // reason PR 5 doesn't land as a blanket default. + expect( + shouldEnableHwaccelSdr({ + isHdr: false, + hasAlpha: true, + durationSeconds: 30, + config: defaults, + }), + ).toBe(false); + }); + + it("disables hwaccel for HDR sources (HDR path has its own VideoToolbox handling)", () => { + expect( + shouldEnableHwaccelSdr({ + isHdr: true, + hasAlpha: false, + durationSeconds: 30, + config: defaults, + }), + ).toBe(false); + }); + + it("disables hwaccel when segment duration is below the floor", () => { + // Init cost of a hwaccel context often wipes out any decode speedup + // on sub-2-second segments. The floor is tunable per platform. 
/**
 * Decide whether to add `-hwaccel auto` to the Phase 3 ffmpeg args for a
 * given input. The gate has four independent conditions, all of which
 * must be met:
 *
 * 1. `hwaccelSdrDecode` is enabled in config (default true).
 * 2. Source is SDR. On macOS, HDR sources already get an explicit
 *    `-hwaccel videotoolbox` from the extractor, and the two branches are
 *    mutually exclusive.
 * 3. Source pixel format has no alpha. Hardware decoders generally
 *    collapse the alpha plane, which would be a silent correctness
 *    regression for alpha-bearing sources.
 * 4. Segment duration ≥ `hwaccelMinDurationSeconds` (default 2). Short
 *    clips don't amortize the decoder init cost.
 *
 * The duration check fails closed: if either the duration or the
 * configured floor is NaN (e.g. a malformed override), hwaccel stays off
 * rather than being enabled by a comparison that is vacuously false.
 *
 * `extractVideoFramesRange` calls this when building its ffmpeg args.
 * It is exported so unit tests and external callers can ask the same
 * question the extractor asks.
 *
 * @param input.isHdr - True when the source is HDR.
 * @param input.hasAlpha - True when the source pixel format carries alpha.
 * @param input.durationSeconds - Length of the segment being extracted.
 * @param input.config - The two hwaccel settings; structurally the same
 *   as `Pick<EngineConfig, "hwaccelSdrDecode" | "hwaccelMinDurationSeconds">`.
 * @returns True only when every condition above holds.
 */
export function shouldEnableHwaccelSdr(input: {
  isHdr: boolean;
  hasAlpha: boolean;
  durationSeconds: number;
  config: { hwaccelSdrDecode: boolean; hwaccelMinDurationSeconds: number };
}): boolean {
  const { isHdr, hasAlpha, durationSeconds, config } = input;

  // Master switch first: when disabled nothing else matters.
  if (!config.hwaccelSdrDecode) return false;

  // HDR and alpha sources must stay on the software / dedicated paths.
  if (isHdr || hasAlpha) return false;

  // Inclusive floor. `>=` evaluates to false when either operand is NaN,
  // so malformed numbers keep hwaccel disabled instead of slipping past
  // a negated `<` check.
  return durationSeconds >= config.hwaccelMinDurationSeconds;
}
DEFAULT_CONFIG.hwaccelMinDurationSeconds, + }, + }); + const args: string[] = []; if (isHdr && isMacOS) { + // HDR path keeps its existing VideoToolbox-on-macOS handling — this + // branch is exclusive with the generic hwaccel below. args.push("-hwaccel", "videotoolbox"); + } else if (hwaccelEnabled) { + // `-hwaccel auto` lets ffmpeg pick the best available accelerator + // (VideoToolbox on macOS, VAAPI on Linux, NVDEC on CUDA-equipped + // hosts, etc.). If none is available ffmpeg silently falls back to + // software decode — safe as a production default with gating above. + args.push("-hwaccel", "auto"); } args.push("-ss", String(startTime), "-i", videoPath, "-t", String(duration)); @@ -427,7 +482,12 @@ export async function extractAllVideoFrames( baseDir: string, options: ExtractionOptions, signal?: AbortSignal, - config?: Partial>, + config?: Partial< + Pick< + EngineConfig, + "ffmpegProcessTimeout" | "extractCacheDir" | "hwaccelSdrDecode" | "hwaccelMinDurationSeconds" + > + >, compiledDir?: string, ): Promise { const startTime = Date.now(); diff --git a/packages/engine/src/utils/ffprobe.test.ts b/packages/engine/src/utils/ffprobe.test.ts index 20ebd5887..05551cecb 100644 --- a/packages/engine/src/utils/ffprobe.test.ts +++ b/packages/engine/src/utils/ffprobe.test.ts @@ -1,7 +1,11 @@ import { readFileSync } from "fs"; import { resolve } from "path"; import { describe, expect, it } from "vitest"; -import { extractPngMetadataFromBuffer, extractVideoMetadata } from "./ffprobe.js"; +import { + extractPngMetadataFromBuffer, + extractVideoMetadata, + pixelFormatHasAlpha, +} from "./ffprobe.js"; function crc32(buf: Buffer): number { let crc = 0xffffffff; @@ -107,3 +111,45 @@ describe("extractPngMetadataFromBuffer", () => { expect(extractPngMetadataFromBuffer(fixture)?.colorSpace?.colorTransfer).toBe("smpte2084"); }); }); + +// Drives the hwaccel gating in the extractor — a misclassification here +// would either strip alpha silently (false negatives) or disable a safe 
/**
 * Pixel format names that carry alpha and must be matched exactly.
 * The packed 8-bit RGBA orderings are exact matches so that opaque
 * neighbours (`rgb24`, `bgr24`, `rgb0`, `vuyx`, …) can never match.
 */
const ALPHA_PIX_FMT_EXACT: ReadonlySet<string> = new Set([
  "rgba",
  "bgra",
  "argb",
  "abgr",
  "vuya",
  "uyva",
  "ya8",
  "gray8a",
]);

/**
 * Pixel format name prefixes whose whole family carries alpha; the
 * suffix only encodes bit depth and endianness (e.g. `yuva444p10le`,
 * `gbrap12le`, `rgba64be`).
 */
const ALPHA_PIX_FMT_PREFIXES: readonly string[] = [
  "yuva",
  "gbrap",
  "ayuv",
  "ya16",
  "yaf",
  "rgba64",
  "bgra64",
  "argb64",
  "abgr64",
  "rgbaf",
];

/**
 * Determine whether an ffprobe `pix_fmt` value carries an alpha channel.
 *
 * Coverage:
 * - `yuva*`: planar YUV with alpha, including 10/12-bit variants such as
 *   `yuva444p10le`.
 * - `gbrap*`: planar GBR with alpha (`gbrap`, `gbrap10le`, `gbrapf32le`).
 *   Note that opaque `gbrp*` does not share this prefix.
 * - `rgba` / `bgra` / `argb` / `abgr`: packed 8-bit RGBA.
 * - `rgba64*` / `bgra64*` and `rgbaf16*` / `rgbaf32*`: 16-bit and float
 *   packed RGBA.
 * - `ya8` / `ya16*` / `yaf*`: gray + alpha.
 * - `ayuv*`, `vuya`, `uyva`: packed YUV with alpha.
 *
 * Returns false for anything else, including `yuv420p`, `yuv444p`,
 * `gbrp`, `nv12`, `rgb24` and the empty string. Matching is
 * case-insensitive.
 *
 * NOTE(review): `pal8` is deliberately not classified here; its palette
 * can encode transparency, so confirm whether palettised sources need
 * the same protection.
 *
 * Exported for unit testing and for external callers that derive their
 * own pixel formats.
 *
 * @param pixFmt - Pixel format name as reported by ffprobe; may be empty.
 * @returns True when the format is known to carry alpha.
 */
export function pixelFormatHasAlpha(pixFmt: string): boolean {
  if (!pixFmt) return false;
  const lower = pixFmt.toLowerCase();
  if (ALPHA_PIX_FMT_EXACT.has(lower)) return true;
  return ALPHA_PIX_FMT_PREFIXES.some((prefix) => lower.startsWith(prefix));
}