5 changes: 5 additions & 0 deletions .changeset/wet-pugs-sip.md
@@ -0,0 +1,5 @@
+---
+"@livekit/track-processors": minor
+---
+
+Use WebGL for video processors
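For context, this PR changes only the internal rendering path; consumers attach processors exactly as before. A minimal usage sketch (assuming the package's exported `BackgroundBlur` factory and `livekit-client`'s `setProcessor` API):

```ts
import { createLocalVideoTrack } from 'livekit-client';
import { BackgroundBlur } from '@livekit/track-processors';

async function enableBlur() {
  const track = await createLocalVideoTrack();
  // The switch from 2D-canvas to WebGL compositing happens inside the processor.
  await track.setProcessor(BackgroundBlur(10)); // blur radius in pixels
}
```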
2 changes: 1 addition & 1 deletion package.json
@@ -22,7 +22,7 @@
     "src"
   ],
   "dependencies": {
-    "@mediapipe/tasks-vision": "0.10.21"
+    "@mediapipe/tasks-vision": "^0.10.22-rc.20250304"
   },
   "peerDependencies": {
     "livekit-client": "^1.12.0 || ^2.1.0"
10 changes: 5 additions & 5 deletions pnpm-lock.yaml

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion src/ProcessorWrapper.ts
@@ -78,6 +78,7 @@ export default class ProcessorWrapper<TransformerOptions extends Record<string,
       .pipeTo(this.trackGenerator.writable)
       .catch((e) => console.error('error when trying to pipe', e))
       .finally(() => this.destroy());
+
     this.processedTrack = this.trackGenerator as MediaStreamVideoTrack;
   }
 
@@ -96,7 +97,8 @@ export default class ProcessorWrapper<TransformerOptions extends Record<string,
   }
 
   async destroy() {
-    await this.transformer.destroy();
+    await this.processor?.writableControl?.close();
     this.trackGenerator?.stop();
+    await this.transformer.destroy();
   }
 }
153 changes: 41 additions & 112 deletions src/transformers/BackgroundTransformer.ts
@@ -33,8 +33,6 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
 
   backgroundImage: ImageBitmap | null = null;
 
-  blurRadius?: number;
-
   options: BackgroundOptions;
 
   constructor(opts: BackgroundOptions) {
@@ -44,6 +42,8 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
   }
 
   async init({ outputCanvas, inputElement: inputVideo }: VideoTransformerInitOptions) {
+    // Initialize WebGL with appropriate options based on our current state
+
     await super.init({ outputCanvas, inputElement: inputVideo });
 
     const fileSet = await vision.FilesetResolver.forVisionTasks(
@@ -59,6 +59,7 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
         delegate: 'GPU',
         ...this.options.segmenterOptions,
       },
+      canvas: this.canvas,
       runningMode: 'VIDEO',
       outputCategoryMask: true,
       outputConfidenceMasks: false,
@@ -70,6 +71,9 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
         console.error('Error while loading processor background image: ', err),
       );
     }
+    if (this.options.blurRadius) {
+      this.gl?.setBlurRadius(this.options.blurRadius);
+    }
   }
 
   async destroy() {
@@ -88,145 +92,70 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
       img.src = path;
     });
     const imageData = await createImageBitmap(img);
-    this.backgroundImage = imageData;
+    this.gl?.setBackgroundImage(imageData);
   }
 
   async transform(frame: VideoFrame, controller: TransformStreamDefaultController<VideoFrame>) {
     try {
-      if (!(frame instanceof VideoFrame)) {
+      if (!(frame instanceof VideoFrame) || frame.codedWidth === 0 || frame.codedHeight === 0) {
         console.debug('empty frame detected, ignoring');
         return;
       }
 
       if (this.isDisabled) {
         controller.enqueue(frame);
         return;
       }
       if (!this.canvas) {
         throw TypeError('Canvas needs to be initialized first');
       }
-      this.canvas.width = frame.displayWidth;
-      this.canvas.height = frame.displayHeight;
       let startTimeMs = performance.now();
 
-      this.imageSegmenter?.segmentForVideo(
-        this.inputVideo!,
-        startTimeMs,
-        (result) => (this.segmentationResults = result),
-      );
-      const segmentationTimeMs = performance.now() - startTimeMs;
-
-      if (this.blurRadius) {
-        await this.blurBackground(frame);
-      } else {
-        await this.drawVirtualBackground(frame);
-      }
-      const newFrame = new VideoFrame(this.canvas, {
-        timestamp: frame.timestamp || Date.now(),
+      this.imageSegmenter?.segmentForVideo(frame, startTimeMs, (result) => {
+        const segmentationTimeMs = performance.now() - startTimeMs;
+        this.segmentationResults = result;
+        this.drawFrame(frame);
+        if (this.canvas && this.canvas.width > 0 && this.canvas.height > 0) {
+          const newFrame = new VideoFrame(this.canvas, {
+            timestamp: frame.timestamp || Date.now(),
+          });
+          const filterTimeMs = performance.now() - startTimeMs - segmentationTimeMs;
+          const stats: FrameProcessingStats = {
+            processingTimeMs: performance.now() - startTimeMs,
+            segmentationTimeMs,
+            filterTimeMs,
+          };
+          this.options.onFrameProcessed?.(stats);
+
+          controller.enqueue(newFrame);
+        } else {
+          controller.enqueue(frame);
+        }
+        frame.close();
       });
-      const filterTimeMs = performance.now() - startTimeMs - segmentationTimeMs;
-
-      controller.enqueue(newFrame);
-      const stats: FrameProcessingStats = {
-        processingTimeMs: performance.now() - startTimeMs,
-        segmentationTimeMs,
-        filterTimeMs,
-      };
-      this.options.onFrameProcessed?.(stats);
-    } finally {
+    } catch (e) {
+      console.error('Error while processing frame: ', e);
      frame?.close();
     }
   }
 
   async update(opts: BackgroundOptions) {
-    this.options = opts;
+    this.options = { ...this.options, ...opts };
     if (opts.blurRadius) {
-      this.blurRadius = opts.blurRadius;
+      this.gl?.setBlurRadius(opts.blurRadius);
     } else if (opts.imagePath) {
       await this.loadBackground(opts.imagePath);
     }
   }
 
-  async drawVirtualBackground(frame: VideoFrame) {
-    if (!this.canvas || !this.ctx || !this.segmentationResults || !this.inputVideo) return;
-    // this.ctx.save();
-    // this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
-    if (this.segmentationResults?.categoryMask && this.segmentationResults.categoryMask.width > 0) {
-      this.ctx.globalCompositeOperation = 'copy';
-
-      this.ctx.putImageData(
-        maskToImageData(
-          this.segmentationResults.categoryMask,
-          this.segmentationResults.categoryMask.width,
-          this.segmentationResults.categoryMask.height,
-        ),
-        0,
-        0,
-      );
-      this.ctx.filter = 'none';
-      this.ctx.globalCompositeOperation = 'source-in';
-      if (this.backgroundImage) {
-        this.ctx.drawImage(
-          this.backgroundImage,
-          0,
-          0,
-          this.backgroundImage.width,
-          this.backgroundImage.height,
-          0,
-          0,
-          this.canvas.width,
-          this.canvas.height,
-        );
-      } else {
-        this.ctx.fillStyle = '#00FF00';
-        this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height);
-      }
-
-      this.ctx.globalCompositeOperation = 'destination-over';
-    }
-    this.ctx.drawImage(frame, 0, 0, this.canvas.width, this.canvas.height);
-  }
-
-  async blurBackground(frame: VideoFrame) {
-    if (
-      !this.ctx ||
-      !this.canvas ||
-      !this.segmentationResults?.categoryMask?.canvas ||
-      !this.inputVideo
-    ) {
-      return;
-    }
+  async drawFrame(frame: VideoFrame) {
+    if (!this.canvas || !this.gl || !this.segmentationResults || !this.inputVideo) return;
 
-    this.ctx.save();
-    this.ctx.globalCompositeOperation = 'copy';
-
-    if (this.segmentationResults?.categoryMask && this.segmentationResults.categoryMask.width > 0) {
-      this.ctx.putImageData(
-        maskToImageData(
-          this.segmentationResults.categoryMask,
-          this.segmentationResults.categoryMask.width,
-          this.segmentationResults.categoryMask.height,
-        ),
-        0,
-        0,
-      );
-      this.ctx.filter = 'none';
-      this.ctx.globalCompositeOperation = 'source-out';
-      this.ctx.drawImage(frame, 0, 0, this.canvas.width, this.canvas.height);
-      this.ctx.globalCompositeOperation = 'destination-over';
-      this.ctx.filter = `blur(${this.blurRadius}px)`;
-      this.ctx.drawImage(frame, 0, 0, this.canvas.width, this.canvas.height);
-      this.ctx.restore();
+    const mask = this.segmentationResults.categoryMask;
+    if (mask) {
+      this.gl.render(frame, mask);
     }
   }
 }
-
-function maskToImageData(mask: vision.MPMask, videoWidth: number, videoHeight: number): ImageData {
-  const dataArray: Uint8ClampedArray = new Uint8ClampedArray(videoWidth * videoHeight * 4);
-  const result = mask.getAsUint8Array();
-  for (let i = 0; i < result.length; i += 1) {
-    const offset = i * 4;
-    dataArray[offset] = result[i];
-    dataArray[offset + 1] = result[i];
-    dataArray[offset + 2] = result[i];
-    dataArray[offset + 3] = result[i];
-  }
-  return new ImageData(dataArray, videoWidth, videoHeight);
-}
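The WebGL helper that replaces the 2D-canvas compositing above lives in `src/webgl/index.ts`, which is not shown in this diff. Conceptually, the removed `maskToImageData` conversion and the `copy`/`source-in`/`source-out`/`destination-over` composite passes collapse into a single per-pixel mix in a fragment shader. A hypothetical sketch of that idea, not the actual shader:

```ts
// Hypothetical GLSL (illustration only); the real shader is in src/webgl/index.ts.
const compositeFragmentShader = /* glsl */ `
  precision mediump float;
  varying vec2 vTexCoord;
  uniform sampler2D uFrame;      // current video frame
  uniform sampler2D uMask;       // MediaPipe segmentation mask
  uniform sampler2D uBackground; // blurred frame or background image
  void main() {
    float person = texture2D(uMask, vTexCoord).r;
    gl_FragColor = mix(texture2D(uBackground, vTexCoord), texture2D(uFrame, vTexCoord), person);
  }
`;
```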
18 changes: 14 additions & 4 deletions src/transformers/VideoTransformer.ts
@@ -1,3 +1,4 @@
+import { setupWebGL } from '../webgl/index';
 import { VideoTrackTransformer, VideoTransformerInitOptions } from './types';
 
 export default abstract class VideoTransformer<Options extends Record<string, unknown>>
@@ -7,10 +8,12 @@ export default abstract class VideoTransformer<Options extends Record<string, un
 
   canvas?: OffscreenCanvas;
 
-  ctx?: OffscreenCanvasRenderingContext2D;
+  // ctx?: OffscreenCanvasRenderingContext2D;
 
   inputVideo?: HTMLVideoElement;
 
+  gl?: ReturnType<typeof setupWebGL>;
+
   protected isDisabled?: Boolean = false;
 
   async init({
@@ -26,15 +29,21 @@ export default abstract class VideoTransformer<Options extends Record<string, un
     });
     this.canvas = outputCanvas || null;
     if (outputCanvas) {
-      this.ctx = this.canvas?.getContext('2d') || undefined;
+      // this.ctx = this.canvas?.getContext('2d') || undefined;
+      this.gl = setupWebGL(
+        this.canvas || new OffscreenCanvas(inputVideo.videoWidth, inputVideo.videoHeight),
+      );
     }
     this.inputVideo = inputVideo;
     this.isDisabled = false;
   }
 
   async restart({ outputCanvas, inputElement: inputVideo }: VideoTransformerInitOptions) {
     this.canvas = outputCanvas || null;
-    this.ctx = this.canvas.getContext('2d') || undefined;
+    this.gl?.cleanup();
+    this.gl = setupWebGL(
+      this.canvas || new OffscreenCanvas(inputVideo.videoWidth, inputVideo.videoHeight),
+    );
 
     this.inputVideo = inputVideo;
     this.isDisabled = false;
@@ -43,7 +52,8 @@ export default abstract class VideoTransformer<Options extends Record<string, un
   async destroy() {
     this.isDisabled = true;
     this.canvas = undefined;
-    this.ctx = undefined;
+    this.gl?.cleanup();
+    this.gl = undefined;
   }
 
   abstract transform(
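Taken together, the call sites in this diff (`setupWebGL(canvas)`, `gl.render(frame, mask)`, `gl.setBlurRadius(r)`, `gl.setBackgroundImage(bitmap)`, `gl.cleanup()`) imply roughly the following shape for the new `src/webgl` module. This is inferred from usage only; the actual source is not part of this diff:

```ts
import type { MPMask } from '@mediapipe/tasks-vision';

// Shape inferred from call sites above; the real src/webgl/index.ts may differ.
export interface WebGLProcessor {
  /** Composite the frame against the segmentation mask onto the target canvas. */
  render(frame: VideoFrame, mask: MPMask): void;
  setBlurRadius(radius: number): void;
  setBackgroundImage(image: ImageBitmap): void;
  /** Release GL resources (textures, shaders, buffers). */
  cleanup(): void;
}

export declare function setupWebGL(canvas: OffscreenCanvas): WebGLProcessor;
```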
1 change: 1 addition & 0 deletions src/utils.ts
@@ -1,3 +1,4 @@
+/* eslint-disable @typescript-eslint/naming-convention */
 export const supportsProcessor = typeof MediaStreamTrackGenerator !== 'undefined';
 export const supportsOffscreenCanvas = typeof OffscreenCanvas !== 'undefined';
 