5 changes: 5 additions & 0 deletions .changeset/wet-pugs-sip.md
@@ -0,0 +1,5 @@
+---
+"@livekit/track-processors": minor
+---
+
+Use WebGL for video processors
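For context, this PR changes only the internal rendering path; consumers attach processors exactly as before. A minimal usage sketch (assuming the package's exported `BackgroundBlur` factory and `livekit-client`'s `setProcessor` API):

```ts
import { createLocalVideoTrack } from 'livekit-client';
import { BackgroundBlur } from '@livekit/track-processors';

async function enableBlur() {
  const track = await createLocalVideoTrack();
  // The switch from 2D-canvas to WebGL compositing happens inside the processor.
  await track.setProcessor(BackgroundBlur(10)); // blur radius in pixels
}
```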
2 changes: 1 addition & 1 deletion package.json
@@ -22,7 +22,7 @@
     "src"
   ],
   "dependencies": {
-    "@mediapipe/tasks-vision": "0.10.21"
+    "@mediapipe/tasks-vision": "^0.10.22-rc.20250304"
   },
   "peerDependencies": {
     "livekit-client": "^1.12.0 || ^2.1.0"
10 changes: 5 additions & 5 deletions pnpm-lock.yaml

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion src/ProcessorWrapper.ts
@@ -78,6 +78,7 @@ export default class ProcessorWrapper<TransformerOptions extends Record<string,
       .pipeTo(this.trackGenerator.writable)
       .catch((e) => console.error('error when trying to pipe', e))
       .finally(() => this.destroy());
+
     this.processedTrack = this.trackGenerator as MediaStreamVideoTrack;
   }
 
@@ -96,7 +97,8 @@ export default class ProcessorWrapper<TransformerOptions extends Record<string,
   }
 
   async destroy() {
-    await this.transformer.destroy();
+    await this.processor?.writableControl?.close();
     this.trackGenerator?.stop();
+    await this.transformer.destroy();
   }
 }
153 changes: 41 additions & 112 deletions src/transformers/BackgroundTransformer.ts
@@ -33,8 +33,6 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
 
   backgroundImage: ImageBitmap | null = null;
 
-  blurRadius?: number;
-
   options: BackgroundOptions;
 
   constructor(opts: BackgroundOptions) {
@@ -44,6 +42,8 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
   }
 
   async init({ outputCanvas, inputElement: inputVideo }: VideoTransformerInitOptions) {
+    // Initialize WebGL with appropriate options based on our current state
+
     await super.init({ outputCanvas, inputElement: inputVideo });
 
     const fileSet = await vision.FilesetResolver.forVisionTasks(
@@ -59,6 +59,7 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
         delegate: 'GPU',
         ...this.options.segmenterOptions,
       },
+      canvas: this.canvas,
       runningMode: 'VIDEO',
       outputCategoryMask: true,
       outputConfidenceMasks: false,
@@ -70,6 +71,9 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
         console.error('Error while loading processor background image: ', err),
       );
     }
+    if (this.options.blurRadius) {
+      this.gl?.setBlurRadius(this.options.blurRadius);
+    }
   }
 
   async destroy() {
@@ -88,145 +92,70 @@ export default class BackgroundProcessor extends VideoTransformer<BackgroundOpti
       img.src = path;
     });
     const imageData = await createImageBitmap(img);
-    this.backgroundImage = imageData;
+    this.gl?.setBackgroundImage(imageData);
   }
 
   async transform(frame: VideoFrame, controller: TransformStreamDefaultController<VideoFrame>) {
     try {
-      if (!(frame instanceof VideoFrame)) {
+      if (!(frame instanceof VideoFrame) || frame.codedWidth === 0 || frame.codedHeight === 0) {
         console.debug('empty frame detected, ignoring');
         return;
       }
 
       if (this.isDisabled) {
         controller.enqueue(frame);
         return;
       }
       if (!this.canvas) {
         throw TypeError('Canvas needs to be initialized first');
       }
-      this.canvas.width = frame.displayWidth;
-      this.canvas.height = frame.displayHeight;
       let startTimeMs = performance.now();
 
-      this.imageSegmenter?.segmentForVideo(
-        this.inputVideo!,
-        startTimeMs,
-        (result) => (this.segmentationResults = result),
-      );
-      const segmentationTimeMs = performance.now() - startTimeMs;
-
-      if (this.blurRadius) {
-        await this.blurBackground(frame);
-      } else {
-        await this.drawVirtualBackground(frame);
-      }
-      const newFrame = new VideoFrame(this.canvas, {
-        timestamp: frame.timestamp || Date.now(),
+      this.imageSegmenter?.segmentForVideo(frame, startTimeMs, (result) => {
+        const segmentationTimeMs = performance.now() - startTimeMs;
+        this.segmentationResults = result;
+        this.drawFrame(frame);
+        if (this.canvas && this.canvas.width > 0 && this.canvas.height > 0) {
+          const newFrame = new VideoFrame(this.canvas, {
+            timestamp: frame.timestamp || Date.now(),
+          });
+          const filterTimeMs = performance.now() - startTimeMs - segmentationTimeMs;
+          const stats: FrameProcessingStats = {
+            processingTimeMs: performance.now() - startTimeMs,
+            segmentationTimeMs,
+            filterTimeMs,
+          };
+          this.options.onFrameProcessed?.(stats);
+
+          controller.enqueue(newFrame);
+        } else {
+          controller.enqueue(frame);
+        }
+        frame.close();
       });
-      const filterTimeMs = performance.now() - startTimeMs - segmentationTimeMs;
-
-      controller.enqueue(newFrame);
-      const stats: FrameProcessingStats = {
-        processingTimeMs: performance.now() - startTimeMs,
-        segmentationTimeMs,
-        filterTimeMs,
-      };
-      this.options.onFrameProcessed?.(stats);
-    } finally {
+    } catch (e) {
+      console.error('Error while processing frame: ', e);
      frame?.close();
     }
   }
 
   async update(opts: BackgroundOptions) {
-    this.options = opts;
+    this.options = { ...this.options, ...opts };
     if (opts.blurRadius) {
-      this.blurRadius = opts.blurRadius;
+      this.gl?.setBlurRadius(opts.blurRadius);
     } else if (opts.imagePath) {
       await this.loadBackground(opts.imagePath);
     }
   }
 
-  async drawVirtualBackground(frame: VideoFrame) {
-    if (!this.canvas || !this.ctx || !this.segmentationResults || !this.inputVideo) return;
-    // this.ctx.save();
-    // this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
-    if (this.segmentationResults?.categoryMask && this.segmentationResults.categoryMask.width > 0) {
-      this.ctx.globalCompositeOperation = 'copy';
-
-      this.ctx.putImageData(
-        maskToImageData(
-          this.segmentationResults.categoryMask,
-          this.segmentationResults.categoryMask.width,
-          this.segmentationResults.categoryMask.height,
-        ),
-        0,
-        0,
-      );
-      this.ctx.filter = 'none';
-      this.ctx.globalCompositeOperation = 'source-in';
-      if (this.backgroundImage) {
-        this.ctx.drawImage(
-          this.backgroundImage,
-          0,
-          0,
-          this.backgroundImage.width,
-          this.backgroundImage.height,
-          0,
-          0,
-          this.canvas.width,
-          this.canvas.height,
-        );
-      } else {
-        this.ctx.fillStyle = '#00FF00';
-        this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height);
-      }
-
-      this.ctx.globalCompositeOperation = 'destination-over';
-    }
-    this.ctx.drawImage(frame, 0, 0, this.canvas.width, this.canvas.height);
-  }
-
-  async blurBackground(frame: VideoFrame) {
-    if (
-      !this.ctx ||
-      !this.canvas ||
-      !this.segmentationResults?.categoryMask?.canvas ||
-      !this.inputVideo
-    ) {
-      return;
-    }
+  async drawFrame(frame: VideoFrame) {
+    if (!this.canvas || !this.gl || !this.segmentationResults || !this.inputVideo) return;
 
-    this.ctx.save();
-    this.ctx.globalCompositeOperation = 'copy';
-
-    if (this.segmentationResults?.categoryMask && this.segmentationResults.categoryMask.width > 0) {
-      this.ctx.putImageData(
-        maskToImageData(
-          this.segmentationResults.categoryMask,
-          this.segmentationResults.categoryMask.width,
-          this.segmentationResults.categoryMask.height,
-        ),
-        0,
-        0,
-      );
-      this.ctx.filter = 'none';
-      this.ctx.globalCompositeOperation = 'source-out';
-      this.ctx.drawImage(frame, 0, 0, this.canvas.width, this.canvas.height);
-      this.ctx.globalCompositeOperation = 'destination-over';
-      this.ctx.filter = `blur(${this.blurRadius}px)`;
-      this.ctx.drawImage(frame, 0, 0, this.canvas.width, this.canvas.height);
-      this.ctx.restore();
+    const mask = this.segmentationResults.categoryMask;
+    if (mask) {
+      this.gl.render(frame, mask);
     }
   }
 }
-
-function maskToImageData(mask: vision.MPMask, videoWidth: number, videoHeight: number): ImageData {
-  const dataArray: Uint8ClampedArray = new Uint8ClampedArray(videoWidth * videoHeight * 4);
-  const result = mask.getAsUint8Array();
-  for (let i = 0; i < result.length; i += 1) {
-    const offset = i * 4;
-    dataArray[offset] = result[i];
-    dataArray[offset + 1] = result[i];
-    dataArray[offset + 2] = result[i];
-    dataArray[offset + 3] = result[i];
-  }
-  return new ImageData(dataArray, videoWidth, videoHeight);
-}
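The WebGL helper that replaces the 2D-canvas compositing above lives in `src/webgl/index.ts`, which is not shown in this diff. Conceptually, the removed `maskToImageData` conversion and the `copy`/`source-in`/`source-out`/`destination-over` composite passes collapse into a single per-pixel mix in a fragment shader. A hypothetical sketch of that idea, not the actual shader:

```ts
// Hypothetical GLSL (illustration only); the real shader is in src/webgl/index.ts.
const compositeFragmentShader = /* glsl */ `
  precision mediump float;
  varying vec2 vTexCoord;
  uniform sampler2D uFrame;      // current video frame
  uniform sampler2D uMask;       // MediaPipe segmentation mask
  uniform sampler2D uBackground; // blurred frame or background image
  void main() {
    float person = texture2D(uMask, vTexCoord).r;
    gl_FragColor = mix(texture2D(uBackground, vTexCoord), texture2D(uFrame, vTexCoord), person);
  }
`;
```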
18 changes: 14 additions & 4 deletions src/transformers/VideoTransformer.ts
@@ -1,3 +1,4 @@
+import { setupWebGL } from '../webgl/index';
 import { VideoTrackTransformer, VideoTransformerInitOptions } from './types';
 
 export default abstract class VideoTransformer<Options extends Record<string, unknown>>
@@ -7,10 +8,12 @@ export default abstract class VideoTransformer<Options extends Record<string, un
 
   canvas?: OffscreenCanvas;
 
-  ctx?: OffscreenCanvasRenderingContext2D;
+  // ctx?: OffscreenCanvasRenderingContext2D;
 
   inputVideo?: HTMLVideoElement;
 
+  gl?: ReturnType<typeof setupWebGL>;
+
   protected isDisabled?: Boolean = false;
 
   async init({
@@ -26,15 +29,21 @@ export default abstract class VideoTransformer<Options extends Record<string, un
     });
     this.canvas = outputCanvas || null;
     if (outputCanvas) {
-      this.ctx = this.canvas?.getContext('2d') || undefined;
+      // this.ctx = this.canvas?.getContext('2d') || undefined;
+      this.gl = setupWebGL(
+        this.canvas || new OffscreenCanvas(inputVideo.videoWidth, inputVideo.videoHeight),
+      );
     }
     this.inputVideo = inputVideo;
     this.isDisabled = false;
   }
 
   async restart({ outputCanvas, inputElement: inputVideo }: VideoTransformerInitOptions) {
     this.canvas = outputCanvas || null;
-    this.ctx = this.canvas.getContext('2d') || undefined;
+    this.gl?.cleanup();
+    this.gl = setupWebGL(
+      this.canvas || new OffscreenCanvas(inputVideo.videoWidth, inputVideo.videoHeight),
+    );
 
     this.inputVideo = inputVideo;
     this.isDisabled = false;
@@ -43,7 +52,8 @@ export default abstract class VideoTransformer<Options extends Record<string, un
   async destroy() {
     this.isDisabled = true;
     this.canvas = undefined;
-    this.ctx = undefined;
+    this.gl?.cleanup();
+    this.gl = undefined;
   }
 
   abstract transform(
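Taken together, the call sites in this diff (`setupWebGL(canvas)`, `gl.render(frame, mask)`, `gl.setBlurRadius(r)`, `gl.setBackgroundImage(bitmap)`, `gl.cleanup()`) imply roughly the following shape for the new `src/webgl` module. This is inferred from usage only; the actual source is not part of this diff:

```ts
import type { MPMask } from '@mediapipe/tasks-vision';

// Shape inferred from call sites above; the real src/webgl/index.ts may differ.
export interface WebGLProcessor {
  /** Composite the frame against the segmentation mask onto the target canvas. */
  render(frame: VideoFrame, mask: MPMask): void;
  setBlurRadius(radius: number): void;
  setBackgroundImage(image: ImageBitmap): void;
  /** Release GL resources (textures, shaders, buffers). */
  cleanup(): void;
}

export declare function setupWebGL(canvas: OffscreenCanvas): WebGLProcessor;
```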
1 change: 1 addition & 0 deletions src/utils.ts
@@ -1,3 +1,4 @@
+/* eslint-disable @typescript-eslint/naming-convention */
 export const supportsProcessor = typeof MediaStreamTrackGenerator !== 'undefined';
 export const supportsOffscreenCanvas = typeof OffscreenCanvas !== 'undefined';
 