diff --git a/.changeset/add-audio-processor.md b/.changeset/add-audio-processor.md new file mode 100644 index 0000000..123db25 --- /dev/null +++ b/.changeset/add-audio-processor.md @@ -0,0 +1,5 @@ +--- +"@livekit/track-processors": minor +--- + +Add GainAudioProcessor — a reference audio TrackProcessor implementation using the Web Audio API. Includes gain control, browser support detection via `isSupported`, and a complete example in the sample app. Also restructures documentation into separate video and audio processor guides. diff --git a/README.md b/README.md index 79b3e57..a54c230 100644 --- a/README.md +++ b/README.md @@ -1,91 +1,51 @@ # LiveKit track processors +Prebuilt audio and video track processors for [LiveKit](https://livekit.io), implementing the [`TrackProcessor`](https://docs.livekit.io/reference/client-sdk-js/interfaces/TrackProcessor.html) interface from `livekit-client`. + ## Install ``` npm add @livekit/track-processors ``` -## Usage of prebuilt processors - -### Available processors - -This package exposes the `BackgroundProcessor` pre-prepared processor pipeline, which can be used in a few ways: +## Video processors -- `BackgroundProcessor({ mode: 'background-blur', blurRadius: 10 /* (optional) */ })` -- `BackgroundProcessor({ mode: 'virtual-background', imagePath: "http://path.to/image.png" })` -- `BackgroundProcessor({ mode: 'disabled' })` - -### Usage example +Video track processors intercept a local video track's frames and transform them before they are sent to other participants. 
This package provides a prebuilt `BackgroundProcessor` that supports background blur and virtual backgrounds: ```ts -import { BackgroundProcessor, supportsBackgroundProcessors, supportsModernBackgroundProcessors } from '@livekit/track-processors'; - -if(!supportsBackgroundProcessors()) { - throw new Error("this browser does not support background processors") -} +import { BackgroundProcessor } from '@livekit/track-processors'; -if(supportsModernBackgroundProcessors()) { - console.log("this browser supports modern APIs that are more performant"); -} - -const videoTrack = await createLocalVideoTrack(); -const processor = BackgroundProcessor({ mode: 'background-blur' }); +const processor = BackgroundProcessor({ mode: 'background-blur', blurRadius: 10 }); await videoTrack.setProcessor(processor); -room.localParticipant.publishTrack(videoTrack); - -async function disableBackgroundBlur() { - await videoTrack.stopProcessor(); -} - -async function updateBlurRadius(radius) { - return processor.switchTo({ mode: 'background-blur', blurRadius: radius }); -} ``` -In a real application, it's likely you will want to only sometimes apply background effects. You -could accomplish this by calling `videoTrack.setProcessor(...)` / `videoTrack.stopProcessor(...)` on -demand, but these functions can sometimes result in output visual artifacts as part of the switching -process, which can result in a poor user experience. +Available modes: `background-blur`, `virtual-background`, and `disabled` (passthrough). -A better option which won't result in any visual artifacts while switching is to initialize the -`BackgroundProcessor` in its "disabled" mode, and then later on switch to the desired mode. 
For -example: -```ts -const videoTrack = await createLocalVideoTrack(); -const processor = BackgroundProcessor({ mode: 'disabled' }); -await videoTrack.setProcessor(processor); -room.localParticipant.publishTrack(videoTrack); - -async function enableBlur(radius) { - await processor.switchTo({ mode: 'background-blur', blurRadius: radius }); -} - -async function disableBlur() { - await processor.switchTo({ mode: 'disabled' }); -} -``` +See [processor-docs/video-processors.md](processor-docs/video-processors.md) for full usage, browser support checks, and how to avoid visual artifacts when switching modes. -## Developing your own processors +## Audio processors -A track processor is instantiated with a Transformer. +Audio track processors work similarly — they intercept the local audio track and pipe it through a Web Audio API processing graph before publishing. The included `GainAudioProcessor` provides gain control and serves as a reference implementation for building custom audio processors: ```ts -// src/index.ts -export const VirtualBackground = (imagePath: string) => { - const pipeline = new ProcessorWrapper(new BackgroundTransformer({ imagePath })); - return pipeline; -}; +import { GainAudioProcessor } from '@livekit/track-processors'; + +const processor = new GainAudioProcessor({ gainValue: 1.5 }); +await audioTrack.setProcessor(processor); ``` -### Available base transformers +See [processor-docs/audio-processors.md](processor-docs/audio-processors.md) for full usage, the `TrackProcessor` interface for audio, and a guide to building your own audio processor with the Web Audio API. + +## Developing your own processors -- BackgroundTransformer (can blur background, use a virtual background, or be put into a disabled state); +This package implements the `TrackProcessor` interface from `livekit-client`. 
Video and audio processors take different approaches: +- **Video processors** use `ProcessorWrapper` with a transformer pipeline — see [processor-docs/video-processors.md](processor-docs/video-processors.md#developing-your-own-video-processor) +- **Audio processors** implement `TrackProcessor` directly using the Web Audio API — see [processor-docs/audio-processors.md](processor-docs/audio-processors.md#building-your-own-audio-processor) ## Running the sample app -This repository includes a small example app built on [Vite](https://vitejs.dev/). Run it with: +This repository includes a small example app built on [Vite](https://vitejs.dev/) that demonstrates both video and audio processors. Run it with: ``` # install pnpm: https://pnpm.io/installation diff --git a/example/index.html b/example/index.html index c9278de..5e39c46 100644 --- a/example/index.html +++ b/example/index.html @@ -159,6 +159,37 @@

LiveKit track processor sample

+ +
+
+ +
+ + +
diff --git a/example/sample.ts b/example/sample.ts index 8b61483..6f3a092 100644 --- a/example/sample.ts +++ b/example/sample.ts @@ -24,7 +24,7 @@ import { facingModeFromLocalTrack, setLogLevel, } from 'livekit-client'; -import { BackgroundProcessor, BackgroundProcessorOptions } from '../src'; +import { BackgroundProcessor, BackgroundProcessorOptions, GainAudioProcessor } from '../src'; const $ = (id: string) => document.getElementById(id) as T; @@ -36,6 +36,8 @@ const state = { bitrateInterval: undefined as any, isBackgroundProcessorEnabled: false, backgroundProcessor: BackgroundProcessor({ mode: 'background-blur', blurRadius: BLUR_RADIUS }), + isAudioProcessorEnabled: false, + gainProcessor: new GainAudioProcessor({ gainValue: 1.0 }), }; let currentRoom: Room | undefined; @@ -339,6 +341,44 @@ const appActions = { } }, + toggleAudioProcessorEnabled: async () => { + if (!currentRoom) return; + + setButtonDisabled('toggle-audio-processor', true); + + try { + const micPub = currentRoom.localParticipant.getTrackPublication(Track.Source.Microphone); + if (!micPub || !micPub.track) { + appendLog('ERROR: No microphone track found. 
Enable audio first.'); + return; + } + const micTrack = micPub.track as LocalAudioTrack; + + if (state.isAudioProcessorEnabled) { + await micTrack.stopProcessor(); + state.isAudioProcessorEnabled = false; + } else { + await micTrack.setProcessor(state.gainProcessor); + state.isAudioProcessorEnabled = true; + } + } catch (e: any) { + appendLog(`ERROR: ${e.message}`); + } finally { + updateAudioProcessorButtons(); + if (currentRoom?.state === ConnectionState.Connected) { + setButtonDisabled('toggle-audio-processor', false); + } + } + }, + + updateGain: (value: number) => { + state.gainProcessor.setGain(value); + const display = $('gain-value'); + if (display) { + display.textContent = value.toFixed(1); + } + }, + startAudio: () => { currentRoom?.startAudio(); }, @@ -409,7 +449,9 @@ function handleRoomDisconnect(reason?: DisconnectReason) { appendLog('disconnected from room', { reason }); setButtonsForState(false); state.isBackgroundProcessorEnabled = false; + state.isAudioProcessorEnabled = false; updateTrackProcessorModeButtons(); + updateAudioProcessorButtons(); renderParticipant(currentRoom.localParticipant, true); currentRoom.remoteParticipants.forEach((p) => { renderParticipant(p, true); @@ -662,6 +704,7 @@ function setButtonsForState(connected: boolean) { 'switch-to-background-blur-button', 'switch-to-virtual-background-button', 'switch-to-disabled-button', + 'toggle-audio-processor', ]; const disconnectedSet = ['connect-button']; @@ -766,6 +809,17 @@ function updateTrackProcessorModeButtons() { } } +function updateAudioProcessorButtons() { + const toggleButtonEnabled = currentRoom?.state === ConnectionState.Connected; + if (state.isAudioProcessorEnabled) { + setButtonState('toggle-audio-processor', 'Remove Audio Processor', false, !toggleButtonEnabled); + $('audio-processor-controls').style.display = 'block'; + } else { + setButtonState('toggle-audio-processor', 'Insert Audio Processor', false, !toggleButtonEnabled); + 
$('audio-processor-controls').style.display = 'none'; + } +} + async function acquireDeviceList() { handleDevicesChanged(); } diff --git a/processor-docs/audio-processors.md b/processor-docs/audio-processors.md new file mode 100644 index 0000000..acaf121 --- /dev/null +++ b/processor-docs/audio-processors.md @@ -0,0 +1,176 @@ +# Audio Processors + +This document covers the audio track processors available in `@livekit/track-processors` and how to build your own. + +## GainAudioProcessor + +The `GainAudioProcessor` is a minimal audio processor that applies a Web Audio [`GainNode`](https://developer.mozilla.org/en-US/docs/Web/API/GainNode) to a local audio track. It serves both as a ready-to-use volume control and as a reference implementation for building custom audio processors. + +### Browser support + +The Web Audio API used by `GainAudioProcessor` is [widely supported](https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API#browser_compatibility) in modern browsers. You can check support before use: + +```ts +import { GainAudioProcessor } from '@livekit/track-processors'; + +if (!GainAudioProcessor.isSupported) { + console.warn('GainAudioProcessor is not supported in this environment'); +} +``` + +### Basic usage + +```ts +import { createLocalAudioTrack } from 'livekit-client'; +import { GainAudioProcessor } from '@livekit/track-processors'; + +const audioTrack = await createLocalAudioTrack(); +const processor = new GainAudioProcessor({ gainValue: 1.5 }); +await audioTrack.setProcessor(processor); +room.localParticipant.publishTrack(audioTrack); + +// Update gain on the fly +processor.setGain(0.5); + +// Remove the processor +await audioTrack.stopProcessor(); +``` + +### Constructor options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `gainValue` | `number` | `1.0` | Initial gain multiplier, clamped to [0, 10]. `1.0` = unity (no change), `0.0` = silence, `> 1.0` = amplify. 
|
+
+## The TrackProcessor interface for audio
+
+Audio processors implement the `TrackProcessor` interface from `livekit-client`:
+
+```ts
+// Generic signature from livekit-client
+interface TrackProcessor<T extends Track.Kind, U extends ProcessorOptions<T>> {
+  name: string;
+  init(opts: U): Promise<void>;
+  restart(opts: U): Promise<void>;
+  destroy(): Promise<void>;
+  processedTrack?: MediaStreamTrack;
+  onPublish?(room: Room): Promise<void>;
+  onUnpublish?(): Promise<void>;
+}
+
+// For audio processors, T = Track.Kind.Audio and U = AudioProcessorOptions
+```
+
+When you call `audioTrack.setProcessor(processor)`, the SDK:
+
+1. Creates an `AudioContext` and passes it to your processor via `AudioProcessorOptions`
+2. Calls `processor.init()` with the options
+3. Reads `processor.processedTrack` and uses it as the track sent to the SFU
+4. Calls `sender.replaceTrack()` to swap the raw track for the processed one
+
+### AudioProcessorOptions
+
+The SDK provides these options when calling `init()` and `restart()`:
+
+```ts
+interface AudioProcessorOptions {
+  kind: Track.Kind.Audio;
+  track: MediaStreamTrack; // The raw microphone MediaStreamTrack
+  audioContext: AudioContext; // A shared AudioContext managed by the SDK
+  element?: HTMLMediaElement; // The media element, if one exists
+}
+```
+
+Key points:
+
+- **Use the provided `AudioContext`** rather than creating your own. This avoids hitting browser limits on AudioContext instances and ensures the context is in the correct state.
+- **`track`** is the raw `MediaStreamTrack` from the user's microphone. On device switch, the SDK calls `restart()` with a new track.
+- **Set `this.processedTrack`** to the output `MediaStreamTrack` from your processing pipeline. The SDK reads this property after `init()` returns.
+ +### Lifecycle methods + +| Method | When called | What to do | +|--------|-------------|------------| +| `init(opts)` | `audioTrack.setProcessor(processor)` | Build your Web Audio graph, set `this.processedTrack` | +| `restart(opts)` | Device switch or track change | Tear down old graph, rebuild with the new `opts.track` | +| `destroy()` | `audioTrack.stopProcessor()` | Disconnect all nodes, clean up resources | +| `onPublish(room)` | Track is published to a room | Optional — use if you need room context | +| `onUnpublish()` | Track is unpublished | Optional — use for cleanup tied to room lifecycle | + +## Building your own audio processor + +### Architecture overview + +```mermaid +flowchart LR + A[Microphone\nMediaStreamTrack] --> B[MediaStreamSource] + B --> C[Your Processing Nodes
e.g. GainNode, BiquadFilter,
AudioWorklet] + C --> D[MediaStreamDestination] + D --> E[processedTrack
MediaStreamTrack] + E --> F[Published to SFU] +``` + +The general pattern for a custom audio processor is: + +1. Create a `MediaStreamSource` from the input track +2. Connect it through your processing nodes +3. Connect the final node to a `MediaStreamDestination` +4. Expose `destination.stream.getAudioTracks()[0]` as `processedTrack` + +Here's a skeleton: + +```ts +import { Track } from 'livekit-client'; +import type { AudioProcessorOptions, TrackProcessor } from 'livekit-client'; + +class MyAudioProcessor implements TrackProcessor { + name = 'my-audio-processor'; + processedTrack?: MediaStreamTrack; + + private source?: MediaStreamAudioSourceNode; + private destination?: MediaStreamAudioDestinationNode; + // ... your processing nodes + + async init(opts: AudioProcessorOptions): Promise { + const { track, audioContext } = opts; + + // Create source from the raw microphone track + this.source = audioContext.createMediaStreamSource(new MediaStream([track])); + + // Create your processing chain + // const myNode = audioContext.create...(...); + + // Create destination + this.destination = audioContext.createMediaStreamDestination(); + + // Wire it up: source → [your nodes] → destination + this.source.connect(/* myNode */); + // myNode.connect(this.destination); + + // Expose the processed track + this.processedTrack = this.destination.stream.getAudioTracks()[0]; + } + + async restart(opts: AudioProcessorOptions): Promise { + await this.destroy(); + await this.init(opts); + } + + async destroy(): Promise { + this.source?.disconnect(); + // Disconnect your other nodes... + this.destination?.disconnect(); + this.processedTrack = undefined; + } +} +``` + +### Things to keep in mind + +**Device switching.** When a user switches microphones, the SDK calls `restart()` with a new `MediaStreamTrack`. Your processor must tear down the old Web Audio graph and rebuild with the new track. The simplest approach (shown above) is to call `destroy()` then `init()` inside `restart()`. 
+ +**AudioContext lifecycle.** The SDK provides an `AudioContext` via the options. Always use it rather than creating your own — this avoids browser limits on AudioContext instances and ensures the context state is managed correctly. + +**Browser compatibility.** The Web Audio API nodes used in this pattern (`MediaStreamSource`, `GainNode`, `MediaStreamDestination`) are well-supported across modern browsers. No special fallbacks are needed, unlike the video processor path which requires `canvas.captureStream()` fallbacks. + +**Advanced processing.** Since you receive a full `AudioContext`, you can wire in any Web Audio processing chain — including `AudioWorkletNode` for off-main-thread processing, or WASM-backed worklets for computationally intensive tasks. The pattern is the same: route audio through your nodes and connect the final output to the `MediaStreamDestination`. diff --git a/processor-docs/video-processors.md b/processor-docs/video-processors.md new file mode 100644 index 0000000..2b33a0e --- /dev/null +++ b/processor-docs/video-processors.md @@ -0,0 +1,98 @@ +# Video Processors + +This document covers the video track processors available in `@livekit/track-processors`. + +## BackgroundProcessor + +The `BackgroundProcessor` is a prebuilt video processor that supports blurring the background of a user's local video or replacing it with a virtual background image. It can be switched between modes on the fly. 
+ +### Available modes + +- `BackgroundProcessor({ mode: 'background-blur', blurRadius: 10 })` — Blur the background with an optional blur radius (defaults to 10) +- `BackgroundProcessor({ mode: 'virtual-background', imagePath: "http://path.to/image.png" })` — Replace the background with an image +- `BackgroundProcessor({ mode: 'disabled' })` — Passthrough mode, no effect applied (useful for avoiding switching artifacts, see below) + +### Browser support + +Before using `BackgroundProcessor`, check for browser compatibility: + +```ts +import { + BackgroundProcessor, + supportsBackgroundProcessors, + supportsModernBackgroundProcessors, +} from '@livekit/track-processors'; + +if (!supportsBackgroundProcessors()) { + throw new Error('This browser does not support background processors'); +} + +if (supportsModernBackgroundProcessors()) { + console.log('This browser supports modern APIs that are more performant'); +} +``` + +### Usage + +The simplest approach is to create a processor and attach it to a local video track: + +```ts +import { BackgroundProcessor } from '@livekit/track-processors'; + +const videoTrack = await createLocalVideoTrack(); +const processor = BackgroundProcessor({ mode: 'background-blur' }); +await videoTrack.setProcessor(processor); +room.localParticipant.publishTrack(videoTrack); +``` + +### Avoiding visual artifacts when toggling + +Calling `videoTrack.setProcessor()` / `videoTrack.stopProcessor()` on demand can produce visual artifacts during the switch. A better approach is to initialize the processor in `disabled` mode up front and use `switchTo()` to toggle effects. 
This avoids artifacts entirely: + +```ts +const videoTrack = await createLocalVideoTrack(); +const processor = BackgroundProcessor({ mode: 'disabled' }); +await videoTrack.setProcessor(processor); +room.localParticipant.publishTrack(videoTrack); + +async function enableBlur(radius) { + await processor.switchTo({ mode: 'background-blur', blurRadius: radius }); +} + +async function disableBlur() { + await processor.switchTo({ mode: 'disabled' }); +} +``` + +## Developing your own video processor + +### Architecture overview + +```mermaid +flowchart LR + A[Camera\nMediaStreamTrack] --> B[ProcessorWrapper] + B -->|VideoFrame| C[Transformer
e.g. BackgroundTransformer] + C -->|Transformed
VideoFrame| B + B --> D[Processed
MediaStreamTrack] + D --> E[Published to SFU] +``` + +Video processors in this package are built on two layers: + +1. **`ProcessorWrapper`** — Handles the plumbing of intercepting a video track's frames, passing them through a transformer, and producing a processed output track. It manages browser compatibility (using `MediaStreamTrackProcessor`/`MediaStreamTrackGenerator` where available, with a `canvas.captureStream()` fallback). + +2. **A Transformer** (e.g., `BackgroundTransformer`) — Implements the actual frame-by-frame processing logic. + +> **Note:** You don't have to follow this `Transformer` + `ProcessorWrapper` pattern. You can implement the `TrackProcessor` interface directly if you prefer. However, using `ProcessorWrapper` is convenient because it abstracts away the `MediaStreamTrack` → `VideoFrame` → transformer → `VideoFrame` → `MediaStreamTrack` conversion, which most use cases don't need to worry about. + +To create a custom video processor using `ProcessorWrapper`, instantiate it with your own transformer: + +```ts +import { ProcessorWrapper } from '@livekit/track-processors'; + +const pipeline = new ProcessorWrapper(new MyCustomTransformer(options)); +``` + +### Available base transformers + +- **BackgroundTransformer** — Can blur the background, replace it with a virtual background image, or operate in a disabled passthrough state. diff --git a/src/audio/GainAudioProcessor.ts b/src/audio/GainAudioProcessor.ts new file mode 100644 index 0000000..6f8a064 --- /dev/null +++ b/src/audio/GainAudioProcessor.ts @@ -0,0 +1,138 @@ +import type { Track, Room } from 'livekit-client'; +import type { AudioProcessorOptions, TrackProcessor } from 'livekit-client'; + +/** Gain is clamped to this range to avoid accidental silence or extreme amplification. */ +const MIN_GAIN = 0; +const MAX_GAIN = 10; + +export interface GainAudioProcessorOptions { + /** + * Initial gain value. Defaults to 1.0 (unity gain). + * Clamped to [0, 10]. 
Values outside this range are clamped.
+   * - 0.0 = silence
+   * - 1.0 = no change
+   * - > 1.0 = amplify
+   */
+  gainValue?: number;
+}
+
+/**
+ * A minimal audio track processor that applies a Web Audio GainNode to the audio pipeline.
+ *
+ * Serves as both a ready-to-use gain control and a reference implementation for building
+ * custom audio processors using the TrackProcessor interface.
+ *
+ * @example
+ * ```ts
+ * const processor = new GainAudioProcessor({ gainValue: 1.5 });
+ * await audioTrack.setProcessor(processor);
+ *
+ * // Update gain on the fly
+ * processor.setGain(0.5);
+ *
+ * // Remove the processor
+ * await audioTrack.stopProcessor();
+ * ```
+ */
+export class GainAudioProcessor
+  implements TrackProcessor<Track.Kind.Audio, AudioProcessorOptions>
+{
+  name = 'gain-audio-processor';
+
+  processedTrack?: MediaStreamTrack;
+
+  private gainValue: number;
+
+  /**
+   * Whether the current environment supports GainAudioProcessor (Web Audio API).
+   * Use this for consistency with video processors before attaching the processor.
+   */
+  static get isSupported(): boolean {
+    return (
+      typeof AudioContext !== 'undefined' &&
+      typeof GainNode !== 'undefined' &&
+      typeof MediaStreamAudioSourceNode !== 'undefined' &&
+      typeof MediaStreamAudioDestinationNode !== 'undefined'
+    );
+  }
+
+  private sourceNode?: MediaStreamAudioSourceNode;
+
+  private gainNode?: GainNode;
+
+  private destinationNode?: MediaStreamAudioDestinationNode;
+
+  constructor(options: GainAudioProcessorOptions = {}) {
+    const raw = options.gainValue ??
1.0;
+    this.gainValue = Math.max(MIN_GAIN, Math.min(MAX_GAIN, raw));
+  }
+
+  async init(opts: AudioProcessorOptions): Promise<void> {
+    const { track, audioContext } = opts;
+
+    // Create source from the raw microphone track
+    this.sourceNode = audioContext.createMediaStreamSource(new MediaStream([track]));
+
+    // Create gain node
+    this.gainNode = audioContext.createGain();
+    this.gainNode.gain.value = this.gainValue;
+
+    // Create destination
+    this.destinationNode = audioContext.createMediaStreamDestination();
+
+    // Wire up: source → gain → destination
+    this.sourceNode.connect(this.gainNode);
+    this.gainNode.connect(this.destinationNode);
+
+    // Expose the processed track for the SDK
+    this.processedTrack = this.destinationNode.stream.getAudioTracks()[0];
+  }
+
+  async restart(opts: AudioProcessorOptions): Promise<void> {
+    // Tear down old graph and rebuild with the new track
+    await this.destroy();
+    await this.init(opts);
+  }
+
+  async destroy(): Promise<void> {
+    this.sourceNode?.disconnect();
+    this.gainNode?.disconnect();
+    this.destinationNode?.disconnect();
+    this.processedTrack?.stop();
+    this.sourceNode = undefined;
+    this.gainNode = undefined;
+    this.destinationNode = undefined;
+    this.processedTrack = undefined;
+  }
+
+  /**
+   * Update the gain value. Can be called while the processor is active.
+   * Value is clamped to [0, 10].
+   * @param value - Gain multiplier (0.0 = silence, 1.0 = unity, > 1.0 = amplify)
+   */
+  setGain(value: number): void {
+    if (!Number.isFinite(value)) {
+      return;
+    }
+    this.gainValue = Math.max(MIN_GAIN, Math.min(MAX_GAIN, value));
+    if (this.gainNode) {
+      this.gainNode.gain.value = this.gainValue;
+    }
+  }
+
+  /**
+   * Get the current gain value.
+   */
+  getGain(): number {
+    return this.gainValue;
+  }
+
+  // Optional lifecycle hooks — included for completeness as a reference implementation
+  async onPublish(room: Room): Promise<void> {
+    console.debug(`[${this.name}] onPublish — room: ${room.name}`);
+  }
+
+  async onUnpublish(): Promise<void> {
+    console.debug(`[${this.name}] onUnpublish`);
+  }
+}
diff --git a/src/index.ts b/src/index.ts
index 6cdc867..4951dea 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -7,6 +7,7 @@ import BackgroundTransformer, {
 export * from './transformers/types';
 export { default as VideoTransformer } from './transformers/VideoTransformer';
+export { GainAudioProcessor, type GainAudioProcessorOptions } from './audio/GainAudioProcessor';
 export {
   ProcessorWrapper,
   type BackgroundOptions,