livekit · toubatbrian · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/.changeset/short-badgers-turn.md b/.changeset/short-badgers-turn.md
@@ -0,0 +1,6 @@
+---
+"@livekit/agents-plugin-assemblyai": patch
+"@livekit/agents-plugin-mistral": patch
+---
+
+fix(mistral): align plugin packaging with conventions and add unit tests
diff --git a/examples/package.json b/examples/package.json
@@ -41,6 +41,7 @@
     "@livekit/agents-plugin-resemble": "workspace:*",
     "@livekit/agents-plugin-silero": "workspace:*",
     "@livekit/agents-plugin-trugen": "workspace:*",
+    "@livekit/agents-plugin-mistral": "workspace:*",
     "@livekit/agents-plugin-xai": "workspace:*",
     "@livekit/noise-cancellation-node": "^0.1.9",
     "@livekit/rtc-node": "catalog:",

diff --git a/plugins/assemblyai/src/models.ts b/plugins/assemblyai/src/models.ts
@@ -2,7 +2,6 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 48-54 lines
 export type STTModels =
   | 'universal-streaming-english'
   | 'universal-streaming-multilingual'
@@ -12,5 +11,4 @@ export type STTModels =
   // break if they already pass it.
   | 'u3-pro';
 
-// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 47 lines
 export type STTEncoding = 'pcm_s16le' | 'pcm_mulaw';
diff --git a/plugins/assemblyai/src/stt.ts b/plugins/assemblyai/src/stt.ts
@@ -2,9 +2,6 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 //
-// Ported from:
-//   livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
-// See `// Ref: python ... - N-M lines` comments throughout for cross-references.
 import {
   type APIConnectOptions,
   type AudioBuffer,
@@ -34,15 +31,22 @@ interface StreamEventMessage {
   transcript?: string;
   utterance?: string;
   end_of_turn?: boolean;
+  end_of_turn_confidence?: number;
   turn_is_formatted?: boolean;
   language_code?: string;
-  words?: Array<{ text?: string; start?: number; end?: number; confidence?: number }>;
+  speaker_label?: string;
+  words?: Array<{
+    text?: string;
+    start?: number;
+    end?: number;
+    confidence?: number;
+    speaker?: string;
+  }>;
   // Termination
   audio_duration_seconds?: number;
   session_duration_seconds?: number;
 }
 
-// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 46-66 lines
 export interface STTOptions {
   apiKey?: string;
   sampleRate: number;
@@ -79,7 +83,6 @@ export interface STTOptions {
   baseUrl: string;
 }
 
-// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 72-97 lines
 const defaultSTTOptions: STTOptions = {
   apiKey: process.env.ASSEMBLYAI_API_KEY,
   sampleRate: 16000,
@@ -89,9 +92,9 @@ const defaultSTTOptions: STTOptions = {
   baseUrl: 'wss://streaming.assemblyai.com',
 };
 
-// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 72 lines
 export class STT extends stt.STT {
   #opts: STTOptions;
+  #streams = new Set<WeakRef<SpeechStream>>();
   label = 'assemblyai.STT';
 
   get model(): string {
@@ -103,33 +106,28 @@ export class STT extends stt.STT {
   }
 
   constructor(opts: Partial<STTOptions> = {}) {
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 111-119 lines
     super({
       streaming: true,
       interimResults: true,
       alignedTranscript: 'word',
     });
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 120-122 lines
     if (opts.speechModel === 'u3-pro') {
       log().warn("'u3-pro' is deprecated, use 'u3-rt-pro' instead.");
       opts.speechModel = 'u3-rt-pro';
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 124-125 lines
     if (opts.prompt !== undefined && opts.speechModel !== 'u3-rt-pro') {
       throw new Error("The 'prompt' parameter is only supported with the 'u3-rt-pro' model.");
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 127-135 lines
     const apiKey = opts.apiKey ?? defaultSTTOptions.apiKey;
     if (!apiKey) {
       throw new Error(
         'AssemblyAI API key is required. Pass one in via the `apiKey` parameter, or set it as the `ASSEMBLYAI_API_KEY` environment variable',
       );
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 147-149 lines
     // Minimize latency; matches LK's end-of-turn detector well.
     const minTurnSilence = opts.minTurnSilence ?? 100;
 
@@ -143,24 +141,29 @@ export class STT extends stt.STT {
 
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 185-192 lines
     throw new Error('Non-streaming recognize is not supported on AssemblyAI STT');
   }
 
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 212-257 lines
   updateOptions(opts: Partial<STTOptions>) {
     this.#opts = { ...this.#opts, ...opts };
+    for (const ref of this.#streams) {
+      const stream = ref.deref();
+      if (stream) {
+        stream.updateOptions(opts);
+      } else {
+        this.#streams.delete(ref);
+      }
+    }
   }
 
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 194-210 lines
   stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {
-    return new SpeechStream(this, this.#opts, options?.connOptions);
+    const stream = new SpeechStream(this, this.#opts, options?.connOptions);
+    this.#streams.add(new WeakRef(stream));
+    return stream;
   }
 }
 
-// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 260 lines
 export class SpeechStream extends stt.SpeechStream {
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 262 lines
   static readonly CLOSE_MSG = JSON.stringify({ type: 'Terminate' });
 
   #opts: STTOptions;
@@ -184,7 +187,6 @@ export class SpeechStream extends stt.SpeechStream {
    * (before any speech events). Null until the connection completes.
    * Share this with the AssemblyAI team when reporting issues.
    */
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 286-291 lines
   get sessionId(): string | null {
     return this.#sessionId;
   }
@@ -193,12 +195,10 @@ export class SpeechStream extends stt.SpeechStream {
    * Unix timestamp when the AssemblyAI session expires. Set alongside
    * {@link sessionId} when the WebSocket connection is established.
    */
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 293-297 lines
   get expiresAt(): number | null {
     return this.#expiresAt;
   }
 
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 299-351 lines
   updateOptions(opts: Partial<STTOptions>) {
     this.#opts = { ...this.#opts, ...opts };
 
@@ -222,7 +222,6 @@ export class SpeechStream extends stt.SpeechStream {
   /**
    * Force-finalize the current turn immediately.
    */
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 353-355 lines
   forceEndpoint() {
     this.#pendingConfigMessages.push({ type: 'ForceEndpoint' });
     if (!this.#configMessagePending.done) this.#configMessagePending.resolve();
@@ -262,9 +261,7 @@ export class SpeechStream extends stt.SpeechStream {
     this.closed = true;
   }
 
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 442-505 lines
   async #connectWS(): Promise<WebSocket> {
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 443-461 lines
     // u3-rt-pro has different silence defaults — if unset, both min and max default to 100ms.
     let minSilence = this.#opts.minTurnSilence;
     let maxSilence = this.#opts.maxTurnSilence;
@@ -273,13 +270,11 @@ export class SpeechStream extends stt.SpeechStream {
       if (maxSilence === undefined) maxSilence = minSilence;
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 476-480 lines
     // Default language_detection to true for multilingual / u3-rt-pro models, false otherwise.
     const defaultLanguageDetection =
       this.#opts.speechModel.includes('multilingual') || this.#opts.speechModel === 'u3-rt-pro';
     const languageDetection = this.#opts.languageDetection ?? defaultLanguageDetection;
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 463-502 lines
     const liveConfig: Record<string, unknown> = {
       sample_rate: this.#opts.sampleRate,
       encoding: this.#opts.encoding,
@@ -301,7 +296,6 @@ export class SpeechStream extends stt.SpeechStream {
     };
 
     const url = new URL(`${this.#opts.baseUrl}/v3/ws`);
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 498-502 lines
     // Python serializes booleans as the strings "true"/"false", so we mirror that.
     for (const [key, value] of Object.entries(liveConfig)) {
       if (value === undefined || value === null) continue;
@@ -312,7 +306,6 @@ export class SpeechStream extends stt.SpeechStream {
       }
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 492-496 lines
     const ws = new WebSocket(url, {
       headers: {
         Authorization: this.#opts.apiKey!,
@@ -330,7 +323,6 @@ export class SpeechStream extends stt.SpeechStream {
     return ws;
   }
 
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 357-440 lines
   async #runWS(ws: WebSocket) {
     let closing = false;
     const sessionController = new AbortController();
@@ -349,7 +341,6 @@ export class SpeechStream extends stt.SpeechStream {
       await Promise.race([closed, waitForAbort(controller.signal)]);
     });
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 361-385 lines
     const sendTask = async () => {
       const samplesPerBuffer = Math.floor((this.#opts.sampleRate * this.#opts.bufferSizeMs) / 1000);
       const audioStream = new AudioByteStream(this.#opts.sampleRate, 1, samplesPerBuffer);
@@ -393,7 +384,6 @@ export class SpeechStream extends stt.SpeechStream {
       }
     };
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 387-418 lines
     let messageHandler: ((msg: RawData, isBinary: boolean) => void) | null = null;
     const listenTask = Task.from(async (controller) => {
       const listenMessage = new Promise<void>((resolve, reject) => {
@@ -419,7 +409,6 @@ export class SpeechStream extends stt.SpeechStream {
       await Promise.race([listenMessage, waitForAbort(controller.signal)]);
     });
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 420-424 lines
     const configTask = Task.from(async (controller) => {
       // Drain any messages queued while the socket was reconnecting.
       while (this.#pendingConfigMessages.length > 0) {
@@ -460,11 +449,9 @@ export class SpeechStream extends stt.SpeechStream {
     return words.reduce((sum, w) => sum + (w.confidence ?? 0), 0) / words.length;
   }
 
-  // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 507-661 lines
   #processStreamEvent(data: StreamEventMessage) {
     const messageType = data.type;
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 510-518 lines
     if (messageType === 'Begin') {
       this.#sessionId = data.id ?? null;
       this.#expiresAt = data.expires_at ?? null;
@@ -474,13 +461,11 @@ export class SpeechStream extends stt.SpeechStream {
       return;
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 520-522 lines
     if (messageType === 'SpeechStarted') {
       this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
       return;
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 524-532 lines
     if (messageType === 'Termination') {
       this.#logger.debug(
         `AssemblyAI session terminated audio_duration=${data.audio_duration_seconds}s session_duration=${data.session_duration_seconds}s`,
@@ -492,15 +477,13 @@ export class SpeechStream extends stt.SpeechStream {
       return;
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 536-546 lines
     const words = data.words ?? [];
     const endOfTurn = Boolean(data.end_of_turn);
     const turnIsFormatted = Boolean(data.turn_is_formatted);
     const utterance = data.utterance ?? '';
     const transcript = data.transcript ?? '';
     const language = normalizeLanguage(data.language_code ?? 'en');
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 555-564 lines
     // Word timestamps are in milliseconds:
     // https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#receive.receiveTurn.words
     const timedWords = words.map((word) =>
@@ -517,7 +500,6 @@ export class SpeechStream extends stt.SpeechStream {
     let endTime = 0;
     let confidence = 0;
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 566-588 lines
     // `words` are cumulative for the turn — emit as an interim transcript.
     if (timedWords.length > 0) {
       const interimText = timedWords.map((w) => w.text).join(' ');
@@ -540,7 +522,6 @@ export class SpeechStream extends stt.SpeechStream {
       });
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 590-621 lines
     // `utterance` is chunk-based (not cumulative) — emit as a preflight transcript
     // covering only the words since the last preflight.
     if (utterance) {
@@ -569,7 +550,6 @@ export class SpeechStream extends stt.SpeechStream {
       this.#lastPreflightStartTime = endTime;
     }
 
-    // Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 623-661 lines
     // End-of-turn: emit FINAL_TRANSCRIPT + END_OF_SPEECH.
     // If the user asked for formatted turns, wait for a formatted final.
     const waitingForFormatted = this.#opts.formatTurns === true && !turnIsFormatted;
@@ -589,7 +569,6 @@ export class SpeechStream extends stt.SpeechStream {
       });
 
       this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
-
       if (this.#speechDurationInS > 0) {
         this.queue.put({
           type: stt.SpeechEventType.RECOGNITION_USAGE,

diff --git a/plugins/mistral/README.md b/plugins/mistral/README.md
@@ -3,6 +3,7 @@ SPDX-FileCopyrightText: 2026 LiveKit, Inc.
 
 SPDX-License-Identifier: Apache-2.0
 -->
+
 # Mistral AI plugin for LiveKit Agents
 
 The Agents Framework is designed for building realtime, programmable

diff --git a/plugins/mistral/api-extractor.json b/plugins/mistral/api-extractor.json
@@ -1,3 +1,4 @@
+
 {
   "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
   "extends": "../../api-extractor-shared.json",

diff --git a/plugins/mistral/package.json b/plugins/mistral/package.json
@@ -30,12 +30,14 @@
     "clean": "rm -rf dist",
     "clean:build": "pnpm clean && pnpm build",
     "lint": "eslint -f unix \"src/**/*.{ts,js}\"",
-    "test": "vitest"
+    "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
+    "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
   },
   "devDependencies": {
     "@livekit/agents": "workspace:*",
     "@livekit/agents-plugins-test": "workspace:*",
     "@livekit/rtc-node": "catalog:",
+    "@microsoft/api-extractor": "^7.35.0",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0",
     "vitest": "^4.0.17"
@@ -47,4 +49,4 @@
     "@livekit/agents": "workspace:*",
     "@livekit/rtc-node": "catalog:"
   }
-}
+}
diff --git a/plugins/mistral/src/llm.test.ts b/plugins/mistral/src/llm.test.ts
@@ -1,18 +1,14 @@
 // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-import { llm } from '@livekit/agents-plugins-test';
-import { describe, it } from 'vitest';
+import { llm as llmTest } from '@livekit/agents-plugins-test';
+import { describe } from 'vitest';
 import { LLM } from './llm.js';
 
 const hasMistralApiKey = Boolean(process.env.MISTRAL_API_KEY);
 
 if (hasMistralApiKey) {
-  describe('Mistral', async () => {
-    await llm(new LLM({ temperature: 0 }), true);
-  });
-} else {
-  describe('Mistral', () => {
-    it.skip('requires MISTRAL_API_KEY', () => {});
+  describe('Mistral integration', async () => {
+    await llmTest(new LLM({ temperature: 0 }), true);
   });
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,6 +3,7 @@ SPDX-FileCopyrightText: 2026 LiveKit, Inc. @@
     SPDX-License-Identifier: Apache-2.0
     -->
     # Mistral AI plugin for LiveKit Agents
     The Agents Framework is designed for building realtime, programmable
@@ Expand Down @@