Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/short-badgers-turn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@livekit/agents-plugin-assemblyai": patch
"@livekit/agents-plugin-mistral": patch
---

fix(mistral): align plugin packaging with conventions and add unit tests
1 change: 1 addition & 0 deletions examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"@livekit/agents-plugin-resemble": "workspace:*",
"@livekit/agents-plugin-silero": "workspace:*",
"@livekit/agents-plugin-trugen": "workspace:*",
"@livekit/agents-plugin-mistral": "workspace:*",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Mistral dependency added out of alphabetical order in examples/package.json

@livekit/agents-plugin-mistral is inserted at line 44 after @livekit/agents-plugin-trugen, breaking the alphabetical ordering convention that all other @livekit/agents-plugin-* dependencies follow. Alphabetically, mistral should be placed between @livekit/agents-plugin-livekit (line 37) and @livekit/agents-plugin-neuphonic (line 38).

Prompt for agents
Move the line `"@livekit/agents-plugin-mistral": "workspace:*",` from its current position (after trugen, before xai) to its correct alphabetical position between `@livekit/agents-plugin-livekit` and `@livekit/agents-plugin-neuphonic` in the dependencies block of examples/package.json.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

"@livekit/agents-plugin-xai": "workspace:*",
"@livekit/noise-cancellation-node": "^0.1.9",
"@livekit/rtc-node": "catalog:",
Expand Down
2 changes: 0 additions & 2 deletions plugins/assemblyai/src/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
//
// SPDX-License-Identifier: Apache-2.0

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 48-54 lines
export type STTModels =
| 'universal-streaming-english'
| 'universal-streaming-multilingual'
Expand All @@ -12,5 +11,4 @@ export type STTModels =
// break if they already pass it.
| 'u3-pro';

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 47 lines
export type STTEncoding = 'pcm_s16le' | 'pcm_mulaw';
63 changes: 21 additions & 42 deletions plugins/assemblyai/src/stt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
//
// SPDX-License-Identifier: Apache-2.0
//
// Ported from:
// livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
// See `// Ref: python ... - N-M lines` comments throughout for cross-references.
import {
type APIConnectOptions,
type AudioBuffer,
Expand Down Expand Up @@ -34,15 +31,22 @@ interface StreamEventMessage {
transcript?: string;
utterance?: string;
end_of_turn?: boolean;
end_of_turn_confidence?: number;
turn_is_formatted?: boolean;
language_code?: string;
words?: Array<{ text?: string; start?: number; end?: number; confidence?: number }>;
speaker_label?: string;
words?: Array<{
text?: string;
start?: number;
end?: number;
confidence?: number;
speaker?: string;
}>;
// Termination
audio_duration_seconds?: number;
session_duration_seconds?: number;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 46-66 lines
export interface STTOptions {
apiKey?: string;
sampleRate: number;
Expand Down Expand Up @@ -79,7 +83,6 @@ export interface STTOptions {
baseUrl: string;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 72-97 lines
const defaultSTTOptions: STTOptions = {
apiKey: process.env.ASSEMBLYAI_API_KEY,
sampleRate: 16000,
Expand All @@ -89,9 +92,9 @@ const defaultSTTOptions: STTOptions = {
baseUrl: 'wss://streaming.assemblyai.com',
};

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 72 lines
export class STT extends stt.STT {
#opts: STTOptions;
#streams = new Set<WeakRef<SpeechStream>>();
label = 'assemblyai.STT';

get model(): string {
Expand All @@ -103,33 +106,28 @@ export class STT extends stt.STT {
}

constructor(opts: Partial<STTOptions> = {}) {
// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 111-119 lines
super({
streaming: true,
interimResults: true,
alignedTranscript: 'word',
});

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 120-122 lines
if (opts.speechModel === 'u3-pro') {
log().warn("'u3-pro' is deprecated, use 'u3-rt-pro' instead.");
opts.speechModel = 'u3-rt-pro';
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 124-125 lines
if (opts.prompt !== undefined && opts.speechModel !== 'u3-rt-pro') {
throw new Error("The 'prompt' parameter is only supported with the 'u3-rt-pro' model.");
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 127-135 lines
const apiKey = opts.apiKey ?? defaultSTTOptions.apiKey;
if (!apiKey) {
throw new Error(
'AssemblyAI API key is required. Pass one in via the `apiKey` parameter, or set it as the `ASSEMBLYAI_API_KEY` environment variable',
);
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 147-149 lines
// Minimize latency; matches LK's end-of-turn detector well.
const minTurnSilence = opts.minTurnSilence ?? 100;

Expand All @@ -143,24 +141,29 @@ export class STT extends stt.STT {

/**
 * Non-streaming recognition entry point. This plugin only supports streaming,
 * so the method rejects unconditionally.
 *
 * Python counterpart:
 * livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py (lines 185-192).
 *
 * @throws Error always.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {
  const unsupportedReason = 'Non-streaming recognize is not supported on AssemblyAI STT';
  throw new Error(unsupportedReason);
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 212-257 lines
/**
 * Merge `opts` into this instance's options and forward the same partial
 * update to every stream tracked in `#streams` that is still reachable.
 * Entries whose weak reference no longer resolves are removed from the set.
 *
 * @param opts - Subset of {@link STTOptions} to override.
 */
updateOptions(opts: Partial<STTOptions>) {
  this.#opts = { ...this.#opts, ...opts };
  for (const weakStream of this.#streams) {
    const liveStream = weakStream.deref();
    if (!liveStream) {
      // The referenced stream is gone; drop the stale entry.
      this.#streams.delete(weakStream);
      continue;
    }
    liveStream.updateOptions(opts);
  }
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 194-210 lines
stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {
return new SpeechStream(this, this.#opts, options?.connOptions);
const stream = new SpeechStream(this, this.#opts, options?.connOptions);
this.#streams.add(new WeakRef(stream));
return stream;
}
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 260 lines
export class SpeechStream extends stt.SpeechStream {
// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 262 lines
static readonly CLOSE_MSG = JSON.stringify({ type: 'Terminate' });

#opts: STTOptions;
Expand All @@ -184,7 +187,6 @@ export class SpeechStream extends stt.SpeechStream {
* (before any speech events). Null until the connection completes.
* Share this with the AssemblyAI team when reporting issues.
*/
// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 286-291 lines
get sessionId(): string | null {
return this.#sessionId;
}
Expand All @@ -193,12 +195,10 @@ export class SpeechStream extends stt.SpeechStream {
* Unix timestamp when the AssemblyAI session expires. Set alongside
* {@link sessionId} when the WebSocket connection is established.
*/
// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 293-297 lines
get expiresAt(): number | null {
return this.#expiresAt;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 299-351 lines
updateOptions(opts: Partial<STTOptions>) {
this.#opts = { ...this.#opts, ...opts };

Expand All @@ -222,7 +222,6 @@ export class SpeechStream extends stt.SpeechStream {
/**
* Force-finalize the current turn immediately.
*/
// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 353-355 lines
forceEndpoint() {
this.#pendingConfigMessages.push({ type: 'ForceEndpoint' });
if (!this.#configMessagePending.done) this.#configMessagePending.resolve();
Expand Down Expand Up @@ -262,9 +261,7 @@ export class SpeechStream extends stt.SpeechStream {
this.closed = true;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 442-505 lines
async #connectWS(): Promise<WebSocket> {
// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 443-461 lines
// u3-rt-pro has different silence defaults — if unset, both min and max default to 100ms.
let minSilence = this.#opts.minTurnSilence;
let maxSilence = this.#opts.maxTurnSilence;
Expand All @@ -273,13 +270,11 @@ export class SpeechStream extends stt.SpeechStream {
if (maxSilence === undefined) maxSilence = minSilence;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 476-480 lines
// Default language_detection to true for multilingual / u3-rt-pro models, false otherwise.
const defaultLanguageDetection =
this.#opts.speechModel.includes('multilingual') || this.#opts.speechModel === 'u3-rt-pro';
const languageDetection = this.#opts.languageDetection ?? defaultLanguageDetection;

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 463-502 lines
const liveConfig: Record<string, unknown> = {
sample_rate: this.#opts.sampleRate,
encoding: this.#opts.encoding,
Expand All @@ -301,7 +296,6 @@ export class SpeechStream extends stt.SpeechStream {
};

const url = new URL(`${this.#opts.baseUrl}/v3/ws`);
// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 498-502 lines
// Python serializes booleans as the strings "true"/"false", so we mirror that.
for (const [key, value] of Object.entries(liveConfig)) {
if (value === undefined || value === null) continue;
Expand All @@ -312,7 +306,6 @@ export class SpeechStream extends stt.SpeechStream {
}
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 492-496 lines
const ws = new WebSocket(url, {
headers: {
Authorization: this.#opts.apiKey!,
Expand All @@ -330,7 +323,6 @@ export class SpeechStream extends stt.SpeechStream {
return ws;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 357-440 lines
async #runWS(ws: WebSocket) {
let closing = false;
const sessionController = new AbortController();
Expand All @@ -349,7 +341,6 @@ export class SpeechStream extends stt.SpeechStream {
await Promise.race([closed, waitForAbort(controller.signal)]);
});

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 361-385 lines
const sendTask = async () => {
const samplesPerBuffer = Math.floor((this.#opts.sampleRate * this.#opts.bufferSizeMs) / 1000);
const audioStream = new AudioByteStream(this.#opts.sampleRate, 1, samplesPerBuffer);
Expand Down Expand Up @@ -393,7 +384,6 @@ export class SpeechStream extends stt.SpeechStream {
}
};

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 387-418 lines
let messageHandler: ((msg: RawData, isBinary: boolean) => void) | null = null;
const listenTask = Task.from(async (controller) => {
const listenMessage = new Promise<void>((resolve, reject) => {
Expand All @@ -419,7 +409,6 @@ export class SpeechStream extends stt.SpeechStream {
await Promise.race([listenMessage, waitForAbort(controller.signal)]);
});

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 420-424 lines
const configTask = Task.from(async (controller) => {
// Drain any messages queued while the socket was reconnecting.
while (this.#pendingConfigMessages.length > 0) {
Expand Down Expand Up @@ -460,11 +449,9 @@ export class SpeechStream extends stt.SpeechStream {
return words.reduce((sum, w) => sum + (w.confidence ?? 0), 0) / words.length;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 507-661 lines
#processStreamEvent(data: StreamEventMessage) {
const messageType = data.type;

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 510-518 lines
if (messageType === 'Begin') {
this.#sessionId = data.id ?? null;
this.#expiresAt = data.expires_at ?? null;
Expand All @@ -474,13 +461,11 @@ export class SpeechStream extends stt.SpeechStream {
return;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 520-522 lines
if (messageType === 'SpeechStarted') {
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
return;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 524-532 lines
if (messageType === 'Termination') {
this.#logger.debug(
`AssemblyAI session terminated audio_duration=${data.audio_duration_seconds}s session_duration=${data.session_duration_seconds}s`,
Expand All @@ -492,15 +477,13 @@ export class SpeechStream extends stt.SpeechStream {
return;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 536-546 lines
const words = data.words ?? [];
const endOfTurn = Boolean(data.end_of_turn);
const turnIsFormatted = Boolean(data.turn_is_formatted);
const utterance = data.utterance ?? '';
const transcript = data.transcript ?? '';
const language = normalizeLanguage(data.language_code ?? 'en');

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 555-564 lines
// Word timestamps are in milliseconds:
// https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#receive.receiveTurn.words
const timedWords = words.map((word) =>
Expand All @@ -517,7 +500,6 @@ export class SpeechStream extends stt.SpeechStream {
let endTime = 0;
let confidence = 0;

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 566-588 lines
// `words` are cumulative for the turn — emit as an interim transcript.
if (timedWords.length > 0) {
const interimText = timedWords.map((w) => w.text).join(' ');
Expand All @@ -540,7 +522,6 @@ export class SpeechStream extends stt.SpeechStream {
});
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 590-621 lines
// `utterance` is chunk-based (not cumulative) — emit as a preflight transcript
// covering only the words since the last preflight.
if (utterance) {
Expand Down Expand Up @@ -569,7 +550,6 @@ export class SpeechStream extends stt.SpeechStream {
this.#lastPreflightStartTime = endTime;
}

// Ref: python livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py - 623-661 lines
// End-of-turn: emit FINAL_TRANSCRIPT + END_OF_SPEECH.
// If the user asked for formatted turns, wait for a formatted final.
const waitingForFormatted = this.#opts.formatTurns === true && !turnIsFormatted;
Expand All @@ -589,7 +569,6 @@ export class SpeechStream extends stt.SpeechStream {
});

this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });

if (this.#speechDurationInS > 0) {
this.queue.put({
type: stt.SpeechEventType.RECOGNITION_USAGE,
Expand Down
1 change: 1 addition & 0 deletions plugins/mistral/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ SPDX-FileCopyrightText: 2026 LiveKit, Inc.

SPDX-License-Identifier: Apache-2.0
-->

# Mistral AI plugin for LiveKit Agents

The Agents Framework is designed for building realtime, programmable
Expand Down
1 change: 1 addition & 0 deletions plugins/mistral/api-extractor.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

{
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
"extends": "../../api-extractor-shared.json",
Expand Down
6 changes: 4 additions & 2 deletions plugins/mistral/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
"clean": "rm -rf dist",
"clean:build": "pnpm clean && pnpm build",
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
"test": "vitest"
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
},
"devDependencies": {
"@livekit/agents": "workspace:*",
"@livekit/agents-plugins-test": "workspace:*",
"@livekit/rtc-node": "catalog:",
"@microsoft/api-extractor": "^7.35.0",
"tsup": "^8.3.5",
"typescript": "^5.0.0",
"vitest": "^4.0.17"
Expand All @@ -47,4 +49,4 @@
"@livekit/agents": "workspace:*",
"@livekit/rtc-node": "catalog:"
}
}
}
12 changes: 4 additions & 8 deletions plugins/mistral/src/llm.test.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { llm } from '@livekit/agents-plugins-test';
import { describe, it } from 'vitest';
import { llm as llmTest } from '@livekit/agents-plugins-test';
import { describe } from 'vitest';
import { LLM } from './llm.js';

const hasMistralApiKey = Boolean(process.env.MISTRAL_API_KEY);

if (hasMistralApiKey) {
describe('Mistral', async () => {
await llm(new LLM({ temperature: 0 }), true);
});
} else {
describe('Mistral', () => {
it.skip('requires MISTRAL_API_KEY', () => {});
describe('Mistral integration', async () => {
await llmTest(new LLM({ temperature: 0 }), true);
});
}
Loading
Loading