diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx index 676610c7aa92..9f234861cd4d 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx @@ -9,7 +9,7 @@ import { dim, fg, } from "@opentui/core" -import { createEffect, createMemo, Match, Switch, type JSX, onMount, batch } from "solid-js" +import { createEffect, createMemo, Match, Switch, Show, type JSX, onMount, batch, createSignal } from "solid-js" import { useLocal } from "@tui/context/local" import { useTheme } from "@tui/context/theme" import { SplitBorder } from "@tui/component/border" @@ -28,6 +28,9 @@ import { useExit } from "../../context/exit" import { Clipboard } from "../../util/clipboard" import type { FilePart } from "@opencode-ai/sdk" import { TuiEvent } from "../../event" +import { Audio } from "@/util/audio" +import { Whisper } from "@/util/whisper" +import { useToast } from "../../ui/toast" export type PromptProps = { sessionID?: string @@ -61,6 +64,20 @@ export function Prompt(props: PromptProps) { const command = useCommandDialog() const renderer = useRenderer() const { theme, syntax } = useTheme() + const toast = useToast() + + const [recording, setRecording] = createSignal<Audio.RecordingSession | null>(null) + const [recordingAvailable, setRecordingAvailable] = createSignal(false) + const [whisperConfigured, setWhisperConfigured] = createSignal(false) + + onMount(async () => { + const available = await Audio.checkRecordingAvailable() + console.log("Recording available:", available) + setRecordingAvailable(available) + + // Whisper uses OpenAI provider key - always available if OpenAI is configured + setWhisperConfigured(true) + }) const textareaKeybindings = createMemo(() => { const newlineBindings = keybind.all.input_newline || [] @@ -156,6 +173,16 @@ export function Prompt(props: PromptProps) { } }, }, + { + title: recording() ? 
"Stop notation" : "Start notation", + value: "prompt.voice", + disabled: !recordingAvailable(), + category: "Prompt", + onSelect: (dialog) => { + toggleRecording() + dialog.clear() + }, + }, ] }) @@ -456,6 +483,70 @@ export function Prompt(props: PromptProps) { return } + async function toggleRecording() { + const currentRecording = recording() + + if (currentRecording) { + try { + toast.show({ + message: "Processing audio...", + variant: "info", + }) + + const audioBlob = await currentRecording.stop() + setRecording(null) + + console.log("Audio blob:", { size: audioBlob.size, type: audioBlob.type }) + + const text = await Whisper.transcribe(audioBlob) + + console.log("Transcription result:", text) + + if (text && text.trim()) { + input.insertText(text + " ") + + toast.show({ + message: "Transcription complete", + variant: "success", + }) + } else { + toast.show({ + message: "No text detected in audio", + variant: "warning", + }) + } + } catch (error) { + setRecording(null) + + if (error instanceof Whisper.ConfigError) { + toast.show({ + message: "OpenAI not configured. Run: opencode auth login", + variant: "error", + }) + } else { + toast.show({ + message: `Recording failed: ${error instanceof Error ? error.message : String(error)}`, + variant: "error", + }) + } + } + } else { + try { + const session = await Audio.startRecording() + setRecording(session) + toast.show({ + message: "Recording... Click mic to stop", + variant: "info", + }) + } catch (error) { + toast.show({ + message: `Failed to start recording: ${error instanceof Error ? 
error.message : String(error)}`, + variant: "error", + }) + } + } + } + return ( <> + > + + + + { + e.preventDefault() + toggleRecording() + }} + > + + Stop + + + + + { + e.preventDefault() + toggleRecording() + }} + > + + Notate + + + + + + diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index de69dc0f2e43..a2e37382a46a 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -597,6 +597,13 @@ export namespace Config { ) .optional() .describe("Custom provider configurations and model overrides"), + whisper: z + .object({ + model: z.string().optional().default("whisper-1").describe("Whisper model to use (default: whisper-1)"), + baseURL: z.string().optional().default("https://api.openai.com/v1").describe("OpenAI API base URL (default: https://api.openai.com/v1)"), + }) + .optional() + .describe("OpenAI Whisper configuration for voice input (uses OpenAI provider API key from 'opencode auth login')"), mcp: z .record(z.string(), Mcp) .optional() diff --git a/packages/opencode/src/util/audio.ts b/packages/opencode/src/util/audio.ts new file mode 100644 index 000000000000..15c34b1265cc --- /dev/null +++ b/packages/opencode/src/util/audio.ts @@ -0,0 +1,117 @@ +import { Log } from "./log" +import { $ } from "bun" +import fs from "fs/promises" +import path from "path" +import os from "os" + +export namespace Audio { + const log = Log.create({ service: "audio" }) + + export interface RecordingSession { + stop: () => Promise<Blob> + isRecording: boolean + } + + export async function startRecording(): Promise<RecordingSession> { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "opencode-audio-")) + const audioFile = path.join(tempDir, "recording.wav") + + let isRecording = true + let proc: any + + // Try to use available recording tools based on platform + const platform = process.platform + + log.info("starting audio recording", { platform, audioFile }) + + if (platform === "darwin") { + // macOS - use 
sox with default audio device, let it run until killed + proc = Bun.spawn(['sox', '-d', '-t', 'wav', audioFile], { + stdout: 'ignore', + stderr: 'ignore', + }) + } else if (platform === "linux") { + // Linux - use arecord (ALSA) + proc = Bun.spawn(['arecord', '-f', 'cd', '-c', '1', '-r', '16000', audioFile], { + stdout: 'ignore', + stderr: 'ignore', + }) + } else { + throw new Error("Audio recording not supported on this platform") + } + + // Give sox time to start and create file + await new Promise(resolve => setTimeout(resolve, 1000)) + + log.info("recording started", { pid: proc.pid }) + + return { + async stop() { + isRecording = false + log.info("stopping audio recording", { audioFile, pid: proc.pid }) + + try { + proc.kill() + await proc.exited + log.info("recording process exited") + } catch (e) { + log.error("error killing recording process", { error: e }) + } + + // Wait for file to be fully written and flushed to disk + await new Promise(resolve => setTimeout(resolve, 2000)) + + const file = Bun.file(audioFile) + + // Check if file exists and has content + const exists = await file.exists() + if (!exists) { + log.error("audio file does not exist", { audioFile }) + throw new Error("Recording file not found") + } + + const size = await file.size + log.info("audio file info", { audioFile, size, exists }) + + if (size === 0) { + log.error("audio file is empty", { audioFile }) + throw new Error("Recording file is empty") + } + + const arrayBuffer = await file.arrayBuffer() + const blob = new Blob([arrayBuffer], { type: "audio/wav" }) + + log.info("created blob", { blobSize: blob.size, blobType: blob.type }) + + // Cleanup + await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {}) + + return blob + }, + get isRecording() { + return isRecording + } + } + } + + export async function checkRecordingAvailable(): Promise<boolean> { + const platform = process.platform + + try { + if (platform === "darwin") { + // Check for sox + const result = await $`which 
sox`.nothrow().quiet() + return result.exitCode === 0 + } else if (platform === "linux") { + // Check for arecord + const result = await $`which arecord`.nothrow().quiet() + return result.exitCode === 0 + } + } catch (error) { + log.error("error checking recording availability", { error }) + } + + return false + } +} + diff --git a/packages/opencode/src/util/whisper.ts b/packages/opencode/src/util/whisper.ts new file mode 100644 index 000000000000..55a569a50217 --- /dev/null +++ b/packages/opencode/src/util/whisper.ts @@ -0,0 +1,62 @@ +import { Config } from "../config/config" +import { Auth } from "../auth" +import { Log } from "./log" +import { NamedError } from "./error" + +export namespace Whisper { + const log = Log.create({ service: "whisper" }) + + export class ConfigError extends NamedError { + constructor() { + super("WhisperConfigError", "OpenAI API key not configured. Run: opencode auth login") + } + } + + export async function transcribe(audioBlob: Blob): Promise<string> { + // Get OpenAI API key from auth (same as provider) + const auth = await Auth.get("openai") + const config = await Config.get() + + // Whisper reuses the OpenAI provider key from 'opencode auth login'; the whisper config schema has no apiKey field + const apiKey = auth?.type === "api" ? auth.key : undefined + + if (!apiKey) { + log.error("no OpenAI API key found in auth or config") + throw new ConfigError() + } + + const baseURL = config.whisper?.baseURL ?? "https://api.openai.com/v1" + const model = config.whisper?.model ?? 
"whisper-1" + + const formData = new FormData() + formData.append("file", audioBlob, "recording.wav") + formData.append("model", model) + + log.info("transcribing audio", { model, size: audioBlob.size, type: audioBlob.type }) + + const response = await fetch(`${baseURL}/audio/transcriptions`, { + method: "POST", + headers: { + "Authorization": `Bearer ${apiKey}`, + }, + body: formData, + }) + + if (!response.ok) { + const error = await response.text() + log.error("transcription failed", { status: response.status, error }) + throw new Error(`Whisper API error (${response.status}): ${error}`) + } + + const result = await response.json() + log.info("transcription complete", { text: result.text }) + + if (!result.text) { + log.error("no text in response", { result }) + throw new Error("No transcription text returned") + } + + return result.text + } +} +