From e5d50ca98d082380bfebffa0029596c4fe805f4b Mon Sep 17 00:00:00 2001 From: Den <2119348+dzianisv@users.noreply.github.com> Date: Sat, 24 Jan 2026 09:36:51 -0800 Subject: [PATCH 1/3] feat: Add Telegram two-way communication with voice message support Implements full Telegram integration for OpenCode notifications: Outbound notifications: - Task completion notifications via Telegram (text + TTS audio) - Session context tracking for reply routing Inbound replies: - Text message replies forwarded to OpenCode sessions - Voice/video message support with local Whisper STT transcription - Unified architecture: voice messages use telegram_replies table Key components: - telegram-webhook Edge Function: handles /start, /stop, /status, replies - send-notify Edge Function: sends notifications with session context - Whisper server (localhost:8787): local speech-to-text transcription - Supabase Realtime: WebSocket subscription for incoming messages Database schema: - telegram_subscribers: user subscriptions - telegram_reply_contexts: active session routing (24h TTL) - telegram_replies: incoming messages (text + voice with audio_base64) Tests: 168 passing --- README.md | 192 +++ docs/telegram.design.md | 434 +++++++ docs/telegram.md | 288 +++++ package-lock.json | 136 ++- package.json | 3 + supabase/.gitignore | 8 + supabase/config.toml | 382 ++++++ supabase/functions/send-notify/index.ts | 318 +++++ supabase/functions/telegram-webhook/index.ts | 487 ++++++++ .../20240113000000_create_subscribers.sql | 56 + .../20240114000000_add_telegram_replies.sql | 162 +++ .../20240116000000_add_voice_to_replies.sql | 41 + test/tts.test.ts | 725 ++++++++++++ tts.ts | 1036 ++++++++++++++++- whisper/whisper_server.py | 275 +++++ 15 files changed, 4508 insertions(+), 35 deletions(-) create mode 100644 docs/telegram.design.md create mode 100644 docs/telegram.md create mode 100644 supabase/.gitignore create mode 100644 supabase/config.toml create mode 100644 
supabase/functions/send-notify/index.ts create mode 100644 supabase/functions/telegram-webhook/index.ts create mode 100644 supabase/migrations/20240113000000_create_subscribers.sql create mode 100644 supabase/migrations/20240114000000_add_telegram_replies.sql create mode 100644 supabase/migrations/20240116000000_add_voice_to_replies.sql create mode 100644 whisper/whisper_server.py diff --git a/README.md b/README.md index fe4bb1e..084dd84 100644 --- a/README.md +++ b/README.md @@ -265,6 +265,198 @@ Create/edit `~/.config/opencode/tts.json`: /tts off Disable TTS ``` +### Telegram Notifications + +Get notified on Telegram when OpenCode tasks complete - includes text summaries and optional voice messages. + +#### Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ SUPABASE (Backend) │ +│ - PostgreSQL: telegram_subscribers table (uuid → chat_id) │ +│ - Edge Function: /telegram-webhook (handles /start, /stop) │ +│ - Edge Function: /send-notify (receives notifications) │ +└─────────────────────────────────────────────────────────────────┘ + ↑ + │ HTTPS POST + │ +┌─────────────────────────────────────────────────────────────────┐ +│ OpenCode TTS Plugin (tts.ts) │ +│ - On task complete: generates TTS audio locally │ +│ - Converts WAV → OGG (ffmpeg) │ +│ - Sends text + voice_base64 to Supabase Edge Function │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Design principles:** +- **Privacy-first**: Your UUID is never linked to your identity - only to your Telegram chat ID +- **Serverless**: Supabase Edge Functions scale automatically, no server to maintain +- **Self-hostable**: All backend code is in `supabase/` directory - deploy to your own Supabase project + +#### Quick Setup (Using Existing Backend) + +1. **Generate your UUID:** + ```bash + uuidgen | tr '[:upper:]' '[:lower:]' + # Example output: a0dcb5d4-30c2-4dd0-bfbe-e569a42f47bb + ``` + +2. 
**Subscribe via Telegram:** + - Open [@OpenCodeMgrBot](https://t.me/OpenCodeMgrBot) + - Send: `/start <your-uuid>` + - You'll receive a confirmation message + +3. **Configure TTS plugin** (`~/.config/opencode/tts.json`): + ```json + { + "enabled": true, + "engine": "coqui", + "telegram": { + "enabled": true, + "uuid": "<your-uuid>", + "sendText": true, + "sendVoice": true + } + } + ``` + +4. **Restart OpenCode** - you'll now receive Telegram notifications when tasks complete + +#### Telegram Bot Commands + +| Command | Description | +|---------|-------------| +| `/start <your-uuid>` | Subscribe with your UUID | +| `/stop` | Unsubscribe from notifications | +| `/status` | Check subscription status | +| `/help` | Show available commands | + +#### Telegram Configuration Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `telegram.enabled` | boolean | `false` | Enable Telegram notifications | +| `telegram.uuid` | string | - | Your subscription UUID (required) | +| `telegram.sendText` | boolean | `true` | Send text message summaries | +| `telegram.sendVoice` | boolean | `true` | Send voice messages (requires ffmpeg) | +| `telegram.serviceUrl` | string | (default) | Custom backend URL (for self-hosted) | + +**Environment variables** (override config): +- `TELEGRAM_DISABLED=1` - Disable Telegram notifications + +#### Self-Hosting the Backend + +To deploy your own Telegram notification backend: + +**Prerequisites:** +- [Supabase CLI](https://supabase.com/docs/guides/cli) installed +- A Supabase project (free tier works fine) +- A Telegram bot token from [@BotFather](https://t.me/BotFather) + +**1. Link to your Supabase project:** +```bash +cd opencode-reflection-plugin +supabase link --project-ref <project-ref> +``` + +**2. 
Push the database migration:** +```bash +supabase db push +``` + +This creates the `telegram_subscribers` table: +```sql +CREATE TABLE telegram_subscribers ( + uuid UUID PRIMARY KEY DEFAULT gen_random_uuid(), + chat_id BIGINT NOT NULL UNIQUE, + username TEXT, + is_active BOOLEAN DEFAULT true, + notifications_sent INTEGER DEFAULT 0, + created_at TIMESTAMPTZ DEFAULT now(), + updated_at TIMESTAMPTZ DEFAULT now() +); +``` + +**3. Deploy edge functions:** +```bash +supabase functions deploy telegram-webhook +supabase functions deploy send-notify +``` + +**4. Set secrets:** +```bash +supabase secrets set TELEGRAM_BOT_TOKEN=<your-bot-token> +``` + +**5. Configure Telegram webhook:** +```bash +curl "https://api.telegram.org/bot<your-bot-token>/setWebhook?url=https://<project-ref>.supabase.co/functions/v1/telegram-webhook" +``` + +**6. Update your TTS config to use your backend:** +```json +{ + "telegram": { + "enabled": true, + "uuid": "<your-uuid>", + "serviceUrl": "https://<project-ref>.supabase.co/functions/v1/send-notify" + } +} +``` + +#### Backend Files + +``` +supabase/ +├── migrations/ +│ └── 20240113000000_create_subscribers.sql # Database schema +└── functions/ + ├── telegram-webhook/ + │ └── index.ts # Handles /start, /stop, /status + └── send-notify/ + └── index.ts # Receives notifications from plugin +``` + +#### How UUID Subscription Works + +``` +┌──────────────────┐ ┌──────────────────┐ +│ User generates │ │ Telegram Bot │ +│ UUID locally │ │ @OpenCodeMgrBot │ +└────────┬─────────┘ └────────┬─────────┘ + │ │ + │ 1. User sends │ + │ /start <uuid> │ + │ ─────────────────────────────────────▶│ + │ │ + │ 2. Bot stores mapping: + │ uuid → chat_id + │ │ + │ 3. 
User configures │ + │ tts.json with uuid │ + │ │ + ▼ ▼ +┌──────────────────┐ ┌──────────────────┐ +│ OpenCode │ │ Supabase DB │ +│ sends notify │───────────────────▶│ looks up │ +│ with uuid │ │ chat_id by uuid │ +└──────────────────┘ └────────┬─────────┘ + │ + ▼ + ┌──────────────────┐ + │ Telegram API │ + │ sends message │ + │ to chat_id │ + └──────────────────┘ +``` + +**Security model:** +- UUID is generated locally and never transmitted except when subscribing +- Backend only stores UUID → chat_id mapping (no personal data) +- Rate limiting: 10 requests/minute per UUID +- You can unsubscribe anytime with `/stop` + ### Available macOS Voices Run `say -v ?` to list all available voices. Popular choices: diff --git a/docs/telegram.design.md b/docs/telegram.design.md new file mode 100644 index 0000000..1f0ff16 --- /dev/null +++ b/docs/telegram.design.md @@ -0,0 +1,434 @@ +# Telegram Integration Architecture + +## Overview + +The Telegram integration enables two-way communication between OpenCode and users via Telegram: +- **Outbound**: Notifications when tasks complete (text + voice) +- **Inbound**: Users can reply to messages (text, voice, video) to continue conversations + +## System Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────────┐ +│ TELEGRAM TWO-WAY INTEGRATION │ +├─────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────────────────────────────────────────────────────────────┐│ +│ │ OPENCODE (Local Machine) ││ +│ │ ││ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ││ +│ │ │ Session 1 │ │ Session 2 │ │ Session 3 │ │ Session N │ ││ +│ │ │ ses_abc... │ │ ses_def... │ │ ses_ghi... │ │ ses_xyz... 
│ ││ +│ │ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ ││ +│ │ │ │ │ │ ││ +│ │ └────────────────┴────────────────┴────────────────┘ ││ +│ │ │ ││ +│ │ ┌────────▼────────┐ ││ +│ │ │ TTS Plugin │ ││ +│ │ │ (tts.ts) │ ││ +│ │ └────────┬────────┘ ││ +│ │ │ ││ +│ │ ┌─────────────────────────┼─────────────────────────┐ ││ +│ │ │ │ │ ││ +│ │ ▼ ▼ ▼ ││ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ││ +│ │ │ TTS Engine │ │ Send HTTP │ │ Supabase │ ││ +│ │ │ (Coqui/OS) │ │ Notifica- │ │ Realtime │ ││ +│ │ │ │ │ tion │ │ Listener │ ││ +│ │ └─────────────┘ └──────┬──────┘ └──────┬──────┘ ││ +│ │ │ │ ││ +│ └──────────────────────────────────┼────────────────────────┼────────────────┘│ +│ │ │ │ +│ │ HTTPS POST │ WebSocket │ +│ │ + session_id │ (postgres_changes) +│ ▼ │ │ +│ ┌──────────────────────────────────────────────────────────┴────────────────┐│ +│ │ SUPABASE ││ +│ │ ││ +│ │ ┌────────────────┐ ┌────────────────┐ ┌────────────────────────┐ ││ +│ │ │ send-notify │ │ telegram- │ │ PostgreSQL DB │ ││ +│ │ │ Edge Function │ │ webhook │ │ │ ││ +│ │ │ │ │ Edge Function │ │ telegram_subscribers │ ││ +│ │ │ • Lookup UUID │ │ │ │ telegram_reply_contexts││ +│ │ │ • Send to TG │ │ • Commands │ │ telegram_replies │ ││ +│ │ │ • Store context│ │ • Voice STT │ │ │ ││ +│ │ └───────┬────────┘ │ • Video STT │ └────────────────────────┘ ││ +│ │ │ │ • Text replies │ ││ +│ │ │ └───────┬────────┘ ││ +│ └──────────┼─────────────────────┼──────────────────────────────────────────┘│ +│ │ │ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌─────────────────────────────────────────────────────────────────────────┐ │ +│ │ TELEGRAM BOT API │ │ +│ │ │ │ +│ │ sendMessage ◄─────────────────────────────────► getFile + webhook │ │ +│ │ sendVoice (voice/video/text) │ │ +│ └─────────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────────────────────────────────┐ │ +│ │ USER'S TELEGRAM │ │ +│ │ │ │ +│ │ 📱 Receives: "Task Complete 
[ses_abc123]" │ │ +│ │ 🎤 Can reply: Text, Voice Message, or Video Note │ │ +│ │ │ │ +│ └─────────────────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────────┘ +``` + +## Message Flow Diagrams + +### 1. Outbound Notification Flow + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ OpenCode │ │ TTS Plugin │ │ send-notify │ │ Telegram │ +│ Session │ │ │ │ Edge Func │ │ User │ +└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ │ + │ session.idle │ │ │ + │──────────────────>│ │ │ + │ │ │ │ + │ │ POST /send-notify │ │ + │ │ { │ │ + │ │ uuid, │ │ + │ │ text, │ │ + │ │ session_id, │ │ + │ │ voice_base64 │ │ + │ │ } │ │ + │ │──────────────────>│ │ + │ │ │ │ + │ │ │ Store context │ + │ │ │ in reply_contexts │ + │ │ │ │ + │ │ │ sendMessage │ + │ │ │ "[ses_abc123] │ + │ │ │ Task Complete" │ + │ │ │──────────────────>│ + │ │ │ │ + │ │ │ sendVoice (opt) │ + │ │ │──────────────────>│ + │ │ │ │ +``` + +### 2. Inbound Reply Flow (Text) + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Telegram │ │ telegram- │ │ Supabase │ │ OpenCode │ +│ User │ │ webhook │ │ Realtime │ │ Session │ +└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ │ + │ "Fix the bug" │ │ │ + │──────────────────>│ │ │ + │ │ │ │ + │ │ Lookup context │ │ + │ │ by chat_id │ │ + │ │──────────────────>│ │ + │ │ │ │ + │ │ Get session_id │ │ + │ │<──────────────────│ │ + │ │ │ │ + │ │ INSERT reply │ │ + │ │ {session_id, │ │ + │ │ reply_text} │ │ + │ │──────────────────>│ │ + │ │ │ │ + │ │ │ Realtime event │ + │ │ │ (postgres_changes)│ + │ │ │──────────────────>│ + │ │ │ │ + │ │ │ promptAsync() │ + │ │ │ "[Telegram]: Fix │ + │ │ │ the bug" │ + │ │ │ │ + │ "Reply sent ✓" │ │ │ + │<──────────────────│ │ │ + │ │ │ │ +``` + +### 3. 
Inbound Reply Flow (Voice/Video with STT) + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Telegram │ │ telegram- │ │ Whisper STT │ │ Supabase │ +│ User │ │ webhook │ │ Server │ │ Realtime │ +└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ │ + │ 🎤 Voice Message │ │ │ + │──────────────────>│ │ │ + │ │ │ │ + │ │ getFile (file_id) │ │ + │ │ Download audio │ │ + │ │ │ │ + │ │ POST /transcribe │ │ + │ │ (audio bytes) │ │ + │ │──────────────────>│ │ + │ │ │ │ + │ │ {"text": "..."} │ │ + │ │<──────────────────│ │ + │ │ │ │ + │ │ INSERT reply │ │ + │ │ {reply_text: │ │ + │ │ transcribed} │ │ + │ │──────────────────────────────────────>│ + │ │ │ │ + │ "Voice received: │ │ │ + │ 'Fix the bug'" │ │ │ + │<──────────────────│ │ │ + │ │ │ │ +``` + +## Database Schema + +### Tables + +```sql +-- User subscriptions (existing) +telegram_subscribers ( + uuid UUID PRIMARY KEY, + chat_id BIGINT NOT NULL, + username TEXT, + is_active BOOLEAN DEFAULT TRUE, + notifications_sent INTEGER DEFAULT 0 +) + +-- Reply context tracking (for multi-session support) +telegram_reply_contexts ( + id UUID PRIMARY KEY, + chat_id BIGINT NOT NULL, + uuid UUID REFERENCES telegram_subscribers(uuid), + session_id TEXT NOT NULL, -- OpenCode session ID + message_id INTEGER, -- Telegram message ID + directory TEXT, -- Working directory + expires_at TIMESTAMPTZ, -- 24-hour expiration + is_active BOOLEAN DEFAULT TRUE +) + +-- Incoming replies (Realtime-enabled) +telegram_replies ( + id UUID PRIMARY KEY, + uuid UUID REFERENCES telegram_subscribers(uuid), + session_id TEXT NOT NULL, -- Target OpenCode session + directory TEXT, + reply_text TEXT NOT NULL, -- Text or transcribed audio + telegram_message_id INTEGER, + telegram_chat_id BIGINT NOT NULL, + processed BOOLEAN DEFAULT FALSE, + processed_at TIMESTAMPTZ +) +``` + +### Entity Relationship + +``` +┌─────────────────────┐ ┌─────────────────────┐ +│ telegram_subscribers│ │telegram_reply_contexts +│ │ │ │ +│ uuid 
(PK) │◄─────│ uuid (FK) │ +│ chat_id │ │ chat_id │ +│ username │ │ session_id │ +│ is_active │ │ message_id │ +│ notifications_sent │ │ directory │ +└─────────────────────┘ │ expires_at │ + │ │ is_active │ + │ └─────────────────────┘ + │ + │ ┌─────────────────────┐ + │ │ telegram_replies │ + │ │ │ + └───────────────────│ uuid (FK) │ + │ session_id │ + │ reply_text │ + │ processed │ + └─────────────────────┘ +``` + +## Session ID in Messages + +To support multiple concurrent OpenCode sessions, the session ID is embedded in outgoing messages: + +``` +🔔 *OpenCode Task Complete* [ses_abc12345] + +Model: claude-sonnet-4 | Dir: my-project +──────────────────────────────────── + +I've completed the refactoring of the authentication module... + +_💬 Reply to continue this session_ +``` + +When a user replies, the webhook: +1. Looks up the most recent `reply_context` for that `chat_id` +2. Extracts the `session_id` +3. Stores the reply with the correct `session_id` +4. Plugin receives via Realtime and routes to correct session + +## Voice/Video Message Processing + +### Faster Whisper STT Server + +The Telegram webhook connects to a locally-running Faster Whisper server for speech-to-text: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ FASTER WHISPER STT SERVER │ +│ │ +│ Location: ~/.config/opencode/whisper/ │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ whisper_server.py │ │ +│ │ │ │ +│ │ - Loads faster-whisper model (base/small/medium/large) │ │ +│ │ - HTTP server on localhost:8787 │ │ +│ │ - Endpoint: POST /transcribe │ │ +│ │ - Accepts: audio file (OGG, MP3, WAV, MP4) │ │ +│ │ - Returns: {"text": "transcribed text", "language": "en"}│ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ Files: │ +│ - whisper_server.py (HTTP server script) │ +│ - venv/ (Python virtualenv) │ +│ - server.pid (Running server PID) │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` 
+ +### Configuration + +Add to `~/.config/opencode/tts.json`: + +```json +{ + "telegram": { + "enabled": true, + "uuid": "your-uuid", + "receiveReplies": true, + "whisperUrl": "http://localhost:8787/transcribe", + "whisperModel": "base" + } +} +``` + +### Supported Audio/Video Formats + +| Telegram Type | File Format | Handling | +|---------------|-------------|----------| +| Voice Message | OGG Opus | Direct transcription | +| Video Note | MP4 | Extract audio, transcribe | +| Audio File | MP3/WAV/OGG | Direct transcription | +| Video File | MP4/MOV | Extract audio, transcribe | + +## Security Model + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ SECURITY LAYERS │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ 1. UUID Authentication │ +│ - User generates UUID locally (never transmitted) │ +│ - UUID maps to chat_id (no personal data stored) │ +│ - Can revoke anytime with /stop │ +│ │ +│ 2. Rate Limiting │ +│ - 10 notifications per minute per UUID │ +│ - Prevents abuse of notification endpoint │ +│ │ +│ 3. Row Level Security (RLS) │ +│ - All tables have RLS enabled │ +│ - Only service_role can access (Edge Functions) │ +│ - Anon key for Realtime only (filtered by UUID) │ +│ │ +│ 4. Context Expiration │ +│ - Reply contexts expire after 24 hours │ +│ - Automatic cleanup of stale data │ +│ │ +│ 5. 
Whisper Server (Local) │ +│ - Runs on localhost only │ +│ - No audio data leaves local machine │ +│ - Audio transcribed locally, only text sent to Supabase │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Multi-Session Support + +When multiple OpenCode sessions are running concurrently: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ CONCURRENT SESSIONS │ +│ │ +│ Session 1 (ses_abc) Session 2 (ses_def) │ +│ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ Working on │ │ Working on │ │ +│ │ auth module │ │ API endpoints │ │ +│ └────────┬────────┘ └────────┬────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ Notification sent: Notification sent: │ +│ "[ses_abc] Auth done" "[ses_def] API done" │ +│ │ +│ ┌─────────────────┐ │ +│ │ User replies: │ │ +│ │ "Add tests" │ │ +│ └────────┬────────┘ │ +│ │ │ +│ ▼ │ +│ Routed to most recent │ +│ context (ses_def) │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Routing Rules:** +1. Each notification creates a new `reply_context` entry +2. Previous contexts for same `chat_id` are deactivated +3. User reply goes to the **most recent** active session +4. 
To reply to a specific session, the user can quote the message + +## Files Reference + +``` +opencode-reflection-plugin/ +├── tts.ts # Main plugin (client-side) +│ ├── sendTelegramNotification() # Send notifications +│ ├── subscribeToReplies() # Realtime subscription for text replies +│ ├── subscribeToVoiceMessages() # Realtime subscription for voice messages +│ ├── processVoiceMessage() # Download, transcribe, forward voice +│ ├── transcribeWithWhisper() # Local Whisper STT transcription +│ ├── startWhisperServer() # Manage local Whisper server +│ └── initSupabaseClient() # Supabase client setup +│ +├── whisper/ +│ └── whisper_server.py # Local Faster Whisper STT server (port 8787) +│ +├── supabase/ +│ ├── functions/ +│ │ ├── send-notify/ +│ │ │ └── index.ts # Send notifications endpoint +│ │ └── telegram-webhook/ +│ │ └── index.ts # Handle incoming messages (text, voice, video) +│ │ +│ └── migrations/ +│ ├── 20240113000000_create_subscribers.sql # User subscriptions +│ ├── 20240114000000_add_telegram_replies.sql # Text reply support +│ └── 20240116000000_add_voice_to_replies.sql # Voice/video message support +│ +└── docs/ + └── telegram.design.md # This file +``` + +## Deployment Checklist + +- [ ] Apply database migrations: `supabase db push` +- [ ] Deploy Edge Functions: `supabase functions deploy` +- [ ] Set Telegram webhook URL +- [ ] Configure `tts.json` with UUID +- [ ] Start Whisper STT server (for voice messages) +- [ ] Copy plugin to `~/.config/opencode/plugin/` +- [ ] Restart OpenCode diff --git a/docs/telegram.md b/docs/telegram.md new file mode 100644 index 0000000..e24390a --- /dev/null +++ b/docs/telegram.md @@ -0,0 +1,288 @@ +# Telegram Integration Architecture + +## Overview + +Two-way communication between OpenCode and Telegram: +- **Outbound**: Task completion notifications (text + TTS audio) +- **Inbound**: User replies via text, voice, or video messages + +## System Architecture + +``` 
+┌─────────────────────────────────────────────────────────────────────────────────┐ +│ TELEGRAM INTEGRATION ARCHITECTURE │ +├─────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌───────────────────────────────────────────────────────────────────────────┐ │ +│ │ USER'S TELEGRAM APP │ │ +│ │ │ │ +│ │ 📱 Receives notifications 🎤 Sends voice/text replies │ │ +│ └──────────────────┬─────────────────────────────────┬──────────────────────┘ │ +│ │ │ │ +│ │ Bot sends │ User sends │ +│ │ messages │ replies │ +│ ▼ ▼ │ +│ ┌───────────────────────────────────────────────────────────────────────────┐ │ +│ │ TELEGRAM BOT API │ │ +│ │ │ │ +│ │ sendMessage/sendVoice ◄────────────────────► Webhook (incoming) │ │ +│ └──────────────────┬─────────────────────────────────┬──────────────────────┘ │ +│ │ │ │ +│ │ │ POST to webhook URL │ +│ │ ▼ │ +│ ┌──────────────────┼─────────────────────────────────────────────────────────┐ │ +│ │ │ SUPABASE (Cloud) │ │ +│ │ │ │ │ +│ │ │ ┌─────────────────────────────────────────────────┐ │ │ +│ │ │ │ telegram-webhook │ │ │ +│ │ │ │ Edge Function │ │ │ +│ │ │ │ │ │ │ +│ │ │ │ • Receives incoming messages │ │ │ +│ │ │ │ • Handles /start, /stop, /status commands │ │ │ +│ │ │ │ • For voice: downloads audio via Bot API │ │ │ +│ │ │ │ • Inserts into telegram_replies table │ │ │ +│ │ │ │ (text or audio_base64 for voice) │ │ │ +│ │ │ └──────────────────────┬──────────────────────────┘ │ │ +│ │ │ │ │ │ +│ │ │ │ INSERT │ │ +│ │ │ ▼ │ │ +│ │ │ ┌─────────────────────────────────────────────────┐ │ │ +│ │ │ │ PostgreSQL │ │ │ +│ │ │ │ │ │ │ +│ │ ┌──────────────┴──┐ │ telegram_subscribers (user subscriptions) │ │ │ +│ │ │ send-notify │ │ telegram_reply_contexts (active sessions) │ │ │ +│ │ │ Edge Function │ │ telegram_replies (incoming messages) │ │ │ +│ │ │ │ │ ▲ │ │ │ +│ │ │ • Lookup UUID │ │ │ Realtime │ │ │ +│ │ │ • Send to TG │ │ │ (WebSocket) │ │ │ +│ │ │ • Store context │ │ │ │ │ │ +│ │ └────────▲────────┘ 
└──────────────────────────┼───────────────────────┘ │ │ +│ │ │ │ │ │ +│ └────────────┼─────────────────────────────────────┼──────────────────────────┘ │ +│ │ │ │ +│ │ HTTPS POST │ WebSocket │ +│ │ (notification) │ (replies + audio) │ +│ │ │ │ +│ ┌────────────┼─────────────────────────────────────┼──────────────────────────┐ │ +│ │ │ LOCAL MACHINE │ │ │ +│ │ │ │ │ │ +│ │ │ ▼ │ │ +│ │ ┌────────┴────────────────────────────────────────────────────────────┐ │ │ +│ │ │ TTS Plugin (tts.ts) │ │ │ +│ │ │ │ │ │ +│ │ │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────────┐ │ │ │ +│ │ │ │ Outbound │ │ Inbound │ │ Voice Processing │ │ │ │ +│ │ │ │ │ │ │ │ │ │ │ │ +│ │ │ │ session.idle │ │ Supabase │ │ Receives audio_b64 │ │ │ │ +│ │ │ │ ───────────► │ │ Realtime sub │ │ via WebSocket │ │ │ │ +│ │ │ │ Generate TTS │ │ ◄─────────── │ │ ─────────────────► │ │ │ │ +│ │ │ │ ───────────► │ │ Forward to │ │ Transcribe locally │ │ │ │ +│ │ │ │ Send to Supabase│ │ OpenCode session│ │ (Whisper STT) │ │ │ │ +│ │ │ └─────────────────┘ └─────────────────┘ └──────────┬──────────┘ │ │ │ +│ │ │ │ │ │ │ +│ │ └───────────────────────────────────────────────────────┼─────────────┘ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────────────────────────────────────────────────────────┐│ │ +│ │ │ Whisper STT Server (localhost:8787) ││ │ +│ │ │ ││ │ +│ │ │ • FastAPI HTTP server ││ │ +│ │ │ • faster-whisper library ││ │ +│ │ │ • Converts OGG → WAV (ffmpeg) ││ │ +│ │ │ • Returns transcribed text ││ │ +│ │ └───────────────────────────────────────────────────────────────────────┘│ │ +│ │ │ │ +│ │ ┌───────────────────────────────────────────────────────────────────────┐│ │ +│ │ │ OpenCode Sessions ││ │ +│ │ │ ││ │ +│ │ │ ses_abc123 ses_def456 ses_ghi789 ││ │ +│ │ │ (working on (working on (idle) ││ │ +│ │ │ auth module) API routes) ││ │ +│ │ └───────────────────────────────────────────────────────────────────────┘│ │ +│ 
└─────────────────────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────────┘ +``` + +## Message Flows + +### 1. Outbound: Task Completion Notification + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ OpenCode │ │ TTS Plugin │ │ send-notify │ │ Telegram │ +│ Session │ │ │ │ Edge Func │ │ User │ +└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ │ + │ session.idle │ │ │ + │──────────────────>│ │ │ + │ │ │ │ + │ │ Generate TTS │ │ + │ │ (Coqui/OS) │ │ + │ │ │ │ + │ │ POST /send-notify │ │ + │ │ {uuid, text, │ │ + │ │ session_id, │ │ + │ │ voice_base64} │ │ + │ │──────────────────>│ │ + │ │ │ │ + │ │ │ Store reply_context + │ │ │ (session_id, uuid)│ + │ │ │ │ + │ │ │ sendMessage() │ + │ │ │ sendVoice() │ + │ │ │──────────────────>│ + │ │ │ │ + │ │ │ │ 📱 Notification + │ │ │ │ received! +``` + +### 2. Inbound: Text Reply + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Telegram │ │ telegram- │ │ Supabase │ │ TTS Plugin │ +│ User │ │ webhook │ │ Realtime │ │ │ +└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ │ + │ Reply: "Add tests"│ │ │ + │──────────────────>│ │ │ + │ │ │ │ + │ │ Lookup active │ │ + │ │ reply_context │ │ + │ │ │ │ + │ │ INSERT into │ │ + │ │ telegram_replies │ │ + │ │ {session_id, │ │ + │ │ reply_text} │ │ + │ │──────────────────>│ │ + │ │ │ │ + │ │ │ WebSocket push │ + │ │ │ (postgres_changes)│ + │ │ │──────────────────>│ + │ │ │ │ + │ │ │ │ Forward to + │ │ │ │ OpenCode session + │ │ │ │ + │ ✓ Reply sent │ │ │ + │<──────────────────│ │ │ +``` + +### 3. 
Inbound: Voice Message + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Telegram │ │ telegram- │ │ Supabase │ │ TTS Plugin │ │ Whisper │ +│ User │ │ webhook │ │ Realtime │ │ │ │ Server │ +└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ │ │ + │ 🎤 Voice message │ │ │ │ + │──────────────────>│ │ │ │ + │ │ │ │ │ + │ │ getFile(file_id) │ │ │ + │ │ Download audio │ │ │ + │ │ from Telegram API │ │ │ + │ │ │ │ │ + │ │ INSERT into │ │ │ + │ │ telegram_replies │ │ │ + │ │ {session_id, │ │ │ + │ │ audio_base64, │ │ │ + │ │ is_voice: true} │ │ │ + │ │──────────────────>│ │ │ + │ │ │ │ │ + │ │ │ WebSocket push │ │ + │ │ │──────────────────>│ │ + │ │ │ │ │ + │ │ │ │ POST /transcribe │ + │ │ │ │ {audio_base64} │ + │ │ │ │──────────────────>│ + │ │ │ │ │ + │ │ │ │ Transcribe │ + │ │ │ │ (faster-whisper) + │ │ │ │ │ + │ │ │ │ {text: "Add tests"} + │ │ │ │<──────────────────│ + │ │ │ │ │ + │ │ │ │ Forward to │ + │ │ │ │ OpenCode session │ + │ │ │ │ │ + │ ✓ Voice processed │ │ │ │ + │<──────────────────│ │ │ │ +``` + +## Key Design Decisions + +### Audio Data Flow (Voice Messages) + +1. **Edge Function downloads audio** - Has BOT_TOKEN, can access Telegram file API +2. **Audio sent via WebSocket** - Temporary transport, not stored long-term +3. **Plugin transcribes locally** - Whisper STT on localhost:8787 +4. **Only text forwarded to session** - Audio discarded after transcription + +### Why Local Transcription? 
+ +- **Privacy**: Once delivered to the plugin, audio is transcribed on the local machine and is never sent to a third-party STT service +- **Speed**: Local Whisper is fast, no cloud API latency +- **Cost**: No per-request STT API fees +- **Offline**: The transcription step itself needs no internet access (after initial model download) + +### Data Retention + +| Table | Retention | Purpose | +|--------------------------|-----------|----------------------------------| +| telegram_subscribers | Permanent | User subscription info | +| telegram_reply_contexts | 24 hours | Active session routing | +| telegram_replies | Ephemeral | Transport for replies + audio | + +## Configuration + +### tts.json + +```json +{ + "enabled": true, + "engine": "coqui", + "telegram": { + "enabled": true, + "uuid": "your-uuid-here", + "receiveReplies": true + }, + "whisper": { + "enabled": true, + "model": "base", + "port": 8787 + } +} +``` + +### Environment Variables (Edge Functions) + +Set via `supabase secrets set`: +- `TELEGRAM_BOT_TOKEN` - Bot API token +- `SUPABASE_SERVICE_ROLE_KEY` - Auto-set by Supabase + +## Files + +``` +opencode-reflection-plugin/ +├── tts.ts # Main plugin +│ ├── sendTelegramNotification() # Outbound notifications +│ ├── subscribeToReplies() # WebSocket subscription (handles both text + voice) +│ └── transcribeWithWhisper() # Local STT for voice messages +│ +├── whisper/ +│ └── whisper_server.py # Local Whisper HTTP server +│ +├── supabase/ +│ ├── functions/ +│ │ ├── send-notify/index.ts # Send notifications +│ │ └── telegram-webhook/index.ts # Receive messages (downloads voice audio) +│ │ +│ └── migrations/ +│ ├── 20240113000000_create_subscribers.sql +│ ├── 20240114000000_add_telegram_replies.sql +│ └── 20240116000000_add_voice_to_replies.sql # Voice support in replies table +│ +└── docs/ + └── telegram.md # This file +``` diff --git a/package-lock.json b/package-lock.json index 4dcc9b8..762dc32 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 
+8,9 @@ "name": "opencode-reflection-plugin", "version": "1.0.0", "license": "MIT", + "dependencies": { + 
"@supabase/supabase-js": "^2.49.0" + }, "devDependencies": { "@opencode-ai/plugin": "latest", "@opencode-ai/sdk": "latest", @@ -31,16 +34,125 @@ "integrity": "sha512-Nz9Di8UD/GK01w3N+jpiGNB733pYkNY8RNLbuE/HUxEGSP5apbXBY0IdhbW7859sXZZK38kF1NqOx4UxwBf4Bw==", "dev": true }, + "node_modules/@supabase/auth-js": { + "version": "2.91.1", + "resolved": "https://registry.npmjs.org/@supabase/auth-js/-/auth-js-2.91.1.tgz", + "integrity": "sha512-3gFGMPuif2BOuAHXLAGsoOlDa64PROct1v7G94pMnvUAhh75u6+vnx4MYz1wyoyDBN5lCkJPGQNg5+RIgqxnpA==", + "license": "MIT", + "dependencies": { + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/functions-js": { + "version": "2.91.1", + "resolved": "https://registry.npmjs.org/@supabase/functions-js/-/functions-js-2.91.1.tgz", + "integrity": "sha512-xKepd3HZ6K6rKibriehKggIegsoz+jjV67tikN51q/YQq3AlUAkjUMSnMrqs8t5LMlAi+a3dJU812acXanR0cw==", + "license": "MIT", + "dependencies": { + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/postgrest-js": { + "version": "2.91.1", + "resolved": "https://registry.npmjs.org/@supabase/postgrest-js/-/postgrest-js-2.91.1.tgz", + "integrity": "sha512-UKumTC6SGHd65G/5Gj0V58u+SkUyiH4zEJ8OP2eb06+Tqnges1E/3Tl7lyq2qbcMP8nEyH/0M7m2bYjrn++haw==", + "license": "MIT", + "dependencies": { + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/realtime-js": { + "version": "2.91.1", + "resolved": "https://registry.npmjs.org/@supabase/realtime-js/-/realtime-js-2.91.1.tgz", + "integrity": "sha512-Y4rifuvzekFgd2hUfiEvcMoh/JU3s1hmpWYS7tNGL2QHuFfWg8a4w/qg5qoSMVDvgGRz6G4L6yB1FaQRTplENQ==", + "license": "MIT", + "dependencies": { + "@types/phoenix": "^1.6.6", + "@types/ws": "^8.18.1", + "tslib": "2.8.1", + "ws": "^8.18.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/storage-js": { + "version": "2.91.1", + "resolved": 
"https://registry.npmjs.org/@supabase/storage-js/-/storage-js-2.91.1.tgz", + "integrity": "sha512-hMJNT2tSleOrWwx4FmHTpihIA2PRDixAsWflECuQ4YDkeduBZGX5m2txnstMnteWW+H+mm+92WRRFLuidXqbfA==", + "license": "MIT", + "dependencies": { + "iceberg-js": "^0.8.1", + "tslib": "2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@supabase/supabase-js": { + "version": "2.91.1", + "resolved": "https://registry.npmjs.org/@supabase/supabase-js/-/supabase-js-2.91.1.tgz", + "integrity": "sha512-57Fb4s5nfLn5ed2a1rPtl+LI1Wbtms8MS4qcUa0w6luaStBlFhmSeD2TLBgJWdMIupWRF6iFTH4QTrO2+pG/ZQ==", + "license": "MIT", + "dependencies": { + "@supabase/auth-js": "2.91.1", + "@supabase/functions-js": "2.91.1", + "@supabase/postgrest-js": "2.91.1", + "@supabase/realtime-js": "2.91.1", + "@supabase/storage-js": "2.91.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, "node_modules/@types/node": { "version": "25.0.2", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.2.tgz", "integrity": "sha512-gWEkeiyYE4vqjON/+Obqcoeffmk0NF15WSBwSs7zwVA2bAbTaE0SJ7P0WNGoJn8uE7fiaV5a7dKYIJriEqOrmA==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~7.16.0" } }, + "node_modules/@types/phoenix": { + "version": "1.6.7", + "resolved": "https://registry.npmjs.org/@types/phoenix/-/phoenix-1.6.7.tgz", + "integrity": "sha512-oN9ive//QSBkf19rfDv45M7eZPi0eEXylht2OLEXicu5b4KoQ1OzXIw+xDSGWxSxe1JmepRR/ZH283vsu518/Q==", + "license": "MIT" + }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/iceberg-js": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/iceberg-js/-/iceberg-js-0.8.1.tgz", + "integrity": 
"sha512-1dhVQZXhcHje7798IVM+xoo/1ZdVfzOMIc8/rgVSijRK38EDqOJoGula9N/8ZI5RD8QTxNQtK/Gozpr+qUqRRA==", + "license": "MIT", + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "license": "0BSD" + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -59,9 +171,29 @@ "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "dev": true, "license": "MIT" }, + "node_modules/ws": { + "version": "8.19.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", + "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/zod": { "version": "4.1.8", "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.8.tgz", diff --git a/package.json b/package.json index f33330b..c76beea 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,9 @@ ], "author": "", "license": "MIT", + "dependencies": { + "@supabase/supabase-js": "^2.49.0" + }, "devDependencies": { "@opencode-ai/plugin": "latest", "@opencode-ai/sdk": "latest", diff --git a/supabase/.gitignore b/supabase/.gitignore new file mode 100644 index 0000000..ad9264f --- /dev/null +++ b/supabase/.gitignore @@ -0,0 +1,8 @@ +# Supabase +.branches +.temp + +# dotenvx +.env.keys +.env.local +.env.*.local diff --git a/supabase/config.toml 
b/supabase/config.toml new file mode 100644 index 0000000..869d08d --- /dev/null +++ b/supabase/config.toml @@ -0,0 +1,382 @@ +# For detailed configuration reference documentation, visit: +# https://supabase.com/docs/guides/local-development/cli/config +# A string used to distinguish different Supabase projects on the same host. Defaults to the +# working directory name when running `supabase init`. +project_id = "opencode-reflection-plugin" + +[api] +enabled = true +# Port to use for the API URL. +port = 54321 +# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API +# endpoints. `public` and `graphql_public` schemas are included by default. +schemas = ["public", "graphql_public"] +# Extra schemas to add to the search_path of every request. +extra_search_path = ["public", "extensions"] +# The maximum number of rows returns from a view, table, or stored procedure. Limits payload size +# for accidental or malicious requests. +max_rows = 1000 + +[api.tls] +# Enable HTTPS endpoints locally using a self-signed certificate. +enabled = false +# Paths to self-signed certificate pair. +# cert_path = "../certs/my-cert.pem" +# key_path = "../certs/my-key.pem" + +[db] +# Port to use for the local database URL. +port = 54322 +# Port used by db diff command to initialize the shadow database. +shadow_port = 54320 +# The database major version to use. This has to be the same as your remote database's. Run `SHOW +# server_version;` on the remote database to check. +major_version = 17 + +[db.pooler] +enabled = false +# Port to use for the local connection pooler. +port = 54329 +# Specifies when a server connection can be reused by other clients. +# Configure one of the supported pooler modes: `transaction`, `session`. +pool_mode = "transaction" +# How many server connections to allow per user/database pair. +default_pool_size = 20 +# Maximum number of client connections allowed. 
+max_client_conn = 100 + +# [db.vault] +# secret_key = "env(SECRET_VALUE)" + +[db.migrations] +# If disabled, migrations will be skipped during a db push or reset. +enabled = true +# Specifies an ordered list of schema files that describe your database. +# Supports glob patterns relative to supabase directory: "./schemas/*.sql" +schema_paths = [] + +[db.seed] +# If enabled, seeds the database after migrations during a db reset. +enabled = true +# Specifies an ordered list of seed files to load during db reset. +# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +sql_paths = ["./seed.sql"] + +[db.network_restrictions] +# Enable management of network restrictions. +enabled = false +# List of IPv4 CIDR blocks allowed to connect to the database. +# Defaults to allow all IPv4 connections. Set empty array to block all IPs. +allowed_cidrs = ["0.0.0.0/0"] +# List of IPv6 CIDR blocks allowed to connect to the database. +# Defaults to allow all IPv6 connections. Set empty array to block all IPs. +allowed_cidrs_v6 = ["::/0"] + +[realtime] +enabled = true +# Bind realtime via either IPv4 or IPv6. (default: IPv4) +# ip_version = "IPv6" +# The maximum length in bytes of HTTP request headers. (default: 4096) +# max_header_length = 4096 + +[studio] +enabled = true +# Port to use for Supabase Studio. +port = 54323 +# External URL of the API server that frontend connects to. +api_url = "http://127.0.0.1" +# OpenAI API Key to use for Supabase AI in the Supabase Studio. +openai_api_key = "env(OPENAI_API_KEY)" + +# Email testing server. Emails sent with the local dev setup are not actually sent - rather, they +# are monitored, and you can view the emails that would have been sent from the web interface. +[inbucket] +enabled = true +# Port to use for the email testing server web interface. +port = 54324 +# Uncomment to expose additional ports for testing user applications that send emails. 
+# smtp_port = 54325 +# pop3_port = 54326 +# admin_email = "admin@email.com" +# sender_name = "Admin" + +[storage] +enabled = true +# The maximum file size allowed (e.g. "5MB", "500KB"). +file_size_limit = "50MiB" + +# Uncomment to configure local storage buckets +# [storage.buckets.images] +# public = false +# file_size_limit = "50MiB" +# allowed_mime_types = ["image/png", "image/jpeg"] +# objects_path = "./images" + +# Allow connections via S3 compatible clients +[storage.s3_protocol] +enabled = true + +# Image transformation API is available to Supabase Pro plan. +# [storage.image_transformation] +# enabled = true + +# Store analytical data in S3 for running ETL jobs over Iceberg Catalog +# This feature is only available on the hosted platform. +[storage.analytics] +enabled = false +max_namespaces = 5 +max_tables = 10 +max_catalogs = 2 + +# Analytics Buckets is available to Supabase Pro plan. +# [storage.analytics.buckets.my-warehouse] + +# Store vector embeddings in S3 for large and durable datasets +# This feature is only available on the hosted platform. +[storage.vector] +enabled = false +max_buckets = 10 +max_indexes = 5 + +# Vector Buckets is available to Supabase Pro plan. +# [storage.vector.buckets.documents-openai] + +[auth] +enabled = true +# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used +# in emails. +site_url = "http://127.0.0.1:3000" +# A list of *exact* URLs that auth providers are permitted to redirect to post authentication. +additional_redirect_urls = ["https://127.0.0.1:3000"] +# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week). +jwt_expiry = 3600 +# JWT issuer URL. If not set, defaults to the local API URL (http://127.0.0.1:/auth/v1). +# jwt_issuer = "" +# Path to JWT signing key. DO NOT commit your signing keys file to git. +# signing_keys_path = "./signing_keys.json" +# If disabled, the refresh token will never expire. 
+enable_refresh_token_rotation = true +# Allows refresh tokens to be reused after expiry, up to the specified interval in seconds. +# Requires enable_refresh_token_rotation = true. +refresh_token_reuse_interval = 10 +# Allow/disallow new user signups to your project. +enable_signup = true +# Allow/disallow anonymous sign-ins to your project. +enable_anonymous_sign_ins = false +# Allow/disallow testing manual linking of accounts +enable_manual_linking = false +# Passwords shorter than this value will be rejected as weak. Minimum 6, recommended 8 or more. +minimum_password_length = 6 +# Passwords that do not meet the following requirements will be rejected as weak. Supported values +# are: `letters_digits`, `lower_upper_letters_digits`, `lower_upper_letters_digits_symbols` +password_requirements = "" + +[auth.rate_limit] +# Number of emails that can be sent per hour. Requires auth.email.smtp to be enabled. +email_sent = 2 +# Number of SMS messages that can be sent per hour. Requires auth.sms to be enabled. +sms_sent = 30 +# Number of anonymous sign-ins that can be made per hour per IP address. Requires enable_anonymous_sign_ins = true. +anonymous_users = 30 +# Number of sessions that can be refreshed in a 5 minute interval per IP address. +token_refresh = 150 +# Number of sign up and sign-in requests that can be made in a 5 minute interval per IP address (excludes anonymous users). +sign_in_sign_ups = 30 +# Number of OTP / Magic link verifications that can be made in a 5 minute interval per IP address. +token_verifications = 30 +# Number of Web3 logins that can be made in a 5 minute interval per IP address. +web3 = 30 + +# Configure one of the supported captcha providers: `hcaptcha`, `turnstile`. +# [auth.captcha] +# enabled = true +# provider = "hcaptcha" +# secret = "" + +[auth.email] +# Allow/disallow new user signups via email to your project. 
+enable_signup = true +# If enabled, a user will be required to confirm any email change on both the old, and new email +# addresses. If disabled, only the new email is required to confirm. +double_confirm_changes = true +# If enabled, users need to confirm their email address before signing in. +enable_confirmations = false +# If enabled, users will need to reauthenticate or have logged in recently to change their password. +secure_password_change = false +# Controls the minimum amount of time that must pass before sending another signup confirmation or password reset email. +max_frequency = "1s" +# Number of characters used in the email OTP. +otp_length = 6 +# Number of seconds before the email OTP expires (defaults to 1 hour). +otp_expiry = 3600 + +# Use a production-ready SMTP server +# [auth.email.smtp] +# enabled = true +# host = "smtp.sendgrid.net" +# port = 587 +# user = "apikey" +# pass = "env(SENDGRID_API_KEY)" +# admin_email = "admin@email.com" +# sender_name = "Admin" + +# Uncomment to customize email template +# [auth.email.template.invite] +# subject = "You have been invited" +# content_path = "./supabase/templates/invite.html" + +# Uncomment to customize notification email template +# [auth.email.notification.password_changed] +# enabled = true +# subject = "Your password has been changed" +# content_path = "./templates/password_changed_notification.html" + +[auth.sms] +# Allow/disallow new user signups via SMS to your project. +enable_signup = false +# If enabled, users need to confirm their phone number before signing in. +enable_confirmations = false +# Template for sending OTP to users +template = "Your code is {{ .Code }}" +# Controls the minimum amount of time that must pass before sending another sms otp. +max_frequency = "5s" + +# Use pre-defined map of phone number to OTP for testing. +# [auth.sms.test_otp] +# 4152127777 = "123456" + +# Configure logged in session timeouts. +# [auth.sessions] +# Force log out after the specified duration. 
+# timebox = "24h" +# Force log out if the user has been inactive longer than the specified duration. +# inactivity_timeout = "8h" + +# This hook runs before a new user is created and allows developers to reject the request based on the incoming user object. +# [auth.hook.before_user_created] +# enabled = true +# uri = "pg-functions://postgres/auth/before-user-created-hook" + +# This hook runs before a token is issued and allows you to add additional claims based on the authentication method used. +# [auth.hook.custom_access_token] +# enabled = true +# uri = "pg-functions:////" + +# Configure one of the supported SMS providers: `twilio`, `twilio_verify`, `messagebird`, `textlocal`, `vonage`. +[auth.sms.twilio] +enabled = false +account_sid = "" +message_service_sid = "" +# DO NOT commit your Twilio auth token to git. Use environment variable substitution instead: +auth_token = "env(SUPABASE_AUTH_SMS_TWILIO_AUTH_TOKEN)" + +# Multi-factor-authentication is available to Supabase Pro plan. +[auth.mfa] +# Control how many MFA factors can be enrolled at once per user. +max_enrolled_factors = 10 + +# Control MFA via App Authenticator (TOTP) +[auth.mfa.totp] +enroll_enabled = false +verify_enabled = false + +# Configure MFA via Phone Messaging +[auth.mfa.phone] +enroll_enabled = false +verify_enabled = false +otp_length = 6 +template = "Your code is {{ .Code }}" +max_frequency = "5s" + +# Configure MFA via WebAuthn +# [auth.mfa.web_authn] +# enroll_enabled = true +# verify_enabled = true + +# Use an external OAuth provider. The full list of providers are: `apple`, `azure`, `bitbucket`, +# `discord`, `facebook`, `github`, `gitlab`, `google`, `keycloak`, `linkedin_oidc`, `notion`, `twitch`, +# `twitter`, `slack`, `spotify`, `workos`, `zoom`. +[auth.external.apple] +enabled = false +client_id = "" +# DO NOT commit your OAuth provider secret to git. 
Use environment variable substitution instead: +secret = "env(SUPABASE_AUTH_EXTERNAL_APPLE_SECRET)" +# Overrides the default auth redirectUrl. +redirect_uri = "" +# Overrides the default auth provider URL. Used to support self-hosted gitlab, single-tenant Azure, +# or any other third-party OIDC providers. +url = "" +# If enabled, the nonce check will be skipped. Required for local sign in with Google auth. +skip_nonce_check = false +# If enabled, it will allow the user to successfully authenticate when the provider does not return an email address. +email_optional = false + +# Allow Solana wallet holders to sign in to your project via the Sign in with Solana (SIWS, EIP-4361) standard. +# You can configure "web3" rate limit in the [auth.rate_limit] section and set up [auth.captcha] if self-hosting. +[auth.web3.solana] +enabled = false + +# Use Firebase Auth as a third-party provider alongside Supabase Auth. +[auth.third_party.firebase] +enabled = false +# project_id = "my-firebase-project" + +# Use Auth0 as a third-party provider alongside Supabase Auth. +[auth.third_party.auth0] +enabled = false +# tenant = "my-auth0-tenant" +# tenant_region = "us" + +# Use AWS Cognito (Amplify) as a third-party provider alongside Supabase Auth. +[auth.third_party.aws_cognito] +enabled = false +# user_pool_id = "my-user-pool-id" +# user_pool_region = "us-east-1" + +# Use Clerk as a third-party provider alongside Supabase Auth. +[auth.third_party.clerk] +enabled = false +# Obtain from https://clerk.com/setup/supabase +# domain = "example.clerk.accounts.dev" + +# OAuth server configuration +[auth.oauth_server] +# Enable OAuth server functionality +enabled = false +# Path for OAuth consent flow UI +authorization_url_path = "/oauth/consent" +# Allow dynamic client registration +allow_dynamic_registration = false + +[edge_runtime] +enabled = true +# Supported request policies: `oneshot`, `per_worker`. +# `per_worker` (default) — enables hot reload during local development. 
+# `oneshot` — fallback mode if hot reload causes issues (e.g. in large repos or with symlinks). +policy = "per_worker" +# Port to attach the Chrome inspector for debugging edge functions. +inspector_port = 8083 +# The Deno major version to use. +deno_version = 2 + +# [edge_runtime.secrets] +# secret_key = "env(SECRET_VALUE)" + +[analytics] +enabled = true +port = 54327 +# Configure one of the supported backends: `postgres`, `bigquery`. +backend = "postgres" + +# Experimental features may be deprecated any time +[experimental] +# Configures Postgres storage engine to use OrioleDB (S3) +orioledb_version = "" +# Configures S3 bucket URL, eg. .s3-.amazonaws.com +s3_host = "env(S3_HOST)" +# Configures S3 bucket region, eg. us-east-1 +s3_region = "env(S3_REGION)" +# Configures AWS_ACCESS_KEY_ID for S3 bucket +s3_access_key = "env(S3_ACCESS_KEY)" +# Configures AWS_SECRET_ACCESS_KEY for S3 bucket +s3_secret_key = "env(S3_SECRET_KEY)" diff --git a/supabase/functions/send-notify/index.ts b/supabase/functions/send-notify/index.ts new file mode 100644 index 0000000..15949c7 --- /dev/null +++ b/supabase/functions/send-notify/index.ts @@ -0,0 +1,318 @@ +/** + * Send Notification Edge Function for OpenCode TTS Plugin + * + * Called by the OpenCode plugin to send text and voice messages to Telegram. + * Stores session context so users can reply to notifications. + * + * Request body: + * { + * uuid: string, // User's UUID + * text?: string, // Text message to send + * voice_base64?: string, // Base64 encoded OGG audio + * session_id?: string, // OpenCode session ID (for reply support) + * directory?: string, // Working directory (for context) + * } + */ + +import { createClient } from 'https://esm.sh/@supabase/supabase-js@2' + +const BOT_TOKEN = Deno.env.get('TELEGRAM_BOT_TOKEN')! +const SUPABASE_URL = Deno.env.get('SUPABASE_URL')! +const SUPABASE_SERVICE_ROLE_KEY = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')! 
// UUID v4 validation regex
const UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i

// Rate limiting: max 10 notifications per minute per UUID
const RATE_LIMIT_WINDOW_MS = 60 * 1000
const RATE_LIMIT_MAX_REQUESTS = 10
// Once the map holds this many UUIDs, expired windows are swept so it cannot grow without bound.
const RATE_LIMIT_PRUNE_THRESHOLD = 1000

interface RateLimitEntry {
  count: number
  windowStart: number
}

// NOTE(review): module-level state — presumably not shared between function
// instances, so the limit is best-effort. Confirm against the deployment model.
const rateLimitMap = new Map<string, RateLimitEntry>()

// Telegram rejects sendMessage payloads longer than 4096 characters.
const TELEGRAM_MAX_MESSAGE_LENGTH = 4096
// Notification text longer than this is truncated.
const MAX_TEXT_LENGTH = 4000
// Rough size check on the base64 payload: ~50MB of audio.
const MAX_VOICE_BASE64_LENGTH = 70_000_000

const MESSAGE_HEADER = '🔔 *OpenCode Task Complete*\n\n'
const TRUNCATION_SUFFIX = '...\n\n_(Message truncated)_'
const REPLY_HINT = '\n\n_💬 Reply to this message to continue the conversation_'

interface SendNotifyRequest {
  uuid: string
  text?: string
  voice_base64?: string
  session_id?: string // OpenCode session ID for reply support
  directory?: string // Working directory for context
}

/** Returns true when `str` is a syntactically valid UUID v4. */
function isValidUUID(str: string): boolean {
  return UUID_REGEX.test(str)
}

/**
 * Fixed-window rate limiter keyed by subscriber UUID.
 *
 * Counts the current request. Returns true when the UUID has already used
 * RATE_LIMIT_MAX_REQUESTS within the current RATE_LIMIT_WINDOW_MS window.
 */
function isRateLimited(uuid: string): boolean {
  const now = Date.now()
  const entry = rateLimitMap.get(uuid)

  if (!entry || now - entry.windowStart > RATE_LIMIT_WINDOW_MS) {
    // Sweep stale windows only when the map has grown large, to keep the common path cheap.
    if (rateLimitMap.size >= RATE_LIMIT_PRUNE_THRESHOLD) {
      for (const [key, value] of rateLimitMap) {
        if (now - value.windowStart > RATE_LIMIT_WINDOW_MS) {
          rateLimitMap.delete(key)
        }
      }
    }
    rateLimitMap.set(uuid, { count: 1, windowStart: now })
    return false
  }

  if (entry.count >= RATE_LIMIT_MAX_REQUESTS) {
    return true
  }

  entry.count++
  return false
}

/**
 * Builds the notification text, guaranteeing the result fits Telegram's limit.
 *
 * Text up to MAX_TEXT_LENGTH is sent whole. Longer text is cut so that
 * header + text + truncation suffix + optional reply hint stays within
 * TELEGRAM_MAX_MESSAGE_LENGTH. Cutting at a fixed 4000 characters overflowed
 * the limit whenever the reply hint was appended.
 */
function buildNotificationText(text: string, replyEnabled: boolean): string {
  const hint = replyEnabled ? REPLY_HINT : ''
  if (text.length <= MAX_TEXT_LENGTH) {
    return `${MESSAGE_HEADER}${text}${hint}`
  }

  const room =
    TELEGRAM_MAX_MESSAGE_LENGTH - MESSAGE_HEADER.length - TRUNCATION_SUFFIX.length - hint.length
  const kept = text.slice(0, Math.min(MAX_TEXT_LENGTH, room))
  return `${MESSAGE_HEADER}${kept}${TRUNCATION_SUFFIX}${hint}`
}

/** Decodes a base64 string into raw bytes. Throws if the input is not valid base64. */
function decodeBase64(base64: string): Uint8Array {
  return Uint8Array.from(atob(base64), (ch) => ch.charCodeAt(0))
}

/** Issues a single sendMessage call. `parse_mode` is omitted when `parseMode` is not given. */
function postTelegramMessage(chatId: number, text: string, parseMode?: string): Promise<Response> {
  const payload: Record<string, unknown> = { chat_id: chatId, text }
  if (parseMode) {
    payload.parse_mode = parseMode
  }
  return fetch(`https://api.telegram.org/bot${BOT_TOKEN}/sendMessage`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
}

/**
 * Sends a text message to a chat.
 *
 * The first attempt uses Markdown. If Telegram rejects the markup (an
 * unbalanced `_` or `*` in the task summary), the message is retried as plain
 * text so the notification is still delivered.
 *
 * @returns `success` plus the Telegram `message_id` when available. Never throws.
 */
async function sendTelegramMessage(chatId: number, text: string): Promise<{ success: boolean; messageId?: number }> {
  try {
    let response = await postTelegramMessage(chatId, text, 'Markdown')

    if (!response.ok) {
      const error = await response.text()
      console.error('Telegram sendMessage failed:', error)

      // Only a markup parsing failure is worth retrying without parse_mode.
      if (response.status !== 400 || !/parse entities/i.test(error)) {
        return { success: false }
      }

      response = await postTelegramMessage(chatId, text)
      if (!response.ok) {
        console.error('Telegram sendMessage failed:', await response.text())
        return { success: false }
      }
    }

    // Extract message_id from response for reply context tracking
    const result = (await response.json()) as { result?: { message_id?: number } }
    return { success: true, messageId: result.result?.message_id }
  } catch (error) {
    console.error('Failed to send Telegram message:', error)
    return { success: false }
  }
}

/**
 * Uploads OGG audio to Telegram as a voice message.
 * Falls back to sending it as a regular audio file when sendVoice is rejected.
 *
 * @returns true when either upload succeeded. Never throws.
 */
async function sendTelegramVoice(chatId: number, audioBase64: string): Promise<boolean> {
  try {
    // Create form data with the voice file
    const formData = new FormData()
    formData.append('chat_id', chatId.toString())
    formData.append('voice', new Blob([decodeBase64(audioBase64)], { type: 'audio/ogg' }), 'voice.ogg')

    const response = await fetch(`https://api.telegram.org/bot${BOT_TOKEN}/sendVoice`, {
      method: 'POST',
      body: formData,
    })

    if (!response.ok) {
      const error = await response.text()
      console.error('Telegram sendVoice failed:', error)

      // Fallback: try sending as audio file instead
      return await sendTelegramAudio(chatId, audioBase64)
    }
    return true
  } catch (error) {
    console.error('Failed to send Telegram voice:', error)
    return false
  }
}

/**
 * Uploads OGG audio to Telegram as an audio file titled "OpenCode Notification".
 *
 * @returns true on success. Never throws.
 */
async function sendTelegramAudio(chatId: number, audioBase64: string): Promise<boolean> {
  try {
    const formData = new FormData()
    formData.append('chat_id', chatId.toString())
    formData.append('audio', new Blob([decodeBase64(audioBase64)], { type: 'audio/ogg' }), 'notification.ogg')
    formData.append('title', 'OpenCode Notification')

    const response = await fetch(`https://api.telegram.org/bot${BOT_TOKEN}/sendAudio`, {
      method: 'POST',
      body: formData,
    })

    if (!response.ok) {
      const error = await response.text()
      console.error('Telegram sendAudio failed:', error)
      return false
    }
    return true
  } catch (error) {
    console.error('Failed to send Telegram audio:', error)
    return false
  }
}

const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
}

/** Builds a JSON response carrying the shared CORS headers. */
function jsonResponse(payload: Record<string, unknown>, status: number): Response {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { ...corsHeaders, 'Content-Type': 'application/json' },
  })
}

/**
 * Narrows an arbitrary JSON payload to the request shape.
 * Fields that are missing or not strings come back as undefined.
 */
function parseRequestBody(payload: unknown): Partial<SendNotifyRequest> {
  if (typeof payload !== 'object' || payload === null) {
    return {}
  }
  const fields = payload as Record<string, unknown>
  const pick = (key: keyof SendNotifyRequest): string | undefined => {
    const value = fields[key]
    return typeof value === 'string' ? value : undefined
  }
  return {
    uuid: pick('uuid'),
    text: pick('text'),
    voice_base64: pick('voice_base64'),
    session_id: pick('session_id'),
    directory: pick('directory'),
  }
}

Deno.serve(async (req) => {
  // Handle CORS preflight
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders })
  }

  // Only accept POST requests
  if (req.method !== 'POST') {
    return jsonResponse({ success: false, error: 'Method not allowed' }, 405)
  }

  // Verify required environment variables
  if (!BOT_TOKEN || !SUPABASE_URL || !SUPABASE_SERVICE_ROLE_KEY) {
    console.error('Missing required environment variables')
    return jsonResponse({ success: false, error: 'Server configuration error' }, 500)
  }

  try {
    // A malformed body is the caller's mistake: answer 400 rather than 500.
    let payload: unknown
    try {
      payload = await req.json()
    } catch {
      return jsonResponse({ success: false, error: 'Invalid JSON body' }, 400)
    }
    const { uuid, text, voice_base64, session_id, directory } = parseRequestBody(payload)

    // Validate UUID
    if (!uuid || !isValidUUID(uuid)) {
      return jsonResponse({ success: false, error: 'Invalid or missing UUID' }, 400)
    }

    // Check rate limit
    if (isRateLimited(uuid)) {
      return jsonResponse(
        { success: false, error: 'Rate limit exceeded. Max 10 notifications per minute.' },
        429
      )
    }

    // Must have at least text or voice
    if (!text && !voice_base64) {
      return jsonResponse({ success: false, error: 'Must provide text or voice_base64' }, 400)
    }

    // Reject oversized audio before anything is sent, so a 400 never follows
    // a text message that was already delivered.
    if (voice_base64 && voice_base64.length > MAX_VOICE_BASE64_LENGTH) {
      return jsonResponse({ success: false, error: 'Voice file too large (max 50MB)' }, 400)
    }

    // Initialize Supabase client with service role
    const supabase = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)

    // Lookup subscriber by UUID
    const { data: subscriber, error: lookupError } = await supabase
      .from('telegram_subscribers')
      .select('chat_id, is_active')
      .eq('uuid', uuid)
      .single()

    if (lookupError || !subscriber) {
      return jsonResponse(
        { success: false, error: 'UUID not found. Use /start in Telegram bot to subscribe.' },
        404
      )
    }

    if (!subscriber.is_active) {
      return jsonResponse(
        { success: false, error: 'Subscription is inactive. Use /start in Telegram to reactivate.' },
        403
      )
    }

    const chatId = subscriber.chat_id
    let textSent = false
    let voiceSent = false
    let sentMessageId: number | undefined

    // Send text message (with a reply hint when session context is provided)
    if (text) {
      const messageResult = await sendTelegramMessage(chatId, buildNotificationText(text, !!session_id))
      textSent = messageResult.success
      sentMessageId = messageResult.messageId
    }

    // Send voice message
    if (voice_base64) {
      voiceSent = await sendTelegramVoice(chatId, voice_base64)
    }

    // Update notification stats
    if (textSent || voiceSent) {
      const { error: statsError } = await supabase.rpc('increment_notifications', { row_uuid: uuid })
      if (statsError) {
        // Stats are best-effort; the notification itself was delivered.
        console.error('Failed to update notification stats:', statsError)
      }
    }

    // Store reply context if session_id is provided (enables two-way communication)
    if (session_id && (textSent || voiceSent)) {
      try {
        // First, deactivate any previous contexts for this chat (user can only reply to most recent)
        const { error: deactivateError } = await supabase
          .from('telegram_reply_contexts')
          .update({ is_active: false })
          .eq('chat_id', chatId)
          .eq('is_active', true)

        if (deactivateError) {
          console.error('Failed to deactivate previous reply contexts:', deactivateError)
        }

        // Insert new reply context
        const { error: contextError } = await supabase
          .from('telegram_reply_contexts')
          .insert({
            chat_id: chatId,
            uuid,
            session_id,
            directory,
            message_id: sentMessageId,
            is_active: true,
            expires_at: new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString(), // 24 hours
          })

        if (contextError) {
          console.error('Failed to store reply context:', contextError)
          // Don't fail the request, notification was still sent
        }
      } catch (contextErr) {
        console.error('Error storing reply context:', contextErr)
      }
    }

    return jsonResponse(
      {
        success: true,
        text_sent: textSent,
        voice_sent: voiceSent,
        reply_enabled: !!session_id,
      },
      200
    )
  } catch (error) {
    console.error('Send notify error:', error)
    return jsonResponse({ success: false, error: 'Internal server error' }, 500)
  }
})
diff --git a/supabase/functions/telegram-webhook/index.ts b/supabase/functions/telegram-webhook/index.ts new file mode 100644 index 0000000..4c2df72 --- /dev/null +++ b/supabase/functions/telegram-webhook/index.ts @@ -0,0 +1,487 @@ +/** + * Telegram Webhook Handler for OpenCode Notifications + * + * This Edge Function handles incoming Telegram updates: + * - /start - Subscribe to notifications + * - /stop - Unsubscribe from notifications + * - /status - Check subscription status + * - Non-command messages - Forward as replies to active OpenCode sessions + */ + +import { createClient } from 'https://esm.sh/@supabase/supabase-js@2' + +const BOT_TOKEN = Deno.env.get('TELEGRAM_BOT_TOKEN')! +const SUPABASE_URL = Deno.env.get('SUPABASE_URL')! +const SUPABASE_SERVICE_ROLE_KEY = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')!
// UUID v4 validation regex
const UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i

/** Largest file a bot can fetch through `getFile` (Telegram Bot API limit: 20 MB). */
const MAX_TELEGRAM_DOWNLOAD_BYTES = 20 * 1024 * 1024

/** `voice` attachment of an incoming Telegram message. */
interface TelegramVoice {
  duration: number
  mime_type?: string
  file_id: string
  file_unique_id: string
  file_size?: number
}

/** `video_note` (round video) attachment of an incoming Telegram message. */
interface TelegramVideoNote {
  duration: number
  length: number
  file_id: string
  file_unique_id: string
  file_size?: number
}

/** `video` attachment of an incoming Telegram message. */
interface TelegramVideo {
  duration: number
  width: number
  height: number
  file_id: string
  file_unique_id: string
  file_size?: number
  mime_type?: string
}

/** Subset of the Telegram `Update` object that this webhook reads. */
interface TelegramUpdate {
  update_id: number
  message?: {
    message_id: number
    from?: {
      id: number
      is_bot: boolean
      first_name: string
      last_name?: string
      username?: string
    }
    chat: {
      id: number
      type: string
    }
    date: number
    text?: string
    voice?: TelegramVoice
    video_note?: TelegramVideoNote
    video?: TelegramVideo
  }
}

type TelegramMessage = NonNullable<TelegramUpdate['message']>

/** Normalised description of the audio-bearing attachment of a message. */
interface IncomingMedia {
  fileId: string
  fileType: 'voice' | 'video_note' | 'video'
  duration: number
  fileSize?: number
}

/** Returns true when `str` is a syntactically valid UUID v4 (case-insensitive). */
function isValidUUID(str: string): boolean {
  return UUID_REGEX.test(str)
}

/**
 * Sends a text message to a Telegram chat.
 *
 * @param chatId - Target chat.
 * @param text - Message body, interpreted according to `parseMode`.
 * @param parseMode - Telegram `parse_mode`; defaults to legacy Markdown.
 * @returns `true` when Telegram answered with a 2xx status, `false` on an HTTP
 *   error or network failure (failures are logged, never thrown).
 */
async function sendTelegramMessage(chatId: number, text: string, parseMode: string = 'Markdown'): Promise<boolean> {
  try {
    const response = await fetch(`https://api.telegram.org/bot${BOT_TOKEN}/sendMessage`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        chat_id: chatId,
        text,
        parse_mode: parseMode,
      }),
    })
    return response.ok
  } catch (error) {
    console.error('Failed to send Telegram message:', error)
    return false
  }
}

/** Picks the voice / video-note / video attachment of a message, in that priority order. */
function pickMedia(message: TelegramMessage): IncomingMedia | null {
  const { voice, video_note: videoNote, video } = message
  if (voice) {
    return { fileId: voice.file_id, fileType: 'voice', duration: voice.duration, fileSize: voice.file_size }
  }
  if (videoNote) {
    return { fileId: videoNote.file_id, fileType: 'video_note', duration: videoNote.duration, fileSize: videoNote.file_size }
  }
  if (video) {
    return { fileId: video.file_id, fileType: 'video', duration: video.duration, fileSize: video.file_size }
  }
  return null
}

/** Base64-encodes raw bytes, converting in chunks instead of one character at a time. */
function bytesToBase64(bytes: Uint8Array): string {
  const chunkSize = 0x8000 // keeps each String.fromCharCode call well below argument-count limits
  let binary = ''
  for (let offset = 0; offset < bytes.length; offset += chunkSize) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + chunkSize))
  }
  return btoa(binary)
}

/**
 * Downloads a Telegram file (getFile + file download) and returns it base64-encoded.
 *
 * @returns The encoded content, or `null` when any step fails (the failure is logged).
 */
async function downloadTelegramFileAsBase64(fileId: string): Promise<string | null> {
  try {
    // Step 1: resolve the file_id to a download path
    const fileInfoResponse = await fetch(
      `https://api.telegram.org/bot${BOT_TOKEN}/getFile?file_id=${encodeURIComponent(fileId)}`
    )
    if (!fileInfoResponse.ok) return null

    const fileInfo = await fileInfoResponse.json() as { ok: boolean; result?: { file_path: string } }
    if (!fileInfo.ok || !fileInfo.result?.file_path) return null

    // Step 2: download the actual bytes
    const fileResponse = await fetch(`https://api.telegram.org/file/bot${BOT_TOKEN}/${fileInfo.result.file_path}`)
    if (!fileResponse.ok) return null

    return bytesToBase64(new Uint8Array(await fileResponse.arrayBuffer()))
  } catch (downloadError) {
    console.error('Error downloading audio from Telegram:', downloadError)
    return null
  }
}

/**
 * Splits a bot command into its name and arguments.
 *
 * The name is lower-cased and any `@botname` suffix (used in group chats) is removed,
 * so `/Stop@MyBot` parses as `/stop`. Returns `null` for non-command text.
 */
function parseCommand(text: string): { command: string; args: string[] } | null {
  if (!text.startsWith('/')) return null
  const [head, ...args] = text.split(/\s+/)
  return { command: head.split('@')[0].toLowerCase(), args }
}

/**
 * Telegram webhook entry point.
 *
 * Handles, in order: voice/video messages (stored for local transcription by the plugin),
 * the /start, /stop, /status and /help commands, and plain text, which is stored as a reply
 * to the chat's most recent active OpenCode session. Always answers Telegram with 200 "OK"
 * once the update has been handled; unexpected exceptions produce a 500.
 */
Deno.serve(async (req) => {
  // Only accept POST requests
  if (req.method !== 'POST') {
    return new Response('Method not allowed', { status: 405 })
  }

  // Verify required environment variables
  if (!BOT_TOKEN || !SUPABASE_URL || !SUPABASE_SERVICE_ROLE_KEY) {
    console.error('Missing required environment variables')
    return new Response('Server configuration error', { status: 500 })
  }

  // This endpoint is public and writes with the service-role key, so forged updates must be
  // rejected. When TELEGRAM_WEBHOOK_SECRET is set (and passed as `secret_token` to setWebhook),
  // Telegram echoes it in this header. Unset keeps the previous behaviour.
  const expectedSecret = Deno.env.get('TELEGRAM_WEBHOOK_SECRET')
  if (expectedSecret && req.headers.get('X-Telegram-Bot-Api-Secret-Token') !== expectedSecret) {
    console.error('Rejected webhook call with missing or invalid secret token')
    return new Response('Forbidden', { status: 403 })
  }

  try {
    const update: TelegramUpdate = await req.json()
    const message = update.message

    // Must have a message with chat
    if (!message?.chat) {
      return new Response('OK')
    }

    const chatId = message.chat.id
    const messageId = message.message_id
    const username = message.from?.username
    const firstName = message.from?.first_name

    const ok = () => new Response('OK')
    const reply = (body: string) => sendTelegramMessage(chatId, body)

    // Initialize Supabase client with service role
    const supabase = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)

    // ==================== HANDLE VOICE/VIDEO MESSAGES ====================
    const media = pickMedia(message)

    if (media) {
      const mediaLabel = media.fileType === 'voice' ? 'voice' : 'video'

      // Get active reply context to know which session to send to
      const { data: context, error: contextError } = await supabase
        .rpc('get_active_reply_context', { p_chat_id: chatId })

      // A failed lookup is not the same as "no session": report it as an error.
      if (contextError) {
        console.error('Error looking up reply context:', contextError)
        await reply(
          `❌ *Error processing voice message*\n\n` +
          `Please try again later.`
        )
        return ok()
      }

      if (!context || context.length === 0) {
        await reply(
          `ℹ️ *No active session*\n\n` +
          `There's no active OpenCode session to send voice messages to.\n\n` +
          `Start a new task in OpenCode first to receive notifications.`
        )
        return ok()
      }

      const activeContext = context[0]

      // getFile cannot serve anything above the Bot API download limit, so fail fast
      // with an actionable message instead of a generic download error.
      if (media.fileSize !== undefined && media.fileSize > MAX_TELEGRAM_DOWNLOAD_BYTES) {
        await reply(
          `❌ *Message too large*\n\n` +
          `Telegram only lets bots download files up to 20 MB. Please send a shorter ${mediaLabel} message.`
        )
        return ok()
      }

      // Download the audio file from Telegram - we can't proceed without it
      const audioBase64 = await downloadTelegramFileAsBase64(media.fileId)
      if (!audioBase64) {
        console.error('Failed to download audio from Telegram')
        await reply(
          `❌ *Failed to download voice message*\n\n` +
          `Could not retrieve the audio from Telegram. Please try again.`
        )
        return ok()
      }

      // Store voice message in telegram_replies table for plugin to process
      // Plugin will receive this via Supabase Realtime and transcribe locally with Whisper
      const { error: insertError } = await supabase
        .from('telegram_replies')
        .insert({
          uuid: activeContext.uuid,
          session_id: activeContext.session_id,
          directory: activeContext.directory,
          telegram_chat_id: chatId,
          telegram_message_id: messageId,
          reply_text: null, // Will be filled after transcription by plugin
          is_voice: true,
          audio_base64: audioBase64,
          voice_file_type: media.fileType,
          voice_duration_seconds: media.duration,
          processed: false,
        })

      if (insertError) {
        console.error('Error storing voice message:', insertError)
        await reply(
          `❌ *Failed to process voice message*\n\n` +
          `Please try again.`
        )
        return ok()
      }

      // Confirm to user
      await reply(
        `🎤 *Voice message received*\n\n` +
        `Your ${media.duration}s ${mediaLabel} message will be transcribed and sent to OpenCode.\n\n` +
        `_Processing may take a few seconds..._`
      )

      return ok()
    }

    // ==================== HANDLE TEXT MESSAGES ====================
    const text = message.text?.trim()

    // Skip if no text
    if (!text) {
      return ok()
    }

    const parsed = parseCommand(text)

    // Handle /start command
    if (parsed?.command === '/start') {
      const uuid = parsed.args[0]

      if (!uuid) {
        await reply(
          `*Welcome to OpenCode Notifications!* 🔔\n\n` +
          `To subscribe, send your UUID:\n` +
          `\`/start <your-uuid>\`\n\n` +
          `*How to get your UUID:*\n` +
          `1. Generate one: \`uuidgen\` (in terminal)\n` +
          `2. Add to your config file:\n` +
          `\`~/.config/opencode/tts.json\`\n\n` +
          `\`\`\`json\n{\n  "telegram": {\n    "enabled": true,\n    "uuid": "your-uuid-here"\n  }\n}\`\`\`\n\n` +
          `Need help? Visit: github.com/opencode-ai/opencode`
        )
        return ok()
      }

      if (!isValidUUID(uuid)) {
        await reply(
          `❌ *Invalid UUID format*\n\n` +
          `Please provide a valid UUID v4.\n` +
          `Generate one with: \`uuidgen\``
        )
        return ok()
      }

      // Check if this UUID is already linked to a different chat.
      // A failed lookup must stop here: falling through to the upsert could silently
      // re-link somebody else's UUID to this chat.
      const { data: existingRows, error: lookupError } = await supabase
        .from('telegram_subscribers')
        .select('chat_id')
        .eq('uuid', uuid)
        .limit(1)

      if (lookupError) {
        console.error('Database error:', lookupError)
        await reply(
          `❌ *Subscription failed*\n\n` +
          `Please try again later or contact support.`
        )
        return ok()
      }

      const existing = existingRows?.[0]
      if (existing && existing.chat_id !== chatId) {
        await reply(
          `⚠️ *UUID already in use*\n\n` +
          `This UUID is linked to another Telegram account.\n` +
          `Please generate a new UUID with \`uuidgen\`.`
        )
        return ok()
      }

      // Upsert subscription
      const { error } = await supabase
        .from('telegram_subscribers')
        .upsert({
          uuid,
          chat_id: chatId,
          username,
          first_name: firstName,
          is_active: true,
        }, { onConflict: 'uuid' })

      if (error) {
        console.error('Database error:', error)
        await reply(
          `❌ *Subscription failed*\n\n` +
          `Please try again later or contact support.`
        )
        return ok()
      }

      await reply(
        `✅ *Subscribed successfully!*\n\n` +
        `You'll receive notifications when OpenCode tasks complete.\n\n` +
        `*Your UUID:* \`${uuid}\`\n\n` +
        `*Commands:*\n` +
        `• /status - Check subscription\n` +
        `• /stop - Unsubscribe`
      )
      return ok()
    }

    // Handle /stop command
    if (parsed?.command === '/stop') {
      // chat_id is not unique (one chat may register several UUIDs), so ask for
      // "at least one active row" rather than "exactly one row".
      const { data: activeRows, error: lookupError } = await supabase
        .from('telegram_subscribers')
        .select('uuid')
        .eq('chat_id', chatId)
        .eq('is_active', true)
        .limit(1)

      if (lookupError) {
        console.error('Database error:', lookupError)
        await reply(`❌ *Failed to unsubscribe*\n\nPlease try again.`)
        return ok()
      }

      if (!activeRows || activeRows.length === 0) {
        await reply(
          `ℹ️ *Not subscribed*\n\n` +
          `You don't have an active subscription.\n` +
          `Use /start to subscribe.`
        )
        return ok()
      }

      const { error } = await supabase
        .from('telegram_subscribers')
        .update({ is_active: false })
        .eq('chat_id', chatId)

      if (error) {
        console.error('Database error:', error)
        await reply(`❌ *Failed to unsubscribe*\n\nPlease try again.`)
        return ok()
      }

      await reply(
        `👋 *Unsubscribed*\n\n` +
        `You won't receive notifications anymore.\n` +
        `Use /start to resubscribe anytime.`
      )
      return ok()
    }

    // Handle /status command
    if (parsed?.command === '/status') {
      // Prefer an active subscription, then the most recent one, when the chat has several.
      const { data: subscriberRows, error: lookupError } = await supabase
        .from('telegram_subscribers')
        .select('uuid, created_at, notifications_sent, last_notified_at, is_active')
        .eq('chat_id', chatId)
        .order('is_active', { ascending: false })
        .order('created_at', { ascending: false })
        .limit(1)

      if (lookupError) {
        console.error('Database error:', lookupError)
      }

      const subscriber = subscriberRows?.[0]
      if (!subscriber) {
        await reply(
          `ℹ️ *No subscription found*\n\n` +
          `Use /start to subscribe.`
        )
        return ok()
      }

      const status = subscriber.is_active ? '✅ Active' : '❌ Inactive'
      const lastNotified = subscriber.last_notified_at
        ? new Date(subscriber.last_notified_at).toLocaleString()
        : 'Never'

      await reply(
        `📊 *Subscription Status*\n\n` +
        `*Status:* ${status}\n` +
        `*UUID:* \`${subscriber.uuid}\`\n` +
        `*Notifications sent:* ${subscriber.notifications_sent}\n` +
        `*Last notification:* ${lastNotified}\n` +
        `*Subscribed since:* ${new Date(subscriber.created_at).toLocaleDateString()}`
      )
      return ok()
    }

    // Handle /help command
    if (parsed?.command === '/help') {
      await reply(
        `*OpenCode Notification Bot* 🤖\n\n` +
        `*Commands:*\n` +
        `• /start - Subscribe with your UUID\n` +
        `• /stop - Unsubscribe from notifications\n` +
        `• /status - Check subscription status\n` +
        `• /help - Show this message\n\n` +
        `*Setup Instructions:*\n` +
        `1. Generate a UUID: \`uuidgen\`\n` +
        `2. Add to ~/.config/opencode/tts.json\n` +
        `3. Send /start <your-uuid> here\n\n` +
        `*More info:* github.com/opencode-ai/opencode`
      )
      return ok()
    }

    // Unknown command
    if (parsed) {
      await reply(
        `❓ *Unknown command*\n\n` +
        `Use /help to see available commands.`
      )
      return ok()
    }

    // ==================== HANDLE REPLY MESSAGES ====================
    // Non-command messages are treated as replies to the most recent notification
    // Look up active reply context and forward to OpenCode session

    // Get the most recent active reply context for this chat
    const { data: context, error: contextError } = await supabase
      .rpc('get_active_reply_context', { p_chat_id: chatId })

    if (contextError) {
      console.error('Error looking up reply context:', contextError)
      await reply(
        `❌ *Error processing reply*\n\n` +
        `Please try again later.`
      )
      return ok()
    }

    // Check if we found an active context
    if (!context || context.length === 0) {
      await reply(
        `ℹ️ *No active session*\n\n` +
        `There's no active OpenCode session to reply to.\n\n` +
        `Replies are available for 24 hours after receiving a notification.\n` +
        `Start a new task in OpenCode to receive notifications.`
      )
      return ok()
    }

    // We have an active context - store the reply for OpenCode to pick up
    const activeContext = context[0]

    const { error: insertError } = await supabase
      .from('telegram_replies')
      .insert({
        uuid: activeContext.uuid,
        session_id: activeContext.session_id,
        directory: activeContext.directory,
        reply_text: text,
        telegram_message_id: messageId,
        telegram_chat_id: chatId,
        processed: false,
      })

    if (insertError) {
      console.error('Error storing reply:', insertError)
      await reply(
        `❌ *Failed to send reply*\n\n` +
        `Please try again.`
      )
      return ok()
    }

    // Confirm to user that reply was sent
    await reply(
      `✓ *Reply sent to OpenCode*\n\n` +
      `Your message has been forwarded to the active session.`
    )

    return ok()
  } catch (error) {
    console.error('Webhook error:', error)
    return new Response('Internal server error', { status: 500 })
  }
})
-- Function to increment notification count atomically.
-- Bumps notifications_sent and stamps last_notified_at for one subscriber.
-- Returns the new count, or NULL when no row matches row_uuid.
CREATE OR REPLACE FUNCTION public.increment_notifications(row_uuid UUID)
RETURNS INTEGER
LANGUAGE plpgsql
SECURITY DEFINER
-- Pin the search path: a SECURITY DEFINER function must not resolve names through a
-- caller-controlled search_path. Every object below is schema-qualified.
SET search_path = ''
AS $$
DECLARE
  new_count INTEGER;
BEGIN
  UPDATE public.telegram_subscribers
  SET
    -- notifications_sent is nullable; treat NULL as 0 so the counter cannot get stuck at NULL
    notifications_sent = COALESCE(notifications_sent, 0) + 1,
    last_notified_at = NOW()
  WHERE uuid = row_uuid
  RETURNING notifications_sent INTO new_count;

  RETURN new_count;
END;
$$;

-- SECURITY DEFINER bypasses the "Service role only" RLS policy on telegram_subscribers,
-- so restrict who may call the function to keep that policy meaningful.
-- NOTE(review): assumes every caller uses the service-role key - confirm before deploying.
REVOKE ALL ON FUNCTION public.increment_notifications(UUID) FROM PUBLIC, anon, authenticated;
GRANT EXECUTE ON FUNCTION public.increment_notifications(UUID) TO service_role;
-- Stores incoming replies from Telegram users
-- OpenCode plugin subscribes to this table via Supabase Realtime
-- One row per reply; the webhook inserts rows with processed = FALSE.
-- reply_text is NOT NULL in this migration; migration 20240116000000 relaxes it
-- and adds the voice columns so voice messages can be stored before transcription.

CREATE TABLE IF NOT EXISTS public.telegram_replies (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Subscriber the reply belongs to; rows are removed when the subscriber is deleted
  uuid UUID NOT NULL REFERENCES public.telegram_subscribers(uuid) ON DELETE CASCADE,
  session_id TEXT NOT NULL,              -- OpenCode session ID to forward to
  directory TEXT,                        -- Working directory context
  reply_text TEXT NOT NULL,              -- The user's reply message
  telegram_message_id INTEGER,           -- Telegram message ID of the reply
  telegram_chat_id BIGINT NOT NULL,      -- Chat ID where reply came from
  created_at TIMESTAMPTZ DEFAULT NOW(),
  processed BOOLEAN DEFAULT FALSE,       -- Set to true after OpenCode processes it
  processed_at TIMESTAMPTZ               -- When it was processed (NULL until then)
);
-- ==================== CLEANUP FUNCTION ====================
-- Function to clean up expired reply contexts (can be called by cron job).
-- 1. Deactivates contexts whose expires_at has passed.
-- 2. Deletes contexts that expired more than 7 days ago.
-- 3. Deletes processed replies older than 7 days.
-- Returns the number of contexts deactivated in step 1.

CREATE OR REPLACE FUNCTION public.cleanup_expired_reply_contexts()
RETURNS INTEGER
LANGUAGE plpgsql
SECURITY DEFINER
-- Pin the search path: a SECURITY DEFINER function must not resolve names through a
-- caller-controlled search_path. Every object below is schema-qualified.
SET search_path = ''
AS $$
DECLARE
  deactivated_count INTEGER;
BEGIN
  -- Deactivate expired contexts
  UPDATE public.telegram_reply_contexts
  SET is_active = FALSE
  WHERE is_active = TRUE AND expires_at < NOW();

  GET DIAGNOSTICS deactivated_count = ROW_COUNT;

  -- Delete very old contexts (older than 7 days)
  DELETE FROM public.telegram_reply_contexts
  WHERE expires_at < NOW() - INTERVAL '7 days';

  -- Delete old processed replies (older than 7 days).
  -- Fall back to created_at so rows marked processed without a processed_at
  -- timestamp are still purged instead of being kept forever.
  DELETE FROM public.telegram_replies
  WHERE processed = TRUE
    AND COALESCE(processed_at, created_at) < NOW() - INTERVAL '7 days';

  -- NOTE(review): replies that are never marked processed are not removed here;
  -- decide whether stale unprocessed rows should also expire.

  RETURN deactivated_count;
END;
$$;

-- Maintenance entry point: only the service role (and the owner, e.g. a cron job) should run it.
-- NOTE(review): assumes no client calls this with the anon key - confirm before deploying.
REVOKE ALL ON FUNCTION public.cleanup_expired_reply_contexts() FROM PUBLIC, anon, authenticated;
GRANT EXECUTE ON FUNCTION public.cleanup_expired_reply_contexts() TO service_role;
-- ==================== HELPER FUNCTION ====================
-- Function to get the most recent active context for a chat.
-- Returns at most one row: the newest context for p_chat_id that is still
-- flagged active and has not passed expires_at. Returns no rows otherwise.

CREATE OR REPLACE FUNCTION public.get_active_reply_context(p_chat_id BIGINT)
RETURNS TABLE(
  session_id TEXT,
  directory TEXT,
  uuid UUID,
  created_at TIMESTAMPTZ
)
LANGUAGE plpgsql
STABLE
SECURITY DEFINER
-- Pin the search path: a SECURITY DEFINER function must not resolve names through a
-- caller-controlled search_path. Every object below is schema-qualified.
SET search_path = ''
AS $$
BEGIN
  RETURN QUERY
  SELECT
    rc.session_id,
    rc.directory,
    rc.uuid,
    rc.created_at
  FROM public.telegram_reply_contexts rc
  WHERE rc.chat_id = p_chat_id
    AND rc.is_active = TRUE
    AND rc.expires_at > NOW()
  ORDER BY rc.created_at DESC
  LIMIT 1;
END;
$$;

-- This function returns the subscriber's secret UUID, session id and directory for any
-- chat id, and SECURITY DEFINER bypasses the "Service role only" RLS policy. Without this
-- restriction anyone holding the public anon key could read them through the RPC endpoint.
-- NOTE(review): the webhook calls this with the service-role key; confirm no anon-key caller exists.
REVOKE ALL ON FUNCTION public.get_active_reply_context(BIGINT) FROM PUBLIC, anon, authenticated;
GRANT EXECUTE ON FUNCTION public.get_active_reply_context(BIGINT) TO service_role;
describe("TTS Plugin - Telegram Notification Features", () => {
  let pluginContent: string

  // Load the plugin source once; every test below is a substring check against it.
  before(async () => {
    const pluginPath = join(__dirname, "../tts.ts")
    pluginContent = await readFile(pluginPath, "utf-8")
  })

  // Each entry: [test title, [[required substring, failure message], ...]]
  const featureChecks: Array<[string, Array<[string, string]>]> = [
    ["has Telegram configuration section in TTSConfig", [
      ["telegram?:", "Missing telegram config section"],
      ["telegram?: {", "Missing telegram config object"],
    ]],
    ["supports Telegram enabled flag", [
      ["telegram?.enabled", "Missing telegram enabled check"],
      ["isTelegramEnabled", "Missing isTelegramEnabled function"],
    ]],
    ["supports UUID configuration for Telegram subscription", [
      ["uuid?:", "Missing uuid config option"],
      ["TELEGRAM_NOTIFICATION_UUID", "Missing UUID env var support"],
    ]],
    ["supports custom service URL for Telegram backend", [
      ["serviceUrl?:", "Missing serviceUrl config option"],
      ["DEFAULT_TELEGRAM_SERVICE_URL", "Missing default service URL"],
    ]],
    ["supports sendText and sendVoice toggle options", [
      ["sendText?:", "Missing sendText config option"],
      ["sendVoice?:", "Missing sendVoice config option"],
    ]],
    ["has sendTelegramNotification function", [
      ["sendTelegramNotification", "Missing sendTelegramNotification function"],
      ["voice_base64", "Missing voice base64 encoding"],
    ]],
    ["converts WAV to OGG for Telegram voice messages", [
      ["convertWavToOgg", "Missing WAV to OGG conversion function"],
      ["libopus", "Missing Opus codec for OGG conversion"],
      ["ffmpeg", "Missing ffmpeg for audio conversion"],
    ]],
    ["checks ffmpeg availability before conversion", [
      ["isFfmpegAvailable", "Missing ffmpeg availability check"],
      ["which ffmpeg", "Missing ffmpeg path check"],
    ]],
    ["integrates Telegram notification with speak function", [
      ["telegramEnabled", "Missing telegram enabled check in speak"],
      ["Sending Telegram notification", "Missing telegram notification log"],
    ]],
    ["supports TELEGRAM_DISABLED env var", [
      ["TELEGRAM_DISABLED", "Missing TELEGRAM_DISABLED env var support"],
    ]],
    ["returns audio path from TTS engines for Telegram", [
      ["speakWithCoquiAndGetPath", "Missing speakWithCoquiAndGetPath function"],
      ["speakWithChatterboxAndGetPath", "Missing speakWithChatterboxAndGetPath function"],
      ["audioPath?:", "Missing audioPath return type"],
    ]],
    ["has proper error handling for Telegram notifications", [
      ["Telegram notification failed", "Missing Telegram error log"],
      ["success: false", "Missing failure handling"],
    ]],
  ]

  // Register one test per entry, asserting its substrings in the listed order.
  for (const [title, expectations] of featureChecks) {
    it(title, () => {
      for (const [needle, failureMessage] of expectations) {
        assert.ok(pluginContent.includes(needle), failureMessage)
      }
    })
  }
})
"../supabase/functions/send-notify/index.ts"), + "utf-8" + ) + } catch (e) { + console.log(" [SKIP] Supabase functions not found") + } + }) + + describe("telegram-webhook function", () => { + it("exists and has content", () => { + if (!webhookContent) { + console.log(" [SKIP] telegram-webhook function not found") + return + } + assert.ok(webhookContent.length > 0) + }) + + it("handles /start command", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("/start"), "Missing /start command handler") + assert.ok(webhookContent.includes("uuid"), "Missing UUID handling") + }) + + it("handles /stop command", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("/stop"), "Missing /stop command handler") + assert.ok(webhookContent.includes("is_active"), "Missing deactivation logic") + }) + + it("handles /status command", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("/status"), "Missing /status command handler") + assert.ok(webhookContent.includes("notifications_sent"), "Missing notification count") + }) + + it("validates UUID format", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("isValidUUID"), "Missing UUID validation function") + assert.ok(webhookContent.includes("UUID_REGEX"), "Missing UUID regex") + }) + + it("uses Supabase client with service role", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("createClient"), "Missing Supabase client creation") + assert.ok(webhookContent.includes("SUPABASE_SERVICE_ROLE_KEY"), "Missing service role key") + }) + + it("sends response messages via Telegram API", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("sendTelegramMessage"), "Missing Telegram message function") + assert.ok(webhookContent.includes("api.telegram.org"), "Missing Telegram API URL") + }) + }) + + describe("send-notify function", () => { + it("exists and has content", () => { + if (!sendNotifyContent) { + 
console.log(" [SKIP] send-notify function not found") + return + } + assert.ok(sendNotifyContent.length > 0) + }) + + it("accepts uuid, text, and voice_base64 in request", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("uuid"), "Missing uuid field") + assert.ok(sendNotifyContent.includes("text"), "Missing text field") + assert.ok(sendNotifyContent.includes("voice_base64"), "Missing voice_base64 field") + }) + + it("looks up subscriber by UUID", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("telegram_subscribers"), "Missing subscribers table") + assert.ok(sendNotifyContent.includes(".eq('uuid'"), "Missing UUID lookup") + }) + + it("sends text messages via Telegram API", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("sendTelegramMessage"), "Missing text message function") + assert.ok(sendNotifyContent.includes("sendMessage"), "Missing Telegram sendMessage endpoint") + }) + + it("sends voice messages via Telegram API", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("sendTelegramVoice"), "Missing voice message function") + assert.ok(sendNotifyContent.includes("sendVoice"), "Missing Telegram sendVoice endpoint") + }) + + it("has rate limiting", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("isRateLimited"), "Missing rate limiting function") + assert.ok(sendNotifyContent.includes("RATE_LIMIT"), "Missing rate limit constants") + }) + + it("handles CORS headers", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("Access-Control-Allow-Origin"), "Missing CORS header") + assert.ok(sendNotifyContent.includes("OPTIONS"), "Missing OPTIONS method handling") + }) + + it("increments notification count", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("increment_notifications"), "Missing notification count increment") + }) + + it("checks subscription is 
describe("Supabase Database Schema - Structure Validation", () => {
  let migrationContent: string

  // Load the first migration whose file name mentions "subscribers".
  // When the directory cannot be read, the content stays unset and the tests below skip.
  before(async () => {
    try {
      const fsPromises = await import("fs/promises")
      const migrationsDir = join(__dirname, "../supabase/migrations")
      const entries = await fsPromises.readdir(migrationsDir)
      const migrationFile = entries.find(f => f.includes("subscribers"))
      if (migrationFile) {
        migrationContent = await readFile(join(migrationsDir, migrationFile), "utf-8")
      }
    } catch {
      console.log(" [SKIP] Migration files not found")
    }
  })

  // Each entry: [test title, [[required substring, failure message], ...]]
  const schemaChecks: Array<[string, Array<[string, string]>]> = [
    ["creates telegram_subscribers table", [
      ["telegram_subscribers", "Missing table creation"],
    ]],
    ["has uuid as primary key", [
      ["uuid UUID PRIMARY KEY", "Missing UUID primary key"],
    ]],
    ["has chat_id column", [
      ["chat_id BIGINT", "Missing chat_id column"],
    ]],
    ["has notification tracking columns", [
      ["notifications_sent", "Missing notifications_sent column"],
      ["last_notified_at", "Missing last_notified_at column"],
    ]],
    ["has is_active column for subscription status", [
      ["is_active", "Missing is_active column"],
    ]],
    ["enables Row Level Security", [
      ["ROW LEVEL SECURITY", "Missing RLS enablement"],
    ]],
    ["has service role only policy", [
      ["service_role", "Missing service role policy"],
    ]],
    ["has increment_notifications function", [
      ["increment_notifications", "Missing increment function"],
    ]],
  ]

  schemaChecks.forEach(([title, expectations], position) => {
    it(title, () => {
      if (!migrationContent) {
        // Only the first test announces the skip; the rest return silently.
        if (position === 0) {
          console.log(" [SKIP] Migration file not found")
        }
        return
      }
      for (const [needle, failureMessage] of expectations) {
        assert.ok(migrationContent.includes(needle), failureMessage)
      }
    })
  })
})
column") + }) + + it("has expires_at column for context expiration", () => { + if (!replyMigrationContent) return + assert.ok(replyMigrationContent.includes("expires_at"), "Missing expires_at column") + }) + + it("has is_active column for context status", () => { + if (!replyMigrationContent) return + assert.ok(replyMigrationContent.includes("is_active BOOLEAN"), "Missing is_active column") + }) + }) + + describe("telegram_replies table", () => { + it("creates telegram_replies table", () => { + if (!replyMigrationContent) { + console.log(" [SKIP] Reply migration file not found") + return + } + assert.ok(replyMigrationContent.includes("telegram_replies"), "Missing replies table") + }) + + it("has reply_text column for user message content", () => { + if (!replyMigrationContent) return + assert.ok(replyMigrationContent.includes("reply_text TEXT"), "Missing reply_text column") + }) + + it("has processed column for tracking delivery status", () => { + if (!replyMigrationContent) return + assert.ok(replyMigrationContent.includes("processed BOOLEAN"), "Missing processed column") + }) + + it("enables Supabase Realtime for replies table", () => { + if (!replyMigrationContent) return + assert.ok(replyMigrationContent.includes("supabase_realtime"), "Missing realtime enablement") + }) + }) + + describe("send-notify session context support", () => { + it("accepts session_id in request body", () => { + if (!sendNotifyContent) { + console.log(" [SKIP] send-notify function not found") + return + } + assert.ok(sendNotifyContent.includes("session_id"), "Missing session_id field") + }) + + it("accepts directory in request body", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("directory"), "Missing directory field") + }) + + it("stores reply context in database", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("telegram_reply_contexts"), "Missing context storage") + }) + + it("deactivates previous contexts before 
creating new one", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("is_active: false") || sendNotifyContent.includes("is_active = false"), + "Missing previous context deactivation") + }) + + it("returns message_id from Telegram API", () => { + if (!sendNotifyContent) return + assert.ok(sendNotifyContent.includes("messageId"), "Missing message ID extraction") + }) + }) + + describe("telegram-webhook reply handling", () => { + it("handles non-command messages as replies", () => { + if (!webhookContent) { + console.log(" [SKIP] telegram-webhook function not found") + return + } + assert.ok(webhookContent.includes("get_active_reply_context"), "Missing reply context lookup") + }) + + it("stores replies in telegram_replies table", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("telegram_replies"), "Missing reply storage") + }) + + it("confirms reply receipt to user", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("Reply sent"), "Missing confirmation message") + }) + + it("handles missing reply context gracefully", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("No active session"), "Missing no-context message") + }) + }) + + describe("tts.ts Telegram reply subscription", () => { + it("has receiveReplies config option", () => { + if (!ttsContent) { + console.log(" [SKIP] tts.ts not found") + return + } + assert.ok(ttsContent.includes("receiveReplies"), "Missing receiveReplies config option") + }) + + it("has supabaseUrl config option", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("supabaseUrl"), "Missing supabaseUrl config option") + }) + + it("has supabaseAnonKey config option", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("supabaseAnonKey"), "Missing supabaseAnonKey config option") + }) + + it("has subscribeToReplies function", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("subscribeToReplies"), 
"Missing subscribeToReplies function") + }) + + it("uses Supabase Realtime for reply subscription", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("postgres_changes"), "Missing Supabase Realtime subscription") + }) + + it("forwards replies to OpenCode session via promptAsync", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("promptAsync"), "Missing promptAsync call for reply forwarding") + assert.ok(ttsContent.includes("[User via Telegram]"), "Missing Telegram reply prefix") + }) + + it("marks replies as processed after forwarding", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("markReplyProcessed"), "Missing reply processed marking") + }) + + it("passes sessionId to sendTelegramNotification", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("sessionId?: string") || ttsContent.includes("sessionId: string"), + "Missing sessionId in notification context") + }) + + it("includes session_id in notification request body", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("body.session_id"), "Missing session_id in request body") + }) + }) + + describe("helper functions", () => { + it("has get_active_reply_context function in migration", () => { + if (!replyMigrationContent) { + console.log(" [SKIP] Reply migration file not found") + return + } + assert.ok(replyMigrationContent.includes("get_active_reply_context"), "Missing helper function") + }) + + it("has cleanup_expired_reply_contexts function", () => { + if (!replyMigrationContent) return + assert.ok(replyMigrationContent.includes("cleanup_expired_reply_contexts"), "Missing cleanup function") + }) + + it("has unsubscribeFromReplies function in tts.ts", () => { + if (!ttsContent) { + console.log(" [SKIP] tts.ts not found") + return + } + assert.ok(ttsContent.includes("unsubscribeFromReplies"), "Missing unsubscribe function") + }) + }) +}) + +// ==================== VOICE MESSAGE SUPPORT TESTS ==================== + 
+describe("Telegram Voice Message Support - Structure Validation", () => { + let ttsContent: string | null = null + let webhookContent: string | null = null + let voiceToRepliesMigrationContent: string | null = null + let whisperServerContent: string | null = null + + before(async () => { + try { + ttsContent = await readFile(join(__dirname, "..", "tts.ts"), "utf-8") + } catch { ttsContent = null } + + try { + webhookContent = await readFile(join(__dirname, "..", "supabase", "functions", "telegram-webhook", "index.ts"), "utf-8") + } catch { webhookContent = null } + + try { + // Load the new migration that adds voice support to telegram_replies + voiceToRepliesMigrationContent = await readFile(join(__dirname, "..", "supabase", "migrations", "20240116000000_add_voice_to_replies.sql"), "utf-8") + } catch { voiceToRepliesMigrationContent = null } + + try { + whisperServerContent = await readFile(join(__dirname, "..", "whisper", "whisper_server.py"), "utf-8") + } catch { whisperServerContent = null } + }) + + describe("tts.ts whisper integration", () => { + it("has whisper config interface", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("whisper?:"), "Missing whisper config in TTSConfig") + }) + + it("has WHISPER_DIR constant", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("WHISPER_DIR"), "Missing WHISPER_DIR constant") + }) + + it("has setupWhisper function", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("async function setupWhisper"), "Missing setupWhisper function") + }) + + it("has startWhisperServer function", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("async function startWhisperServer"), "Missing startWhisperServer function") + }) + + it("has transcribeWithWhisper function", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("async function transcribeWithWhisper"), "Missing transcribeWithWhisper function") + }) + + it("has isWhisperServerRunning function", () => { + 
if (!ttsContent) return + assert.ok(ttsContent.includes("async function isWhisperServerRunning"), "Missing isWhisperServerRunning function") + }) + + it("has subscribeToReplies function", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("subscribeToReplies"), "Missing subscribeToReplies function") + }) + + it("subscribeToReplies handles voice messages with audio_base64", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("reply.is_voice && reply.audio_base64"), "Missing voice message handling in subscribeToReplies") + }) + + it("transcribes voice messages with Whisper", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("transcribeWithWhisper(reply.audio_base64"), "Missing transcribeWithWhisper call for voice messages") + }) + + it("TelegramReply interface has voice message fields", () => { + if (!ttsContent) return + assert.ok(ttsContent.includes("is_voice?: boolean"), "Missing is_voice field in TelegramReply") + assert.ok(ttsContent.includes("audio_base64?: string"), "Missing audio_base64 field in TelegramReply") + }) + }) + + describe("telegram-webhook voice handling", () => { + it("has TelegramVoice interface", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("interface TelegramVoice"), "Missing TelegramVoice interface") + }) + + it("has TelegramVideoNote interface", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("interface TelegramVideoNote"), "Missing TelegramVideoNote interface") + }) + + it("has TelegramVideo interface", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("interface TelegramVideo"), "Missing TelegramVideo interface") + }) + + it("handles voice messages in TelegramUpdate", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("voice?: TelegramVoice"), "Missing voice in TelegramUpdate") + }) + + it("handles video_note messages", () => { + if (!webhookContent) return + 
assert.ok(webhookContent.includes("video_note?: TelegramVideoNote"), "Missing video_note in TelegramUpdate") + }) + + it("stores voice messages in telegram_replies table with is_voice flag", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("is_voice: true"), "Missing is_voice flag in insert") + assert.ok(webhookContent.includes("telegram_replies"), "Should insert into telegram_replies table") + }) + + it("includes audio_base64 in voice message insert", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("audio_base64: audioBase64"), "Missing audio_base64 in insert") + }) + + it("includes voice_file_type and voice_duration_seconds", () => { + if (!webhookContent) return + assert.ok(webhookContent.includes("voice_file_type: fileType"), "Missing voice_file_type in insert") + assert.ok(webhookContent.includes("voice_duration_seconds: duration"), "Missing voice_duration_seconds in insert") + }) + }) + + describe("voice to replies migration", () => { + it("adds voice columns to telegram_replies table", () => { + if (!voiceToRepliesMigrationContent) { + console.log(" [SKIP] Voice to replies migration file not found") + return + } + assert.ok(voiceToRepliesMigrationContent.includes("ALTER TABLE"), "Missing ALTER TABLE") + assert.ok(voiceToRepliesMigrationContent.includes("telegram_replies"), "Missing telegram_replies table reference") + }) + + it("has is_voice column", () => { + if (!voiceToRepliesMigrationContent) return + assert.ok(voiceToRepliesMigrationContent.includes("is_voice BOOLEAN"), "Missing is_voice column") + }) + + it("has audio_base64 column", () => { + if (!voiceToRepliesMigrationContent) return + assert.ok(voiceToRepliesMigrationContent.includes("audio_base64 TEXT"), "Missing audio_base64 column") + }) + + it("has voice_file_type column", () => { + if (!voiceToRepliesMigrationContent) return + assert.ok(voiceToRepliesMigrationContent.includes("voice_file_type TEXT"), "Missing voice_file_type column") + }) + + 
it("has voice_duration_seconds column", () => { + if (!voiceToRepliesMigrationContent) return + assert.ok(voiceToRepliesMigrationContent.includes("voice_duration_seconds INTEGER"), "Missing voice_duration_seconds column") + }) + + it("makes reply_text nullable for voice messages", () => { + if (!voiceToRepliesMigrationContent) return + assert.ok(voiceToRepliesMigrationContent.includes("reply_text DROP NOT NULL"), "Missing reply_text nullability change") + }) + + it("drops old telegram_voice_messages table", () => { + if (!voiceToRepliesMigrationContent) return + assert.ok(voiceToRepliesMigrationContent.includes("DROP TABLE IF EXISTS"), "Missing DROP TABLE") + assert.ok(voiceToRepliesMigrationContent.includes("telegram_voice_messages"), "Missing telegram_voice_messages drop") + }) + }) + + describe("whisper server script", () => { + it("exists at whisper/whisper_server.py", () => { + if (!whisperServerContent) { + console.log(" [SKIP] Whisper server script not found") + return + } + assert.ok(whisperServerContent.length > 0, "Whisper server script is empty") + }) + + it("uses faster_whisper library", () => { + if (!whisperServerContent) return + assert.ok(whisperServerContent.includes("faster_whisper"), "Missing faster_whisper import") + }) + + it("has FastAPI app", () => { + if (!whisperServerContent) return + assert.ok(whisperServerContent.includes("FastAPI"), "Missing FastAPI import") + }) + + it("has /health endpoint", () => { + if (!whisperServerContent) return + assert.ok(whisperServerContent.includes('@app.get("/health")'), "Missing /health endpoint") + }) + + it("has /transcribe endpoint", () => { + if (!whisperServerContent) return + assert.ok(whisperServerContent.includes('@app.post("/transcribe")'), "Missing /transcribe endpoint") + }) + + it("uses VAD filtering", () => { + if (!whisperServerContent) return + assert.ok(whisperServerContent.includes("vad_filter=True"), "Missing VAD filter") + }) + + it("converts audio to WAV format", () => { + if 
(!whisperServerContent) return + assert.ok(whisperServerContent.includes("convert_to_wav"), "Missing audio conversion function") + }) + + it("uses ffmpeg for conversion", () => { + if (!whisperServerContent) return + assert.ok(whisperServerContent.includes("ffmpeg"), "Missing ffmpeg usage") + }) + + it("runs on port 8787 by default", () => { + if (!whisperServerContent) return + assert.ok(whisperServerContent.includes("8787"), "Missing default port 8787") + }) + }) +}) diff --git a/tts.ts b/tts.ts index e35d191..7b63fb8 100644 --- a/tts.ts +++ b/tts.ts @@ -79,8 +79,39 @@ interface TTSConfig { useTurbo?: boolean // Use Turbo model for 10x faster inference serverMode?: boolean // Keep model loaded for fast subsequent requests (default: true) } + // Telegram notification options + telegram?: { + enabled?: boolean // Enable Telegram notifications (default: false) + uuid?: string // User's unique identifier (required for subscription) + serviceUrl?: string // Supabase Edge Function URL (has default) + sendText?: boolean // Send text message (default: true) + sendVoice?: boolean // Send voice message (default: true) + receiveReplies?: boolean // Enable receiving replies from Telegram (default: true) + supabaseUrl?: string // Supabase project URL (for realtime subscription) + supabaseAnonKey?: string // Supabase anonymous key (for realtime subscription) + } + // Whisper STT options (for transcribing Telegram voice messages) + whisper?: { + enabled?: boolean // Enable Whisper STT for voice messages (default: true if telegram enabled) + model?: string // Whisper model: "tiny", "base", "small", "medium", "large-v2", "large-v3" + device?: "cuda" | "cpu" | "auto" // Device for inference (default: auto) + port?: number // HTTP server port (default: 8787) + } } +// ==================== WHISPER STT ==================== + +const WHISPER_DIR = join(homedir(), ".config", "opencode", "whisper") +const WHISPER_VENV = join(WHISPER_DIR, "venv") +const WHISPER_SERVER_SCRIPT = 
join(WHISPER_DIR, "whisper_server.py") +const WHISPER_PID = join(WHISPER_DIR, "server.pid") +const WHISPER_LOCK = join(WHISPER_DIR, "server.lock") +const WHISPER_DEFAULT_PORT = 8787 + +let whisperInstalled: boolean | null = null +let whisperSetupAttempted = false +let whisperServerProcess: ReturnType | null = null + // ==================== CHATTERBOX ==================== const CHATTERBOX_DIR = join(homedir(), ".config", "opencode", "chatterbox") @@ -644,6 +675,14 @@ async function startChatterboxServer(config: TTSConfig): Promise { } async function speakWithChatterboxServer(text: string, config: TTSConfig): Promise { + const result = await speakWithChatterboxServerAndGetPath(text, config) + return result.success +} + +/** + * Speak with Chatterbox server and return both success status and audio file path + */ +async function speakWithChatterboxServerAndGetPath(text: string, config: TTSConfig): Promise<{ success: boolean; audioPath?: string }> { const net = await import("net") const opts = config.chatterbox || {} const outputPath = join(tmpdir(), `opencode_tts_${Date.now()}.wav`) @@ -668,10 +707,11 @@ async function speakWithChatterboxServer(text: string, config: TTSConfig): Promi try { const result = JSON.parse(response.trim()) if (!result.success) { - resolve(false) + resolve({ success: false }) return } + // Play the audio if (platform() === "darwin") { await execAsync(`afplay "${outputPath}"`) } else { @@ -681,20 +721,20 @@ async function speakWithChatterboxServer(text: string, config: TTSConfig): Promi await execAsync(`aplay "${outputPath}"`) } } - await unlink(outputPath).catch(() => {}) - resolve(true) + // Return the path - caller is responsible for cleanup + resolve({ success: true, audioPath: outputPath }) } catch { - resolve(false) + resolve({ success: false }) } }) client.on("error", () => { - resolve(false) + resolve({ success: false }) }) setTimeout(() => { client.destroy() - resolve(false) + resolve({ success: false }) }, 120000) }) } @@ -716,14 
+756,23 @@ async function isChatterboxAvailable(config: TTSConfig): Promise { } async function speakWithChatterbox(text: string, config: TTSConfig): Promise { + const result = await speakWithChatterboxAndGetPath(text, config) + return result.success +} + +/** + * Speak with Chatterbox TTS and return both success status and audio file path + * The caller is responsible for cleaning up the audio file + */ +async function speakWithChatterboxAndGetPath(text: string, config: TTSConfig): Promise<{ success: boolean; audioPath?: string }> { const opts = config.chatterbox || {} const useServer = opts.serverMode !== false if (useServer) { const serverReady = await startChatterboxServer(config) if (serverReady) { - const success = await speakWithChatterboxServer(text, config) - if (success) return true + const result = await speakWithChatterboxServerAndGetPath(text, config) + if (result.success) return result } } @@ -757,17 +806,18 @@ async function speakWithChatterbox(text: string, config: TTSConfig): Promise { proc.kill() - resolve(false) + resolve({ success: false }) }, timeout) proc.on("close", async (code) => { clearTimeout(timer) if (code !== 0) { - resolve(false) + resolve({ success: false }) return } try { + // Play the audio if (platform() === "darwin") { await execAsync(`afplay "${outputPath}"`) } else { @@ -777,17 +827,17 @@ async function speakWithChatterbox(text: string, config: TTSConfig): Promise {}) - resolve(true) + // Return the path - caller is responsible for cleanup + resolve({ success: true, audioPath: outputPath }) } catch { await unlink(outputPath).catch(() => {}) - resolve(false) + resolve({ success: false }) } }) proc.on("error", () => { clearTimeout(timer) - resolve(false) + resolve({ success: false }) }) }) } @@ -1223,14 +1273,23 @@ async function isCoquiAvailable(config: TTSConfig): Promise { } async function speakWithCoqui(text: string, config: TTSConfig): Promise { + const result = await speakWithCoquiAndGetPath(text, config) + return 
result.success +} + +/** + * Speak with Coqui TTS and return both success status and audio file path + * The caller is responsible for cleaning up the audio file + */ +async function speakWithCoquiAndGetPath(text: string, config: TTSConfig): Promise<{ success: boolean; audioPath?: string }> { const opts = config.coqui || {} const useServer = opts.serverMode !== false if (useServer) { const serverReady = await startCoquiServer(config) if (serverReady) { - const success = await speakWithCoquiServer(text, config) - if (success) return true + const result = await speakWithCoquiServerAndGetPath(text, config) + if (result.success) return result } } @@ -1267,17 +1326,18 @@ async function speakWithCoqui(text: string, config: TTSConfig): Promise const timeout = device === "cpu" ? 300000 : 180000 const timer = setTimeout(() => { proc.kill() - resolve(false) + resolve({ success: false }) }, timeout) proc.on("close", async (code) => { clearTimeout(timer) if (code !== 0) { - resolve(false) + resolve({ success: false }) return } try { + // Play the audio if (platform() === "darwin") { await execAsync(`afplay "${outputPath}"`) } else { @@ -1287,21 +1347,466 @@ async function speakWithCoqui(text: string, config: TTSConfig): Promise await execAsync(`aplay "${outputPath}"`) } } - await unlink(outputPath).catch(() => {}) - resolve(true) + // Return the path - caller is responsible for cleanup + resolve({ success: true, audioPath: outputPath }) } catch { await unlink(outputPath).catch(() => {}) - resolve(false) + resolve({ success: false }) } }) proc.on("error", () => { clearTimeout(timer) - resolve(false) + resolve({ success: false }) + }) + }) +} + +/** + * Speak with Coqui server and return both success status and audio file path + */ +async function speakWithCoquiServerAndGetPath(text: string, config: TTSConfig): Promise<{ success: boolean; audioPath?: string }> { + const net = await import("net") + const opts = config.coqui || {} + const outputPath = join(tmpdir(), 
`opencode_coqui_${Date.now()}.wav`) + + return new Promise((resolve) => { + const client = net.createConnection(COQUI_SOCKET, () => { + const request = JSON.stringify({ + text, + output: outputPath, + voice_ref: opts.voiceRef, + speaker: opts.speaker, + language: opts.language || "en", + }) + "\n" + client.write(request) + }) + + let response = "" + client.on("data", (data) => { + response += data.toString() + }) + + client.on("end", async () => { + try { + const result = JSON.parse(response.trim()) + if (!result.success) { + resolve({ success: false }) + return + } + + // Play the audio + if (platform() === "darwin") { + await execAsync(`afplay "${outputPath}"`) + } else { + try { + await execAsync(`paplay "${outputPath}"`) + } catch { + await execAsync(`aplay "${outputPath}"`) + } + } + // Return the path - caller is responsible for cleanup + resolve({ success: true, audioPath: outputPath }) + } catch { + resolve({ success: false }) + } + }) + + client.on("error", () => { + resolve({ success: false }) }) + + setTimeout(() => { + client.destroy() + resolve({ success: false }) + }, 120000) }) } +// ==================== WHISPER STT ==================== + +/** + * Ensure Whisper server script is installed + */ +async function ensureWhisperServerScript(): Promise { + await mkdir(WHISPER_DIR, { recursive: true }) + + // Copy the whisper_server.py from the plugin source + // For now, we embed a minimal version here + const script = `#!/usr/bin/env python3 +""" +Faster Whisper STT Server for OpenCode TTS Plugin +""" + +import os +import sys +import json +import tempfile +import logging +import subprocess +import shutil +import base64 +from pathlib import Path +from typing import Optional + +try: + from fastapi import FastAPI, HTTPException + from fastapi.responses import JSONResponse + import uvicorn +except ImportError: + print("Installing required packages...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "fastapi", "uvicorn", "python-multipart"]) + 
from fastapi import FastAPI, HTTPException + from fastapi.responses import JSONResponse + import uvicorn + +try: + from faster_whisper import WhisperModel +except ImportError: + print("Installing faster-whisper...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "faster-whisper"]) + from faster_whisper import WhisperModel + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +app = FastAPI(title="OpenCode Whisper STT Server", version="1.0.0") + +MODELS_DIR = os.environ.get("WHISPER_MODELS_DIR", str(Path.home() / ".cache" / "whisper")) +DEFAULT_MODEL = os.environ.get("WHISPER_DEFAULT_MODEL", "base") +DEVICE = os.environ.get("WHISPER_DEVICE", "auto") +COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE_TYPE", "auto") + +AVAILABLE_MODELS = ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v2", "large-v3"] + +model_cache: dict[str, WhisperModel] = {} +current_model_name: Optional[str] = None + + +def convert_to_wav(input_path: str) -> str: + output_path = input_path.rsplit('.', 1)[0] + '_converted.wav' + ffmpeg_path = shutil.which('ffmpeg') + if not ffmpeg_path: + return input_path + try: + result = subprocess.run([ + ffmpeg_path, '-y', '-i', input_path, + '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', + output_path + ], capture_output=True, timeout=30) + if result.returncode == 0 and os.path.exists(output_path): + return output_path + return input_path + except: + return input_path + + +def get_model(model_name: str = DEFAULT_MODEL) -> WhisperModel: + global current_model_name + if model_name not in AVAILABLE_MODELS: + model_name = DEFAULT_MODEL + if model_name in model_cache: + return model_cache[model_name] + + logger.info(f"Loading Whisper model: {model_name}") + device = DEVICE + if device == "auto": + try: + import torch + device = "cuda" if torch.cuda.is_available() else "cpu" + except ImportError: + device = "cpu" + compute_type = COMPUTE_TYPE + if compute_type == "auto": + 
compute_type = "float16" if device == "cuda" else "int8" + + model = WhisperModel(model_name, device=device, compute_type=compute_type, download_root=MODELS_DIR) + model_cache[model_name] = model + current_model_name = model_name + logger.info(f"Model {model_name} loaded on {device}") + return model + + +@app.on_event("startup") +async def startup_event(): + logger.info("Starting OpenCode Whisper STT Server...") + try: + get_model(DEFAULT_MODEL) + except Exception as e: + logger.warning(f"Could not pre-load model: {e}") + + +@app.get("/health") +async def health(): + return {"status": "healthy", "model_loaded": current_model_name is not None, "current_model": current_model_name} + + +@app.post("/transcribe") +async def transcribe(request: dict): + audio_data = request.get("audio") + model_name = request.get("model", DEFAULT_MODEL) + language = request.get("language") + if language in ("auto", ""): + language = None + file_format = request.get("format", "ogg") + + if not audio_data: + raise HTTPException(status_code=400, detail="No audio data provided") + + tmp_path = None + converted_path = None + + try: + if "," in audio_data: + audio_data = audio_data.split(",")[1] + audio_bytes = base64.b64decode(audio_data) + + with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_format}") as tmp_file: + tmp_file.write(audio_bytes) + tmp_path = tmp_file.name + + audio_path = tmp_path + if file_format.lower() in ['webm', 'ogg', 'mp4', 'm4a', 'opus', 'oga']: + converted_path = convert_to_wav(tmp_path) + if converted_path != tmp_path: + audio_path = converted_path + + whisper_model = get_model(model_name) + segments, info = whisper_model.transcribe( + audio_path, language=language, task="transcribe", + vad_filter=True, vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=400) + ) + + segments_list = list(segments) + full_text = " ".join(segment.text.strip() for segment in segments_list) + + return JSONResponse(content={ + "text": full_text, "language": 
info.language, + "language_probability": info.language_probability, "duration": info.duration + }) + except Exception as e: + logger.error(f"Transcription error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + finally: + if tmp_path: + try: os.unlink(tmp_path) + except: pass + if converted_path and converted_path != tmp_path: + try: os.unlink(converted_path) + except: pass + + +if __name__ == "__main__": + port = int(os.environ.get("WHISPER_PORT", "8787")) + host = os.environ.get("WHISPER_HOST", "127.0.0.1") + logger.info(f"Starting Whisper server on {host}:{port}") + uvicorn.run(app, host=host, port=port, log_level="info") +` + await writeFile(WHISPER_SERVER_SCRIPT, script, { mode: 0o755 }) +} + +/** + * Setup Whisper virtualenv and dependencies + */ +async function setupWhisper(): Promise { + if (whisperSetupAttempted) return whisperInstalled === true + whisperSetupAttempted = true + + const python = await findPython311() || await findPython3() + if (!python) return false + + try { + await mkdir(WHISPER_DIR, { recursive: true }) + + const venvPython = join(WHISPER_VENV, "bin", "python") + try { + await access(venvPython) + const { stdout } = await execAsync(`"${venvPython}" -c "from faster_whisper import WhisperModel; print('ok')"`, { timeout: 30000 }) + if (stdout.includes("ok")) { + await ensureWhisperServerScript() + whisperInstalled = true + return true + } + } catch { + // Need to create/setup venv + } + + await execAsync(`"${python}" -m venv "${WHISPER_VENV}"`, { timeout: 60000 }) + + const pip = join(WHISPER_VENV, "bin", "pip") + await execAsync(`"${pip}" install --upgrade pip`, { timeout: 120000 }) + await execAsync(`"${pip}" install faster-whisper fastapi uvicorn python-multipart`, { timeout: 600000 }) + + await ensureWhisperServerScript() + whisperInstalled = true + return true + } catch { + whisperInstalled = false + return false + } +} + +/** + * Check if Whisper server is running + */ +async function isWhisperServerRunning(port: number 
= WHISPER_DEFAULT_PORT): Promise { + try { + const response = await fetch(`http://127.0.0.1:${port}/health`, { + signal: AbortSignal.timeout(2000) + }) + return response.ok + } catch { + return false + } +} + +/** + * Acquire lock for starting Whisper server + */ +async function acquireWhisperLock(): Promise { + const lockContent = `${process.pid}\n${Date.now()}` + try { + const { open } = await import("fs/promises") + const handle = await open(WHISPER_LOCK, "wx") + await handle.writeFile(lockContent) + await handle.close() + return true + } catch (e: any) { + if (e.code === "EEXIST") { + try { + const content = await readFile(WHISPER_LOCK, "utf-8") + const timestamp = parseInt(content.split("\n")[1] || "0", 10) + if (Date.now() - timestamp > 120000) { + await unlink(WHISPER_LOCK) + return acquireWhisperLock() + } + } catch { + await unlink(WHISPER_LOCK).catch(() => {}) + return acquireWhisperLock() + } + } + return false + } +} + +/** + * Release Whisper server lock + */ +async function releaseWhisperLock(): Promise { + await unlink(WHISPER_LOCK).catch(() => {}) +} + +/** + * Start the Whisper STT server + */ +async function startWhisperServer(config: TTSConfig): Promise { + const port = config.whisper?.port || WHISPER_DEFAULT_PORT + + if (await isWhisperServerRunning(port)) { + return true + } + + if (!(await acquireWhisperLock())) { + // Another process is starting the server, wait for it + const startTime = Date.now() + while (Date.now() - startTime < 120000) { + await new Promise(r => setTimeout(r, 1000)) + if (await isWhisperServerRunning(port)) { + return true + } + } + return false + } + + try { + if (await isWhisperServerRunning(port)) { + return true + } + + const installed = await setupWhisper() + if (!installed) { + return false + } + + const venvPython = join(WHISPER_VENV, "bin", "python") + const model = config.whisper?.model || "base" + const device = config.whisper?.device || "auto" + + const env: Record = { + ...process.env as Record, + 
WHISPER_PORT: port.toString(), + WHISPER_HOST: "127.0.0.1", + WHISPER_DEFAULT_MODEL: model, + WHISPER_DEVICE: device, + PYTHONUNBUFFERED: "1" + } + + whisperServerProcess = spawn(venvPython, [WHISPER_SERVER_SCRIPT], { + env, + stdio: ["ignore", "pipe", "pipe"], + detached: true, + }) + + if (whisperServerProcess.pid) { + await writeFile(WHISPER_PID, String(whisperServerProcess.pid)) + } + + whisperServerProcess.unref() + + // Wait for server to be ready + const startTime = Date.now() + while (Date.now() - startTime < 180000) { // 3 minutes for model download + if (await isWhisperServerRunning(port)) { + return true + } + await new Promise(r => setTimeout(r, 500)) + } + + return false + } finally { + await releaseWhisperLock() + } +} + +/** + * Transcribe audio using local Whisper server + */ +async function transcribeWithWhisper( + audioBase64: string, + config: TTSConfig, + format: string = "ogg" +): Promise<{ text: string; language: string; duration: number } | null> { + const port = config.whisper?.port || WHISPER_DEFAULT_PORT + + // Ensure server is running + const serverReady = await startWhisperServer(config) + if (!serverReady) { + return null + } + + try { + const response = await fetch(`http://127.0.0.1:${port}/transcribe`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + audio: audioBase64, + model: config.whisper?.model || "base", + format, + }), + signal: AbortSignal.timeout(120000) // 2 minute timeout + }) + + if (!response.ok) { + return null + } + + const result = await response.json() as { text: string; language: string; duration: number } + return result + } catch { + return null + } +} + // ==================== OS TTS ==================== async function speakWithOS(text: string, config: TTSConfig): Promise { @@ -1322,6 +1827,425 @@ async function speakWithOS(text: string, config: TTSConfig): Promise { } } +// ==================== TELEGRAM NOTIFICATIONS ==================== + +// Default Supabase 
Edge Function URL for sending notifications +const DEFAULT_TELEGRAM_SERVICE_URL = "https://slqxwymujuoipyiqscrl.supabase.co/functions/v1/send-notify" + +/** + * Check if ffmpeg is available for audio conversion + */ +async function isFfmpegAvailable(): Promise { + try { + await execAsync("which ffmpeg") + return true + } catch { + return false + } +} + +/** + * Convert WAV file to OGG (Opus) format for Telegram voice messages + * Returns the path to the OGG file, or null if conversion failed + */ +async function convertWavToOgg(wavPath: string): Promise { + const oggPath = wavPath.replace(/\.wav$/i, ".ogg") + + try { + // Use ffmpeg to convert WAV to OGG with Opus codec + // -c:a libopus: Use Opus codec (required for Telegram voice) + // -b:a 32k: 32kbps bitrate (good quality for speech) + // -ar 48000: 48kHz sample rate (Opus standard) + // -ac 1: Mono audio (voice doesn't need stereo) + await execAsync( + `ffmpeg -y -i "${wavPath}" -c:a libopus -b:a 32k -ar 48000 -ac 1 "${oggPath}"`, + { timeout: 30000 } + ) + return oggPath + } catch (err) { + console.error("[TTS] Failed to convert WAV to OGG:", err) + return null + } +} + +/** + * Send notification to Telegram via Supabase Edge Function + */ +async function sendTelegramNotification( + text: string, + voicePath: string | null, + config: TTSConfig, + context?: { model?: string; directory?: string; sessionId?: string } +): Promise<{ success: boolean; error?: string }> { + const telegramConfig = config.telegram + if (!telegramConfig?.enabled) { + return { success: false, error: "Telegram notifications disabled" } + } + + // Get UUID from config or environment variable + const uuid = telegramConfig.uuid || process.env.TELEGRAM_NOTIFICATION_UUID + if (!uuid) { + return { success: false, error: "No UUID configured for Telegram notifications" } + } + + const serviceUrl = telegramConfig.serviceUrl || DEFAULT_TELEGRAM_SERVICE_URL + const sendText = telegramConfig.sendText !== false + const sendVoice = 
telegramConfig.sendVoice !== false + + try { + const body: { + uuid: string + text?: string + voice_base64?: string + session_id?: string + directory?: string + } = { uuid } + + // Add session context for reply support + if (context?.sessionId) { + body.session_id = context.sessionId + } + if (context?.directory) { + body.directory = context.directory + } + + // Add text if enabled + if (sendText && text) { + // Build message with context header + const dirName = context?.directory ? context.directory.split("/").pop() || context.directory : undefined + const header = [ + context?.model ? `Model: ${context.model}` : null, + dirName ? `Dir: ${dirName}` : null + ].filter(Boolean).join(" | ") + + const formattedText = header + ? `${header}\n${"─".repeat(Math.min(header.length, 30))}\n\n${text}` + : text + + // Truncate to Telegram's limit (leave room for header) + body.text = formattedText.slice(0, 3900) + } + + // Add voice if enabled and path provided + if (sendVoice && voicePath) { + try { + // First check if ffmpeg is available + const ffmpegAvailable = await isFfmpegAvailable() + + let audioPath = voicePath + let oggPath: string | null = null + + if (ffmpegAvailable && voicePath.endsWith(".wav")) { + // Convert WAV to OGG for better Telegram compatibility + oggPath = await convertWavToOgg(voicePath) + if (oggPath) { + audioPath = oggPath + } + } + + // Read the audio file and encode to base64 + const audioData = await readFile(audioPath) + body.voice_base64 = audioData.toString("base64") + + // Clean up converted OGG file + if (oggPath) { + await unlink(oggPath).catch(() => {}) + } + } catch (err) { + console.error("[TTS] Failed to read voice file for Telegram:", err) + // Continue without voice - text notification is still valuable + } + } + + // Only send if we have something to send + if (!body.text && !body.voice_base64) { + return { success: false, error: "No content to send" } + } + + // Send to Supabase Edge Function + const response = await 
fetch(serviceUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }) + + if (!response.ok) { + const errorText = await response.text() + let errorJson: any = {} + try { + errorJson = JSON.parse(errorText) + } catch {} + return { + success: false, + error: errorJson.error || `HTTP ${response.status}: ${errorText.slice(0, 100)}` + } + } + + const result = await response.json() + return { success: result.success, error: result.error } + } catch (err: any) { + return { success: false, error: err?.message || "Network error" } + } +} + +/** + * Check if Telegram notifications are enabled + */ +async function isTelegramEnabled(): Promise { + if (process.env.TELEGRAM_DISABLED === "1") return false + const config = await loadConfig() + return config.telegram?.enabled === true +} + +// ==================== TELEGRAM REPLY SUBSCRIPTION ==================== + +// Default Supabase configuration for reply subscription +const DEFAULT_SUPABASE_URL = "https://slqxwymujuoipyiqscrl.supabase.co" +// Note: Anon key is safe to expose - it only allows public access with RLS +const DEFAULT_SUPABASE_ANON_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InNscXh3eW11anVvaXB5aXFzY3JsIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NjYxMTgwNDUsImV4cCI6MjA4MTY5NDA0NX0.cW79nLOdKsUhZaXIvgY4gGcO4Y4R0lDGNg7SE_zEfb8" + +// Global subscription state +let replySubscription: any = null +let supabaseClient: any = null + +interface TelegramReply { + id: string + uuid: string + session_id: string + directory: string | null + reply_text: string | null // Can be null for voice messages before transcription + telegram_message_id: number + telegram_chat_id: number + created_at: string + processed: boolean + // Voice message fields (populated when is_voice = true) + is_voice?: boolean + audio_base64?: string | null + voice_file_type?: string | null + voice_duration_seconds?: number | null +} + +/** + * Mark a reply as processed in the database + 
*/ +async function markReplyProcessed(replyId: string): Promise { + if (!supabaseClient) return + + try { + await supabaseClient + .from('telegram_replies') + .update({ + processed: true, + processed_at: new Date().toISOString() + }) + .eq('id', replyId) + } catch (err) { + console.error('[TTS] Failed to mark reply as processed:', err) + } +} + +/** + * Initialize Supabase client for realtime subscriptions + * Uses dynamic import to avoid bundling issues + */ +async function initSupabaseClient(config: TTSConfig): Promise { + if (supabaseClient) return supabaseClient + + const telegramConfig = config.telegram + if (!telegramConfig?.enabled) return null + if (telegramConfig.receiveReplies === false) return null + + const supabaseUrl = telegramConfig.supabaseUrl || DEFAULT_SUPABASE_URL + const supabaseKey = telegramConfig.supabaseAnonKey || DEFAULT_SUPABASE_ANON_KEY + + if (!supabaseKey || supabaseKey.includes('example')) { + // Anon key not configured - skip realtime subscription + return null + } + + try { + // Dynamic import to avoid bundling issues in Node.js environment + const { createClient } = await import('@supabase/supabase-js') + supabaseClient = createClient(supabaseUrl, supabaseKey, { + realtime: { + params: { + eventsPerSecond: 2 + } + } + }) + return supabaseClient + } catch (err) { + console.error('[TTS] Failed to initialize Supabase client:', err) + console.error('[TTS] Install @supabase/supabase-js to enable Telegram reply subscription') + return null + } +} + +/** + * Subscribe to Telegram replies for this user + * Replies are forwarded to the appropriate OpenCode session + */ +async function subscribeToReplies( + config: TTSConfig, + client: any, + debugLog: (msg: string) => Promise +): Promise { + if (replySubscription) { + await debugLog('Already subscribed to Telegram replies') + return + } + + const telegramConfig = config.telegram + if (!telegramConfig?.enabled) return + if (telegramConfig.receiveReplies === false) return + + const uuid = 
telegramConfig.uuid || process.env.TELEGRAM_NOTIFICATION_UUID + if (!uuid) { + await debugLog('No UUID configured, skipping reply subscription') + return + } + + const supabase = await initSupabaseClient(config) + if (!supabase) { + await debugLog('Supabase client not available, skipping reply subscription') + return + } + + await debugLog(`Subscribing to Telegram replies for UUID: ${uuid.slice(0, 8)}...`) + + try { + // Subscribe to new replies for this user + replySubscription = supabase + .channel('telegram_replies') + .on( + 'postgres_changes', + { + event: 'INSERT', + schema: 'public', + table: 'telegram_replies', + filter: `uuid=eq.${uuid}` + }, + async (payload: { new: TelegramReply }) => { + const reply = payload.new + + if (reply.processed) { + await debugLog('Reply already processed, skipping') + return + } + + try { + let messageText: string + + // Check if this is a voice message that needs transcription + if (reply.is_voice && reply.audio_base64) { + await debugLog(`Received voice message (${reply.voice_duration_seconds}s ${reply.voice_file_type})`) + + // Transcribe the audio locally with Whisper + const format = reply.voice_file_type === 'voice' ? 
'ogg' : 'mp4' + const transcription = await transcribeWithWhisper(reply.audio_base64, config, format) + + if (!transcription || !transcription.text) { + await debugLog('Transcription failed or returned empty text') + + // Show error toast + await client.tui.publish({ + body: { + type: "toast", + toast: { + title: "Telegram Voice Error", + description: "Failed to transcribe voice message", + severity: "error" + } + } + }) + + // Mark as processed even though it failed (to avoid retry loops) + await markReplyProcessed(reply.id) + return + } + + messageText = transcription.text + await debugLog(`Transcribed: "${messageText.slice(0, 100)}..."`) + } else if (reply.reply_text) { + // Regular text message + await debugLog(`Received Telegram reply: ${reply.reply_text.slice(0, 50)}...`) + messageText = reply.reply_text + } else { + await debugLog('Reply has no text and is not a voice message, skipping') + await markReplyProcessed(reply.id) + return + } + + // Forward the reply to the OpenCode session + const prefix = reply.is_voice ? '[User via Telegram Voice]' : '[User via Telegram]' + await debugLog(`Forwarding reply to session: ${reply.session_id}`) + + await client.session.promptAsync({ + path: { id: reply.session_id }, + body: { + parts: [{ + type: "text", + text: `${prefix}: ${messageText}` + }] + } + }) + + await debugLog('Reply forwarded successfully') + + // Mark as processed + await markReplyProcessed(reply.id) + + // Show toast notification + const toastTitle = reply.is_voice ? "Telegram Voice Message" : "Telegram Reply" + await client.tui.publish({ + body: { + type: "toast", + toast: { + title: toastTitle, + description: `Received: "${messageText.slice(0, 50)}${messageText.length > 50 ? '...' 
: ''}"`, + severity: "info" + } + } + }) + } catch (err: any) { + await debugLog(`Failed to process reply: ${err?.message || err}`) + + // Show error toast + await client.tui.publish({ + body: { + type: "toast", + toast: { + title: "Telegram Reply Error", + description: `Failed to process reply`, + severity: "error" + } + } + }) + } + } + ) + .subscribe((status: string) => { + debugLog(`Reply subscription status: ${status}`) + }) + + await debugLog('Successfully subscribed to Telegram replies') + } catch (err: any) { + await debugLog(`Failed to subscribe to replies: ${err?.message || err}`) + } +} + +/** + * Cleanup reply subscription + */ +async function unsubscribeFromReplies(): Promise { + if (replySubscription && supabaseClient) { + try { + await supabaseClient.removeChannel(replySubscription) + replySubscription = null + } catch {} + } +} + // ==================== PLUGIN ==================== export const TTSPlugin: Plugin = async ({ client, directory }) => { @@ -1375,7 +2299,7 @@ export const TTSPlugin: Plugin = async ({ client, directory }) => { .trim() } - async function speak(text: string, sessionId: string): Promise { + async function speak(text: string, sessionId: string, modelID?: string): Promise { const cleaned = cleanTextForSpeech(text) if (!cleaned) return @@ -1391,6 +2315,8 @@ export const TTSPlugin: Plugin = async ({ client, directory }) => { return } + let generatedAudioPath: string | null = null + try { const config = await loadConfig() const engine = await getEngine() @@ -1403,30 +2329,56 @@ export const TTSPlugin: Plugin = async ({ client, directory }) => { engine, timestamp: new Date().toISOString() }) + + // Check if Telegram is enabled - we may need to keep the audio file + const telegramEnabled = await isTelegramEnabled() + // Generate and play audio based on engine if (engine === "coqui") { const available = await isCoquiAvailable(config) if (available) { - const success = await speakWithCoqui(toSpeak, config) - if (success) { - return + 
const result = await speakWithCoquiAndGetPath(toSpeak, config) + if (result.success) { + generatedAudioPath = result.audioPath || null } } } - if (engine === "chatterbox") { + if (!generatedAudioPath && engine === "chatterbox") { const available = await isChatterboxAvailable(config) if (available) { - const success = await speakWithChatterbox(toSpeak, config) - if (success) { - return + const result = await speakWithChatterboxAndGetPath(toSpeak, config) + if (result.success) { + generatedAudioPath = result.audioPath || null } } } - // OS TTS (fallback or explicit choice) - await speakWithOS(toSpeak, config) + // OS TTS (fallback or explicit choice) - no audio file generated + if (!generatedAudioPath && engine === "os") { + await speakWithOS(toSpeak, config) + } + + // Send Telegram notification if enabled (runs in parallel, non-blocking) + if (telegramEnabled) { + await debugLog(`Sending Telegram notification...`) + const telegramResult = await sendTelegramNotification( + cleaned, + generatedAudioPath, + config, + { model: modelID, directory, sessionId } + ) + if (telegramResult.success) { + await debugLog(`Telegram notification sent successfully`) + } else { + await debugLog(`Telegram notification failed: ${telegramResult.error}`) + } + } } finally { + // Clean up generated audio file + if (generatedAudioPath) { + await unlink(generatedAudioPath).catch(() => {}) + } await releaseSpeechLock(ticketId) await removeSpeechTicket(ticketId) } @@ -1465,6 +2417,19 @@ export const TTSPlugin: Plugin = async ({ client, directory }) => { } catch {} } + // Initialize Telegram reply subscription (non-blocking) + // This handles both text replies and voice messages (voice messages are transcribed with Whisper) + ;(async () => { + try { + const config = await loadConfig() + if (config.telegram?.enabled) { + await subscribeToReplies(config, client, debugLog) + } + } catch (err: any) { + await debugLog(`Failed to initialize reply subscription: ${err?.message || err}`) + } + })() + 
return { event: async ({ event }) => { if (event.type === "session.idle") { @@ -1525,10 +2490,15 @@ export const TTSPlugin: Plugin = async ({ client, directory }) => { const finalResponse = extractFinalResponse(messages) await debugLog(`Final response length: ${finalResponse?.length || 0}`) + // Extract model ID from the last assistant message (use any to handle SDK type limitations) + const msgInfo = lastAssistant?.info as any + const modelID = msgInfo?.modelID || msgInfo?.model || undefined + await debugLog(`Model ID: ${modelID || "unknown"}`) + if (finalResponse) { shouldKeepInSet = true await debugLog(`Speaking now...`) - await speak(finalResponse, sessionId) + await speak(finalResponse, sessionId, modelID) await debugLog(`Speech complete`) } } catch (e: any) { diff --git a/whisper/whisper_server.py b/whisper/whisper_server.py new file mode 100644 index 0000000..f314ef2 --- /dev/null +++ b/whisper/whisper_server.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +""" +Faster Whisper STT Server for OpenCode TTS Plugin + +Lightweight HTTP server that provides speech-to-text transcription +for Telegram voice messages. Runs as a subprocess managed by tts.ts. + +Based on the implementation from opencode-manager. 
+""" + +import os +import sys +import json +import tempfile +import logging +import subprocess +import shutil +import base64 +from pathlib import Path +from typing import Optional + +# Auto-install dependencies if missing +try: + from fastapi import FastAPI, HTTPException + from fastapi.responses import JSONResponse + import uvicorn +except ImportError: + print("Installing required packages...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "fastapi", "uvicorn", "python-multipart"]) + from fastapi import FastAPI, HTTPException + from fastapi.responses import JSONResponse + import uvicorn + +try: + from faster_whisper import WhisperModel +except ImportError: + print("Installing faster-whisper...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "faster-whisper"]) + from faster_whisper import WhisperModel + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +app = FastAPI(title="OpenCode Whisper STT Server", version="1.0.0") + +# Configuration from environment +MODELS_DIR = os.environ.get("WHISPER_MODELS_DIR", str(Path.home() / ".cache" / "whisper")) +DEFAULT_MODEL = os.environ.get("WHISPER_DEFAULT_MODEL", "base") +DEVICE = os.environ.get("WHISPER_DEVICE", "auto") +COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE_TYPE", "auto") + +AVAILABLE_MODELS = [ + "tiny", "tiny.en", + "base", "base.en", + "small", "small.en", + "medium", "medium.en", + "large-v2", "large-v3" +] + +# Model cache to avoid reloading +model_cache: dict[str, WhisperModel] = {} +current_model_name: Optional[str] = None + + +def convert_to_wav(input_path: str) -> str: + """Convert audio file to WAV format using ffmpeg for better compatibility.""" + output_path = input_path.rsplit('.', 1)[0] + '_converted.wav' + + ffmpeg_path = shutil.which('ffmpeg') + if not ffmpeg_path: + logger.warning("ffmpeg not found, using original file") + return input_path + + try: + result = subprocess.run([ + ffmpeg_path, '-y', '-i', input_path, + '-ar', 
'16000', # 16kHz sample rate (Whisper's expected rate) + '-ac', '1', # Mono + '-c:a', 'pcm_s16le', # 16-bit PCM + output_path + ], capture_output=True, timeout=30) + + if result.returncode == 0 and os.path.exists(output_path): + logger.debug(f"Converted {input_path} to {output_path}") + return output_path + else: + logger.warning(f"ffmpeg conversion failed: {result.stderr.decode()[:200]}") + return input_path + except Exception as e: + logger.warning(f"Audio conversion failed: {e}") + return input_path + + +def get_model(model_name: str = DEFAULT_MODEL) -> WhisperModel: + """Get or load a Whisper model (cached).""" + global current_model_name + + if model_name not in AVAILABLE_MODELS: + model_name = DEFAULT_MODEL + + if model_name in model_cache: + return model_cache[model_name] + + logger.info(f"Loading Whisper model: {model_name}") + + # Auto-detect device + device = DEVICE + if device == "auto": + try: + import torch + if torch.cuda.is_available(): + device = "cuda" + elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): + device = "cpu" # MPS not fully supported by faster-whisper, use CPU + else: + device = "cpu" + except ImportError: + device = "cpu" + + # Auto-detect compute type + compute_type = COMPUTE_TYPE + if compute_type == "auto": + compute_type = "float16" if device == "cuda" else "int8" + + model = WhisperModel( + model_name, + device=device, + compute_type=compute_type, + download_root=MODELS_DIR + ) + + model_cache[model_name] = model + current_model_name = model_name + logger.info(f"Model {model_name} loaded successfully on {device} with {compute_type}") + + return model + + +@app.on_event("startup") +async def startup_event(): + """Pre-load the default model on startup.""" + logger.info("Starting OpenCode Whisper STT Server...") + logger.info(f"Models directory: {MODELS_DIR}") + logger.info(f"Default model: {DEFAULT_MODEL}") + try: + get_model(DEFAULT_MODEL) + logger.info("Default model pre-loaded successfully") + except 
Exception as e: + logger.warning(f"Could not pre-load model: {e}. Will load on first request.") + + +@app.get("/health") +async def health(): + """Health check endpoint.""" + return { + "status": "healthy", + "model_loaded": current_model_name is not None, + "current_model": current_model_name, + "available_models": AVAILABLE_MODELS + } + + +@app.get("/models") +async def list_models(): + """List available Whisper models.""" + return { + "models": AVAILABLE_MODELS, + "current": current_model_name, + "default": DEFAULT_MODEL + } + + +@app.post("/transcribe") +async def transcribe(request: dict): + """ + Transcribe audio from base64-encoded data. + + Request body: + { + "audio": "", + "model": "base", // optional, defaults to env var + "language": "en", // optional, null for auto-detect + "format": "ogg" // audio format hint + } + + Response: + { + "text": "transcribed text", + "language": "en", + "language_probability": 0.98, + "duration": 2.5 + } + """ + audio_data = request.get("audio") + model_name = request.get("model", DEFAULT_MODEL) + language = request.get("language") + if language in ("auto", ""): + language = None + file_format = request.get("format", "ogg") + + if not audio_data: + raise HTTPException(status_code=400, detail="No audio data provided") + + tmp_path = None + converted_path = None + + try: + # Handle data URL format + if "," in audio_data: + audio_data = audio_data.split(",")[1] + + # Decode base64 + audio_bytes = base64.b64decode(audio_data) + + # Write to temp file + with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_format}") as tmp_file: + tmp_file.write(audio_bytes) + tmp_path = tmp_file.name + + # Convert to WAV if needed (Telegram voice messages are OGG Opus) + audio_path = tmp_path + if file_format.lower() in ['webm', 'ogg', 'mp4', 'm4a', 'opus', 'oga']: + converted_path = convert_to_wav(tmp_path) + if converted_path != tmp_path: + audio_path = converted_path + + # Load model and transcribe + whisper_model = 
get_model(model_name) + + segments, info = whisper_model.transcribe( + audio_path, + language=language, + task="transcribe", + vad_filter=True, + vad_parameters=dict( + min_silence_duration_ms=500, + speech_pad_ms=400 + ) + ) + + # Collect all segments + segments_list = list(segments) + full_text = " ".join(segment.text.strip() for segment in segments_list) + + return JSONResponse(content={ + "text": full_text, + "language": info.language, + "language_probability": info.language_probability, + "duration": info.duration + }) + + except Exception as e: + logger.error(f"Transcription error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + finally: + # Cleanup temp files + if tmp_path: + try: + os.unlink(tmp_path) + except: + pass + if converted_path and converted_path != tmp_path: + try: + os.unlink(converted_path) + except: + pass + + +if __name__ == "__main__": + port = int(os.environ.get("WHISPER_PORT", "8787")) + host = os.environ.get("WHISPER_HOST", "127.0.0.1") + + logger.info(f"Starting Whisper server on {host}:{port}") + uvicorn.run(app, host=host, port=port, log_level="info") From 63e6323d448391a9845d3d372ac6423c5f33e19b Mon Sep 17 00:00:00 2001 From: Den <2119348+dzianisv@users.noreply.github.com> Date: Sat, 24 Jan 2026 09:54:52 -0800 Subject: [PATCH 2/3] docs: Consolidate telegram docs and add Whisper integration tests - Merge telegram.design.md content into telegram.md (cleaner architecture) - Delete obsolete telegram.design.md - Add Whisper Server integration tests (health, models, transcribe) - Add Whisper dependencies availability checks - All 176 tests passing --- docs/telegram.design.md | 434 ---------------------------------------- docs/telegram.md | 102 ++++++++++ test/tts.test.ts | 200 ++++++++++++++++++ 3 files changed, 302 insertions(+), 434 deletions(-) delete mode 100644 docs/telegram.design.md diff --git a/docs/telegram.design.md b/docs/telegram.design.md deleted file mode 100644 index 1f0ff16..0000000 --- 
a/docs/telegram.design.md +++ /dev/null @@ -1,434 +0,0 @@ -# Telegram Integration Architecture - -## Overview - -The Telegram integration enables two-way communication between OpenCode and users via Telegram: -- **Outbound**: Notifications when tasks complete (text + voice) -- **Inbound**: Users can reply to messages (text, voice, video) to continue conversations - -## System Architecture - -``` -┌─────────────────────────────────────────────────────────────────────────────────┐ -│ TELEGRAM TWO-WAY INTEGRATION │ -├─────────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────────────────────────────────────────────────────────────────┐│ -│ │ OPENCODE (Local Machine) ││ -│ │ ││ -│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ││ -│ │ │ Session 1 │ │ Session 2 │ │ Session 3 │ │ Session N │ ││ -│ │ │ ses_abc... │ │ ses_def... │ │ ses_ghi... │ │ ses_xyz... │ ││ -│ │ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ ││ -│ │ │ │ │ │ ││ -│ │ └────────────────┴────────────────┴────────────────┘ ││ -│ │ │ ││ -│ │ ┌────────▼────────┐ ││ -│ │ │ TTS Plugin │ ││ -│ │ │ (tts.ts) │ ││ -│ │ └────────┬────────┘ ││ -│ │ │ ││ -│ │ ┌─────────────────────────┼─────────────────────────┐ ││ -│ │ │ │ │ ││ -│ │ ▼ ▼ ▼ ││ -│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ││ -│ │ │ TTS Engine │ │ Send HTTP │ │ Supabase │ ││ -│ │ │ (Coqui/OS) │ │ Notifica- │ │ Realtime │ ││ -│ │ │ │ │ tion │ │ Listener │ ││ -│ │ └─────────────┘ └──────┬──────┘ └──────┬──────┘ ││ -│ │ │ │ ││ -│ └──────────────────────────────────┼────────────────────────┼────────────────┘│ -│ │ │ │ -│ │ HTTPS POST │ WebSocket │ -│ │ + session_id │ (postgres_changes) -│ ▼ │ │ -│ ┌──────────────────────────────────────────────────────────┴────────────────┐│ -│ │ SUPABASE ││ -│ │ ││ -│ │ ┌────────────────┐ ┌────────────────┐ ┌────────────────────────┐ ││ -│ │ │ send-notify │ │ telegram- │ │ PostgreSQL DB │ ││ -│ │ │ Edge Function │ │ webhook │ │ │ 
││ -│ │ │ │ │ Edge Function │ │ telegram_subscribers │ ││ -│ │ │ • Lookup UUID │ │ │ │ telegram_reply_contexts││ -│ │ │ • Send to TG │ │ • Commands │ │ telegram_replies │ ││ -│ │ │ • Store context│ │ • Voice STT │ │ │ ││ -│ │ └───────┬────────┘ │ • Video STT │ └────────────────────────┘ ││ -│ │ │ │ • Text replies │ ││ -│ │ │ └───────┬────────┘ ││ -│ └──────────┼─────────────────────┼──────────────────────────────────────────┘│ -│ │ │ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────────────┐ │ -│ │ TELEGRAM BOT API │ │ -│ │ │ │ -│ │ sendMessage ◄─────────────────────────────────► getFile + webhook │ │ -│ │ sendVoice (voice/video/text) │ │ -│ └─────────────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────────────┐ │ -│ │ USER'S TELEGRAM │ │ -│ │ │ │ -│ │ 📱 Receives: "Task Complete [ses_abc123]" │ │ -│ │ 🎤 Can reply: Text, Voice Message, or Video Note │ │ -│ │ │ │ -│ └─────────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────────┘ -``` - -## Message Flow Diagrams - -### 1. Outbound Notification Flow - -``` -┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ -│ OpenCode │ │ TTS Plugin │ │ send-notify │ │ Telegram │ -│ Session │ │ │ │ Edge Func │ │ User │ -└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ - │ │ │ │ - │ session.idle │ │ │ - │──────────────────>│ │ │ - │ │ │ │ - │ │ POST /send-notify │ │ - │ │ { │ │ - │ │ uuid, │ │ - │ │ text, │ │ - │ │ session_id, │ │ - │ │ voice_base64 │ │ - │ │ } │ │ - │ │──────────────────>│ │ - │ │ │ │ - │ │ │ Store context │ - │ │ │ in reply_contexts │ - │ │ │ │ - │ │ │ sendMessage │ - │ │ │ "[ses_abc123] │ - │ │ │ Task Complete" │ - │ │ │──────────────────>│ - │ │ │ │ - │ │ │ sendVoice (opt) │ - │ │ │──────────────────>│ - │ │ │ │ -``` - -### 2. 
Inbound Reply Flow (Text) - -``` -┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ -│ Telegram │ │ telegram- │ │ Supabase │ │ OpenCode │ -│ User │ │ webhook │ │ Realtime │ │ Session │ -└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ - │ │ │ │ - │ "Fix the bug" │ │ │ - │──────────────────>│ │ │ - │ │ │ │ - │ │ Lookup context │ │ - │ │ by chat_id │ │ - │ │──────────────────>│ │ - │ │ │ │ - │ │ Get session_id │ │ - │ │<──────────────────│ │ - │ │ │ │ - │ │ INSERT reply │ │ - │ │ {session_id, │ │ - │ │ reply_text} │ │ - │ │──────────────────>│ │ - │ │ │ │ - │ │ │ Realtime event │ - │ │ │ (postgres_changes)│ - │ │ │──────────────────>│ - │ │ │ │ - │ │ │ promptAsync() │ - │ │ │ "[Telegram]: Fix │ - │ │ │ the bug" │ - │ │ │ │ - │ "Reply sent ✓" │ │ │ - │<──────────────────│ │ │ - │ │ │ │ -``` - -### 3. Inbound Reply Flow (Voice/Video with STT) - -``` -┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ -│ Telegram │ │ telegram- │ │ Whisper STT │ │ Supabase │ -│ User │ │ webhook │ │ Server │ │ Realtime │ -└──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ - │ │ │ │ - │ 🎤 Voice Message │ │ │ - │──────────────────>│ │ │ - │ │ │ │ - │ │ getFile (file_id) │ │ - │ │ Download audio │ │ - │ │ │ │ - │ │ POST /transcribe │ │ - │ │ (audio bytes) │ │ - │ │──────────────────>│ │ - │ │ │ │ - │ │ {"text": "..."} │ │ - │ │<──────────────────│ │ - │ │ │ │ - │ │ INSERT reply │ │ - │ │ {reply_text: │ │ - │ │ transcribed} │ │ - │ │──────────────────────────────────────>│ - │ │ │ │ - │ "Voice received: │ │ │ - │ 'Fix the bug'" │ │ │ - │<──────────────────│ │ │ - │ │ │ │ -``` - -## Database Schema - -### Tables - -```sql --- User subscriptions (existing) -telegram_subscribers ( - uuid UUID PRIMARY KEY, - chat_id BIGINT NOT NULL, - username TEXT, - is_active BOOLEAN DEFAULT TRUE, - notifications_sent INTEGER DEFAULT 0 -) - --- Reply context tracking (for multi-session support) -telegram_reply_contexts ( - id UUID PRIMARY KEY, - chat_id 
BIGINT NOT NULL, - uuid UUID REFERENCES telegram_subscribers(uuid), - session_id TEXT NOT NULL, -- OpenCode session ID - message_id INTEGER, -- Telegram message ID - directory TEXT, -- Working directory - expires_at TIMESTAMPTZ, -- 24-hour expiration - is_active BOOLEAN DEFAULT TRUE -) - --- Incoming replies (Realtime-enabled) -telegram_replies ( - id UUID PRIMARY KEY, - uuid UUID REFERENCES telegram_subscribers(uuid), - session_id TEXT NOT NULL, -- Target OpenCode session - directory TEXT, - reply_text TEXT NOT NULL, -- Text or transcribed audio - telegram_message_id INTEGER, - telegram_chat_id BIGINT NOT NULL, - processed BOOLEAN DEFAULT FALSE, - processed_at TIMESTAMPTZ -) -``` - -### Entity Relationship - -``` -┌─────────────────────┐ ┌─────────────────────┐ -│ telegram_subscribers│ │telegram_reply_contexts -│ │ │ │ -│ uuid (PK) │◄─────│ uuid (FK) │ -│ chat_id │ │ chat_id │ -│ username │ │ session_id │ -│ is_active │ │ message_id │ -│ notifications_sent │ │ directory │ -└─────────────────────┘ │ expires_at │ - │ │ is_active │ - │ └─────────────────────┘ - │ - │ ┌─────────────────────┐ - │ │ telegram_replies │ - │ │ │ - └───────────────────│ uuid (FK) │ - │ session_id │ - │ reply_text │ - │ processed │ - └─────────────────────┘ -``` - -## Session ID in Messages - -To support multiple concurrent OpenCode sessions, the session ID is embedded in outgoing messages: - -``` -🔔 *OpenCode Task Complete* [ses_abc12345] - -Model: claude-sonnet-4 | Dir: my-project -──────────────────────────────────── - -I've completed the refactoring of the authentication module... - -_💬 Reply to continue this session_ -``` - -When a user replies, the webhook: -1. Looks up the most recent `reply_context` for that `chat_id` -2. Extracts the `session_id` -3. Stores the reply with the correct `session_id` -4. 
Plugin receives via Realtime and routes to correct session - -## Voice/Video Message Processing - -### Faster Whisper STT Server - -The Telegram webhook connects to a locally-running Faster Whisper server for speech-to-text: - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ FASTER WHISPER STT SERVER │ -│ │ -│ Location: ~/.config/opencode/whisper/ │ -│ │ -│ ┌─────────────────────────────────────────────────────────┐ │ -│ │ whisper_server.py │ │ -│ │ │ │ -│ │ - Loads faster-whisper model (base/small/medium/large) │ │ -│ │ - HTTP server on localhost:8787 │ │ -│ │ - Endpoint: POST /transcribe │ │ -│ │ - Accepts: audio file (OGG, MP3, WAV, MP4) │ │ -│ │ - Returns: {"text": "transcribed text", "language": "en"}│ │ -│ └─────────────────────────────────────────────────────────┘ │ -│ │ -│ Files: │ -│ - whisper_server.py (HTTP server script) │ -│ - venv/ (Python virtualenv) │ -│ - server.pid (Running server PID) │ -│ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -### Configuration - -Add to `~/.config/opencode/tts.json`: - -```json -{ - "telegram": { - "enabled": true, - "uuid": "your-uuid", - "receiveReplies": true, - "whisperUrl": "http://localhost:8787/transcribe", - "whisperModel": "base" - } -} -``` - -### Supported Audio/Video Formats - -| Telegram Type | File Format | Handling | -|---------------|-------------|----------| -| Voice Message | OGG Opus | Direct transcription | -| Video Note | MP4 | Extract audio, transcribe | -| Audio File | MP3/WAV/OGG | Direct transcription | -| Video File | MP4/MOV | Extract audio, transcribe | - -## Security Model - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ SECURITY LAYERS │ -├─────────────────────────────────────────────────────────────────┤ -│ │ -│ 1. UUID Authentication │ -│ - User generates UUID locally (never transmitted) │ -│ - UUID maps to chat_id (no personal data stored) │ -│ - Can revoke anytime with /stop │ -│ │ -│ 2. 
Rate Limiting │ -│ - 10 notifications per minute per UUID │ -│ - Prevents abuse of notification endpoint │ -│ │ -│ 3. Row Level Security (RLS) │ -│ - All tables have RLS enabled │ -│ - Only service_role can access (Edge Functions) │ -│ - Anon key for Realtime only (filtered by UUID) │ -│ │ -│ 4. Context Expiration │ -│ - Reply contexts expire after 24 hours │ -│ - Automatic cleanup of stale data │ -│ │ -│ 5. Whisper Server (Local) │ -│ - Runs on localhost only │ -│ - No audio data leaves local machine │ -│ - Audio transcribed locally, only text sent to Supabase │ -│ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -## Multi-Session Support - -When multiple OpenCode sessions are running concurrently: - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ CONCURRENT SESSIONS │ -│ │ -│ Session 1 (ses_abc) Session 2 (ses_def) │ -│ ┌─────────────────┐ ┌─────────────────┐ │ -│ │ Working on │ │ Working on │ │ -│ │ auth module │ │ API endpoints │ │ -│ └────────┬────────┘ └────────┬────────┘ │ -│ │ │ │ -│ ▼ ▼ │ -│ Notification sent: Notification sent: │ -│ "[ses_abc] Auth done" "[ses_def] API done" │ -│ │ -│ ┌─────────────────┐ │ -│ │ User replies: │ │ -│ │ "Add tests" │ │ -│ └────────┬────────┘ │ -│ │ │ -│ ▼ │ -│ Routed to most recent │ -│ context (ses_def) │ -│ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -**Routing Rules:** -1. Each notification creates a new `reply_context` entry -2. Previous contexts for same `chat_id` are deactivated -3. User reply goes to the **most recent** active session -4. 
To reply to a specific session, user can quote the message - -## Files Reference - -``` -opencode-reflection-plugin/ -├── tts.ts # Main plugin (client-side) -│ ├── sendTelegramNotification() # Send notifications -│ ├── subscribeToReplies() # Realtime subscription for text replies -│ ├── subscribeToVoiceMessages() # Realtime subscription for voice messages -│ ├── processVoiceMessage() # Download, transcribe, forward voice -│ ├── transcribeWithWhisper() # Local Whisper STT transcription -│ ├── startWhisperServer() # Manage local Whisper server -│ └── initSupabaseClient() # Supabase client setup -│ -├── whisper/ -│ └── whisper_server.py # Local Faster Whisper STT server (port 8787) -│ -├── supabase/ -│ ├── functions/ -│ │ ├── send-notify/ -│ │ │ └── index.ts # Send notifications endpoint -│ │ └── telegram-webhook/ -│ │ └── index.ts # Handle incoming messages (text, voice, video) -│ │ -│ └── migrations/ -│ ├── 20240113000000_create_subscribers.sql # User subscriptions -│ ├── 20240114000000_add_telegram_replies.sql # Text reply support -│ └── 20240115000000_add_voice_messages.sql # Voice/video message support -│ -└── docs/ - └── telegram.design.md # This file -``` - -## Deployment Checklist - -- [ ] Apply database migrations: `supabase db push` -- [ ] Deploy Edge Functions: `supabase functions deploy` -- [ ] Set Telegram webhook URL -- [ ] Configure `tts.json` with UUID -- [ ] Start Whisper STT server (for voice messages) -- [ ] Copy plugin to `~/.config/opencode/plugin/` -- [ ] Restart OpenCode diff --git a/docs/telegram.md b/docs/telegram.md index e24390a..8693b5a 100644 --- a/docs/telegram.md +++ b/docs/telegram.md @@ -286,3 +286,105 @@ opencode-reflection-plugin/ └── docs/ └── telegram.md # This file ``` + +## Database Schema + +### Tables + +```sql +-- User subscriptions +telegram_subscribers ( + uuid UUID PRIMARY KEY, + chat_id BIGINT NOT NULL, + username TEXT, + is_active BOOLEAN DEFAULT TRUE, + notifications_sent INTEGER DEFAULT 0 +) + +-- Reply context tracking 
(for multi-session support) +telegram_reply_contexts ( + id UUID PRIMARY KEY, + chat_id BIGINT NOT NULL, + uuid UUID REFERENCES telegram_subscribers(uuid), + session_id TEXT NOT NULL, -- OpenCode session ID + message_id INTEGER, -- Telegram message ID + directory TEXT, -- Working directory + expires_at TIMESTAMPTZ, -- 24-hour expiration + is_active BOOLEAN DEFAULT TRUE +) + +-- Incoming replies (Realtime-enabled) - unified for text + voice +telegram_replies ( + id UUID PRIMARY KEY, + uuid UUID REFERENCES telegram_subscribers(uuid), + session_id TEXT NOT NULL, + directory TEXT, + reply_text TEXT, -- Text content (nullable for voice) + telegram_message_id INTEGER, + telegram_chat_id BIGINT NOT NULL, + processed BOOLEAN DEFAULT FALSE, + processed_at TIMESTAMPTZ, + -- Voice message fields + is_voice BOOLEAN DEFAULT FALSE, + audio_base64 TEXT, -- Base64 audio from Edge Function + voice_file_type TEXT, -- 'voice', 'video_note', or 'video' + voice_duration_seconds INTEGER +) +``` + +### Supported Audio/Video Formats + +| Telegram Type | File Format | Handling | +|---------------|-------------|----------| +| Voice Message | OGG Opus | Direct transcription | +| Video Note | MP4 | Extract audio, transcribe | +| Audio File | MP3/WAV/OGG | Direct transcription | +| Video File | MP4/MOV | Extract audio, transcribe | + +## Multi-Session Support + +When multiple OpenCode sessions are running concurrently: + +``` +Session 1 (ses_abc) Session 2 (ses_def) +┌─────────────────┐ ┌─────────────────┐ +│ Working on │ │ Working on │ +│ auth module │ │ API endpoints │ +└────────┬────────┘ └────────┬────────┘ + │ │ + ▼ ▼ +Notification sent: Notification sent: +"[ses_abc] Auth done" "[ses_def] API done" + + User replies: + "Add tests" + │ + ▼ + Routed to most recent + context (ses_def) +``` + +**Routing Rules:** +1. Each notification creates a new `reply_context` entry +2. Previous contexts for same `chat_id` are deactivated +3. 
User reply goes to the **most recent** active session + +## Security Model + +| Layer | Description | +|-------|-------------| +| UUID Authentication | User generates UUID locally, maps to chat_id | +| Rate Limiting | 10 notifications per minute per UUID | +| Row Level Security | All tables have RLS, only service_role can access | +| Context Expiration | Reply contexts expire after 24 hours | +| Local Whisper | Speech-to-text runs on a local Whisper server (localhost:8787), not a third-party transcription service | + +## Deployment Checklist + +- [ ] Apply database migrations: `supabase db push` +- [ ] Deploy Edge Functions: `supabase functions deploy` +- [ ] Set Telegram webhook URL to Edge Function +- [ ] Configure `tts.json` with UUID +- [ ] Copy plugin to `~/.config/opencode/plugin/` +- [ ] Restart OpenCode +- [ ] (Optional) Whisper server auto-starts on first voice message diff --git a/test/tts.test.ts b/test/tts.test.ts index fdeb1ae..eb17791 100644 --- a/test/tts.test.ts +++ b/test/tts.test.ts @@ -900,6 +900,206 @@ describe("Telegram Reply Support - Structure Validation", () => { // ==================== VOICE MESSAGE SUPPORT TESTS ==================== +// ==================== WHISPER INTEGRATION TESTS ==================== + +describe("Whisper Server - Integration Tests", () => { + const WHISPER_URL = "http://localhost:8787" + + /** + * Helper to check if Whisper server is running + */ + async function isWhisperRunning(): Promise<boolean> { + try { + const response = await fetch(`${WHISPER_URL}/health`, { + signal: AbortSignal.timeout(2000) + }) + return response.ok + } catch { + return false + } + } + + /** + * Generate a simple test audio (silence) as base64 + * This is a minimal valid WAV file with 0.1s of silence + */ + function generateTestSilenceWav(): string { + // Minimal WAV header for 16-bit PCM, mono, 16kHz + const sampleRate = 16000 + const numChannels = 1 + const bitsPerSample = 16 + const durationSeconds = 0.1 + const numSamples = Math.floor(sampleRate * durationSeconds) + const dataSize = numSamples * 
numChannels * (bitsPerSample / 8) + const fileSize = 44 + dataSize - 8 + + const buffer = Buffer.alloc(44 + dataSize) + + // RIFF header + buffer.write('RIFF', 0) + buffer.writeUInt32LE(fileSize, 4) + buffer.write('WAVE', 8) + + // fmt chunk + buffer.write('fmt ', 12) + buffer.writeUInt32LE(16, 16) // chunk size + buffer.writeUInt16LE(1, 20) // audio format (PCM) + buffer.writeUInt16LE(numChannels, 22) + buffer.writeUInt32LE(sampleRate, 24) + buffer.writeUInt32LE(sampleRate * numChannels * (bitsPerSample / 8), 28) // byte rate + buffer.writeUInt16LE(numChannels * (bitsPerSample / 8), 32) // block align + buffer.writeUInt16LE(bitsPerSample, 34) + + // data chunk + buffer.write('data', 36) + buffer.writeUInt32LE(dataSize, 40) + // Audio data is already zeros (silence) + + return buffer.toString('base64') + } + + it("health endpoint responds when server is running", async () => { + const running = await isWhisperRunning() + if (!running) { + console.log(" [SKIP] Whisper server not running on localhost:8787") + console.log(" Start with: cd ~/.config/opencode/whisper && python whisper_server.py") + return + } + + const response = await fetch(`${WHISPER_URL}/health`) + assert.ok(response.ok, "Health endpoint should return 200") + + const data = await response.json() as { status: string; model_loaded: boolean } + assert.strictEqual(data.status, "healthy", "Status should be healthy") + assert.ok("model_loaded" in data, "Should report model status") + console.log(` [INFO] Whisper server healthy, model loaded: ${data.model_loaded}`) + }) + + it("models endpoint lists available models", async () => { + const running = await isWhisperRunning() + if (!running) { + console.log(" [SKIP] Whisper server not running") + return + } + + const response = await fetch(`${WHISPER_URL}/models`) + assert.ok(response.ok, "Models endpoint should return 200") + + const data = await response.json() as { models: string[]; default: string } + assert.ok(Array.isArray(data.models), "Should return 
array of models") + assert.ok(data.models.includes("base"), "Should include base model") + assert.ok(data.models.includes("tiny"), "Should include tiny model") + }) + + it("transcribe endpoint accepts audio and returns text", async () => { + const running = await isWhisperRunning() + if (!running) { + console.log(" [SKIP] Whisper server not running") + return + } + + // Use minimal silence audio - Whisper should return empty or minimal text + const testAudio = generateTestSilenceWav() + + const response = await fetch(`${WHISPER_URL}/transcribe`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + audio: testAudio, + format: "wav", + model: "base" // Use base model for faster testing + }), + signal: AbortSignal.timeout(30000) // 30 second timeout for transcription + }) + + assert.ok(response.ok, `Transcribe endpoint should return 200, got ${response.status}`) + + const data = await response.json() as { text: string; language: string; duration: number } + assert.ok("text" in data, "Response should include text field") + assert.ok("language" in data, "Response should include language field") + assert.ok("duration" in data, "Response should include duration field") + + console.log(` [INFO] Transcription successful - text: "${data.text}", duration: ${data.duration}s`) + }) + + it("transcribe endpoint handles invalid audio gracefully", async () => { + const running = await isWhisperRunning() + if (!running) { + console.log(" [SKIP] Whisper server not running") + return + } + + // Send well-formed base64 whose decoded bytes are not valid audio + const response = await fetch(`${WHISPER_URL}/transcribe`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + audio: Buffer.from("not valid audio data").toString("base64"), + format: "ogg" + }), + signal: AbortSignal.timeout(10000) + }) + + // Server should return an error status (400 or 500) for invalid audio, not crash + assert.ok(response.status === 500 || response.status === 400, 
+ `Should return error status for invalid audio, got ${response.status}`) + }) + + it("transcribe endpoint requires audio field", async () => { + const running = await isWhisperRunning() + if (!running) { + console.log(" [SKIP] Whisper server not running") + return + } + + const response = await fetch(`${WHISPER_URL}/transcribe`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({}) + }) + + assert.strictEqual(response.status, 400, "Should return 400 for missing audio") + }) +}) + +describe("Whisper Dependencies - Availability Check", () => { + it("checks if faster-whisper can be imported", async () => { + try { + await execAsync('python3 -c "from faster_whisper import WhisperModel; print(\'ok\')"', { timeout: 10000 }) + console.log(" [INFO] faster-whisper is installed and available") + } catch { + console.log(" [INFO] faster-whisper not installed") + console.log(" Install with: pip install faster-whisper") + } + // Test always passes - informational only + assert.ok(true) + }) + + it("checks if fastapi and uvicorn are available", async () => { + try { + await execAsync('python3 -c "from fastapi import FastAPI; import uvicorn; print(\'ok\')"', { timeout: 10000 }) + console.log(" [INFO] FastAPI and uvicorn are installed") + } catch { + console.log(" [INFO] FastAPI/uvicorn not installed") + console.log(" Install with: pip install fastapi uvicorn") + } + assert.ok(true) + }) + + it("checks if ffmpeg is available for audio conversion", async () => { + try { + await execAsync("which ffmpeg") + console.log(" [INFO] ffmpeg is available for audio format conversion") + } catch { + console.log(" [INFO] ffmpeg not installed - audio conversion will be limited") + console.log(" Install with: brew install ffmpeg") + } + assert.ok(true) + }) +}) + +// ==================== VOICE MESSAGE SUPPORT TESTS ==================== + describe("Telegram Voice Message Support - Structure Validation", () => { let ttsContent: string | null = null let 
webhookContent: string | null = null From 39105a4c1d3527b984e0fc41cb6728cdd7069bb8 Mon Sep 17 00:00:00 2001 From: Den <2119348+dzianisv@users.noreply.github.com> Date: Sat, 24 Jan 2026 10:06:26 -0800 Subject: [PATCH 3/3] refactor: Consolidate plugin helpers under ~/.config/opencode/opencode-helpers - Move whisper/, chatterbox/, coqui/ under opencode-helpers/ - Add HELPERS_DIR base constant in tts.ts - Update all paths in code, tests, and documentation - All 176 tests passing --- AGENTS.md | 8 ++++---- README.md | 13 +++++++------ docs/tts.design.md | 8 ++++---- test/tts.e2e.test.ts | 2 +- test/tts.test.ts | 2 +- tts.ts | 10 +++++++--- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index ad04942..13be8c1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -62,7 +62,7 @@ Edit `~/.config/opencode/tts.json`: ``` ### Chatterbox Server Files -Located in `~/.config/opencode/chatterbox/`: +Located in `~/.config/opencode/opencode-helpers/chatterbox/`: - `tts.py` - One-shot TTS script - `tts_server.py` - Persistent server script - `tts.sock` - Unix socket for IPC @@ -79,13 +79,13 @@ npm run test:tts:manual # Actually speaks test phrases ### Debugging ```bash # Check if Chatterbox server is running -ls -la ~/.config/opencode/chatterbox/tts.sock +ls -la ~/.config/opencode/opencode-helpers/chatterbox/tts.sock # Check server PID -cat ~/.config/opencode/chatterbox/server.pid +cat ~/.config/opencode/opencode-helpers/chatterbox/server.pid # Stop server manually -kill $(cat ~/.config/opencode/chatterbox/server.pid) +kill $(cat ~/.config/opencode/opencode-helpers/chatterbox/server.pid) # Check server logs (stderr) # Server automatically restarts on next TTS request diff --git a/README.md b/README.md index 084dd84..f88cd7b 100644 --- a/README.md +++ b/README.md @@ -494,23 +494,24 @@ When using Coqui or Chatterbox with `serverMode: true` (default), the plugin run ``` **Server files:** -- Coqui: `~/.config/opencode/coqui/` (tts.sock, server.pid, server.lock, 
venv/) -- Chatterbox: `~/.config/opencode/chatterbox/` (tts.sock, server.pid, server.lock, venv/) +- Coqui: `~/.config/opencode/opencode-helpers/coqui/` (tts.sock, server.pid, server.lock, venv/) +- Chatterbox: `~/.config/opencode/opencode-helpers/chatterbox/` (tts.sock, server.pid, server.lock, venv/) +- Whisper: `~/.config/opencode/opencode-helpers/whisper/` (whisper_server.py, server.pid, venv/) - Speech lock: `~/.config/opencode/speech.lock` **Managing the server:** ```bash # Check if Coqui server is running -ls -la ~/.config/opencode/coqui/tts.sock +ls -la ~/.config/opencode/opencode-helpers/coqui/tts.sock # Stop the Coqui server manually -kill $(cat ~/.config/opencode/coqui/server.pid) +kill $(cat ~/.config/opencode/opencode-helpers/coqui/server.pid) # Check if Chatterbox server is running -ls -la ~/.config/opencode/chatterbox/tts.sock +ls -la ~/.config/opencode/opencode-helpers/chatterbox/tts.sock # Stop the Chatterbox server manually -kill $(cat ~/.config/opencode/chatterbox/server.pid) +kill $(cat ~/.config/opencode/opencode-helpers/chatterbox/server.pid) # Server restarts automatically on next TTS request ``` diff --git a/docs/tts.design.md b/docs/tts.design.md index e54067d..8fe2382 100644 --- a/docs/tts.design.md +++ b/docs/tts.design.md @@ -89,7 +89,7 @@ Ensures multiple OpenCode sessions speak one at a time in FIFO order. Single persistent process that keeps the TTS model loaded for fast inference. 
-**Location:** `~/.config/opencode/coqui/` +**Location:** `~/.config/opencode/opencode-helpers/coqui/` **Files:** - `tts_server.py` - Server script @@ -175,10 +175,10 @@ Response (JSON): ps aux | grep tts_server # Check server PID -cat ~/.config/opencode/coqui/server.pid +cat ~/.config/opencode/opencode-helpers/coqui/server.pid # Stop server -kill $(cat ~/.config/opencode/coqui/server.pid) +kill $(cat ~/.config/opencode/opencode-helpers/coqui/server.pid) # Server auto-restarts on next TTS request @@ -187,7 +187,7 @@ tail -f /tmp/tts_server.log # Test server directly echo '{"text": "Hello", "output": "/tmp/test.wav"}' | \ - nc -U ~/.config/opencode/coqui/tts.sock && \ + nc -U ~/.config/opencode/opencode-helpers/coqui/tts.sock && \ afplay /tmp/test.wav ``` diff --git a/test/tts.e2e.test.ts b/test/tts.e2e.test.ts index 31cf2bd..9864d06 100644 --- a/test/tts.e2e.test.ts +++ b/test/tts.e2e.test.ts @@ -23,7 +23,7 @@ const __dirname = dirname(fileURLToPath(import.meta.url)) const RUN_E2E = process.env.OPENCODE_TTS_E2E === "1" // Paths -const CHATTERBOX_DIR = join(process.env.HOME || "", ".config/opencode/chatterbox") +const CHATTERBOX_DIR = join(process.env.HOME || "", ".config/opencode/opencode-helpers/chatterbox") const CHATTERBOX_VENV = join(CHATTERBOX_DIR, "venv") const CHATTERBOX_SCRIPT = join(CHATTERBOX_DIR, "tts.py") const VENV_PYTHON = join(CHATTERBOX_VENV, "bin/python") diff --git a/test/tts.test.ts b/test/tts.test.ts index eb17791..662da43 100644 --- a/test/tts.test.ts +++ b/test/tts.test.ts @@ -962,7 +962,7 @@ describe("Whisper Server - Integration Tests", () => { const running = await isWhisperRunning() if (!running) { console.log(" [SKIP] Whisper server not running on localhost:8787") - console.log(" Start with: cd ~/.config/opencode/whisper && python whisper_server.py") + console.log(" Start with: cd ~/.config/opencode/opencode-helpers/whisper && python whisper_server.py") return } diff --git a/tts.ts b/tts.ts index 7b63fb8..32be233 100644 --- a/tts.ts +++ 
b/tts.ts @@ -99,9 +99,13 @@ interface TTSConfig { } } +// ==================== HELPERS BASE DIRECTORY ==================== + +const HELPERS_DIR = join(homedir(), ".config", "opencode", "opencode-helpers") + // ==================== WHISPER STT ==================== -const WHISPER_DIR = join(homedir(), ".config", "opencode", "whisper") +const WHISPER_DIR = join(HELPERS_DIR, "whisper") const WHISPER_VENV = join(WHISPER_DIR, "venv") const WHISPER_SERVER_SCRIPT = join(WHISPER_DIR, "whisper_server.py") const WHISPER_PID = join(WHISPER_DIR, "server.pid") @@ -114,7 +118,7 @@ let whisperServerProcess: ReturnType | null = null // ==================== CHATTERBOX ==================== -const CHATTERBOX_DIR = join(homedir(), ".config", "opencode", "chatterbox") +const CHATTERBOX_DIR = join(HELPERS_DIR, "chatterbox") const CHATTERBOX_VENV = join(CHATTERBOX_DIR, "venv") const CHATTERBOX_SCRIPT = join(CHATTERBOX_DIR, "tts.py") const CHATTERBOX_SERVER_SCRIPT = join(CHATTERBOX_DIR, "tts_server.py") @@ -127,7 +131,7 @@ let chatterboxSetupAttempted = false // ==================== COQUI TTS ==================== -const COQUI_DIR = join(homedir(), ".config", "opencode", "coqui") +const COQUI_DIR = join(HELPERS_DIR, "coqui") const COQUI_VENV = join(COQUI_DIR, "venv") const COQUI_SCRIPT = join(COQUI_DIR, "tts.py") const COQUI_SERVER_SCRIPT = join(COQUI_DIR, "tts_server.py")