From edfc98b22c013cbc0741f4632ec01ebd05c7a288 Mon Sep 17 00:00:00 2001
From: Korivi
Date: Fri, 3 Apr 2026 04:23:10 +0900
Subject: [PATCH 1/2] Added STT (Speech-to-Text) to the chat area

Added STT (Speech-to-Text) to the chat area, which supports multiple
languages.

Also adds an empty "deepseek" entry to the API-key section of
settings.json; the actual key must be supplied locally and must never
be committed to the repository.
---
 app/config/settings.json                      |   3 +-
 .../src/pages/Chat/ChatPage.module.css        | 122 +++++++++++++++
 .../frontend/src/pages/Chat/ChatPage.tsx      | 145 +++++++++++++++++-
 3 files changed, 266 insertions(+), 4 deletions(-)

diff --git a/app/config/settings.json b/app/config/settings.json
index 93816e9..02af9b6 100644
--- a/app/config/settings.json
+++ b/app/config/settings.json
@@ -19,7 +19,8 @@
     "openai": "",
     "anthropic": "",
     "google": "",
-    "byteplus": ""
+    "byteplus": "",
+    "deepseek": ""
   },
   "endpoints": {
     "remote_model_url": "",
diff --git a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css
index 966211f..21b5d5f 100644
--- a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css
+++ b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css
@@ -249,6 +249,128 @@
   color: var(--text-muted);
 }
 
+.inputListening {
+  border-color: var(--color-primary);
+  box-shadow: 0 0 0 2px var(--color-primary-subtle);
+}
+
+/* Mic button + language selector grouped together */
+.micGroup {
+  display: flex;
+  align-items: center;
+  gap: 2px;
+  position: relative;
+}
+
+.langBtn {
+  background: transparent;
+  border: none;
+  color: var(--text-primary);
+  font-size: 10px;
+  font-family: inherit;
+  font-weight: 600;
+  cursor: pointer;
+  padding: 2px 3px;
+  border-radius: var(--radius-sm);
+  line-height: 1;
+  outline: none;
+  white-space: nowrap;
+}
+
+.langBtn:hover:not(:disabled) {
+  background: var(--bg-tertiary);
+}
+
+.langBtn:disabled {
+  opacity: 0.4;
+  cursor: not-allowed;
+}
+
+.langDropdown {
+  position: absolute;
+  bottom: calc(100% + 6px);
+  left: 0;
+  background: var(--bg-secondary);
+ 
border: 1px solid var(--border-primary); + border-radius: var(--radius-md); + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5); + overflow: hidden; + z-index: 999; + min-width: 130px; +} + +.langOption { + display: flex; + align-items: center; + gap: 8px; + width: 100%; + background: transparent; + border: none; + color: var(--text-secondary); + font-family: inherit; + font-size: var(--text-sm); + padding: 7px 12px; + cursor: pointer; + text-align: left; +} + +.langOption:hover { + background: var(--bg-tertiary); + color: var(--text-primary); +} + +.langOptionActive { + color: var(--color-primary); +} + +.langCode { + font-weight: 600; + font-size: 11px; + width: 36px; + flex-shrink: 0; +} + +.langFull { + font-size: 11px; + opacity: 0.8; +} + +/* 3 bouncing dots shown while listening */ +.listeningDots { + display: flex; + align-items: center; + gap: 4px; + padding: 4px var(--space-3) 0; +} + +.listeningDots span { + display: block; + width: 6px; + height: 6px; + border-radius: 50%; + background: var(--color-primary); + animation: dotBounce 1.2s ease-in-out infinite; +} + +.listeningDots span:nth-child(1) { animation-delay: 0s; } +.listeningDots span:nth-child(2) { animation-delay: 0.2s; } +.listeningDots span:nth-child(3) { animation-delay: 0.4s; } + +@keyframes dotBounce { + 0%, 60%, 100% { transform: translateY(0); opacity: 0.4; } + 30% { transform: translateY(-5px); opacity: 1; } +} + +/* Mic button pulse animation when recording */ +.micListening { + animation: micPulse 1.2s ease-in-out infinite; +} + +@keyframes micPulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.4; } +} + /* Action Panel - Right Side (resizable) */ .actionPanel { display: flex; diff --git a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx index 776c9da..d1252e6 100644 --- a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx @@ -1,5 +1,5 @@ import React, { 
useState, useRef, useEffect, useLayoutEffect, KeyboardEvent, useCallback, ChangeEvent, useMemo } from 'react' -import { Send, Paperclip, X, Loader2, File, AlertCircle, Reply } from 'lucide-react' +import { Send, Paperclip, X, Loader2, File, AlertCircle, Reply, Mic, MicOff } from 'lucide-react' import { useVirtualizer } from '@tanstack/react-virtual' import { useLocation } from 'react-router-dom' import { useWebSocket } from '../../contexts/WebSocketContext' @@ -16,6 +16,22 @@ interface PendingAttachment { content: string // base64 } +const MIC_LANGUAGES = [ + { code: 'en-US', label: 'EN', full: 'English' }, + { code: 'ja-JP', label: 'JA', full: '日本語' }, + { code: 'zh-CN', label: 'ZH', full: '中文 (简体)' }, + { code: 'zh-TW', label: 'ZH-TW', full: '中文 (繁體)' }, + { code: 'ko-KR', label: 'KO', full: '한국어' }, + { code: 'ar-SA', label: 'AR', full: 'العربية' }, + { code: 'es-ES', label: 'ES', full: 'Español' }, + { code: 'fr-FR', label: 'FR', full: 'Français' }, + { code: 'de-DE', label: 'DE', full: 'Deutsch' }, + { code: 'pt-BR', label: 'PT', full: 'Português' }, + { code: 'hi-IN', label: 'HI', full: 'हिन्दी' }, + { code: 'ru-RU', label: 'RU', full: 'Русский' }, + { code: 'it-IT', label: 'IT', full: 'Italiano' }, +] + // Panel width limits const DEFAULT_PANEL_WIDTH = 380 const MIN_PANEL_WIDTH = 200 @@ -55,6 +71,14 @@ export function ChatPage() { const historyIndexRef = useRef(-1) const draftRef = useRef('') const fileInputRef = useRef(null) + const [isListening, setIsListening] = useState(false) + const recognitionRef = useRef(null) + const [micLang, setMicLang] = useState(() => { + const browserLang = navigator.language || 'en-US' + return MIC_LANGUAGES.some(l => l.code === browserLang) ? 
browserLang : 'en-US' + }) + const [langOpen, setLangOpen] = useState(false) + const langDropdownRef = useRef(null) // Virtualization refs const parentRef = useRef(null) @@ -110,6 +134,18 @@ export function ChatPage() { return lastSeenIdx + 1 // First unread is after last seen }, [messages, lastSeenMessageId]) + // Close language dropdown when clicking outside + useEffect(() => { + if (!langOpen) return + const handler = (e: MouseEvent) => { + if (langDropdownRef.current && !langDropdownRef.current.contains(e.target as Node)) { + setLangOpen(false) + } + } + document.addEventListener('mousedown', handler) + return () => document.removeEventListener('mousedown', handler) + }, [langOpen]) + // Check if user is scrolled near the bottom const isNearBottom = useCallback(() => { const container = parentRef.current @@ -252,6 +288,61 @@ export function ChatPage() { inputRef.current?.focus() }, [setReplyTarget]) + const toggleListening = useCallback(() => { + if (isListening) { + recognitionRef.current?.stop() + setIsListening(false) + return + } + + const SpeechRecognitionAPI = (window as typeof window & { SpeechRecognition?: typeof SpeechRecognition; webkitSpeechRecognition?: typeof SpeechRecognition }).SpeechRecognition + || (window as typeof window & { SpeechRecognition?: typeof SpeechRecognition; webkitSpeechRecognition?: typeof SpeechRecognition }).webkitSpeechRecognition + + if (!SpeechRecognitionAPI) { + alert('Speech recognition is not supported in this browser.') + return + } + + const recognition = new SpeechRecognitionAPI() + recognition.continuous = true + recognition.interimResults = true + recognition.lang = micLang + + recognition.onresult = (event: SpeechRecognitionEvent) => { + let finalTranscript = '' + for (let i = event.resultIndex; i < event.results.length; i++) { + if (event.results[i].isFinal) { + finalTranscript += event.results[i][0].transcript + } + } + if (finalTranscript) { + setInput(prev => prev + (prev.endsWith(' ') || prev === '' ? 
'' : ' ') + finalTranscript) + if (inputRef.current) { + inputRef.current.style.height = 'auto' + inputRef.current.style.height = inputRef.current.scrollHeight + 'px' + } + } + } + + recognition.onerror = (event: SpeechRecognitionErrorEvent) => { + setIsListening(false) + if (event.error === 'not-allowed' || event.error === 'service-not-allowed') { + alert('Microphone access denied. Please allow microphone permission in your browser settings.') + } + } + recognition.onend = () => setIsListening(false) + + recognitionRef.current = recognition + recognition.start() + setIsListening(true) + inputRef.current?.focus() + }, [isListening, micLang]) + + // Stop mic if component unmounts while listening + useEffect(() => { + return () => { recognitionRef.current?.abort() } + }, []) + const handleSend = () => { // Don't send if there are validation errors if (!attachmentValidation.valid) return @@ -270,6 +361,12 @@ export function ChatPage() { originalMessage: replyTarget.originalContent, } : undefined + // Stop mic if still listening when message is sent + if (isListening) { + recognitionRef.current?.stop() + setIsListening(false) + } + sendMessage( input.trim(), pendingAttachments.length > 0 ? pendingAttachments : undefined, @@ -491,6 +588,39 @@ export function ChatPage() { onClick={handleAttachClick} /> +
+ : } + variant="ghost" + active={isListening} + tooltip={isListening ? 'Stop listening' : 'Voice input'} + onClick={toggleListening} + className={isListening ? styles.micListening : undefined} + /> + + {langOpen && ( +
+ {MIC_LANGUAGES.map(lang => ( + + ))} +
+ )} +
+
{/* Attachment error message */} {(attachmentError || !attachmentValidation.valid) && ( @@ -548,14 +678,23 @@ export function ChatPage() {
)} + {isListening && ( +
+ +
+ )} + +