Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,10 @@ jobs:
pip install -r backend/requirements.txt
pip install --no-deps chatterbox-tts

- name: Install PyTorch with CUDA 12.1
- name: Install PyTorch with CUDA 12.6
run: |
pip install torch --index-url https://download.pytorch.org/whl/cu121 --force-reinstall --no-deps
pip install torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install torch --index-url https://download.pytorch.org/whl/cu126 --force-reinstall --no-deps
pip install torchaudio --index-url https://download.pytorch.org/whl/cu126 --force-reinstall --no-deps

- name: Verify CUDA support in torch
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ logs/
app/openapi.json
tauri/src-tauri/binaries/*
tauri/src-tauri/gen/Assets.car
tauri/src-tauri/gen/voicebox.icns

# Temporary
tmp/
Expand Down
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
&& rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip

COPY backend/requirements.txt .
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
RUN pip install --no-cache-dir --prefix=/install \
Expand Down
17 changes: 16 additions & 1 deletion app/src/components/History/HistoryTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,29 @@ export function HistoryTable() {
}
}, [historyData, page]);

// Reset to page 0 when deletions or imports occur
// Reset to page 0 when deletions, imports, or generation completions occur
const pendingCount = useGenerationStore((state) => state.pendingGenerationIds.size);
const prevPendingCountRef = useRef(pendingCount);
useEffect(() => {
if (deleteGeneration.isSuccess || importGeneration.isSuccess) {
setPage(0);
setAllHistory([]);
}
}, [deleteGeneration.isSuccess, importGeneration.isSuccess]);

useEffect(() => {
// A generation finished (pending count decreased) — scroll back to show it
if (
prevPendingCountRef.current > 0 &&
pendingCount < prevPendingCountRef.current &&
page !== 0
) {
setPage(0);
setAllHistory([]);
}
prevPendingCountRef.current = pendingCount;
}, [pendingCount, page]);

// Intersection Observer for infinite scroll
useEffect(() => {
const loadMoreEl = loadMoreRef.current;
Expand Down
47 changes: 35 additions & 12 deletions app/src/lib/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,24 @@ import type {
TranscriptionResponse,
VoiceProfileCreate,
VoiceProfileResponse,
WhisperModelSize,
} from './types';

/**
 * Normalize a FastAPI-style error `detail` payload into a human-readable message.
 *
 * Handles the three shapes the backend emits:
 * - a plain string detail,
 * - an array of validation errors (each usually carrying `msg` or `message`),
 * - an object with an optional `message` field.
 *
 * @param detail - The `detail` field parsed from an error response body; shape unknown.
 * @param fallback - Message to use when `detail` carries no usable text
 *                   (e.g. `HTTP error! status: 500`).
 * @returns A non-empty, displayable error string.
 */
function formatErrorDetail(detail: unknown, fallback: string): string {
  if (typeof detail === 'string') return detail;
  if (Array.isArray(detail)) {
    const parts = detail.map((entry) => {
      // Narrow each element individually instead of trusting the array's
      // element shape; non-string msg/message must not leak "[object Object]".
      if (entry && typeof entry === 'object') {
        const rec = entry as Record<string, unknown>;
        if (typeof rec.msg === 'string') return rec.msg;
        if (typeof rec.message === 'string') return rec.message;
      }
      return JSON.stringify(entry);
    });
    // An empty validation list carries no information — use the fallback
    // rather than returning an empty string.
    return parts.length > 0 ? parts.join('; ') : fallback;
  }
  if (detail && typeof detail === 'object') {
    const obj = detail as Record<string, unknown>;
    if (typeof obj.message === 'string') return obj.message;
    return JSON.stringify(detail);
  }
  return fallback;
}

class ApiClient {
private getBaseUrl(): string {
const serverUrl = useServerStore.getState().serverUrl;
Expand All @@ -54,7 +70,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.json();
Expand Down Expand Up @@ -113,7 +129,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.json();
Expand Down Expand Up @@ -147,7 +163,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.blob();
Expand All @@ -167,7 +183,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.json();
Expand All @@ -187,7 +203,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.json();
Expand Down Expand Up @@ -257,7 +273,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.blob();
Expand All @@ -271,7 +287,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.blob();
Expand All @@ -297,7 +313,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.json();
Expand All @@ -318,12 +334,19 @@ class ApiClient {
}

// Transcription
async transcribeAudio(file: File, language?: LanguageCode): Promise<TranscriptionResponse> {
async transcribeAudio(
file: File,
language?: LanguageCode,
model?: WhisperModelSize,
): Promise<TranscriptionResponse> {
const formData = new FormData();
formData.append('file', file);
if (language) {
formData.append('language', language);
}
if (model) {
formData.append('model', model);
}

const url = `${this.getBaseUrl()}/transcribe`;
const response = await fetch(url, {
Expand All @@ -335,7 +358,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.json();
Expand Down Expand Up @@ -608,7 +631,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.blob();
Expand Down Expand Up @@ -705,7 +728,7 @@ class ApiClient {
const error = await response.json().catch(() => ({
detail: response.statusText,
}));
throw new Error(error.detail || `HTTP error! status: ${response.status}`);
throw new Error(formatErrorDetail(error.detail, `HTTP error! status: ${response.status}`));
}

return response.blob();
Expand Down
3 changes: 3 additions & 0 deletions app/src/lib/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,11 @@ export interface HistoryListResponse {
total: number;
}

/** Whisper model sizes accepted by the backend's `model` form field. */
export type WhisperModelSize = 'base' | 'small' | 'medium' | 'large' | 'turbo';

/** Optional parameters for an audio transcription request. */
export interface TranscriptionRequest {
  /** Language hint for transcription; omit to let the backend auto-detect. */
  language?: LanguageCode;
  /** Whisper model size override; omit to use the backend's default model. */
  model?: WhisperModelSize;
}

export interface TranscriptionResponse {
Expand Down
11 changes: 6 additions & 5 deletions app/src/lib/hooks/useGenerationProgress.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ export function useGenerationProgress() {
currentSources.delete(id);
removePendingGeneration(id);

// Refresh history to pick up the completed generation
queryClient.invalidateQueries({ queryKey: ['history'] });
// Refetch history to pick up the completed generation
queryClient.refetchQueries({ queryKey: ['history'] });

// If this generation was queued for a story, add it now
const storyId = removePendingStoryAdd(id);
Expand Down Expand Up @@ -120,7 +120,7 @@ export function useGenerationProgress() {
removePendingGeneration(id);
removePendingStoryAdd(id);

queryClient.invalidateQueries({ queryKey: ['history'] });
queryClient.refetchQueries({ queryKey: ['history'] });

toast({
title: data.status === 'not_found' ? 'Generation not found' : 'Generation failed',
Expand All @@ -134,11 +134,12 @@ export function useGenerationProgress() {
};

source.onerror = () => {
// EventSource auto-reconnects, but if we get repeated errors
// just clean up
// SSE connection dropped — clean up and refresh history so any
// completed/failed generation still appears in the list
source.close();
currentSources.delete(id);
removePendingGeneration(id);
queryClient.refetchQueries({ queryKey: ['history'] });
};

currentSources.set(id, source);
Expand Down
12 changes: 10 additions & 2 deletions app/src/lib/hooks/useTranscription.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import { useMutation } from '@tanstack/react-query';
import { apiClient } from '@/lib/api/client';
import type { WhisperModelSize } from '@/lib/api/types';
import type { LanguageCode } from '@/lib/constants/languages';

export function useTranscription() {
return useMutation({
mutationFn: ({ file, language }: { file: File; language?: LanguageCode }) =>
apiClient.transcribeAudio(file, language),
mutationFn: ({
file,
language,
model,
}: {
file: File;
language?: LanguageCode;
model?: WhisperModelSize;
}) => apiClient.transcribeAudio(file, language, model),
});
}
1 change: 1 addition & 0 deletions backend/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ async def transcribe(
self,
audio_path: str,
language: Optional[str] = None,
model_size: Optional[str] = None,
) -> str:
"""
Transcribe audio to text.
Expand Down
6 changes: 4 additions & 2 deletions backend/backends/mlx_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,18 +345,20 @@ async def transcribe(
self,
audio_path: str,
language: Optional[str] = None,
model_size: Optional[str] = None,
) -> str:
"""
Transcribe audio to text.

Args:
audio_path: Path to audio file
language: Optional language hint (en or zh)
language: Optional language hint
model_size: Optional model size override

Returns:
Transcribed text
"""
await self.load_model_async(None)
await self.load_model_async(model_size)

def _transcribe_sync():
"""Run synchronous transcription in thread pool."""
Expand Down
6 changes: 4 additions & 2 deletions backend/backends/pytorch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,18 +306,20 @@ async def transcribe(
self,
audio_path: str,
language: Optional[str] = None,
model_size: Optional[str] = None,
) -> str:
"""
Transcribe audio to text.

Args:
audio_path: Path to audio file
language: Optional language hint (en or zh)
language: Optional language hint
model_size: Optional model size override

Returns:
Transcribed text
"""
await self.load_model_async(None)
await self.load_model_async(model_size)

def _transcribe_sync():
"""Run synchronous transcription in thread pool."""
Expand Down
3 changes: 2 additions & 1 deletion backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ class HistoryListResponse(BaseModel):
class TranscriptionRequest(BaseModel):
"""Request model for audio transcription."""

language: Optional[str] = Field(None, pattern="^(en|zh)$")
language: Optional[str] = Field(None, pattern="^(en|zh|ja|ko|de|fr|ru|pt|es|it)$")
model: Optional[str] = Field(None, pattern="^(base|small|medium|large|turbo)$")


class TranscriptionResponse(BaseModel):
Expand Down
Loading