diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f7ea583a..b4a3b0ea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -359,7 +359,7 @@ Releases are managed by maintainers: ## Troubleshooting -See [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) for common issues and solutions. +See [docs/content/docs/overview/troubleshooting.mdx](docs/content/docs/overview/troubleshooting.mdx) for common issues and solutions. **Quick fixes:** @@ -372,7 +372,7 @@ See [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) for common issues and sol - Open an issue for bugs or feature requests - Check existing issues and discussions - Review the codebase to understand patterns -- See [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) for common issues +- See [docs/content/docs/overview/troubleshooting.mdx](docs/content/docs/overview/troubleshooting.mdx) for common issues ## Additional Resources diff --git a/README.md b/README.md index 1ee08c33..8d220202 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,8 @@ DocsDownloadFeatures • - API + API • + Troubleshooting


@@ -91,6 +92,8 @@ Voicebox is a **local-first voice cloning studio** — a free and open-source al > **Linux** — Pre-built binaries are not yet available. See [voicebox.sh/linux-install](https://voicebox.sh/linux-install) for build-from-source instructions. +> **Having trouble?** See the [Troubleshooting Guide](docs/content/docs/overview/troubleshooting.mdx) for common install, generation, model-download, and GPU issues. + --- ## Features diff --git a/bun.lock b/bun.lock index d6b63ee8..a6f353a2 100644 --- a/bun.lock +++ b/bun.lock @@ -17,7 +17,7 @@ }, "app": { "name": "@voicebox/app", - "version": "0.2.0", + "version": "0.4.1", "dependencies": { "@dnd-kit/core": "^6.3.1", "@dnd-kit/sortable": "^10.0.0", @@ -72,9 +72,10 @@ }, "landing": { "name": "@voicebox/landing", - "version": "0.2.0", + "version": "0.4.1", "dependencies": { "@fontsource/space-grotesk": "^5.2.10", + "@icons-pack/react-simple-icons": "^13.13.0", "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "autoprefixer": "^10.4.17", @@ -100,7 +101,7 @@ }, "tauri": { "name": "@voicebox/tauri", - "version": "0.2.0", + "version": "0.4.1", "dependencies": { "@tauri-apps/api": "^2.0.0", "@tauri-apps/plugin-dialog": "^2.0.0", @@ -123,7 +124,7 @@ }, "web": { "name": "@voicebox/web", - "version": "0.2.0", + "version": "0.4.1", "dependencies": { "@tanstack/react-query": "^5.0.0", "react": "^18.3.0", @@ -287,6 +288,8 @@ "@humanwhocodes/object-schema": ["@humanwhocodes/object-schema@2.0.3", "", {}, "sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA=="], + "@icons-pack/react-simple-icons": ["@icons-pack/react-simple-icons@13.13.0", "", { "peerDependencies": { "react": "^16.13 || ^17 || ^18 || ^19" } }, "sha512-B5HhQMIpcSH4z8IZ8HFhD59CboHceKYMpPC9kAwGyKntvPdyJJv26DLu4Z1wAjcCLyrJhf11tMhiQGom9Rxb9g=="], + "@img/colour": ["@img/colour@1.0.0", "", {}, "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw=="], 
"@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], diff --git a/docs/PROJECT_STATUS.md b/docs/PROJECT_STATUS.md index d6a1daff..cb12ef2e 100644 --- a/docs/PROJECT_STATUS.md +++ b/docs/PROJECT_STATUS.md @@ -1,6 +1,6 @@ # Voicebox Project Status & Roadmap -> Last updated: 2026-04-18 | Current version: **v0.4.1** | ~155 open issues | 12 open PRs +> Last updated: 2026-04-18 | Current version: **v0.4.1** | 232 open issues | 12 open PRs --- @@ -224,6 +224,8 @@ POST /generate - **Blackwell (RTX 50-series) CUDA**: cu128 + sm_120 kernel support shipped (PR #401, #316), but users still report `cudaErrorNoKernelImageForDevice` (#417, #400, #396, #395, #390, #362) — likely a stale CUDA binary on upgraded installs. Needs a follow-up diagnostic / forced re-download path. - **Long text 50k character limit** (#464, #365, #354): Still hit on GPU despite chunking (PR #266). Chunking reliability needs another pass. - **ROCm on RDNA 3/4** (#469): `HSA_OVERRIDE_GFX_VERSION` is hardcoded and harms newer cards. +- **`flash-attn is not installed` warning on every platform (cosmetic, common user complaint)**: Our transformer-based engines (Chatterbox / Qwen) emit `Warning: flash-attn is not installed. Will only run the manual PyTorch version. Please install flash-attn for faster inference.` on every startup, on every platform — we don't pin `flash-attn` in requirements because installing it is fragile and version-sensitive. Fallback is PyTorch SDPA, which is near-FA2 throughput on Ampere+ and is what actually runs. **Per-platform reality:** (a) **macOS/Apple Silicon** — FlashAttention is CUDA-only, irrelevant here; MLX has its own attention kernels. (b) **Linux** — `pip install flash-attn --no-build-isolation` works but takes 20+ min to compile. 
(c) **Windows** — no official support (Dao-AILab README still says only "Might work"; source builds routinely fail on recent CUDA/MSVC, issues #1715, #1828, #2395). Windows users can install community prebuilt wheels from `kingbri1/flash-attention` or `bdashore3/flash-attention` (latest v2.8.3, Aug 2025; `win_amd64` wheels for CUDA 12.4/12.8, Torch 2.6–2.9, Python 3.10–3.13) matching their exact CUDA/Torch/Python, or use WSL2. **Native-Windows alternatives worth considering as a build-time swap:** SageAttention (thu-ml, Apache 2.0, claims 2–5× over FA2) and xformers (official Windows wheels). **Action for us:** troubleshooting doc now covers it (see `docs/content/docs/overview/troubleshooting.mdx`), and we should optionally suppress the warning via `logging.getLogger(...).setLevel(ERROR)` at backend import since the fallback is functionally fine. +- **WebAudio playback dies after audio-session interruption** (#41, plus an internal repro where the app is backgrounded long enough): WaveSurfer's `AudioContext` gets suspended by macOS — either because another app grabs the audio output, or because the WKWebView throttles when backgrounded. `play()` resolves and `timeupdate` can still fire, but no audio reaches the output. Only app restart fixes it. **Things already tried that didn't work:** (a) swapping WaveSurfer backend away from WebAudio — introduced more bugs, not an option; (b) remount hook on the player — doesn't help because a freshly-created `AudioContext` is born suspended and only resumes on a user gesture. PR #293 was a prior partial fix that doesn't cover this path. **Next thing to try** (not yet attempted — confirmed via grep of `AudioPlayer.tsx`): call `wavesurfer.getMediaElement().getGainNode().context.resume()` on the play button click (the click itself is a valid user gesture), plus a `visibilitychange` + `statechange` listener as belt-and-suspenders. 
The `ctx.resume()` pattern already exists in the codebase at `useStoryPlayback.ts:52` — just not wired into the main player. --- @@ -303,9 +305,15 @@ POST /generate Still reported. Users get stuck downloads, can't resume, offline mode edge cases. -**Key issues:** #475 (MAC CustomVoice install error), #449 (infinite loading macOS), #445 (can't download CustomVoice), #462 (Qwen requires internet even when loaded — regression from #150), #434 (infinite retry loop offline — PR #443 open), #432 (storage location change hangs when empty — partly fixed by PR #439/#433), #181, #180. +**Key issues:** #475 (MAC CustomVoice install error), #449 (infinite loading macOS), #445 (can't download CustomVoice), #462 (Qwen requires internet even when loaded — regression from #150), #434 (infinite retry loop offline — PR #443 open), #432 (storage location change hangs when empty — partly fixed by PR #439/#433), #348 (TADA 3B Multilingual download fails), #336 (TADA model not listed in app), #275 (`No module named 'chatterbox'` on download), #304 (whisper-base feature extractor load error), #287 (macOS ARM `check_model_inputs` ImportError on new version), #181, #180. -**Fix path:** PR #443 addresses infinite offline retry. CustomVoice-specific download failures (#475, #445) need triage — likely related to frozen-binary import fixes in PR #438. +**Fix path:** PR #443 addresses infinite offline retry. CustomVoice-specific download failures (#475, #445) need triage — likely related to frozen-binary import fixes in PR #438. TADA cluster (#336, #348) and macOS ARM import regressions (#287, #275, #304) need a dedicated triage pass. + +**Qwen 0.6B-downloads-1.7B reports:** **#485** (2026-04-19), **#423** (macOS M1), **#329**. Platform-dependent: + +- **On MLX (Apple Silicon) — not a bug.** `mlx-community` only publishes 1.7B-Base-bf16 weights, so the 0.6B Base option intentionally resolves to the same repo (`backend/backends/__init__.py:180` — `# 0.6B not available in MLX, falls back`). 
UX gap: the selector offers a size that doesn't exist on the active backend. Fix: (a) hide the 0.6B option on MLX, or (b) label it "0.6B (uses 1.7B on Apple Silicon)". +- **On PyTorch (Windows/Linux/CUDA/ROCm/XPU/CPU) — real bug if reproducible.** Both 0.6B and 1.7B have distinct repos (`Qwen/Qwen3-TTS-12Hz-0.6B-Base` vs `Qwen/Qwen3-TTS-12Hz-1.7B-Base`). Triage each report by platform before merging into the MLX cluster. +- **Qwen CustomVoice (either platform)** — no fallback, both sizes always have dedicated repos. ### Language Requests (ongoing) @@ -364,6 +372,9 @@ Notable: - **#383** — Concatenate partial reference audio into generated audio - **#382** — Lightning.ai support - **#376** — Remote mode +- **#353** — Audio transcoding +- **#317** — Voice pitch control +- **#189** — "Auto" language option - **#173** — Vocal intonation/inflection control - **#165, #270** — Audiobook mode (PR #154 open) - **#242** — Seed value pinning @@ -371,17 +382,40 @@ Notable: - **#235** — Finetuned Qwen3-TTS tokenizer (PR #253 open) - **#144** — Copy text to clipboard +### Housekeeping / Triage Needed + +| Issue | Reason | +|-------|--------| +| **#431**, **#408** | Spam — Chinese "free Claude API" promos. Close. | +| **#398** ("Excelente") | Non-issue. Close. | +| **#357** | Informational — project featured in Awesome MLX. Close after acknowledgement. | +| **#374**, **#377** | Version-release questions, no bug. Close. | +| **#306** ("voice model"), **#389** ("New model"), **#473** ("New functionality") | Title-only issues, no content. Request details or close. | +| **#309** | Uninstall/cleanup question. Answer and close. | +| **#241** | "How to use in Colab" — support question, not a bug. | +| **#423** / **#485** / **#329** | Platform-dependent. On MLX: not a bug (0.6B weights don't exist upstream, fallback is intentional — fix UX). On PyTorch: real bug if reproducible. Classify each by reporter's platform before deduping. 
| +| **#336** / **#348** | TADA download/registration cluster — triage together. | +| **#287** / **#275** / **#304** | macOS ARM import regressions on new version — likely one root cause. | +| **#292**, **#349** | Possibly already fixed by merged PRs (#321/#412 and #345). Verify + close. | + +**~70 older issues (pre-#170) not individually categorized above.** Most are long-tail support questions or duplicates of problems now addressed by the multi-engine / model-registry work. A dedicated backlog-sweep pass is overdue. + ### Bugs (ongoing) | Category | Issues | |----------|--------| -| Generation failures | #476, #467, #452, #459 (voice clone fetch error), #468 (tada-1b marked error), #437, #282 | -| Audio quality | #456 (clipping errors v0.4.0), #436 (emotion labels), #333 (pitch/echo), #307 (by-model breakdown) | -| File ops | #477 (spacy_pkuseg dict missing on frozen Windows build), #472 (storage location change) | -| Windows | #466 (install problem), #273 (port 8000 conflict) | -| Linux | #471 (thread-safe PULSE_SOURCE), #413 (Arch build), #409 (Kubuntu build), #341 | -| macOS | #441 (older macOS), #369 (malware flag), #171 (ARM64 binary won't open) | -| Profile/UI | #360 (Kokoro profile hides others — partly addressed by auto-switch), #299 (drag-drop on Win11), #329 (size selector state bug) | +| Generation failures | #476, #467, #452, #459 (voice clone fetch error), #468 (tada-1b marked error), #437, #300, #301, #282 | +| Audio quality | #456 (clipping errors v0.4.0), #436 (emotion labels), #333 (pitch/echo), #307 (by-model breakdown), #340 (all generations say "www...") | +| Transcription | #371 (fails every time), #291 (extract transcription from generated audio) | +| Effects / presets | #349 ("Failed to save" when creating effects presets — possibly fixed by merged #345) | +| File ops | #477 (spacy_pkuseg dict missing on frozen Windows build), #472 (storage location change), #283 (allow longer files for voice creation + in-app trim), #350 (failed to add 
sample) | +| History | #292 (can't delete failed generations — possibly fixed by merged #321/#412) | +| Windows | #466 (install problem), #375 (WinError 5 access denied), #273 (port 8000 conflict), #201 (model doesn't stay loaded) | +| Linux | #471 (thread-safe PULSE_SOURCE), #413 (Arch build), #409 (Kubuntu build), #351, #341 | +| macOS | #441 (older macOS), #369 (malware flag), #334 (microphone permission), #287 (`check_model_inputs` ImportError — regression), #171 (ARM64 binary won't open) | +| Profile/UI | #360 (Kokoro profile hides others — partly addressed by auto-switch), #299 (drag-drop on Win11), #329 (size selector state bug), #393 (stuck loading screen after reinstall to new dir) | +| Integrations | #397 (SAMMI-bot 422 Unprocessable Entity) | +| Audio playback / session | **#41** (macOS: Voicebox goes silent after another app takes audio output; restart restores it) — see the "WebAudio playback dies after audio-session interruption" entry above | | Database | #174 (sqlite3 IntegrityError) | --- diff --git a/docs/content/docs/TROUBLESHOOTING.md b/docs/content/docs/TROUBLESHOOTING.md deleted file mode 100644 index d9a5584e..00000000 --- a/docs/content/docs/TROUBLESHOOTING.md +++ /dev/null @@ -1,311 +0,0 @@ ---- -title: "Troubleshooting Guide" -description: "Common issues and solutions for Voicebox" ---- - -Common issues and solutions for Voicebox. - -## Installation Issues - -### macOS: "Voicebox cannot be opened because it is from an unidentified developer" - -**Solution:** -1. Right-click the `.dmg` file -2. Select "Open" -3. Click "Open" in the security dialog -4. Alternatively, go to System Settings → Privacy & Security → Allow Voicebox - -### Windows: "Windows protected your PC" - -**Solution:** -1. Click "More info" -2. Click "Run anyway" -3. 
Windows Defender may flag new software; this is normal for unsigned apps - -### Linux: AppImage won't run - -**Solution:** -```bash -chmod +x voicebox-*.AppImage -./voicebox-*.AppImage -``` - -## Runtime Issues - -### Server won't start - -**Symptoms:** App opens but shows "Server not connected" - -**Solutions:** -1. **Check Python installation** - ```bash - python --version # Should be 3.11+ - ``` - -2. **Check server binary exists** - - Look in `tauri/src-tauri/binaries/` for your platform - - Binary should match your system architecture - -3. **Check permissions** - ```bash - # macOS/Linux - chmod +x tauri/src-tauri/binaries/voicebox-server-* - ``` - -4. **Check logs** - - macOS: Open Console.app and search for "voicebox" - - Linux: Check `~/.local/share/voicebox/` for logs - - Windows: Check Event Viewer - -### "Model download failed" - -**Symptoms:** First generation fails with download error - -**Solutions:** -1. **Check internet connection** - - Models download from HuggingFace Hub (~2-4GB) - - First download may take several minutes - -2. **Check disk space** - - Models are cached in `~/.cache/huggingface/` - - Ensure at least 5GB free space - -3. **Manual download** (if automatic fails) - ```bash - pip install huggingface_hub - huggingface-cli download Qwen/Qwen3-TTS-12Hz-1.7B-Base - ``` - -### "Out of memory" errors - -**Symptoms:** Generation fails with CUDA/VRAM errors - -**Solutions:** -1. **Use smaller model** - - Switch to 0.6B model instead of 1.7B - - Settings → Model Management → Load 0.6B - -2. **Close other applications** - - Free up GPU memory - - Close browser tabs, other ML apps - -3. **Use CPU mode** - - Slower but works without GPU - - Backend automatically falls back to CPU - -### MLX "Failed to load the default metallib" error (Apple Silicon) - -**Symptoms:** Generation fails with "library not found" or "metallib" errors - -**Solutions:** -1. 
**Rebuild server binary** - ```bash - just build-server - ``` - The build script automatically includes MLX Metal shader libraries on Apple Silicon. - -2. **Check MLX installation** - ```bash - pip install -r backend/requirements-mlx.txt - ``` - -3. **Verify backend detection** - - Check server logs for "Backend: MLX" - - If showing "Backend: PYTORCH", MLX may not be installed correctly - -### Audio playback issues - -**Symptoms:** Generated audio won't play - -**Solutions:** -1. **Check audio format** - - Audio is saved as WAV files - - Ensure your system supports WAV playback - -2. **Try downloading audio** - - Right-click → Download - - Play in external player - -3. **Check browser permissions** (web version) - - Allow audio autoplay in browser settings - -### Slow generation - -**Symptoms:** Generation takes >30 seconds - -**Solutions:** -1. **Check backend type** (Apple Silicon) - - Check Settings → Server Status - - Should show "Backend: MLX" on Apple Silicon - - If showing "Backend: PYTORCH", install MLX: `pip install -r backend/requirements-mlx.txt` - - MLX provides 4-5x faster inference on Apple Silicon - -2. **Use GPU** (if available) - - Check Settings → Server Status - - Should show "GPU available: true" - - Apple Silicon: Should show "Metal (Apple Silicon via MLX)" - - Windows/Linux: Should show "CUDA" if GPU available - -3. **Enable caching** - - Voice prompts are cached automatically - - Second generation with same voice should be faster - -4. **Use smaller model** - - 0.6B model is faster than 1.7B - - Quality difference is minimal for most voices - -5. **Check system resources** - - Close other CPU/GPU intensive apps - - Ensure adequate RAM (8GB+ recommended) - -## API Issues - -### "Connection refused" when using API - -**Solutions:** -1. **Check server is running** - ```bash - curl http://localhost:17493/health - ``` - -2. 
**Check remote mode** - - If connecting remotely, ensure server is started with `--host 0.0.0.0` - - Check firewall settings - -3. **Check port availability** - - The current local app and dev workflow uses port 17493 by default - - Ensure no other service is using it - -### CORS errors in browser - -**Solutions:** -1. **Use desktop app** (recommended) - - Desktop app doesn't have CORS restrictions - -2. **Configure CORS** (for web deployment) - - Update `backend/main.py` CORS settings - - Add your domain to allowed origins - -## Update Issues - -### "Update check failed" - -**Solutions:** -1. **Check internet connection** - - Updates are fetched from GitHub releases - -2. **Check GitHub access** - - Ensure `github.com` is accessible - - Check firewall/proxy settings - -3. **Manual update** - - Download latest release from GitHub - - Install manually - -### "Invalid signature" error - -**Solutions:** -1. **Re-download installer** - - Signature may be corrupted - - Download fresh copy from GitHub - -2. **Check release integrity** - - Verify `.sig` file matches installer - - Report issue if signature is invalid - -## Data Issues - -### Profiles disappeared - -**Solutions:** -1. **Check data directory** - - macOS: `~/Library/Application Support/sh.voicebox.app/` - - Windows: `%APPDATA%/sh.voicebox.app/` - - Linux: `~/.config/sh.voicebox.app/` - -2. **Check database** - - Database: `data/voicebox.db` - - Ensure file exists and is readable - -3. **Restore from backup** - - Profiles can be exported/imported - - Check for backup files - -### "Database locked" error - -**Solutions:** -1. **Close other instances** - - Ensure only one Voicebox instance is running - -2. **Restart app** - - Close and reopen Voicebox - -3. **Check file permissions** - - Ensure database file is writable - - Check directory permissions - -## Development Issues - -### Build fails - -**Solutions:** -1. **Check Rust installation** - ```bash - rustc --version - rustup update - ``` - -2. 
**Check Tauri dependencies** - ```bash - cd tauri - bun install - ``` - -3. **Clean build** - ```bash - cd tauri/src-tauri - cargo clean - cd ../.. - just build - ``` - -### API client generation fails - -**Solutions:** -1. **Start backend server** - ```bash - just dev-backend - ``` - -2. **Check OpenAPI endpoint** - ```bash - curl http://localhost:17493/openapi.json - ``` - -3. **Regenerate client** - ```bash - just generate-api - ``` - -## Still Having Issues? - -1. **Check existing issues** - - Search GitHub issues for similar problems - - Check closed issues for solutions - -2. **Create new issue** - - Include: - - OS and version - - Voicebox version - - Steps to reproduce - - Error messages/logs - - Screenshots (if applicable) - -3. **Get help** - - Check documentation in `docs/` - - Review `backend/README.md` for API details - - See `CONTRIBUTING.md` for development help - ---- - -For more help, open an issue on [GitHub](https://github.com/jamiepine/voicebox/issues). diff --git a/docs/content/docs/overview/troubleshooting.mdx b/docs/content/docs/overview/troubleshooting.mdx index ff087955..59e759fb 100644 --- a/docs/content/docs/overview/troubleshooting.mdx +++ b/docs/content/docs/overview/troubleshooting.mdx @@ -29,6 +29,14 @@ Windows SmartScreen may warn that the app is unrecognized. This is expected for unsigned applications. We're working on code signing for future releases. +### Linux: AppImage Won't Run + +**Solution:** +```bash +chmod +x voicebox-*.AppImage +./voicebox-*.AppImage +``` + ## Server Issues ### Backend Server Won't Start @@ -85,6 +93,52 @@ Windows SmartScreen may warn that the app is unrecognized. +### `flash-attn is not installed` Warning in Server Logs + +**Symptoms:** +``` +Warning: flash-attn is not installed. Will only run the manual PyTorch version. +Please install flash-attn for faster inference. +``` + +**This is harmless.** The warning is emitted by our transformer-based engines (Chatterbox / Qwen) on every startup. 
FlashAttention is an optional acceleration library — when it's not present, PyTorch's built-in scaled-dot-product attention (SDPA) runs instead, which is near-FA2 throughput on modern GPUs. Generation works normally. + +**Why it shows up on every platform:** +- **Windows:** `flash-attn` has no official Windows support. The upstream project (Dao-AILab/flash-attention) still only says it *might* work, and source builds typically fail on recent CUDA/MSVC combinations. +- **macOS (Apple Silicon):** FlashAttention is CUDA-only and doesn't apply here at all. MLX has its own optimized attention kernels. +- **Linux:** It's not pinned in our requirements because installing it is fragile and version-sensitive; users who want it install it themselves. + +**Solutions (all optional):** + + + + PyTorch SDPA is what actually runs the model, and on Ampere/Ada/Hopper GPUs it's within a few percent of FA2 for our workloads. You won't notice a meaningful speed difference. + + + + ```bash + pip install flash-attn --no-build-isolation + ``` + + Requires a matching CUDA toolkit. Build can take 20+ minutes. + + + + Official builds don't exist, but community maintainers publish prebuilt wheels: + + - [kingbri1/flash-attention releases](https://github.com/kingbri1/flash-attention/releases) + - [bdashore3/flash-attention releases](https://github.com/bdashore3/flash-attention/releases) + + Pick the wheel matching your exact CUDA + PyTorch + Python combination. Example: + + ```bash + pip install https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.8.0cxx11abiFALSE-cp312-cp312-win_amd64.whl + ``` + + Alternatively, run Voicebox's backend inside WSL2 and use the standard Linux wheels. + + + ### Connection Timeout **Symptoms:** @@ -174,6 +228,34 @@ This is expected behavior. 
The first generation downloads the selected TTS engin +### MLX "Failed to load the default metallib" (Apple Silicon) + +**Symptoms:** +- Generation fails with "library not found" or "metallib" errors +- Server logs reference missing Metal shader libraries + +**Solutions:** + + + + ```bash + just build-server + ``` + + The build script bundles MLX Metal shader libraries on Apple Silicon automatically. + + + + ```bash + pip install -r backend/requirements-mlx.txt + ``` + + + + Check Settings → Server Status. Should show **Backend: MLX** on Apple Silicon. If it shows **Backend: PYTORCH**, MLX isn't installed correctly. + + + ## Audio Issues ### No Audio Playback @@ -357,7 +439,26 @@ Restart the app to create a fresh database. - Check your internet connection - Check HuggingFace Hub status - Try using a VPN if HuggingFace is blocked in your region -- Manually download and place in cache directory +- Manually download via the HuggingFace CLI and place in the cache directory: + + ```bash + pip install huggingface_hub + huggingface-cli download Qwen/Qwen3-TTS-12Hz-1.7B-Base + ``` + +### Qwen 0.6B Downloads the Same Files as 1.7B on Apple Silicon + +**Symptoms:** +- You select Qwen 0.6B on an Apple Silicon Mac and the download is the same size as 1.7B +- Generation speed and VRAM usage match 1.7B, not the expected smaller model + +**Explanation:** +This is intentional, not a bug. The MLX community only publishes `mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16` — there is no 0.6B MLX build. Voicebox's model registry falls back to the 1.7B weights when 0.6B is selected on MLX (see `backend/backends/__init__.py`). + +**Solution:** +- On Apple Silicon, both size options use the 1.7B model — pick either. +- If you specifically need a smaller model, switch to **Kokoro 82M** (~350 MB) or **LuxTTS** (~300 MB) — both CPU-realtime. +- On Windows/Linux with PyTorch, 0.6B and 1.7B are distinct repos and behave differently. 
### Wrong Model Version @@ -404,6 +505,16 @@ rmdir /s %USERPROFILE%\.cache\huggingface\hub\models--Qwen* Outdated drivers can cause performance issues. Update to the latest NVIDIA drivers. + + + Check Settings → Server Status. Should show **Backend: MLX** on Apple Silicon — MLX is 4–5× faster than PyTorch here. If it shows **Backend: PYTORCH**, reinstall MLX: + + ```bash + pip install -r backend/requirements-mlx.txt + ``` + + GPU availability should read "Metal (Apple Silicon via MLX)". + ### High Memory Usage @@ -417,6 +528,21 @@ rmdir /s %USERPROFILE%\.cache\huggingface\hub\models--Qwen* - Clear generation history - Restart the app periodically +## Update Issues + +### "Update Check Failed" + +**Solutions:** +- Confirm your internet connection — updates are fetched from GitHub releases. +- Ensure `github.com` is accessible and not blocked by a firewall or proxy. +- As a fallback, download the latest release from GitHub and install manually. + +### "Invalid Signature" Error + +**Solutions:** +- Re-download the installer — the signature may have been corrupted in transit. +- Verify the `.sig` file matches the installer; if it doesn't, file an issue. + ## Remote Mode Issues ### Can't Connect to Remote Server @@ -482,5 +608,3 @@ python --version # GPU info (if generation issues) nvidia-smi # NVIDIA GPUs ``` - -For more detailed troubleshooting, see the [TROUBLESHOOTING.md](https://github.com/jamiepine/voicebox/blob/main/docs/TROUBLESHOOTING.md) file in the repository. 
diff --git a/landing/package.json b/landing/package.json index 372c8d97..6f0a752e 100644 --- a/landing/package.json +++ b/landing/package.json @@ -3,13 +3,14 @@ "version": "0.4.1", "description": "Landing page for voicebox.sh", "scripts": { - "dev": "bun --bun next dev --turbo", + "dev": "next dev --turbo", "build": "bun --bun next build", "start": "bun --bun next start", "lint": "next lint" }, "dependencies": { "@fontsource/space-grotesk": "^5.2.10", + "@icons-pack/react-simple-icons": "^13.13.0", "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "autoprefixer": "^10.4.17", diff --git a/landing/src/app/download/[platform]/route.ts b/landing/src/app/download/[platform]/route.ts index af76c315..b3b0b9b8 100644 --- a/landing/src/app/download/[platform]/route.ts +++ b/landing/src/app/download/[platform]/route.ts @@ -1,42 +1,31 @@ import { type NextRequest, NextResponse } from 'next/server'; -import { getLatestRelease } from '@/lib/releases'; export const dynamic = 'force-dynamic'; -const PLATFORM_MAP: Record< - string, - keyof Awaited>['downloadLinks'] -> = { +// Pretty URLs from README / docs (e.g. /download/mac-arm) are kept for +// compatibility, but we now always route through the /download page so users +// see context + a donate prompt + resources while the download kicks off. +// The page handles the actual file trigger itself — no more silent redirects +// to GitHub or direct asset URLs. +const PLATFORM_ALIAS: Record = { 'mac-arm': 'macArm', + macArm: 'macArm', 'mac-intel': 'macIntel', + macIntel: 'macIntel', windows: 'windows', - linux: 'linux', }; export async function GET( - _request: NextRequest, + request: NextRequest, { params }: { params: Promise<{ platform: string }> }, ) { const { platform } = await params; - const key = PLATFORM_MAP[platform]; - - if (!key) { - return NextResponse.json( - { error: `Unknown platform: ${platform}. 
Use: ${Object.keys(PLATFORM_MAP).join(', ')}` }, - { status: 404 }, - ); - } - - try { - const release = await getLatestRelease(); - const url = release.downloadLinks[key]; - - if (!url) { - return NextResponse.json({ error: `No download available for ${platform}` }, { status: 404 }); - } - - return NextResponse.redirect(url); - } catch { - return NextResponse.redirect(`https://github.com/jamiepine/voicebox/releases/latest`); + // No prebuilt Linux binary yet — send straight to the build-from-source page. + if (platform === 'linux') { + return NextResponse.redirect(new URL('/linux-install', request.url), 307); } + const normalized = PLATFORM_ALIAS[platform]; + const target = new URL('/download', request.url); + if (normalized) target.searchParams.set('platform', normalized); + return NextResponse.redirect(target, 307); } diff --git a/landing/src/app/download/page.tsx b/landing/src/app/download/page.tsx new file mode 100644 index 00000000..b1852ae8 --- /dev/null +++ b/landing/src/app/download/page.tsx @@ -0,0 +1,313 @@ +'use client'; + +import { + ArrowLeft, + Bot, + Coffee, + Download as DownloadIcon, + FileText, + Github, +} from 'lucide-react'; +import Image from 'next/image'; +import Link from 'next/link'; +import { useEffect, useMemo, useState } from 'react'; +import { AppleIcon, LinuxIcon, WindowsIcon } from '@/components/PlatformIcons'; +import { Button } from '@/components/ui/button'; +import { DONATE_URL, GITHUB_RELEASES_PAGE, GITHUB_REPO } from '@/lib/constants'; +import type { DownloadLinks } from '@/lib/releases'; + +type Platform = keyof DownloadLinks; + +type PlatformMeta = { + key: Platform; + label: string; + description: string; + icon: React.ComponentType<{ className?: string }>; +}; + +const PLATFORMS: PlatformMeta[] = [ + { key: 'macArm', label: 'macOS', description: 'Apple Silicon', icon: AppleIcon }, + { key: 'macIntel', label: 'macOS', description: 'Intel (x64)', icon: AppleIcon }, + { key: 'windows', label: 'Windows', description: '64-bit 
(MSI)', icon: WindowsIcon }, + { key: 'linux', label: 'Linux', description: 'Build from source', icon: LinuxIcon }, +]; + +function detectPlatform(): Platform | null { + if (typeof navigator === 'undefined') return null; + const ua = navigator.userAgent; + if (/Windows/i.test(ua)) return 'windows'; + if (/Linux/i.test(ua) && !/Android/i.test(ua)) return 'linux'; + if (/Mac/i.test(ua)) { + // Apple Silicon Safari reports "Intel" for compat; default to ARM since + // M-series is the majority. Users can click the Intel button if needed. + return 'macArm'; + } + return null; +} + +function parseQueryPlatform(search: string): Platform | null { + const params = new URLSearchParams(search); + const raw = params.get('platform'); + if (!raw) return null; + // Accept both camelCase and hyphenated forms (/download/mac-arm → ?platform=mac-arm). + const normalized = raw + .toLowerCase() + .replace(/[-_\s]/g, '') + .replace('macarm', 'macArm') + .replace('macintel', 'macIntel'); + const valid: Platform[] = ['macArm', 'macIntel', 'windows', 'linux']; + return (valid as string[]).includes(normalized) ? (normalized as Platform) : null; +} + +export default function DownloadPage() { + const [links, setLinks] = useState(null); + const [linksError, setLinksError] = useState(false); + const [platform, setPlatform] = useState(null); + const [triggered, setTriggered] = useState(false); + + useEffect(() => { + const fromQuery = parseQueryPlatform(window.location.search); + const resolved = fromQuery ?? detectPlatform(); + // No prebuilt Linux binary yet — send Linux users to the build-from-source + // instructions instead of sitting on /download trying to trigger a + // download that doesn't exist. 
+ if (resolved === 'linux') { + window.location.replace('/linux-install'); + return; + } + setPlatform(resolved); + }, []); + + useEffect(() => { + let cancelled = false; + fetch('/api/releases') + .then((r) => { + if (!r.ok) throw new Error(`releases ${r.status}`); + return r.json(); + }) + .then((data) => { + if (cancelled) return; + if (data.downloadLinks) setLinks(data.downloadLinks as DownloadLinks); + }) + .catch(() => { + if (!cancelled) setLinksError(true); + }); + return () => { + cancelled = true; + }; + }, []); + + useEffect(() => { + if (triggered || !links || !platform) return; + const url = links[platform]; + if (!url) return; + + const a = document.createElement('a'); + a.href = url; + a.rel = 'noopener'; + a.style.display = 'none'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + setTriggered(true); + }, [triggered, links, platform]); + + const activeMeta = useMemo( + () => PLATFORMS.find((p) => p.key === platform) ?? null, + [platform], + ); + + return ( +
+ {/* Minimal branded header */} +
+
+ + Voicebox + Voicebox + + + + Back to voicebox.sh + +
+
+ +
+ {/* Hero */} +
+ Voicebox +
+ {triggered ? ( + <> +

+ Your download has started. +

+

+ {activeMeta + ? `Downloading Voicebox for ${activeMeta.label} (${activeMeta.description}). Check your downloads folder.` + : 'Check your downloads folder for Voicebox.'} +

+ + ) : ( + <> +

+ {linksError ? "We couldn't load the latest release." : 'Download Voicebox'} +

+

+ {linksError + ? 'Our release server is temporarily unreachable. Please try again in a moment.' + : 'Pick your platform to get started.'} +

+ + )} +
+
+ + {/* Platform buttons — always visible as a fallback */} + {linksError ? ( +
+

+ If this keeps happening, you can{' '} + + browse releases on GitHub + + {' '}and grab the build for your platform manually. +

+
+ ) : ( +
+

+ {triggered ? 'Download not working?' : 'Choose your platform'} +

+ +
+ )} + + {/* Donate — prominent, heartfelt, post-click context */} +
+
+
+
+ + + Hi from the maintainer + +
+

+ Jamie here — Voicebox is a side project. +

+

+ I build and maintain Voicebox in my spare time. It's completely + free, open source, runs entirely on your machine — no accounts, no + cloud, no subscriptions, no upsells. If it saves you an ElevenLabs + bill or just made your day, a coffee genuinely helps me keep + shipping updates, adding new models, and fixing bugs. Every little + bit keeps the lights on. +

+ +
+
+ + {/* Resources */} + +
+
+ ); +} diff --git a/landing/src/app/page.tsx b/landing/src/app/page.tsx index 2d34c37a..02831109 100644 --- a/landing/src/app/page.tsx +++ b/landing/src/app/page.tsx @@ -17,12 +17,9 @@ import {Navbar} from "@/components/Navbar"; import {AppleIcon, LinuxIcon, WindowsIcon} from "@/components/PlatformIcons"; import {TutorialsSection} from "@/components/TutorialsSection"; import {VoiceCreator} from "@/components/VoiceCreator"; -import {DOWNLOAD_LINKS, GITHUB_REPO} from "@/lib/constants"; -import type {DownloadLinks} from "@/lib/releases"; +import {GITHUB_REPO} from "@/lib/constants"; export default function Home() { - const [downloadLinks, setDownloadLinks] = - useState(DOWNLOAD_LINKS); const [version, setVersion] = useState(null); const [totalDownloads, setTotalDownloads] = useState(null); @@ -33,7 +30,6 @@ export default function Home() { return res.json(); }) .then((data) => { - if (data.downloadLinks) setDownloadLinks(data.downloadLinks); if (data.version) setVersion(data.version); if (data.totalDownloads != null) setTotalDownloads(data.totalDownloads); }) @@ -92,7 +88,7 @@ export default function Home() { style={{animationDelay: "300ms"}} > Download @@ -403,8 +399,7 @@ export default function Home() {
{/* macOS ARM */} @@ -418,8 +413,7 @@ export default function Home() { {/* macOS Intel */} @@ -431,8 +425,7 @@ export default function Home() { {/* Windows */} diff --git a/landing/src/components/ApiSection.tsx b/landing/src/components/ApiSection.tsx index 55c5216d..51e784f9 100644 --- a/landing/src/components/ApiSection.tsx +++ b/landing/src/components/ApiSection.tsx @@ -29,8 +29,8 @@ const CURL_SNIPPET = `curl -X POST http://127.0.0.1:17493/generate \\ -H "Content-Type: application/json" \\ -d '{ "text": "Welcome to the game, player one.", - "profile_id": "morgan-freeman", - "engine": "qwen", + "profile_id": "b3f1c2d4-5e6f-4a7b-8c9d-0e1f2a3b4c5d", + "engine": "qwen_custom_voice", "instruct": "warm, slow, cinematic" }' \\ --output line.wav`; diff --git a/landing/src/components/Footer.tsx b/landing/src/components/Footer.tsx index de3d6031..da7b6b75 100644 --- a/landing/src/components/Footer.tsx +++ b/landing/src/components/Footer.tsx @@ -45,7 +45,7 @@ export function Footer() {
  • - + Download
  • diff --git a/landing/src/components/Navbar.tsx b/landing/src/components/Navbar.tsx index c9e064dc..e0c44099 100644 --- a/landing/src/components/Navbar.tsx +++ b/landing/src/components/Navbar.tsx @@ -66,7 +66,7 @@ export function Navbar() { API Download diff --git a/landing/src/components/PlatformIcons.tsx b/landing/src/components/PlatformIcons.tsx index 5fa37a1d..51b321a7 100644 --- a/landing/src/components/PlatformIcons.tsx +++ b/landing/src/components/PlatformIcons.tsx @@ -1,23 +1,29 @@ +import { SiApple, SiLinux } from '@icons-pack/react-simple-icons'; + +// Official brand icons via Simple Icons (apple/linux). Simple Icons drops +// Microsoft's mark due to trademark policy, so the Windows 11 flag is +// inlined from Microsoft's public brand guidance. + export function AppleIcon({ className }: { className?: string }) { - return ( - - - - ); + return ; } -export function WindowsIcon({ className }: { className?: string }) { - return ( - - - - ); +export function LinuxIcon({ className }: { className?: string }) { + return ; } -export function LinuxIcon({ className }: { className?: string }) { +export function WindowsIcon({ className }: { className?: string }) { return ( - - + + Windows + ); }