From ca6ed0998a7e5a34d7aeab9be7b0a032bee2215f Mon Sep 17 00:00:00 2001
From: Abraham <bob.haddad.33@gmail.com>
Date: Wed, 18 Feb 2026 09:41:44 -0800
Subject: [PATCH] Fix model size selection ignored when generating speech

The /generate endpoint created the voice prompt before loading the
user's requested model size. Because create_voice_prompt() internally
calls load_model_async(None), that call fell back to the hardcoded
default of "1.7B", so the 1.7B model was downloaded even when the
user explicitly selected 0.6B.

This reorders the operations so the requested model is loaded first,
ensuring create_voice_prompt() and generate() use the correct model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/main.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index 59fb9e18..94810c1e 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -542,12 +542,6 @@ async def generate_speech(
         if not profile:
             raise HTTPException(status_code=404, detail="Profile not found")
         
-        # Create voice prompt from profile
-        voice_prompt = await profiles.create_voice_prompt_for_profile(
-            data.profile_id,
-            db,
-        )
-        
         # Generate audio
         tts_model = tts.get_tts_model()
         # Load the requested model size if different from current (async to not block)
@@ -582,7 +576,16 @@ async def download_model_background():
                     }
                 )
 
+        # Load the requested model BEFORE creating voice prompt,
+        # so create_voice_prompt uses the correct model size
         await tts_model.load_model_async(model_size)
+
+        # Create voice prompt from profile
+        voice_prompt = await profiles.create_voice_prompt_for_profile(
+            data.profile_id,
+            db,
+        )
+
         audio, sample_rate = await tts_model.generate(
             data.text,
             voice_prompt,