rootflo_llm -> lazy fetching llm config #166
Changes from all commits: 64f65c9, eb942e1, ff32552, c7c4fb5, 4b0910e, 560c1bf
File changed: flo_ai/flo_ai/llm/rootflo_llm.py
```diff
@@ -4,10 +4,12 @@
 from flo_ai.models.chat_message import ImageMessageContent
 import jwt
 import httpx
+import asyncio
 from .base_llm import BaseLLM
 from .openai_llm import OpenAI
 from .gemini_llm import Gemini
 from .anthropic_llm import Anthropic
+from .openai_vllm import OpenAIVLLM
 from flo_ai.tool.base_tool import Tool

```
```diff
@@ -17,12 +19,13 @@ class LLMProvider(Enum):
     OPENAI = 'openai'
     GEMINI = 'gemini'
     ANTHROPIC = 'anthropic'
+    VLLM = 'vllm'


 class RootFloLLM(BaseLLM):
     """
     Proxy LLM class that routes to different SDK implementations based on type.
-    Acts as a unified interface to OpenAI, Gemini, and Anthropic SDKs via a proxy URL.
+    Acts as a unified interface to OpenAI, Gemini, Anthropic SDKs and VLLM via a proxy URL.
     """

     def __init__(
```
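For reference, the routing decision later in this file hinges on mapping the proxy API's `type` string onto this enum. A small illustrative snippet (not part of the diff) showing how that mapping behaves, including the `ValueError` path for unknown types:

```python
# Illustrative only: how LLMProvider maps the API's `type` string.
from enum import Enum

class LLMProvider(Enum):
    OPENAI = 'openai'
    GEMINI = 'gemini'
    ANTHROPIC = 'anthropic'
    VLLM = 'vllm'

print(LLMProvider('vllm'))    # LLMProvider.VLLM
try:
    LLMProvider('ollama')     # not a member -> ValueError
except ValueError as exc:
    print(exc)                # "'ollama' is not a valid LLMProvider"
```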
```diff
@@ -50,9 +53,12 @@ def __init__(
             access_token: Optional pre-generated access token (if provided, skips JWT generation)
             temperature: Temperature parameter for generation
             **kwargs: Additional parameters to pass to the underlying SDK

+        Note:
+            The actual LLM configuration is fetched lazily on first use (generate/stream)
+            to avoid blocking HTTP calls during initialization.
         """
         # Validate required parameters

         if not model_id:
             raise ValueError('model_id is required')
```
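The note above changes the construction contract: creating a `RootFloLLM` no longer performs any HTTP call. A minimal usage sketch, assuming keyword arguments matching the stored attributes in this diff and a guessed import path; all values are placeholders:

```python
# Minimal usage sketch. Import path and argument values are assumptions;
# only the parameter names come from this diff.
import asyncio
from flo_ai.llm.rootflo_llm import RootFloLLM  # path assumed from the repo layout

async def main():
    llm = RootFloLLM(
        base_url='https://proxy.example.com',  # placeholder
        model_id='model-123',                  # placeholder
        app_key='app-key',                     # placeholder
        app_secret='app-secret',               # placeholder
        issuer='issuer',                       # placeholder
        audience='audience',                   # placeholder
    )
    # No HTTP request has been made yet; the config fetch happens here,
    # inside the first generate()/stream() call.
    response = await llm.generate([{'role': 'user', 'content': 'Hello'}])
    print(llm.get_message_content(response))

asyncio.run(main())
```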
```diff
@@ -80,88 +86,41 @@ def __init__(
         if not app_key:
             raise ValueError('app_key is required even when using access_token')

-        # Use provided access_token or generate JWT token
-        if access_token:
-            api_token = access_token
-        else:
-            now = datetime.now()
-            payload = {
-                'iss': issuer,
-                'aud': audience,
-                'iat': int(now.timestamp()),
-                'exp': int((now + timedelta(seconds=3600)).timestamp()),
-                'role_id': 'floconsole-service',
-                'user_id': 'service',
-                'service_auth': True,
-            }
-            service_token = jwt.encode(payload, app_secret, algorithm='HS256')
-            api_token = f'fc_{service_token}'
-
-        # Fetch LLM configuration from API
-        config = self._fetch_llm_config(base_url, model_id, api_token, app_key)
-        llm_model = config['llm_model']
-        llm_type = config['type']
-
-        # Map type string to LLMProvider enum
-        try:
-            llm_provider = LLMProvider(llm_type.lower())
-        except ValueError:
-            raise ValueError(
-                f'Unsupported LLM provider type from API: {llm_type}. '
-                f'Supported types: {[p.value for p in LLMProvider]}'
-            )
+        # Store initialization parameters for lazy initialization
+        self._base_url = base_url
+        self._model_id = model_id
+        self._app_key = app_key
+        self._app_secret = app_secret
+        self._issuer = issuer
+        self._audience = audience
+        self._access_token = access_token
+        self._temperature = temperature
+        self._kwargs = kwargs

+        # Lazy initialization state
+        self._llm = None
+        self._initialized = False
+        self._init_lock = asyncio.Lock()
```
Comment on lines +89 to +104

Review comment: Fix race condition in lazy initialization lock creation, and add guards to helper methods

The earlier revision created the lock lazily inside `_ensure_initialized`, which leaves a race window in the double-checked locking pattern: two concurrent coroutines can each see `self._init_lock is None` and create two different `asyncio.Lock` objects, so both may proceed with the one-time initialization. Given this lock is lightweight, it is simpler and safer to create a single instance in `__init__`. (The cumulative diff above already shows the lock created eagerly in `__init__`, reflecting this fix.)

Consider a change along these lines:

```diff
@@
-        # Lazy initialization state
-        self._llm = None
-        self._initialized = False
-        self._init_lock = None
+        # Lazy initialization state
+        self._llm = None
+        self._initialized = False
+        # Single lock instance to protect one-time async initialization
+        self._init_lock = asyncio.Lock()
@@
-        # Lazily create the lock in an async context
-        if self._init_lock is None:
-            self._init_lock = asyncio.Lock()
-
-        # Acquire lock for initialization
-        async with self._init_lock:
+        # Acquire lock for initialization
+        async with self._init_lock:
             # Double-check: another task might have initialized while we waited
             if self._initialized:
                 return
```

Please verify this matches your supported Python/asyncio versions' expectations for creating an `asyncio.Lock` outside a running event loop. In short: on older Pythons the lock bound itself to an event loop at construction time, so eager creation could misbehave; on modern Python (3.10+) the lock only binds to a loop when first awaited, so creating it in `__init__` is safe.

Additionally, synchronous helpers such as `format_tool_for_llm`, `format_tools_for_llm`, and `format_image_in_message` delegate to `self._llm`, which is still `None` before the first `generate()`/`stream()` call, so they would fail with an unhelpful `AttributeError`. Guard them, for example:

```python
def format_tool_for_llm(self, tool: 'Tool') -> Dict[str, Any]:
    if not self._initialized:
        raise RuntimeError('LLM not yet initialized; call generate/stream first')
    return self._llm.format_tool_for_llm(tool)
```

Apply the same guard pattern to the other helpers that delegate to `self._llm`.

Also applies to: 173-188
Member: Please fix

Reply: @vizsatiz, I'll provide the fixes for both issues. Fix 1: Create lock in `__init__` ...
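The reply above is cut off. As a self-contained sketch of the pattern being requested (illustrative names only, not flo_ai code), eager lock creation plus double-checked locking guarantees the one-time setup runs exactly once even under concurrent first calls:

```python
# Self-contained sketch of the double-checked locking pattern the review asks for.
# Names here are illustrative, not taken from flo_ai.
import asyncio

class LazyResource:
    def __init__(self):
        self._value = None
        self._initialized = False
        # Create the lock once, eagerly, so all concurrent callers share it.
        self._lock = asyncio.Lock()

    async def _ensure_initialized(self):
        if self._initialized:            # fast path: no lock needed
            return
        async with self._lock:           # serialize first-time setup
            if self._initialized:        # double-check after acquiring
                return
            await asyncio.sleep(0.1)     # stands in for the HTTP config fetch
            self._value = 'configured'
            self._initialized = True

    async def use(self) -> str:
        await self._ensure_initialized()
        return self._value

async def main():
    res = LazyResource()
    # Even with many concurrent first calls, initialization runs exactly once.
    results = await asyncio.gather(*(res.use() for _ in range(10)))
    print(results)

asyncio.run(main())
```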
The `__init__` diff continues:

```diff
+        # Will be set during initialization
+        self.base_url = base_url
+        self.model_id = model_id
+        self.llm_provider = None

         # Call parent __init__ with minimal parameters
+        # Actual model will be set during lazy initialization
         super().__init__(
-            model=llm_model, api_key=api_token, temperature=temperature, **kwargs
+            model='',
+            api_key='',
+            temperature=temperature,
+            **kwargs,
         )

-        self.base_url = base_url
-        self.model_id = model_id
-        self.llm_provider = llm_provider
-
-        # Construct full URL for LLM inference
-        full_url = f'{base_url}/v1/llm-inference/{model_id}'
-
-        # Prepare custom headers for proxy authentication
-        custom_headers = {'X-Rootflo-Key': app_key}
-
-        # Instantiate appropriate SDK wrapper based on llm_provider
-        if llm_provider == LLMProvider.OPENAI:
-            self._llm = OpenAI(
-                model=llm_model,
-                base_url=full_url,
-                api_key=api_token,
-                temperature=temperature,
-                custom_headers=custom_headers,
-                **kwargs,
-            )
-        elif llm_provider == LLMProvider.ANTHROPIC:
-            self._llm = Anthropic(
-                model=llm_model,
-                base_url=full_url,
-                api_key=api_token,
-                temperature=temperature,
-                custom_headers=custom_headers,
-                **kwargs,
-            )
-        elif llm_provider == LLMProvider.GEMINI:
-            # Gemini SDK - pass base_url which will be handled via http_options
-            self._llm = Gemini(
-                model=llm_model,
-                api_key=api_token,
-                temperature=temperature,
-                base_url=full_url,
-                custom_headers=custom_headers,
-                **kwargs,
-            )
-        else:
-            raise ValueError(f'Unsupported LLM provider: {llm_provider}')

-    def _fetch_llm_config(
+    async def _fetch_llm_config_async(
         self, base_url: str, model_id: str, api_token: str, app_key: str
     ) -> Dict[str, Any]:
         """
-        Fetch LLM configuration from the API endpoint.
+        Fetch LLM configuration from the API endpoint asynchronously.

         Args:
             base_url: The base URL of the API server
```
```diff
@@ -182,8 +141,8 @@ def _fetch_llm_config(
         }

         try:
-            with httpx.Client() as client:
-                response = client.get(config_url, headers=headers, timeout=30.0)
+            async with httpx.AsyncClient() as client:
+                response = await client.get(config_url, headers=headers, timeout=30.0)
                 response.raise_for_status()

                 data = response.json()
```
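Switching to `httpx.AsyncClient` lets the config fetch run inside the caller's event loop without blocking it. A standalone sketch of the same request pattern; the URL and header names are placeholders, not the actual proxy contract:

```python
# Standalone sketch of the async fetch pattern used above.
# The URL and header names are placeholders, not the real proxy API.
import asyncio
import httpx

async def fetch_config(config_url: str, headers: dict) -> dict:
    async with httpx.AsyncClient() as client:
        response = await client.get(config_url, headers=headers, timeout=30.0)
        response.raise_for_status()   # surface 4xx/5xx as exceptions
        return response.json()

async def main():
    try:
        cfg = await fetch_config(
            'https://proxy.example.com/v1/llm/config-example',  # placeholder URL
            {'X-Example-Key': 'app-key'},                       # placeholder header
        )
        print(cfg)
    except httpx.HTTPError as exc:
        print(f'config fetch failed: {exc}')

asyncio.run(main())
```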
```diff
@@ -211,6 +170,110 @@ def _fetch_llm_config(
         except Exception as e:
             raise Exception(f'Failed to fetch LLM config: {str(e)}') from e

+    async def _ensure_initialized(self):
+        """
+        Ensure the LLM is initialized by fetching config on first use.
+        Uses double-checked locking to prevent race conditions in concurrent scenarios.
+        """
+        # Fast path: already initialized
+        if self._initialized:
+            return
+
+        # Acquire lock for initialization
+        async with self._init_lock:
+            # Double-check: another task might have initialized while we waited
+            if self._initialized:
+                return
+
+            # Generate or use provided access token
+            if self._access_token:
+                api_token = self._access_token
+            else:
+                now = datetime.now()
+                payload = {
+                    'iss': self._issuer,
+                    'aud': self._audience,
+                    'iat': int(now.timestamp()),
+                    'exp': int((now + timedelta(seconds=3600)).timestamp()),
+                    'role_id': 'floconsole-service',
+                    'user_id': 'service',
+                    'service_auth': True,
+                }
+                service_token = jwt.encode(payload, self._app_secret, algorithm='HS256')
+                api_token = f'fc_{service_token}'
+
+            # Fetch LLM configuration from API
+            config = await self._fetch_llm_config_async(
+                self._base_url, self._model_id, api_token, self._app_key
+            )
+            llm_model = config['llm_model']
+            llm_type = config['type']
+
+            # Map type string to LLMProvider enum
+            try:
+                llm_provider = LLMProvider(llm_type.lower())
+            except ValueError:
+                raise ValueError(
+                    f'Unsupported LLM provider type from API: {llm_type}. '
+                    f'Supported types: {[p.value for p in LLMProvider]}'
+                )
+
+            # Update instance attributes
+            self.llm_provider = llm_provider
+            self.model = llm_model
+            self.api_key = api_token
+
+            # Construct full URL for LLM inference
+            full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}'
+
+            # Prepare custom headers for proxy authentication
+            custom_headers = {'X-Rootflo-Key': self._app_key}
+
+            # Instantiate appropriate SDK wrapper based on llm_provider
+            if llm_provider == LLMProvider.OPENAI:
+                self._llm = OpenAI(
+                    model=llm_model,
+                    base_url=full_url,
+                    api_key=api_token,
+                    temperature=self._temperature,
+                    custom_headers=custom_headers,
+                    **self._kwargs,
+                )
+            elif llm_provider == LLMProvider.ANTHROPIC:
+                self._llm = Anthropic(
+                    model=llm_model,
+                    base_url=full_url,
+                    api_key=api_token,
+                    temperature=self._temperature,
+                    custom_headers=custom_headers,
+                    **self._kwargs,
+                )
+            elif llm_provider == LLMProvider.GEMINI:
+                # Gemini SDK - pass base_url which will be handled via http_options
+                self._llm = Gemini(
+                    model=llm_model,
+                    api_key=api_token,
+                    temperature=self._temperature,
+                    base_url=full_url,
+                    custom_headers=custom_headers,
+                    **self._kwargs,
+                )
+            elif llm_provider == LLMProvider.VLLM:
+                # vLLM via OpenAI-compatible API
+                self._llm = OpenAIVLLM(
+                    model=llm_model,
+                    base_url=full_url,
+                    api_key=api_token,
+                    temperature=self._temperature,
+                    custom_headers=custom_headers,
+                    **self._kwargs,
+                )
+            else:
+                raise ValueError(f'Unsupported LLM provider: {llm_provider}')
+
+            # Mark as initialized
+            self._initialized = True

     async def generate(
         self,
         messages: List[Dict[str, str]],
```

Member (inline comment on the provider branches above): Add support for vLLM and Ollama
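On the comment above: vLLM is already covered by the new `LLMProvider.VLLM` branch in this revision, while Ollama is not touched by this PR. A purely hypothetical sketch of what an Ollama branch could look like; `OllamaLLM` and the `OLLAMA` enum value are invented here for illustration and do not exist in this diff:

```python
# Purely hypothetical: neither OLLAMA nor OllamaLLM exists in this PR.
from enum import Enum

class LLMProvider(Enum):
    OPENAI = 'openai'
    GEMINI = 'gemini'
    ANTHROPIC = 'anthropic'
    VLLM = 'vllm'
    OLLAMA = 'ollama'  # hypothetical value the proxy API could return

class OllamaLLM:
    """Hypothetical wrapper with the same constructor shape as OpenAIVLLM."""
    def __init__(self, model, base_url, api_key, temperature, custom_headers, **kwargs):
        self.model = model
        self.base_url = base_url

def build_wrapper(llm_provider, llm_model, full_url, api_token, temperature, custom_headers, **kwargs):
    # Stand-in for the extra elif branch that _ensure_initialized() would need.
    if llm_provider == LLMProvider.OLLAMA:
        return OllamaLLM(
            model=llm_model,
            base_url=full_url,
            api_key=api_token,
            temperature=temperature,
            custom_headers=custom_headers,
            **kwargs,
        )
    raise ValueError(f'Unsupported LLM provider: {llm_provider}')

wrapper = build_wrapper(
    LLMProvider.OLLAMA, 'llama3',
    'https://proxy.example.com/v1/llm-inference/model-123', 'token', 0.2, {},
)
print(wrapper.model)
```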
```diff
@@ -219,6 +282,7 @@ async def generate(
         **kwargs,
     ) -> Dict[str, Any]:
         """Generate a response from the LLM"""
+        await self._ensure_initialized()
         return await self._llm.generate(
             messages, functions=functions, output_schema=output_schema, **kwargs
         )
```
```diff
@@ -230,11 +294,16 @@ async def stream(
         **kwargs: Any,
     ) -> AsyncIterator[Dict[str, Any]]:
         """Generate a streaming response from the LLM"""
+        await self._ensure_initialized()
         async for chunk in self._llm.stream(messages, functions=functions, **kwargs):
             yield chunk

     def get_message_content(self, response: Any) -> str:
         """Extract message content from response"""
+        if not getattr(self, '_initialized', False) or self._llm is None:
+            raise RuntimeError(
+                'RootFloLLM is not initialized yet; call generate() or stream() first.'
+            )
         return self._llm.get_message_content(response)

     def format_tool_for_llm(self, tool: 'Tool') -> Dict[str, Any]:
```
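As a quick illustration of the guard added to `get_message_content` (constructor arguments are placeholders and the import path is assumed), calling it before any `generate()`/`stream()` now fails with a clear `RuntimeError` rather than an `AttributeError` on a `None` `_llm`:

```python
# Illustration only: placeholder arguments, assumed import path.
from flo_ai.llm.rootflo_llm import RootFloLLM

llm = RootFloLLM(
    base_url='https://proxy.example.com',
    model_id='model-123',
    app_key='app-key',
    app_secret='app-secret',
    issuer='issuer',
    audience='audience',
)
try:
    llm.get_message_content(response=None)
except RuntimeError as exc:
    # "RootFloLLM is not initialized yet; call generate() or stream() first."
    print(exc)
```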