From 64f65c91ea325a7108885920e0e64622750b9ae9 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Fri, 21 Nov 2025 18:16:00 +0530 Subject: [PATCH 1/9] rootflo_llm -> lazy fetching llm config - the fetch_llm_config call was synchronous, and if the server calling flo-ai and base_url are same, that was causing a deadlock. --- flo_ai/flo_ai/llm/rootflo_llm.py | 209 +++++++++++++++++++------------ 1 file changed, 131 insertions(+), 78 deletions(-) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 9d59c38a..4cfe5604 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -4,6 +4,7 @@ from flo_ai.models.chat_message import ImageMessageContent import jwt import httpx +import asyncio from .base_llm import BaseLLM from .openai_llm import OpenAI from .gemini_llm import Gemini @@ -50,9 +51,12 @@ def __init__( access_token: Optional pre-generated access token (if provided, skips JWT generation) temperature: Temperature parameter for generation **kwargs: Additional parameters to pass to the underlying SDK + + Note: + The actual LLM configuration is fetched lazily on first use (generate/stream) + to avoid blocking HTTP calls during initialization. """ # Validate required parameters - if not model_id: raise ValueError('model_id is required') @@ -80,88 +84,41 @@ def __init__( if not app_key: raise ValueError('app_key is required even when using access_token') - # Use provided access_token or generate JWT token - if access_token: - api_token = access_token - else: - now = datetime.now() - payload = { - 'iss': issuer, - 'aud': audience, - 'iat': int(now.timestamp()), - 'exp': int((now + timedelta(seconds=3600)).timestamp()), - 'role_id': 'floconsole-service', - 'user_id': 'service', - 'service_auth': True, - } - service_token = jwt.encode(payload, app_secret, algorithm='HS256') - api_token = f'fc_{service_token}' - - # Fetch LLM configuration from API - config = self._fetch_llm_config(base_url, model_id, api_token, app_key) - llm_model = config['llm_model'] - llm_type = config['type'] - - # Map type string to LLMProvider enum - try: - llm_provider = LLMProvider(llm_type.lower()) - except ValueError: - raise ValueError( - f'Unsupported LLM provider type from API: {llm_type}. 
' - f'Supported types: {[p.value for p in LLMProvider]}' - ) + # Store initialization parameters for lazy initialization + self._base_url = base_url + self._model_id = model_id + self._app_key = app_key + self._app_secret = app_secret + self._issuer = issuer + self._audience = audience + self._access_token = access_token + self._temperature = temperature + self._kwargs = kwargs + + # Lazy initialization state + self._llm = None + self._initialized = False + self._init_lock = asyncio.Lock() + + # Will be set during initialization + self.base_url = base_url + self.model_id = model_id + self.llm_provider = None + # Call parent __init__ with minimal parameters + # Actual model will be set during lazy initialization super().__init__( - model=llm_model, api_key=api_token, temperature=temperature, **kwargs + model='placeholder', # Will be updated during lazy init + api_key='placeholder', # Will be updated during lazy init + temperature=temperature, + **kwargs, ) - self.base_url = base_url - self.model_id = model_id - self.llm_provider = llm_provider - - # Construct full URL for LLM inference - full_url = f'{base_url}/v1/llm-inference/{model_id}' - - # Prepare custom headers for proxy authentication - custom_headers = {'X-Rootflo-Key': app_key} - - # Instantiate appropriate SDK wrapper based on llm_provider - if llm_provider == LLMProvider.OPENAI: - self._llm = OpenAI( - model=llm_model, - base_url=full_url, - api_key=api_token, - temperature=temperature, - custom_headers=custom_headers, - **kwargs, - ) - elif llm_provider == LLMProvider.ANTHROPIC: - self._llm = Anthropic( - model=llm_model, - base_url=full_url, - api_key=api_token, - temperature=temperature, - custom_headers=custom_headers, - **kwargs, - ) - elif llm_provider == LLMProvider.GEMINI: - # Gemini SDK - pass base_url which will be handled via http_options - self._llm = Gemini( - model=llm_model, - api_key=api_token, - temperature=temperature, - base_url=full_url, - custom_headers=custom_headers, - **kwargs, - ) - else: - raise ValueError(f'Unsupported LLM provider: {llm_provider}') - - def _fetch_llm_config( + async def _fetch_llm_config_async( self, base_url: str, model_id: str, api_token: str, app_key: str ) -> Dict[str, Any]: """ - Fetch LLM configuration from the API endpoint. + Fetch LLM configuration from the API endpoint asynchronously. Args: base_url: The base URL of the API server @@ -182,8 +139,8 @@ def _fetch_llm_config( } try: - with httpx.Client() as client: - response = client.get(config_url, headers=headers, timeout=30.0) + async with httpx.AsyncClient() as client: + response = await client.get(config_url, headers=headers, timeout=30.0) response.raise_for_status() data = response.json() @@ -211,6 +168,100 @@ def _fetch_llm_config( except Exception as e: raise Exception(f'Failed to fetch LLM config: {str(e)}') from e + async def _ensure_initialized(self): + """ + Ensure the LLM is initialized by fetching config on first use. + Uses double-checked locking to prevent race conditions in concurrent scenarios. 
+ """ + # Fast path: already initialized + if self._initialized: + return + + # Acquire lock for initialization + async with self._init_lock: + # Double-check: another task might have initialized while we waited + if self._initialized: + return + + # Generate or use provided access token + if self._access_token: + api_token = self._access_token + else: + now = datetime.now() + payload = { + 'iss': self._issuer, + 'aud': self._audience, + 'iat': int(now.timestamp()), + 'exp': int((now + timedelta(seconds=3600)).timestamp()), + 'role_id': 'floconsole-service', + 'user_id': 'service', + 'service_auth': True, + } + service_token = jwt.encode(payload, self._app_secret, algorithm='HS256') + api_token = f'fc_{service_token}' + + # Fetch LLM configuration from API + config = await self._fetch_llm_config_async( + self._base_url, self._model_id, api_token, self._app_key + ) + llm_model = config['llm_model'] + llm_type = config['type'] + + # Map type string to LLMProvider enum + try: + llm_provider = LLMProvider(llm_type.lower()) + except ValueError: + raise ValueError( + f'Unsupported LLM provider type from API: {llm_type}. ' + f'Supported types: {[p.value for p in LLMProvider]}' + ) + + # Update instance attributes + self.llm_provider = llm_provider + self.model = llm_model + self.api_key = api_token + + # Construct full URL for LLM inference + full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' + + # Prepare custom headers for proxy authentication + custom_headers = {'X-Rootflo-Key': self._app_key} + + # Instantiate appropriate SDK wrapper based on llm_provider + if llm_provider == LLMProvider.OPENAI: + self._llm = OpenAI( + model=llm_model, + base_url=full_url, + api_key=api_token, + temperature=self._temperature, + custom_headers=custom_headers, + **self._kwargs, + ) + elif llm_provider == LLMProvider.ANTHROPIC: + self._llm = Anthropic( + model=llm_model, + base_url=full_url, + api_key=api_token, + temperature=self._temperature, + custom_headers=custom_headers, + **self._kwargs, + ) + elif llm_provider == LLMProvider.GEMINI: + # Gemini SDK - pass base_url which will be handled via http_options + self._llm = Gemini( + model=llm_model, + api_key=api_token, + temperature=self._temperature, + base_url=full_url, + custom_headers=custom_headers, + **self._kwargs, + ) + else: + raise ValueError(f'Unsupported LLM provider: {llm_provider}') + + # Mark as initialized + self._initialized = True + async def generate( self, messages: List[Dict[str, str]], @@ -219,6 +270,7 @@ async def generate( **kwargs, ) -> Dict[str, Any]: """Generate a response from the LLM""" + await self._ensure_initialized() return await self._llm.generate( messages, functions=functions, output_schema=output_schema, **kwargs ) @@ -230,6 +282,7 @@ async def stream( **kwargs: Any, ) -> AsyncIterator[Dict[str, Any]]: """Generate a streaming response from the LLM""" + await self._ensure_initialized() async for chunk in self._llm.stream(messages, functions=functions, **kwargs): yield chunk From eb942e1761010658a6d22335fc3f98ae3c5adead Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:15:12 +0530 Subject: [PATCH 2/9] llm_factory -> added openai_vllm --- flo_ai/flo_ai/helpers/llm_factory.py | 41 ++++++++++++++++++++++++++++ flo_ai/flo_ai/llm/rootflo_llm.py | 4 +-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 27891d3d..3328f17b 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ 
b/flo_ai/flo_ai/helpers/llm_factory.py @@ -22,6 +22,7 @@ class LLMFactory: 'ollama', 'vertexai', 'rootflo', + 'openai_vllm', } @staticmethod @@ -63,6 +64,14 @@ def create_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': ... {'provider': 'rootflo', 'model_id': 'model-123'}, ... app_key='key', app_secret='secret', issuer='iss', audience='aud' ... ) + + >>> # OpenAI vLLM with base_url + >>> llm = LLMFactory.create_llm({ + ... 'provider': 'openai_vllm', + ... 'name': 'microsoft/phi-4', + ... 'base_url': 'http://localhost:8000/v1', + ... 'api_key': 'vllm-key' + ... }) """ provider = model_config.get('provider', 'openai').lower() @@ -76,6 +85,8 @@ def create_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': return LLMFactory._create_rootflo_llm(model_config, **kwargs) elif provider == 'vertexai': return LLMFactory._create_vertexai_llm(model_config, **kwargs) + elif provider == 'openai_vllm': + return LLMFactory._create_openai_vllm_llm(model_config, **kwargs) else: return LLMFactory._create_standard_llm(provider, model_config, **kwargs) @@ -134,6 +145,36 @@ def _create_vertexai_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': base_url=base_url, ) + @staticmethod + def _create_openai_vllm_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': + """Create OpenAI vLLM instance with base_url handling.""" + from flo_ai.llm import OpenAIVLLM + + model_name = model_config.get('name') + if not model_name: + raise ValueError( + 'openai_vllm provider requires "name" parameter in model configuration' + ) + + # Priority: kwargs > model_config > None + base_url = kwargs.get('base_url') or model_config.get('base_url') + if not base_url: + raise ValueError( + 'openai_vllm provider requires "base_url" parameter. ' + 'Provide it in model_config or as a kwarg.' 
+ ) + + # Optional parameters + api_key = kwargs.get('api_key') or model_config.get('api_key') + temperature = kwargs.get('temperature') or model_config.get('temperature', 0.7) + + return OpenAIVLLM( + model=model_name, + base_url=base_url, + api_key=api_key, + temperature=temperature, + ) + @staticmethod def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': """Create RootFlo LLM instance with authentication.""" diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 4cfe5604..e32eaa9a 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -108,8 +108,8 @@ def __init__( # Call parent __init__ with minimal parameters # Actual model will be set during lazy initialization super().__init__( - model='placeholder', # Will be updated during lazy init - api_key='placeholder', # Will be updated during lazy init + model='', + api_key='', temperature=temperature, **kwargs, ) From ff325521b92b74040c3dd9d8f405abd4871f8ff3 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:22:06 +0530 Subject: [PATCH 3/9] added OpenAIVLLM to rootflo_llm --- flo_ai/flo_ai/llm/rootflo_llm.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index e32eaa9a..7ea13e6e 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -9,6 +9,7 @@ from .openai_llm import OpenAI from .gemini_llm import Gemini from .anthropic_llm import Anthropic +from .openai_vllm import OpenAIVLLM from flo_ai.tool.base_tool import Tool @@ -18,6 +19,7 @@ class LLMProvider(Enum): OPENAI = 'openai' GEMINI = 'gemini' ANTHROPIC = 'anthropic' + VLLM = 'vllm' class RootFloLLM(BaseLLM): @@ -256,6 +258,15 @@ async def _ensure_initialized(self): custom_headers=custom_headers, **self._kwargs, ) + elif llm_provider == LLMProvider.VLLM: + # vLLM via OpenAI-compatible API + self._llm = OpenAIVLLM( + model=llm_model, + base_url=full_url, + api_key=api_token, + temperature=self._temperature, + **self._kwargs, + ) else: raise ValueError(f'Unsupported LLM provider: {llm_provider}') From c7c4fb552147423724fb2a1c0e1e20e5226a7c75 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:39:19 +0530 Subject: [PATCH 4/9] resolved comments --- flo_ai/flo_ai/llm/rootflo_llm.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 7ea13e6e..15ef4d26 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -100,7 +100,7 @@ def __init__( # Lazy initialization state self._llm = None self._initialized = False - self._init_lock = asyncio.Lock() + self._init_lock = None # Will be set during initialization self.base_url = base_url @@ -179,6 +179,10 @@ async def _ensure_initialized(self): if self._initialized: return + # Lazily create the lock in an async context + if self._init_lock is None: + self._init_lock = asyncio.Lock() + # Acquire lock for initialization async with self._init_lock: # Double-check: another task might have initialized while we waited @@ -265,6 +269,7 @@ async def _ensure_initialized(self): base_url=full_url, api_key=api_token, temperature=self._temperature, + custom_headers=custom_headers, **self._kwargs, ) else: @@ -299,6 +304,10 @@ async def stream( def get_message_content(self, response: Any) -> str: """Extract message content from response""" + if not getattr(self, '_initialized', False) or self._llm 
is None: + raise RuntimeError( + 'RootFloLLM is not initialized yet; call generate() or stream() first.' + ) return self._llm.get_message_content(response) def format_tool_for_llm(self, tool: 'Tool') -> Dict[str, Any]: From 4b0910e6bb296e632848bb659de8420135154352 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:58:11 +0530 Subject: [PATCH 5/9] resolved comments v2 --- flo_ai/flo_ai/helpers/llm_factory.py | 5 ++++- flo_ai/flo_ai/llm/rootflo_llm.py | 6 +----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 3328f17b..1dd21f28 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -166,7 +166,10 @@ def _create_openai_vllm_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM' # Optional parameters api_key = kwargs.get('api_key') or model_config.get('api_key') - temperature = kwargs.get('temperature') or model_config.get('temperature', 0.7) + temperature = kwargs.get( + 'temperature', + model_config.get('temperature', 0.7), + ) return OpenAIVLLM( model=model_name, diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 15ef4d26..e641a5a2 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -100,7 +100,7 @@ def __init__( # Lazy initialization state self._llm = None self._initialized = False - self._init_lock = None + self._init_lock = asyncio.Lock() # Will be set during initialization self.base_url = base_url @@ -179,10 +179,6 @@ async def _ensure_initialized(self): if self._initialized: return - # Lazily create the lock in an async context - if self._init_lock is None: - self._init_lock = asyncio.Lock() - # Acquire lock for initialization async with self._init_lock: # Double-check: another task might have initialized while we waited From 560c1bfccea0531d16bc245a5624b1f6f669c804 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 16:04:23 +0530 Subject: [PATCH 6/9] resolved comments v3 --- flo_ai/flo_ai/llm/rootflo_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index e641a5a2..6aa0ad5e 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -25,7 +25,7 @@ class LLMProvider(Enum): class RootFloLLM(BaseLLM): """ Proxy LLM class that routes to different SDK implementations based on type. - Acts as a unified interface to OpenAI, Gemini, and Anthropic SDKs via a proxy URL. + Acts as a unified interface to OpenAI, Gemini, Anthropic SDKs and VLLM via a proxy URL. """ def __init__( From 69bdeb89f23600a0c3727c8f5d4f22dd0eb7d7df Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 17:11:00 +0530 Subject: [PATCH 7/9] if access_token -> then app_key not required --- flo_ai/flo_ai/helpers/llm_factory.py | 15 ++++----------- flo_ai/flo_ai/llm/rootflo_llm.py | 28 ++++++++++++++++------------ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 1dd21f28..c942626b 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -220,18 +220,11 @@ def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': f'(ROOTFLO_BASE_URL, ROOTFLO_APP_KEY, ROOTFLO_APP_SECRET, ROOTFLO_ISSUER, ROOTFLO_AUDIENCE).' 
) else: - # Access token flow - only needs base_url and app_key - required_params = { - 'base_url': base_url, - 'app_key': app_key, - } - missing = [k for k, v in required_params.items() if not v] - - if missing: + # Access token flow - only needs base_url + if not base_url: raise ValueError( - f'RootFlo configuration incomplete. Missing required parameters: {", ".join(missing)}. ' - f'These can be provided via kwargs or environment variables ' - f'(ROOTFLO_BASE_URL, ROOTFLO_APP_KEY).' + 'RootFlo configuration incomplete. Missing required parameter: base_url. ' + 'Provide it in model_config, as a kwarg, or via ROOTFLO_BASE_URL environment variable.' ) return RootFloLLM( diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 6aa0ad5e..9efbbd7b 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -32,10 +32,10 @@ def __init__( self, base_url: str, model_id: str, - app_key: str, - app_secret: str, - issuer: str, - audience: str, + app_key: Optional[str] = None, + app_secret: Optional[str] = None, + issuer: Optional[str] = None, + audience: Optional[str] = None, access_token: Optional[str] = None, temperature: float = 0.7, **kwargs, @@ -82,9 +82,6 @@ def __init__( f'Missing required parameters for JWT generation: {", ".join(missing)}. ' f'Either provide these parameters or pass an access_token directly.' ) - else: # app key is still required - if not app_key: - raise ValueError('app_key is required even when using access_token') # Store initialization parameters for lazy initialization self._base_url = base_url @@ -117,7 +114,11 @@ def __init__( ) async def _fetch_llm_config_async( - self, base_url: str, model_id: str, api_token: str, app_key: str + self, + base_url: str, + model_id: str, + api_token: str, + app_key: Optional[str] = None, ) -> Dict[str, Any]: """ Fetch LLM configuration from the API endpoint asynchronously. 
@@ -126,7 +127,7 @@ async def _fetch_llm_config_async( base_url: The base URL of the API server model_id: The model identifier (config_id) api_token: The JWT token for authorization - app_key: Application key for X-Rootflo-Key header + app_key: Optional application key for X-Rootflo-Key header Returns: Dict containing llm_model and type @@ -137,9 +138,12 @@ async def _fetch_llm_config_async( config_url = f'{base_url}/v1/llm-inference-configs/{model_id}' headers = { 'Authorization': f'Bearer {api_token}', - 'X-Rootflo-Key': app_key, } + # Only add X-Rootflo-Key header if app_key is provided + if app_key: + headers['X-Rootflo-Key'] = app_key + try: async with httpx.AsyncClient() as client: response = await client.get(config_url, headers=headers, timeout=30.0) @@ -226,8 +230,8 @@ async def _ensure_initialized(self): # Construct full URL for LLM inference full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' - # Prepare custom headers for proxy authentication - custom_headers = {'X-Rootflo-Key': self._app_key} + # Prepare custom headers for proxy authentication (only if app_key is provided) + custom_headers = {'X-Rootflo-Key': self._app_key} if self._app_key else {} # Instantiate appropriate SDK wrapper based on llm_provider if llm_provider == LLMProvider.OPENAI: From baedf018818384b95c74d9d7becbb55082e1438b Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 17:32:25 +0530 Subject: [PATCH 8/9] if access_token -> passing other jwt params as None --- flo_ai/flo_ai/helpers/llm_factory.py | 8 ++++---- flo_ai/flo_ai/llm/rootflo_llm.py | 10 +++++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index c942626b..268c5e91 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -230,10 +230,10 @@ def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': return RootFloLLM( base_url=base_url, model_id=model_id, - app_key=app_key, - app_secret=app_secret, - issuer=issuer, - audience=audience, + app_key=None if access_token else app_key, + app_secret=None if access_token else app_secret, + issuer=None if access_token else issuer, + audience=None if access_token else audience, access_token=access_token, ) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 9efbbd7b..cb398bfb 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -140,8 +140,8 @@ async def _fetch_llm_config_async( 'Authorization': f'Bearer {api_token}', } - # Only add X-Rootflo-Key header if app_key is provided - if app_key: + # Only add X-Rootflo-Key header if app_key is provided and access_token is not used + if app_key and self._access_token is None: headers['X-Rootflo-Key'] = app_key try: @@ -231,7 +231,11 @@ async def _ensure_initialized(self): full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' # Prepare custom headers for proxy authentication (only if app_key is provided) - custom_headers = {'X-Rootflo-Key': self._app_key} if self._app_key else {} + custom_headers = ( + {'X-Rootflo-Key': self._app_key} + if (self._app_key and self._access_token is None) + else {} + ) # Instantiate appropriate SDK wrapper based on llm_provider if llm_provider == LLMProvider.OPENAI: From f3f5fa409341d37718523a4c7e710091ab418171 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 18:03:04 +0530 Subject: [PATCH 9/9] Revert "if access_token -> passing other jwt params as None" 
This reverts commit baedf018818384b95c74d9d7becbb55082e1438b. --- flo_ai/flo_ai/helpers/llm_factory.py | 8 ++++---- flo_ai/flo_ai/llm/rootflo_llm.py | 10 +++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 268c5e91..c942626b 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -230,10 +230,10 @@ def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': return RootFloLLM( base_url=base_url, model_id=model_id, - app_key=None if access_token else app_key, - app_secret=None if access_token else app_secret, - issuer=None if access_token else issuer, - audience=None if access_token else audience, + app_key=app_key, + app_secret=app_secret, + issuer=issuer, + audience=audience, access_token=access_token, ) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index cb398bfb..9efbbd7b 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -140,8 +140,8 @@ async def _fetch_llm_config_async( 'Authorization': f'Bearer {api_token}', } - # Only add X-Rootflo-Key header if app_key is provided and access_token is not used - if app_key and self._access_token is None: + # Only add X-Rootflo-Key header if app_key is provided + if app_key: headers['X-Rootflo-Key'] = app_key try: @@ -231,11 +231,7 @@ async def _ensure_initialized(self): full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' # Prepare custom headers for proxy authentication (only if app_key is provided) - custom_headers = ( - {'X-Rootflo-Key': self._app_key} - if (self._app_key and self._access_token is None) - else {} - ) + custom_headers = {'X-Rootflo-Key': self._app_key} if self._app_key else {} # Instantiate appropriate SDK wrapper based on llm_provider if llm_provider == LLMProvider.OPENAI:
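The core of PATCH 1 is the move from a synchronous `httpx.Client` call inside `__init__` to an awaited fetch on first use: when the process that serves the config endpoint is the same one constructing `RootFloLLM`, a blocking HTTP call back to itself from inside the event loop stalls the very loop that would answer the request, hence the deadlock. Below is a condensed, standalone sketch of the double-checked locking shape the series lands on (eager lock in `__init__` per PATCH 5, lock-free fast path, re-check under the lock); class and value names are illustrative, not part of flo-ai.

```python
import asyncio
from typing import Optional


class LazyConfigClient:
    """Standalone illustration of the lazy-init pattern adopted by RootFloLLM."""

    def __init__(self) -> None:
        self._config: Optional[dict] = None
        self._initialized = False
        self._init_lock = asyncio.Lock()  # eager creation, as the series settles on in PATCH 5

    async def _fetch_config(self) -> dict:
        # Stand-in for the httpx.AsyncClient GET against /v1/llm-inference-configs/{model_id}
        await asyncio.sleep(0)
        return {'llm_model': 'example-model', 'type': 'openai'}

    async def _ensure_initialized(self) -> None:
        if self._initialized:  # fast path: no locking once initialized
            return
        async with self._init_lock:
            if self._initialized:  # double check: another task may have finished while we waited
                return
            self._config = await self._fetch_config()
            self._initialized = True  # set last, only after the fetch succeeded

    async def generate(self, prompt: str) -> str:
        await self._ensure_initialized()
        return f"[{self._config['llm_model']}] {prompt}"


async def main() -> None:
    client = LazyConfigClient()
    # Concurrent first calls serialize on the lock; only one config fetch is issued.
    print(await asyncio.gather(*(client.generate(f'msg {i}') for i in range(3))))


asyncio.run(main())
```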
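With initialization deferred, constructing `RootFloLLM` is side-effect free, so it can happen inside a handler of the same server that `base_url` points at; the config fetch, JWT handling, and provider SDK setup all run inside the live event loop on the first `generate()` or `stream()` call. A usage sketch against the final state of the series, where a pre-issued access token makes `app_key`/`app_secret`/`issuer`/`audience` optional (PATCH 7); the import path is inferred from the file layout, and the URL, model id, and token are placeholders.

```python
import asyncio

from flo_ai.llm.rootflo_llm import RootFloLLM  # path inferred from flo_ai/flo_ai/llm/rootflo_llm.py


async def main() -> None:
    # Cheap construction: no HTTP call, no event-loop access (PATCH 1).
    llm = RootFloLLM(
        base_url='https://console.example.com',  # placeholder
        model_id='model-123',                    # placeholder config id
        access_token='fc_example-token',         # placeholder pre-issued token
    )

    # First call triggers _ensure_initialized(): config fetch, provider selection, SDK wrapper setup.
    response = await llm.generate([{'role': 'user', 'content': 'Hello'}])
    print(llm.get_message_content(response))


asyncio.run(main())
```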
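PATCH 5 also replaces the `or`-based temperature lookup in `_create_openai_vllm_llm` with a nested-default `dict.get`, presumably so that an explicit `temperature=0.0` (which is falsy) is not silently overridden by the config default; a short comparison with made-up values:

```python
model_config = {'temperature': 0.7}
kwargs = {'temperature': 0.0}  # caller explicitly asks for deterministic sampling

# or-based lookup: 0.0 is falsy, so the config default wins -> 0.7
old = kwargs.get('temperature') or model_config.get('temperature', 0.7)

# nested-default lookup (PATCH 5): the kwarg is honoured whenever the key is present -> 0.0
new = kwargs.get('temperature', model_config.get('temperature', 0.7))

assert (old, new) == (0.7, 0.0)
```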