From 64f65c91ea325a7108885920e0e64622750b9ae9 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Fri, 21 Nov 2025 18:16:00 +0530 Subject: [PATCH 1/9] rootflo_llm -> lazy fetching llm config - the fetch_llm_config call was synchronous, and if the server calling flo-ai and base_url are same, that was causing a deadlock. --- flo_ai/flo_ai/llm/rootflo_llm.py | 209 +++++++++++++++++++------------ 1 file changed, 131 insertions(+), 78 deletions(-) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 9d59c38a..4cfe5604 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -4,6 +4,7 @@ from flo_ai.models.chat_message import ImageMessageContent import jwt import httpx +import asyncio from .base_llm import BaseLLM from .openai_llm import OpenAI from .gemini_llm import Gemini @@ -50,9 +51,12 @@ def __init__( access_token: Optional pre-generated access token (if provided, skips JWT generation) temperature: Temperature parameter for generation **kwargs: Additional parameters to pass to the underlying SDK + + Note: + The actual LLM configuration is fetched lazily on first use (generate/stream) + to avoid blocking HTTP calls during initialization. """ # Validate required parameters - if not model_id: raise ValueError('model_id is required') @@ -80,88 +84,41 @@ def __init__( if not app_key: raise ValueError('app_key is required even when using access_token') - # Use provided access_token or generate JWT token - if access_token: - api_token = access_token - else: - now = datetime.now() - payload = { - 'iss': issuer, - 'aud': audience, - 'iat': int(now.timestamp()), - 'exp': int((now + timedelta(seconds=3600)).timestamp()), - 'role_id': 'floconsole-service', - 'user_id': 'service', - 'service_auth': True, - } - service_token = jwt.encode(payload, app_secret, algorithm='HS256') - api_token = f'fc_{service_token}' - - # Fetch LLM configuration from API - config = self._fetch_llm_config(base_url, model_id, api_token, app_key) - llm_model = config['llm_model'] - llm_type = config['type'] - - # Map type string to LLMProvider enum - try: - llm_provider = LLMProvider(llm_type.lower()) - except ValueError: - raise ValueError( - f'Unsupported LLM provider type from API: {llm_type}. 
' - f'Supported types: {[p.value for p in LLMProvider]}' - ) + # Store initialization parameters for lazy initialization + self._base_url = base_url + self._model_id = model_id + self._app_key = app_key + self._app_secret = app_secret + self._issuer = issuer + self._audience = audience + self._access_token = access_token + self._temperature = temperature + self._kwargs = kwargs + + # Lazy initialization state + self._llm = None + self._initialized = False + self._init_lock = asyncio.Lock() + + # Will be set during initialization + self.base_url = base_url + self.model_id = model_id + self.llm_provider = None + # Call parent __init__ with minimal parameters + # Actual model will be set during lazy initialization super().__init__( - model=llm_model, api_key=api_token, temperature=temperature, **kwargs + model='placeholder', # Will be updated during lazy init + api_key='placeholder', # Will be updated during lazy init + temperature=temperature, + **kwargs, ) - self.base_url = base_url - self.model_id = model_id - self.llm_provider = llm_provider - - # Construct full URL for LLM inference - full_url = f'{base_url}/v1/llm-inference/{model_id}' - - # Prepare custom headers for proxy authentication - custom_headers = {'X-Rootflo-Key': app_key} - - # Instantiate appropriate SDK wrapper based on llm_provider - if llm_provider == LLMProvider.OPENAI: - self._llm = OpenAI( - model=llm_model, - base_url=full_url, - api_key=api_token, - temperature=temperature, - custom_headers=custom_headers, - **kwargs, - ) - elif llm_provider == LLMProvider.ANTHROPIC: - self._llm = Anthropic( - model=llm_model, - base_url=full_url, - api_key=api_token, - temperature=temperature, - custom_headers=custom_headers, - **kwargs, - ) - elif llm_provider == LLMProvider.GEMINI: - # Gemini SDK - pass base_url which will be handled via http_options - self._llm = Gemini( - model=llm_model, - api_key=api_token, - temperature=temperature, - base_url=full_url, - custom_headers=custom_headers, - **kwargs, - ) - else: - raise ValueError(f'Unsupported LLM provider: {llm_provider}') - - def _fetch_llm_config( + async def _fetch_llm_config_async( self, base_url: str, model_id: str, api_token: str, app_key: str ) -> Dict[str, Any]: """ - Fetch LLM configuration from the API endpoint. + Fetch LLM configuration from the API endpoint asynchronously. Args: base_url: The base URL of the API server @@ -182,8 +139,8 @@ def _fetch_llm_config( } try: - with httpx.Client() as client: - response = client.get(config_url, headers=headers, timeout=30.0) + async with httpx.AsyncClient() as client: + response = await client.get(config_url, headers=headers, timeout=30.0) response.raise_for_status() data = response.json() @@ -211,6 +168,100 @@ def _fetch_llm_config( except Exception as e: raise Exception(f'Failed to fetch LLM config: {str(e)}') from e + async def _ensure_initialized(self): + """ + Ensure the LLM is initialized by fetching config on first use. + Uses double-checked locking to prevent race conditions in concurrent scenarios. 
+ """ + # Fast path: already initialized + if self._initialized: + return + + # Acquire lock for initialization + async with self._init_lock: + # Double-check: another task might have initialized while we waited + if self._initialized: + return + + # Generate or use provided access token + if self._access_token: + api_token = self._access_token + else: + now = datetime.now() + payload = { + 'iss': self._issuer, + 'aud': self._audience, + 'iat': int(now.timestamp()), + 'exp': int((now + timedelta(seconds=3600)).timestamp()), + 'role_id': 'floconsole-service', + 'user_id': 'service', + 'service_auth': True, + } + service_token = jwt.encode(payload, self._app_secret, algorithm='HS256') + api_token = f'fc_{service_token}' + + # Fetch LLM configuration from API + config = await self._fetch_llm_config_async( + self._base_url, self._model_id, api_token, self._app_key + ) + llm_model = config['llm_model'] + llm_type = config['type'] + + # Map type string to LLMProvider enum + try: + llm_provider = LLMProvider(llm_type.lower()) + except ValueError: + raise ValueError( + f'Unsupported LLM provider type from API: {llm_type}. ' + f'Supported types: {[p.value for p in LLMProvider]}' + ) + + # Update instance attributes + self.llm_provider = llm_provider + self.model = llm_model + self.api_key = api_token + + # Construct full URL for LLM inference + full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' + + # Prepare custom headers for proxy authentication + custom_headers = {'X-Rootflo-Key': self._app_key} + + # Instantiate appropriate SDK wrapper based on llm_provider + if llm_provider == LLMProvider.OPENAI: + self._llm = OpenAI( + model=llm_model, + base_url=full_url, + api_key=api_token, + temperature=self._temperature, + custom_headers=custom_headers, + **self._kwargs, + ) + elif llm_provider == LLMProvider.ANTHROPIC: + self._llm = Anthropic( + model=llm_model, + base_url=full_url, + api_key=api_token, + temperature=self._temperature, + custom_headers=custom_headers, + **self._kwargs, + ) + elif llm_provider == LLMProvider.GEMINI: + # Gemini SDK - pass base_url which will be handled via http_options + self._llm = Gemini( + model=llm_model, + api_key=api_token, + temperature=self._temperature, + base_url=full_url, + custom_headers=custom_headers, + **self._kwargs, + ) + else: + raise ValueError(f'Unsupported LLM provider: {llm_provider}') + + # Mark as initialized + self._initialized = True + async def generate( self, messages: List[Dict[str, str]], @@ -219,6 +270,7 @@ async def generate( **kwargs, ) -> Dict[str, Any]: """Generate a response from the LLM""" + await self._ensure_initialized() return await self._llm.generate( messages, functions=functions, output_schema=output_schema, **kwargs ) @@ -230,6 +282,7 @@ async def stream( **kwargs: Any, ) -> AsyncIterator[Dict[str, Any]]: """Generate a streaming response from the LLM""" + await self._ensure_initialized() async for chunk in self._llm.stream(messages, functions=functions, **kwargs): yield chunk From eb942e1761010658a6d22335fc3f98ae3c5adead Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:15:12 +0530 Subject: [PATCH 2/9] llm_factory -> added openai_vllm --- flo_ai/flo_ai/helpers/llm_factory.py | 41 ++++++++++++++++++++++++++++ flo_ai/flo_ai/llm/rootflo_llm.py | 4 +-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 27891d3d..3328f17b 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ 
b/flo_ai/flo_ai/helpers/llm_factory.py @@ -22,6 +22,7 @@ class LLMFactory: 'ollama', 'vertexai', 'rootflo', + 'openai_vllm', } @staticmethod @@ -63,6 +64,14 @@ def create_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': ... {'provider': 'rootflo', 'model_id': 'model-123'}, ... app_key='key', app_secret='secret', issuer='iss', audience='aud' ... ) + + >>> # OpenAI vLLM with base_url + >>> llm = LLMFactory.create_llm({ + ... 'provider': 'openai_vllm', + ... 'name': 'microsoft/phi-4', + ... 'base_url': 'http://localhost:8000/v1', + ... 'api_key': 'vllm-key' + ... }) """ provider = model_config.get('provider', 'openai').lower() @@ -76,6 +85,8 @@ def create_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': return LLMFactory._create_rootflo_llm(model_config, **kwargs) elif provider == 'vertexai': return LLMFactory._create_vertexai_llm(model_config, **kwargs) + elif provider == 'openai_vllm': + return LLMFactory._create_openai_vllm_llm(model_config, **kwargs) else: return LLMFactory._create_standard_llm(provider, model_config, **kwargs) @@ -134,6 +145,36 @@ def _create_vertexai_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': base_url=base_url, ) + @staticmethod + def _create_openai_vllm_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': + """Create OpenAI vLLM instance with base_url handling.""" + from flo_ai.llm import OpenAIVLLM + + model_name = model_config.get('name') + if not model_name: + raise ValueError( + 'openai_vllm provider requires "name" parameter in model configuration' + ) + + # Priority: kwargs > model_config > None + base_url = kwargs.get('base_url') or model_config.get('base_url') + if not base_url: + raise ValueError( + 'openai_vllm provider requires "base_url" parameter. ' + 'Provide it in model_config or as a kwarg.' 
+ ) + + # Optional parameters + api_key = kwargs.get('api_key') or model_config.get('api_key') + temperature = kwargs.get('temperature') or model_config.get('temperature', 0.7) + + return OpenAIVLLM( + model=model_name, + base_url=base_url, + api_key=api_key, + temperature=temperature, + ) + @staticmethod def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': """Create RootFlo LLM instance with authentication.""" diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 4cfe5604..e32eaa9a 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -108,8 +108,8 @@ def __init__( # Call parent __init__ with minimal parameters # Actual model will be set during lazy initialization super().__init__( - model='placeholder', # Will be updated during lazy init - api_key='placeholder', # Will be updated during lazy init + model='', + api_key='', temperature=temperature, **kwargs, ) From ff325521b92b74040c3dd9d8f405abd4871f8ff3 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:22:06 +0530 Subject: [PATCH 3/9] added OpenAIVLLM to rootflo_llm --- flo_ai/flo_ai/llm/rootflo_llm.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index e32eaa9a..7ea13e6e 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -9,6 +9,7 @@ from .openai_llm import OpenAI from .gemini_llm import Gemini from .anthropic_llm import Anthropic +from .openai_vllm import OpenAIVLLM from flo_ai.tool.base_tool import Tool @@ -18,6 +19,7 @@ class LLMProvider(Enum): OPENAI = 'openai' GEMINI = 'gemini' ANTHROPIC = 'anthropic' + VLLM = 'vllm' class RootFloLLM(BaseLLM): @@ -256,6 +258,15 @@ async def _ensure_initialized(self): custom_headers=custom_headers, **self._kwargs, ) + elif llm_provider == LLMProvider.VLLM: + # vLLM via OpenAI-compatible API + self._llm = OpenAIVLLM( + model=llm_model, + base_url=full_url, + api_key=api_token, + temperature=self._temperature, + **self._kwargs, + ) else: raise ValueError(f'Unsupported LLM provider: {llm_provider}') From c7c4fb552147423724fb2a1c0e1e20e5226a7c75 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:39:19 +0530 Subject: [PATCH 4/9] resolved comments --- flo_ai/flo_ai/llm/rootflo_llm.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 7ea13e6e..15ef4d26 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -100,7 +100,7 @@ def __init__( # Lazy initialization state self._llm = None self._initialized = False - self._init_lock = asyncio.Lock() + self._init_lock = None # Will be set during initialization self.base_url = base_url @@ -179,6 +179,10 @@ async def _ensure_initialized(self): if self._initialized: return + # Lazily create the lock in an async context + if self._init_lock is None: + self._init_lock = asyncio.Lock() + # Acquire lock for initialization async with self._init_lock: # Double-check: another task might have initialized while we waited @@ -265,6 +269,7 @@ async def _ensure_initialized(self): base_url=full_url, api_key=api_token, temperature=self._temperature, + custom_headers=custom_headers, **self._kwargs, ) else: @@ -299,6 +304,10 @@ async def stream( def get_message_content(self, response: Any) -> str: """Extract message content from response""" + if not getattr(self, '_initialized', False) or self._llm 
is None: + raise RuntimeError( + 'RootFloLLM is not initialized yet; call generate() or stream() first.' + ) return self._llm.get_message_content(response) def format_tool_for_llm(self, tool: 'Tool') -> Dict[str, Any]: From 4b0910e6bb296e632848bb659de8420135154352 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 15:58:11 +0530 Subject: [PATCH 5/9] resolved comments v2 --- flo_ai/flo_ai/helpers/llm_factory.py | 5 ++++- flo_ai/flo_ai/llm/rootflo_llm.py | 6 +----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 3328f17b..1dd21f28 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -166,7 +166,10 @@ def _create_openai_vllm_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM' # Optional parameters api_key = kwargs.get('api_key') or model_config.get('api_key') - temperature = kwargs.get('temperature') or model_config.get('temperature', 0.7) + temperature = kwargs.get( + 'temperature', + model_config.get('temperature', 0.7), + ) return OpenAIVLLM( model=model_name, diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 15ef4d26..e641a5a2 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -100,7 +100,7 @@ def __init__( # Lazy initialization state self._llm = None self._initialized = False - self._init_lock = None + self._init_lock = asyncio.Lock() # Will be set during initialization self.base_url = base_url @@ -179,10 +179,6 @@ async def _ensure_initialized(self): if self._initialized: return - # Lazily create the lock in an async context - if self._init_lock is None: - self._init_lock = asyncio.Lock() - # Acquire lock for initialization async with self._init_lock: # Double-check: another task might have initialized while we waited From 560c1bfccea0531d16bc245a5624b1f6f669c804 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 16:04:23 +0530 Subject: [PATCH 6/9] resolved comments v3 --- flo_ai/flo_ai/llm/rootflo_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index e641a5a2..6aa0ad5e 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -25,7 +25,7 @@ class LLMProvider(Enum): class RootFloLLM(BaseLLM): """ Proxy LLM class that routes to different SDK implementations based on type. - Acts as a unified interface to OpenAI, Gemini, and Anthropic SDKs via a proxy URL. + Acts as a unified interface to OpenAI, Gemini, Anthropic SDKs and VLLM via a proxy URL. """ def __init__( From 69bdeb89f23600a0c3727c8f5d4f22dd0eb7d7df Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 17:11:00 +0530 Subject: [PATCH 7/9] if access_token -> then app_key not required --- flo_ai/flo_ai/helpers/llm_factory.py | 15 ++++----------- flo_ai/flo_ai/llm/rootflo_llm.py | 28 ++++++++++++++++------------ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 1dd21f28..c942626b 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -220,18 +220,11 @@ def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': f'(ROOTFLO_BASE_URL, ROOTFLO_APP_KEY, ROOTFLO_APP_SECRET, ROOTFLO_ISSUER, ROOTFLO_AUDIENCE).' 
) else: - # Access token flow - only needs base_url and app_key - required_params = { - 'base_url': base_url, - 'app_key': app_key, - } - missing = [k for k, v in required_params.items() if not v] - - if missing: + # Access token flow - only needs base_url + if not base_url: raise ValueError( - f'RootFlo configuration incomplete. Missing required parameters: {", ".join(missing)}. ' - f'These can be provided via kwargs or environment variables ' - f'(ROOTFLO_BASE_URL, ROOTFLO_APP_KEY).' + 'RootFlo configuration incomplete. Missing required parameter: base_url. ' + 'Provide it in model_config, as a kwarg, or via ROOTFLO_BASE_URL environment variable.' ) return RootFloLLM( diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 6aa0ad5e..9efbbd7b 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -32,10 +32,10 @@ def __init__( self, base_url: str, model_id: str, - app_key: str, - app_secret: str, - issuer: str, - audience: str, + app_key: Optional[str] = None, + app_secret: Optional[str] = None, + issuer: Optional[str] = None, + audience: Optional[str] = None, access_token: Optional[str] = None, temperature: float = 0.7, **kwargs, @@ -82,9 +82,6 @@ def __init__( f'Missing required parameters for JWT generation: {", ".join(missing)}. ' f'Either provide these parameters or pass an access_token directly.' ) - else: # app key is still required - if not app_key: - raise ValueError('app_key is required even when using access_token') # Store initialization parameters for lazy initialization self._base_url = base_url @@ -117,7 +114,11 @@ def __init__( ) async def _fetch_llm_config_async( - self, base_url: str, model_id: str, api_token: str, app_key: str + self, + base_url: str, + model_id: str, + api_token: str, + app_key: Optional[str] = None, ) -> Dict[str, Any]: """ Fetch LLM configuration from the API endpoint asynchronously. 
@@ -126,7 +127,7 @@ async def _fetch_llm_config_async( base_url: The base URL of the API server model_id: The model identifier (config_id) api_token: The JWT token for authorization - app_key: Application key for X-Rootflo-Key header + app_key: Optional application key for X-Rootflo-Key header Returns: Dict containing llm_model and type @@ -137,9 +138,12 @@ async def _fetch_llm_config_async( config_url = f'{base_url}/v1/llm-inference-configs/{model_id}' headers = { 'Authorization': f'Bearer {api_token}', - 'X-Rootflo-Key': app_key, } + # Only add X-Rootflo-Key header if app_key is provided + if app_key: + headers['X-Rootflo-Key'] = app_key + try: async with httpx.AsyncClient() as client: response = await client.get(config_url, headers=headers, timeout=30.0) @@ -226,8 +230,8 @@ async def _ensure_initialized(self): # Construct full URL for LLM inference full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' - # Prepare custom headers for proxy authentication - custom_headers = {'X-Rootflo-Key': self._app_key} + # Prepare custom headers for proxy authentication (only if app_key is provided) + custom_headers = {'X-Rootflo-Key': self._app_key} if self._app_key else {} # Instantiate appropriate SDK wrapper based on llm_provider if llm_provider == LLMProvider.OPENAI: From baedf018818384b95c74d9d7becbb55082e1438b Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 17:32:25 +0530 Subject: [PATCH 8/9] if access_token -> passing other jwt params as None --- flo_ai/flo_ai/helpers/llm_factory.py | 8 ++++---- flo_ai/flo_ai/llm/rootflo_llm.py | 10 +++++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index c942626b..268c5e91 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -230,10 +230,10 @@ def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': return RootFloLLM( base_url=base_url, model_id=model_id, - app_key=app_key, - app_secret=app_secret, - issuer=issuer, - audience=audience, + app_key=None if access_token else app_key, + app_secret=None if access_token else app_secret, + issuer=None if access_token else issuer, + audience=None if access_token else audience, access_token=access_token, ) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index 9efbbd7b..cb398bfb 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -140,8 +140,8 @@ async def _fetch_llm_config_async( 'Authorization': f'Bearer {api_token}', } - # Only add X-Rootflo-Key header if app_key is provided - if app_key: + # Only add X-Rootflo-Key header if app_key is provided and access_token is not used + if app_key and self._access_token is None: headers['X-Rootflo-Key'] = app_key try: @@ -231,7 +231,11 @@ async def _ensure_initialized(self): full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' # Prepare custom headers for proxy authentication (only if app_key is provided) - custom_headers = {'X-Rootflo-Key': self._app_key} if self._app_key else {} + custom_headers = ( + {'X-Rootflo-Key': self._app_key} + if (self._app_key and self._access_token is None) + else {} + ) # Instantiate appropriate SDK wrapper based on llm_provider if llm_provider == LLMProvider.OPENAI: From f3f5fa409341d37718523a4c7e710091ab418171 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Sat, 22 Nov 2025 18:03:04 +0530 Subject: [PATCH 9/9] Revert "if access_token -> passing other jwt params as None" 
This reverts commit baedf018818384b95c74d9d7becbb55082e1438b. --- flo_ai/flo_ai/helpers/llm_factory.py | 8 ++++---- flo_ai/flo_ai/llm/rootflo_llm.py | 10 +++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/flo_ai/flo_ai/helpers/llm_factory.py b/flo_ai/flo_ai/helpers/llm_factory.py index 268c5e91..c942626b 100644 --- a/flo_ai/flo_ai/helpers/llm_factory.py +++ b/flo_ai/flo_ai/helpers/llm_factory.py @@ -230,10 +230,10 @@ def _create_rootflo_llm(model_config: Dict[str, Any], **kwargs) -> 'BaseLLM': return RootFloLLM( base_url=base_url, model_id=model_id, - app_key=None if access_token else app_key, - app_secret=None if access_token else app_secret, - issuer=None if access_token else issuer, - audience=None if access_token else audience, + app_key=app_key, + app_secret=app_secret, + issuer=issuer, + audience=audience, access_token=access_token, ) diff --git a/flo_ai/flo_ai/llm/rootflo_llm.py b/flo_ai/flo_ai/llm/rootflo_llm.py index cb398bfb..9efbbd7b 100644 --- a/flo_ai/flo_ai/llm/rootflo_llm.py +++ b/flo_ai/flo_ai/llm/rootflo_llm.py @@ -140,8 +140,8 @@ async def _fetch_llm_config_async( 'Authorization': f'Bearer {api_token}', } - # Only add X-Rootflo-Key header if app_key is provided and access_token is not used - if app_key and self._access_token is None: + # Only add X-Rootflo-Key header if app_key is provided + if app_key: headers['X-Rootflo-Key'] = app_key try: @@ -231,11 +231,7 @@ async def _ensure_initialized(self): full_url = f'{self._base_url}/v1/llm-inference/{self._model_id}' # Prepare custom headers for proxy authentication (only if app_key is provided) - custom_headers = ( - {'X-Rootflo-Key': self._app_key} - if (self._app_key and self._access_token is None) - else {} - ) + custom_headers = {'X-Rootflo-Key': self._app_key} if self._app_key else {} # Instantiate appropriate SDK wrapper based on llm_provider if llm_provider == LLMProvider.OPENAI:
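The core of PATCH 1 is the move from a synchronous `httpx.Client` call inside `__init__` to an awaited fetch on first use: when the process that serves the config endpoint is the same one constructing `RootFloLLM`, a blocking HTTP call back to itself from inside the event loop stalls the very loop that would answer the request, hence the deadlock. Below is a condensed, standalone sketch of the double-checked locking shape the series lands on (eager lock in `__init__` per PATCH 5, lock-free fast path, re-check under the lock); class and value names are illustrative, not part of flo-ai.

```python
import asyncio
from typing import Optional


class LazyConfigClient:
    """Standalone illustration of the lazy-init pattern adopted by RootFloLLM."""

    def __init__(self) -> None:
        self._config: Optional[dict] = None
        self._initialized = False
        self._init_lock = asyncio.Lock()  # eager creation, as the series settles on in PATCH 5

    async def _fetch_config(self) -> dict:
        # Stand-in for the httpx.AsyncClient GET against /v1/llm-inference-configs/{model_id}
        await asyncio.sleep(0)
        return {'llm_model': 'example-model', 'type': 'openai'}

    async def _ensure_initialized(self) -> None:
        if self._initialized:  # fast path: no locking once initialized
            return
        async with self._init_lock:
            if self._initialized:  # double check: another task may have finished while we waited
                return
            self._config = await self._fetch_config()
            self._initialized = True  # set last, only after the fetch succeeded

    async def generate(self, prompt: str) -> str:
        await self._ensure_initialized()
        return f"[{self._config['llm_model']}] {prompt}"


async def main() -> None:
    client = LazyConfigClient()
    # Concurrent first calls serialize on the lock; only one config fetch is issued.
    print(await asyncio.gather(*(client.generate(f'msg {i}') for i in range(3))))


asyncio.run(main())
```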
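With initialization deferred, constructing `RootFloLLM` is side-effect free, so it can happen inside a handler of the same server that `base_url` points at; the config fetch, JWT handling, and provider SDK setup all run inside the live event loop on the first `generate()` or `stream()` call. A usage sketch against the final state of the series, where a pre-issued access token makes `app_key`/`app_secret`/`issuer`/`audience` optional (PATCH 7); the import path is inferred from the file layout, and the URL, model id, and token are placeholders.

```python
import asyncio

from flo_ai.llm.rootflo_llm import RootFloLLM  # path inferred from flo_ai/flo_ai/llm/rootflo_llm.py


async def main() -> None:
    # Cheap construction: no HTTP call, no event-loop access (PATCH 1).
    llm = RootFloLLM(
        base_url='https://console.example.com',  # placeholder
        model_id='model-123',                    # placeholder config id
        access_token='fc_example-token',         # placeholder pre-issued token
    )

    # First call triggers _ensure_initialized(): config fetch, provider selection, SDK wrapper setup.
    response = await llm.generate([{'role': 'user', 'content': 'Hello'}])
    print(llm.get_message_content(response))


asyncio.run(main())
```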
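PATCH 5 also replaces the `or`-based temperature lookup in `_create_openai_vllm_llm` with a nested-default `dict.get`, presumably so that an explicit `temperature=0.0` (which is falsy) is not silently overridden by the config default; a short comparison with made-up values:

```python
model_config = {'temperature': 0.7}
kwargs = {'temperature': 0.0}  # caller explicitly asks for deterministic sampling

# or-based lookup: 0.0 is falsy, so the config default wins -> 0.7
old = kwargs.get('temperature') or model_config.get('temperature', 0.7)

# nested-default lookup (PATCH 5): the kwarg is honoured whenever the key is present -> 0.0
new = kwargs.get('temperature', model_config.get('temperature', 0.7))

assert (old, new) == (0.7, 0.0)
```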