From c17190e37fd8870bd593f0ee3ed78f56ca252b45 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Mon, 4 Aug 2025 19:43:36 +0530 Subject: [PATCH 1/4] vllm integration - added vllm openai integration - added vllm agent usage example - fixed retry_count bug --- flo_ai/examples/vllm_agent_usage.py | 256 ++++++++++++++++++++++++++++ flo_ai/flo_ai/llm/openai_vllm.py | 58 +++++++ flo_ai/flo_ai/models/agent.py | 10 +- 3 files changed, 319 insertions(+), 5 deletions(-) create mode 100644 flo_ai/examples/vllm_agent_usage.py create mode 100644 flo_ai/flo_ai/llm/openai_vllm.py diff --git a/flo_ai/examples/vllm_agent_usage.py b/flo_ai/examples/vllm_agent_usage.py new file mode 100644 index 00000000..905b7c91 --- /dev/null +++ b/flo_ai/examples/vllm_agent_usage.py @@ -0,0 +1,256 @@ +import asyncio +import os +from flo_ai.builder.agent_builder import AgentBuilder +from flo_ai.tool.base_tool import Tool +from flo_ai.models.base_agent import ReasoningPattern +from flo_ai.llm.openai_vllm import OpenAIVLLM +from dotenv import load_dotenv + +load_dotenv() + +vllm_base_url = os.getenv('VLLM_BASE_URL') + + +async def example_simple_vllm_agent(): + # Create a simple conversational agent with vLLM + agent = ( + AgentBuilder() + .with_name('Math Tutor') + .with_prompt('You are a helpful math tutor.') + .with_llm( + OpenAIVLLM( + model='microsoft/phi-4', + base_url=vllm_base_url, + temperature=0.7, + api_key='', + ) + ) + .build() + ) + + response = await agent.run('What is the formula for the area of a circle?') + print(f'vLLM Simple Agent Response: {response}') + + +async def example_vllm_tool_agent(): + # Define a calculator tool + async def calculate(operation: str, x: float, y: float) -> float: + if operation == 'add': + return x + y + elif operation == 'multiply': + return x * y + elif operation == 'subtract': + return x - y + elif operation == 'divide': + return x / y if y != 0 else float('inf') + raise ValueError(f'Unknown operation: {operation}') + + calculator_tool = Tool( + name='calculate', + description='Perform basic calculations', + function=calculate, + parameters={ + 'operation': { + 'type': 'string', + 'description': 'The operation to perform (add, subtract, multiply, or divide)', + }, + 'x': {'type': 'number', 'description': 'First number'}, + 'y': {'type': 'number', 'description': 'Second number'}, + }, + ) + + # Create a tool-using agent with vLLM + agent = ( + AgentBuilder() + .with_name('vLLM Calculator Assistant') + .with_prompt( + 'You are a math assistant that can perform calculations using tools.' 
+ ) + .with_llm( + OpenAIVLLM( + model='microsoft/phi-4', + base_url=vllm_base_url, + temperature=0.7, + api_key='', + ) + ) + .with_tools([calculator_tool]) + .with_reasoning(ReasoningPattern.REACT) + .with_retries(2) + .build() + ) + + response = await agent.run( + 'Calculate 15 divided by 3, then multiply the result by 7' + ) + print(f'vLLM Tool Agent Response: {response}') + + +async def example_vllm_structured_output(): + # Define output schema for structured responses with name field + math_schema = { + 'name': 'math_solution', + 'schema': { + 'type': 'object', + 'properties': { + 'problem': {'type': 'string', 'description': 'The original problem'}, + 'steps': { + 'type': 'array', + 'items': {'type': 'string'}, + 'description': 'Step-by-step solution process', + }, + 'final_answer': { + 'type': 'string', + 'description': 'The final numerical answer', + }, + 'explanation': { + 'type': 'string', + 'description': 'Brief explanation of the approach used', + }, + }, + 'required': ['problem', 'steps', 'final_answer', 'explanation'], + }, + } + + # Create an agent with structured output using vLLM + agent = ( + AgentBuilder() + .with_name('vLLM Structured Math Solver') + .with_prompt( + 'You are a math problem solver that provides detailed structured solutions. ' + 'Always break down problems into clear steps and explain your reasoning.' + ) + .with_llm( + OpenAIVLLM( + model='microsoft/phi-4', + base_url=vllm_base_url, + temperature=0.3, + api_key='', + ) + ) + .with_output_schema(math_schema) + .build() + ) + + response = await agent.run('Solve the equation: x + y = 4, x - y = 1') + print(f'vLLM Structured Output Response: {response}') + + +async def example_vllm_tool_agent_structured_output(): + # Define a calculator tool + async def calculate(operation: str, x: float, y: float) -> float: + if operation == 'add': + return x + y + elif operation == 'multiply': + return x * y + elif operation == 'subtract': + return x - y + elif operation == 'divide': + return x / y if y != 0 else float('inf') + raise ValueError(f'Unknown operation: {operation}') + + calculator_tool = Tool( + name='calculate', + description='Perform basic calculations', + function=calculate, + parameters={ + 'operation': { + 'type': 'string', + 'description': 'The operation to perform (add, subtract, multiply, or divide)', + }, + 'x': {'type': 'number', 'description': 'First number'}, + 'y': {'type': 'number', 'description': 'Second number'}, + }, + ) + + # Define structured output schema for calculation results + calculation_report_schema = { + 'name': 'calculation_report', + 'schema': { + 'type': 'object', + 'properties': { + 'task': { + 'type': 'string', + 'description': 'Description of the calculation task', + }, + 'calculations': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'operation': { + 'type': 'string', + 'description': 'The operation performed', + }, + 'numbers': { + 'type': 'string', + 'description': 'The numbers used', + }, + 'result': { + 'type': 'number', + 'description': 'The result of the operation', + }, + }, + 'required': ['operation', 'numbers', 'result'], + }, + 'description': 'List of calculations performed', + }, + 'final_answer': { + 'type': 'number', + 'description': 'The final numerical result', + }, + 'summary': { + 'type': 'string', + 'description': 'Summary of the calculation process', + }, + }, + 'required': ['task', 'calculations', 'final_answer', 'summary'], + }, + } + + # Create a tool-using agent with structured output + agent = ( + AgentBuilder() + 
.with_name('vLLM Calculator with Reports') + .with_prompt( + 'You are a calculator assistant that performs calculations using tools and provides structured reports.' + ) + .with_llm( + OpenAIVLLM( + model='microsoft/phi-4', + base_url=vllm_base_url, + temperature=0.3, + api_key='', + ) + ) + .with_tools([calculator_tool]) + .with_reasoning(ReasoningPattern.REACT) + .with_output_schema(calculation_report_schema) + .build() + ) + + response = await agent.run( + 'Calculate 25 multiplied by 4, then add 15 to the result' + ) + print(f'vLLM Tool + Structured Output Response: {response}') + + +async def main(): + print('=== vLLM Agent Examples ===') + print(f'Note: Make sure vLLM server is running at {vllm_base_url}') + + print('\n=== Simple vLLM Conversational Agent ===') + await example_simple_vllm_agent() + + print('\n=== vLLM Tool-using Agent ===') + await example_vllm_tool_agent() + + print('\n=== vLLM Structured Output Agent ===') + await example_vllm_structured_output() + + print('\n=== vLLM Tool Agent with Structured Output ===') + await example_vllm_tool_agent_structured_output() + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/flo_ai/flo_ai/llm/openai_vllm.py b/flo_ai/flo_ai/llm/openai_vllm.py new file mode 100644 index 00000000..423880e7 --- /dev/null +++ b/flo_ai/flo_ai/llm/openai_vllm.py @@ -0,0 +1,58 @@ +from typing import Any +from .openai_llm import OpenAI + + +class OpenAIVLLM(OpenAI): + def __init__( + self, + base_url: str, + model='microsoft/phi-4', + api_key: str = None, + temperature: float = 0.7, + **kwargs, + ): + super().__init__( + model=model, + api_key=api_key, + temperature=temperature, + base_url=base_url, + **kwargs, + ) + + # overriden + async def generate( + self, messages: list[dict], output_schema: dict = None, **kwargs + ) -> Any: + # Convert output_schema to OpenAI format if provided + if output_schema: + kwargs['extra_body'] = {'guided_json': output_schema.get('schema')} + + # Add JSON format instruction to the system prompt + if messages and messages[0]['role'] == 'system': + messages[0]['content'] = ( + messages[0]['content'] + + '\n\nPlease provide your response in JSON format according to the specified schema.' + ) + else: + messages.insert( + 0, + { + 'role': 'system', + 'content': 'Please provide your response in JSON format according to the specified schema.', + }, + ) + + # Prepare OpenAI API parameters + vllm_openai_kwargs = { + 'model': self.model, + 'messages': messages, + **kwargs, + **self.kwargs, + } + + # Make the API call + response = await self.client.chat.completions.create(**vllm_openai_kwargs) + message = response.choices[0].message + + # Return the full message object instead of just the content + return message diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py index 053d42ee..00682d71 100644 --- a/flo_ai/flo_ai/models/agent.py +++ b/flo_ai/flo_ai/models/agent.py @@ -106,7 +106,7 @@ async def _run_conversational( """Run as a conversational agent when no tools are provided""" variables = variables or {} - while retry_count < self.max_retries: + while retry_count <= self.max_retries: try: # Resolve variables in system prompt system_content = ( @@ -150,7 +150,7 @@ async def _run_conversational( should_retry, analysis = await self.handle_error(e, context) - if should_retry and retry_count < self.max_retries: + if should_retry and retry_count <= self.max_retries: self.add_to_history( 'system', f'Error occurred. 
Analysis: {analysis}' ) @@ -167,7 +167,7 @@ async def _run_with_tools( """Run as a tool-using agent when tools are provided""" variables = variables or {} - while retry_count < self.max_retries: + while retry_count <= self.max_retries: try: # Resolve variables in system prompt based on reasoning pattern if self.reasoning_pattern == ReasoningPattern.REACT: @@ -248,7 +248,7 @@ async def _run_with_tools( 'attempt': retry_count, } should_retry, analysis = await self.handle_error(e, context) - if should_retry and retry_count < self.max_retries: + if should_retry and retry_count <= self.max_retries: self.add_to_history( 'system', f'Tool execution error: {analysis}' ) @@ -284,7 +284,7 @@ async def _run_with_tools( } should_retry, analysis = await self.handle_error(e, context) - if should_retry and retry_count < self.max_retries: + if should_retry and retry_count <= self.max_retries: self.add_to_history( 'system', f'Error occurred. Analysis: {analysis}' ) From 3f1e1b5713ad9e9247511846efbfbc864773465a Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Tue, 5 Aug 2025 11:46:37 +0530 Subject: [PATCH 2/4] made openai_vllm required --- flo_ai/flo_ai/llm/__init__.py | 3 ++- flo_ai/flo_ai/llm/openai_vllm.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/flo_ai/flo_ai/llm/__init__.py b/flo_ai/flo_ai/llm/__init__.py index 59cddc14..838484fd 100644 --- a/flo_ai/flo_ai/llm/__init__.py +++ b/flo_ai/flo_ai/llm/__init__.py @@ -3,5 +3,6 @@ from .openai_llm import OpenAI from .ollama_llm import OllamaLLM from .gemini_llm import Gemini +from .openai_vllm import OpenAIVLLM -__all__ = ['BaseLLM', 'Anthropic', 'OpenAI', 'OllamaLLM', 'Gemini'] +__all__ = ['BaseLLM', 'Anthropic', 'OpenAI', 'OllamaLLM', 'Gemini', 'OpenAIVLLM'] diff --git a/flo_ai/flo_ai/llm/openai_vllm.py b/flo_ai/flo_ai/llm/openai_vllm.py index 423880e7..e888e33d 100644 --- a/flo_ai/flo_ai/llm/openai_vllm.py +++ b/flo_ai/flo_ai/llm/openai_vllm.py @@ -6,7 +6,7 @@ class OpenAIVLLM(OpenAI): def __init__( self, base_url: str, - model='microsoft/phi-4', + model: str, api_key: str = None, temperature: float = 0.7, **kwargs, From fd5605df7b39a7ad40dfbdb4c136655b2e86bc9a Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Tue, 5 Aug 2025 13:25:39 +0530 Subject: [PATCH 3/4] exporting ImageMessage --- flo_ai/flo_ai/__init__.py | 4 +++- flo_ai/flo_ai/llm/__init__.py | 12 ++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/flo_ai/flo_ai/__init__.py b/flo_ai/flo_ai/__init__.py index b0a3ace4..307561dd 100644 --- a/flo_ai/flo_ai/__init__.py +++ b/flo_ai/flo_ai/__init__.py @@ -8,7 +8,7 @@ from .builder.agent_builder import AgentBuilder # LLM package - Language model integrations -from .llm import BaseLLM, Anthropic, OpenAI, OllamaLLM, Gemini +from .llm import BaseLLM, Anthropic, OpenAI, OllamaLLM, Gemini, ImageMessage # Tool package - Tool framework components from .tool import Tool, ToolExecutionError, flo_tool, create_tool_from_function @@ -43,6 +43,8 @@ 'OpenAI', 'OllamaLLM', 'Gemini', + # LLM DataClass + 'ImageMessage', # Tools 'Tool', 'ToolExecutionError', diff --git a/flo_ai/flo_ai/llm/__init__.py b/flo_ai/flo_ai/llm/__init__.py index 838484fd..29768237 100644 --- a/flo_ai/flo_ai/llm/__init__.py +++ b/flo_ai/flo_ai/llm/__init__.py @@ -1,8 +1,16 @@ -from .base_llm import BaseLLM +from .base_llm import BaseLLM, ImageMessage from .anthropic_llm import Anthropic from .openai_llm import OpenAI from .ollama_llm import OllamaLLM from .gemini_llm import Gemini from .openai_vllm import OpenAIVLLM -__all__ = ['BaseLLM', 
'Anthropic', 'OpenAI', 'OllamaLLM', 'Gemini', 'OpenAIVLLM'] +__all__ = [ + 'BaseLLM', + 'Anthropic', + 'OpenAI', + 'OllamaLLM', + 'Gemini', + 'OpenAIVLLM', + 'ImageMessage', +] From 9cf6b270c4c1913dd90a58c92e3ea298be8ed43c Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Tue, 5 Aug 2025 14:05:21 +0530 Subject: [PATCH 4/4] added OpenAIVLLM to flo_ai init --- flo_ai/flo_ai/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flo_ai/flo_ai/__init__.py b/flo_ai/flo_ai/__init__.py index 307561dd..80e5a364 100644 --- a/flo_ai/flo_ai/__init__.py +++ b/flo_ai/flo_ai/__init__.py @@ -8,7 +8,7 @@ from .builder.agent_builder import AgentBuilder # LLM package - Language model integrations -from .llm import BaseLLM, Anthropic, OpenAI, OllamaLLM, Gemini, ImageMessage +from .llm import BaseLLM, Anthropic, OpenAI, OllamaLLM, Gemini, OpenAIVLLM, ImageMessage # Tool package - Tool framework components from .tool import Tool, ToolExecutionError, flo_tool, create_tool_from_function @@ -43,6 +43,7 @@ 'OpenAI', 'OllamaLLM', 'Gemini', + 'OpenAIVLLM', # LLM DataClass 'ImageMessage', # Tools
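
Usage sketch: what the series enables end to end, i.e. building an agent against a running vLLM server through the new top-level OpenAIVLLM export added in PATCH 4/4. The model name, server address, and empty API key below are illustrative assumptions, not values fixed by these patches; the bundled example script reads the server address from VLLM_BASE_URL instead.

    import asyncio
    from flo_ai import AgentBuilder, OpenAIVLLM

    # Assumes an OpenAI-compatible vLLM server is already running, e.g.:
    #   vllm serve microsoft/phi-4
    llm = OpenAIVLLM(
        model='microsoft/phi-4',              # illustrative model; required argument after PATCH 2/4
        base_url='http://localhost:8000/v1',  # assumed endpoint; point this at your own server
        api_key='',                           # placeholder key, as in the bundled examples
        temperature=0.7,
    )

    agent = (
        AgentBuilder()
        .with_name('vLLM Smoke Test')
        .with_prompt('You are a concise assistant.')
        .with_llm(llm)
        .build()
    )

    print(asyncio.run(agent.run('Say hello in one sentence.')))

If an output schema is attached via .with_output_schema(...), the overridden generate() in openai_vllm.py forwards it to the server as extra_body={'guided_json': ...} and appends a JSON-format instruction to the system prompt, so structured output relies on the vLLM server supporting guided decoding.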