From 08f9006ac41cb284dd416102fc78458066d6f9f9 Mon Sep 17 00:00:00 2001
From: vizsatiz
Date: Wed, 15 Oct 2025 15:38:41 +0530
Subject: [PATCH 1/5] fix(agent): Improve agent execution algorithm

---
 flo_ai/examples/tool_usage.py |  14 ++--
 flo_ai/flo_ai/models/agent.py | 142 ++++++++++++++++++++++++++++++----
 flo_ai/flo_ai/utils/logger.py |   5 +-
 3 files changed, 137 insertions(+), 24 deletions(-)

diff --git a/flo_ai/examples/tool_usage.py b/flo_ai/examples/tool_usage.py
index 0d9cbbbd..5ba7cdb1 100644
--- a/flo_ai/examples/tool_usage.py
+++ b/flo_ai/examples/tool_usage.py
@@ -108,7 +108,7 @@ async def calculate(operation: str, x: float, y: float) -> float:
     llm = OpenAI(model='gpt-3.5-turbo', temperature=0.7)
     agent = ToolAgent(
         name='CalculatorAssistant',
-        system_prompt='You are a helpful calculator assistant. Use the calculator tool directly without explanation.',
+        system_prompt='You are a helpful calculator assistant. Use the calculator tool if needed and answer the question asked.',
         llm=llm,
         tools=[calculator_tool],
         reasoning_pattern=ReasoningPattern.DIRECT,
@@ -120,14 +120,14 @@ async def calculate(operation: str, x: float, y: float) -> float:

 # Run the examples
 if __name__ == '__main__':
-    print('Testing conversational agent...\n')
-    asyncio.run(test_conversational())
+    # print('Testing conversational agent...\n')
+    # asyncio.run(test_conversational())

-    print('\nTesting tool agent...\n')
-    asyncio.run(test_tool_agent())
+    # print('\nTesting tool agent...\n')
+    # asyncio.run(test_tool_agent())

-    print('\nTesting error handling...\n')
-    asyncio.run(test_error_handling())
+    # print('\nTesting error handling...\n')
+    # asyncio.run(test_error_handling())

     print('\nTesting direct reasoning...\n')
     asyncio.run(test_direct_reasoning())
diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py
index 0cd9243f..a1fa1800 100644
--- a/flo_ai/flo_ai/models/agent.py
+++ b/flo_ai/flo_ai/models/agent.py
@@ -206,12 +206,37 @@ async def _run_with_tools(
             # Handle ReACT and CoT patterns
             function_call = await self.llm.get_function_call(response)

-            # If no function call, we have our final answer
+            # If no function call, check if this is truly a final answer
             if not function_call:
                 assistant_message = self.llm.get_message_content(response)
                 if assistant_message:
-                    self.add_to_history('assistant', assistant_message)
-                    return assistant_message
+                    # Check if this is a final answer or just intermediate reasoning
+                    is_final = await self._is_final_answer(
+                        assistant_message, tool_call_count, messages
+                    )
+                    if is_final:
+                        self.add_to_history('assistant', assistant_message)
+                        return assistant_message
+                    else:
+                        # This is intermediate reasoning, add to context and continue
+                        logger.debug(
+                            f'Detected intermediate reasoning (not final answer): {assistant_message[:100]}...'
+                        )
+                        self.add_to_history('assistant', assistant_message)
+                        messages.append(
+                            {
+                                'role': 'assistant',
+                                'content': assistant_message,
+                            }
+                        )
+                        # Prompt the agent to take action
+                        messages.append(
+                            {
+                                'role': 'user',
+                                'content': 'Based on your reasoning, please proceed with the necessary tool calls to complete the task.',
+                            }
+                        )
+                        continue
                 break

             # Execute the tool
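The hunk above changes the tool loop so that a response without a function call is no longer taken as the final answer by default: it is classified first, and intermediate reasoning is fed back into the context with a nudge to act. Condensed to a runnable sketch (fake_llm_generate is a hypothetical stand-in for the BaseLLM API, and only the control flow mirrors the patch):

import asyncio
from typing import Any, Dict, List, Optional

async def fake_llm_generate(messages: List[Dict[str, str]]) -> Dict[str, Any]:
    # Stand-in model: reasons once, then emits the Final Answer token.
    if len(messages) <= 2:
        return {'content': 'I should use the calculator first.', 'function_call': None}
    return {'content': 'Final Answer: 42', 'function_call': None}

async def run_with_tools_sketch(user_input: str, max_tool_calls: int = 5) -> Optional[str]:
    messages = [
        {'role': 'system', 'content': 'demo system prompt'},
        {'role': 'user', 'content': user_input},
    ]
    tool_call_count = 0
    while tool_call_count < max_tool_calls:
        response = await fake_llm_generate(messages)
        if response['function_call'] is None:
            content = response['content']
            if 'final answer:' in content.lower():  # primary detection, as in the patch
                return content
            # Intermediate reasoning: keep it in context and prompt the agent to act.
            messages.append({'role': 'assistant', 'content': content})
            messages.append({
                'role': 'user',
                'content': 'Based on your reasoning, please proceed with the necessary tool calls to complete the task.',
            })
            continue
        tool_call_count += 1  # the real loop executes the tool call here
    return None

print(asyncio.run(run_with_tools_sketch('What is 6 * 7?')))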
@@ -324,15 +349,12 @@ def _get_react_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
     Action: Use available tools in the format: tool_name(param1: "value1", param2: "value2")
     Observation: The result of the action
     ... (repeat Thought/Action/Observation if needed)
+    Final Answer: [Your complete answer to the user's question]

     Available tools:
     {tools_desc}

-    Remember to:
-    1. Think carefully about what needs to be done
-    2. Use tools when needed
-    3. Make observations about tool results
-    4. Conclude with a final answer when the task is complete"""
+    IMPORTANT: When you have enough information to answer the user's question, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return react_prompt

@@ -359,11 +381,103 @@ def _get_cot_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:

     Available tools:
     {tools_desc}

-    Remember to:
-    1. Break down complex problems into smaller steps
-    2. Think through each step logically
-    3. Use tools when needed to gather information
-    4. Provide clear reasoning for your conclusions
-    5. End with a final, well-justified answer"""
+    IMPORTANT: When you have gathered all necessary information and are ready to provide your complete answer, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return cot_prompt
+
+    async def _is_final_answer(
+        self, message: str, tool_call_count: int, messages: List[Dict[str, Any]]
+    ) -> bool:
+        """
+        Determine if a message is a final answer or intermediate reasoning.
+        Uses structured token detection (like LangChain's ReAct) with LLM fallback.
+
+        Approach inspired by LangChain/CrewAI:
+        1. Primary: Check for explicit "Final Answer:" token
+        2. Fallback: Use LLM-based classification for robustness
+        """
+        message_stripped = message.strip()
+        message_lower = message_stripped.lower()
+
+        # Primary Detection: Explicit "Final Answer:" token (ReAct pattern)
+        # This is the most reliable method used by LangChain and similar frameworks
+        if message_stripped.startswith('Final Answer:') or message_lower.startswith(
+            'final answer:'
+        ):
+            logger.debug('Explicit "Final Answer:" token detected - this is final')
+            return True
+
+        # Check if "Final Answer:" appears anywhere in the response
+        # (agent might add context before the token)
+        if 'final answer:' in message_lower:
+            logger.debug('"Final Answer:" token found in response - treating as final')
+            return True
+
+        # Secondary Detection: Use LLM-based analysis for cases without explicit tokens
+        # This handles:
+        # - Agents not following the format perfectly
+        # - Direct mode (without ReAct/CoT patterns)
+        # - Edge cases where the agent provides answer without token
+
+        analysis_prompt = f"""You are a classifier that determines if an AI agent's response is a FINAL ANSWER or INTERMEDIATE REASONING.
+
+Agent's Response:
+"{message_stripped}"
+
+Context:
+- Tool calls executed so far: {tool_call_count}
+- Total conversation turns: {len(messages)}
+
+Classification Criteria:
+
+FINAL ANSWER - The response is final if it:
+✓ Directly answers the user's original question with concrete information
+✓ Provides specific data, results, or conclusions
+✓ Does not suggest or request additional actions
+✓ Reads like a complete, standalone answer
+✓ Contains synthesis of information already gathered
+
+INTERMEDIATE REASONING - The response is intermediate if it:
+✗ Describes plans or intentions for what to do next
+✗ Expresses need to gather more information
+✗ Contains thinking/reasoning WITHOUT providing the actual answer
+✗ Poses questions or expresses uncertainty about next steps
+✗ Mentions specific tools it wants to use
+
+Examples of INTERMEDIATE:
+- "I need to query the database schema first"
+- "Let me check the table structure"
+- "First, I should examine..."
+
+Examples of FINAL:
+- "Based on the query results, the table contains 1,245 records..."
+- "The analysis shows that revenue increased by 23%..."
+- "After examining the data, the answer is..."
+
+Respond with EXACTLY one word: "FINAL" or "INTERMEDIATE"
+"""
+
+        try:
+            analysis_messages = [
+                {
+                    'role': 'system',
+                    'content': 'You are a precise classification system. Respond with only FINAL or INTERMEDIATE.',
+                },
+                {'role': 'user', 'content': analysis_prompt},
+            ]
+            analysis_response = await self.llm.generate(analysis_messages)
+            analysis = self.llm.get_message_content(analysis_response).strip().upper()

+            is_final = 'FINAL' in analysis
+            logger.debug(
+                f'LLM classifier: "{analysis}" -> is_final={is_final} (message preview: "{message_stripped[:80]}...")'
+            )
+            return is_final
+
+        except Exception as e:
+            logger.warning(
+                f'LLM classification failed: {e}. Defaulting to final=False to allow continuation.'
+            )
+            # Conservative default: treat as intermediate to avoid premature exit
+            # This is safer as it allows the agent to continue rather than stopping too early
+            return False
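Of the two stages above, only the token check is deterministic; the fallback needs a live model. A minimal standalone illustration of the primary stage (an editorial sketch, not the library's API), exercised with the classifier prompt's own examples. The two startswith branches in the patch are subsumed by the substring test, so the sketch collapses them into one:

def has_final_answer_token(message: str) -> bool:
    # Mirrors the primary detection: an explicit 'Final Answer:' token,
    # either as a prefix or anywhere in the response.
    return 'final answer:' in message.strip().lower()

assert has_final_answer_token('Final Answer: the table contains 1,245 records')
assert has_final_answer_token('Thought: I have the result.\nFinal Answer: 42')
assert not has_final_answer_token('I need to query the database schema first')
assert not has_final_answer_token('Let me check the table structure')
print('token detection sketch OK')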
+ ) + # Conservative default: treat as intermediate to avoid premature exit + # This is safer as it allows the agent to continue rather than stopping too early + return False diff --git a/flo_ai/flo_ai/utils/logger.py b/flo_ai/flo_ai/utils/logger.py index a6ebfbc4..a90d09a1 100644 --- a/flo_ai/flo_ai/utils/logger.py +++ b/flo_ai/flo_ai/utils/logger.py @@ -1,8 +1,7 @@ import logging import os -log_level = os.environ.get('LOG_LEVEL', 'INFO') -logging.getLogger('uvicorn').setLevel(log_level) +log_level = os.environ.get('FLO_AI_LOG_LEVEL', 'INFO') log_format = ( '%(asctime)s | %(levelname)-8s | %(name)s | %(filename)s:%(lineno)d | %(message)s' ) @@ -13,4 +12,4 @@ datefmt='%Y-%m-%d %H:%M:%S', ) -logger = logging.getLogger('floware') +logger = logging.getLogger('flo_ai') From a8d9ce792737240bb49e0b2a64a9fcd851ce168a Mon Sep 17 00:00:00 2001 From: vizsatiz Date: Wed, 15 Oct 2025 21:49:31 +0530 Subject: [PATCH 2/5] Fix for tests --- flo_ai/examples/tool_usage.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flo_ai/examples/tool_usage.py b/flo_ai/examples/tool_usage.py index 5ba7cdb1..c4f353a5 100644 --- a/flo_ai/examples/tool_usage.py +++ b/flo_ai/examples/tool_usage.py @@ -120,14 +120,14 @@ async def calculate(operation: str, x: float, y: float) -> float: # Run the examples if __name__ == '__main__': - # print('Testing conversational agent...\n') - # asyncio.run(test_conversational()) + print('Testing conversational agent...\n') + asyncio.run(test_conversational()) - # print('\nTesting tool agent...\n') - # asyncio.run(test_tool_agent()) + print('\nTesting tool agent...\n') + asyncio.run(test_tool_agent()) - # print('\nTesting error handling...\n') - # asyncio.run(test_error_handling()) + print('\nTesting error handling...\n') + asyncio.run(test_error_handling()) print('\nTesting direct reasoning...\n') asyncio.run(test_direct_reasoning()) From 49270f8c750e99626ee3dfdd3f40c4c05bb439c6 Mon Sep 17 00:00:00 2001 From: vizsatiz Date: Wed, 15 Oct 2025 22:02:59 +0530 Subject: [PATCH 3/5] Testing with more tools --- flo_ai/examples/multi_tool_example.py | 2 +- flo_ai/flo_ai/utils/logger.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/flo_ai/examples/multi_tool_example.py b/flo_ai/examples/multi_tool_example.py index 7be1464e..f5926291 100644 --- a/flo_ai/examples/multi_tool_example.py +++ b/flo_ai/examples/multi_tool_example.py @@ -132,7 +132,7 @@ async def test_multi_tool_agent(llm: BaseLLM, agent_name: str): async def main(): # Test with OpenAI - openai_llm = OpenAI(model='gpt-4-turbo-preview', temperature=0.7) + openai_llm = OpenAI(model='gpt-4o-mini', temperature=0.7) await test_multi_tool_agent(openai_llm, 'OpenAI Multi-Tool Agent') # Test with Claude diff --git a/flo_ai/flo_ai/utils/logger.py b/flo_ai/flo_ai/utils/logger.py index a90d09a1..84153e4e 100644 --- a/flo_ai/flo_ai/utils/logger.py +++ b/flo_ai/flo_ai/utils/logger.py @@ -6,10 +6,15 @@ '%(asctime)s | %(levelname)-8s | %(name)s | %(filename)s:%(lineno)d | %(message)s' ) -logging.basicConfig( - level=log_level, - format=log_format, - datefmt='%Y-%m-%d %H:%M:%S', -) - logger = logging.getLogger('flo_ai') +logger.setLevel(log_level) + +# Prevent affecting the root logger +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter(log_format, datefmt='%Y-%m-%d %H:%M:%S') + handler.setFormatter(formatter) + logger.addHandler(handler) + +# Optional: stop logs from propagating to the root logger +logger.propagate = False From 
From 1ed323cb156fe2ef3e16739f5849db13be60f2c5 Mon Sep 17 00:00:00 2001
From: vizsatiz
Date: Thu, 16 Oct 2025 10:06:27 +0530
Subject: [PATCH 4/5] fix(prompt): brought back what was deleted

---
 flo_ai/flo_ai/models/agent.py   | 13 +++++++++++++
 flo_ai/flo_ai/tool/base_tool.py |  1 +
 2 files changed, 14 insertions(+)

diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py
index a1fa1800..bc9eab92 100644
--- a/flo_ai/flo_ai/models/agent.py
+++ b/flo_ai/flo_ai/models/agent.py
@@ -354,6 +354,12 @@ def _get_react_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
     Available tools:
     {tools_desc}

+    Remember to:
+    1. Think carefully about what needs to be done
+    2. Use tools when needed
+    3. Make observations about tool results
+    4. Conclude with a final answer when the task is complete
+
     IMPORTANT: When you have enough information to answer the user's question, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return react_prompt
@@ -381,6 +387,13 @@ def _get_cot_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:

     Available tools:
     {tools_desc}

+    Remember to:
+    1. Break down complex problems into smaller steps
+    2. Think through each step logically
+    3. Use tools when needed to gather information
+    4. Provide clear reasoning for your conclusions
+    5. End with a final, well-justified answer
+
     IMPORTANT: When you have gathered all necessary information and are ready to provide your complete answer, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return cot_prompt
diff --git a/flo_ai/flo_ai/tool/base_tool.py b/flo_ai/flo_ai/tool/base_tool.py
index a2b338d7..7d182982 100644
--- a/flo_ai/flo_ai/tool/base_tool.py
+++ b/flo_ai/flo_ai/tool/base_tool.py
@@ -37,6 +37,7 @@ async def execute(self, **kwargs) -> Any:
             logger.info(f'Tool {self.name} returned: {tool_result}')
             return tool_result
         except Exception as e:
+            logger.error(f'Error executing tool {self.name}: {str(e)}', exc_info=True)
             raise ToolExecutionError(
                 f'Error executing tool {self.name}: {str(e)}', original_error=e
             )
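The base_tool.py change above logs the full traceback before re-raising as ToolExecutionError, so a failure stays visible even if a caller swallows the wrapped exception. The shape of that log-then-wrap pattern, with a stub that only mimics the real class's constructor:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('tool_demo')

class ToolExecutionError(Exception):
    # Stub with the same constructor shape as the real exception.
    def __init__(self, message: str, original_error: Exception = None):
        super().__init__(message)
        self.original_error = original_error

def execute_tool(name: str) -> None:
    try:
        raise ValueError('division by zero')  # simulate a failing tool
    except Exception as e:
        # exc_info=True attaches the full traceback to the log record.
        logger.error(f'Error executing tool {name}: {str(e)}', exc_info=True)
        raise ToolExecutionError(f'Error executing tool {name}: {str(e)}', original_error=e)

try:
    execute_tool('calculator')
except ToolExecutionError as err:
    print('wrapped:', err, '| original:', repr(err.original_error))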
From 00857e7fb9de96640b6e971da4e830bf7ee90823 Mon Sep 17 00:00:00 2001
From: vizsatiz
Date: Thu, 16 Oct 2025 10:22:22 +0530
Subject: [PATCH 5/5] Max tool count change

---
 flo_ai/flo_ai/arium/arium.py       | 2 +-
 flo_ai/flo_ai/models/agent.py      | 6 +++---
 flo_ai/flo_ai/models/base_agent.py | 2 ++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/flo_ai/flo_ai/arium/arium.py b/flo_ai/flo_ai/arium/arium.py
index a86243a6..8ff2083e 100644
--- a/flo_ai/flo_ai/arium/arium.py
+++ b/flo_ai/flo_ai/arium/arium.py
@@ -381,7 +381,7 @@ async def _execute_node(
             )

             # Re-raise the exception
-            raise
+            raise e

     def _add_to_memory(self, result: str):
         # TODO result will be None for start and end nodes
diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py
index bc9eab92..97719dbc 100644
--- a/flo_ai/flo_ai/models/agent.py
+++ b/flo_ai/flo_ai/models/agent.py
@@ -22,6 +22,7 @@ def __init__(
         llm: BaseLLM,
         tools: Optional[List[Tool]] = None,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
         reasoning_pattern: ReasoningPattern = ReasoningPattern.DIRECT,
         output_schema: Optional[Dict[str, Any]] = None,
         role: Optional[str] = None,
@@ -40,6 +41,7 @@ def __init__(
             agent_type=agent_type,
             llm=llm,
             max_retries=max_retries,
+            max_tool_calls=max_tool_calls,
         )
         self.tools = tools or []
         self.tools_dict = {tool.name: tool for tool in self.tools}
@@ -192,10 +194,8 @@ async def _run_with_tools(
         ] + self.conversation_history

         # Keep executing tools until we get a final answer
-        max_tool_calls = 5  # Limit the number of tool calls per query
         tool_call_count = 0
-
-        while tool_call_count < max_tool_calls:
+        while tool_call_count < self.max_tool_calls:
             formatted_tools = self.llm.format_tools_for_llm(self.tools)
             response = await self.llm.generate(
                 messages,
diff --git a/flo_ai/flo_ai/models/base_agent.py b/flo_ai/flo_ai/models/base_agent.py
index ab506b46..96c984b2 100644
--- a/flo_ai/flo_ai/models/base_agent.py
+++ b/flo_ai/flo_ai/models/base_agent.py
@@ -23,12 +23,14 @@ def __init__(
         agent_type: AgentType,
         llm: BaseLLM,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
     ):
         self.name = name
         self.system_prompt = system_prompt
         self.agent_type = agent_type
         self.llm = llm
         self.max_retries = max_retries
+        self.max_tool_calls = max_tool_calls
         self.resolved_variables = False
         self.conversation_history: List[Dict[str, str]] = []
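With PATCH 5 applied, the per-query tool budget is a constructor argument (default 5, matching the old hard-coded limit) instead of a local constant inside _run_with_tools. A usage sketch based only on the signatures visible in this series; the import path is assumed, not verified, and calculator_tool is a placeholder:

# from flo_ai import OpenAI, ToolAgent, ReasoningPattern  # import path assumed

llm = OpenAI(model='gpt-4o-mini', temperature=0.7)
agent = ToolAgent(
    name='CalculatorAssistant',
    system_prompt='You are a helpful calculator assistant. Use the calculator tool if needed and answer the question asked.',
    llm=llm,
    tools=[calculator_tool],  # defined as in examples/tool_usage.py
    max_retries=3,
    max_tool_calls=10,        # new knob: allow up to 10 tool calls per query
    reasoning_pattern=ReasoningPattern.DIRECT,
)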