2 changes: 1 addition & 1 deletion flo_ai/examples/multi_tool_example.py
@@ -132,7 +132,7 @@ async def test_multi_tool_agent(llm: BaseLLM, agent_name: str):

 async def main():
     # Test with OpenAI
-    openai_llm = OpenAI(model='gpt-4-turbo-preview', temperature=0.7)
+    openai_llm = OpenAI(model='gpt-4o-mini', temperature=0.7)
     await test_multi_tool_agent(openai_llm, 'OpenAI Multi-Tool Agent')

     # Test with Claude
2 changes: 1 addition & 1 deletion flo_ai/examples/tool_usage.py
@@ -108,7 +108,7 @@ async def calculate(operation: str, x: float, y: float) -> float:
     llm = OpenAI(model='gpt-3.5-turbo', temperature=0.7)
     agent = ToolAgent(
         name='CalculatorAssistant',
-        system_prompt='You are a helpful calculator assistant. Use the calculator tool directly without explanation.',
+        system_prompt='You are a helpful calculator assistant. Use the calculator tool if needed and answer the question asked.',
         llm=llm,
         tools=[calculator_tool],
         reasoning_pattern=ReasoningPattern.DIRECT,
2 changes: 1 addition & 1 deletion flo_ai/flo_ai/arium/arium.py
@@ -381,7 +381,7 @@ async def _execute_node(
             )

             # Re-raise the exception
-            raise
+            raise e

     def _add_to_memory(self, result: str):
         # TODO result will be None for start and end nodes
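Note on the change above: inside an `except` block, a bare `raise` and `raise e` both re-raise the caught exception; the bare form preserves the original traceback untouched, while `raise e` also appends the re-raise site. A minimal standalone sketch, not from this repo:

```python
try:
    1 / 0
except ZeroDivisionError as e:
    print(f'about to re-raise: {e}')
    raise e  # a bare `raise` here would leave the original traceback unchanged
```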
143 changes: 135 additions & 8 deletions flo_ai/flo_ai/models/agent.py
@@ -22,6 +22,7 @@ def __init__(
         llm: BaseLLM,
         tools: Optional[List[Tool]] = None,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
         reasoning_pattern: ReasoningPattern = ReasoningPattern.DIRECT,
         output_schema: Optional[Dict[str, Any]] = None,
         role: Optional[str] = None,
@@ -40,6 +41,7 @@ def __init__(
             agent_type=agent_type,
             llm=llm,
             max_retries=max_retries,
+            max_tool_calls=max_tool_calls,
         )
         self.tools = tools or []
         self.tools_dict = {tool.name: tool for tool in self.tools}
@@ -192,10 +194,8 @@ async def _run_with_tools(
         ] + self.conversation_history

         # Keep executing tools until we get a final answer
-        max_tool_calls = 5  # Limit the number of tool calls per query
         tool_call_count = 0
-
-        while tool_call_count < max_tool_calls:
+        while tool_call_count < self.max_tool_calls:
             formatted_tools = self.llm.format_tools_for_llm(self.tools)
             response = await self.llm.generate(
                 messages,
@@ -206,12 +206,37 @@
             # Handle ReACT and CoT patterns
             function_call = await self.llm.get_function_call(response)

-            # If no function call, we have our final answer
+            # If no function call, check if this is truly a final answer
             if not function_call:
                 assistant_message = self.llm.get_message_content(response)
                 if assistant_message:
-                    self.add_to_history('assistant', assistant_message)
-                    return assistant_message
+                    # Check if this is a final answer or just intermediate reasoning
+                    is_final = await self._is_final_answer(
+                        assistant_message, tool_call_count, messages
+                    )
+                    if is_final:
+                        self.add_to_history('assistant', assistant_message)
+                        return assistant_message
+                    else:
+                        # This is intermediate reasoning, add to context and continue
+                        logger.debug(
+                            f'Detected intermediate reasoning (not final answer): {assistant_message[:100]}...'
+                        )
+                        self.add_to_history('assistant', assistant_message)
+                        messages.append(
+                            {
+                                'role': 'assistant',
+                                'content': assistant_message,
+                            }
+                        )
+                        # Prompt the agent to take action
+                        messages.append(
+                            {
+                                'role': 'user',
+                                'content': 'Based on your reasoning, please proceed with the necessary tool calls to complete the task.',
+                            }
+                        )
+                        continue
                 break

             # Execute the tool
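Net effect of the hunk above: when the model replies with reasoning but no tool call, the loop no longer returns immediately; it classifies the reply and, if intermediate, feeds it back with a nudge to act. A schematic turn sequence (illustrative content, not from the PR):

```python
# turn 1: assistant -> 'I should inspect the schema first'  (no tool call)
#         _is_final_answer(...) -> False
#         messages += [assistant reasoning, user nudge to proceed]
# turn 2: assistant -> emits a tool call, which the agent executes
# turn 3: assistant -> 'Final Answer: the table holds 1,245 records.'
#         _is_final_answer(...) -> True, so the answer is returned
```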
@@ -324,6 +349,7 @@ def _get_react_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
 Action: Use available tools in the format: tool_name(param1: "value1", param2: "value2")
 Observation: The result of the action
 ... (repeat Thought/Action/Observation if needed)
+Final Answer: [Your complete answer to the user's question]

 Available tools:
 {tools_desc}
@@ -332,7 +358,9 @@
 1. Think carefully about what needs to be done
 2. Use tools when needed
 3. Make observations about tool results
-4. Conclude with a final answer when the task is complete"""
+4. Conclude with a final answer when the task is complete
+
+IMPORTANT: When you have enough information to answer the user's question, you MUST prefix your response with "Final Answer:" to indicate completion."""

         return react_prompt
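For illustration, a trace that satisfies the updated ReAct format; the tool name and values are invented, loosely following the calculator example in tool_usage.py:

```python
# Thought: I need the sum of the two figures.
# Action: calculator(operation: "add", x: 2, y: 3)
# Observation: 5.0
# Final Answer: 2 + 3 = 5
```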

@@ -364,6 +392,105 @@ def _get_cot_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
 2. Think through each step logically
 3. Use tools when needed to gather information
 4. Provide clear reasoning for your conclusions
-5. End with a final, well-justified answer"""
+5. End with a final, well-justified answer
+
+IMPORTANT: When you have gathered all necessary information and are ready to provide your complete answer, you MUST prefix your response with "Final Answer:" to indicate completion."""

         return cot_prompt

+    async def _is_final_answer(
+        self, message: str, tool_call_count: int, messages: List[Dict[str, Any]]
+    ) -> bool:
+        """
+        Determine if a message is a final answer or intermediate reasoning.
+        Uses structured token detection (like LangChain's ReAct) with LLM fallback.
+
+        Approach inspired by LangChain/CrewAI:
+        1. Primary: Check for explicit "Final Answer:" token
+        2. Fallback: Use LLM-based classification for robustness
+        """
+        message_stripped = message.strip()
+        message_lower = message_stripped.lower()
+
+        # Primary Detection: Explicit "Final Answer:" token (ReAct pattern)
+        # This is the most reliable method used by LangChain and similar frameworks
+        if message_stripped.startswith('Final Answer:') or message_lower.startswith(
+            'final answer:'
+        ):
+            logger.debug('Explicit "Final Answer:" token detected - this is final')
+            return True
+
+        # Check if "Final Answer:" appears anywhere in the response
+        # (agent might add context before the token)
+        if 'final answer:' in message_lower:
+            logger.debug('"Final Answer:" token found in response - treating as final')
+            return True
+
+        # Secondary Detection: Use LLM-based analysis for cases without explicit tokens
+        # This handles:
+        # - Agents not following the format perfectly
+        # - Direct mode (without ReAct/CoT patterns)
+        # - Edge cases where the agent provides answer without token
+
+        analysis_prompt = f"""You are a classifier that determines if an AI agent's response is a FINAL ANSWER or INTERMEDIATE REASONING.
+
+Agent's Response:
+"{message_stripped}"
+
+Context:
+- Tool calls executed so far: {tool_call_count}
+- Total conversation turns: {len(messages)}
+
+Classification Criteria:
+
+FINAL ANSWER - The response is final if it:
+✓ Directly answers the user's original question with concrete information
+✓ Provides specific data, results, or conclusions
+✓ Does not suggest or request additional actions
+✓ Reads like a complete, standalone answer
+✓ Contains synthesis of information already gathered
+
+INTERMEDIATE REASONING - The response is intermediate if it:
+✗ Describes plans or intentions for what to do next
+✗ Expresses need to gather more information
+✗ Contains thinking/reasoning WITHOUT providing the actual answer
+✗ Poses questions or expresses uncertainty about next steps
+✗ Mentions specific tools it wants to use
+
+Examples of INTERMEDIATE:
+- "I need to query the database schema first"
+- "Let me check the table structure"
+- "First, I should examine..."
+
+Examples of FINAL:
+- "Based on the query results, the table contains 1,245 records..."
+- "The analysis shows that revenue increased by 23%..."
+- "After examining the data, the answer is..."
+
+Respond with EXACTLY one word: "FINAL" or "INTERMEDIATE"
+"""
+
+        try:
+            analysis_messages = [
+                {
+                    'role': 'system',
+                    'content': 'You are a precise classification system. Respond with only FINAL or INTERMEDIATE.',
+                },
+                {'role': 'user', 'content': analysis_prompt},
+            ]
+            analysis_response = await self.llm.generate(analysis_messages)
+            analysis = self.llm.get_message_content(analysis_response).strip().upper()
+
+            is_final = 'FINAL' in analysis
+            logger.debug(
+                f'LLM classifier: "{analysis}" -> is_final={is_final} (message preview: "{message_stripped[:80]}...")'
+            )
+            return is_final
+
+        except Exception as e:
+            logger.warning(
+                f'LLM classification failed: {e}. Defaulting to final=False to allow continuation.'
+            )
+            # Conservative default: treat as intermediate to avoid premature exit
+            # This is safer as it allows the agent to continue rather than stopping too early
+            return False
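Because the fast path is plain string matching, it can be sanity-checked without an LLM; a small illustrative snippet, not part of the PR:

```python
msg = 'Final Answer: The table contains 1,245 records.'
assert msg.strip().lower().startswith('final answer:')  # fast path -> final

msg2 = 'Let me check the table structure first.'
assert 'final answer:' not in msg2.lower()  # would fall through to the LLM classifier
```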
2 changes: 2 additions & 0 deletions flo_ai/flo_ai/models/base_agent.py
@@ -23,12 +23,14 @@ def __init__(
         agent_type: AgentType,
         llm: BaseLLM,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
     ):
         self.name = name
         self.system_prompt = system_prompt
         self.agent_type = agent_type
         self.llm = llm
         self.max_retries = max_retries
+        self.max_tool_calls = max_tool_calls
         self.resolved_variables = False
         self.conversation_history: List[Dict[str, str]] = []

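A hypothetical construction showing the new knob; the class, import path, and tool follow the examples in this PR and are assumptions about the package layout:

```python
from flo_ai import OpenAI, ToolAgent  # assumed exports; see the examples for real paths

agent = ToolAgent(
    name='CalculatorAssistant',
    system_prompt='You are a helpful calculator assistant.',
    llm=OpenAI(model='gpt-4o-mini', temperature=0.7),
    tools=[calculator_tool],  # defined as in flo_ai/examples/tool_usage.py
    max_tool_calls=10,  # new parameter; the default stays at 5
)
```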
1 change: 1 addition & 0 deletions flo_ai/flo_ai/tool/base_tool.py
@@ -37,6 +37,7 @@ async def execute(self, **kwargs) -> Any:
             logger.info(f'Tool {self.name} returned: {tool_result}')
             return tool_result
         except Exception as e:
+            logger.error(f'Error executing tool {self.name}: {str(e)}', exc_info=True)
             raise ToolExecutionError(
                 f'Error executing tool {self.name}: {str(e)}', original_error=e
             )
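The `exc_info=True` flag attaches the active traceback to the log record, so the failure site inside the tool stays visible even though the exception is re-wrapped in `ToolExecutionError`. A standalone illustration using only the standard library:

```python
import logging

logging.basicConfig(level=logging.ERROR)

try:
    raise ValueError('bad input')
except Exception as e:
    # exc_info=True makes the handler print the full traceback after the message
    logging.error(f'Error executing tool demo: {e}', exc_info=True)
```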
20 changes: 12 additions & 8 deletions flo_ai/flo_ai/utils/logger.py
@@ -1,16 +1,20 @@
 import logging
 import os

-log_level = os.environ.get('LOG_LEVEL', 'INFO')
-logging.getLogger('uvicorn').setLevel(log_level)
+log_level = os.environ.get('FLO_AI_LOG_LEVEL', 'INFO')
 log_format = (
     '%(asctime)s | %(levelname)-8s | %(name)s | %(filename)s:%(lineno)d | %(message)s'
 )

-logging.basicConfig(
-    level=log_level,
-    format=log_format,
-    datefmt='%Y-%m-%d %H:%M:%S',
-)
+logger = logging.getLogger('flo_ai')
+logger.setLevel(log_level)
+
+# Prevent affecting the root logger
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter(log_format, datefmt='%Y-%m-%d %H:%M:%S')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)

-logger = logging.getLogger('floware')
+# Optional: stop logs from propagating to the root logger
+logger.propagate = False
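A hypothetical usage sketch of the reworked logger; the import path is inferred from the file location, and the env var must be set before first import because the level is read at import time:

```python
import os

os.environ['FLO_AI_LOG_LEVEL'] = 'DEBUG'  # read once, at import time

from flo_ai.utils.logger import logger  # assumed import path

logger.debug('tool call started')  # emitted now that the level is DEBUG
print(logger.propagate)  # False: records no longer reach the root logger
```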