From 08f9006ac41cb284dd416102fc78458066d6f9f9 Mon Sep 17 00:00:00 2001
From: vizsatiz
Date: Wed, 15 Oct 2025 15:38:41 +0530
Subject: [PATCH 1/5] fix(agent): Improve agent execution algorithm

---
 flo_ai/examples/tool_usage.py |  14 ++--
 flo_ai/flo_ai/models/agent.py | 142 ++++++++++++++++++++++++++++++----
 flo_ai/flo_ai/utils/logger.py |   5 +-
 3 files changed, 137 insertions(+), 24 deletions(-)

diff --git a/flo_ai/examples/tool_usage.py b/flo_ai/examples/tool_usage.py
index 0d9cbbbd..5ba7cdb1 100644
--- a/flo_ai/examples/tool_usage.py
+++ b/flo_ai/examples/tool_usage.py
@@ -108,7 +108,7 @@ async def calculate(operation: str, x: float, y: float) -> float:
     llm = OpenAI(model='gpt-3.5-turbo', temperature=0.7)
     agent = ToolAgent(
         name='CalculatorAssistant',
-        system_prompt='You are a helpful calculator assistant. Use the calculator tool directly without explanation.',
+        system_prompt='You are a helpful calculator assistant. Use the calculator tool if needed and answer the question asked.',
         llm=llm,
         tools=[calculator_tool],
         reasoning_pattern=ReasoningPattern.DIRECT,
@@ -120,14 +120,14 @@ async def calculate(operation: str, x: float, y: float) -> float:

 # Run the examples
 if __name__ == '__main__':
-    print('Testing conversational agent...\n')
-    asyncio.run(test_conversational())
+    # print('Testing conversational agent...\n')
+    # asyncio.run(test_conversational())

-    print('\nTesting tool agent...\n')
-    asyncio.run(test_tool_agent())
+    # print('\nTesting tool agent...\n')
+    # asyncio.run(test_tool_agent())

-    print('\nTesting error handling...\n')
-    asyncio.run(test_error_handling())
+    # print('\nTesting error handling...\n')
+    # asyncio.run(test_error_handling())

     print('\nTesting direct reasoning...\n')
     asyncio.run(test_direct_reasoning())
diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py
index 0cd9243f..a1fa1800 100644
--- a/flo_ai/flo_ai/models/agent.py
+++ b/flo_ai/flo_ai/models/agent.py
@@ -206,12 +206,37 @@ async def _run_with_tools(
             # Handle ReACT and CoT patterns
             function_call = await self.llm.get_function_call(response)

-            # If no function call, we have our final answer
+            # If no function call, check if this is truly a final answer
             if not function_call:
                 assistant_message = self.llm.get_message_content(response)
                 if assistant_message:
-                    self.add_to_history('assistant', assistant_message)
-                    return assistant_message
+                    # Check if this is a final answer or just intermediate reasoning
+                    is_final = await self._is_final_answer(
+                        assistant_message, tool_call_count, messages
+                    )
+                    if is_final:
+                        self.add_to_history('assistant', assistant_message)
+                        return assistant_message
+                    else:
+                        # This is intermediate reasoning, add to context and continue
+                        logger.debug(
+                            f'Detected intermediate reasoning (not final answer): {assistant_message[:100]}...'
+                        )
+                        self.add_to_history('assistant', assistant_message)
+                        messages.append(
+                            {
+                                'role': 'assistant',
+                                'content': assistant_message,
+                            }
+                        )
+                        # Prompt the agent to take action
+                        messages.append(
+                            {
+                                'role': 'user',
+                                'content': 'Based on your reasoning, please proceed with the necessary tool calls to complete the task.',
+                            }
+                        )
+                        continue
                 break

             # Execute the tool
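The hunk above changes the tool loop so that a response without a function call is no longer taken as the final answer by default: it is classified first, and intermediate reasoning is fed back into the context with a nudge to act. Condensed to a runnable sketch (fake_llm_generate is a hypothetical stand-in for the BaseLLM API, and only the control flow mirrors the patch):

import asyncio
from typing import Any, Dict, List, Optional

async def fake_llm_generate(messages: List[Dict[str, str]]) -> Dict[str, Any]:
    # Stand-in model: reasons once, then emits the Final Answer token.
    if len(messages) <= 2:
        return {'content': 'I should use the calculator first.', 'function_call': None}
    return {'content': 'Final Answer: 42', 'function_call': None}

async def run_with_tools_sketch(user_input: str, max_tool_calls: int = 5) -> Optional[str]:
    messages = [
        {'role': 'system', 'content': 'demo system prompt'},
        {'role': 'user', 'content': user_input},
    ]
    tool_call_count = 0
    while tool_call_count < max_tool_calls:
        response = await fake_llm_generate(messages)
        if response['function_call'] is None:
            content = response['content']
            if 'final answer:' in content.lower():  # primary detection, as in the patch
                return content
            # Intermediate reasoning: keep it in context and prompt the agent to act.
            messages.append({'role': 'assistant', 'content': content})
            messages.append({
                'role': 'user',
                'content': 'Based on your reasoning, please proceed with the necessary tool calls to complete the task.',
            })
            continue
        tool_call_count += 1  # the real loop executes the tool call here
    return None

print(asyncio.run(run_with_tools_sketch('What is 6 * 7?')))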
@@ -324,15 +349,12 @@ def _get_react_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
     Action: Use available tools in the format: tool_name(param1: "value1", param2: "value2")
     Observation: The result of the action
     ... (repeat Thought/Action/Observation if needed)
+    Final Answer: [Your complete answer to the user's question]

     Available tools:
     {tools_desc}

-    Remember to:
-    1. Think carefully about what needs to be done
-    2. Use tools when needed
-    3. Make observations about tool results
-    4. Conclude with a final answer when the task is complete"""
+    IMPORTANT: When you have enough information to answer the user's question, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return react_prompt

@@ -359,11 +381,103 @@ def _get_cot_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:

     Available tools:
     {tools_desc}

-    Remember to:
-    1. Break down complex problems into smaller steps
-    2. Think through each step logically
-    3. Use tools when needed to gather information
-    4. Provide clear reasoning for your conclusions
-    5. End with a final, well-justified answer"""
+    IMPORTANT: When you have gathered all necessary information and are ready to provide your complete answer, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return cot_prompt
+
+    async def _is_final_answer(
+        self, message: str, tool_call_count: int, messages: List[Dict[str, Any]]
+    ) -> bool:
+        """
+        Determine if a message is a final answer or intermediate reasoning.
+        Uses structured token detection (like LangChain's ReAct) with LLM fallback.
+
+        Approach inspired by LangChain/CrewAI:
+        1. Primary: Check for explicit "Final Answer:" token
+        2. Fallback: Use LLM-based classification for robustness
+        """
+        message_stripped = message.strip()
+        message_lower = message_stripped.lower()
+
+        # Primary Detection: Explicit "Final Answer:" token (ReAct pattern)
+        # This is the most reliable method used by LangChain and similar frameworks
+        if message_stripped.startswith('Final Answer:') or message_lower.startswith(
+            'final answer:'
+        ):
+            logger.debug('Explicit "Final Answer:" token detected - this is final')
+            return True
+
+        # Check if "Final Answer:" appears anywhere in the response
+        # (agent might add context before the token)
+        if 'final answer:' in message_lower:
+            logger.debug('"Final Answer:" token found in response - treating as final')
+            return True
+
+        # Secondary Detection: Use LLM-based analysis for cases without explicit tokens
+        # This handles:
+        # - Agents not following the format perfectly
+        # - Direct mode (without ReAct/CoT patterns)
+        # - Edge cases where the agent provides answer without token
+
+        analysis_prompt = f"""You are a classifier that determines if an AI agent's response is a FINAL ANSWER or INTERMEDIATE REASONING.
+
+Agent's Response:
+"{message_stripped}"
+
+Context:
+- Tool calls executed so far: {tool_call_count}
+- Total conversation turns: {len(messages)}
+
+Classification Criteria:
+
+FINAL ANSWER - The response is final if it:
+✓ Directly answers the user's original question with concrete information
+✓ Provides specific data, results, or conclusions
+✓ Does not suggest or request additional actions
+✓ Reads like a complete, standalone answer
+✓ Contains synthesis of information already gathered
+
+INTERMEDIATE REASONING - The response is intermediate if it:
+✗ Describes plans or intentions for what to do next
+✗ Expresses need to gather more information
+✗ Contains thinking/reasoning WITHOUT providing the actual answer
+✗ Poses questions or expresses uncertainty about next steps
+✗ Mentions specific tools it wants to use
+
+Examples of INTERMEDIATE:
+- "I need to query the database schema first"
+- "Let me check the table structure"
+- "First, I should examine..."
+
+Examples of FINAL:
+- "Based on the query results, the table contains 1,245 records..."
+- "The analysis shows that revenue increased by 23%..."
+- "After examining the data, the answer is..."
+
+Respond with EXACTLY one word: "FINAL" or "INTERMEDIATE"
+"""
+
+        try:
+            analysis_messages = [
+                {
+                    'role': 'system',
+                    'content': 'You are a precise classification system. Respond with only FINAL or INTERMEDIATE.',
+                },
+                {'role': 'user', 'content': analysis_prompt},
+            ]
+            analysis_response = await self.llm.generate(analysis_messages)
+            analysis = self.llm.get_message_content(analysis_response).strip().upper()

+            is_final = 'FINAL' in analysis
+            logger.debug(
+                f'LLM classifier: "{analysis}" -> is_final={is_final} (message preview: "{message_stripped[:80]}...")'
+            )
+            return is_final
+
+        except Exception as e:
+            logger.warning(
+                f'LLM classification failed: {e}. Defaulting to final=False to allow continuation.'
+            )
+            # Conservative default: treat as intermediate to avoid premature exit
+            # This is safer as it allows the agent to continue rather than stopping too early
+            return False
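Of the two stages above, only the token check is deterministic; the fallback needs a live model. A minimal standalone illustration of the primary stage (an editorial sketch, not the library's API), exercised with the classifier prompt's own examples. The two startswith branches in the patch are subsumed by the substring test, so the sketch collapses them into one:

def has_final_answer_token(message: str) -> bool:
    # Mirrors the primary detection: an explicit 'Final Answer:' token,
    # either as a prefix or anywhere in the response.
    return 'final answer:' in message.strip().lower()

assert has_final_answer_token('Final Answer: the table contains 1,245 records')
assert has_final_answer_token('Thought: I have the result.\nFinal Answer: 42')
assert not has_final_answer_token('I need to query the database schema first')
assert not has_final_answer_token('Let me check the table structure')
print('token detection sketch OK')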
+ ) + # Conservative default: treat as intermediate to avoid premature exit + # This is safer as it allows the agent to continue rather than stopping too early + return False diff --git a/flo_ai/flo_ai/utils/logger.py b/flo_ai/flo_ai/utils/logger.py index a6ebfbc4..a90d09a1 100644 --- a/flo_ai/flo_ai/utils/logger.py +++ b/flo_ai/flo_ai/utils/logger.py @@ -1,8 +1,7 @@ import logging import os -log_level = os.environ.get('LOG_LEVEL', 'INFO') -logging.getLogger('uvicorn').setLevel(log_level) +log_level = os.environ.get('FLO_AI_LOG_LEVEL', 'INFO') log_format = ( '%(asctime)s | %(levelname)-8s | %(name)s | %(filename)s:%(lineno)d | %(message)s' ) @@ -13,4 +12,4 @@ datefmt='%Y-%m-%d %H:%M:%S', ) -logger = logging.getLogger('floware') +logger = logging.getLogger('flo_ai') From a8d9ce792737240bb49e0b2a64a9fcd851ce168a Mon Sep 17 00:00:00 2001 From: vizsatiz Date: Wed, 15 Oct 2025 21:49:31 +0530 Subject: [PATCH 2/5] Fix for tests --- flo_ai/examples/tool_usage.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flo_ai/examples/tool_usage.py b/flo_ai/examples/tool_usage.py index 5ba7cdb1..c4f353a5 100644 --- a/flo_ai/examples/tool_usage.py +++ b/flo_ai/examples/tool_usage.py @@ -120,14 +120,14 @@ async def calculate(operation: str, x: float, y: float) -> float: # Run the examples if __name__ == '__main__': - # print('Testing conversational agent...\n') - # asyncio.run(test_conversational()) + print('Testing conversational agent...\n') + asyncio.run(test_conversational()) - # print('\nTesting tool agent...\n') - # asyncio.run(test_tool_agent()) + print('\nTesting tool agent...\n') + asyncio.run(test_tool_agent()) - # print('\nTesting error handling...\n') - # asyncio.run(test_error_handling()) + print('\nTesting error handling...\n') + asyncio.run(test_error_handling()) print('\nTesting direct reasoning...\n') asyncio.run(test_direct_reasoning()) From 49270f8c750e99626ee3dfdd3f40c4c05bb439c6 Mon Sep 17 00:00:00 2001 From: vizsatiz Date: Wed, 15 Oct 2025 22:02:59 +0530 Subject: [PATCH 3/5] Testing with more tools --- flo_ai/examples/multi_tool_example.py | 2 +- flo_ai/flo_ai/utils/logger.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/flo_ai/examples/multi_tool_example.py b/flo_ai/examples/multi_tool_example.py index 7be1464e..f5926291 100644 --- a/flo_ai/examples/multi_tool_example.py +++ b/flo_ai/examples/multi_tool_example.py @@ -132,7 +132,7 @@ async def test_multi_tool_agent(llm: BaseLLM, agent_name: str): async def main(): # Test with OpenAI - openai_llm = OpenAI(model='gpt-4-turbo-preview', temperature=0.7) + openai_llm = OpenAI(model='gpt-4o-mini', temperature=0.7) await test_multi_tool_agent(openai_llm, 'OpenAI Multi-Tool Agent') # Test with Claude diff --git a/flo_ai/flo_ai/utils/logger.py b/flo_ai/flo_ai/utils/logger.py index a90d09a1..84153e4e 100644 --- a/flo_ai/flo_ai/utils/logger.py +++ b/flo_ai/flo_ai/utils/logger.py @@ -6,10 +6,15 @@ '%(asctime)s | %(levelname)-8s | %(name)s | %(filename)s:%(lineno)d | %(message)s' ) -logging.basicConfig( - level=log_level, - format=log_format, - datefmt='%Y-%m-%d %H:%M:%S', -) - logger = logging.getLogger('flo_ai') +logger.setLevel(log_level) + +# Prevent affecting the root logger +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter(log_format, datefmt='%Y-%m-%d %H:%M:%S') + handler.setFormatter(formatter) + logger.addHandler(handler) + +# Optional: stop logs from propagating to the root logger +logger.propagate = False From 
From 1ed323cb156fe2ef3e16739f5849db13be60f2c5 Mon Sep 17 00:00:00 2001
From: vizsatiz
Date: Thu, 16 Oct 2025 10:06:27 +0530
Subject: [PATCH 4/5] fix(prompt): brought back what was deleted

---
 flo_ai/flo_ai/models/agent.py   | 13 +++++++++++++
 flo_ai/flo_ai/tool/base_tool.py |  1 +
 2 files changed, 14 insertions(+)

diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py
index a1fa1800..bc9eab92 100644
--- a/flo_ai/flo_ai/models/agent.py
+++ b/flo_ai/flo_ai/models/agent.py
@@ -354,6 +354,12 @@ def _get_react_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
     Available tools:
     {tools_desc}

+    Remember to:
+    1. Think carefully about what needs to be done
+    2. Use tools when needed
+    3. Make observations about tool results
+    4. Conclude with a final answer when the task is complete
+
     IMPORTANT: When you have enough information to answer the user's question, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return react_prompt
@@ -381,6 +387,13 @@ def _get_cot_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:

     Available tools:
     {tools_desc}

+    Remember to:
+    1. Break down complex problems into smaller steps
+    2. Think through each step logically
+    3. Use tools when needed to gather information
+    4. Provide clear reasoning for your conclusions
+    5. End with a final, well-justified answer
+
     IMPORTANT: When you have gathered all necessary information and are ready to provide your complete answer, you MUST prefix your response with "Final Answer:" to indicate completion."""

     return cot_prompt
diff --git a/flo_ai/flo_ai/tool/base_tool.py b/flo_ai/flo_ai/tool/base_tool.py
index a2b338d7..7d182982 100644
--- a/flo_ai/flo_ai/tool/base_tool.py
+++ b/flo_ai/flo_ai/tool/base_tool.py
@@ -37,6 +37,7 @@ async def execute(self, **kwargs) -> Any:
             logger.info(f'Tool {self.name} returned: {tool_result}')
             return tool_result
         except Exception as e:
+            logger.error(f'Error executing tool {self.name}: {str(e)}', exc_info=True)
             raise ToolExecutionError(
                 f'Error executing tool {self.name}: {str(e)}', original_error=e
             )
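The base_tool.py change above logs the full traceback before re-raising as ToolExecutionError, so a failure stays visible even if a caller swallows the wrapped exception. The shape of that log-then-wrap pattern, with a stub that only mimics the real class's constructor:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('tool_demo')

class ToolExecutionError(Exception):
    # Stub with the same constructor shape as the real exception.
    def __init__(self, message: str, original_error: Exception = None):
        super().__init__(message)
        self.original_error = original_error

def execute_tool(name: str) -> None:
    try:
        raise ValueError('division by zero')  # simulate a failing tool
    except Exception as e:
        # exc_info=True attaches the full traceback to the log record.
        logger.error(f'Error executing tool {name}: {str(e)}', exc_info=True)
        raise ToolExecutionError(f'Error executing tool {name}: {str(e)}', original_error=e)

try:
    execute_tool('calculator')
except ToolExecutionError as err:
    print('wrapped:', err, '| original:', repr(err.original_error))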
From 00857e7fb9de96640b6e971da4e830bf7ee90823 Mon Sep 17 00:00:00 2001
From: vizsatiz
Date: Thu, 16 Oct 2025 10:22:22 +0530
Subject: [PATCH 5/5] Max tool count change

---
 flo_ai/flo_ai/arium/arium.py       | 2 +-
 flo_ai/flo_ai/models/agent.py      | 6 +++---
 flo_ai/flo_ai/models/base_agent.py | 2 ++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/flo_ai/flo_ai/arium/arium.py b/flo_ai/flo_ai/arium/arium.py
index a86243a6..8ff2083e 100644
--- a/flo_ai/flo_ai/arium/arium.py
+++ b/flo_ai/flo_ai/arium/arium.py
@@ -381,7 +381,7 @@ async def _execute_node(
             )

             # Re-raise the exception
-            raise
+            raise e

     def _add_to_memory(self, result: str):
         # TODO result will be None for start and end nodes
diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py
index bc9eab92..97719dbc 100644
--- a/flo_ai/flo_ai/models/agent.py
+++ b/flo_ai/flo_ai/models/agent.py
@@ -22,6 +22,7 @@ def __init__(
         llm: BaseLLM,
         tools: Optional[List[Tool]] = None,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
         reasoning_pattern: ReasoningPattern = ReasoningPattern.DIRECT,
         output_schema: Optional[Dict[str, Any]] = None,
         role: Optional[str] = None,
@@ -40,6 +41,7 @@ def __init__(
             agent_type=agent_type,
             llm=llm,
             max_retries=max_retries,
+            max_tool_calls=max_tool_calls,
         )
         self.tools = tools or []
         self.tools_dict = {tool.name: tool for tool in self.tools}
@@ -192,10 +194,8 @@ async def _run_with_tools(
         ] + self.conversation_history

         # Keep executing tools until we get a final answer
-        max_tool_calls = 5  # Limit the number of tool calls per query
         tool_call_count = 0
-
-        while tool_call_count < max_tool_calls:
+        while tool_call_count < self.max_tool_calls:
             formatted_tools = self.llm.format_tools_for_llm(self.tools)
             response = await self.llm.generate(
                 messages,
diff --git a/flo_ai/flo_ai/models/base_agent.py b/flo_ai/flo_ai/models/base_agent.py
index ab506b46..96c984b2 100644
--- a/flo_ai/flo_ai/models/base_agent.py
+++ b/flo_ai/flo_ai/models/base_agent.py
@@ -23,12 +23,14 @@ def __init__(
         agent_type: AgentType,
         llm: BaseLLM,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
     ):
         self.name = name
         self.system_prompt = system_prompt
         self.agent_type = agent_type
         self.llm = llm
         self.max_retries = max_retries
+        self.max_tool_calls = max_tool_calls
         self.resolved_variables = False
         self.conversation_history: List[Dict[str, str]] = []
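With PATCH 5 applied, the per-query tool budget is a constructor argument (default 5, matching the old hard-coded limit) instead of a local constant inside _run_with_tools. A usage sketch based only on the signatures visible in this series; the import path is assumed, not verified, and calculator_tool is a placeholder:

# from flo_ai import OpenAI, ToolAgent, ReasoningPattern  # import path assumed

llm = OpenAI(model='gpt-4o-mini', temperature=0.7)
agent = ToolAgent(
    name='CalculatorAssistant',
    system_prompt='You are a helpful calculator assistant. Use the calculator tool if needed and answer the question asked.',
    llm=llm,
    tools=[calculator_tool],  # defined as in examples/tool_usage.py
    max_retries=3,
    max_tool_calls=10,        # new knob: allow up to 10 tool calls per query
    reasoning_pattern=ReasoningPattern.DIRECT,
)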