2 changes: 1 addition & 1 deletion flo_ai/examples/multi_tool_example.py
@@ -132,7 +132,7 @@ async def test_multi_tool_agent(llm: BaseLLM, agent_name: str):

 async def main():
     # Test with OpenAI
-    openai_llm = OpenAI(model='gpt-4-turbo-preview', temperature=0.7)
+    openai_llm = OpenAI(model='gpt-4o-mini', temperature=0.7)
     await test_multi_tool_agent(openai_llm, 'OpenAI Multi-Tool Agent')

     # Test with Claude
2 changes: 1 addition & 1 deletion flo_ai/examples/tool_usage.py
@@ -108,7 +108,7 @@ async def calculate(operation: str, x: float, y: float) -> float:
     llm = OpenAI(model='gpt-3.5-turbo', temperature=0.7)
     agent = ToolAgent(
         name='CalculatorAssistant',
-        system_prompt='You are a helpful calculator assistant. Use the calculator tool directly without explanation.',
+        system_prompt='You are a helpful calculator assistant. Use the calculator tool if needed and answer the question asked.',
         llm=llm,
         tools=[calculator_tool],
         reasoning_pattern=ReasoningPattern.DIRECT,
2 changes: 1 addition & 1 deletion flo_ai/flo_ai/arium/arium.py
@@ -381,7 +381,7 @@ async def _execute_node(
             )

             # Re-raise the exception
-            raise
+            raise e

     def _add_to_memory(self, result: str):
         # TODO result will be None for start and end nodes
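Note on the change above: inside an `except` block, a bare `raise` and `raise e` both re-raise the caught exception; the bare form preserves the original traceback untouched, while `raise e` also appends the re-raise site. A minimal standalone sketch, not from this repo:

```python
try:
    1 / 0
except ZeroDivisionError as e:
    print(f'about to re-raise: {e}')
    raise e  # a bare `raise` here would leave the original traceback unchanged
```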
143 changes: 135 additions & 8 deletions flo_ai/flo_ai/models/agent.py
@@ -22,6 +22,7 @@ def __init__(
         llm: BaseLLM,
         tools: Optional[List[Tool]] = None,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
         reasoning_pattern: ReasoningPattern = ReasoningPattern.DIRECT,
         output_schema: Optional[Dict[str, Any]] = None,
         role: Optional[str] = None,
@@ -40,6 +41,7 @@ def __init__(
             agent_type=agent_type,
             llm=llm,
             max_retries=max_retries,
+            max_tool_calls=max_tool_calls,
         )
         self.tools = tools or []
         self.tools_dict = {tool.name: tool for tool in self.tools}
@@ -192,10 +194,8 @@ async def _run_with_tools(
         ] + self.conversation_history

         # Keep executing tools until we get a final answer
-        max_tool_calls = 5  # Limit the number of tool calls per query
         tool_call_count = 0
-
-        while tool_call_count < max_tool_calls:
+        while tool_call_count < self.max_tool_calls:
             formatted_tools = self.llm.format_tools_for_llm(self.tools)
             response = await self.llm.generate(
                 messages,
@@ -206,12 +206,37 @@
             # Handle ReACT and CoT patterns
             function_call = await self.llm.get_function_call(response)

-            # If no function call, we have our final answer
+            # If no function call, check if this is truly a final answer
             if not function_call:
                 assistant_message = self.llm.get_message_content(response)
                 if assistant_message:
-                    self.add_to_history('assistant', assistant_message)
-                    return assistant_message
+                    # Check if this is a final answer or just intermediate reasoning
+                    is_final = await self._is_final_answer(
+                        assistant_message, tool_call_count, messages
+                    )
+                    if is_final:
+                        self.add_to_history('assistant', assistant_message)
+                        return assistant_message
+                    else:
+                        # This is intermediate reasoning, add to context and continue
+                        logger.debug(
+                            f'Detected intermediate reasoning (not final answer): {assistant_message[:100]}...'
+                        )
+                        self.add_to_history('assistant', assistant_message)
+                        messages.append(
+                            {
+                                'role': 'assistant',
+                                'content': assistant_message,
+                            }
+                        )
+                        # Prompt the agent to take action
+                        messages.append(
+                            {
+                                'role': 'user',
+                                'content': 'Based on your reasoning, please proceed with the necessary tool calls to complete the task.',
+                            }
+                        )
+                        continue
                 break

             # Execute the tool
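Net effect of the hunk above: when the model replies with reasoning but no tool call, the loop no longer returns immediately; it classifies the reply and, if intermediate, feeds it back with a nudge to act. A schematic turn sequence (illustrative content, not from the PR):

```python
# turn 1: assistant -> 'I should inspect the schema first'  (no tool call)
#         _is_final_answer(...) -> False
#         messages += [assistant reasoning, user nudge to proceed]
# turn 2: assistant -> emits a tool call, which the agent executes
# turn 3: assistant -> 'Final Answer: the table holds 1,245 records.'
#         _is_final_answer(...) -> True, so the answer is returned
```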
@@ -324,6 +349,7 @@ def _get_react_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
 Action: Use available tools in the format: tool_name(param1: "value1", param2: "value2")
 Observation: The result of the action
 ... (repeat Thought/Action/Observation if needed)
+Final Answer: [Your complete answer to the user's question]

 Available tools:
 {tools_desc}
@@ -332,7 +358,9 @@
 1. Think carefully about what needs to be done
 2. Use tools when needed
 3. Make observations about tool results
-4. Conclude with a final answer when the task is complete"""
+4. Conclude with a final answer when the task is complete
+
+IMPORTANT: When you have enough information to answer the user's question, you MUST prefix your response with "Final Answer:" to indicate completion."""

         return react_prompt
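For illustration, a trace that satisfies the updated ReAct format; the tool name and values are invented, loosely following the calculator example in tool_usage.py:

```python
# Thought: I need the sum of the two figures.
# Action: calculator(operation: "add", x: 2, y: 3)
# Observation: 5.0
# Final Answer: 2 + 3 = 5
```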

@@ -364,6 +392,105 @@ def _get_cot_prompt(self, variables: Optional[Dict[str, Any]] = None) -> str:
 2. Think through each step logically
 3. Use tools when needed to gather information
 4. Provide clear reasoning for your conclusions
-5. End with a final, well-justified answer"""
+5. End with a final, well-justified answer
+
+IMPORTANT: When you have gathered all necessary information and are ready to provide your complete answer, you MUST prefix your response with "Final Answer:" to indicate completion."""

         return cot_prompt

+    async def _is_final_answer(
+        self, message: str, tool_call_count: int, messages: List[Dict[str, Any]]
+    ) -> bool:
+        """
+        Determine if a message is a final answer or intermediate reasoning.
+        Uses structured token detection (like LangChain's ReAct) with LLM fallback.
+
+        Approach inspired by LangChain/CrewAI:
+        1. Primary: Check for explicit "Final Answer:" token
+        2. Fallback: Use LLM-based classification for robustness
+        """
+        message_stripped = message.strip()
+        message_lower = message_stripped.lower()
+
+        # Primary Detection: Explicit "Final Answer:" token (ReAct pattern)
+        # This is the most reliable method used by LangChain and similar frameworks
+        if message_stripped.startswith('Final Answer:') or message_lower.startswith(
+            'final answer:'
+        ):
+            logger.debug('Explicit "Final Answer:" token detected - this is final')
+            return True
+
+        # Check if "Final Answer:" appears anywhere in the response
+        # (agent might add context before the token)
+        if 'final answer:' in message_lower:
+            logger.debug('"Final Answer:" token found in response - treating as final')
+            return True
+
+        # Secondary Detection: Use LLM-based analysis for cases without explicit tokens
+        # This handles:
+        # - Agents not following the format perfectly
+        # - Direct mode (without ReAct/CoT patterns)
+        # - Edge cases where the agent provides answer without token
+
+        analysis_prompt = f"""You are a classifier that determines if an AI agent's response is a FINAL ANSWER or INTERMEDIATE REASONING.
+
+Agent's Response:
+"{message_stripped}"
+
+Context:
+- Tool calls executed so far: {tool_call_count}
+- Total conversation turns: {len(messages)}
+
+Classification Criteria:
+
+FINAL ANSWER - The response is final if it:
+✓ Directly answers the user's original question with concrete information
+✓ Provides specific data, results, or conclusions
+✓ Does not suggest or request additional actions
+✓ Reads like a complete, standalone answer
+✓ Contains synthesis of information already gathered
+
+INTERMEDIATE REASONING - The response is intermediate if it:
+✗ Describes plans or intentions for what to do next
+✗ Expresses need to gather more information
+✗ Contains thinking/reasoning WITHOUT providing the actual answer
+✗ Poses questions or expresses uncertainty about next steps
+✗ Mentions specific tools it wants to use
+
+Examples of INTERMEDIATE:
+- "I need to query the database schema first"
+- "Let me check the table structure"
+- "First, I should examine..."
+
+Examples of FINAL:
+- "Based on the query results, the table contains 1,245 records..."
+- "The analysis shows that revenue increased by 23%..."
+- "After examining the data, the answer is..."
+
+Respond with EXACTLY one word: "FINAL" or "INTERMEDIATE"
+"""
+
+        try:
+            analysis_messages = [
+                {
+                    'role': 'system',
+                    'content': 'You are a precise classification system. Respond with only FINAL or INTERMEDIATE.',
+                },
+                {'role': 'user', 'content': analysis_prompt},
+            ]
+            analysis_response = await self.llm.generate(analysis_messages)
+            analysis = self.llm.get_message_content(analysis_response).strip().upper()
+
+            is_final = 'FINAL' in analysis
+            logger.debug(
+                f'LLM classifier: "{analysis}" -> is_final={is_final} (message preview: "{message_stripped[:80]}...")'
+            )
+            return is_final
+
+        except Exception as e:
+            logger.warning(
+                f'LLM classification failed: {e}. Defaulting to final=False to allow continuation.'
+            )
+            # Conservative default: treat as intermediate to avoid premature exit
+            # This is safer as it allows the agent to continue rather than stopping too early
+            return False
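Because the fast path is plain string matching, it can be sanity-checked without an LLM; a small illustrative snippet, not part of the PR:

```python
msg = 'Final Answer: The table contains 1,245 records.'
assert msg.strip().lower().startswith('final answer:')  # fast path -> final

msg2 = 'Let me check the table structure first.'
assert 'final answer:' not in msg2.lower()  # would fall through to the LLM classifier
```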
2 changes: 2 additions & 0 deletions flo_ai/flo_ai/models/base_agent.py
@@ -23,12 +23,14 @@ def __init__(
         agent_type: AgentType,
         llm: BaseLLM,
         max_retries: int = 3,
+        max_tool_calls: int = 5,
     ):
         self.name = name
         self.system_prompt = system_prompt
         self.agent_type = agent_type
         self.llm = llm
         self.max_retries = max_retries
+        self.max_tool_calls = max_tool_calls
         self.resolved_variables = False
         self.conversation_history: List[Dict[str, str]] = []

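A hypothetical construction showing the new knob; the class, import path, and tool follow the examples in this PR and are assumptions about the package layout:

```python
from flo_ai import OpenAI, ToolAgent  # assumed exports; see the examples for real paths

agent = ToolAgent(
    name='CalculatorAssistant',
    system_prompt='You are a helpful calculator assistant.',
    llm=OpenAI(model='gpt-4o-mini', temperature=0.7),
    tools=[calculator_tool],  # defined as in flo_ai/examples/tool_usage.py
    max_tool_calls=10,  # new parameter; the default stays at 5
)
```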
1 change: 1 addition & 0 deletions flo_ai/flo_ai/tool/base_tool.py
@@ -37,6 +37,7 @@ async def execute(self, **kwargs) -> Any:
             logger.info(f'Tool {self.name} returned: {tool_result}')
             return tool_result
         except Exception as e:
+            logger.error(f'Error executing tool {self.name}: {str(e)}', exc_info=True)
             raise ToolExecutionError(
                 f'Error executing tool {self.name}: {str(e)}', original_error=e
             )
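The `exc_info=True` flag attaches the active traceback to the log record, so the failure site inside the tool stays visible even though the exception is re-wrapped in `ToolExecutionError`. A standalone illustration using only the standard library:

```python
import logging

logging.basicConfig(level=logging.ERROR)

try:
    raise ValueError('bad input')
except Exception as e:
    # exc_info=True makes the handler print the full traceback after the message
    logging.error(f'Error executing tool demo: {e}', exc_info=True)
```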
20 changes: 12 additions & 8 deletions flo_ai/flo_ai/utils/logger.py
@@ -1,16 +1,20 @@
 import logging
 import os

-log_level = os.environ.get('LOG_LEVEL', 'INFO')
-logging.getLogger('uvicorn').setLevel(log_level)
+log_level = os.environ.get('FLO_AI_LOG_LEVEL', 'INFO')
 log_format = (
     '%(asctime)s | %(levelname)-8s | %(name)s | %(filename)s:%(lineno)d | %(message)s'
 )

-logging.basicConfig(
-    level=log_level,
-    format=log_format,
-    datefmt='%Y-%m-%d %H:%M:%S',
-)
+logger = logging.getLogger('flo_ai')
+logger.setLevel(log_level)
+
+# Prevent affecting the root logger
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter(log_format, datefmt='%Y-%m-%d %H:%M:%S')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)

-logger = logging.getLogger('floware')
+# Optional: stop logs from propagating to the root logger
+logger.propagate = False
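A hypothetical usage sketch of the reworked logger; the import path is inferred from the file location, and the env var must be set before first import because the level is read at import time:

```python
import os

os.environ['FLO_AI_LOG_LEVEL'] = 'DEBUG'  # read once, at import time

from flo_ai.utils.logger import logger  # assumed import path

logger.debug('tool call started')  # emitted now that the level is DEBUG
print(logger.propagate)  # False: records no longer reach the root logger
```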