Skip to content
5 changes: 5 additions & 0 deletions astrbot/core/astr_main_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
BaiduWebSearchTool,
BochaWebSearchTool,
BraveWebSearchTool,
FirecrawlExtractWebPageTool,
FirecrawlWebSearchTool,
TavilyExtractWebPageTool,
TavilyWebSearchTool,
normalize_legacy_web_search_config,
Expand Down Expand Up @@ -1047,6 +1049,9 @@ async def _apply_web_search_tools(
req.func_tool.add_tool(tool_mgr.get_builtin_tool(BochaWebSearchTool))
elif provider == "brave":
req.func_tool.add_tool(tool_mgr.get_builtin_tool(BraveWebSearchTool))
elif provider == "firecrawl":
req.func_tool.add_tool(tool_mgr.get_builtin_tool(FirecrawlWebSearchTool))
req.func_tool.add_tool(tool_mgr.get_builtin_tool(FirecrawlExtractWebPageTool))
elif provider == "baidu_ai_search":
req.func_tool.add_tool(tool_mgr.get_builtin_tool(BaiduWebSearchTool))

Expand Down
11 changes: 11 additions & 0 deletions astrbot/core/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -3202,6 +3202,7 @@ class ChatProviderTemplate(TypedDict):
"baidu_ai_search",
"bocha",
"brave",
"firecrawl",
],
"condition": {
"provider_settings.web_search": True,
Expand Down Expand Up @@ -3237,6 +3238,16 @@ class ChatProviderTemplate(TypedDict):
"provider_settings.web_search": True,
},
},
"provider_settings.websearch_firecrawl_key": {
"description": "Firecrawl API Key",
"type": "list",
"items": {"type": "string"},
"hint": "可添加多个 Key 进行轮询。",
"condition": {
"provider_settings.websearch_provider": "firecrawl",
"provider_settings.web_search": True,
},
},
"provider_settings.websearch_baidu_app_builder_key": {
"description": "百度千帆智能云 APP Builder API Key",
"type": "string",
Expand Down
192 changes: 192 additions & 0 deletions astrbot/core/tools/web_search_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
"tavily_extract_web_page",
"web_search_bocha",
"web_search_brave",
"web_search_firecrawl",
"firecrawl_extract_web_page",
]
_TAVILY_WEB_SEARCH_TOOL_CONFIG = {
"provider_settings.web_search": True,
Expand All @@ -32,6 +34,10 @@
"provider_settings.web_search": True,
"provider_settings.websearch_provider": "brave",
}
# Passed as ``config`` to ``@builtin_tool`` for both Firecrawl tools (search
# and page extraction). The entries name the settings "web search enabled" and
# "provider is firecrawl"; how ``builtin_tool`` evaluates them is defined
# elsewhere -- NOTE(review): presumably the tools are only offered when these
# settings match; confirm against ``builtin_tool``.
_FIRECRAWL_WEB_SEARCH_TOOL_CONFIG = {
"provider_settings.web_search": True,
"provider_settings.websearch_provider": "firecrawl",
}
_BAIDU_WEB_SEARCH_TOOL_CONFIG = {
"provider_settings.web_search": True,
"provider_settings.websearch_provider": "baidu_ai_search",
Expand Down Expand Up @@ -69,6 +75,7 @@ async def get(self, provider_settings: dict) -> str:
# Module-level key rotators, one per search provider. The first argument is
# the ``provider_settings`` entry that stores that provider's API key list; the
# second is the provider's display name (presumably used in messages produced
# by ``_KeyRotator`` -- confirm against its implementation).
_TAVILY_KEY_ROTATOR = _KeyRotator("websearch_tavily_key", "Tavily")
_BOCHA_KEY_ROTATOR = _KeyRotator("websearch_bocha_key", "BoCha")
_BRAVE_KEY_ROTATOR = _KeyRotator("websearch_brave_key", "Brave")
_FIRECRAWL_KEY_ROTATOR = _KeyRotator("websearch_firecrawl_key", "Firecrawl")


def normalize_legacy_web_search_config(cfg) -> None:
Expand All @@ -91,6 +98,7 @@ def normalize_legacy_web_search_config(cfg) -> None:
"websearch_tavily_key",
"websearch_bocha_key",
"websearch_brave_key",
"websearch_firecrawl_key",
):
value = provider_settings.get(setting_name)
if isinstance(value, str):
Expand Down Expand Up @@ -258,6 +266,72 @@ async def _brave_search(
]


async def _firecrawl_search(
    provider_settings: dict,
    payload: dict,
) -> list[SearchResult]:
    """Run a query against the Firecrawl v2 search endpoint.

    Args:
        provider_settings: Provider settings handed to the Firecrawl key
            rotator to obtain the API key used for this request.
        payload: JSON body posted to the search endpoint unchanged.

    Returns:
        One ``SearchResult`` per returned row that carries a URL. The list is
        empty when the response contains no usable rows.

    Raises:
        Exception: If the endpoint answers with a non-200 status. The message
            includes the response body and the status code.
    """
    firecrawl_key = await _FIRECRAWL_KEY_ROTATOR.get(provider_settings)
    header = {
        "Authorization": f"Bearer {firecrawl_key}",
        "Content-Type": "application/json",
    }
    async with aiohttp.ClientSession(trust_env=True) as session:
        async with session.post(
            "https://api.firecrawl.dev/v2/search",
            json=payload,
            headers=header,
        ) as response:
            if response.status != 200:
                reason = await response.text()
                raise Exception(
                    f"Firecrawl web search failed: {reason}, status: {response.status}",
                )
            data = await response.json()

    # "data" is accepted either as a flat list of rows or as an object that
    # groups rows by source, of which only the "web" group is read. A JSON
    # null (or any other non-list value) in either position means "no rows"
    # rather than a TypeError while iterating.
    rows = data.get("data") if isinstance(data, dict) else None
    if isinstance(rows, dict):
        rows = rows.get("web")
    if not isinstance(rows, list):
        rows = []

    return [
        SearchResult(
            # ``or ""`` also covers an explicit null title.
            title=item.get("title") or "",
            url=item["url"],
            # First non-empty text field wins.
            snippet=(
                item.get("description")
                or item.get("snippet")
                or item.get("markdown")
                or ""
            ),
        )
        for item in rows
        # Skip malformed rows and rows without a URL.
        if isinstance(item, dict) and item.get("url")
    ]


async def _firecrawl_scrape(provider_settings: dict, payload: dict) -> dict:
    """Scrape a single page through the Firecrawl v2 scrape endpoint.

    Args:
        provider_settings: Provider settings handed to the Firecrawl key
            rotator to obtain the API key used for this request.
        payload: JSON body posted to the scrape endpoint unchanged.

    Returns:
        The non-empty ``"data"`` member of the JSON response.

    Raises:
        Exception: If the endpoint answers with a non-200 status.
        ValueError: If the response has no (or an empty) ``"data"`` member.
    """
    api_key = await _FIRECRAWL_KEY_ROTATOR.get(provider_settings)
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    async with aiohttp.ClientSession(trust_env=True) as http, http.post(
        "https://api.firecrawl.dev/v2/scrape",
        json=payload,
        headers=request_headers,
    ) as resp:
        if resp.status != 200:
            body_text = await resp.text()
            raise Exception(
                f"Firecrawl web scraper failed: {body_text}, status: {resp.status}",
            )
        body = await resp.json()
        scraped = body.get("data", {})
        if scraped:
            return scraped
        raise ValueError(
            "Error: Firecrawl web scraper does not return any results."
        )


async def _baidu_search(
provider_settings: dict,
payload: dict,
Expand Down Expand Up @@ -548,6 +622,124 @@ async def call(self, context, **kwargs) -> ToolExecResult:
return _search_result_payload(results)


@builtin_tool(config=_FIRECRAWL_WEB_SEARCH_TOOL_CONFIG)
@pydantic_dataclass
class FirecrawlWebSearchTool(FunctionTool[AstrAgentContext]):
    """Function tool that performs a web search through Firecrawl."""

    name: str = "web_search_firecrawl"
    description: str = (
        "A web search tool based on Firecrawl Search API, used to retrieve web "
        "pages related to the user's query."
    )
    parameters: dict = Field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Required. Search query."},
                "limit": {
                    "type": "integer",
                    "description": "Optional. Number of results to return. Range: 1-100. Default is 5.",
                },
                "location": {
                    "type": "string",
                    "description": "Optional. Geographic location for search results.",
                },
                "country": {
                    "type": "string",
                    "description": 'Optional. Country code for search results, for example "US" or "CN".',
                },
                "timeout": {
                    "type": "integer",
                    "description": "Optional. Request timeout in milliseconds.",
                },
            },
            "required": ["query"],
        }
    )

    async def call(self, context, **kwargs) -> ToolExecResult:
        """Execute the search and return the formatted results.

        Args:
            context: Runtime context from which the provider settings are read.
            **kwargs: Tool arguments as described by ``parameters``.

        Returns:
            The search result payload, or an ``"Error: ..."`` string when the
            API key is missing, the query is empty, or nothing was found.
        """
        _, provider_settings, _ = _get_runtime(context)
        if not provider_settings.get("websearch_firecrawl_key", []):
            return "Error: Firecrawl API key is not configured in AstrBot."

        # Report a missing or blank query as a tool error instead of raising
        # KeyError, mirroring the URL check of the Firecrawl extract tool.
        query = str(kwargs.get("query") or "").strip()
        if not query:
            return "Error: query must be a non-empty string."

        # Tool arguments are model-generated: fall back to the documented
        # default on missing or unparsable values and clamp to the documented
        # 1-100 range.
        try:
            limit = int(kwargs.get("limit") or 5)
        except (TypeError, ValueError, OverflowError):
            limit = 5
        limit = max(1, min(limit, 100))

        payload = {
            "query": query,
            "limit": limit,
            "sources": ["web"],
        }
        # Optional arguments are forwarded only when given a truthy value.
        for key in ("location", "country", "timeout"):
            if kwargs.get(key):
                payload[key] = kwargs[key]

        results = await _firecrawl_search(provider_settings, payload)
        if not results:
            return "Error: Firecrawl web searcher does not return any results."
        return _search_result_payload(results)


@builtin_tool(config=_FIRECRAWL_WEB_SEARCH_TOOL_CONFIG)
@pydantic_dataclass
class FirecrawlExtractWebPageTool(FunctionTool[AstrAgentContext]):
    """Function tool that extracts the content of one web page via Firecrawl."""

    name: str = "firecrawl_extract_web_page"
    description: str = "Extract the content of a web page using Firecrawl."
    parameters: dict = Field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "Required. A URL to extract content from.",
                },
                "format": {
                    "type": "string",
                    "description": 'Optional. Output format, one of "markdown", "html", "rawHtml", "summary". Default is "markdown".',
                },
                "only_main_content": {
                    "type": "boolean",
                    "description": "Optional. Whether to extract only the main page content. Default is true.",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Optional. Request timeout in milliseconds.",
                },
                "max_age": {
                    "type": "integer",
                    "description": "Optional. Maximum cache age in milliseconds.",
                },
            },
            "required": ["url"],
        }
    )

    async def call(self, context, **kwargs) -> ToolExecResult:
        """Scrape the requested URL and return its content as text.

        Args:
            context: Runtime context from which the provider settings are read.
            **kwargs: Tool arguments as described by ``parameters``.

        Returns:
            ``"URL: <url>\\nContent: <content>"`` on success, otherwise an
            ``"Error: ..."`` string.
        """
        _, provider_settings, _ = _get_runtime(context)
        configured_keys = provider_settings.get("websearch_firecrawl_key", [])
        if not configured_keys:
            return "Error: Firecrawl API key is not configured in AstrBot."

        target_url = str(kwargs.get("url", "")).strip()
        if not target_url:
            return "Error: url must be a non-empty string."

        # Unknown formats silently fall back to markdown.
        supported_formats = ("markdown", "html", "rawHtml", "summary")
        requested_format = kwargs.get("format", "markdown")
        if requested_format not in supported_formats:
            requested_format = "markdown"

        scrape_request = {
            "url": target_url,
            "formats": [requested_format],
            "onlyMainContent": kwargs.get("only_main_content", True),
        }
        # Optional tool arguments mapped to their request field names; each is
        # forwarded only when given a truthy value.
        for arg_name, api_field in (("timeout", "timeout"), ("max_age", "maxAge")):
            if kwargs.get(arg_name):
                scrape_request[api_field] = kwargs[arg_name]

        page = await _firecrawl_scrape(provider_settings, scrape_request)
        # The content is read from the result field named after the format.
        text = page.get(requested_format, "")
        if not text:
            return "Error: Firecrawl web scraper does not return any results."
        final_url = page.get("url") or target_url
        return f"URL: {final_url}\nContent: {text}"


@builtin_tool(config=_BAIDU_WEB_SEARCH_TOOL_CONFIG)
@pydantic_dataclass
class BaiduWebSearchTool(FunctionTool[AstrAgentContext]):
Expand Down
2 changes: 1 addition & 1 deletion dashboard/src/components/chat/MessageListDEPRECATED.vue
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ export default {
part.tool_calls.forEach(toolCall => {
// 检查是否是支持引用解析的 web_search 工具调用
if (
!['web_search_baidu', 'web_search_tavily', 'web_search_bocha', 'web_search_brave'].includes(toolCall.name) ||
!['web_search_baidu', 'web_search_tavily', 'web_search_bocha', 'web_search_brave', 'web_search_firecrawl'].includes(toolCall.name) ||
!toolCall.result
) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@
"description": "Brave Search API Key",
"hint": "Multiple keys can be added for rotation."
},
"websearch_firecrawl_key": {
"description": "Firecrawl API Key",
"hint": "Multiple keys can be added for rotation."
},
"websearch_baidu_app_builder_key": {
"description": "Baidu Qianfan Smart Cloud APP Builder API Key",
"hint": "Reference: [https://console.bce.baidu.com/iam/#/iam/apikey/list](https://console.bce.baidu.com/iam/#/iam/apikey/list)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@
"description": "API-ключ Brave Search",
"hint": "Можно добавить несколько ключей для ротации."
},
"websearch_firecrawl_key": {
"description": "API-ключ Firecrawl",
"hint": "Можно добавить несколько ключей для ротации."
},
"websearch_baidu_app_builder_key": {
"description": "API-ключ Baidu Qianfan APP Builder",
"hint": "Ссылка: [https://console.bce.baidu.com/iam/#/iam/apikey/list](https://console.bce.baidu.com/iam/#/iam/apikey/list)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@
"description": "Brave Search API Key",
"hint": "可添加多个 Key 进行轮询。"
},
"websearch_firecrawl_key": {
"description": "Firecrawl API Key",
"hint": "可添加多个 Key 进行轮询。"
},
"websearch_baidu_app_builder_key": {
"description": "百度千帆智能云 APP Builder API Key",
"hint": "参考:[https://console.bce.baidu.com/iam/#/iam/apikey/list](https://console.bce.baidu.com/iam/#/iam/apikey/list)"
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/test_astr_main_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,37 @@ async def test_apply_web_search_tools_uses_builtin_tool_manager(
assert req.func_tool is not None
assert req.func_tool.get_tool("web_search_baidu") is builtin_tool

@pytest.mark.asyncio
async def test_apply_web_search_tools_adds_firecrawl_search_and_extract_tools(
    self, mock_event, mock_context
):
    """Selecting the Firecrawl provider registers its search and extract tools."""
    module = ama

    # Stand-ins for the two builtin tools, handed out in request order.
    fake_search = MagicMock(spec=FunctionTool)
    fake_search.name = "web_search_firecrawl"
    fake_extract = MagicMock(spec=FunctionTool)
    fake_extract.name = "firecrawl_extract_web_page"
    tool_mgr = MagicMock()
    tool_mgr.get_builtin_tool.side_effect = [fake_search, fake_extract]

    settings = {"web_search": True, "websearch_provider": "firecrawl"}
    mock_context.get_config.return_value = {"provider_settings": settings}
    mock_context.get_llm_tool_manager.return_value = tool_mgr
    req = ProviderRequest()

    await module._apply_web_search_tools(mock_event, req, mock_context)

    # The manager is asked for the search tool first, then the extract tool.
    expected_lookups = [
        ((module.FirecrawlWebSearchTool,),),
        ((module.FirecrawlExtractWebPageTool,),),
    ]
    assert tool_mgr.get_builtin_tool.call_args_list == expected_lookups
    assert req.func_tool is not None
    assert req.func_tool.get_tool("web_search_firecrawl") is fake_search
    assert req.func_tool.get_tool("firecrawl_extract_web_page") is fake_extract

def test_proactive_cron_job_tools_uses_builtin_tool_manager(self, mock_context):
"""Test cron tool injection through the builtin tool manager."""
module = ama
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/test_func_tool_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from astrbot.core.provider.func_tool_manager import FunctionToolManager
from astrbot.core.tools.computer_tools.shell import ExecuteShellTool
from astrbot.core.tools.message_tools import SendMessageToUserTool
from astrbot.core.tools.web_search_tools import FirecrawlExtractWebPageTool
from astrbot.core.tools.web_search_tools import FirecrawlWebSearchTool


def test_get_builtin_tool_by_class_returns_cached_instance():
Expand Down Expand Up @@ -38,3 +40,15 @@ def test_computer_tools_are_registered_as_builtin_tools():

assert tool.name == "astrbot_execute_shell"
assert manager.is_builtin_tool("astrbot_execute_shell") is True


def test_firecrawl_tools_are_registered_as_builtin_tools():
    """Both Firecrawl tool classes resolve to builtin tools with the expected names."""
    manager = FunctionToolManager()
    expected = (
        (FirecrawlWebSearchTool, "web_search_firecrawl"),
        (FirecrawlExtractWebPageTool, "firecrawl_extract_web_page"),
    )

    # Resolve every tool before asserting anything.
    resolved = [manager.get_builtin_tool(tool_cls) for tool_cls, _ in expected]

    for tool, (_, tool_name) in zip(resolved, expected):
        assert tool.name == tool_name
    for _, tool_name in expected:
        assert manager.is_builtin_tool(tool_name) is True
Loading
Loading