From c3b4be2c46a18f5a997af39e0c0b5c0ccb6ed902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 11:57:39 +0900 Subject: [PATCH 01/33] feat: add CUA computer-use sandbox support --- astrbot/core/astr_agent_tool_exec.py | 19 +- astrbot/core/astr_main_agent.py | 14 + astrbot/core/computer/booters/base.py | 5 + astrbot/core/computer/booters/cua.py | 483 ++++++++++++++++++ astrbot/core/computer/computer_client.py | 17 + astrbot/core/computer/olayer/__init__.py | 2 + astrbot/core/computer/olayer/gui.py | 21 + astrbot/core/config/default.py | 46 +- astrbot/core/tools/computer_tools/__init__.py | 4 + astrbot/core/tools/computer_tools/cua.py | 164 ++++++ tests/unit/test_cua_computer_use.py | 265 ++++++++++ 11 files changed, 1037 insertions(+), 3 deletions(-) create mode 100644 astrbot/core/computer/booters/cua.py create mode 100644 astrbot/core/computer/olayer/gui.py create mode 100644 astrbot/core/tools/computer_tools/cua.py create mode 100644 tests/unit/test_cua_computer_use.py diff --git a/astrbot/core/astr_agent_tool_exec.py b/astrbot/core/astr_agent_tool_exec.py index d668dfeec9..dd8f6e4537 100644 --- a/astrbot/core/astr_agent_tool_exec.py +++ b/astrbot/core/astr_agent_tool_exec.py @@ -31,6 +31,9 @@ from astrbot.core.provider.entites import ProviderRequest from astrbot.core.provider.register import llm_tools from astrbot.core.tools.computer_tools import ( + CuaKeyboardTypeTool, + CuaMouseClickTool, + CuaScreenshotTool, ExecuteShellTool, FileDownloadTool, FileEditTool, @@ -186,6 +189,7 @@ def _get_runtime_computer_tools( cls, runtime: str, tool_mgr, + booter: str | None = None, ) -> dict[str, FunctionTool]: if runtime == "sandbox": shell_tool = tool_mgr.get_builtin_tool(ExecuteShellTool) @@ -196,7 +200,7 @@ def _get_runtime_computer_tools( write_tool = tool_mgr.get_builtin_tool(FileWriteTool) edit_tool = tool_mgr.get_builtin_tool(FileEditTool) grep_tool = tool_mgr.get_builtin_tool(GrepTool) - return { + tools = { shell_tool.name: shell_tool, python_tool.name: python_tool, upload_tool.name: upload_tool, @@ -206,6 +210,18 @@ def _get_runtime_computer_tools( edit_tool.name: edit_tool, grep_tool.name: grep_tool, } + if booter == "cua": + screenshot_tool = tool_mgr.get_builtin_tool(CuaScreenshotTool) + mouse_click_tool = tool_mgr.get_builtin_tool(CuaMouseClickTool) + keyboard_type_tool = tool_mgr.get_builtin_tool(CuaKeyboardTypeTool) + tools.update( + { + screenshot_tool.name: screenshot_tool, + mouse_click_tool.name: mouse_click_tool, + keyboard_type_tool.name: keyboard_type_tool, + } + ) + return tools if runtime == "local": shell_tool = tool_mgr.get_builtin_tool(ExecuteShellTool) python_tool = tool_mgr.get_builtin_tool(LocalPythonTool) @@ -242,6 +258,7 @@ def _build_handoff_toolset( runtime_computer_tools = cls._get_runtime_computer_tools( runtime, tool_mgr, + str(provider_settings.get("sandbox", {}).get("booter", "")), ) # Keep persona semantics aligned with the main agent: tools=None means diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index 87cb2db064..d2ce3687d9 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -47,6 +47,9 @@ BrowserExecTool, CreateSkillCandidateTool, CreateSkillPayloadTool, + CuaKeyboardTypeTool, + CuaMouseClickTool, + CuaScreenshotTool, EvaluateSkillCandidateTool, ExecuteShellTool, FileDownloadTool, @@ -1015,6 +1018,17 @@ def _apply_sandbox_tools( req.func_tool.add_tool(tool_mgr.get_builtin_tool(RollbackSkillReleaseTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(SyncSkillReleaseTool)) + if booter == "cua": + req.system_prompt += ( + "\n[CUA Desktop Control]\n" + "Use `astrbot_cua_screenshot` to inspect the current desktop before " + "clicking or typing. Use coordinates from screenshots for " + "`astrbot_cua_mouse_click`, then `astrbot_cua_keyboard_type` for text input.\n" + ) + req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaScreenshotTool)) + req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaMouseClickTool)) + req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaKeyboardTypeTool)) + req.system_prompt = f"{req.system_prompt or ''}\n{SANDBOX_MODE_PROMPT}\n" diff --git a/astrbot/core/computer/booters/base.py b/astrbot/core/computer/booters/base.py index 4c74e5edd6..c39032d4bb 100644 --- a/astrbot/core/computer/booters/base.py +++ b/astrbot/core/computer/booters/base.py @@ -1,6 +1,7 @@ from ..olayer import ( BrowserComponent, FileSystemComponent, + GUIComponent, PythonComponent, ShellComponent, ) @@ -29,6 +30,10 @@ def capabilities(self) -> tuple[str, ...] | None: def browser(self) -> BrowserComponent | None: return None + @property + def gui(self) -> GUIComponent | None: + return None + async def boot(self, session_id: str) -> None: ... async def shutdown(self) -> None: ... diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py new file mode 100644 index 0000000000..e60b18a635 --- /dev/null +++ b/astrbot/core/computer/booters/cua.py @@ -0,0 +1,483 @@ +from __future__ import annotations + +import base64 +import inspect +from pathlib import Path +from typing import Any + +from astrbot.api import logger + +from ..olayer import FileSystemComponent, GUIComponent, PythonComponent, ShellComponent +from .base import ComputerBooter +from .shipyard_search_file_util import search_files_via_shell + + +def _maybe_model_dump(value: Any) -> dict[str, Any]: + if isinstance(value, dict): + return value + if hasattr(value, "model_dump"): + dumped = value.model_dump() + if isinstance(dumped, dict): + return dumped + if hasattr(value, "dict"): + dumped = value.dict() + if isinstance(dumped, dict): + return dumped + return {} + + +async def _call_first( + obj: Any, names: tuple[str, ...], *args: Any, **kwargs: Any +) -> Any: + for name in names: + method = getattr(obj, name, None) + if method is None: + continue + return await method(*args, **kwargs) + raise AttributeError(f"None of these methods exist: {', '.join(names)}") + + +def _slice_content_by_lines( + content: str, + *, + offset: int | None = None, + limit: int | None = None, +) -> str: + lines = content.splitlines(keepends=True) + start = 0 if offset is None else offset + selected = lines[start:] if limit is None else lines[start : start + limit] + return "".join(selected) + + +def _result_text(payload: dict[str, Any], *keys: str) -> str: + for key in keys: + value = payload.get(key) + if value is not None: + return str(value) + return "" + + +class CuaShellComponent(ShellComponent): + def __init__(self, sandbox: Any) -> None: + self._sandbox = sandbox + + async def exec( + self, + command: str, + cwd: str | None = None, + env: dict[str, str] | None = None, + timeout: int | None = 30, + shell: bool = True, + background: bool = False, + ) -> dict[str, Any]: + if not shell: + return { + "stdout": "", + "stderr": "error: only shell mode is supported in CUA booter.", + "exit_code": 2, + "success": False, + } + + kwargs: dict[str, Any] = {} + if cwd is not None: + kwargs["cwd"] = cwd + if timeout is not None: + kwargs["timeout"] = timeout + if env: + kwargs["env"] = env + if background: + command = ( + f"nohup sh -lc {command!r} >/tmp/astrbot_cua_bg.log 2>&1 & echo $!" + ) + + result = await _call_first( + self._sandbox.shell, ("run", "exec"), command, **kwargs + ) + payload = _maybe_model_dump(result) + if not payload and isinstance(result, str): + payload = {"stdout": result} + + stdout = _result_text(payload, "stdout", "output") + stderr = _result_text(payload, "stderr", "error") + exit_code = payload.get( + "exit_code", payload.get("returncode", 0 if not stderr else 1) + ) + response = { + "stdout": stdout, + "stderr": stderr, + "exit_code": exit_code, + "success": bool( + payload.get("success", not stderr and exit_code in (0, None)) + ), + } + if background: + try: + response["pid"] = int(stdout.strip().splitlines()[-1]) + except Exception: + response["pid"] = None + return response + + +class CuaPythonComponent(PythonComponent): + def __init__(self, sandbox: Any) -> None: + self._sandbox = sandbox + + async def exec( + self, + code: str, + kernel_id: str | None = None, + timeout: int = 30, + silent: bool = False, + ) -> dict[str, Any]: + _ = kernel_id + python = getattr(self._sandbox, "python", None) + if python is not None: + result = await _call_first(python, ("run", "exec"), code, timeout=timeout) + payload = _maybe_model_dump(result) + else: + shell = CuaShellComponent(self._sandbox) + result = await shell.exec(f"python3 - <<'PY'\n{code}\nPY", timeout=timeout) + payload = { + "output": result.get("stdout", ""), + "error": result.get("stderr", ""), + } + + output_text = "" if silent else _result_text(payload, "stdout", "output") + error_text = _result_text(payload, "stderr", "error") + return { + "success": bool(payload.get("success", not error_text)), + "data": { + "output": {"text": output_text, "images": []}, + "error": error_text, + }, + "output": output_text, + "error": error_text, + } + + +class CuaFileSystemComponent(FileSystemComponent): + def __init__(self, sandbox: Any) -> None: + self._sandbox = sandbox + self._shell = CuaShellComponent(sandbox) + + @property + def _filesystem(self) -> Any: + return getattr(self._sandbox, "filesystem", None) + + async def create_file( + self, + path: str, + content: str = "", + mode: int = 0o644, + ) -> dict[str, Any]: + await self.write_file(path, content) + return {"success": True, "path": path, "mode": mode} + + async def read_file( + self, + path: str, + encoding: str = "utf-8", + offset: int | None = None, + limit: int | None = None, + ) -> dict[str, Any]: + fs = self._filesystem + if fs is not None and hasattr(fs, "read_file"): + content = await fs.read_file(path) + else: + result = await self._shell.exec(f"cat {path!r}") + if result.get("stderr"): + return {"success": False, "path": path, "error": result["stderr"]} + content = result.get("stdout", "") + if isinstance(content, bytes): + content = content.decode(encoding, errors="replace") + return { + "success": True, + "path": path, + "content": _slice_content_by_lines( + str(content), offset=offset, limit=limit + ), + } + + async def search_files( + self, + pattern: str, + path: str | None = None, + glob: str | None = None, + after_context: int | None = None, + before_context: int | None = None, + ) -> dict[str, Any]: + return await search_files_via_shell( + self._shell, + pattern=pattern, + path=path, + glob=glob, + after_context=after_context, + before_context=before_context, + ) + + async def edit_file( + self, + path: str, + old_string: str, + new_string: str, + replace_all: bool = False, + encoding: str = "utf-8", + ) -> dict[str, Any]: + read_result = await self.read_file(path, encoding=encoding) + if not read_result.get("success"): + return read_result + content = read_result.get("content", "") + occurrences = content.count(old_string) + if occurrences == 0: + return { + "success": False, + "error": "old string not found in file", + "replacements": 0, + } + updated = content.replace(old_string, new_string, -1 if replace_all else 1) + await self.write_file(path, updated, encoding=encoding) + return { + "success": True, + "path": path, + "replacements": occurrences if replace_all else 1, + } + + async def write_file( + self, + path: str, + content: str, + mode: str = "w", + encoding: str = "utf-8", + ) -> dict[str, Any]: + _ = mode + fs = self._filesystem + if fs is not None and hasattr(fs, "write_file"): + await fs.write_file(path, content) + else: + encoded = base64.b64encode(content.encode(encoding)).decode() + await self._shell.exec(f"base64 -d > {path!r} <<'EOF'\n{encoded}\nEOF") + return {"success": True, "path": path} + + async def delete_file(self, path: str) -> dict[str, Any]: + fs = self._filesystem + if fs is not None: + if hasattr(fs, "delete"): + await fs.delete(path) + elif hasattr(fs, "delete_file"): + await fs.delete_file(path) + else: + await self._shell.exec(f"rm -rf {path!r}") + else: + await self._shell.exec(f"rm -rf {path!r}") + return {"success": True, "path": path} + + async def list_dir( + self, + path: str = ".", + show_hidden: bool = False, + ) -> dict[str, Any]: + fs = self._filesystem + if fs is not None and hasattr(fs, "list_dir"): + entries = await fs.list_dir(path) + return {"success": True, "path": path, "entries": entries} + flags = "-la" if show_hidden else "-l" + result = await self._shell.exec(f"ls {flags} {path!r}") + return { + "success": not bool(result.get("stderr")), + "path": path, + "entries": result.get("stdout", ""), + "error": result.get("stderr", ""), + } + + +class CuaGUIComponent(GUIComponent): + def __init__(self, sandbox: Any) -> None: + self._sandbox = sandbox + + async def screenshot(self, path: str | None = None) -> dict[str, Any]: + raw = await self._sandbox.screenshot() + data = _screenshot_to_bytes(raw) + if path: + Path(path).parent.mkdir(parents=True, exist_ok=True) + Path(path).write_bytes(data) + return { + "success": True, + "path": path, + "mime_type": "image/png", + "base64": base64.b64encode(data).decode("ascii"), + } + + async def click(self, x: int, y: int, button: str = "left") -> dict[str, Any]: + result = await self._sandbox.mouse.click(x, y, button=button) + payload = _maybe_model_dump(result) + return {"success": bool(payload.get("success", True)), **payload} + + async def type_text(self, text: str) -> dict[str, Any]: + result = await self._sandbox.keyboard.type(text) + payload = _maybe_model_dump(result) + return {"success": bool(payload.get("success", True)), **payload} + + +def _screenshot_to_bytes(raw: Any) -> bytes: + if isinstance(raw, bytes | bytearray): + return bytes(raw) + if isinstance(raw, str): + if raw.startswith("data:image"): + raw = raw.split(",", 1)[1] + try: + return base64.b64decode(raw, validate=True) + except Exception: + candidate = Path(raw) + if candidate.is_file(): + return candidate.read_bytes() + return raw.encode("utf-8") + if hasattr(raw, "save"): + import io + + output = io.BytesIO() + raw.save(output, format="PNG") + return output.getvalue() + payload = _maybe_model_dump(raw) + for key in ("data", "base64", "image"): + value = payload.get(key) + if value: + return _screenshot_to_bytes(value) + raise TypeError(f"Unsupported CUA screenshot result: {type(raw)!r}") + + +class CuaBooter(ComputerBooter): + def __init__( + self, + image: str = "linux", + os_type: str = "linux", + ttl: int = 3600, + telemetry_enabled: bool = False, + ) -> None: + self.image = image + self.os_type = os_type + self.ttl = ttl + self.telemetry_enabled = telemetry_enabled + self._sandbox: Any | None = None + self._sandbox_cm: Any | None = None + self._shell: CuaShellComponent | None = None + self._python: CuaPythonComponent | None = None + self._fs: CuaFileSystemComponent | None = None + self._gui: CuaGUIComponent | None = None + + async def boot(self, session_id: str) -> None: + _ = session_id + try: + from cua import Image, Sandbox + except ImportError as exc: + raise RuntimeError( + "CUA sandbox support requires the optional `cua` package. " + "Install it with `pip install cua` in the AstrBot environment." + ) from exc + + image_obj = self._build_image(Image) + ephemeral_kwargs = self._build_ephemeral_kwargs(Sandbox.ephemeral) + self._sandbox_cm = Sandbox.ephemeral(image_obj, **ephemeral_kwargs) + self._sandbox = await self._sandbox_cm.__aenter__() + self._shell = CuaShellComponent(self._sandbox) + self._python = CuaPythonComponent(self._sandbox) + self._fs = CuaFileSystemComponent(self._sandbox) + self._gui = CuaGUIComponent(self._sandbox) + logger.info( + "[Computer] CUA sandbox booted: image=%s, os_type=%s", + self.image, + self.os_type, + ) + + def _build_image(self, image_cls: Any) -> Any: + image_name = (self.image or self.os_type or "linux").strip().lower() + factory = getattr(image_cls, image_name, None) + if callable(factory): + return factory() + os_factory = getattr(image_cls, (self.os_type or "linux").strip().lower(), None) + if callable(os_factory): + return os_factory() + return image_name + + def _build_ephemeral_kwargs(self, ephemeral: Any) -> dict[str, Any]: + try: + parameters = inspect.signature(ephemeral).parameters + except (TypeError, ValueError): + return {} + kwargs: dict[str, Any] = {} + if "ttl" in parameters: + kwargs["ttl"] = self.ttl + if "telemetry_enabled" in parameters: + kwargs["telemetry_enabled"] = self.telemetry_enabled + return kwargs + + async def shutdown(self) -> None: + if self._sandbox_cm is not None: + await self._sandbox_cm.__aexit__(None, None, None) + self._sandbox_cm = None + self._sandbox = None + + @property + def capabilities(self) -> tuple[str, ...] | None: + return ( + "python", + "shell", + "filesystem", + "gui", + "screenshot", + "mouse", + "keyboard", + ) + + @property + def fs(self) -> FileSystemComponent: + if self._fs is None: + raise RuntimeError("CuaBooter is not initialized.") + return self._fs + + @property + def python(self) -> PythonComponent: + if self._python is None: + raise RuntimeError("CuaBooter is not initialized.") + return self._python + + @property + def shell(self) -> ShellComponent: + if self._shell is None: + raise RuntimeError("CuaBooter is not initialized.") + return self._shell + + @property + def gui(self) -> GUIComponent | None: + return self._gui + + async def upload_file(self, path: str, file_name: str) -> dict: + local_path = Path(path) + if not local_path.is_file(): + return {"success": False, "error": f"File not found: {path}"} + if self._sandbox is not None and hasattr(self._sandbox, "upload_file"): + return _maybe_model_dump( + await self._sandbox.upload_file(str(local_path), file_name) + ) + content = local_path.read_bytes() + encoded = base64.b64encode(content).decode("ascii") + result = await self.shell.exec( + f"base64 -d > {file_name!r} <<'EOF'\n{encoded}\nEOF" + ) + return { + "success": not bool(result.get("stderr")), + "file_path": file_name, + **result, + } + + async def download_file(self, remote_path: str, local_path: str) -> None: + if self._sandbox is not None and hasattr(self._sandbox, "download_file"): + await self._sandbox.download_file(remote_path, local_path) + return + result = await self.shell.exec(f"base64 {remote_path!r}") + if result.get("stderr"): + raise RuntimeError(result["stderr"]) + Path(local_path).parent.mkdir(parents=True, exist_ok=True) + Path(local_path).write_bytes(base64.b64decode(result.get("stdout", ""))) + + async def available(self) -> bool: + return self._sandbox is not None diff --git a/astrbot/core/computer/computer_client.py b/astrbot/core/computer/computer_client.py index 715f938679..c834d653d3 100644 --- a/astrbot/core/computer/computer_client.py +++ b/astrbot/core/computer/computer_client.py @@ -484,6 +484,23 @@ async def get_booter( profile=profile, ttl=ttl, ) + elif booter_type == "cua": + from .booters.cua import CuaBooter + + image = sandbox_cfg.get("cua_image", "linux") + os_type = sandbox_cfg.get("cua_os_type", "linux") + ttl = sandbox_cfg.get("cua_ttl", 3600) + telemetry_enabled = sandbox_cfg.get("cua_telemetry_enabled", False) + + logger.info( + f"[Computer] CUA config: image={image}, os_type={os_type}, ttl={ttl}" + ) + client = CuaBooter( + image=image, + os_type=os_type, + ttl=ttl, + telemetry_enabled=telemetry_enabled, + ) elif booter_type == "boxlite": from .booters.boxlite import BoxliteBooter diff --git a/astrbot/core/computer/olayer/__init__.py b/astrbot/core/computer/olayer/__init__.py index e2348671eb..f446c7dde7 100644 --- a/astrbot/core/computer/olayer/__init__.py +++ b/astrbot/core/computer/olayer/__init__.py @@ -1,5 +1,6 @@ from .browser import BrowserComponent from .filesystem import FileSystemComponent +from .gui import GUIComponent from .python import PythonComponent from .shell import ShellComponent @@ -8,4 +9,5 @@ "ShellComponent", "FileSystemComponent", "BrowserComponent", + "GUIComponent", ] diff --git a/astrbot/core/computer/olayer/gui.py b/astrbot/core/computer/olayer/gui.py new file mode 100644 index 0000000000..f837d57a2a --- /dev/null +++ b/astrbot/core/computer/olayer/gui.py @@ -0,0 +1,21 @@ +""" +GUI automation component. +""" + +from typing import Any, Protocol + + +class GUIComponent(Protocol): + """Desktop GUI operations component.""" + + async def screenshot(self, path: str | None = None) -> dict[str, Any]: + """Capture a screenshot, optionally saving it to path.""" + ... + + async def click(self, x: int, y: int, button: str = "left") -> dict[str, Any]: + """Click at screen coordinates.""" + ... + + async def type_text(self, text: str) -> dict[str, Any]: + """Type text into the active UI target.""" + ... diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index cd1c81a888..2a3a02bf3a 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -175,6 +175,10 @@ "shipyard_neo_access_token": "", "shipyard_neo_profile": "python-default", "shipyard_neo_ttl": 3600, + "cua_image": "linux", + "cua_os_type": "linux", + "cua_ttl": 3600, + "cua_telemetry_enabled": False, }, "image_compress_enabled": True, "image_compress_options": { @@ -3289,8 +3293,8 @@ class ChatProviderTemplate(TypedDict): "provider_settings.sandbox.booter": { "description": "沙箱环境驱动器", "type": "string", - "options": ["shipyard_neo", "shipyard"], - "labels": ["Shipyard Neo", "Shipyard"], + "options": ["shipyard_neo", "shipyard", "cua"], + "labels": ["Shipyard Neo", "Shipyard", "CUA"], "condition": { "provider_settings.computer_use_runtime": "sandbox", }, @@ -3331,6 +3335,44 @@ class ChatProviderTemplate(TypedDict): "provider_settings.sandbox.booter": "shipyard_neo", }, }, + "provider_settings.sandbox.cua_image": { + "description": "CUA Image", + "type": "string", + "hint": "CUA 沙箱镜像/系统类型,默认 linux。可填写 linux、macos、windows、android,具体取决于 CUA SDK 支持。", + "condition": { + "provider_settings.computer_use_runtime": "sandbox", + "provider_settings.sandbox.booter": "cua", + }, + }, + "provider_settings.sandbox.cua_os_type": { + "description": "CUA OS Type", + "type": "string", + "options": ["linux", "macos", "windows", "android"], + "labels": ["Linux", "macOS", "Windows", "Android"], + "hint": "CUA 沙箱操作系统类型,默认 linux。", + "condition": { + "provider_settings.computer_use_runtime": "sandbox", + "provider_settings.sandbox.booter": "cua", + }, + }, + "provider_settings.sandbox.cua_ttl": { + "description": "CUA Sandbox TTL", + "type": "int", + "hint": "CUA 沙箱生存时间(秒)。当前作为会话配置保存,具体生效取决于 CUA SDK。", + "condition": { + "provider_settings.computer_use_runtime": "sandbox", + "provider_settings.sandbox.booter": "cua", + }, + }, + "provider_settings.sandbox.cua_telemetry_enabled": { + "description": "CUA Telemetry", + "type": "bool", + "hint": "是否允许 CUA SDK 发送遥测数据。默认关闭。", + "condition": { + "provider_settings.computer_use_runtime": "sandbox", + "provider_settings.sandbox.booter": "cua", + }, + }, "provider_settings.sandbox.shipyard_endpoint": { "description": "Shipyard API Endpoint", "type": "string", diff --git a/astrbot/core/tools/computer_tools/__init__.py b/astrbot/core/tools/computer_tools/__init__.py index 7e364ffd23..116c813733 100644 --- a/astrbot/core/tools/computer_tools/__init__.py +++ b/astrbot/core/tools/computer_tools/__init__.py @@ -1,3 +1,4 @@ +from .cua import CuaKeyboardTypeTool, CuaMouseClickTool, CuaScreenshotTool from .fs import ( FileDownloadTool, FileEditTool, @@ -32,6 +33,9 @@ "BrowserExecTool", "CreateSkillCandidateTool", "CreateSkillPayloadTool", + "CuaKeyboardTypeTool", + "CuaMouseClickTool", + "CuaScreenshotTool", "EvaluateSkillCandidateTool", "ExecuteShellTool", "FileDownloadTool", diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py new file mode 100644 index 0000000000..01d41ab469 --- /dev/null +++ b/astrbot/core/tools/computer_tools/cua.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import json +import uuid +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import mcp + +from astrbot.api import FunctionTool +from astrbot.core.agent.run_context import ContextWrapper +from astrbot.core.agent.tool import ToolExecResult +from astrbot.core.astr_agent_context import AstrAgentContext +from astrbot.core.computer.computer_client import get_booter +from astrbot.core.message.message_event_result import MessageChain +from astrbot.core.tools.computer_tools.util import check_admin_permission +from astrbot.core.tools.registry import builtin_tool +from astrbot.core.utils.astrbot_path import get_astrbot_temp_path + +_CUA_TOOL_CONFIG = { + "provider_settings.computer_use_runtime": "sandbox", + "provider_settings.sandbox.booter": "cua", +} + + +def _to_json(data: Any) -> str: + return json.dumps(data, ensure_ascii=False, default=str) + + +async def _get_gui_component(context: ContextWrapper[AstrAgentContext]) -> Any: + booter = await get_booter( + context.context.context, + context.context.event.unified_msg_origin, + ) + gui = getattr(booter, "gui", None) + if gui is None: + raise RuntimeError( + "Current sandbox booter does not support CUA GUI capability. " + "Please switch sandbox booter to cua." + ) + return gui + + +@builtin_tool(config=_CUA_TOOL_CONFIG) +@dataclass +class CuaScreenshotTool(FunctionTool): + name: str = "astrbot_cua_screenshot" + description: str = ( + "Capture a screenshot from the CUA sandbox and optionally send it to the user." + ) + parameters: dict = field( + default_factory=lambda: { + "type": "object", + "properties": { + "send_to_user": { + "type": "boolean", + "description": "Whether to send the screenshot image to the current conversation.", + "default": True, + }, + }, + } + ) + + async def call( + self, + context: ContextWrapper[AstrAgentContext], + send_to_user: bool = True, + ) -> ToolExecResult: + if err := check_admin_permission(context, "Taking CUA screenshots"): + return err + try: + gui = await _get_gui_component(context) + path = _new_screenshot_path(context.context.event.unified_msg_origin) + result = await gui.screenshot(path) + payload = {"success": True, **result, "path": path} + if send_to_user: + await context.context.event.send(MessageChain().file_image(path)) + payload["sent_to_user"] = True + return mcp.types.CallToolResult( + content=[ + mcp.types.TextContent(type="text", text=_to_json(payload)), + mcp.types.ImageContent( + type="image", + data=str(payload.get("base64", "")), + mimeType=str(payload.get("mime_type", "image/png")), + ), + ] + ) + except Exception as e: + return f"Error taking CUA screenshot: {str(e)}" + + +@builtin_tool(config=_CUA_TOOL_CONFIG) +@dataclass +class CuaMouseClickTool(FunctionTool): + name: str = "astrbot_cua_mouse_click" + description: str = "Click a coordinate in the CUA sandbox desktop." + parameters: dict = field( + default_factory=lambda: { + "type": "object", + "properties": { + "x": {"type": "integer", "description": "X coordinate."}, + "y": {"type": "integer", "description": "Y coordinate."}, + "button": { + "type": "string", + "description": "Mouse button, usually left, right, or middle.", + "default": "left", + }, + }, + "required": ["x", "y"], + } + ) + + async def call( + self, + context: ContextWrapper[AstrAgentContext], + x: int, + y: int, + button: str = "left", + ) -> ToolExecResult: + if err := check_admin_permission(context, "Using CUA mouse"): + return err + try: + gui = await _get_gui_component(context) + return _to_json(await gui.click(x, y, button=button)) + except Exception as e: + return f"Error clicking CUA desktop: {str(e)}" + + +@builtin_tool(config=_CUA_TOOL_CONFIG) +@dataclass +class CuaKeyboardTypeTool(FunctionTool): + name: str = "astrbot_cua_keyboard_type" + description: str = "Type text into the CUA sandbox desktop." + parameters: dict = field( + default_factory=lambda: { + "type": "object", + "properties": { + "text": {"type": "string", "description": "Text to type."}, + }, + "required": ["text"], + } + ) + + async def call( + self, + context: ContextWrapper[AstrAgentContext], + text: str, + ) -> ToolExecResult: + if err := check_admin_permission(context, "Using CUA keyboard"): + return err + try: + gui = await _get_gui_component(context) + return _to_json(await gui.type_text(text)) + except Exception as e: + return f"Error typing in CUA desktop: {str(e)}" + + +def _new_screenshot_path(umo: str) -> str: + safe_prefix = uuid.uuid5(uuid.NAMESPACE_DNS, umo).hex[:12] + screenshot_dir = Path(get_astrbot_temp_path()) / "cua_screenshots" + screenshot_dir.mkdir(parents=True, exist_ok=True) + return str(screenshot_dir / f"{safe_prefix}-{uuid.uuid4().hex}.png") diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py new file mode 100644 index 0000000000..18ba14e6ec --- /dev/null +++ b/tests/unit/test_cua_computer_use.py @@ -0,0 +1,265 @@ +import asyncio +import base64 +import json +from pathlib import Path +from types import SimpleNamespace + +import pytest +import mcp + +from astrbot.core.config.default import CONFIG_METADATA_3 +from astrbot.core.astr_agent_tool_exec import FunctionToolExecutor +from astrbot.core.provider.func_tool_manager import FunctionToolManager + + +class FakeContext: + def __init__(self, config: dict): + self._config = config + + def get_config(self, umo: str | None = None): + return self._config + + +class FakeShell: + def __init__(self): + self.commands = [] + + async def run(self, command: str, **kwargs): + self.commands.append((command, kwargs)) + return {"stdout": "ok", "stderr": "", "exit_code": 0} + + +class FakePython: + async def run(self, code: str, **kwargs): + return {"output": "42", "error": ""} + + +class FakeFilesystem: + def __init__(self): + self.files = {} + + async def write_file(self, path: str, content: str): + self.files[path] = content + + async def read_file(self, path: str): + return self.files[path] + + async def delete(self, path: str): + self.files.pop(path, None) + + async def list_dir(self, path: str): + return [path] + + +class FakeMouse: + def __init__(self): + self.clicks = [] + + async def click(self, x: int, y: int, button: str = "left"): + self.clicks.append((x, y, button)) + return {"success": True} + + +class FakeKeyboard: + def __init__(self): + self.typed = [] + + async def type(self, text: str): + self.typed.append(text) + return {"success": True} + + +class FakeSandbox: + def __init__(self): + self.shell = FakeShell() + self.python = FakePython() + self.filesystem = FakeFilesystem() + self.mouse = FakeMouse() + self.keyboard = FakeKeyboard() + + async def screenshot(self): + return b"fake-png" + + +def _agent_computer_use_items(): + return CONFIG_METADATA_3["ai_group"]["metadata"]["agent_computer_use"]["items"] + + +@pytest.mark.asyncio +async def test_get_booter_creates_cua_booter(monkeypatch): + from astrbot.core.computer import computer_client + + created = [] + + class FakeCuaBooter: + def __init__( + self, + image: str, + os_type: str, + ttl: int, + telemetry_enabled: bool, + ): + created.append((image, os_type, ttl, telemetry_enabled)) + + async def boot(self, session_id: str): + self.session_id = session_id + + async def available(self): + return True + + monkeypatch.setattr(computer_client, "_sync_skills_to_sandbox", lambda booter: asyncio.sleep(0)) + monkeypatch.setitem(computer_client.session_booter, "cua-test", None) + computer_client.session_booter.pop("cua-test", None) + monkeypatch.setattr( + "astrbot.core.computer.booters.cua.CuaBooter", + FakeCuaBooter, + raising=False, + ) + + ctx = FakeContext( + { + "provider_settings": { + "computer_use_runtime": "sandbox", + "sandbox": { + "booter": "cua", + "cua_image": "linux", + "cua_os_type": "linux", + "cua_ttl": 120, + "cua_telemetry_enabled": False, + }, + } + } + ) + + booter = await computer_client.get_booter(ctx, "cua-test") + + assert isinstance(booter, FakeCuaBooter) + assert created == [("linux", "linux", 120, False)] + + +@pytest.mark.asyncio +async def test_cua_components_map_sdk_results(tmp_path): + from astrbot.core.computer.booters.cua import ( + CuaFileSystemComponent, + CuaGUIComponent, + CuaPythonComponent, + CuaShellComponent, + ) + + sandbox = FakeSandbox() + + shell_result = await CuaShellComponent(sandbox).exec("echo ok", cwd="/workspace") + python_result = await CuaPythonComponent(sandbox).exec("print(42)") + fs = CuaFileSystemComponent(sandbox) + await fs.write_file("hello.txt", "hello") + read_result = await fs.read_file("hello.txt") + screenshot_path = tmp_path / "screen.png" + gui = CuaGUIComponent(sandbox) + screenshot_result = await gui.screenshot(str(screenshot_path)) + click_result = await gui.click(10, 20, button="right") + type_result = await gui.type_text("hello") + + assert shell_result["stdout"] == "ok" + assert python_result["data"]["output"]["text"] == "42" + assert read_result["content"] == "hello" + assert screenshot_path.read_bytes() == b"fake-png" + assert screenshot_result["mime_type"] == "image/png" + assert click_result["success"] is True + assert type_result["success"] is True + assert sandbox.mouse.clicks == [(10, 20, "right")] + assert sandbox.keyboard.typed == ["hello"] + + +def test_cua_tools_are_registered_as_builtin_tools(): + from astrbot.core.tools.computer_tools.cua import ( + CuaKeyboardTypeTool, + CuaMouseClickTool, + CuaScreenshotTool, + ) + + manager = FunctionToolManager() + + assert manager.get_builtin_tool(CuaScreenshotTool).name == "astrbot_cua_screenshot" + assert manager.get_builtin_tool(CuaMouseClickTool).name == "astrbot_cua_mouse_click" + assert manager.get_builtin_tool(CuaKeyboardTypeTool).name == "astrbot_cua_keyboard_type" + + +def test_cua_runtime_tools_are_available_to_handoffs(): + manager = FunctionToolManager() + + tools = FunctionToolExecutor._get_runtime_computer_tools("sandbox", manager, "cua") + + assert "astrbot_cua_screenshot" in tools + assert "astrbot_cua_mouse_click" in tools + assert "astrbot_cua_keyboard_type" in tools + + +def test_cua_is_exposed_in_sandbox_config_metadata(): + items = _agent_computer_use_items() + booter = items["provider_settings.sandbox.booter"] + + assert "cua" in booter["options"] + assert "CUA" in booter["labels"] + assert "provider_settings.sandbox.cua_image" in items + assert "provider_settings.sandbox.cua_os_type" in items + assert "provider_settings.sandbox.cua_ttl" in items + assert "provider_settings.sandbox.cua_telemetry_enabled" in items + + +@pytest.mark.asyncio +async def test_screenshot_tool_returns_image_and_sends_file(monkeypatch, tmp_path): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaScreenshotTool + + sent_messages = [] + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + async def send(self, message): + sent_messages.append(message) + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext( + { + "provider_settings": { + "computer_use_runtime": "sandbox", + "computer_use_require_admin": True, + "sandbox": {"booter": "cua"}, + } + } + ) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeGUI: + async def screenshot(self, path: str): + Path(path).write_bytes(b"fake-png") + return { + "success": True, + "path": path, + "mime_type": "image/png", + "base64": base64.b64encode(b"fake-png").decode(), + } + + class FakeBooter: + gui = FakeGUI() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) + + result = await CuaScreenshotTool().call(FakeWrapper(), send_to_user=True) + + assert isinstance(result, mcp.types.CallToolResult) + image_parts = [part for part in result.content if part.type == "image"] + text_parts = [part for part in result.content if part.type == "text"] + payload = json.loads(text_parts[0].text) + assert image_parts[0].data == base64.b64encode(b"fake-png").decode() + assert Path(payload["path"]).exists() + assert sent_messages From 76cdb53c4d8c255b1d8341e06dc92e5df5beb7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 11:59:22 +0900 Subject: [PATCH 02/33] fix: add CUA config metadata translations --- .../locales/en-US/features/config-metadata.json | 16 ++++++++++++++++ .../locales/ru-RU/features/config-metadata.json | 16 ++++++++++++++++ .../locales/zh-CN/features/config-metadata.json | 16 ++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json index 4f35dd2859..ff1bc690cf 100644 --- a/dashboard/src/i18n/locales/en-US/features/config-metadata.json +++ b/dashboard/src/i18n/locales/en-US/features/config-metadata.json @@ -186,6 +186,22 @@ "description": "Shipyard Neo Sandbox TTL", "hint": "Sandbox time-to-live in seconds." }, + "cua_image": { + "description": "CUA Image", + "hint": "CUA sandbox image or OS type. Defaults to linux. Supported values depend on the installed CUA SDK." + }, + "cua_os_type": { + "description": "CUA OS Type", + "hint": "CUA sandbox operating system type. Defaults to linux." + }, + "cua_ttl": { + "description": "CUA Sandbox TTL", + "hint": "CUA sandbox time-to-live in seconds. Actual behavior depends on the installed CUA SDK." + }, + "cua_telemetry_enabled": { + "description": "CUA Telemetry", + "hint": "Allow the CUA SDK to send telemetry data. Disabled by default." + }, "shipyard_endpoint": { "description": "Shipyard API Endpoint", "hint": "API access address for Shipyard service." diff --git a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json index 08d11aed6a..17f589a131 100644 --- a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json +++ b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json @@ -186,6 +186,22 @@ "description": "TTL песочницы Shipyard Neo", "hint": "Время жизни песочницы в секундах." }, + "cua_image": { + "description": "Образ CUA", + "hint": "Образ или тип ОС песочницы CUA. По умолчанию linux. Поддерживаемые значения зависят от установленного CUA SDK." + }, + "cua_os_type": { + "description": "Тип ОС CUA", + "hint": "Тип операционной системы песочницы CUA. По умолчанию linux." + }, + "cua_ttl": { + "description": "TTL песочницы CUA", + "hint": "Время жизни песочницы CUA в секундах. Фактическое поведение зависит от установленного CUA SDK." + }, + "cua_telemetry_enabled": { + "description": "Телеметрия CUA", + "hint": "Разрешить CUA SDK отправлять телеметрию. По умолчанию выключено." + }, "shipyard_endpoint": { "description": "Эндпоинт Shipyard API", "hint": "Адрес API для доступа к сервису Shipyard." diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json index 8495f9ba1a..75f28273a9 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json +++ b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json @@ -188,6 +188,22 @@ "description": "Shipyard Neo Sandbox 存活时间(秒)", "hint": "Shipyard Neo 沙箱的生存时间(秒)。" }, + "cua_image": { + "description": "CUA 镜像", + "hint": "CUA 沙箱镜像/系统类型,默认 linux。可填写 linux、macos、windows、android,具体取决于 CUA SDK 支持。" + }, + "cua_os_type": { + "description": "CUA 操作系统类型", + "hint": "CUA 沙箱操作系统类型,默认 linux。" + }, + "cua_ttl": { + "description": "CUA Sandbox 存活时间(秒)", + "hint": "CUA 沙箱生存时间(秒)。当前作为会话配置保存,具体生效取决于 CUA SDK。" + }, + "cua_telemetry_enabled": { + "description": "CUA 遥测", + "hint": "是否允许 CUA SDK 发送遥测数据。默认关闭。" + }, "shipyard_endpoint": { "description": "Shipyard API Endpoint", "hint": "Shipyard 服务的 API 访问地址。" From bc9f966c83ecdc3072e20d3b7ced754e014d45a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 12:10:38 +0900 Subject: [PATCH 03/33] fix: address CUA sandbox review feedback --- astrbot/core/astr_agent_tool_exec.py | 3 +- astrbot/core/computer/booters/cua.py | 12 +++++-- tests/unit/test_cua_computer_use.py | 50 ++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/astrbot/core/astr_agent_tool_exec.py b/astrbot/core/astr_agent_tool_exec.py index dd8f6e4537..bb4fd08629 100644 --- a/astrbot/core/astr_agent_tool_exec.py +++ b/astrbot/core/astr_agent_tool_exec.py @@ -191,6 +191,7 @@ def _get_runtime_computer_tools( tool_mgr, booter: str | None = None, ) -> dict[str, FunctionTool]: + booter = "" if booter is None else str(booter) if runtime == "sandbox": shell_tool = tool_mgr.get_builtin_tool(ExecuteShellTool) python_tool = tool_mgr.get_builtin_tool(PythonTool) @@ -258,7 +259,7 @@ def _build_handoff_toolset( runtime_computer_tools = cls._get_runtime_computer_tools( runtime, tool_mgr, - str(provider_settings.get("sandbox", {}).get("booter", "")), + provider_settings.get("sandbox", {}).get("booter"), ) # Keep persona semantics aligned with the main agent: tools=None means diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index e60b18a635..d9d63b4662 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -57,6 +57,10 @@ def _result_text(payload: dict[str, Any], *keys: str) -> str: return "" +def _split_listing_entries(output: str) -> list[str]: + return [line for line in output.splitlines() if line.strip()] + + class CuaShellComponent(ShellComponent): def __init__(self, sandbox: Any) -> None: self._sandbox = sandbox @@ -285,7 +289,7 @@ async def list_dir( return { "success": not bool(result.get("stderr")), "path": path, - "entries": result.get("stdout", ""), + "entries": _split_listing_entries(result.get("stdout", "")), "error": result.get("stderr", ""), } @@ -319,7 +323,7 @@ async def type_text(self, text: str) -> dict[str, Any]: def _screenshot_to_bytes(raw: Any) -> bytes: - if isinstance(raw, bytes | bytearray): + if isinstance(raw, (bytes, bytearray)): return bytes(raw) if isinstance(raw, str): if raw.startswith("data:image"): @@ -415,6 +419,10 @@ async def shutdown(self) -> None: await self._sandbox_cm.__aexit__(None, None, None) self._sandbox_cm = None self._sandbox = None + self._shell = None + self._python = None + self._fs = None + self._gui = None @property def capabilities(self) -> tuple[str, ...] | None: diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 18ba14e6ec..6cb0c637f6 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -170,6 +170,47 @@ async def test_cua_components_map_sdk_results(tmp_path): assert sandbox.keyboard.typed == ["hello"] +@pytest.mark.asyncio +async def test_cua_list_dir_returns_entries_list_for_shell_fallback(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + sandbox = FakeSandbox() + delattr(sandbox, "filesystem") + + result = await CuaFileSystemComponent(sandbox).list_dir(".") + + assert result["success"] is True + assert result["entries"] == ["ok"] + + +@pytest.mark.asyncio +async def test_cua_shutdown_clears_cached_components(): + from astrbot.core.computer.booters.cua import CuaBooter + + closed = [] + + class FakeSandboxContext: + async def __aexit__(self, exc_type, exc, tb): + closed.append(True) + + booter = CuaBooter() + booter._sandbox = FakeSandbox() + booter._sandbox_cm = FakeSandboxContext() + booter._shell = object() + booter._python = object() + booter._fs = object() + booter._gui = object() + + await booter.shutdown() + + assert closed == [True] + assert await booter.available() is False + assert booter._shell is None + assert booter._python is None + assert booter._fs is None + assert booter._gui is None + + def test_cua_tools_are_registered_as_builtin_tools(): from astrbot.core.tools.computer_tools.cua import ( CuaKeyboardTypeTool, @@ -194,6 +235,15 @@ def test_cua_runtime_tools_are_available_to_handoffs(): assert "astrbot_cua_keyboard_type" in tools +def test_runtime_tool_selection_treats_none_booter_as_empty(): + manager = FunctionToolManager() + + tools = FunctionToolExecutor._get_runtime_computer_tools("sandbox", manager, None) + + assert "astrbot_execute_shell" in tools + assert "astrbot_cua_screenshot" not in tools + + def test_cua_is_exposed_in_sandbox_config_metadata(): items = _agent_computer_use_items() booter = items["provider_settings.sandbox.booter"] From cd7ca5a6e98258f93b4fa89f8239ee979e71ada9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 13:13:20 +0900 Subject: [PATCH 04/33] fix: default CUA sandbox to local mode --- astrbot/core/computer/booters/cua.py | 8 ++++ astrbot/core/computer/computer_client.py | 4 ++ astrbot/core/config/default.py | 22 +++++++++++ .../en-US/features/config-metadata.json | 8 ++++ .../ru-RU/features/config-metadata.json | 8 ++++ .../zh-CN/features/config-metadata.json | 8 ++++ tests/unit/test_cua_computer_use.py | 39 ++++++++++++++++++- 7 files changed, 95 insertions(+), 2 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index d9d63b4662..b7aac5b889 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -356,11 +356,15 @@ def __init__( os_type: str = "linux", ttl: int = 3600, telemetry_enabled: bool = False, + local: bool = True, + api_key: str = "", ) -> None: self.image = image self.os_type = os_type self.ttl = ttl self.telemetry_enabled = telemetry_enabled + self.local = local + self.api_key = api_key self._sandbox: Any | None = None self._sandbox_cm: Any | None = None self._shell: CuaShellComponent | None = None @@ -412,6 +416,10 @@ def _build_ephemeral_kwargs(self, ephemeral: Any) -> dict[str, Any]: kwargs["ttl"] = self.ttl if "telemetry_enabled" in parameters: kwargs["telemetry_enabled"] = self.telemetry_enabled + if "local" in parameters: + kwargs["local"] = self.local + if "api_key" in parameters and self.api_key: + kwargs["api_key"] = self.api_key return kwargs async def shutdown(self) -> None: diff --git a/astrbot/core/computer/computer_client.py b/astrbot/core/computer/computer_client.py index c834d653d3..2f9895033c 100644 --- a/astrbot/core/computer/computer_client.py +++ b/astrbot/core/computer/computer_client.py @@ -491,6 +491,8 @@ async def get_booter( os_type = sandbox_cfg.get("cua_os_type", "linux") ttl = sandbox_cfg.get("cua_ttl", 3600) telemetry_enabled = sandbox_cfg.get("cua_telemetry_enabled", False) + local = sandbox_cfg.get("cua_local", True) + api_key = sandbox_cfg.get("cua_api_key", "") logger.info( f"[Computer] CUA config: image={image}, os_type={os_type}, ttl={ttl}" @@ -500,6 +502,8 @@ async def get_booter( os_type=os_type, ttl=ttl, telemetry_enabled=telemetry_enabled, + local=local, + api_key=api_key, ) elif booter_type == "boxlite": from .booters.boxlite import BoxliteBooter diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 2a3a02bf3a..05f9b345ae 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -179,6 +179,8 @@ "cua_os_type": "linux", "cua_ttl": 3600, "cua_telemetry_enabled": False, + "cua_local": True, + "cua_api_key": "", }, "image_compress_enabled": True, "image_compress_options": { @@ -3373,6 +3375,26 @@ class ChatProviderTemplate(TypedDict): "provider_settings.sandbox.booter": "cua", }, }, + "provider_settings.sandbox.cua_local": { + "description": "CUA Local Sandbox", + "type": "bool", + "hint": "是否优先使用 CUA 本地沙箱。默认开启,避免云端沙箱要求 CUA_API_KEY。关闭后可使用 CUA 云端沙箱。", + "condition": { + "provider_settings.computer_use_runtime": "sandbox", + "provider_settings.sandbox.booter": "cua", + }, + }, + "provider_settings.sandbox.cua_api_key": { + "description": "CUA API Key", + "type": "string", + "hint": "CUA 云端沙箱 API Key。仅在关闭本地沙箱时需要。也可以通过 CUA_API_KEY 环境变量提供。", + "obvious_hint": True, + "condition": { + "provider_settings.computer_use_runtime": "sandbox", + "provider_settings.sandbox.booter": "cua", + "provider_settings.sandbox.cua_local": False, + }, + }, "provider_settings.sandbox.shipyard_endpoint": { "description": "Shipyard API Endpoint", "type": "string", diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json index ff1bc690cf..c0796b7f07 100644 --- a/dashboard/src/i18n/locales/en-US/features/config-metadata.json +++ b/dashboard/src/i18n/locales/en-US/features/config-metadata.json @@ -202,6 +202,14 @@ "description": "CUA Telemetry", "hint": "Allow the CUA SDK to send telemetry data. Disabled by default." }, + "cua_local": { + "description": "CUA Local Sandbox", + "hint": "Prefer a local CUA sandbox. Enabled by default to avoid requiring CUA_API_KEY for cloud sandboxes. Disable this to use CUA cloud sandboxes." + }, + "cua_api_key": { + "description": "CUA API Key", + "hint": "CUA cloud sandbox API key. Required only when local sandbox is disabled. You can also provide it via the CUA_API_KEY environment variable." + }, "shipyard_endpoint": { "description": "Shipyard API Endpoint", "hint": "API access address for Shipyard service." diff --git a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json index 17f589a131..2f62db65ab 100644 --- a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json +++ b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json @@ -202,6 +202,14 @@ "description": "Телеметрия CUA", "hint": "Разрешить CUA SDK отправлять телеметрию. По умолчанию выключено." }, + "cua_local": { + "description": "Локальная песочница CUA", + "hint": "Предпочитать локальную песочницу CUA. Включено по умолчанию, чтобы не требовать CUA_API_KEY для облачных песочниц. Отключите для использования облачных песочниц CUA." + }, + "cua_api_key": { + "description": "CUA API Key", + "hint": "API key для облачной песочницы CUA. Требуется только если локальная песочница отключена. Также можно передать через переменную окружения CUA_API_KEY." + }, "shipyard_endpoint": { "description": "Эндпоинт Shipyard API", "hint": "Адрес API для доступа к сервису Shipyard." diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json index 75f28273a9..407e9f9f45 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json +++ b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json @@ -204,6 +204,14 @@ "description": "CUA 遥测", "hint": "是否允许 CUA SDK 发送遥测数据。默认关闭。" }, + "cua_local": { + "description": "CUA 本地沙箱", + "hint": "是否优先使用 CUA 本地沙箱。默认开启,避免云端沙箱要求 CUA_API_KEY。关闭后可使用 CUA 云端沙箱。" + }, + "cua_api_key": { + "description": "CUA API Key", + "hint": "CUA 云端沙箱 API Key。仅在关闭本地沙箱时需要。也可以通过 CUA_API_KEY 环境变量提供。" + }, "shipyard_endpoint": { "description": "Shipyard API Endpoint", "hint": "Shipyard 服务的 API 访问地址。" diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 6cb0c637f6..7aa881bdb5 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -98,8 +98,10 @@ def __init__( os_type: str, ttl: int, telemetry_enabled: bool, + local: bool, + api_key: str, ): - created.append((image, os_type, ttl, telemetry_enabled)) + created.append((image, os_type, ttl, telemetry_enabled, local, api_key)) async def boot(self, session_id: str): self.session_id = session_id @@ -126,6 +128,8 @@ async def available(self): "cua_os_type": "linux", "cua_ttl": 120, "cua_telemetry_enabled": False, + "cua_local": True, + "cua_api_key": "", }, } } @@ -134,7 +138,33 @@ async def available(self): booter = await computer_client.get_booter(ctx, "cua-test") assert isinstance(booter, FakeCuaBooter) - assert created == [("linux", "linux", 120, False)] + assert created == [("linux", "linux", 120, False, True, "")] + + +def test_cua_ephemeral_kwargs_include_local_when_supported(): + from astrbot.core.computer.booters.cua import CuaBooter + + def ephemeral(image, ttl=None, telemetry_enabled=None, local=None): + return image, ttl, telemetry_enabled, local + + kwargs = CuaBooter(ttl=120, telemetry_enabled=False, local=True)._build_ephemeral_kwargs( + ephemeral + ) + + assert kwargs == {"ttl": 120, "telemetry_enabled": False, "local": True} + + +def test_cua_ephemeral_kwargs_include_api_key_for_cloud_when_supported(): + from astrbot.core.computer.booters.cua import CuaBooter + + def ephemeral(image, local=None, api_key=None): + return image, local, api_key + + kwargs = CuaBooter(local=False, api_key="sk-test")._build_ephemeral_kwargs( + ephemeral + ) + + assert kwargs == {"local": False, "api_key": "sk-test"} @pytest.mark.asyncio @@ -254,6 +284,11 @@ def test_cua_is_exposed_in_sandbox_config_metadata(): assert "provider_settings.sandbox.cua_os_type" in items assert "provider_settings.sandbox.cua_ttl" in items assert "provider_settings.sandbox.cua_telemetry_enabled" in items + assert "provider_settings.sandbox.cua_local" in items + assert "provider_settings.sandbox.cua_api_key" in items + assert items["provider_settings.sandbox.cua_api_key"]["condition"][ + "provider_settings.sandbox.cua_local" + ] is False @pytest.mark.asyncio From dc882c1c56d277022e2d8e9cd4160df10ca133dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 13:16:07 +0900 Subject: [PATCH 05/33] fix: harden CUA SDK method compatibility --- astrbot/core/computer/booters/cua.py | 10 +++++-- tests/unit/test_cua_computer_use.py | 45 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index b7aac5b889..50780b25a8 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -12,6 +12,12 @@ from .shipyard_search_file_util import search_files_via_shell +async def _maybe_await(value: Any) -> Any: + if inspect.isawaitable(value): + return await value + return value + + def _maybe_model_dump(value: Any) -> dict[str, Any]: if isinstance(value, dict): return value @@ -33,7 +39,7 @@ async def _call_first( method = getattr(obj, name, None) if method is None: continue - return await method(*args, **kwargs) + return await _maybe_await(method(*args, **kwargs)) raise AttributeError(f"None of these methods exist: {', '.join(names)}") @@ -284,7 +290,7 @@ async def list_dir( if fs is not None and hasattr(fs, "list_dir"): entries = await fs.list_dir(path) return {"success": True, "path": path, "entries": entries} - flags = "-la" if show_hidden else "-l" + flags = "-1A" if show_hidden else "-1" result = await self._shell.exec(f"ls {flags} {path!r}") return { "success": not bool(result.get("stderr")), diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 7aa881bdb5..319f0f76cd 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -81,6 +81,21 @@ async def screenshot(self): return b"fake-png" +class SyncShell: + def __init__(self, stdout: str = "ok"): + self.commands = [] + self.stdout = stdout + + def run(self, command: str, **kwargs): + self.commands.append((command, kwargs)) + return {"stdout": self.stdout, "stderr": "", "exit_code": 0} + + +class SyncPython: + def run(self, code: str, **kwargs): + return {"output": "sync", "error": ""} + + def _agent_computer_use_items(): return CONFIG_METADATA_3["ai_group"]["metadata"]["agent_computer_use"]["items"] @@ -211,6 +226,36 @@ async def test_cua_list_dir_returns_entries_list_for_shell_fallback(): assert result["success"] is True assert result["entries"] == ["ok"] + assert sandbox.shell.commands[0][0] == "ls -1 '.'" + + +@pytest.mark.asyncio +async def test_cua_list_dir_shell_fallback_returns_filename_only_entries(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + sandbox = FakeSandbox() + sandbox.shell = SyncShell("alpha.txt\nfolder\n") + delattr(sandbox, "filesystem") + + result = await CuaFileSystemComponent(sandbox).list_dir(".", show_hidden=True) + + assert result["entries"] == ["alpha.txt", "folder"] + assert sandbox.shell.commands[0][0] == "ls -1A '.'" + + +@pytest.mark.asyncio +async def test_cua_shell_and_python_accept_sync_sdk_methods(): + from astrbot.core.computer.booters.cua import CuaPythonComponent, CuaShellComponent + + sandbox = FakeSandbox() + sandbox.shell = SyncShell() + sandbox.python = SyncPython() + + shell_result = await CuaShellComponent(sandbox).exec("echo ok") + python_result = await CuaPythonComponent(sandbox).exec("print('ok')") + + assert shell_result["stdout"] == "ok" + assert python_result["data"]["output"]["text"] == "sync" @pytest.mark.asyncio From 1fbcfd3775426e5e44613cbcd38c990a2c35ea92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 13:22:38 +0900 Subject: [PATCH 06/33] fix: harden CUA GUI and permission handling --- astrbot/core/computer/booters/cua.py | 39 +++++++++++++++----- tests/unit/test_cua_computer_use.py | 54 ++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 8 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 50780b25a8..e896f10247 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -18,6 +18,15 @@ async def _maybe_await(value: Any) -> Any: return value +async def _write_base64_via_shell( + shell: ShellComponent, + path: str, + data: bytes, +) -> dict[str, Any]: + encoded = base64.b64encode(data).decode("ascii") + return await shell.exec(f"base64 -d > {path!r} <<'EOF'\n{encoded}\nEOF") + + def _maybe_model_dump(value: Any) -> dict[str, Any]: if isinstance(value, dict): return value @@ -67,6 +76,21 @@ def _split_listing_entries(output: str) -> list[str]: return [line for line in output.splitlines() if line.strip()] +def _require_component_method( + root: Any, + component_name: str, + method_name: str, +) -> Any: + component = getattr(root, component_name, None) + method = getattr(component, method_name, None) if component is not None else None + if method is None: + raise RuntimeError( + f"CUA sandbox does not provide `{component_name}.{method_name}`. " + "Please check the installed CUA SDK version and sandbox backend." + ) + return method + + class CuaShellComponent(ShellComponent): def __init__(self, sandbox: Any) -> None: self._sandbox = sandbox @@ -264,8 +288,7 @@ async def write_file( if fs is not None and hasattr(fs, "write_file"): await fs.write_file(path, content) else: - encoded = base64.b64encode(content.encode(encoding)).decode() - await self._shell.exec(f"base64 -d > {path!r} <<'EOF'\n{encoded}\nEOF") + await _write_base64_via_shell(self._shell, path, content.encode(encoding)) return {"success": True, "path": path} async def delete_file(self, path: str) -> dict[str, Any]: @@ -318,12 +341,14 @@ async def screenshot(self, path: str | None = None) -> dict[str, Any]: } async def click(self, x: int, y: int, button: str = "left") -> dict[str, Any]: - result = await self._sandbox.mouse.click(x, y, button=button) + click = _require_component_method(self._sandbox, "mouse", "click") + result = await _maybe_await(click(x, y, button=button)) payload = _maybe_model_dump(result) return {"success": bool(payload.get("success", True)), **payload} async def type_text(self, text: str) -> dict[str, Any]: - result = await self._sandbox.keyboard.type(text) + type_text = _require_component_method(self._sandbox, "keyboard", "type") + result = await _maybe_await(type_text(text)) payload = _maybe_model_dump(result) return {"success": bool(payload.get("success", True)), **payload} @@ -480,10 +505,8 @@ async def upload_file(self, path: str, file_name: str) -> dict: return _maybe_model_dump( await self._sandbox.upload_file(str(local_path), file_name) ) - content = local_path.read_bytes() - encoded = base64.b64encode(content).decode("ascii") - result = await self.shell.exec( - f"base64 -d > {file_name!r} <<'EOF'\n{encoded}\nEOF" + result = await _write_base64_via_shell( + self.shell, file_name, local_path.read_bytes() ) return { "success": not bool(result.get("stderr")), diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 319f0f76cd..2743c328f3 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -258,6 +258,23 @@ async def test_cua_shell_and_python_accept_sync_sdk_methods(): assert python_result["data"]["output"]["text"] == "sync" +@pytest.mark.asyncio +async def test_cua_gui_reports_missing_mouse_or_keyboard(): + from astrbot.core.computer.booters.cua import CuaGUIComponent + + class SandboxWithoutGuiDevices: + async def screenshot(self): + return b"fake-png" + + gui = CuaGUIComponent(SandboxWithoutGuiDevices()) + + with pytest.raises(RuntimeError, match="mouse.*click"): + await gui.click(1, 2) + + with pytest.raises(RuntimeError, match="keyboard.*type"): + await gui.type_text("hello") + + @pytest.mark.asyncio async def test_cua_shutdown_clears_cached_components(): from astrbot.core.computer.booters.cua import CuaBooter @@ -393,3 +410,40 @@ async def fake_get_booter(context, session_id): assert image_parts[0].data == base64.b64encode(b"fake-png").decode() assert Path(payload["path"]).exists() assert sent_messages + + +@pytest.mark.asyncio +async def test_cua_tools_return_permission_error_without_gui_lookup(monkeypatch): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import ( + CuaKeyboardTypeTool, + CuaMouseClickTool, + CuaScreenshotTool, + ) + + sent_messages = [] + + class FakeEvent: + unified_msg_origin = "umo" + role = "member" + + async def send(self, message): + sent_messages.append(message) + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {}}) + + class FakeWrapper: + context = FakeAstrContext() + + async def fail_gui_lookup(context): + raise AssertionError("GUI lookup should not run after permission failure") + + monkeypatch.setattr(cua_tools, "check_admin_permission", lambda *args: "denied") + monkeypatch.setattr(cua_tools, "_get_gui_component", fail_gui_lookup) + + assert await CuaScreenshotTool().call(FakeWrapper()) == "denied" + assert await CuaMouseClickTool().call(FakeWrapper(), x=1, y=2) == "denied" + assert await CuaKeyboardTypeTool().call(FakeWrapper(), text="hello") == "denied" + assert sent_messages == [] From 0c02edd0ebb9f7afef1fced2e53fcf492084a878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 13:30:26 +0900 Subject: [PATCH 07/33] fix: refine CUA capability and shell handling --- astrbot/core/astr_agent_tool_exec.py | 2 +- astrbot/core/computer/booters/cua.py | 107 ++++++++++++++++++--------- tests/unit/test_cua_computer_use.py | 69 +++++++++++++++++ 3 files changed, 143 insertions(+), 35 deletions(-) diff --git a/astrbot/core/astr_agent_tool_exec.py b/astrbot/core/astr_agent_tool_exec.py index bb4fd08629..de5caad554 100644 --- a/astrbot/core/astr_agent_tool_exec.py +++ b/astrbot/core/astr_agent_tool_exec.py @@ -191,7 +191,7 @@ def _get_runtime_computer_tools( tool_mgr, booter: str | None = None, ) -> dict[str, FunctionTool]: - booter = "" if booter is None else str(booter) + booter = "" if booter is None else str(booter).lower() if runtime == "sandbox": shell_tool = tool_mgr.get_builtin_tool(ExecuteShellTool) python_tool = tool_mgr.get_builtin_tool(PythonTool) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index e896f10247..48162918b8 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -2,6 +2,8 @@ import base64 import inspect +import shlex +from dataclasses import dataclass from pathlib import Path from typing import Any @@ -24,7 +26,21 @@ async def _write_base64_via_shell( data: bytes, ) -> dict[str, Any]: encoded = base64.b64encode(data).decode("ascii") - return await shell.exec(f"base64 -d > {path!r} <<'EOF'\n{encoded}\nEOF") + decoder = ( + "import base64,pathlib,sys; " + "pathlib.Path(sys.argv[1]).write_bytes(base64.b64decode(sys.stdin.read()))" + ) + return await shell.exec( + f"python3 -c {shlex.quote(decoder)} {shlex.quote(path)} <<'EOF'\n{encoded}\nEOF" + ) + + +@dataclass(slots=True) +class ProcessResult: + stdout: str + stderr: str + exit_code: int | None + success: bool def _maybe_model_dump(value: Any) -> dict[str, Any]: @@ -72,6 +88,25 @@ def _result_text(payload: dict[str, Any], *keys: str) -> str: return "" +def _normalize_process_result(raw: Any) -> ProcessResult: + payload = _maybe_model_dump(raw) + if not payload and isinstance(raw, str): + payload = {"stdout": raw} + + stdout = _result_text(payload, "stdout", "output") + stderr = _result_text(payload, "stderr", "error") + exit_code = payload.get( + "exit_code", payload.get("returncode", 0 if not stderr else 1) + ) + success = bool(payload.get("success", not stderr and exit_code in (0, None))) + return ProcessResult( + stdout=stdout, + stderr=stderr, + exit_code=exit_code, + success=success, + ) + + def _split_listing_entries(output: str) -> list[str]: return [line for line in output.splitlines() if line.strip()] @@ -91,6 +126,11 @@ def _require_component_method( return method +def _has_component_method(root: Any, component_name: str, method_name: str) -> bool: + component = getattr(root, component_name, None) + return getattr(component, method_name, None) is not None + + class CuaShellComponent(ShellComponent): def __init__(self, sandbox: Any) -> None: self._sandbox = sandbox @@ -127,26 +167,16 @@ async def exec( result = await _call_first( self._sandbox.shell, ("run", "exec"), command, **kwargs ) - payload = _maybe_model_dump(result) - if not payload and isinstance(result, str): - payload = {"stdout": result} - - stdout = _result_text(payload, "stdout", "output") - stderr = _result_text(payload, "stderr", "error") - exit_code = payload.get( - "exit_code", payload.get("returncode", 0 if not stderr else 1) - ) + proc = _normalize_process_result(result) response = { - "stdout": stdout, - "stderr": stderr, - "exit_code": exit_code, - "success": bool( - payload.get("success", not stderr and exit_code in (0, None)) - ), + "stdout": proc.stdout, + "stderr": proc.stderr, + "exit_code": proc.exit_code, + "success": proc.success, } if background: try: - response["pid"] = int(stdout.strip().splitlines()[-1]) + response["pid"] = int(proc.stdout.strip().splitlines()[-1]) except Exception: response["pid"] = None return response @@ -167,19 +197,21 @@ async def exec( python = getattr(self._sandbox, "python", None) if python is not None: result = await _call_first(python, ("run", "exec"), code, timeout=timeout) - payload = _maybe_model_dump(result) + proc = _normalize_process_result(result) else: shell = CuaShellComponent(self._sandbox) result = await shell.exec(f"python3 - <<'PY'\n{code}\nPY", timeout=timeout) - payload = { - "output": result.get("stdout", ""), - "error": result.get("stderr", ""), - } + proc = ProcessResult( + stdout=result.get("stdout", ""), + stderr=result.get("stderr", ""), + exit_code=result.get("exit_code"), + success=bool(result.get("success", False)), + ) - output_text = "" if silent else _result_text(payload, "stdout", "output") - error_text = _result_text(payload, "stderr", "error") + output_text = "" if silent else proc.stdout + error_text = proc.stderr return { - "success": bool(payload.get("success", not error_text)), + "success": proc.success if not silent else not bool(error_text), "data": { "output": {"text": output_text, "images": []}, "error": error_text, @@ -465,15 +497,22 @@ async def shutdown(self) -> None: @property def capabilities(self) -> tuple[str, ...] | None: - return ( - "python", - "shell", - "filesystem", - "gui", - "screenshot", - "mouse", - "keyboard", - ) + capabilities = ["python", "shell", "filesystem"] + if self._sandbox is None: + return tuple(capabilities) + + has_screenshot = getattr(self._sandbox, "screenshot", None) is not None + has_mouse = _has_component_method(self._sandbox, "mouse", "click") + has_keyboard = _has_component_method(self._sandbox, "keyboard", "type") + if has_screenshot or has_mouse or has_keyboard: + capabilities.append("gui") + if has_screenshot: + capabilities.append("screenshot") + if has_mouse: + capabilities.append("mouse") + if has_keyboard: + capabilities.append("keyboard") + return tuple(capabilities) @property def fs(self) -> FileSystemComponent: diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 2743c328f3..3d1495b68b 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -29,6 +29,11 @@ async def run(self, command: str, **kwargs): return {"stdout": "ok", "stderr": "", "exit_code": 0} +class ProcessShapeShell: + async def run(self, command: str, **kwargs): + return {"output": "shape-ok", "returncode": 0} + + class FakePython: async def run(self, code: str, **kwargs): return {"output": "42", "error": ""} @@ -229,6 +234,20 @@ async def test_cua_list_dir_returns_entries_list_for_shell_fallback(): assert sandbox.shell.commands[0][0] == "ls -1 '.'" +@pytest.mark.asyncio +async def test_cua_write_file_shell_fallback_uses_python_base64_decoder(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + sandbox = FakeSandbox() + delattr(sandbox, "filesystem") + + await CuaFileSystemComponent(sandbox).write_file("hello.txt", "hello") + + command = sandbox.shell.commands[0][0] + assert "python3 -c" in command + assert "base64 -d" not in command + + @pytest.mark.asyncio async def test_cua_list_dir_shell_fallback_returns_filename_only_entries(): from astrbot.core.computer.booters.cua import CuaFileSystemComponent @@ -258,6 +277,23 @@ async def test_cua_shell_and_python_accept_sync_sdk_methods(): assert python_result["data"]["output"]["text"] == "sync" +@pytest.mark.asyncio +async def test_cua_shell_normalizes_output_returncode_shape(): + from astrbot.core.computer.booters.cua import CuaShellComponent + + sandbox = FakeSandbox() + sandbox.shell = ProcessShapeShell() + + result = await CuaShellComponent(sandbox).exec("echo ok") + + assert result == { + "stdout": "shape-ok", + "stderr": "", + "exit_code": 0, + "success": True, + } + + @pytest.mark.asyncio async def test_cua_gui_reports_missing_mouse_or_keyboard(): from astrbot.core.computer.booters.cua import CuaGUIComponent @@ -275,6 +311,31 @@ async def screenshot(self): await gui.type_text("hello") +def test_cua_capabilities_reflect_initialized_sandbox_gui_devices(): + from astrbot.core.computer.booters.cua import CuaBooter + + booter = CuaBooter() + booter._sandbox = FakeSandbox() + + assert booter.capabilities == ( + "python", + "shell", + "filesystem", + "gui", + "screenshot", + "mouse", + "keyboard", + ) + + class ScreenshotOnlySandbox: + async def screenshot(self): + return b"fake-png" + + booter._sandbox = ScreenshotOnlySandbox() + + assert booter.capabilities == ("python", "shell", "filesystem", "gui", "screenshot") + + @pytest.mark.asyncio async def test_cua_shutdown_clears_cached_components(): from astrbot.core.computer.booters.cua import CuaBooter @@ -336,6 +397,14 @@ def test_runtime_tool_selection_treats_none_booter_as_empty(): assert "astrbot_cua_screenshot" not in tools +def test_runtime_tool_selection_normalizes_cua_booter_case(): + manager = FunctionToolManager() + + tools = FunctionToolExecutor._get_runtime_computer_tools("sandbox", manager, "CUA") + + assert "astrbot_cua_screenshot" in tools + + def test_cua_is_exposed_in_sandbox_config_metadata(): items = _agent_computer_use_items() booter = items["provider_settings.sandbox.booter"] From 22d74008011207b9ba44b4fa17c0dcc7fe9b6d2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 13:39:59 +0900 Subject: [PATCH 08/33] fix: avoid inline CUA screenshot image results by default --- astrbot/core/tools/computer_tools/cua.py | 23 +++++++---- tests/unit/test_cua_computer_use.py | 52 +++++++++++++++++++++++- 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index 01d41ab469..0fa350b8e8 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -58,6 +58,11 @@ class CuaScreenshotTool(FunctionTool): "description": "Whether to send the screenshot image to the current conversation.", "default": True, }, + "return_image_to_llm": { + "type": "boolean", + "description": "Whether to include the screenshot image content in the tool result. Keep disabled for Gemini thinking models.", + "default": False, + }, }, } ) @@ -66,6 +71,7 @@ async def call( self, context: ContextWrapper[AstrAgentContext], send_to_user: bool = True, + return_image_to_llm: bool = False, ) -> ToolExecResult: if err := check_admin_permission(context, "Taking CUA screenshots"): return err @@ -77,16 +83,19 @@ async def call( if send_to_user: await context.context.event.send(MessageChain().file_image(path)) payload["sent_to_user"] = True - return mcp.types.CallToolResult( - content=[ - mcp.types.TextContent(type="text", text=_to_json(payload)), + image_data = payload.pop("base64", "") + content: list[mcp.types.TextContent | mcp.types.ImageContent] = [ + mcp.types.TextContent(type="text", text=_to_json(payload)) + ] + if return_image_to_llm: + content.append( mcp.types.ImageContent( type="image", - data=str(payload.get("base64", "")), + data=str(image_data), mimeType=str(payload.get("mime_type", "image/png")), - ), - ] - ) + ) + ) + return mcp.types.CallToolResult(content=content) except Exception as e: return f"Error taking CUA screenshot: {str(e)}" diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 3d1495b68b..94fd5992a9 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -476,11 +476,61 @@ async def fake_get_booter(context, session_id): image_parts = [part for part in result.content if part.type == "image"] text_parts = [part for part in result.content if part.type == "text"] payload = json.loads(text_parts[0].text) - assert image_parts[0].data == base64.b64encode(b"fake-png").decode() + assert image_parts == [] + assert "base64" not in payload assert Path(payload["path"]).exists() assert sent_messages +@pytest.mark.asyncio +async def test_screenshot_tool_can_opt_in_to_llm_image_content(monkeypatch, tmp_path): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaScreenshotTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + async def send(self, message): + pass + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeGUI: + async def screenshot(self, path: str): + Path(path).write_bytes(b"fake-png") + return { + "success": True, + "path": path, + "mime_type": "image/png", + "base64": base64.b64encode(b"fake-png").decode(), + } + + class FakeBooter: + gui = FakeGUI() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) + + result = await CuaScreenshotTool().call( + FakeWrapper(), send_to_user=False, return_image_to_llm=True + ) + + image_parts = [part for part in result.content if part.type == "image"] + text_parts = [part for part in result.content if part.type == "text"] + payload = json.loads(text_parts[0].text) + assert image_parts[0].data == base64.b64encode(b"fake-png").decode() + assert "base64" not in payload + + @pytest.mark.asyncio async def test_cua_tools_return_permission_error_without_gui_lookup(monkeypatch): from astrbot.core.tools.computer_tools import cua as cua_tools From df54a02bad0576b3ecce914fa2eccf1130722bb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 13:48:27 +0900 Subject: [PATCH 09/33] fix: guide CUA browser startup workflow --- astrbot/core/astr_main_agent.py | 7 ++++++- tests/unit/test_astr_main_agent.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index d2ce3687d9..af7485388a 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -1021,7 +1021,12 @@ def _apply_sandbox_tools( if booter == "cua": req.system_prompt += ( "\n[CUA Desktop Control]\n" - "Use `astrbot_cua_screenshot` to inspect the current desktop before " + "When launching GUI apps, use `astrbot_execute_shell` with " + "background=true; do not append shell background operators manually. " + "Prefer `chromium` for browser tasks in the default CUA Linux sandbox. " + "Do not use `firefox &` unless the user confirms Firefox exists. " + "After launching or changing any GUI app, immediately call " + "`astrbot_cua_screenshot` to inspect the current desktop before " "clicking or typing. Use coordinates from screenshots for " "`astrbot_cua_mouse_click`, then `astrbot_cua_keyboard_type` for text input.\n" ) diff --git a/tests/unit/test_astr_main_agent.py b/tests/unit/test_astr_main_agent.py index 5a5bceae15..f4aac00caa 100644 --- a/tests/unit/test_astr_main_agent.py +++ b/tests/unit/test_astr_main_agent.py @@ -1561,6 +1561,23 @@ def test_apply_sandbox_tools_adds_sandbox_prompt(self, mock_context): assert "sandboxed environment" in req.system_prompt + def test_apply_sandbox_tools_with_cua_adds_gui_guidance(self, mock_context): + """Test that CUA sandbox guidance nudges reliable GUI workflows.""" + module = ama + config = module.MainAgentBuildConfig( + tool_call_timeout=60, + computer_use_runtime="sandbox", + sandbox_cfg={"booter": "cua"}, + ) + req = ProviderRequest(prompt="Test", system_prompt="Original prompt") + + module._apply_sandbox_tools(config, req, "session-123") + + assert "chromium" in req.system_prompt + assert "background=true" in req.system_prompt + assert "astrbot_cua_screenshot" in req.system_prompt + assert "Do not use `firefox &`" in req.system_prompt + def test_apply_sandbox_tools_with_shipyard_booter(self, monkeypatch, mock_context): """Test sandbox tools with shipyard booter configuration.""" module = ama From b8674ef05df5b39e0f43e209d830e4c8054d9dbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 13:58:03 +0900 Subject: [PATCH 10/33] feat: add CUA browser and key press tools --- astrbot/core/astr_agent_tool_exec.py | 6 + astrbot/core/astr_main_agent.py | 10 +- astrbot/core/computer/booters/cua.py | 18 +++ astrbot/core/computer/olayer/gui.py | 4 + astrbot/core/tools/computer_tools/__init__.py | 10 +- astrbot/core/tools/computer_tools/cua.py | 109 +++++++++++++++++ tests/unit/test_astr_main_agent.py | 2 + tests/unit/test_cua_computer_use.py | 110 ++++++++++++++++++ 8 files changed, 266 insertions(+), 3 deletions(-) diff --git a/astrbot/core/astr_agent_tool_exec.py b/astrbot/core/astr_agent_tool_exec.py index de5caad554..6a74143c82 100644 --- a/astrbot/core/astr_agent_tool_exec.py +++ b/astrbot/core/astr_agent_tool_exec.py @@ -32,7 +32,9 @@ from astrbot.core.provider.register import llm_tools from astrbot.core.tools.computer_tools import ( CuaKeyboardTypeTool, + CuaKeyPressTool, CuaMouseClickTool, + CuaOpenBrowserTool, CuaScreenshotTool, ExecuteShellTool, FileDownloadTool, @@ -215,11 +217,15 @@ def _get_runtime_computer_tools( screenshot_tool = tool_mgr.get_builtin_tool(CuaScreenshotTool) mouse_click_tool = tool_mgr.get_builtin_tool(CuaMouseClickTool) keyboard_type_tool = tool_mgr.get_builtin_tool(CuaKeyboardTypeTool) + key_press_tool = tool_mgr.get_builtin_tool(CuaKeyPressTool) + open_browser_tool = tool_mgr.get_builtin_tool(CuaOpenBrowserTool) tools.update( { screenshot_tool.name: screenshot_tool, mouse_click_tool.name: mouse_click_tool, keyboard_type_tool.name: keyboard_type_tool, + key_press_tool.name: key_press_tool, + open_browser_tool.name: open_browser_tool, } ) return tools diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index af7485388a..1a88453210 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -48,7 +48,9 @@ CreateSkillCandidateTool, CreateSkillPayloadTool, CuaKeyboardTypeTool, + CuaKeyPressTool, CuaMouseClickTool, + CuaOpenBrowserTool, CuaScreenshotTool, EvaluateSkillCandidateTool, ExecuteShellTool, @@ -1023,16 +1025,20 @@ def _apply_sandbox_tools( "\n[CUA Desktop Control]\n" "When launching GUI apps, use `astrbot_execute_shell` with " "background=true; do not append shell background operators manually. " - "Prefer `chromium` for browser tasks in the default CUA Linux sandbox. " + "Prefer `astrbot_cua_open_browser` for browser tasks. " + "If shell is needed, prefer `chromium` in the default CUA Linux sandbox. " "Do not use `firefox &` unless the user confirms Firefox exists. " "After launching or changing any GUI app, immediately call " "`astrbot_cua_screenshot` to inspect the current desktop before " "clicking or typing. Use coordinates from screenshots for " - "`astrbot_cua_mouse_click`, then `astrbot_cua_keyboard_type` for text input.\n" + "`astrbot_cua_mouse_click`, `astrbot_cua_keyboard_type` for text input, " + "and `astrbot_cua_key_press` for Enter, Ctrl+L, Tab, or Escape.\n" ) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaScreenshotTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaMouseClickTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaKeyboardTypeTool)) + req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaKeyPressTool)) + req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaOpenBrowserTool)) req.system_prompt = f"{req.system_prompt or ''}\n{SANDBOX_MODE_PROMPT}\n" diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 48162918b8..19e552bbd9 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -384,6 +384,24 @@ async def type_text(self, text: str) -> dict[str, Any]: payload = _maybe_model_dump(result) return {"success": bool(payload.get("success", True)), **payload} + async def press_key(self, key: str) -> dict[str, Any]: + keyboard = getattr(self._sandbox, "keyboard", None) + press = None + if keyboard is not None: + press = ( + getattr(keyboard, "press", None) + or getattr(keyboard, "key_press", None) + or getattr(keyboard, "press_key", None) + ) + if press is None: + raise RuntimeError( + "CUA sandbox does not provide `keyboard.press`. " + "Please check the installed CUA SDK version and sandbox backend." + ) + result = await _maybe_await(press(key)) + payload = _maybe_model_dump(result) + return {"success": bool(payload.get("success", True)), **payload} + def _screenshot_to_bytes(raw: Any) -> bytes: if isinstance(raw, (bytes, bytearray)): diff --git a/astrbot/core/computer/olayer/gui.py b/astrbot/core/computer/olayer/gui.py index f837d57a2a..cc23b9d7af 100644 --- a/astrbot/core/computer/olayer/gui.py +++ b/astrbot/core/computer/olayer/gui.py @@ -19,3 +19,7 @@ async def click(self, x: int, y: int, button: str = "left") -> dict[str, Any]: async def type_text(self, text: str) -> dict[str, Any]: """Type text into the active UI target.""" ... + + async def press_key(self, key: str) -> dict[str, Any]: + """Press a keyboard key or shortcut.""" + ... diff --git a/astrbot/core/tools/computer_tools/__init__.py b/astrbot/core/tools/computer_tools/__init__.py index 116c813733..2f57b359bb 100644 --- a/astrbot/core/tools/computer_tools/__init__.py +++ b/astrbot/core/tools/computer_tools/__init__.py @@ -1,4 +1,10 @@ -from .cua import CuaKeyboardTypeTool, CuaMouseClickTool, CuaScreenshotTool +from .cua import ( + CuaKeyboardTypeTool, + CuaKeyPressTool, + CuaMouseClickTool, + CuaOpenBrowserTool, + CuaScreenshotTool, +) from .fs import ( FileDownloadTool, FileEditTool, @@ -33,8 +39,10 @@ "BrowserExecTool", "CreateSkillCandidateTool", "CreateSkillPayloadTool", + "CuaKeyPressTool", "CuaKeyboardTypeTool", "CuaMouseClickTool", + "CuaOpenBrowserTool", "CuaScreenshotTool", "EvaluateSkillCandidateTool", "ExecuteShellTool", diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index 0fa350b8e8..dce902cce1 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -1,6 +1,8 @@ from __future__ import annotations +import asyncio import json +import shlex import uuid from dataclasses import dataclass, field from pathlib import Path @@ -166,8 +168,115 @@ async def call( return f"Error typing in CUA desktop: {str(e)}" +@builtin_tool(config=_CUA_TOOL_CONFIG) +@dataclass +class CuaKeyPressTool(FunctionTool): + name: str = "astrbot_cua_key_press" + description: str = "Press a key or shortcut in the CUA sandbox desktop." + parameters: dict = field( + default_factory=lambda: { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "Key or shortcut to press, for example Enter, Escape, Tab, or Ctrl+L.", + }, + }, + "required": ["key"], + } + ) + + async def call( + self, + context: ContextWrapper[AstrAgentContext], + key: str, + ) -> ToolExecResult: + if err := check_admin_permission(context, "Using CUA keyboard"): + return err + try: + gui = await _get_gui_component(context) + return _to_json(await gui.press_key(key)) + except Exception as e: + return f"Error pressing key in CUA desktop: {str(e)}" + + +@builtin_tool(config=_CUA_TOOL_CONFIG) +@dataclass +class CuaOpenBrowserTool(FunctionTool): + name: str = "astrbot_cua_open_browser" + description: str = "Open Chromium in the CUA sandbox, optionally navigating to a URL, then capture a screenshot." + parameters: dict = field( + default_factory=lambda: { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "Optional URL to open in Chromium.", + "default": "", + }, + "wait_seconds": { + "type": "number", + "description": "Seconds to wait before taking the screenshot.", + "default": 3, + }, + "send_to_user": { + "type": "boolean", + "description": "Whether to send the screenshot image to the current conversation.", + "default": True, + }, + }, + } + ) + + async def call( + self, + context: ContextWrapper[AstrAgentContext], + url: str = "", + wait_seconds: float = 3, + send_to_user: bool = True, + ) -> ToolExecResult: + if err := check_admin_permission(context, "Opening CUA browser"): + return err + try: + booter = await get_booter( + context.context.context, + context.context.event.unified_msg_origin, + ) + gui = getattr(booter, "gui", None) + if gui is None: + raise RuntimeError( + "Current sandbox booter does not support CUA GUI capability." + ) + command = _build_chromium_command(url) + shell_result = await booter.shell.exec(command, background=True) + await asyncio.sleep(max(0, wait_seconds)) + path = _new_screenshot_path(context.context.event.unified_msg_origin) + screenshot_result = await gui.screenshot(path) + payload = { + "success": bool(shell_result.get("success", True)), + "command": command, + "shell": shell_result, + **screenshot_result, + "path": path, + } + payload.pop("base64", None) + if send_to_user: + await context.context.event.send(MessageChain().file_image(path)) + payload["sent_to_user"] = True + return _to_json(payload) + except Exception as e: + return f"Error opening CUA browser: {str(e)}" + + def _new_screenshot_path(umo: str) -> str: safe_prefix = uuid.uuid5(uuid.NAMESPACE_DNS, umo).hex[:12] screenshot_dir = Path(get_astrbot_temp_path()) / "cua_screenshots" screenshot_dir.mkdir(parents=True, exist_ok=True) return str(screenshot_dir / f"{safe_prefix}-{uuid.uuid4().hex}.png") + + +def _build_chromium_command(url: str = "") -> str: + parts = ["chromium", "--no-sandbox", "--disable-dev-shm-usage"] + if url: + parts.append(url) + return " ".join(shlex.quote(part) for part in parts) diff --git a/tests/unit/test_astr_main_agent.py b/tests/unit/test_astr_main_agent.py index f4aac00caa..99f0e57af6 100644 --- a/tests/unit/test_astr_main_agent.py +++ b/tests/unit/test_astr_main_agent.py @@ -1576,6 +1576,8 @@ def test_apply_sandbox_tools_with_cua_adds_gui_guidance(self, mock_context): assert "chromium" in req.system_prompt assert "background=true" in req.system_prompt assert "astrbot_cua_screenshot" in req.system_prompt + assert "astrbot_cua_open_browser" in req.system_prompt + assert "astrbot_cua_key_press" in req.system_prompt assert "Do not use `firefox &`" in req.system_prompt def test_apply_sandbox_tools_with_shipyard_booter(self, monkeypatch, mock_context): diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 94fd5992a9..ce87b95816 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -68,11 +68,16 @@ async def click(self, x: int, y: int, button: str = "left"): class FakeKeyboard: def __init__(self): self.typed = [] + self.pressed = [] async def type(self, text: str): self.typed.append(text) return {"success": True} + async def press(self, key: str): + self.pressed.append(key) + return {"success": True} + class FakeSandbox: def __init__(self): @@ -208,6 +213,7 @@ async def test_cua_components_map_sdk_results(tmp_path): screenshot_result = await gui.screenshot(str(screenshot_path)) click_result = await gui.click(10, 20, button="right") type_result = await gui.type_text("hello") + press_result = await gui.press_key("Enter") assert shell_result["stdout"] == "ok" assert python_result["data"]["output"]["text"] == "42" @@ -216,8 +222,10 @@ async def test_cua_components_map_sdk_results(tmp_path): assert screenshot_result["mime_type"] == "image/png" assert click_result["success"] is True assert type_result["success"] is True + assert press_result["success"] is True assert sandbox.mouse.clicks == [(10, 20, "right")] assert sandbox.keyboard.typed == ["hello"] + assert sandbox.keyboard.pressed == ["Enter"] @pytest.mark.asyncio @@ -310,6 +318,9 @@ async def screenshot(self): with pytest.raises(RuntimeError, match="keyboard.*type"): await gui.type_text("hello") + with pytest.raises(RuntimeError, match="keyboard.*press"): + await gui.press_key("Enter") + def test_cua_capabilities_reflect_initialized_sandbox_gui_devices(): from astrbot.core.computer.booters.cua import CuaBooter @@ -366,8 +377,10 @@ async def __aexit__(self, exc_type, exc, tb): def test_cua_tools_are_registered_as_builtin_tools(): from astrbot.core.tools.computer_tools.cua import ( + CuaKeyPressTool, CuaKeyboardTypeTool, CuaMouseClickTool, + CuaOpenBrowserTool, CuaScreenshotTool, ) @@ -376,6 +389,8 @@ def test_cua_tools_are_registered_as_builtin_tools(): assert manager.get_builtin_tool(CuaScreenshotTool).name == "astrbot_cua_screenshot" assert manager.get_builtin_tool(CuaMouseClickTool).name == "astrbot_cua_mouse_click" assert manager.get_builtin_tool(CuaKeyboardTypeTool).name == "astrbot_cua_keyboard_type" + assert manager.get_builtin_tool(CuaKeyPressTool).name == "astrbot_cua_key_press" + assert manager.get_builtin_tool(CuaOpenBrowserTool).name == "astrbot_cua_open_browser" def test_cua_runtime_tools_are_available_to_handoffs(): @@ -386,6 +401,8 @@ def test_cua_runtime_tools_are_available_to_handoffs(): assert "astrbot_cua_screenshot" in tools assert "astrbot_cua_mouse_click" in tools assert "astrbot_cua_keyboard_type" in tools + assert "astrbot_cua_key_press" in tools + assert "astrbot_cua_open_browser" in tools def test_runtime_tool_selection_treats_none_booter_as_empty(): @@ -405,6 +422,99 @@ def test_runtime_tool_selection_normalizes_cua_booter_case(): assert "astrbot_cua_screenshot" in tools +@pytest.mark.asyncio +async def test_cua_key_press_tool_presses_keyboard(monkeypatch): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaKeyPressTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + + class FakeWrapper: + context = FakeAstrContext() + + sandbox = FakeSandbox() + from astrbot.core.computer.booters.cua import CuaGUIComponent + + sandbox_gui = CuaGUIComponent(sandbox) + + class FakeBooter: + gui = sandbox_gui + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + + result = await CuaKeyPressTool().call(FakeWrapper(), key="Enter") + + assert json.loads(result)["success"] is True + assert sandbox.keyboard.pressed == ["Enter"] + + +@pytest.mark.asyncio +async def test_cua_open_browser_tool_launches_chromium_and_screenshots(monkeypatch, tmp_path): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + async def send(self, message): + pass + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + + class FakeWrapper: + context = FakeAstrContext() + + shell_commands = [] + + class FakeShellComponent: + async def exec(self, command, background=False, **kwargs): + shell_commands.append((command, background, kwargs)) + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeGUI: + async def screenshot(self, path: str): + Path(path).write_bytes(b"fake-png") + return {"success": True, "path": path, "mime_type": "image/png", "base64": ""} + + class FakeBooter: + shell = FakeShellComponent() + gui = FakeGUI() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) + + async def fake_sleep(seconds): + pass + + monkeypatch.setattr(cua_tools.asyncio, "sleep", fake_sleep) + + result = await CuaOpenBrowserTool().call( + FakeWrapper(), url="https://www.google.com/maps/dir/Tokyo/Osaka" + ) + payload = json.loads(result) + + assert payload["success"] is True + assert payload["path"] + assert shell_commands[0][1] is True + assert "chromium" in shell_commands[0][0] + assert "https://www.google.com/maps/dir/Tokyo/Osaka" in shell_commands[0][0] + + def test_cua_is_exposed_in_sandbox_config_metadata(): items = _agent_computer_use_items() booter = items["provider_settings.sandbox.booter"] From 0fab3f404dd367deffa03e85656d69cf81c7b0e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 14:02:58 +0900 Subject: [PATCH 11/33] fix: launch CUA browser as sandbox user --- astrbot/core/tools/computer_tools/cua.py | 14 ++++++++++---- tests/unit/test_cua_computer_use.py | 5 ++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index dce902cce1..2e096239c4 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -276,7 +276,13 @@ def _new_screenshot_path(umo: str) -> str: def _build_chromium_command(url: str = "") -> str: - parts = ["chromium", "--no-sandbox", "--disable-dev-shm-usage"] - if url: - parts.append(url) - return " ".join(shlex.quote(part) for part in parts) + quoted_url = shlex.quote(url) if url else "" + return ( + "browser=$(command -v chromium || command -v chromium-browser || " + "command -v google-chrome || command -v firefox) && " + 'if echo "$browser" | grep -qi firefox; then ' + f'su cua -c "DISPLAY=:1 $browser --no-remote {quoted_url}"; ' + "else " + f'su cua -c "DISPLAY=:1 $browser --no-sandbox --disable-dev-shm-usage {quoted_url}"; ' + "fi" + ) diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index ce87b95816..1b0edfb895 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -511,7 +511,10 @@ async def fake_sleep(seconds): assert payload["success"] is True assert payload["path"] assert shell_commands[0][1] is True - assert "chromium" in shell_commands[0][0] + assert "command -v chromium" in shell_commands[0][0] + assert "command -v firefox" in shell_commands[0][0] + assert "su cua -c" in shell_commands[0][0] + assert "DISPLAY=:1" in shell_commands[0][0] assert "https://www.google.com/maps/dir/Tokyo/Osaka" in shell_commands[0][0] From bfe043111b325383cfd391fb0c16ea25851571f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 14:26:20 +0900 Subject: [PATCH 12/33] fix: stabilize CUA browser screenshots --- astrbot/core/astr_main_agent.py | 4 +- astrbot/core/tools/computer_tools/cua.py | 67 +++++-- tests/unit/test_astr_main_agent.py | 1 + tests/unit/test_cua_computer_use.py | 217 ++++++++++++++++++++++- 4 files changed, 272 insertions(+), 17 deletions(-) diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index 1a88453210..10f58a6d30 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -1030,7 +1030,9 @@ def _apply_sandbox_tools( "Do not use `firefox &` unless the user confirms Firefox exists. " "After launching or changing any GUI app, immediately call " "`astrbot_cua_screenshot` to inspect the current desktop before " - "clicking or typing. Use coordinates from screenshots for " + "clicking or typing; keep `return_image_to_llm` enabled unless the " + "provider cannot handle image tool results. " + "Use coordinates from screenshots for " "`astrbot_cua_mouse_click`, `astrbot_cua_keyboard_type` for text input, " "and `astrbot_cua_key_press` for Enter, Ctrl+L, Tab, or Escape.\n" ) diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index 2e096239c4..bea0e006ea 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -62,8 +62,8 @@ class CuaScreenshotTool(FunctionTool): }, "return_image_to_llm": { "type": "boolean", - "description": "Whether to include the screenshot image content in the tool result. Keep disabled for Gemini thinking models.", - "default": False, + "description": "Whether to include the screenshot image content in the tool result for model inspection.", + "default": True, }, }, } @@ -73,7 +73,7 @@ async def call( self, context: ContextWrapper[AstrAgentContext], send_to_user: bool = True, - return_image_to_llm: bool = False, + return_image_to_llm: bool = True, ) -> ToolExecResult: if err := check_admin_permission(context, "Taking CUA screenshots"): return err @@ -204,26 +204,36 @@ async def call( @dataclass class CuaOpenBrowserTool(FunctionTool): name: str = "astrbot_cua_open_browser" - description: str = "Open Chromium in the CUA sandbox, optionally navigating to a URL, then capture a screenshot." + description: str = "Open a browser in the CUA sandbox, optionally navigating to a URL, then capture a screenshot." parameters: dict = field( default_factory=lambda: { "type": "object", "properties": { "url": { "type": "string", - "description": "Optional URL to open in Chromium.", + "description": "Optional URL to open in the browser.", "default": "", }, "wait_seconds": { "type": "number", "description": "Seconds to wait before taking the screenshot.", - "default": 3, + "default": 8, }, "send_to_user": { "type": "boolean", "description": "Whether to send the screenshot image to the current conversation.", "default": True, }, + "debug": { + "type": "boolean", + "description": "Whether to include the internal shell command and shell result in the tool response.", + "default": False, + }, + "return_image_to_llm": { + "type": "boolean", + "description": "Whether to include the browser screenshot image content in the tool result for model inspection.", + "default": True, + }, }, } ) @@ -232,8 +242,10 @@ async def call( self, context: ContextWrapper[AstrAgentContext], url: str = "", - wait_seconds: float = 3, + wait_seconds: float = 8, send_to_user: bool = True, + debug: bool = False, + return_image_to_llm: bool = True, ) -> ToolExecResult: if err := check_admin_permission(context, "Opening CUA browser"): return err @@ -254,18 +266,32 @@ async def call( screenshot_result = await gui.screenshot(path) payload = { "success": bool(shell_result.get("success", True)), - "command": command, - "shell": shell_result, + "browser": "auto", + "url": url, **screenshot_result, "path": path, } - payload.pop("base64", None) + image_data = payload.pop("base64", "") + if debug: + payload["debug"] = {"command": command, "shell": shell_result} if send_to_user: await context.context.event.send(MessageChain().file_image(path)) payload["sent_to_user"] = True - return _to_json(payload) + content: list[mcp.types.TextContent | mcp.types.ImageContent] = [ + mcp.types.TextContent(type="text", text=_to_json(payload)) + ] + if return_image_to_llm: + content.append( + mcp.types.ImageContent( + type="image", + data=str(image_data), + mimeType=str(payload.get("mime_type", "image/png")), + ) + ) + return mcp.types.CallToolResult(content=content) except Exception as e: - return f"Error opening CUA browser: {str(e)}" + detail = str(e) or type(e).__name__ + return f"Error opening CUA browser: {detail}" def _new_screenshot_path(umo: str) -> str: @@ -277,12 +303,25 @@ def _new_screenshot_path(umo: str) -> str: def _build_chromium_command(url: str = "") -> str: quoted_url = shlex.quote(url) if url else "" + direct_args = ( + f"$browser --no-sandbox --disable-dev-shm-usage {quoted_url}" + if quoted_url + else "$browser --no-sandbox --disable-dev-shm-usage" + ) + fallback_chromium_args = direct_args + fallback_firefox_args = ( + f"$browser --no-remote {quoted_url}" if quoted_url else "$browser --no-remote" + ) return ( "browser=$(command -v chromium || command -v chromium-browser || " "command -v google-chrome || command -v firefox) && " 'if echo "$browser" | grep -qi firefox; then ' - f'su cua -c "DISPLAY=:1 $browser --no-remote {quoted_url}"; ' + f"DISPLAY=${{DISPLAY:-:1}} {fallback_firefox_args} " + "|| " + f'su cua -c "DISPLAY=:1 {fallback_firefox_args}"; ' "else " - f'su cua -c "DISPLAY=:1 $browser --no-sandbox --disable-dev-shm-usage {quoted_url}"; ' + f"DISPLAY=${{DISPLAY:-:1}} {direct_args} " + "|| " + f'su cua -c "DISPLAY=:1 {fallback_chromium_args}"; ' "fi" ) diff --git a/tests/unit/test_astr_main_agent.py b/tests/unit/test_astr_main_agent.py index 99f0e57af6..2286f5c222 100644 --- a/tests/unit/test_astr_main_agent.py +++ b/tests/unit/test_astr_main_agent.py @@ -1578,6 +1578,7 @@ def test_apply_sandbox_tools_with_cua_adds_gui_guidance(self, mock_context): assert "astrbot_cua_screenshot" in req.system_prompt assert "astrbot_cua_open_browser" in req.system_prompt assert "astrbot_cua_key_press" in req.system_prompt + assert "return_image_to_llm" in req.system_prompt assert "Do not use `firefox &`" in req.system_prompt def test_apply_sandbox_tools_with_shipyard_booter(self, monkeypatch, mock_context): diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 1b0edfb895..e82e5010ad 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -506,18 +506,182 @@ async def fake_sleep(seconds): result = await CuaOpenBrowserTool().call( FakeWrapper(), url="https://www.google.com/maps/dir/Tokyo/Osaka" ) - payload = json.loads(result) + assert isinstance(result, mcp.types.CallToolResult) + text_parts = [part for part in result.content if part.type == "text"] + payload = json.loads(text_parts[0].text) assert payload["success"] is True assert payload["path"] + assert payload["browser"] == "auto" + assert payload["url"] == "https://www.google.com/maps/dir/Tokyo/Osaka" + assert "command" not in payload + assert "shell" not in payload assert shell_commands[0][1] is True assert "command -v chromium" in shell_commands[0][0] assert "command -v firefox" in shell_commands[0][0] + assert "DISPLAY=${DISPLAY:-:1}" in shell_commands[0][0] + assert "su cua -c" in shell_commands[0][0] + assert shell_commands[0][0].index("DISPLAY=${DISPLAY:-:1}") < shell_commands[0][ + 0 + ].index("su cua -c") + assert ">/tmp/astrbot-cua-browser.log" not in shell_commands[0][0] + assert " 2>&1 &" not in shell_commands[0][0] assert "su cua -c" in shell_commands[0][0] assert "DISPLAY=:1" in shell_commands[0][0] assert "https://www.google.com/maps/dir/Tokyo/Osaka" in shell_commands[0][0] +@pytest.mark.asyncio +async def test_cua_open_browser_tool_can_return_debug_payload(monkeypatch, tmp_path): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + async def send(self, message): + pass + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeShellComponent: + async def exec(self, command, background=False, **kwargs): + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeGUI: + async def screenshot(self, path: str): + Path(path).write_bytes(b"fake-png") + return {"success": True, "path": path, "mime_type": "image/png", "base64": ""} + + class FakeBooter: + shell = FakeShellComponent() + gui = FakeGUI() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) + + async def fake_sleep(seconds): + pass + + monkeypatch.setattr(cua_tools.asyncio, "sleep", fake_sleep) + + result = await CuaOpenBrowserTool().call( + FakeWrapper(), url="https://example.com", debug=True + ) + assert isinstance(result, mcp.types.CallToolResult) + text_parts = [part for part in result.content if part.type == "text"] + payload = json.loads(text_parts[0].text) + + assert "debug" in payload + assert "command" in payload["debug"] + assert "shell" in payload["debug"] + + +@pytest.mark.asyncio +async def test_cua_open_browser_tool_returns_llm_image_and_waits_for_startup( + monkeypatch, tmp_path +): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + async def send(self, message): + pass + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeShellComponent: + async def exec(self, command, background=False, **kwargs): + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeGUI: + async def screenshot(self, path: str): + Path(path).write_bytes(b"fake-browser-png") + return { + "success": True, + "path": path, + "mime_type": "image/png", + "base64": base64.b64encode(b"fake-browser-png").decode(), + } + + class FakeBooter: + shell = FakeShellComponent() + gui = FakeGUI() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + sleep_calls = [] + + async def fake_sleep(seconds): + sleep_calls.append(seconds) + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) + monkeypatch.setattr(cua_tools.asyncio, "sleep", fake_sleep) + + result = await CuaOpenBrowserTool().call(FakeWrapper(), send_to_user=False) + + assert isinstance(result, mcp.types.CallToolResult) + image_parts = [part for part in result.content if part.type == "image"] + text_parts = [part for part in result.content if part.type == "text"] + payload = json.loads(text_parts[0].text) + assert image_parts[0].data == base64.b64encode(b"fake-browser-png").decode() + assert "base64" not in payload + assert sleep_calls == [8] + + +@pytest.mark.asyncio +async def test_cua_open_browser_reports_blank_exception_type(monkeypatch): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeShellComponent: + async def exec(self, command, background=False, **kwargs): + raise TimeoutError() + + class FakeBooter: + shell = FakeShellComponent() + gui = object() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + + result = await CuaOpenBrowserTool().call(FakeWrapper()) + + assert result == "Error opening CUA browser: TimeoutError" + + def test_cua_is_exposed_in_sandbox_config_metadata(): items = _agent_computer_use_items() booter = items["provider_settings.sandbox.booter"] @@ -589,7 +753,7 @@ async def fake_get_booter(context, session_id): image_parts = [part for part in result.content if part.type == "image"] text_parts = [part for part in result.content if part.type == "text"] payload = json.loads(text_parts[0].text) - assert image_parts == [] + assert image_parts[0].data == base64.b64encode(b"fake-png").decode() assert "base64" not in payload assert Path(payload["path"]).exists() assert sent_messages @@ -644,6 +808,55 @@ async def fake_get_booter(context, session_id): assert "base64" not in payload +@pytest.mark.asyncio +async def test_screenshot_tool_can_opt_out_of_llm_image_content(monkeypatch, tmp_path): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaScreenshotTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + async def send(self, message): + pass + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeGUI: + async def screenshot(self, path: str): + Path(path).write_bytes(b"fake-png") + return { + "success": True, + "path": path, + "mime_type": "image/png", + "base64": base64.b64encode(b"fake-png").decode(), + } + + class FakeBooter: + gui = FakeGUI() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) + + result = await CuaScreenshotTool().call( + FakeWrapper(), send_to_user=False, return_image_to_llm=False + ) + + image_parts = [part for part in result.content if part.type == "image"] + text_parts = [part for part in result.content if part.type == "text"] + payload = json.loads(text_parts[0].text) + assert image_parts == [] + assert "base64" not in payload + + @pytest.mark.asyncio async def test_cua_tools_return_permission_error_without_gui_lookup(monkeypatch): from astrbot.core.tools.computer_tools import cua as cua_tools From a5f8dfc647bb7c2c05a8be1dfbee8c666249f11c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 14:30:43 +0900 Subject: [PATCH 13/33] fix: simplify CUA browser launch command --- astrbot/core/tools/computer_tools/cua.py | 27 +++--------------------- tests/unit/test_cua_computer_use.py | 15 +++---------- 2 files changed, 6 insertions(+), 36 deletions(-) diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index bea0e006ea..b01e98fa5f 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -259,7 +259,7 @@ async def call( raise RuntimeError( "Current sandbox booter does not support CUA GUI capability." ) - command = _build_chromium_command(url) + command = _build_browser_command(url) shell_result = await booter.shell.exec(command, background=True) await asyncio.sleep(max(0, wait_seconds)) path = _new_screenshot_path(context.context.event.unified_msg_origin) @@ -301,27 +301,6 @@ def _new_screenshot_path(umo: str) -> str: return str(screenshot_dir / f"{safe_prefix}-{uuid.uuid4().hex}.png") -def _build_chromium_command(url: str = "") -> str: +def _build_browser_command(url: str = "") -> str: quoted_url = shlex.quote(url) if url else "" - direct_args = ( - f"$browser --no-sandbox --disable-dev-shm-usage {quoted_url}" - if quoted_url - else "$browser --no-sandbox --disable-dev-shm-usage" - ) - fallback_chromium_args = direct_args - fallback_firefox_args = ( - f"$browser --no-remote {quoted_url}" if quoted_url else "$browser --no-remote" - ) - return ( - "browser=$(command -v chromium || command -v chromium-browser || " - "command -v google-chrome || command -v firefox) && " - 'if echo "$browser" | grep -qi firefox; then ' - f"DISPLAY=${{DISPLAY:-:1}} {fallback_firefox_args} " - "|| " - f'su cua -c "DISPLAY=:1 {fallback_firefox_args}"; ' - "else " - f"DISPLAY=${{DISPLAY:-:1}} {direct_args} " - "|| " - f'su cua -c "DISPLAY=:1 {fallback_chromium_args}"; ' - "fi" - ) + return f"chromium {quoted_url}" if quoted_url else "chromium" diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index e82e5010ad..8d9c3b8edb 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -517,18 +517,9 @@ async def fake_sleep(seconds): assert "command" not in payload assert "shell" not in payload assert shell_commands[0][1] is True - assert "command -v chromium" in shell_commands[0][0] - assert "command -v firefox" in shell_commands[0][0] - assert "DISPLAY=${DISPLAY:-:1}" in shell_commands[0][0] - assert "su cua -c" in shell_commands[0][0] - assert shell_commands[0][0].index("DISPLAY=${DISPLAY:-:1}") < shell_commands[0][ - 0 - ].index("su cua -c") - assert ">/tmp/astrbot-cua-browser.log" not in shell_commands[0][0] - assert " 2>&1 &" not in shell_commands[0][0] - assert "su cua -c" in shell_commands[0][0] - assert "DISPLAY=:1" in shell_commands[0][0] - assert "https://www.google.com/maps/dir/Tokyo/Osaka" in shell_commands[0][0] + assert shell_commands[0][0] == ( + "chromium https://www.google.com/maps/dir/Tokyo/Osaka" + ) @pytest.mark.asyncio From 07939ab6f30ee2881081dd1f78466b757b034dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 14:45:40 +0900 Subject: [PATCH 14/33] fix: remove CUA open browser tool --- astrbot/core/astr_agent_tool_exec.py | 3 - astrbot/core/astr_main_agent.py | 6 +- astrbot/core/tools/computer_tools/__init__.py | 2 - astrbot/core/tools/computer_tools/cua.py | 101 -------- tests/unit/test_astr_main_agent.py | 4 +- tests/unit/test_cua_computer_use.py | 219 ------------------ 6 files changed, 4 insertions(+), 331 deletions(-) diff --git a/astrbot/core/astr_agent_tool_exec.py b/astrbot/core/astr_agent_tool_exec.py index 6a74143c82..d3af45a1a1 100644 --- a/astrbot/core/astr_agent_tool_exec.py +++ b/astrbot/core/astr_agent_tool_exec.py @@ -34,7 +34,6 @@ CuaKeyboardTypeTool, CuaKeyPressTool, CuaMouseClickTool, - CuaOpenBrowserTool, CuaScreenshotTool, ExecuteShellTool, FileDownloadTool, @@ -218,14 +217,12 @@ def _get_runtime_computer_tools( mouse_click_tool = tool_mgr.get_builtin_tool(CuaMouseClickTool) keyboard_type_tool = tool_mgr.get_builtin_tool(CuaKeyboardTypeTool) key_press_tool = tool_mgr.get_builtin_tool(CuaKeyPressTool) - open_browser_tool = tool_mgr.get_builtin_tool(CuaOpenBrowserTool) tools.update( { screenshot_tool.name: screenshot_tool, mouse_click_tool.name: mouse_click_tool, keyboard_type_tool.name: keyboard_type_tool, key_press_tool.name: key_press_tool, - open_browser_tool.name: open_browser_tool, } ) return tools diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index 10f58a6d30..8a12d66f54 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -50,7 +50,6 @@ CuaKeyboardTypeTool, CuaKeyPressTool, CuaMouseClickTool, - CuaOpenBrowserTool, CuaScreenshotTool, EvaluateSkillCandidateTool, ExecuteShellTool, @@ -1025,8 +1024,8 @@ def _apply_sandbox_tools( "\n[CUA Desktop Control]\n" "When launching GUI apps, use `astrbot_execute_shell` with " "background=true; do not append shell background operators manually. " - "Prefer `astrbot_cua_open_browser` for browser tasks. " - "If shell is needed, prefer `chromium` in the default CUA Linux sandbox. " + "For browser tasks, use `astrbot_execute_shell` with `background=true` " + "to launch Chromium in the default CUA Linux sandbox. " "Do not use `firefox &` unless the user confirms Firefox exists. " "After launching or changing any GUI app, immediately call " "`astrbot_cua_screenshot` to inspect the current desktop before " @@ -1040,7 +1039,6 @@ def _apply_sandbox_tools( req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaMouseClickTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaKeyboardTypeTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaKeyPressTool)) - req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaOpenBrowserTool)) req.system_prompt = f"{req.system_prompt or ''}\n{SANDBOX_MODE_PROMPT}\n" diff --git a/astrbot/core/tools/computer_tools/__init__.py b/astrbot/core/tools/computer_tools/__init__.py index 2f57b359bb..24060a0e82 100644 --- a/astrbot/core/tools/computer_tools/__init__.py +++ b/astrbot/core/tools/computer_tools/__init__.py @@ -2,7 +2,6 @@ CuaKeyboardTypeTool, CuaKeyPressTool, CuaMouseClickTool, - CuaOpenBrowserTool, CuaScreenshotTool, ) from .fs import ( @@ -42,7 +41,6 @@ "CuaKeyPressTool", "CuaKeyboardTypeTool", "CuaMouseClickTool", - "CuaOpenBrowserTool", "CuaScreenshotTool", "EvaluateSkillCandidateTool", "ExecuteShellTool", diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index b01e98fa5f..1e2320f478 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -1,8 +1,6 @@ from __future__ import annotations -import asyncio import json -import shlex import uuid from dataclasses import dataclass, field from pathlib import Path @@ -200,107 +198,8 @@ async def call( return f"Error pressing key in CUA desktop: {str(e)}" -@builtin_tool(config=_CUA_TOOL_CONFIG) -@dataclass -class CuaOpenBrowserTool(FunctionTool): - name: str = "astrbot_cua_open_browser" - description: str = "Open a browser in the CUA sandbox, optionally navigating to a URL, then capture a screenshot." - parameters: dict = field( - default_factory=lambda: { - "type": "object", - "properties": { - "url": { - "type": "string", - "description": "Optional URL to open in the browser.", - "default": "", - }, - "wait_seconds": { - "type": "number", - "description": "Seconds to wait before taking the screenshot.", - "default": 8, - }, - "send_to_user": { - "type": "boolean", - "description": "Whether to send the screenshot image to the current conversation.", - "default": True, - }, - "debug": { - "type": "boolean", - "description": "Whether to include the internal shell command and shell result in the tool response.", - "default": False, - }, - "return_image_to_llm": { - "type": "boolean", - "description": "Whether to include the browser screenshot image content in the tool result for model inspection.", - "default": True, - }, - }, - } - ) - - async def call( - self, - context: ContextWrapper[AstrAgentContext], - url: str = "", - wait_seconds: float = 8, - send_to_user: bool = True, - debug: bool = False, - return_image_to_llm: bool = True, - ) -> ToolExecResult: - if err := check_admin_permission(context, "Opening CUA browser"): - return err - try: - booter = await get_booter( - context.context.context, - context.context.event.unified_msg_origin, - ) - gui = getattr(booter, "gui", None) - if gui is None: - raise RuntimeError( - "Current sandbox booter does not support CUA GUI capability." - ) - command = _build_browser_command(url) - shell_result = await booter.shell.exec(command, background=True) - await asyncio.sleep(max(0, wait_seconds)) - path = _new_screenshot_path(context.context.event.unified_msg_origin) - screenshot_result = await gui.screenshot(path) - payload = { - "success": bool(shell_result.get("success", True)), - "browser": "auto", - "url": url, - **screenshot_result, - "path": path, - } - image_data = payload.pop("base64", "") - if debug: - payload["debug"] = {"command": command, "shell": shell_result} - if send_to_user: - await context.context.event.send(MessageChain().file_image(path)) - payload["sent_to_user"] = True - content: list[mcp.types.TextContent | mcp.types.ImageContent] = [ - mcp.types.TextContent(type="text", text=_to_json(payload)) - ] - if return_image_to_llm: - content.append( - mcp.types.ImageContent( - type="image", - data=str(image_data), - mimeType=str(payload.get("mime_type", "image/png")), - ) - ) - return mcp.types.CallToolResult(content=content) - except Exception as e: - detail = str(e) or type(e).__name__ - return f"Error opening CUA browser: {detail}" - - def _new_screenshot_path(umo: str) -> str: safe_prefix = uuid.uuid5(uuid.NAMESPACE_DNS, umo).hex[:12] screenshot_dir = Path(get_astrbot_temp_path()) / "cua_screenshots" screenshot_dir.mkdir(parents=True, exist_ok=True) return str(screenshot_dir / f"{safe_prefix}-{uuid.uuid4().hex}.png") - - -def _build_browser_command(url: str = "") -> str: - quoted_url = shlex.quote(url) if url else "" - return f"chromium {quoted_url}" if quoted_url else "chromium" diff --git a/tests/unit/test_astr_main_agent.py b/tests/unit/test_astr_main_agent.py index 2286f5c222..20fe5845d6 100644 --- a/tests/unit/test_astr_main_agent.py +++ b/tests/unit/test_astr_main_agent.py @@ -1573,12 +1573,12 @@ def test_apply_sandbox_tools_with_cua_adds_gui_guidance(self, mock_context): module._apply_sandbox_tools(config, req, "session-123") - assert "chromium" in req.system_prompt + assert "Chromium" in req.system_prompt assert "background=true" in req.system_prompt assert "astrbot_cua_screenshot" in req.system_prompt - assert "astrbot_cua_open_browser" in req.system_prompt assert "astrbot_cua_key_press" in req.system_prompt assert "return_image_to_llm" in req.system_prompt + assert "astrbot_execute_shell" in req.system_prompt assert "Do not use `firefox &`" in req.system_prompt def test_apply_sandbox_tools_with_shipyard_booter(self, monkeypatch, mock_context): diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 8d9c3b8edb..b77c2c3162 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -380,7 +380,6 @@ def test_cua_tools_are_registered_as_builtin_tools(): CuaKeyPressTool, CuaKeyboardTypeTool, CuaMouseClickTool, - CuaOpenBrowserTool, CuaScreenshotTool, ) @@ -390,7 +389,6 @@ def test_cua_tools_are_registered_as_builtin_tools(): assert manager.get_builtin_tool(CuaMouseClickTool).name == "astrbot_cua_mouse_click" assert manager.get_builtin_tool(CuaKeyboardTypeTool).name == "astrbot_cua_keyboard_type" assert manager.get_builtin_tool(CuaKeyPressTool).name == "astrbot_cua_key_press" - assert manager.get_builtin_tool(CuaOpenBrowserTool).name == "astrbot_cua_open_browser" def test_cua_runtime_tools_are_available_to_handoffs(): @@ -402,7 +400,6 @@ def test_cua_runtime_tools_are_available_to_handoffs(): assert "astrbot_cua_mouse_click" in tools assert "astrbot_cua_keyboard_type" in tools assert "astrbot_cua_key_press" in tools - assert "astrbot_cua_open_browser" in tools def test_runtime_tool_selection_treats_none_booter_as_empty(): @@ -457,222 +454,6 @@ async def fake_get_booter(context, session_id): assert sandbox.keyboard.pressed == ["Enter"] -@pytest.mark.asyncio -async def test_cua_open_browser_tool_launches_chromium_and_screenshots(monkeypatch, tmp_path): - from astrbot.core.tools.computer_tools import cua as cua_tools - from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool - - class FakeEvent: - unified_msg_origin = "umo" - role = "admin" - - async def send(self, message): - pass - - class FakeAstrContext: - event = FakeEvent() - context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) - - class FakeWrapper: - context = FakeAstrContext() - - shell_commands = [] - - class FakeShellComponent: - async def exec(self, command, background=False, **kwargs): - shell_commands.append((command, background, kwargs)) - return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} - - class FakeGUI: - async def screenshot(self, path: str): - Path(path).write_bytes(b"fake-png") - return {"success": True, "path": path, "mime_type": "image/png", "base64": ""} - - class FakeBooter: - shell = FakeShellComponent() - gui = FakeGUI() - - async def fake_get_booter(context, session_id): - return FakeBooter() - - monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) - monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) - - async def fake_sleep(seconds): - pass - - monkeypatch.setattr(cua_tools.asyncio, "sleep", fake_sleep) - - result = await CuaOpenBrowserTool().call( - FakeWrapper(), url="https://www.google.com/maps/dir/Tokyo/Osaka" - ) - assert isinstance(result, mcp.types.CallToolResult) - text_parts = [part for part in result.content if part.type == "text"] - payload = json.loads(text_parts[0].text) - - assert payload["success"] is True - assert payload["path"] - assert payload["browser"] == "auto" - assert payload["url"] == "https://www.google.com/maps/dir/Tokyo/Osaka" - assert "command" not in payload - assert "shell" not in payload - assert shell_commands[0][1] is True - assert shell_commands[0][0] == ( - "chromium https://www.google.com/maps/dir/Tokyo/Osaka" - ) - - -@pytest.mark.asyncio -async def test_cua_open_browser_tool_can_return_debug_payload(monkeypatch, tmp_path): - from astrbot.core.tools.computer_tools import cua as cua_tools - from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool - - class FakeEvent: - unified_msg_origin = "umo" - role = "admin" - - async def send(self, message): - pass - - class FakeAstrContext: - event = FakeEvent() - context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) - - class FakeWrapper: - context = FakeAstrContext() - - class FakeShellComponent: - async def exec(self, command, background=False, **kwargs): - return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} - - class FakeGUI: - async def screenshot(self, path: str): - Path(path).write_bytes(b"fake-png") - return {"success": True, "path": path, "mime_type": "image/png", "base64": ""} - - class FakeBooter: - shell = FakeShellComponent() - gui = FakeGUI() - - async def fake_get_booter(context, session_id): - return FakeBooter() - - monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) - monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) - - async def fake_sleep(seconds): - pass - - monkeypatch.setattr(cua_tools.asyncio, "sleep", fake_sleep) - - result = await CuaOpenBrowserTool().call( - FakeWrapper(), url="https://example.com", debug=True - ) - assert isinstance(result, mcp.types.CallToolResult) - text_parts = [part for part in result.content if part.type == "text"] - payload = json.loads(text_parts[0].text) - - assert "debug" in payload - assert "command" in payload["debug"] - assert "shell" in payload["debug"] - - -@pytest.mark.asyncio -async def test_cua_open_browser_tool_returns_llm_image_and_waits_for_startup( - monkeypatch, tmp_path -): - from astrbot.core.tools.computer_tools import cua as cua_tools - from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool - - class FakeEvent: - unified_msg_origin = "umo" - role = "admin" - - async def send(self, message): - pass - - class FakeAstrContext: - event = FakeEvent() - context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) - - class FakeWrapper: - context = FakeAstrContext() - - class FakeShellComponent: - async def exec(self, command, background=False, **kwargs): - return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} - - class FakeGUI: - async def screenshot(self, path: str): - Path(path).write_bytes(b"fake-browser-png") - return { - "success": True, - "path": path, - "mime_type": "image/png", - "base64": base64.b64encode(b"fake-browser-png").decode(), - } - - class FakeBooter: - shell = FakeShellComponent() - gui = FakeGUI() - - async def fake_get_booter(context, session_id): - return FakeBooter() - - sleep_calls = [] - - async def fake_sleep(seconds): - sleep_calls.append(seconds) - - monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) - monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) - monkeypatch.setattr(cua_tools.asyncio, "sleep", fake_sleep) - - result = await CuaOpenBrowserTool().call(FakeWrapper(), send_to_user=False) - - assert isinstance(result, mcp.types.CallToolResult) - image_parts = [part for part in result.content if part.type == "image"] - text_parts = [part for part in result.content if part.type == "text"] - payload = json.loads(text_parts[0].text) - assert image_parts[0].data == base64.b64encode(b"fake-browser-png").decode() - assert "base64" not in payload - assert sleep_calls == [8] - - -@pytest.mark.asyncio -async def test_cua_open_browser_reports_blank_exception_type(monkeypatch): - from astrbot.core.tools.computer_tools import cua as cua_tools - from astrbot.core.tools.computer_tools.cua import CuaOpenBrowserTool - - class FakeEvent: - unified_msg_origin = "umo" - role = "admin" - - class FakeAstrContext: - event = FakeEvent() - context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) - - class FakeWrapper: - context = FakeAstrContext() - - class FakeShellComponent: - async def exec(self, command, background=False, **kwargs): - raise TimeoutError() - - class FakeBooter: - shell = FakeShellComponent() - gui = object() - - async def fake_get_booter(context, session_id): - return FakeBooter() - - monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) - - result = await CuaOpenBrowserTool().call(FakeWrapper()) - - assert result == "Error opening CUA browser: TimeoutError" - - def test_cua_is_exposed_in_sandbox_config_metadata(): items = _agent_computer_use_items() booter = items["provider_settings.sandbox.booter"] From e06ae8dd1f11449210a14b483bca6df5c4e97030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 15:08:34 +0900 Subject: [PATCH 15/33] fix: align CUA desktop control guidance --- astrbot/core/astr_agent_tool_exec.py | 3 -- astrbot/core/astr_main_agent.py | 14 ++++-- astrbot/core/tools/computer_tools/__init__.py | 2 - astrbot/core/tools/computer_tools/cua.py | 32 ------------- astrbot/core/tools/computer_tools/shell.py | 4 +- tests/unit/test_astr_main_agent.py | 6 ++- tests/unit/test_cua_computer_use.py | 39 +-------------- tests/unit/test_func_tool_manager.py | 47 +++++++++++++++++++ 8 files changed, 65 insertions(+), 82 deletions(-) diff --git a/astrbot/core/astr_agent_tool_exec.py b/astrbot/core/astr_agent_tool_exec.py index d3af45a1a1..de5caad554 100644 --- a/astrbot/core/astr_agent_tool_exec.py +++ b/astrbot/core/astr_agent_tool_exec.py @@ -32,7 +32,6 @@ from astrbot.core.provider.register import llm_tools from astrbot.core.tools.computer_tools import ( CuaKeyboardTypeTool, - CuaKeyPressTool, CuaMouseClickTool, CuaScreenshotTool, ExecuteShellTool, @@ -216,13 +215,11 @@ def _get_runtime_computer_tools( screenshot_tool = tool_mgr.get_builtin_tool(CuaScreenshotTool) mouse_click_tool = tool_mgr.get_builtin_tool(CuaMouseClickTool) keyboard_type_tool = tool_mgr.get_builtin_tool(CuaKeyboardTypeTool) - key_press_tool = tool_mgr.get_builtin_tool(CuaKeyPressTool) tools.update( { screenshot_tool.name: screenshot_tool, mouse_click_tool.name: mouse_click_tool, keyboard_type_tool.name: keyboard_type_tool, - key_press_tool.name: key_press_tool, } ) return tools diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index 8a12d66f54..2b8ddab939 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -48,7 +48,6 @@ CreateSkillCandidateTool, CreateSkillPayloadTool, CuaKeyboardTypeTool, - CuaKeyPressTool, CuaMouseClickTool, CuaScreenshotTool, EvaluateSkillCandidateTool, @@ -1031,14 +1030,21 @@ def _apply_sandbox_tools( "`astrbot_cua_screenshot` to inspect the current desktop before " "clicking or typing; keep `return_image_to_llm` enabled unless the " "provider cannot handle image tool results. " + "By default, set `send_to_user=true` on CUA screenshots after each " + "visible step so the user stays in control; only suppress user-visible " + "screenshots when the user explicitly asks for only the final result. " "Use coordinates from screenshots for " - "`astrbot_cua_mouse_click`, `astrbot_cua_keyboard_type` for text input, " - "and `astrbot_cua_key_press` for Enter, Ctrl+L, Tab, or Escape.\n" + "`astrbot_cua_mouse_click`, and use `astrbot_cua_keyboard_type` for " + "text input. Prevent input field pollution: before typing, inspect the " + "screenshot, confirm the intended input is focused and empty or safe to " + "append to, and avoid typing into fields that already contain unrelated " + "text. For Enter, use `astrbot_cua_keyboard_type` with text=`\\n`. " + "For URL navigation, launch Chromium with " + "`astrbot_execute_shell` instead of using address-bar shortcuts.\n" ) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaScreenshotTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaMouseClickTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaKeyboardTypeTool)) - req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaKeyPressTool)) req.system_prompt = f"{req.system_prompt or ''}\n{SANDBOX_MODE_PROMPT}\n" diff --git a/astrbot/core/tools/computer_tools/__init__.py b/astrbot/core/tools/computer_tools/__init__.py index 24060a0e82..f90c2e1de8 100644 --- a/astrbot/core/tools/computer_tools/__init__.py +++ b/astrbot/core/tools/computer_tools/__init__.py @@ -1,6 +1,5 @@ from .cua import ( CuaKeyboardTypeTool, - CuaKeyPressTool, CuaMouseClickTool, CuaScreenshotTool, ) @@ -38,7 +37,6 @@ "BrowserExecTool", "CreateSkillCandidateTool", "CreateSkillPayloadTool", - "CuaKeyPressTool", "CuaKeyboardTypeTool", "CuaMouseClickTool", "CuaScreenshotTool", diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index 1e2320f478..87cf5cd79f 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -166,38 +166,6 @@ async def call( return f"Error typing in CUA desktop: {str(e)}" -@builtin_tool(config=_CUA_TOOL_CONFIG) -@dataclass -class CuaKeyPressTool(FunctionTool): - name: str = "astrbot_cua_key_press" - description: str = "Press a key or shortcut in the CUA sandbox desktop." - parameters: dict = field( - default_factory=lambda: { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "Key or shortcut to press, for example Enter, Escape, Tab, or Ctrl+L.", - }, - }, - "required": ["key"], - } - ) - - async def call( - self, - context: ContextWrapper[AstrAgentContext], - key: str, - ) -> ToolExecResult: - if err := check_admin_permission(context, "Using CUA keyboard"): - return err - try: - gui = await _get_gui_component(context) - return _to_json(await gui.press_key(key)) - except Exception as e: - return f"Error pressing key in CUA desktop: {str(e)}" - - def _new_screenshot_path(umo: str) -> str: safe_prefix = uuid.uuid5(uuid.NAMESPACE_DNS, umo).hex[:12] screenshot_dir = Path(get_astrbot_temp_path()) / "cua_screenshots" diff --git a/astrbot/core/tools/computer_tools/shell.py b/astrbot/core/tools/computer_tools/shell.py index af933e83b1..377af67f46 100644 --- a/astrbot/core/tools/computer_tools/shell.py +++ b/astrbot/core/tools/computer_tools/shell.py @@ -31,7 +31,7 @@ class ExecuteShellTool(FunctionTool): "background": { "type": "boolean", "description": "Whether to run the command in the background.", - "default": False, + "default": True, }, "env": { "type": "object", @@ -48,7 +48,7 @@ async def call( self, context: ContextWrapper[AstrAgentContext], command: str, - background: bool = False, + background: bool = True, env: dict = {}, ) -> ToolExecResult: if permission_error := check_admin_permission(context, "Shell execution"): diff --git a/tests/unit/test_astr_main_agent.py b/tests/unit/test_astr_main_agent.py index 20fe5845d6..d322de8465 100644 --- a/tests/unit/test_astr_main_agent.py +++ b/tests/unit/test_astr_main_agent.py @@ -1576,9 +1576,13 @@ def test_apply_sandbox_tools_with_cua_adds_gui_guidance(self, mock_context): assert "Chromium" in req.system_prompt assert "background=true" in req.system_prompt assert "astrbot_cua_screenshot" in req.system_prompt - assert "astrbot_cua_key_press" in req.system_prompt + assert "astrbot_cua_key_press" not in req.system_prompt assert "return_image_to_llm" in req.system_prompt assert "astrbot_execute_shell" in req.system_prompt + assert "\\n" in req.system_prompt + assert "input field pollution" in req.system_prompt + assert "send_to_user=true" in req.system_prompt + assert "only the final result" in req.system_prompt assert "Do not use `firefox &`" in req.system_prompt def test_apply_sandbox_tools_with_shipyard_booter(self, monkeypatch, mock_context): diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index b77c2c3162..f129d0713c 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -377,7 +377,6 @@ async def __aexit__(self, exc_type, exc, tb): def test_cua_tools_are_registered_as_builtin_tools(): from astrbot.core.tools.computer_tools.cua import ( - CuaKeyPressTool, CuaKeyboardTypeTool, CuaMouseClickTool, CuaScreenshotTool, @@ -388,7 +387,6 @@ def test_cua_tools_are_registered_as_builtin_tools(): assert manager.get_builtin_tool(CuaScreenshotTool).name == "astrbot_cua_screenshot" assert manager.get_builtin_tool(CuaMouseClickTool).name == "astrbot_cua_mouse_click" assert manager.get_builtin_tool(CuaKeyboardTypeTool).name == "astrbot_cua_keyboard_type" - assert manager.get_builtin_tool(CuaKeyPressTool).name == "astrbot_cua_key_press" def test_cua_runtime_tools_are_available_to_handoffs(): @@ -399,7 +397,7 @@ def test_cua_runtime_tools_are_available_to_handoffs(): assert "astrbot_cua_screenshot" in tools assert "astrbot_cua_mouse_click" in tools assert "astrbot_cua_keyboard_type" in tools - assert "astrbot_cua_key_press" in tools + assert "astrbot_cua_key_press" not in tools def test_runtime_tool_selection_treats_none_booter_as_empty(): @@ -419,41 +417,6 @@ def test_runtime_tool_selection_normalizes_cua_booter_case(): assert "astrbot_cua_screenshot" in tools -@pytest.mark.asyncio -async def test_cua_key_press_tool_presses_keyboard(monkeypatch): - from astrbot.core.tools.computer_tools import cua as cua_tools - from astrbot.core.tools.computer_tools.cua import CuaKeyPressTool - - class FakeEvent: - unified_msg_origin = "umo" - role = "admin" - - class FakeAstrContext: - event = FakeEvent() - context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) - - class FakeWrapper: - context = FakeAstrContext() - - sandbox = FakeSandbox() - from astrbot.core.computer.booters.cua import CuaGUIComponent - - sandbox_gui = CuaGUIComponent(sandbox) - - class FakeBooter: - gui = sandbox_gui - - async def fake_get_booter(context, session_id): - return FakeBooter() - - monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) - - result = await CuaKeyPressTool().call(FakeWrapper(), key="Enter") - - assert json.loads(result)["success"] is True - assert sandbox.keyboard.pressed == ["Enter"] - - def test_cua_is_exposed_in_sandbox_config_metadata(): items = _agent_computer_use_items() booter = items["provider_settings.sandbox.booter"] diff --git a/tests/unit/test_func_tool_manager.py b/tests/unit/test_func_tool_manager.py index c87a2de085..c62277d8dd 100644 --- a/tests/unit/test_func_tool_manager.py +++ b/tests/unit/test_func_tool_manager.py @@ -1,3 +1,7 @@ +import json + +import pytest + from astrbot.core import sp from astrbot.core.provider.func_tool_manager import FunctionToolManager from astrbot.core.tools.computer_tools.shell import ExecuteShellTool @@ -39,9 +43,52 @@ def test_computer_tools_are_registered_as_builtin_tools(): tool = manager.get_builtin_tool(ExecuteShellTool) assert tool.name == "astrbot_execute_shell" + assert tool.parameters["properties"]["background"]["default"] is True assert manager.is_builtin_tool("astrbot_execute_shell") is True +@pytest.mark.asyncio +async def test_execute_shell_defaults_to_background(monkeypatch): + from astrbot.core.tools.computer_tools import shell as shell_tools + + calls = [] + + class FakeShell: + async def exec(self, command, cwd=None, background=False, env=None): + calls.append({"command": command, "background": background}) + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeBooter: + shell = FakeShell() + + class FakeConfig: + def get_config(self, umo): + return {"provider_settings": {"computer_use_runtime": "sandbox"}} + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + context = FakeConfig() + event = FakeEvent() + + class FakeWrapper: + context = FakeAstrContext() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(shell_tools, "get_booter", fake_get_booter) + + result = await ExecuteShellTool().call(FakeWrapper(), command="chromium https://example.com") + + assert json.loads(result)["success"] is True + assert calls == [ + {"command": "chromium https://example.com", "background": True} + ] + + def test_firecrawl_tools_are_registered_as_builtin_tools(): manager = FunctionToolManager() From 095d1422d7b5582a0ff1cbc03be1d760dfee40bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 18:26:16 +0900 Subject: [PATCH 16/33] fix: harden CUA shell background handling --- astrbot/core/astr_main_agent.py | 28 +++----- astrbot/core/computer/booters/cua.py | 17 ++++- astrbot/core/tools/computer_tools/shell.py | 15 +++- tests/unit/test_astr_main_agent.py | 7 +- tests/unit/test_cua_computer_use.py | 19 +++++ tests/unit/test_func_tool_manager.py | 83 ++++++++++++++++++++++ 6 files changed, 140 insertions(+), 29 deletions(-) diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index 2b8ddab939..b013f010e1 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -1021,26 +1021,14 @@ def _apply_sandbox_tools( if booter == "cua": req.system_prompt += ( "\n[CUA Desktop Control]\n" - "When launching GUI apps, use `astrbot_execute_shell` with " - "background=true; do not append shell background operators manually. " - "For browser tasks, use `astrbot_execute_shell` with `background=true` " - "to launch Chromium in the default CUA Linux sandbox. " - "Do not use `firefox &` unless the user confirms Firefox exists. " - "After launching or changing any GUI app, immediately call " - "`astrbot_cua_screenshot` to inspect the current desktop before " - "clicking or typing; keep `return_image_to_llm` enabled unless the " - "provider cannot handle image tool results. " - "By default, set `send_to_user=true` on CUA screenshots after each " - "visible step so the user stays in control; only suppress user-visible " - "screenshots when the user explicitly asks for only the final result. " - "Use coordinates from screenshots for " - "`astrbot_cua_mouse_click`, and use `astrbot_cua_keyboard_type` for " - "text input. Prevent input field pollution: before typing, inspect the " - "screenshot, confirm the intended input is focused and empty or safe to " - "append to, and avoid typing into fields that already contain unrelated " - "text. For Enter, use `astrbot_cua_keyboard_type` with text=`\\n`. " - "For URL navigation, launch Chromium with " - "`astrbot_execute_shell` instead of using address-bar shortcuts.\n" + "Use `astrbot_execute_shell` with `background=true` to launch GUI apps. " + 'Use Firefox for browser tasks, for example `firefox "https://example.com"`. ' + "After each visible step, call `astrbot_cua_screenshot` with " + "`send_to_user=true` and `return_image_to_llm=true` so the user can " + "monitor progress. When typing, inspect the screenshot first and confirm " + "the target field is focused and empty or safe to append to. Use " + "`astrbot_cua_mouse_click` for coordinates and `astrbot_cua_keyboard_type` " + "for text input; use text=`\\n` for Enter.\n" ) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaScreenshotTool)) req.func_tool.add_tool(tool_mgr.get_builtin_tool(CuaMouseClickTool)) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 19e552bbd9..cc193e8e38 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -160,9 +160,7 @@ async def exec( if env: kwargs["env"] = env if background: - command = ( - f"nohup sh -lc {command!r} >/tmp/astrbot_cua_bg.log 2>&1 & echo $!" - ) + command = _build_cua_background_command(command) result = await _call_first( self._sandbox.shell, ("run", "exec"), command, **kwargs @@ -182,6 +180,19 @@ async def exec( return response +def _build_cua_background_command(command: str) -> str: + launcher = ( + "import subprocess,sys,time; " + 'p=subprocess.Popen(["sh","-lc",sys.argv[1]], ' + "stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, " + "stderr=subprocess.DEVNULL, start_new_session=True); " + "time.sleep(0.2); " + "code=p.poll(); " + "sys.exit(0 if code is None else code)" + ) + return f"python3 -c {shlex.quote(launcher)} {shlex.quote(command)}" + + class CuaPythonComponent(PythonComponent): def __init__(self, sandbox: Any) -> None: self._sandbox = sandbox diff --git a/astrbot/core/tools/computer_tools/shell.py b/astrbot/core/tools/computer_tools/shell.py index 377af67f46..638e443752 100644 --- a/astrbot/core/tools/computer_tools/shell.py +++ b/astrbot/core/tools/computer_tools/shell.py @@ -1,4 +1,5 @@ import json +import re from dataclasses import dataclass, field from astrbot.api import FunctionTool @@ -67,12 +68,22 @@ async def call( current_workspace_root.mkdir(parents=True, exist_ok=True) cwd = str(current_workspace_root) + effective_background = background and not _is_self_detached_command(command) result = await sb.shell.exec( command, cwd=cwd, - background=background, + background=effective_background, env=env, ) return json.dumps(result, ensure_ascii=False) except Exception as e: - return f"Error executing command: {str(e)}" + detail = str(e) or type(e).__name__ + return f"Error executing command: {detail}" + + +def _is_self_detached_command(command: str) -> bool: + stripped = command.strip() + return ( + stripped.startswith("nohup ") + or re.search(r"(?:^|\s)&\s*$", stripped) is not None + ) diff --git a/tests/unit/test_astr_main_agent.py b/tests/unit/test_astr_main_agent.py index d322de8465..025ae86109 100644 --- a/tests/unit/test_astr_main_agent.py +++ b/tests/unit/test_astr_main_agent.py @@ -1573,17 +1573,16 @@ def test_apply_sandbox_tools_with_cua_adds_gui_guidance(self, mock_context): module._apply_sandbox_tools(config, req, "session-123") - assert "Chromium" in req.system_prompt + assert "Firefox" in req.system_prompt assert "background=true" in req.system_prompt + assert 'firefox "https://example.com"' in req.system_prompt assert "astrbot_cua_screenshot" in req.system_prompt assert "astrbot_cua_key_press" not in req.system_prompt assert "return_image_to_llm" in req.system_prompt assert "astrbot_execute_shell" in req.system_prompt assert "\\n" in req.system_prompt - assert "input field pollution" in req.system_prompt assert "send_to_user=true" in req.system_prompt - assert "only the final result" in req.system_prompt - assert "Do not use `firefox &`" in req.system_prompt + assert "focused and empty or safe to append" in req.system_prompt def test_apply_sandbox_tools_with_shipyard_booter(self, monkeypatch, mock_context): """Test sandbox tools with shipyard booter configuration.""" diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index f129d0713c..ecaa500e9e 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -302,6 +302,25 @@ async def test_cua_shell_normalizes_output_returncode_shape(): } +@pytest.mark.asyncio +async def test_cua_shell_background_wrapper_detaches_via_python_subprocess(): + from astrbot.core.computer.booters.cua import CuaShellComponent + + sandbox = FakeSandbox() + + await CuaShellComponent(sandbox).exec("chromium https://example.com", background=True) + + command = sandbox.shell.commands[0][0] + assert command.startswith("python3 -c ") + assert "subprocess.Popen" in command + assert "start_new_session=True" in command + assert "stdout=subprocess.DEVNULL" in command + assert "stderr=subprocess.DEVNULL" in command + assert "time.sleep(0.2)" in command + assert "'chromium https://example.com'" in command + assert "&" not in command + + @pytest.mark.asyncio async def test_cua_gui_reports_missing_mouse_or_keyboard(): from astrbot.core.computer.booters.cua import CuaGUIComponent diff --git a/tests/unit/test_func_tool_manager.py b/tests/unit/test_func_tool_manager.py index c62277d8dd..d605ee709a 100644 --- a/tests/unit/test_func_tool_manager.py +++ b/tests/unit/test_func_tool_manager.py @@ -89,6 +89,89 @@ async def fake_get_booter(context, session_id): ] +@pytest.mark.asyncio +async def test_execute_shell_avoids_double_background_for_detached_commands(monkeypatch): + from astrbot.core.tools.computer_tools import shell as shell_tools + + calls = [] + + class FakeShell: + async def exec(self, command, cwd=None, background=False, env=None): + calls.append({"command": command, "background": background}) + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeBooter: + shell = FakeShell() + + class FakeConfig: + def get_config(self, umo): + return {"provider_settings": {"computer_use_runtime": "sandbox"}} + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + context = FakeConfig() + event = FakeEvent() + + class FakeWrapper: + context = FakeAstrContext() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(shell_tools, "get_booter", fake_get_booter) + + command = "nohup firefox >/tmp/astrbot-firefox.log 2>&1 &" + result = await ExecuteShellTool().call( + FakeWrapper(), command=command, background=True + ) + + assert json.loads(result)["success"] is True + assert calls == [{"command": command, "background": False}] + + +@pytest.mark.asyncio +async def test_execute_shell_reports_blank_exception_type(monkeypatch): + from astrbot.core.tools.computer_tools import shell as shell_tools + + class BlankError(Exception): + def __str__(self): + return "" + + class FakeShell: + async def exec(self, command, cwd=None, background=False, env=None): + raise BlankError() + + class FakeBooter: + shell = FakeShell() + + class FakeConfig: + def get_config(self, umo): + return {"provider_settings": {"computer_use_runtime": "sandbox"}} + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + context = FakeConfig() + event = FakeEvent() + + class FakeWrapper: + context = FakeAstrContext() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(shell_tools, "get_booter", fake_get_booter) + + result = await ExecuteShellTool().call(FakeWrapper(), command="firefox") + + assert result == "Error executing command: BlankError" + + def test_firecrawl_tools_are_registered_as_builtin_tools(): manager = FunctionToolManager() From 8367e11b48e71b862d22f24664f6697705de6fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 23:03:48 +0900 Subject: [PATCH 17/33] fix: harden CUA runtime adapters --- astrbot/core/computer/booters/cua.py | 282 ++++++++++++++++------- astrbot/core/tools/computer_tools/cua.py | 10 +- tests/unit/test_cua_computer_use.py | 146 +++++++++--- 3 files changed, 326 insertions(+), 112 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index cc193e8e38..7644d37a51 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -132,8 +132,9 @@ def _has_component_method(root: Any, component_name: str, method_name: str) -> b class CuaShellComponent(ShellComponent): - def __init__(self, sandbox: Any) -> None: + def __init__(self, sandbox: Any, os_type: str = "linux") -> None: self._sandbox = sandbox + self._os_type = os_type.lower() async def exec( self, @@ -160,6 +161,13 @@ async def exec( if env: kwargs["env"] = env if background: + if self._os_type not in {"linux", "darwin", "macos"}: + return { + "stdout": "", + "stderr": "error: background shell execution is only supported for POSIX CUA images.", + "exit_code": 2, + "success": False, + } command = _build_cua_background_command(command) result = await _call_first( @@ -186,6 +194,7 @@ def _build_cua_background_command(command: str) -> str: 'p=subprocess.Popen(["sh","-lc",sys.argv[1]], ' "stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, " "stderr=subprocess.DEVNULL, start_new_session=True); " + "sys.stdout.write(str(p.pid)+chr(10)); sys.stdout.flush(); " "time.sleep(0.2); " "code=p.poll(); " "sys.exit(0 if code is None else code)" @@ -194,8 +203,9 @@ def _build_cua_background_command(command: str) -> str: class CuaPythonComponent(PythonComponent): - def __init__(self, sandbox: Any) -> None: + def __init__(self, sandbox: Any, os_type: str = "linux") -> None: self._sandbox = sandbox + self._os_type = os_type async def exec( self, @@ -210,7 +220,7 @@ async def exec( result = await _call_first(python, ("run", "exec"), code, timeout=timeout) proc = _normalize_process_result(result) else: - shell = CuaShellComponent(self._sandbox) + shell = CuaShellComponent(self._sandbox, os_type=self._os_type) result = await shell.exec(f"python3 - <<'PY'\n{code}\nPY", timeout=timeout) proc = ProcessResult( stdout=result.get("stdout", ""), @@ -232,14 +242,10 @@ async def exec( } -class CuaFileSystemComponent(FileSystemComponent): - def __init__(self, sandbox: Any) -> None: - self._sandbox = sandbox - self._shell = CuaShellComponent(sandbox) - - @property - def _filesystem(self) -> Any: - return getattr(self._sandbox, "filesystem", None) +class _NativeFSAdapter: + def __init__(self, fs: Any, shell: CuaShellComponent) -> None: + self._fs = fs + self._shell = shell async def create_file( self, @@ -257,14 +263,14 @@ async def read_file( offset: int | None = None, limit: int | None = None, ) -> dict[str, Any]: - fs = self._filesystem - if fs is not None and hasattr(fs, "read_file"): - content = await fs.read_file(path) - else: + read_file = getattr(self._fs, "read_file", None) + if read_file is None: result = await self._shell.exec(f"cat {path!r}") if result.get("stderr"): return {"success": False, "path": path, "error": result["stderr"]} content = result.get("stdout", "") + else: + content = await _maybe_await(read_file(path)) if isinstance(content, bytes): content = content.decode(encoding, errors="replace") return { @@ -275,6 +281,140 @@ async def read_file( ), } + async def write_file( + self, + path: str, + content: str, + mode: str = "w", + encoding: str = "utf-8", + ) -> dict[str, Any]: + _ = mode + write_file = getattr(self._fs, "write_file", None) + if write_file is None: + await _write_base64_via_shell(self._shell, path, content.encode(encoding)) + else: + await _maybe_await(write_file(path, content)) + return {"success": True, "path": path} + + async def delete_file(self, path: str) -> dict[str, Any]: + delete = getattr(self._fs, "delete", None) or getattr( + self._fs, "delete_file", None + ) + if delete is None: + await self._shell.exec(f"rm -rf {path!r}") + else: + await _maybe_await(delete(path)) + return {"success": True, "path": path} + + async def list_dir( + self, + path: str = ".", + show_hidden: bool = False, + ) -> dict[str, Any]: + list_dir = getattr(self._fs, "list_dir", None) + if list_dir is not None: + entries = await _maybe_await(list_dir(path)) + return {"success": True, "path": path, "entries": entries} + return await _list_dir_via_shell(self._shell, path, show_hidden) + + +class _ShellFSAdapter: + def __init__(self, shell: CuaShellComponent) -> None: + self._shell = shell + + async def create_file( + self, + path: str, + content: str = "", + mode: int = 0o644, + ) -> dict[str, Any]: + await self.write_file(path, content) + return {"success": True, "path": path, "mode": mode} + + async def read_file( + self, + path: str, + encoding: str = "utf-8", + offset: int | None = None, + limit: int | None = None, + ) -> dict[str, Any]: + _ = encoding + result = await self._shell.exec(f"cat {path!r}") + if result.get("stderr"): + return {"success": False, "path": path, "error": result["stderr"]} + return { + "success": True, + "path": path, + "content": _slice_content_by_lines( + str(result.get("stdout", "")), offset=offset, limit=limit + ), + } + + async def write_file( + self, + path: str, + content: str, + mode: str = "w", + encoding: str = "utf-8", + ) -> dict[str, Any]: + _ = mode + await _write_base64_via_shell(self._shell, path, content.encode(encoding)) + return {"success": True, "path": path} + + async def delete_file(self, path: str) -> dict[str, Any]: + await self._shell.exec(f"rm -rf {path!r}") + return {"success": True, "path": path} + + async def list_dir( + self, + path: str = ".", + show_hidden: bool = False, + ) -> dict[str, Any]: + return await _list_dir_via_shell(self._shell, path, show_hidden) + + +async def _list_dir_via_shell( + shell: CuaShellComponent, + path: str, + show_hidden: bool, +) -> dict[str, Any]: + flags = "-1A" if show_hidden else "-1" + result = await shell.exec(f"ls {flags} {path!r}") + return { + "success": not bool(result.get("stderr")), + "path": path, + "entries": _split_listing_entries(result.get("stdout", "")), + "error": result.get("stderr", ""), + } + + +class CuaFileSystemComponent(FileSystemComponent): + def __init__(self, sandbox: Any, os_type: str = "linux") -> None: + self._shell = CuaShellComponent(sandbox, os_type=os_type) + fs = getattr(sandbox, "filesystem", None) + self._impl = ( + _NativeFSAdapter(fs, self._shell) + if fs is not None + else _ShellFSAdapter(self._shell) + ) + + async def create_file( + self, + path: str, + content: str = "", + mode: int = 0o644, + ) -> dict[str, Any]: + return await self._impl.create_file(path, content, mode) + + async def read_file( + self, + path: str, + encoding: str = "utf-8", + offset: int | None = None, + limit: int | None = None, + ) -> dict[str, Any]: + return await self._impl.read_file(path, encoding, offset, limit) + async def search_files( self, pattern: str, @@ -326,44 +466,17 @@ async def write_file( mode: str = "w", encoding: str = "utf-8", ) -> dict[str, Any]: - _ = mode - fs = self._filesystem - if fs is not None and hasattr(fs, "write_file"): - await fs.write_file(path, content) - else: - await _write_base64_via_shell(self._shell, path, content.encode(encoding)) - return {"success": True, "path": path} + return await self._impl.write_file(path, content, mode, encoding) async def delete_file(self, path: str) -> dict[str, Any]: - fs = self._filesystem - if fs is not None: - if hasattr(fs, "delete"): - await fs.delete(path) - elif hasattr(fs, "delete_file"): - await fs.delete_file(path) - else: - await self._shell.exec(f"rm -rf {path!r}") - else: - await self._shell.exec(f"rm -rf {path!r}") - return {"success": True, "path": path} + return await self._impl.delete_file(path) async def list_dir( self, path: str = ".", show_hidden: bool = False, ) -> dict[str, Any]: - fs = self._filesystem - if fs is not None and hasattr(fs, "list_dir"): - entries = await fs.list_dir(path) - return {"success": True, "path": path, "entries": entries} - flags = "-1A" if show_hidden else "-1" - result = await self._shell.exec(f"ls {flags} {path!r}") - return { - "success": not bool(result.get("stderr")), - "path": path, - "entries": _split_listing_entries(result.get("stdout", "")), - "error": result.get("stderr", ""), - } + return await self._impl.list_dir(path, show_hidden) class CuaGUIComponent(GUIComponent): @@ -441,6 +554,16 @@ def _screenshot_to_bytes(raw: Any) -> bytes: raise TypeError(f"Unsupported CUA screenshot result: {type(raw)!r}") +@dataclass(slots=True) +class _CuaRuntime: + sandbox_cm: Any + sandbox: Any + shell: CuaShellComponent + python: CuaPythonComponent + fs: CuaFileSystemComponent + gui: CuaGUIComponent | None + + class CuaBooter(ComputerBooter): def __init__( self, @@ -457,12 +580,7 @@ def __init__( self.telemetry_enabled = telemetry_enabled self.local = local self.api_key = api_key - self._sandbox: Any | None = None - self._sandbox_cm: Any | None = None - self._shell: CuaShellComponent | None = None - self._python: CuaPythonComponent | None = None - self._fs: CuaFileSystemComponent | None = None - self._gui: CuaGUIComponent | None = None + self._runtime: _CuaRuntime | None = None async def boot(self, session_id: str) -> None: _ = session_id @@ -476,12 +594,16 @@ async def boot(self, session_id: str) -> None: image_obj = self._build_image(Image) ephemeral_kwargs = self._build_ephemeral_kwargs(Sandbox.ephemeral) - self._sandbox_cm = Sandbox.ephemeral(image_obj, **ephemeral_kwargs) - self._sandbox = await self._sandbox_cm.__aenter__() - self._shell = CuaShellComponent(self._sandbox) - self._python = CuaPythonComponent(self._sandbox) - self._fs = CuaFileSystemComponent(self._sandbox) - self._gui = CuaGUIComponent(self._sandbox) + sandbox_cm = Sandbox.ephemeral(image_obj, **ephemeral_kwargs) + sandbox = await sandbox_cm.__aenter__() + self._runtime = _CuaRuntime( + sandbox_cm=sandbox_cm, + sandbox=sandbox, + shell=CuaShellComponent(sandbox, os_type=self.os_type), + python=CuaPythonComponent(sandbox, os_type=self.os_type), + fs=CuaFileSystemComponent(sandbox, os_type=self.os_type), + gui=CuaGUIComponent(sandbox), + ) logger.info( "[Computer] CUA sandbox booted: image=%s, os_type=%s", self.image, @@ -515,24 +637,20 @@ def _build_ephemeral_kwargs(self, ephemeral: Any) -> dict[str, Any]: return kwargs async def shutdown(self) -> None: - if self._sandbox_cm is not None: - await self._sandbox_cm.__aexit__(None, None, None) - self._sandbox_cm = None - self._sandbox = None - self._shell = None - self._python = None - self._fs = None - self._gui = None + if self._runtime is not None: + await self._runtime.sandbox_cm.__aexit__(None, None, None) + self._runtime = None @property def capabilities(self) -> tuple[str, ...] | None: capabilities = ["python", "shell", "filesystem"] - if self._sandbox is None: + if self._runtime is None: return tuple(capabilities) - has_screenshot = getattr(self._sandbox, "screenshot", None) is not None - has_mouse = _has_component_method(self._sandbox, "mouse", "click") - has_keyboard = _has_component_method(self._sandbox, "keyboard", "type") + sandbox = self._runtime.sandbox + has_screenshot = getattr(sandbox, "screenshot", None) is not None + has_mouse = _has_component_method(sandbox, "mouse", "click") + has_keyboard = _has_component_method(sandbox, "keyboard", "type") if has_screenshot or has_mouse or has_keyboard: capabilities.append("gui") if has_screenshot: @@ -545,33 +663,34 @@ def capabilities(self) -> tuple[str, ...] | None: @property def fs(self) -> FileSystemComponent: - if self._fs is None: + if self._runtime is None: raise RuntimeError("CuaBooter is not initialized.") - return self._fs + return self._runtime.fs @property def python(self) -> PythonComponent: - if self._python is None: + if self._runtime is None: raise RuntimeError("CuaBooter is not initialized.") - return self._python + return self._runtime.python @property def shell(self) -> ShellComponent: - if self._shell is None: + if self._runtime is None: raise RuntimeError("CuaBooter is not initialized.") - return self._shell + return self._runtime.shell @property def gui(self) -> GUIComponent | None: - return self._gui + return None if self._runtime is None else self._runtime.gui async def upload_file(self, path: str, file_name: str) -> dict: local_path = Path(path) if not local_path.is_file(): return {"success": False, "error": f"File not found: {path}"} - if self._sandbox is not None and hasattr(self._sandbox, "upload_file"): + sandbox = None if self._runtime is None else self._runtime.sandbox + if sandbox is not None and hasattr(sandbox, "upload_file"): return _maybe_model_dump( - await self._sandbox.upload_file(str(local_path), file_name) + await sandbox.upload_file(str(local_path), file_name) ) result = await _write_base64_via_shell( self.shell, file_name, local_path.read_bytes() @@ -583,8 +702,9 @@ async def upload_file(self, path: str, file_name: str) -> dict: } async def download_file(self, remote_path: str, local_path: str) -> None: - if self._sandbox is not None and hasattr(self._sandbox, "download_file"): - await self._sandbox.download_file(remote_path, local_path) + sandbox = None if self._runtime is None else self._runtime.sandbox + if sandbox is not None and hasattr(sandbox, "download_file"): + await sandbox.download_file(remote_path, local_path) return result = await self.shell.exec(f"base64 {remote_path!r}") if result.get("stderr"): @@ -593,4 +713,4 @@ async def download_file(self, remote_path: str, local_path: str) -> None: Path(local_path).write_bytes(base64.b64decode(result.get("stdout", ""))) async def available(self) -> bool: - return self._sandbox is not None + return self._runtime is not None diff --git a/astrbot/core/tools/computer_tools/cua.py b/astrbot/core/tools/computer_tools/cua.py index 87cf5cd79f..7b37a55086 100644 --- a/astrbot/core/tools/computer_tools/cua.py +++ b/astrbot/core/tools/computer_tools/cua.py @@ -28,6 +28,10 @@ def _to_json(data: Any) -> str: return json.dumps(data, ensure_ascii=False, default=str) +def _exception_detail(error: Exception) -> str: + return str(error) or type(error).__name__ + + async def _get_gui_component(context: ContextWrapper[AstrAgentContext]) -> Any: booter = await get_booter( context.context.context, @@ -97,7 +101,7 @@ async def call( ) return mcp.types.CallToolResult(content=content) except Exception as e: - return f"Error taking CUA screenshot: {str(e)}" + return f"Error taking CUA screenshot: {_exception_detail(e)}" @builtin_tool(config=_CUA_TOOL_CONFIG) @@ -134,7 +138,7 @@ async def call( gui = await _get_gui_component(context) return _to_json(await gui.click(x, y, button=button)) except Exception as e: - return f"Error clicking CUA desktop: {str(e)}" + return f"Error clicking CUA desktop: {_exception_detail(e)}" @builtin_tool(config=_CUA_TOOL_CONFIG) @@ -163,7 +167,7 @@ async def call( gui = await _get_gui_component(context) return _to_json(await gui.type_text(text)) except Exception as e: - return f"Error typing in CUA desktop: {str(e)}" + return f"Error typing in CUA desktop: {_exception_detail(e)}" def _new_screenshot_path(umo: str) -> str: diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index ecaa500e9e..639d33e33d 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -2,13 +2,12 @@ import base64 import json from pathlib import Path -from types import SimpleNamespace -import pytest import mcp +import pytest -from astrbot.core.config.default import CONFIG_METADATA_3 from astrbot.core.astr_agent_tool_exec import FunctionToolExecutor +from astrbot.core.config.default import CONFIG_METADATA_3 from astrbot.core.provider.func_tool_manager import FunctionToolManager @@ -134,7 +133,9 @@ async def boot(self, session_id: str): async def available(self): return True - monkeypatch.setattr(computer_client, "_sync_skills_to_sandbox", lambda booter: asyncio.sleep(0)) + monkeypatch.setattr( + computer_client, "_sync_skills_to_sandbox", lambda booter: asyncio.sleep(0) + ) monkeypatch.setitem(computer_client.session_booter, "cua-test", None) computer_client.session_booter.pop("cua-test", None) monkeypatch.setattr( @@ -172,9 +173,9 @@ def test_cua_ephemeral_kwargs_include_local_when_supported(): def ephemeral(image, ttl=None, telemetry_enabled=None, local=None): return image, ttl, telemetry_enabled, local - kwargs = CuaBooter(ttl=120, telemetry_enabled=False, local=True)._build_ephemeral_kwargs( - ephemeral - ) + kwargs = CuaBooter( + ttl=120, telemetry_enabled=False, local=True + )._build_ephemeral_kwargs(ephemeral) assert kwargs == {"ttl": 120, "telemetry_enabled": False, "local": True} @@ -308,12 +309,15 @@ async def test_cua_shell_background_wrapper_detaches_via_python_subprocess(): sandbox = FakeSandbox() - await CuaShellComponent(sandbox).exec("chromium https://example.com", background=True) + await CuaShellComponent(sandbox).exec( + "chromium https://example.com", background=True + ) command = sandbox.shell.commands[0][0] assert command.startswith("python3 -c ") assert "subprocess.Popen" in command assert "start_new_session=True" in command + assert "p.pid" in command assert "stdout=subprocess.DEVNULL" in command assert "stderr=subprocess.DEVNULL" in command assert "time.sleep(0.2)" in command @@ -321,6 +325,25 @@ async def test_cua_shell_background_wrapper_detaches_via_python_subprocess(): assert "&" not in command +@pytest.mark.asyncio +async def test_cua_shell_background_rejects_non_posix_os_type(): + from astrbot.core.computer.booters.cua import CuaShellComponent + + sandbox = FakeSandbox() + + result = await CuaShellComponent(sandbox, os_type="windows").exec( + "start notepad", background=True + ) + + assert result == { + "stdout": "", + "stderr": "error: background shell execution is only supported for POSIX CUA images.", + "exit_code": 2, + "success": False, + } + assert sandbox.shell.commands == [] + + @pytest.mark.asyncio async def test_cua_gui_reports_missing_mouse_or_keyboard(): from astrbot.core.computer.booters.cua import CuaGUIComponent @@ -342,10 +365,28 @@ async def screenshot(self): def test_cua_capabilities_reflect_initialized_sandbox_gui_devices(): - from astrbot.core.computer.booters.cua import CuaBooter + from astrbot.core.computer.booters.cua import ( + CuaBooter, + CuaFileSystemComponent, + CuaGUIComponent, + CuaPythonComponent, + CuaShellComponent, + _CuaRuntime, + ) + + def set_runtime(booter, sandbox): + shell = CuaShellComponent(sandbox) + booter._runtime = _CuaRuntime( + sandbox_cm=object(), + sandbox=sandbox, + shell=shell, + python=CuaPythonComponent(sandbox), + fs=CuaFileSystemComponent(sandbox), + gui=CuaGUIComponent(sandbox), + ) booter = CuaBooter() - booter._sandbox = FakeSandbox() + set_runtime(booter, FakeSandbox()) assert booter.capabilities == ( "python", @@ -361,14 +402,21 @@ class ScreenshotOnlySandbox: async def screenshot(self): return b"fake-png" - booter._sandbox = ScreenshotOnlySandbox() + set_runtime(booter, ScreenshotOnlySandbox()) assert booter.capabilities == ("python", "shell", "filesystem", "gui", "screenshot") @pytest.mark.asyncio async def test_cua_shutdown_clears_cached_components(): - from astrbot.core.computer.booters.cua import CuaBooter + from astrbot.core.computer.booters.cua import ( + CuaBooter, + CuaFileSystemComponent, + CuaGUIComponent, + CuaPythonComponent, + CuaShellComponent, + _CuaRuntime, + ) closed = [] @@ -377,21 +425,21 @@ async def __aexit__(self, exc_type, exc, tb): closed.append(True) booter = CuaBooter() - booter._sandbox = FakeSandbox() - booter._sandbox_cm = FakeSandboxContext() - booter._shell = object() - booter._python = object() - booter._fs = object() - booter._gui = object() + sandbox = FakeSandbox() + booter._runtime = _CuaRuntime( + sandbox_cm=FakeSandboxContext(), + sandbox=sandbox, + shell=CuaShellComponent(sandbox), + python=CuaPythonComponent(sandbox), + fs=CuaFileSystemComponent(sandbox), + gui=CuaGUIComponent(sandbox), + ) await booter.shutdown() assert closed == [True] assert await booter.available() is False - assert booter._shell is None - assert booter._python is None - assert booter._fs is None - assert booter._gui is None + assert booter._runtime is None def test_cua_tools_are_registered_as_builtin_tools(): @@ -405,7 +453,10 @@ def test_cua_tools_are_registered_as_builtin_tools(): assert manager.get_builtin_tool(CuaScreenshotTool).name == "astrbot_cua_screenshot" assert manager.get_builtin_tool(CuaMouseClickTool).name == "astrbot_cua_mouse_click" - assert manager.get_builtin_tool(CuaKeyboardTypeTool).name == "astrbot_cua_keyboard_type" + assert ( + manager.get_builtin_tool(CuaKeyboardTypeTool).name + == "astrbot_cua_keyboard_type" + ) def test_cua_runtime_tools_are_available_to_handoffs(): @@ -448,9 +499,12 @@ def test_cua_is_exposed_in_sandbox_config_metadata(): assert "provider_settings.sandbox.cua_telemetry_enabled" in items assert "provider_settings.sandbox.cua_local" in items assert "provider_settings.sandbox.cua_api_key" in items - assert items["provider_settings.sandbox.cua_api_key"]["condition"][ - "provider_settings.sandbox.cua_local" - ] is False + assert ( + items["provider_settings.sandbox.cua_api_key"]["condition"][ + "provider_settings.sandbox.cua_local" + ] + is False + ) @pytest.mark.asyncio @@ -527,7 +581,9 @@ async def send(self, message): class FakeAstrContext: event = FakeEvent() - context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + context = FakeContext( + {"provider_settings": {"computer_use_require_admin": True}} + ) class FakeWrapper: context = FakeAstrContext() @@ -576,7 +632,9 @@ async def send(self, message): class FakeAstrContext: event = FakeEvent() - context = FakeContext({"provider_settings": {"computer_use_require_admin": True}}) + context = FakeContext( + {"provider_settings": {"computer_use_require_admin": True}} + ) class FakeWrapper: context = FakeAstrContext() @@ -646,3 +704,35 @@ async def fail_gui_lookup(context): assert await CuaMouseClickTool().call(FakeWrapper(), x=1, y=2) == "denied" assert await CuaKeyboardTypeTool().call(FakeWrapper(), text="hello") == "denied" assert sent_messages == [] + + +@pytest.mark.asyncio +async def test_cua_tools_include_exception_type_for_blank_error(monkeypatch): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaMouseClickTool + + class BlankError(Exception): + def __str__(self): + return "" + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext( + {"provider_settings": {"computer_use_require_admin": True}} + ) + + class FakeWrapper: + context = FakeAstrContext() + + async def fail_gui_lookup(context): + raise BlankError() + + monkeypatch.setattr(cua_tools, "_get_gui_component", fail_gui_lookup) + + assert await CuaMouseClickTool().call(FakeWrapper(), x=1, y=2) == ( + "Error clicking CUA desktop: BlankError" + ) From 60625d23c2407caaea90ec525c196f63a5c3cc5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 23:11:04 +0900 Subject: [PATCH 18/33] fix: surface CUA filesystem failures --- astrbot/core/computer/booters/cua.py | 108 +++++++-------------- astrbot/core/tools/computer_tools/shell.py | 5 +- tests/unit/test_cua_computer_use.py | 43 ++++++++ tests/unit/test_func_tool_manager.py | 62 ++++++++++-- 4 files changed, 138 insertions(+), 80 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 7644d37a51..32f9236bec 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -107,6 +107,15 @@ def _normalize_process_result(raw: Any) -> ProcessResult: ) +def _is_missing_python3_error(stderr: str) -> bool: + lowered = stderr.lower() + return "python3" in lowered and ( + "not found" in lowered + or "command not found" in lowered + or "no such file" in lowered + ) + + def _split_listing_entries(output: str) -> list[str]: return [line for line in output.splitlines() if line.strip()] @@ -174,9 +183,12 @@ async def exec( self._sandbox.shell, ("run", "exec"), command, **kwargs ) proc = _normalize_process_result(result) + stderr = proc.stderr + if background and stderr and _is_missing_python3_error(stderr): + stderr = f"CUA background execution requires python3 in the sandbox image: {stderr}" response = { "stdout": proc.stdout, - "stderr": proc.stderr, + "stderr": stderr, "exit_code": proc.exit_code, "success": proc.success, } @@ -242,8 +254,14 @@ async def exec( } -class _NativeFSAdapter: - def __init__(self, fs: Any, shell: CuaShellComponent) -> None: +def _write_result(path: str, result: dict[str, Any]) -> dict[str, Any]: + if result.get("stderr") or result.get("success") is False: + return {"success": False, "path": path, **result} + return {"success": True, "path": path, **result} + + +class _CuaFSAdapter: + def __init__(self, fs: Any | None, shell: CuaShellComponent) -> None: self._fs = fs self._shell = shell @@ -253,7 +271,9 @@ async def create_file( content: str = "", mode: int = 0o644, ) -> dict[str, Any]: - await self.write_file(path, content) + write_result = await self.write_file(path, content) + if not write_result.get("success"): + return {**write_result, "mode": mode} return {"success": True, "path": path, "mode": mode} async def read_file( @@ -263,7 +283,7 @@ async def read_file( offset: int | None = None, limit: int | None = None, ) -> dict[str, Any]: - read_file = getattr(self._fs, "read_file", None) + read_file = None if self._fs is None else getattr(self._fs, "read_file", None) if read_file is None: result = await self._shell.exec(f"cat {path!r}") if result.get("stderr"): @@ -289,17 +309,22 @@ async def write_file( encoding: str = "utf-8", ) -> dict[str, Any]: _ = mode - write_file = getattr(self._fs, "write_file", None) + write_file = None if self._fs is None else getattr(self._fs, "write_file", None) if write_file is None: - await _write_base64_via_shell(self._shell, path, content.encode(encoding)) + result = await _write_base64_via_shell( + self._shell, path, content.encode(encoding) + ) + return _write_result(path, result) else: await _maybe_await(write_file(path, content)) return {"success": True, "path": path} async def delete_file(self, path: str) -> dict[str, Any]: - delete = getattr(self._fs, "delete", None) or getattr( - self._fs, "delete_file", None - ) + delete = None + if self._fs is not None: + delete = getattr(self._fs, "delete", None) or getattr( + self._fs, "delete_file", None + ) if delete is None: await self._shell.exec(f"rm -rf {path!r}") else: @@ -311,68 +336,13 @@ async def list_dir( path: str = ".", show_hidden: bool = False, ) -> dict[str, Any]: - list_dir = getattr(self._fs, "list_dir", None) + list_dir = None if self._fs is None else getattr(self._fs, "list_dir", None) if list_dir is not None: entries = await _maybe_await(list_dir(path)) return {"success": True, "path": path, "entries": entries} return await _list_dir_via_shell(self._shell, path, show_hidden) -class _ShellFSAdapter: - def __init__(self, shell: CuaShellComponent) -> None: - self._shell = shell - - async def create_file( - self, - path: str, - content: str = "", - mode: int = 0o644, - ) -> dict[str, Any]: - await self.write_file(path, content) - return {"success": True, "path": path, "mode": mode} - - async def read_file( - self, - path: str, - encoding: str = "utf-8", - offset: int | None = None, - limit: int | None = None, - ) -> dict[str, Any]: - _ = encoding - result = await self._shell.exec(f"cat {path!r}") - if result.get("stderr"): - return {"success": False, "path": path, "error": result["stderr"]} - return { - "success": True, - "path": path, - "content": _slice_content_by_lines( - str(result.get("stdout", "")), offset=offset, limit=limit - ), - } - - async def write_file( - self, - path: str, - content: str, - mode: str = "w", - encoding: str = "utf-8", - ) -> dict[str, Any]: - _ = mode - await _write_base64_via_shell(self._shell, path, content.encode(encoding)) - return {"success": True, "path": path} - - async def delete_file(self, path: str) -> dict[str, Any]: - await self._shell.exec(f"rm -rf {path!r}") - return {"success": True, "path": path} - - async def list_dir( - self, - path: str = ".", - show_hidden: bool = False, - ) -> dict[str, Any]: - return await _list_dir_via_shell(self._shell, path, show_hidden) - - async def _list_dir_via_shell( shell: CuaShellComponent, path: str, @@ -392,11 +362,7 @@ class CuaFileSystemComponent(FileSystemComponent): def __init__(self, sandbox: Any, os_type: str = "linux") -> None: self._shell = CuaShellComponent(sandbox, os_type=os_type) fs = getattr(sandbox, "filesystem", None) - self._impl = ( - _NativeFSAdapter(fs, self._shell) - if fs is not None - else _ShellFSAdapter(self._shell) - ) + self._impl = _CuaFSAdapter(fs, self._shell) async def create_file( self, diff --git a/astrbot/core/tools/computer_tools/shell.py b/astrbot/core/tools/computer_tools/shell.py index 638e443752..7ce6de4699 100644 --- a/astrbot/core/tools/computer_tools/shell.py +++ b/astrbot/core/tools/computer_tools/shell.py @@ -1,6 +1,7 @@ import json import re from dataclasses import dataclass, field +from typing import Any from astrbot.api import FunctionTool from astrbot.core.agent.run_context import ContextWrapper @@ -50,7 +51,7 @@ async def call( context: ContextWrapper[AstrAgentContext], command: str, background: bool = True, - env: dict = {}, + env: dict[str, Any] | None = None, ) -> ToolExecResult: if permission_error := check_admin_permission(context, "Shell execution"): return permission_error @@ -73,7 +74,7 @@ async def call( command, cwd=cwd, background=effective_background, - env=env, + env=dict(env or {}), ) return json.dumps(result, ensure_ascii=False) except Exception as e: diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 639d33e33d..0f128380e7 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -100,6 +100,20 @@ def run(self, command: str, **kwargs): return {"stdout": self.stdout, "stderr": "", "exit_code": 0} +class FailingShell: + def __init__(self): + self.commands = [] + + async def run(self, command: str, **kwargs): + self.commands.append((command, kwargs)) + return { + "stdout": "", + "stderr": "python3: command not found", + "exit_code": 127, + "success": False, + } + + class SyncPython: def run(self, code: str, **kwargs): return {"output": "sync", "error": ""} @@ -257,6 +271,21 @@ async def test_cua_write_file_shell_fallback_uses_python_base64_decoder(): assert "base64 -d" not in command +@pytest.mark.asyncio +async def test_cua_write_file_shell_fallback_propagates_shell_failure(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + sandbox = FakeSandbox() + sandbox.shell = FailingShell() + delattr(sandbox, "filesystem") + + result = await CuaFileSystemComponent(sandbox).write_file("hello.txt", "hello") + + assert result["success"] is False + assert result["stderr"] == "python3: command not found" + assert result["path"] == "hello.txt" + + @pytest.mark.asyncio async def test_cua_list_dir_shell_fallback_returns_filename_only_entries(): from astrbot.core.computer.booters.cua import CuaFileSystemComponent @@ -344,6 +373,20 @@ async def test_cua_shell_background_rejects_non_posix_os_type(): assert sandbox.shell.commands == [] +@pytest.mark.asyncio +async def test_cua_shell_background_reports_missing_python3_requirement(): + from astrbot.core.computer.booters.cua import CuaShellComponent + + sandbox = FakeSandbox() + sandbox.shell = FailingShell() + + result = await CuaShellComponent(sandbox).exec("firefox", background=True) + + assert result["success"] is False + assert "requires python3" in result["stderr"] + assert "python3: command not found" in result["stderr"] + + @pytest.mark.asyncio async def test_cua_gui_reports_missing_mouse_or_keyboard(): from astrbot.core.computer.booters.cua import CuaGUIComponent diff --git a/tests/unit/test_func_tool_manager.py b/tests/unit/test_func_tool_manager.py index d605ee709a..08ab45bcaf 100644 --- a/tests/unit/test_func_tool_manager.py +++ b/tests/unit/test_func_tool_manager.py @@ -6,8 +6,10 @@ from astrbot.core.provider.func_tool_manager import FunctionToolManager from astrbot.core.tools.computer_tools.shell import ExecuteShellTool from astrbot.core.tools.message_tools import SendMessageToUserTool -from astrbot.core.tools.web_search_tools import FirecrawlExtractWebPageTool -from astrbot.core.tools.web_search_tools import FirecrawlWebSearchTool +from astrbot.core.tools.web_search_tools import ( + FirecrawlExtractWebPageTool, + FirecrawlWebSearchTool, +) def test_get_builtin_tool_by_class_returns_cached_instance(): @@ -81,16 +83,62 @@ async def fake_get_booter(context, session_id): monkeypatch.setattr(shell_tools, "get_booter", fake_get_booter) - result = await ExecuteShellTool().call(FakeWrapper(), command="chromium https://example.com") + result = await ExecuteShellTool().call( + FakeWrapper(), command="chromium https://example.com" + ) assert json.loads(result)["success"] is True - assert calls == [ - {"command": "chromium https://example.com", "background": True} - ] + assert calls == [{"command": "chromium https://example.com", "background": True}] + + +@pytest.mark.asyncio +async def test_execute_shell_uses_fresh_default_env_per_call(monkeypatch): + from astrbot.core.tools.computer_tools import shell as shell_tools + + calls = [] + + class FakeShell: + async def exec(self, command, cwd=None, background=False, env=None): + env["MUTATED_BY_FAKE_SHELL"] = command + calls.append(env) + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeBooter: + shell = FakeShell() + + class FakeConfig: + def get_config(self, umo): + return {"provider_settings": {"computer_use_runtime": "sandbox"}} + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + context = FakeConfig() + event = FakeEvent() + + class FakeWrapper: + context = FakeAstrContext() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(shell_tools, "get_booter", fake_get_booter) + tool = ExecuteShellTool() + + await tool.call(FakeWrapper(), command="first") + await tool.call(FakeWrapper(), command="second") + + assert calls[0] is not calls[1] + assert calls[0]["MUTATED_BY_FAKE_SHELL"] == "first" + assert calls[1] == {"MUTATED_BY_FAKE_SHELL": "second"} @pytest.mark.asyncio -async def test_execute_shell_avoids_double_background_for_detached_commands(monkeypatch): +async def test_execute_shell_avoids_double_background_for_detached_commands( + monkeypatch, +): from astrbot.core.tools.computer_tools import shell as shell_tools calls = [] From e1bc2c4905cd6c19fe414023d79079cbb59a9086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 23:27:52 +0900 Subject: [PATCH 19/33] fix: clarify CUA shell fallback support --- astrbot/core/computer/booters/cua.py | 179 ++++++++++-------- astrbot/core/computer/booters/cua_defaults.py | 17 ++ astrbot/core/computer/computer_client.py | 22 +-- astrbot/core/config/default.py | 13 +- tests/unit/test_cua_computer_use.py | 68 ++++++- 5 files changed, 194 insertions(+), 105 deletions(-) create mode 100644 astrbot/core/computer/booters/cua_defaults.py diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 32f9236bec..a794f77ab7 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -11,8 +11,28 @@ from ..olayer import FileSystemComponent, GUIComponent, PythonComponent, ShellComponent from .base import ComputerBooter +from .cua_defaults import CUA_CONFIG_KEYS, CUA_DEFAULT_CONFIG from .shipyard_search_file_util import search_files_via_shell +_POSIX_OS_TYPES = {"linux", "darwin", "macos"} + +_CUA_BACKGROUND_LAUNCHER = """ +import subprocess, sys, time + +p = subprocess.Popen( + ["sh", "-lc", sys.argv[1]], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True, +) +sys.stdout.write(str(p.pid) + "\\n") +sys.stdout.flush() +time.sleep(0.2) +code = p.poll() +sys.exit(0 if code is None else code) +""".strip() + async def _maybe_await(value: Any) -> Any: if inspect.isawaitable(value): @@ -20,6 +40,13 @@ async def _maybe_await(value: Any) -> Any: return value +def build_cua_booter_kwargs(sandbox_cfg: dict[str, Any]) -> dict[str, Any]: + return { + name: sandbox_cfg.get(config_key, CUA_DEFAULT_CONFIG[name]) + for name, config_key in CUA_CONFIG_KEYS.items() + } + + async def _write_base64_via_shell( shell: ShellComponent, path: str, @@ -116,6 +143,25 @@ def _is_missing_python3_error(stderr: str) -> bool: ) +def _python3_requirement_error(operation: str, stderr: str) -> str: + return f"CUA {operation} requires python3 in the sandbox image: {stderr}" + + +def _is_posix_os_type(os_type: str) -> bool: + return os_type.lower() in _POSIX_OS_TYPES + + +def _non_posix_filesystem_result(path: str, os_type: str) -> dict[str, Any]: + return { + "success": False, + "path": path, + "error": ( + "CUA filesystem shell fallback is only supported for POSIX images; " + f"os_type={os_type!r} does not support the required shell commands." + ), + } + + def _split_listing_entries(output: str) -> list[str]: return [line for line in output.splitlines() if line.strip()] @@ -170,7 +216,7 @@ async def exec( if env: kwargs["env"] = env if background: - if self._os_type not in {"linux", "darwin", "macos"}: + if not _is_posix_os_type(self._os_type): return { "stdout": "", "stderr": "error: background shell execution is only supported for POSIX CUA images.", @@ -185,7 +231,7 @@ async def exec( proc = _normalize_process_result(result) stderr = proc.stderr if background and stderr and _is_missing_python3_error(stderr): - stderr = f"CUA background execution requires python3 in the sandbox image: {stderr}" + stderr = _python3_requirement_error("background execution", stderr) response = { "stdout": proc.stdout, "stderr": stderr, @@ -201,17 +247,7 @@ async def exec( def _build_cua_background_command(command: str) -> str: - launcher = ( - "import subprocess,sys,time; " - 'p=subprocess.Popen(["sh","-lc",sys.argv[1]], ' - "stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, " - "stderr=subprocess.DEVNULL, start_new_session=True); " - "sys.stdout.write(str(p.pid)+chr(10)); sys.stdout.flush(); " - "time.sleep(0.2); " - "code=p.poll(); " - "sys.exit(0 if code is None else code)" - ) - return f"python3 -c {shlex.quote(launcher)} {shlex.quote(command)}" + return f"python3 -c {shlex.quote(_CUA_BACKGROUND_LAUNCHER)} {shlex.quote(command)}" class CuaPythonComponent(PythonComponent): @@ -234,9 +270,12 @@ async def exec( else: shell = CuaShellComponent(self._sandbox, os_type=self._os_type) result = await shell.exec(f"python3 - <<'PY'\n{code}\nPY", timeout=timeout) + error = result.get("stderr", "") + if error and _is_missing_python3_error(error): + error = _python3_requirement_error("Python execution fallback", error) proc = ProcessResult( stdout=result.get("stdout", ""), - stderr=result.get("stderr", ""), + stderr=error, exit_code=result.get("exit_code"), success=bool(result.get("success", False)), ) @@ -255,15 +294,24 @@ async def exec( def _write_result(path: str, result: dict[str, Any]) -> dict[str, Any]: + stderr = result.get("stderr", "") + if stderr and _is_missing_python3_error(stderr): + result = { + **result, + "stderr": _python3_requirement_error("filesystem write fallback", stderr), + } if result.get("stderr") or result.get("success") is False: return {"success": False, "path": path, **result} return {"success": True, "path": path, **result} -class _CuaFSAdapter: - def __init__(self, fs: Any | None, shell: CuaShellComponent) -> None: - self._fs = fs - self._shell = shell +class CuaFileSystemComponent(FileSystemComponent): + def __init__( + self, sandbox: Any, os_type: str = CUA_DEFAULT_CONFIG["os_type"] + ) -> None: + self._shell = CuaShellComponent(sandbox, os_type=os_type) + self._fs = getattr(sandbox, "filesystem", None) + self._os_type = os_type.lower() async def create_file( self, @@ -285,6 +333,8 @@ async def read_file( ) -> dict[str, Any]: read_file = None if self._fs is None else getattr(self._fs, "read_file", None) if read_file is None: + if not _is_posix_os_type(self._os_type): + return _non_posix_filesystem_result(path, self._os_type) result = await self._shell.exec(f"cat {path!r}") if result.get("stderr"): return {"success": False, "path": path, "error": result["stderr"]} @@ -311,6 +361,8 @@ async def write_file( _ = mode write_file = None if self._fs is None else getattr(self._fs, "write_file", None) if write_file is None: + if not _is_posix_os_type(self._os_type): + return _non_posix_filesystem_result(path, self._os_type) result = await _write_base64_via_shell( self._shell, path, content.encode(encoding) ) @@ -326,7 +378,11 @@ async def delete_file(self, path: str) -> dict[str, Any]: self._fs, "delete_file", None ) if delete is None: - await self._shell.exec(f"rm -rf {path!r}") + if not _is_posix_os_type(self._os_type): + return _non_posix_filesystem_result(path, self._os_type) + result = await self._shell.exec(f"rm -rf {path!r}") + if result.get("stderr"): + return {"success": False, "path": path, "error": result["stderr"]} else: await _maybe_await(delete(path)) return {"success": True, "path": path} @@ -340,47 +396,10 @@ async def list_dir( if list_dir is not None: entries = await _maybe_await(list_dir(path)) return {"success": True, "path": path, "entries": entries} + if not _is_posix_os_type(self._os_type): + return _non_posix_filesystem_result(path, self._os_type) return await _list_dir_via_shell(self._shell, path, show_hidden) - -async def _list_dir_via_shell( - shell: CuaShellComponent, - path: str, - show_hidden: bool, -) -> dict[str, Any]: - flags = "-1A" if show_hidden else "-1" - result = await shell.exec(f"ls {flags} {path!r}") - return { - "success": not bool(result.get("stderr")), - "path": path, - "entries": _split_listing_entries(result.get("stdout", "")), - "error": result.get("stderr", ""), - } - - -class CuaFileSystemComponent(FileSystemComponent): - def __init__(self, sandbox: Any, os_type: str = "linux") -> None: - self._shell = CuaShellComponent(sandbox, os_type=os_type) - fs = getattr(sandbox, "filesystem", None) - self._impl = _CuaFSAdapter(fs, self._shell) - - async def create_file( - self, - path: str, - content: str = "", - mode: int = 0o644, - ) -> dict[str, Any]: - return await self._impl.create_file(path, content, mode) - - async def read_file( - self, - path: str, - encoding: str = "utf-8", - offset: int | None = None, - limit: int | None = None, - ) -> dict[str, Any]: - return await self._impl.read_file(path, encoding, offset, limit) - async def search_files( self, pattern: str, @@ -389,6 +408,8 @@ async def search_files( after_context: int | None = None, before_context: int | None = None, ) -> dict[str, Any]: + if not _is_posix_os_type(self._os_type): + return _non_posix_filesystem_result(path or ".", self._os_type) return await search_files_via_shell( self._shell, pattern=pattern, @@ -425,24 +446,20 @@ async def edit_file( "replacements": occurrences if replace_all else 1, } - async def write_file( - self, - path: str, - content: str, - mode: str = "w", - encoding: str = "utf-8", - ) -> dict[str, Any]: - return await self._impl.write_file(path, content, mode, encoding) - - async def delete_file(self, path: str) -> dict[str, Any]: - return await self._impl.delete_file(path) - async def list_dir( - self, - path: str = ".", - show_hidden: bool = False, - ) -> dict[str, Any]: - return await self._impl.list_dir(path, show_hidden) +async def _list_dir_via_shell( + shell: CuaShellComponent, + path: str, + show_hidden: bool, +) -> dict[str, Any]: + flags = "-1A" if show_hidden else "-1" + result = await shell.exec(f"ls {flags} {path!r}") + return { + "success": not bool(result.get("stderr")), + "path": path, + "entries": _split_listing_entries(result.get("stdout", "")), + "error": result.get("stderr", ""), + } class CuaGUIComponent(GUIComponent): @@ -533,12 +550,12 @@ class _CuaRuntime: class CuaBooter(ComputerBooter): def __init__( self, - image: str = "linux", - os_type: str = "linux", - ttl: int = 3600, - telemetry_enabled: bool = False, - local: bool = True, - api_key: str = "", + image: str = CUA_DEFAULT_CONFIG["image"], + os_type: str = CUA_DEFAULT_CONFIG["os_type"], + ttl: int = CUA_DEFAULT_CONFIG["ttl"], + telemetry_enabled: bool = CUA_DEFAULT_CONFIG["telemetry_enabled"], + local: bool = CUA_DEFAULT_CONFIG["local"], + api_key: str = CUA_DEFAULT_CONFIG["api_key"], ) -> None: self.image = image self.os_type = os_type diff --git a/astrbot/core/computer/booters/cua_defaults.py b/astrbot/core/computer/booters/cua_defaults.py new file mode 100644 index 0000000000..4c506154ad --- /dev/null +++ b/astrbot/core/computer/booters/cua_defaults.py @@ -0,0 +1,17 @@ +CUA_DEFAULT_CONFIG = { + "image": "linux", + "os_type": "linux", + "ttl": 3600, + "telemetry_enabled": False, + "local": True, + "api_key": "", +} + +CUA_CONFIG_KEYS = { + "image": "cua_image", + "os_type": "cua_os_type", + "ttl": "cua_ttl", + "telemetry_enabled": "cua_telemetry_enabled", + "local": "cua_local", + "api_key": "cua_api_key", +} diff --git a/astrbot/core/computer/computer_client.py b/astrbot/core/computer/computer_client.py index 2f9895033c..61c64c88db 100644 --- a/astrbot/core/computer/computer_client.py +++ b/astrbot/core/computer/computer_client.py @@ -485,26 +485,14 @@ async def get_booter( ttl=ttl, ) elif booter_type == "cua": - from .booters.cua import CuaBooter - - image = sandbox_cfg.get("cua_image", "linux") - os_type = sandbox_cfg.get("cua_os_type", "linux") - ttl = sandbox_cfg.get("cua_ttl", 3600) - telemetry_enabled = sandbox_cfg.get("cua_telemetry_enabled", False) - local = sandbox_cfg.get("cua_local", True) - api_key = sandbox_cfg.get("cua_api_key", "") + from .booters.cua import CuaBooter, build_cua_booter_kwargs + cua_kwargs = build_cua_booter_kwargs(sandbox_cfg) logger.info( - f"[Computer] CUA config: image={image}, os_type={os_type}, ttl={ttl}" - ) - client = CuaBooter( - image=image, - os_type=os_type, - ttl=ttl, - telemetry_enabled=telemetry_enabled, - local=local, - api_key=api_key, + f"[Computer] CUA config: image={cua_kwargs['image']}, " + f"os_type={cua_kwargs['os_type']}, ttl={cua_kwargs['ttl']}" ) + client = CuaBooter(**cua_kwargs) elif booter_type == "boxlite": from .booters.boxlite import BoxliteBooter diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 05f9b345ae..70ba156200 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -3,6 +3,7 @@ import os from typing import Any, TypedDict +from astrbot.core.computer.booters.cua_defaults import CUA_DEFAULT_CONFIG from astrbot.core.utils.astrbot_path import get_astrbot_data_path VERSION = "4.23.5" @@ -175,12 +176,12 @@ "shipyard_neo_access_token": "", "shipyard_neo_profile": "python-default", "shipyard_neo_ttl": 3600, - "cua_image": "linux", - "cua_os_type": "linux", - "cua_ttl": 3600, - "cua_telemetry_enabled": False, - "cua_local": True, - "cua_api_key": "", + "cua_image": CUA_DEFAULT_CONFIG["image"], + "cua_os_type": CUA_DEFAULT_CONFIG["os_type"], + "cua_ttl": CUA_DEFAULT_CONFIG["ttl"], + "cua_telemetry_enabled": CUA_DEFAULT_CONFIG["telemetry_enabled"], + "cua_local": CUA_DEFAULT_CONFIG["local"], + "cua_api_key": CUA_DEFAULT_CONFIG["api_key"], }, "image_compress_enabled": True, "image_compress_options": { diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 0f128380e7..e951c2df7b 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -114,6 +114,12 @@ async def run(self, command: str, **kwargs): } +class SandboxWithoutFilesystem: + def __init__(self): + self.shell = FakeShell() + self.python = FakePython() + + class SyncPython: def run(self, code: str, **kwargs): return {"output": "sync", "error": ""} @@ -207,6 +213,30 @@ def ephemeral(image, local=None, api_key=None): assert kwargs == {"local": False, "api_key": "sk-test"} +def test_cua_default_config_matches_booter_defaults(): + from astrbot.core.computer.booters.cua import CUA_DEFAULT_CONFIG, CuaBooter + from astrbot.core.config.default import DEFAULT_CONFIG + + booter = CuaBooter() + sandbox_defaults = DEFAULT_CONFIG["provider_settings"]["sandbox"] + + assert booter.image == CUA_DEFAULT_CONFIG["image"] + assert booter.os_type == CUA_DEFAULT_CONFIG["os_type"] + assert booter.ttl == CUA_DEFAULT_CONFIG["ttl"] + assert booter.telemetry_enabled == CUA_DEFAULT_CONFIG["telemetry_enabled"] + assert booter.local == CUA_DEFAULT_CONFIG["local"] + assert booter.api_key == CUA_DEFAULT_CONFIG["api_key"] + assert sandbox_defaults["cua_image"] == CUA_DEFAULT_CONFIG["image"] + assert sandbox_defaults["cua_os_type"] == CUA_DEFAULT_CONFIG["os_type"] + assert sandbox_defaults["cua_ttl"] == CUA_DEFAULT_CONFIG["ttl"] + assert ( + sandbox_defaults["cua_telemetry_enabled"] + == CUA_DEFAULT_CONFIG["telemetry_enabled"] + ) + assert sandbox_defaults["cua_local"] == CUA_DEFAULT_CONFIG["local"] + assert sandbox_defaults["cua_api_key"] == CUA_DEFAULT_CONFIG["api_key"] + + @pytest.mark.asyncio async def test_cua_components_map_sdk_results(tmp_path): from astrbot.core.computer.booters.cua import ( @@ -282,7 +312,8 @@ async def test_cua_write_file_shell_fallback_propagates_shell_failure(): result = await CuaFileSystemComponent(sandbox).write_file("hello.txt", "hello") assert result["success"] is False - assert result["stderr"] == "python3: command not found" + assert "requires python3" in result["stderr"] + assert "python3: command not found" in result["stderr"] assert result["path"] == "hello.txt" @@ -300,6 +331,26 @@ async def test_cua_list_dir_shell_fallback_returns_filename_only_entries(): assert sandbox.shell.commands[0][0] == "ls -1A '.'" +@pytest.mark.asyncio +async def test_cua_shell_filesystem_fallback_rejects_non_posix_os_type(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + sandbox = SandboxWithoutFilesystem() + fs = CuaFileSystemComponent(sandbox, os_type="windows") + + read_result = await fs.read_file("hello.txt") + write_result = await fs.write_file("hello.txt", "hello") + delete_result = await fs.delete_file("hello.txt") + list_result = await fs.list_dir(".") + + for result in (read_result, write_result, delete_result, list_result): + assert result["success"] is False + assert ( + "filesystem shell fallback is only supported for POSIX" in result["error"] + ) + assert sandbox.shell.commands == [] + + @pytest.mark.asyncio async def test_cua_shell_and_python_accept_sync_sdk_methods(): from astrbot.core.computer.booters.cua import CuaPythonComponent, CuaShellComponent @@ -387,6 +438,21 @@ async def test_cua_shell_background_reports_missing_python3_requirement(): assert "python3: command not found" in result["stderr"] +@pytest.mark.asyncio +async def test_cua_python_fallback_reports_missing_python3_requirement(): + from astrbot.core.computer.booters.cua import CuaPythonComponent + + sandbox = SandboxWithoutFilesystem() + sandbox.shell = FailingShell() + delattr(sandbox, "python") + + result = await CuaPythonComponent(sandbox).exec("print('hello')") + + assert result["success"] is False + assert "requires python3" in result["error"] + assert "python3: command not found" in result["error"] + + @pytest.mark.asyncio async def test_cua_gui_reports_missing_mouse_or_keyboard(): from astrbot.core.computer.booters.cua import CuaGUIComponent From 4af592dd155a92c55a20e79bcce7c7becf5317f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 23:36:33 +0900 Subject: [PATCH 20/33] fix: harden CUA shell helpers --- astrbot/core/computer/booters/cua.py | 76 ++++++++++------------ astrbot/core/config/astrbot_config.py | 11 +--- astrbot/core/tools/computer_tools/shell.py | 32 ++++++++- tests/unit/test_config.py | 20 ++++++ tests/unit/test_cua_computer_use.py | 56 +++++++++++++++- tests/unit/test_func_tool_manager.py | 43 ++++++++++++ 6 files changed, 183 insertions(+), 55 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index a794f77ab7..1f959f97cb 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -84,17 +84,6 @@ def _maybe_model_dump(value: Any) -> dict[str, Any]: return {} -async def _call_first( - obj: Any, names: tuple[str, ...], *args: Any, **kwargs: Any -) -> Any: - for name in names: - method = getattr(obj, name, None) - if method is None: - continue - return await _maybe_await(method(*args, **kwargs)) - raise AttributeError(f"None of these methods exist: {', '.join(names)}") - - def _slice_content_by_lines( content: str, *, @@ -169,16 +158,20 @@ def _split_listing_entries(output: str) -> list[str]: def _require_component_method( root: Any, component_name: str, - method_name: str, + method_names: str | tuple[str, ...], ) -> Any: component = getattr(root, component_name, None) - method = getattr(component, method_name, None) if component is not None else None - if method is None: - raise RuntimeError( - f"CUA sandbox does not provide `{component_name}.{method_name}`. " - "Please check the installed CUA SDK version and sandbox backend." - ) - return method + names = (method_names,) if isinstance(method_names, str) else method_names + if component is not None: + for method_name in names: + method = getattr(component, method_name, None) + if method is not None: + return method + candidates = ", ".join(f"{component_name}.{name}" for name in names) + raise RuntimeError( + f"CUA sandbox does not provide any of: {candidates}. " + "Please check the installed CUA SDK version and sandbox backend." + ) def _has_component_method(root: Any, component_name: str, method_name: str) -> bool: @@ -190,6 +183,10 @@ class CuaShellComponent(ShellComponent): def __init__(self, sandbox: Any, os_type: str = "linux") -> None: self._sandbox = sandbox self._os_type = os_type.lower() + shell = sandbox.shell + self._exec_raw = getattr(shell, "exec", None) or getattr(shell, "run", None) + if self._exec_raw is None: + raise RuntimeError("CUA sandbox shell must provide `.exec` or `.run`.") async def exec( self, @@ -225,9 +222,7 @@ async def exec( } command = _build_cua_background_command(command) - result = await _call_first( - self._sandbox.shell, ("run", "exec"), command, **kwargs - ) + result = await _maybe_await(self._exec_raw(command, **kwargs)) proc = _normalize_process_result(result) stderr = proc.stderr if background and stderr and _is_missing_python3_error(stderr): @@ -254,6 +249,12 @@ class CuaPythonComponent(PythonComponent): def __init__(self, sandbox: Any, os_type: str = "linux") -> None: self._sandbox = sandbox self._os_type = os_type + python = getattr(sandbox, "python", None) + self._python_exec = None + if python is not None: + self._python_exec = getattr(python, "exec", None) or getattr( + python, "run", None + ) async def exec( self, @@ -263,9 +264,8 @@ async def exec( silent: bool = False, ) -> dict[str, Any]: _ = kernel_id - python = getattr(self._sandbox, "python", None) - if python is not None: - result = await _call_first(python, ("run", "exec"), code, timeout=timeout) + if self._python_exec is not None: + result = await _maybe_await(self._python_exec(code, timeout=timeout)) proc = _normalize_process_result(result) else: shell = CuaShellComponent(self._sandbox, os_type=self._os_type) @@ -321,8 +321,8 @@ async def create_file( ) -> dict[str, Any]: write_result = await self.write_file(path, content) if not write_result.get("success"): - return {**write_result, "mode": mode} - return {"success": True, "path": path, "mode": mode} + return {**write_result, "mode": mode, "mode_applied": False} + return {"success": True, "path": path, "mode": mode, "mode_applied": False} async def read_file( self, @@ -335,7 +335,7 @@ async def read_file( if read_file is None: if not _is_posix_os_type(self._os_type): return _non_posix_filesystem_result(path, self._os_type) - result = await self._shell.exec(f"cat {path!r}") + result = await self._shell.exec(f"cat {shlex.quote(path)}") if result.get("stderr"): return {"success": False, "path": path, "error": result["stderr"]} content = result.get("stdout", "") @@ -380,7 +380,7 @@ async def delete_file(self, path: str) -> dict[str, Any]: if delete is None: if not _is_posix_os_type(self._os_type): return _non_posix_filesystem_result(path, self._os_type) - result = await self._shell.exec(f"rm -rf {path!r}") + result = await self._shell.exec(f"rm -rf {shlex.quote(path)}") if result.get("stderr"): return {"success": False, "path": path, "error": result["stderr"]} else: @@ -453,7 +453,7 @@ async def _list_dir_via_shell( show_hidden: bool, ) -> dict[str, Any]: flags = "-1A" if show_hidden else "-1" - result = await shell.exec(f"ls {flags} {path!r}") + result = await shell.exec(f"ls {flags} {shlex.quote(path)}") return { "success": not bool(result.get("stderr")), "path": path, @@ -492,19 +492,9 @@ async def type_text(self, text: str) -> dict[str, Any]: return {"success": bool(payload.get("success", True)), **payload} async def press_key(self, key: str) -> dict[str, Any]: - keyboard = getattr(self._sandbox, "keyboard", None) - press = None - if keyboard is not None: - press = ( - getattr(keyboard, "press", None) - or getattr(keyboard, "key_press", None) - or getattr(keyboard, "press_key", None) - ) - if press is None: - raise RuntimeError( - "CUA sandbox does not provide `keyboard.press`. " - "Please check the installed CUA SDK version and sandbox backend." - ) + press = _require_component_method( + self._sandbox, "keyboard", ("press", "key_press", "press_key") + ) result = await _maybe_await(press(key)) payload = _maybe_model_dump(result) return {"success": bool(payload.get("success", True)), **payload} diff --git a/astrbot/core/config/astrbot_config.py b/astrbot/core/config/astrbot_config.py index 77c298cac8..1dd222929c 100644 --- a/astrbot/core/config/astrbot_config.py +++ b/astrbot/core/config/astrbot_config.py @@ -103,8 +103,7 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): for key, value in refer_conf.items(): if key not in conf: # 配置项不存在,插入默认值 - path_ = path + "." + key if path else key - logger.info(f"检查到配置项 {path_} 不存在,已插入默认值 {value}") + logger.info("检查到配置项不存在,已插入默认值") new_conf[key] = value has_new = True elif conf[key] is None: @@ -133,16 +132,12 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): # 检查是否存在参考配置中没有的配置项 for key in list(conf.keys()): if key not in refer_conf: - path_ = path + "." + key if path else key - logger.info(f"检查到配置项 {path_} 不存在,将从当前配置中删除") + logger.info("检查到未知配置项,将从当前配置中删除") has_new = True # 顺序不一致也算作变更 if list(conf.keys()) != list(new_conf.keys()): - if path: - logger.info(f"检查到配置项 {path} 的子项顺序不一致,已重新排序") - else: - logger.info("检查到配置项顺序不一致,已重新排序") + logger.info("检查到配置项顺序不一致,已重新排序") has_new = True # 更新原始配置 diff --git a/astrbot/core/tools/computer_tools/shell.py b/astrbot/core/tools/computer_tools/shell.py index 7ce6de4699..78d7eb289b 100644 --- a/astrbot/core/tools/computer_tools/shell.py +++ b/astrbot/core/tools/computer_tools/shell.py @@ -83,8 +83,36 @@ async def call( def _is_self_detached_command(command: str) -> bool: - stripped = command.strip() + stripped = _strip_shell_comment(command).strip() + lowered = stripped.lower() return ( - stripped.startswith("nohup ") + lowered.startswith("nohup ") + or lowered.startswith("setsid ") + or lowered.startswith("disown ") + or lowered.startswith("start ") + or lowered.startswith("start-process ") or re.search(r"(?:^|\s)&\s*$", stripped) is not None ) + + +def _strip_shell_comment(command: str) -> str: + in_single = False + in_double = False + escaped = False + for index, char in enumerate(command): + if escaped: + escaped = False + continue + if char == "\\" and not in_single: + escaped = True + continue + if char == "'" and not in_double: + in_single = not in_single + continue + if char == '"' and not in_single: + in_double = not in_double + continue + if char == "#" and not in_single and not in_double: + if index == 0 or command[index - 1].isspace(): + return command[:index] + return command diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 1da02835b1..394a086cb1 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -291,6 +291,26 @@ def test_nested_config_validation(self, temp_config_path): assert "level2" in config.nested["level1"] assert config.nested["level1"]["level2"]["value"] == 42 + def test_integrity_log_does_not_include_inserted_secret_value( + self, temp_config_path, monkeypatch + ): + """Default values may contain secrets and should not be logged.""" + from astrbot.core.config import astrbot_config + + existing_config = {} + default_config = {"api_key": "secret-value"} + messages = [] + with open(temp_config_path, "w", encoding="utf-8-sig") as f: + json.dump(existing_config, f) + + monkeypatch.setattr(astrbot_config.logger, "info", messages.append) + + AstrBotConfig(config_path=temp_config_path, default_config=default_config) + + assert messages + assert all("secret-value" not in message for message in messages) + assert all("api_key" not in message for message in messages) + class TestConfigHotReload: """Tests for config hot reload functionality.""" diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index e951c2df7b..ba436f0fa7 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -1,6 +1,7 @@ import asyncio import base64 import json +import shlex from pathlib import Path import mcp @@ -284,7 +285,25 @@ async def test_cua_list_dir_returns_entries_list_for_shell_fallback(): assert result["success"] is True assert result["entries"] == ["ok"] - assert sandbox.shell.commands[0][0] == "ls -1 '.'" + assert sandbox.shell.commands[0][0] == "ls -1 ." + + +@pytest.mark.asyncio +async def test_cua_shell_filesystem_fallback_shell_quotes_paths(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + path = "folder/it's file.txt" + sandbox = FakeSandbox() + delattr(sandbox, "filesystem") + fs = CuaFileSystemComponent(sandbox) + + await fs.read_file(path) + await fs.delete_file(path) + await fs.list_dir(path) + + assert sandbox.shell.commands[0][0] == f"cat {shlex.quote(path)}" + assert sandbox.shell.commands[1][0] == f"rm -rf {shlex.quote(path)}" + assert sandbox.shell.commands[2][0] == f"ls -1 {shlex.quote(path)}" @pytest.mark.asyncio @@ -301,6 +320,19 @@ async def test_cua_write_file_shell_fallback_uses_python_base64_decoder(): assert "base64 -d" not in command +@pytest.mark.asyncio +async def test_cua_create_file_reports_mode_as_informational(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + sandbox = FakeSandbox() + + result = await CuaFileSystemComponent(sandbox).create_file("hello.txt", mode=0o600) + + assert result["success"] is True + assert result["mode"] == 0o600 + assert result["mode_applied"] is False + + @pytest.mark.asyncio async def test_cua_write_file_shell_fallback_propagates_shell_failure(): from astrbot.core.computer.booters.cua import CuaFileSystemComponent @@ -328,7 +360,7 @@ async def test_cua_list_dir_shell_fallback_returns_filename_only_entries(): result = await CuaFileSystemComponent(sandbox).list_dir(".", show_hidden=True) assert result["entries"] == ["alpha.txt", "folder"] - assert sandbox.shell.commands[0][0] == "ls -1A '.'" + assert sandbox.shell.commands[0][0] == "ls -1A ." @pytest.mark.asyncio @@ -473,6 +505,24 @@ async def screenshot(self): await gui.press_key("Enter") +@pytest.mark.asyncio +async def test_cua_gui_press_error_lists_probed_methods(): + from astrbot.core.computer.booters.cua import CuaGUIComponent + + class SandboxWithoutPress: + keyboard = object() + + gui = CuaGUIComponent(SandboxWithoutPress()) + + with pytest.raises(RuntimeError) as exc_info: + await gui.press_key("Enter") + + message = str(exc_info.value) + assert "keyboard.press" in message + assert "keyboard.key_press" in message + assert "keyboard.press_key" in message + + def test_cua_capabilities_reflect_initialized_sandbox_gui_devices(): from astrbot.core.computer.booters.cua import ( CuaBooter, @@ -508,6 +558,8 @@ def set_runtime(booter, sandbox): ) class ScreenshotOnlySandbox: + shell = FakeShell() + async def screenshot(self): return b"fake-png" diff --git a/tests/unit/test_func_tool_manager.py b/tests/unit/test_func_tool_manager.py index 08ab45bcaf..74c6964af3 100644 --- a/tests/unit/test_func_tool_manager.py +++ b/tests/unit/test_func_tool_manager.py @@ -180,6 +180,49 @@ async def fake_get_booter(context, session_id): assert calls == [{"command": command, "background": False}] +@pytest.mark.asyncio +async def test_execute_shell_recognizes_commented_background_command(monkeypatch): + from astrbot.core.tools.computer_tools import shell as shell_tools + + calls = [] + + class FakeShell: + async def exec(self, command, cwd=None, background=False, env=None): + calls.append({"command": command, "background": background}) + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeBooter: + shell = FakeShell() + + class FakeConfig: + def get_config(self, umo): + return {"provider_settings": {"computer_use_runtime": "sandbox"}} + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + context = FakeConfig() + event = FakeEvent() + + class FakeWrapper: + context = FakeAstrContext() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(shell_tools, "get_booter", fake_get_booter) + + command = "firefox & # already detached" + result = await ExecuteShellTool().call( + FakeWrapper(), command=command, background=True + ) + + assert json.loads(result)["success"] is True + assert calls == [{"command": command, "background": False}] + + @pytest.mark.asyncio async def test_execute_shell_reports_blank_exception_type(monkeypatch): from astrbot.core.tools.computer_tools import shell as shell_tools From 19d78965f209735af01744c35150f942b0aeb946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 23:48:07 +0900 Subject: [PATCH 21/33] fix: guard CUA file fallbacks --- astrbot/core/computer/booters/cua.py | 4 +- astrbot/core/config/astrbot_config.py | 11 ++- tests/unit/test_config.py | 2 +- tests/unit/test_cua_computer_use.py | 119 ++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 5 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 1f959f97cb..86463f2b87 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -665,6 +665,8 @@ async def upload_file(self, path: str, file_name: str) -> dict: return _maybe_model_dump( await sandbox.upload_file(str(local_path), file_name) ) + if not _is_posix_os_type(self.os_type): + return _non_posix_filesystem_result(file_name, self.os_type) result = await _write_base64_via_shell( self.shell, file_name, local_path.read_bytes() ) @@ -679,7 +681,7 @@ async def download_file(self, remote_path: str, local_path: str) -> None: if sandbox is not None and hasattr(sandbox, "download_file"): await sandbox.download_file(remote_path, local_path) return - result = await self.shell.exec(f"base64 {remote_path!r}") + result = await self.shell.exec(f"base64 {shlex.quote(remote_path)}") if result.get("stderr"): raise RuntimeError(result["stderr"]) Path(local_path).parent.mkdir(parents=True, exist_ok=True) diff --git a/astrbot/core/config/astrbot_config.py b/astrbot/core/config/astrbot_config.py index 1dd222929c..3b04de3eb6 100644 --- a/astrbot/core/config/astrbot_config.py +++ b/astrbot/core/config/astrbot_config.py @@ -103,7 +103,8 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): for key, value in refer_conf.items(): if key not in conf: # 配置项不存在,插入默认值 - logger.info("检查到配置项不存在,已插入默认值") + path_ = path + "." + key if path else key + logger.info(f"检查到配置项 {path_} 不存在,已插入默认值") new_conf[key] = value has_new = True elif conf[key] is None: @@ -132,12 +133,16 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): # 检查是否存在参考配置中没有的配置项 for key in list(conf.keys()): if key not in refer_conf: - logger.info("检查到未知配置项,将从当前配置中删除") + path_ = path + "." + key if path else key + logger.info(f"检查到未知配置项 {path_},将从当前配置中删除") has_new = True # 顺序不一致也算作变更 if list(conf.keys()) != list(new_conf.keys()): - logger.info("检查到配置项顺序不一致,已重新排序") + if path: + logger.info(f"检查到配置项 {path} 的子项顺序不一致,已重新排序") + else: + logger.info("检查到配置项顺序不一致,已重新排序") has_new = True # 更新原始配置 diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 394a086cb1..fc36261621 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -309,7 +309,7 @@ def test_integrity_log_does_not_include_inserted_secret_value( assert messages assert all("secret-value" not in message for message in messages) - assert all("api_key" not in message for message in messages) + assert any("api_key" in message for message in messages) class TestConfigHotReload: diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index ba436f0fa7..793e3f6ea2 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -238,6 +238,54 @@ def test_cua_default_config_matches_booter_defaults(): assert sandbox_defaults["cua_api_key"] == CUA_DEFAULT_CONFIG["api_key"] +@pytest.mark.asyncio +async def test_cua_config_log_does_not_include_api_key(monkeypatch): + from astrbot.core.computer import computer_client + + log_messages = [] + + class FakeCuaBooter: + def __init__(self, **kwargs): + self.kwargs = kwargs + + async def boot(self, session_id: str): + self.session_id = session_id + + async def available(self): + return True + + monkeypatch.setattr( + computer_client, "_sync_skills_to_sandbox", lambda booter: asyncio.sleep(0) + ) + monkeypatch.setitem(computer_client.session_booter, "cua-log-test", None) + computer_client.session_booter.pop("cua-log-test", None) + monkeypatch.setattr( + "astrbot.core.computer.booters.cua.CuaBooter", + FakeCuaBooter, + raising=False, + ) + monkeypatch.setattr(computer_client.logger, "info", log_messages.append) + + ctx = FakeContext( + { + "provider_settings": { + "computer_use_runtime": "sandbox", + "sandbox": { + "booter": "cua", + "cua_local": False, + "cua_api_key": "sk-secret-value", + }, + } + } + ) + + await computer_client.get_booter(ctx, "cua-log-test") + + assert log_messages + assert all("sk-secret-value" not in message for message in log_messages) + assert all("api_key" not in message for message in log_messages) + + @pytest.mark.asyncio async def test_cua_components_map_sdk_results(tmp_path): from astrbot.core.computer.booters.cua import ( @@ -456,6 +504,77 @@ async def test_cua_shell_background_rejects_non_posix_os_type(): assert sandbox.shell.commands == [] +@pytest.mark.asyncio +async def test_cua_upload_file_fallback_rejects_non_posix_os_type(tmp_path): + from astrbot.core.computer.booters.cua import ( + CuaBooter, + CuaFileSystemComponent, + CuaGUIComponent, + CuaPythonComponent, + CuaShellComponent, + _CuaRuntime, + ) + + local_file = tmp_path / "upload.txt" + local_file.write_text("hello", encoding="utf-8") + sandbox = SandboxWithoutFilesystem() + booter = CuaBooter(os_type="windows") + booter._runtime = _CuaRuntime( + sandbox_cm=object(), + sandbox=sandbox, + shell=CuaShellComponent(sandbox, os_type="windows"), + python=CuaPythonComponent(sandbox, os_type="windows"), + fs=CuaFileSystemComponent(sandbox, os_type="windows"), + gui=CuaGUIComponent(sandbox), + ) + + result = await booter.upload_file(str(local_file), "remote.txt") + + assert result["success"] is False + assert "filesystem shell fallback is only supported for POSIX" in result["error"] + assert sandbox.shell.commands == [] + + +@pytest.mark.asyncio +async def test_cua_download_file_shell_quotes_remote_path(tmp_path): + from astrbot.core.computer.booters.cua import ( + CuaBooter, + CuaFileSystemComponent, + CuaGUIComponent, + CuaPythonComponent, + CuaShellComponent, + _CuaRuntime, + ) + + class Base64Shell(FakeShell): + async def run(self, command: str, **kwargs): + self.commands.append((command, kwargs)) + return { + "stdout": base64.b64encode(b"hello").decode(), + "stderr": "", + "exit_code": 0, + } + + sandbox = SandboxWithoutFilesystem() + sandbox.shell = Base64Shell() + booter = CuaBooter() + booter._runtime = _CuaRuntime( + sandbox_cm=object(), + sandbox=sandbox, + shell=CuaShellComponent(sandbox), + python=CuaPythonComponent(sandbox), + fs=CuaFileSystemComponent(sandbox), + gui=CuaGUIComponent(sandbox), + ) + remote_path = "folder/it's file.txt" + local_path = tmp_path / "download.txt" + + await booter.download_file(remote_path, str(local_path)) + + assert sandbox.shell.commands[0][0] == f"base64 {shlex.quote(remote_path)}" + assert local_path.read_bytes() == b"hello" + + @pytest.mark.asyncio async def test_cua_shell_background_reports_missing_python3_requirement(): from astrbot.core.computer.booters.cua import CuaShellComponent From 0e985a775f55dfda05451b07ec1c117277109592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 23:53:27 +0900 Subject: [PATCH 22/33] fix: redact sensitive config log paths --- astrbot/core/config/astrbot_config.py | 36 ++++++++++++++++++++++++--- tests/unit/test_config.py | 22 +++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/astrbot/core/config/astrbot_config.py b/astrbot/core/config/astrbot_config.py index 3b04de3eb6..9ad0dc4145 100644 --- a/astrbot/core/config/astrbot_config.py +++ b/astrbot/core/config/astrbot_config.py @@ -10,6 +10,30 @@ ASTRBOT_CONFIG_PATH = os.path.join(get_astrbot_data_path(), "cmd_config.json") logger = logging.getLogger("astrbot") +SENSITIVE_CONFIG_PATH_PARTS = ( + "api_key", + "apikey", + "access_token", + "auth_token", + "token", + "password", + "passwd", + "secret", + "credential", + "credentials", +) + + +def _redact_config_path(path: str) -> str: + parts = path.split(".") + redacted = [ + "[REDACTED]" + if any(sensitive in part.lower() for sensitive in SENSITIVE_CONFIG_PATH_PARTS) + else part + for part in parts + ] + return ".".join(redacted) + class RateLimitStrategy(enum.Enum): STALL = "stall" @@ -104,7 +128,9 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): if key not in conf: # 配置项不存在,插入默认值 path_ = path + "." + key if path else key - logger.info(f"检查到配置项 {path_} 不存在,已插入默认值") + logger.info( + f"检查到配置项 {_redact_config_path(path_)} 不存在,已插入默认值" + ) new_conf[key] = value has_new = True elif conf[key] is None: @@ -134,13 +160,17 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): for key in list(conf.keys()): if key not in refer_conf: path_ = path + "." + key if path else key - logger.info(f"检查到未知配置项 {path_},将从当前配置中删除") + logger.info( + f"检查到未知配置项 {_redact_config_path(path_)},将从当前配置中删除" + ) has_new = True # 顺序不一致也算作变更 if list(conf.keys()) != list(new_conf.keys()): if path: - logger.info(f"检查到配置项 {path} 的子项顺序不一致,已重新排序") + logger.info( + f"检查到配置项 {_redact_config_path(path)} 的子项顺序不一致,已重新排序" + ) else: logger.info("检查到配置项顺序不一致,已重新排序") has_new = True diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index fc36261621..773dd084b2 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -309,7 +309,27 @@ def test_integrity_log_does_not_include_inserted_secret_value( assert messages assert all("secret-value" not in message for message in messages) - assert any("api_key" in message for message in messages) + assert all("api_key" not in message for message in messages) + assert any("[REDACTED]" in message for message in messages) + + def test_integrity_log_keeps_non_sensitive_config_path( + self, temp_config_path, monkeypatch + ): + """Non-sensitive paths remain useful for troubleshooting.""" + from astrbot.core.config import astrbot_config + + existing_config = {"provider_settings": {}} + default_config = {"provider_settings": {"enable": True}} + messages = [] + with open(temp_config_path, "w", encoding="utf-8-sig") as f: + json.dump(existing_config, f) + + monkeypatch.setattr(astrbot_config.logger, "info", messages.append) + + AstrBotConfig(config_path=temp_config_path, default_config=default_config) + + assert any("provider_settings.enable" in message for message in messages) + assert all("[REDACTED]" not in message for message in messages) class TestConfigHotReload: From 0a1790b2ec01515b521e141ec151ca01c1a669ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Mon, 27 Apr 2026 23:58:05 +0900 Subject: [PATCH 23/33] fix: guard CUA download fallback --- astrbot/core/computer/booters/cua.py | 3 + astrbot/core/config/astrbot_config.py | 41 +-------- tests/unit/test_config.py | 21 +---- tests/unit/test_cua_computer_use.py | 125 ++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 58 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 86463f2b87..5abcce35b2 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -681,6 +681,9 @@ async def download_file(self, remote_path: str, local_path: str) -> None: if sandbox is not None and hasattr(sandbox, "download_file"): await sandbox.download_file(remote_path, local_path) return + if not _is_posix_os_type(self.os_type): + result = _non_posix_filesystem_result(remote_path, self.os_type) + raise RuntimeError(result["error"]) result = await self.shell.exec(f"base64 {shlex.quote(remote_path)}") if result.get("stderr"): raise RuntimeError(result["stderr"]) diff --git a/astrbot/core/config/astrbot_config.py b/astrbot/core/config/astrbot_config.py index 9ad0dc4145..1dd222929c 100644 --- a/astrbot/core/config/astrbot_config.py +++ b/astrbot/core/config/astrbot_config.py @@ -10,30 +10,6 @@ ASTRBOT_CONFIG_PATH = os.path.join(get_astrbot_data_path(), "cmd_config.json") logger = logging.getLogger("astrbot") -SENSITIVE_CONFIG_PATH_PARTS = ( - "api_key", - "apikey", - "access_token", - "auth_token", - "token", - "password", - "passwd", - "secret", - "credential", - "credentials", -) - - -def _redact_config_path(path: str) -> str: - parts = path.split(".") - redacted = [ - "[REDACTED]" - if any(sensitive in part.lower() for sensitive in SENSITIVE_CONFIG_PATH_PARTS) - else part - for part in parts - ] - return ".".join(redacted) - class RateLimitStrategy(enum.Enum): STALL = "stall" @@ -127,10 +103,7 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): for key, value in refer_conf.items(): if key not in conf: # 配置项不存在,插入默认值 - path_ = path + "." + key if path else key - logger.info( - f"检查到配置项 {_redact_config_path(path_)} 不存在,已插入默认值" - ) + logger.info("检查到配置项不存在,已插入默认值") new_conf[key] = value has_new = True elif conf[key] is None: @@ -159,20 +132,12 @@ def check_config_integrity(self, refer_conf: dict, conf: dict, path=""): # 检查是否存在参考配置中没有的配置项 for key in list(conf.keys()): if key not in refer_conf: - path_ = path + "." + key if path else key - logger.info( - f"检查到未知配置项 {_redact_config_path(path_)},将从当前配置中删除" - ) + logger.info("检查到未知配置项,将从当前配置中删除") has_new = True # 顺序不一致也算作变更 if list(conf.keys()) != list(new_conf.keys()): - if path: - logger.info( - f"检查到配置项 {_redact_config_path(path)} 的子项顺序不一致,已重新排序" - ) - else: - logger.info("检查到配置项顺序不一致,已重新排序") + logger.info("检查到配置项顺序不一致,已重新排序") has_new = True # 更新原始配置 diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 773dd084b2..c49e97ca0f 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -310,26 +310,7 @@ def test_integrity_log_does_not_include_inserted_secret_value( assert messages assert all("secret-value" not in message for message in messages) assert all("api_key" not in message for message in messages) - assert any("[REDACTED]" in message for message in messages) - - def test_integrity_log_keeps_non_sensitive_config_path( - self, temp_config_path, monkeypatch - ): - """Non-sensitive paths remain useful for troubleshooting.""" - from astrbot.core.config import astrbot_config - - existing_config = {"provider_settings": {}} - default_config = {"provider_settings": {"enable": True}} - messages = [] - with open(temp_config_path, "w", encoding="utf-8-sig") as f: - json.dump(existing_config, f) - - monkeypatch.setattr(astrbot_config.logger, "info", messages.append) - - AstrBotConfig(config_path=temp_config_path, default_config=default_config) - - assert any("provider_settings.enable" in message for message in messages) - assert all("[REDACTED]" not in message for message in messages) + assert any("配置项不存在" in message for message in messages) class TestConfigHotReload: diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 793e3f6ea2..4e292707d4 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -575,6 +575,34 @@ async def run(self, command: str, **kwargs): assert local_path.read_bytes() == b"hello" +@pytest.mark.asyncio +async def test_cua_download_file_fallback_rejects_non_posix_os_type(tmp_path): + from astrbot.core.computer.booters.cua import ( + CuaBooter, + CuaFileSystemComponent, + CuaGUIComponent, + CuaPythonComponent, + CuaShellComponent, + _CuaRuntime, + ) + + sandbox = SandboxWithoutFilesystem() + booter = CuaBooter(os_type="windows") + booter._runtime = _CuaRuntime( + sandbox_cm=object(), + sandbox=sandbox, + shell=CuaShellComponent(sandbox, os_type="windows"), + python=CuaPythonComponent(sandbox, os_type="windows"), + fs=CuaFileSystemComponent(sandbox, os_type="windows"), + gui=CuaGUIComponent(sandbox), + ) + + with pytest.raises(RuntimeError, match="filesystem shell fallback"): + await booter.download_file("remote.txt", str(tmp_path / "download.txt")) + + assert sandbox.shell.commands == [] + + @pytest.mark.asyncio async def test_cua_shell_background_reports_missing_python3_requirement(): from astrbot.core.computer.booters.cua import CuaShellComponent @@ -1016,3 +1044,100 @@ async def fail_gui_lookup(context): assert await CuaMouseClickTool().call(FakeWrapper(), x=1, y=2) == ( "Error clicking CUA desktop: BlankError" ) + + +@pytest.mark.asyncio +async def test_cua_mouse_click_tool_happy_path_forwards_args_and_serializes_json( + monkeypatch, +): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaMouseClickTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext( + {"provider_settings": {"computer_use_require_admin": True}} + ) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeGui: + def __init__(self): + self.clicked_args = None + + async def click(self, x: int, y: int, button: str = "left"): + self.clicked_args = (x, y, button) + return {"status": "ok", "x": x, "y": y, "button": button} + + fake_gui = FakeGui() + get_gui_called = {"value": False} + wrapper = FakeWrapper() + + async def fake_get_gui_component(context): + get_gui_called["value"] = True + assert context is wrapper + return fake_gui + + monkeypatch.setattr(cua_tools, "_get_gui_component", fake_get_gui_component) + + result = await CuaMouseClickTool().call(wrapper, x=10, y=20, button="right") + + assert get_gui_called["value"] is True + assert fake_gui.clicked_args == (10, 20, "right") + assert json.loads(result) == { + "status": "ok", + "x": 10, + "y": 20, + "button": "right", + } + + +@pytest.mark.asyncio +async def test_cua_keyboard_type_tool_happy_path_forwards_args_and_serializes_json( + monkeypatch, +): + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaKeyboardTypeTool + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext( + {"provider_settings": {"computer_use_require_admin": True}} + ) + + class FakeWrapper: + context = FakeAstrContext() + + class FakeGui: + def __init__(self): + self.typed_text_args = None + + async def type_text(self, text: str): + self.typed_text_args = (text,) + return {"status": "ok", "text": text} + + fake_gui = FakeGui() + get_gui_called = {"value": False} + wrapper = FakeWrapper() + + async def fake_get_gui_component(context): + get_gui_called["value"] = True + assert context is wrapper + return fake_gui + + monkeypatch.setattr(cua_tools, "_get_gui_component", fake_get_gui_component) + + result = await CuaKeyboardTypeTool().call(wrapper, text="Hello CUA") + + assert get_gui_called["value"] is True + assert fake_gui.typed_text_args == ("Hello CUA",) + assert json.loads(result) == {"status": "ok", "text": "Hello CUA"} From 89aad6543d574c2607cf8ea8f1c3a67ec886ea46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 00:02:16 +0900 Subject: [PATCH 24/33] test: cover CUA GUI and shell env wiring --- astrbot/core/computer/booters/cua.py | 53 ++++++++++++++++++---------- tests/unit/test_cua_computer_use.py | 32 +++++++++++++++++ tests/unit/test_func_tool_manager.py | 42 ++++++++++++++++++++++ 3 files changed, 109 insertions(+), 18 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 5abcce35b2..6a55b18fcd 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -155,20 +155,27 @@ def _split_listing_entries(output: str) -> list[str]: return [line for line in output.splitlines() if line.strip()] -def _require_component_method( - root: Any, +def _resolve_component_method( + component: Any, + method_names: str | tuple[str, ...], +) -> Any | None: + if component is None: + return None + names = (method_names,) if isinstance(method_names, str) else method_names + for method_name in names: + method = getattr(component, method_name, None) + if method is not None: + return method + return None + + +def _missing_component_method_error( component_name: str, method_names: str | tuple[str, ...], -) -> Any: - component = getattr(root, component_name, None) +) -> RuntimeError: names = (method_names,) if isinstance(method_names, str) else method_names - if component is not None: - for method_name in names: - method = getattr(component, method_name, None) - if method is not None: - return method candidates = ", ".join(f"{component_name}.{name}" for name in names) - raise RuntimeError( + return RuntimeError( f"CUA sandbox does not provide any of: {candidates}. " "Please check the installed CUA SDK version and sandbox backend." ) @@ -465,6 +472,13 @@ async def _list_dir_via_shell( class CuaGUIComponent(GUIComponent): def __init__(self, sandbox: Any) -> None: self._sandbox = sandbox + mouse = getattr(sandbox, "mouse", None) + keyboard = getattr(sandbox, "keyboard", None) + self._click = _resolve_component_method(mouse, "click") + self._type_text = _resolve_component_method(keyboard, "type") + self._press_key = _resolve_component_method( + keyboard, ("press", "key_press", "press_key") + ) async def screenshot(self, path: str | None = None) -> dict[str, Any]: raw = await self._sandbox.screenshot() @@ -480,22 +494,25 @@ async def screenshot(self, path: str | None = None) -> dict[str, Any]: } async def click(self, x: int, y: int, button: str = "left") -> dict[str, Any]: - click = _require_component_method(self._sandbox, "mouse", "click") - result = await _maybe_await(click(x, y, button=button)) + if self._click is None: + raise _missing_component_method_error("mouse", "click") + result = await _maybe_await(self._click(x, y, button=button)) payload = _maybe_model_dump(result) return {"success": bool(payload.get("success", True)), **payload} async def type_text(self, text: str) -> dict[str, Any]: - type_text = _require_component_method(self._sandbox, "keyboard", "type") - result = await _maybe_await(type_text(text)) + if self._type_text is None: + raise _missing_component_method_error("keyboard", "type") + result = await _maybe_await(self._type_text(text)) payload = _maybe_model_dump(result) return {"success": bool(payload.get("success", True)), **payload} async def press_key(self, key: str) -> dict[str, Any]: - press = _require_component_method( - self._sandbox, "keyboard", ("press", "key_press", "press_key") - ) - result = await _maybe_await(press(key)) + if self._press_key is None: + raise _missing_component_method_error( + "keyboard", ("press", "key_press", "press_key") + ) + result = await _maybe_await(self._press_key(key)) payload = _maybe_model_dump(result) return {"success": bool(payload.get("success", True)), **payload} diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 4e292707d4..e6d6503955 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -670,6 +670,38 @@ class SandboxWithoutPress: assert "keyboard.press_key" in message +@pytest.mark.asyncio +async def test_cua_gui_caches_component_methods_after_initialization(): + from astrbot.core.computer.booters.cua import CuaGUIComponent + + class CountingMouse: + def __init__(self): + self.click_lookups = 0 + self.clicks = [] + + def __getattribute__(self, name): + if name == "click": + object.__getattribute__(self, "__dict__")["click_lookups"] += 1 + return object.__getattribute__(self, name) + + async def click(self, x: int, y: int, button: str = "left"): + self.clicks.append((x, y, button)) + return {"success": True} + + class Sandbox: + def __init__(self): + self.mouse = CountingMouse() + + sandbox = Sandbox() + gui = CuaGUIComponent(sandbox) + + await gui.click(1, 2) + await gui.click(3, 4, button="right") + + assert sandbox.mouse.click_lookups == 1 + assert sandbox.mouse.clicks == [(1, 2, "left"), (3, 4, "right")] + + def test_cua_capabilities_reflect_initialized_sandbox_gui_devices(): from astrbot.core.computer.booters.cua import ( CuaBooter, diff --git a/tests/unit/test_func_tool_manager.py b/tests/unit/test_func_tool_manager.py index 74c6964af3..69949351f7 100644 --- a/tests/unit/test_func_tool_manager.py +++ b/tests/unit/test_func_tool_manager.py @@ -135,6 +135,48 @@ async def fake_get_booter(context, session_id): assert calls[1] == {"MUTATED_BY_FAKE_SHELL": "second"} +@pytest.mark.asyncio +async def test_execute_shell_copies_user_env_before_execution(monkeypatch): + from astrbot.core.tools.computer_tools import shell as shell_tools + + calls = [] + + class FakeShell: + async def exec(self, command, cwd=None, background=False, env=None): + env["MUTATED_BY_FAKE_SHELL"] = command + calls.append(env) + return {"success": True, "stdout": "", "stderr": "", "exit_code": 0} + + class FakeBooter: + shell = FakeShell() + + class FakeConfig: + def get_config(self, umo): + return {"provider_settings": {"computer_use_runtime": "sandbox"}} + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + class FakeAstrContext: + context = FakeConfig() + event = FakeEvent() + + class FakeWrapper: + context = FakeAstrContext() + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(shell_tools, "get_booter", fake_get_booter) + original_env = {"FOO": "bar"} + + await ExecuteShellTool().call(FakeWrapper(), command="first", env=original_env) + + assert original_env == {"FOO": "bar"} + assert calls == [{"FOO": "bar", "MUTATED_BY_FAKE_SHELL": "first"}] + + @pytest.mark.asyncio async def test_execute_shell_avoids_double_background_for_detached_commands( monkeypatch, From 5cc921d09d9369d8b7f9f397c74426bc7ae1b2c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 00:15:40 +0900 Subject: [PATCH 25/33] fix: preserve CUA command result output --- astrbot/core/computer/booters/cua.py | 20 +++++++++++- tests/unit/test_cua_computer_use.py | 48 ++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 6a55b18fcd..c32b77651e 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -3,7 +3,7 @@ import base64 import inspect import shlex -from dataclasses import dataclass +from dataclasses import asdict, dataclass, is_dataclass from pathlib import Path from typing import Any @@ -73,6 +73,8 @@ class ProcessResult: def _maybe_model_dump(value: Any) -> dict[str, Any]: if isinstance(value, dict): return value + if is_dataclass(value) and not isinstance(value, type): + return asdict(value) if hasattr(value, "model_dump"): dumped = value.model_dump() if isinstance(dumped, dict): @@ -81,6 +83,22 @@ def _maybe_model_dump(value: Any) -> dict[str, Any]: dumped = value.dict() if isinstance(dumped, dict): return dumped + attr_payload = { + key: getattr(value, key) + for key in ( + "stdout", + "stderr", + "output", + "error", + "returncode", + "return_code", + "exit_code", + "success", + ) + if hasattr(value, key) + } + if attr_payload: + return attr_payload return {} diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index e6d6503955..45b95db05d 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -34,6 +34,22 @@ async def run(self, command: str, **kwargs): return {"output": "shape-ok", "returncode": 0} +class CommandResultShapeShell: + def __init__(self, stdout: str = "shape-ok", stderr: str = "", returncode: int = 0): + self.commands = [] + self.stdout = stdout + self.stderr = stderr + self.returncode = returncode + + @property + def success(self): + return self.returncode == 0 + + async def run(self, command: str, **kwargs): + self.commands.append((command, kwargs)) + return self + + class FakePython: async def run(self, code: str, **kwargs): return {"output": "42", "error": ""} @@ -463,6 +479,38 @@ async def test_cua_shell_normalizes_output_returncode_shape(): } +@pytest.mark.asyncio +async def test_cua_shell_normalizes_command_result_object_shape(): + from astrbot.core.computer.booters.cua import CuaShellComponent + + sandbox = FakeSandbox() + sandbox.shell = CommandResultShapeShell(stdout="hello\n", returncode=0) + + result = await CuaShellComponent(sandbox).exec("echo hello") + + assert result == { + "stdout": "hello\n", + "stderr": "", + "exit_code": 0, + "success": True, + } + + +@pytest.mark.asyncio +async def test_cua_python_fallback_preserves_shell_command_result_stdout(): + from astrbot.core.computer.booters.cua import CuaPythonComponent + + sandbox = SandboxWithoutFilesystem() + sandbox.shell = CommandResultShapeShell(stdout="from python fallback\n") + delattr(sandbox, "python") + + result = await CuaPythonComponent(sandbox).exec("print('from python fallback')") + + assert result["success"] is True + assert result["output"] == "from python fallback\n" + assert result["data"]["output"]["text"] == "from python fallback\n" + + @pytest.mark.asyncio async def test_cua_shell_background_wrapper_detaches_via_python_subprocess(): from astrbot.core.computer.booters.cua import CuaShellComponent From 8ffc2ce7cfe5b0799854ec9c9a315ee6cbd6ce18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 00:22:16 +0900 Subject: [PATCH 26/33] fix: normalize CUA return codes --- astrbot/core/computer/booters/cua.py | 10 +++++++--- astrbot/core/tools/computer_tools/shell.py | 3 ++- tests/unit/test_cua_computer_use.py | 22 ++++++++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index c32b77651e..eb8dea8ae9 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -129,9 +129,13 @@ def _normalize_process_result(raw: Any) -> ProcessResult: stdout = _result_text(payload, "stdout", "output") stderr = _result_text(payload, "stderr", "error") - exit_code = payload.get( - "exit_code", payload.get("returncode", 0 if not stderr else 1) - ) + exit_code = payload.get("exit_code") + if exit_code is None: + exit_code = payload.get("returncode") + if exit_code is None: + exit_code = payload.get("return_code") + if exit_code is None: + exit_code = 0 if not stderr else 1 success = bool(payload.get("success", not stderr and exit_code in (0, None))) return ProcessResult( stdout=stdout, diff --git a/astrbot/core/tools/computer_tools/shell.py b/astrbot/core/tools/computer_tools/shell.py index 78d7eb289b..e2a0390e24 100644 --- a/astrbot/core/tools/computer_tools/shell.py +++ b/astrbot/core/tools/computer_tools/shell.py @@ -69,12 +69,13 @@ async def call( current_workspace_root.mkdir(parents=True, exist_ok=True) cwd = str(current_workspace_root) + env = dict(env or {}) effective_background = background and not _is_self_detached_command(command) result = await sb.shell.exec( command, cwd=cwd, background=effective_background, - env=dict(env or {}), + env=env, ) return json.dumps(result, ensure_ascii=False) except Exception as e: diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 45b95db05d..a7b4327000 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -496,6 +496,28 @@ async def test_cua_shell_normalizes_command_result_object_shape(): } +@pytest.mark.asyncio +async def test_cua_shell_prefers_returncode_when_exit_code_is_none(): + from astrbot.core.computer.booters.cua import CuaShellComponent + + class ShellWithMixedExitCode: + async def run(self, command: str, **kwargs): + return { + "stdout": "", + "stderr": "", + "exit_code": None, + "returncode": 1, + } + + sandbox = FakeSandbox() + sandbox.shell = ShellWithMixedExitCode() + + result = await CuaShellComponent(sandbox).exec("false") + + assert result["exit_code"] == 1 + assert result["success"] is False + + @pytest.mark.asyncio async def test_cua_python_fallback_preserves_shell_command_result_stdout(): from astrbot.core.computer.booters.cua import CuaPythonComponent From c084a5a8d68c3b02c9aa0e104746fe8fa7c996c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 00:49:20 +0900 Subject: [PATCH 27/33] fix: preserve foreground shell behavior --- astrbot/core/computer/booters/cua.py | 4 ++- astrbot/core/tools/computer_tools/shell.py | 4 +-- tests/unit/test_cua_computer_use.py | 30 ++++++++++++++++++++++ tests/unit/test_func_tool_manager.py | 6 ++--- 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index eb8dea8ae9..250e20d5c3 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -468,7 +468,9 @@ async def edit_file( "replacements": 0, } updated = content.replace(old_string, new_string, -1 if replace_all else 1) - await self.write_file(path, updated, encoding=encoding) + write_result = await self.write_file(path, updated, encoding=encoding) + if not write_result.get("success"): + return write_result return { "success": True, "path": path, diff --git a/astrbot/core/tools/computer_tools/shell.py b/astrbot/core/tools/computer_tools/shell.py index e2a0390e24..d00f586d41 100644 --- a/astrbot/core/tools/computer_tools/shell.py +++ b/astrbot/core/tools/computer_tools/shell.py @@ -33,7 +33,7 @@ class ExecuteShellTool(FunctionTool): "background": { "type": "boolean", "description": "Whether to run the command in the background.", - "default": True, + "default": False, }, "env": { "type": "object", @@ -50,7 +50,7 @@ async def call( self, context: ContextWrapper[AstrAgentContext], command: str, - background: bool = True, + background: bool = False, env: dict[str, Any] | None = None, ) -> ToolExecResult: if permission_error := check_admin_permission(context, "Shell execution"): diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index a7b4327000..ba4f7b8987 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -413,6 +413,36 @@ async def test_cua_write_file_shell_fallback_propagates_shell_failure(): assert result["path"] == "hello.txt" +@pytest.mark.asyncio +async def test_cua_edit_file_propagates_write_failure(): + from astrbot.core.computer.booters.cua import CuaFileSystemComponent + + class ReadableButFailingWriteShell: + def __init__(self): + self.commands = [] + + async def run(self, command: str, **kwargs): + self.commands.append((command, kwargs)) + if command.startswith("cat "): + return {"stdout": "hello old", "stderr": "", "exit_code": 0} + return { + "stdout": "", + "stderr": "permission denied", + "exit_code": 1, + "success": False, + } + + sandbox = FakeSandbox() + sandbox.shell = ReadableButFailingWriteShell() + delattr(sandbox, "filesystem") + + result = await CuaFileSystemComponent(sandbox).edit_file("hello.txt", "old", "new") + + assert result["success"] is False + assert result["stderr"] == "permission denied" + assert result["path"] == "hello.txt" + + @pytest.mark.asyncio async def test_cua_list_dir_shell_fallback_returns_filename_only_entries(): from astrbot.core.computer.booters.cua import CuaFileSystemComponent diff --git a/tests/unit/test_func_tool_manager.py b/tests/unit/test_func_tool_manager.py index 69949351f7..11bb9bcac2 100644 --- a/tests/unit/test_func_tool_manager.py +++ b/tests/unit/test_func_tool_manager.py @@ -45,12 +45,12 @@ def test_computer_tools_are_registered_as_builtin_tools(): tool = manager.get_builtin_tool(ExecuteShellTool) assert tool.name == "astrbot_execute_shell" - assert tool.parameters["properties"]["background"]["default"] is True + assert tool.parameters["properties"]["background"]["default"] is False assert manager.is_builtin_tool("astrbot_execute_shell") is True @pytest.mark.asyncio -async def test_execute_shell_defaults_to_background(monkeypatch): +async def test_execute_shell_defaults_to_foreground(monkeypatch): from astrbot.core.tools.computer_tools import shell as shell_tools calls = [] @@ -88,7 +88,7 @@ async def fake_get_booter(context, session_id): ) assert json.loads(result)["success"] is True - assert calls == [{"command": "chromium https://example.com", "background": True}] + assert calls == [{"command": "chromium https://example.com", "background": False}] @pytest.mark.asyncio From ff46f9e61c85ac57b2eacf9399a7717c53653601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 00:59:19 +0900 Subject: [PATCH 28/33] fix: clean up failed CUA boots --- astrbot/core/computer/booters/cua.py | 21 ++++-- astrbot/core/computer/computer_client.py | 8 ++ tests/unit/test_cua_computer_use.py | 95 ++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 8 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 250e20d5c3..dc01c62c48 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -610,14 +610,19 @@ async def boot(self, session_id: str) -> None: ephemeral_kwargs = self._build_ephemeral_kwargs(Sandbox.ephemeral) sandbox_cm = Sandbox.ephemeral(image_obj, **ephemeral_kwargs) sandbox = await sandbox_cm.__aenter__() - self._runtime = _CuaRuntime( - sandbox_cm=sandbox_cm, - sandbox=sandbox, - shell=CuaShellComponent(sandbox, os_type=self.os_type), - python=CuaPythonComponent(sandbox, os_type=self.os_type), - fs=CuaFileSystemComponent(sandbox, os_type=self.os_type), - gui=CuaGUIComponent(sandbox), - ) + try: + self._runtime = _CuaRuntime( + sandbox_cm=sandbox_cm, + sandbox=sandbox, + shell=CuaShellComponent(sandbox, os_type=self.os_type), + python=CuaPythonComponent(sandbox, os_type=self.os_type), + fs=CuaFileSystemComponent(sandbox, os_type=self.os_type), + gui=CuaGUIComponent(sandbox), + ) + except Exception: + await sandbox_cm.__aexit__(None, None, None) + self._runtime = None + raise logger.info( "[Computer] CUA sandbox booted: image=%s, os_type=%s", self.image, diff --git a/astrbot/core/computer/computer_client.py b/astrbot/core/computer/computer_client.py index 61c64c88db..3ee65ce1aa 100644 --- a/astrbot/core/computer/computer_client.py +++ b/astrbot/core/computer/computer_client.py @@ -508,6 +508,14 @@ async def get_booter( await _sync_skills_to_sandbox(client) except Exception as e: logger.error(f"Error booting sandbox for session {session_id}: {e}") + try: + await client.shutdown() + except Exception as shutdown_error: + logger.warning( + "Failed to shutdown sandbox after boot error for session %s: %s", + session_id, + shutdown_error, + ) raise e session_booter[session_id] = client diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index ba4f7b8987..0e886b5634 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -302,6 +302,50 @@ async def available(self): assert all("api_key" not in message for message in log_messages) +@pytest.mark.asyncio +async def test_get_booter_shuts_down_client_when_skill_sync_fails(monkeypatch): + from astrbot.core.computer import computer_client + + shutdowns = [] + + class FakeCuaBooter: + def __init__(self, **kwargs): + self.kwargs = kwargs + + async def boot(self, session_id: str): + self.session_id = session_id + + async def shutdown(self): + shutdowns.append(self.session_id) + + async def fail_sync(booter): + raise RuntimeError("sync failed") + + monkeypatch.setattr(computer_client, "_sync_skills_to_sandbox", fail_sync) + monkeypatch.setitem(computer_client.session_booter, "cua-sync-fail", None) + computer_client.session_booter.pop("cua-sync-fail", None) + monkeypatch.setattr( + "astrbot.core.computer.booters.cua.CuaBooter", + FakeCuaBooter, + raising=False, + ) + + ctx = FakeContext( + { + "provider_settings": { + "computer_use_runtime": "sandbox", + "sandbox": {"booter": "cua"}, + } + } + ) + + with pytest.raises(RuntimeError, match="sync failed"): + await computer_client.get_booter(ctx, "cua-sync-fail") + + assert len(shutdowns) == 1 + assert "cua-sync-fail" not in computer_client.session_booter + + @pytest.mark.asyncio async def test_cua_components_map_sdk_results(tmp_path): from astrbot.core.computer.booters.cua import ( @@ -703,6 +747,57 @@ async def test_cua_download_file_fallback_rejects_non_posix_os_type(tmp_path): assert sandbox.shell.commands == [] +@pytest.mark.asyncio +async def test_cua_boot_cleans_up_sandbox_when_component_setup_fails(monkeypatch): + from astrbot.core.computer.booters import cua as cua_booter + + closed = [] + + class FakeSandboxContext: + async def __aenter__(self): + return FakeSandbox() + + async def __aexit__(self, exc_type, exc, tb): + closed.append((exc_type, exc, tb)) + + class FakeImage: + @staticmethod + def linux(): + return "linux-image" + + class FakeSandboxFactory: + @staticmethod + def ephemeral(image, **kwargs): + return FakeSandboxContext() + + class BrokenShellComponent: + def __init__(self, sandbox, os_type="linux"): + raise RuntimeError("component setup failed") + + original_import = __import__ + + def fake_import(name, globals=None, locals=None, fromlist=(), level=0): + if name == "cua": + + class FakeCuaModule: + Image = FakeImage + Sandbox = FakeSandboxFactory + + return FakeCuaModule() + return original_import(name, globals, locals, fromlist, level) + + monkeypatch.setattr("builtins.__import__", fake_import) + monkeypatch.setattr(cua_booter, "CuaShellComponent", BrokenShellComponent) + + booter = cua_booter.CuaBooter() + + with pytest.raises(RuntimeError, match="component setup failed"): + await booter.boot("session") + + assert len(closed) == 1 + assert booter._runtime is None + + @pytest.mark.asyncio async def test_cua_shell_background_reports_missing_python3_requirement(): from astrbot.core.computer.booters.cua import CuaShellComponent From 14f7295bdb2b543d842291a69258b9b1eccd4443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 01:02:03 +0900 Subject: [PATCH 29/33] docs: add CUA sandbox runtime guide --- docs/zh/use/astrbot-agent-sandbox.md | 108 ++++++++++++++++++++++++++- docs/zh/use/computer.md | 11 ++- 2 files changed, 115 insertions(+), 4 deletions(-) diff --git a/docs/zh/use/astrbot-agent-sandbox.md b/docs/zh/use/astrbot-agent-sandbox.md index 41a779e38b..bcf805eba4 100644 --- a/docs/zh/use/astrbot-agent-sandbox.md +++ b/docs/zh/use/astrbot-agent-sandbox.md @@ -13,11 +13,12 @@ - `Shipyard Neo`(当前推荐) - `Shipyard`(旧方案,仍可继续使用) +- `CUA`(本地或云端电脑使用沙盒,适合需要桌面操作的场景) 在当前版本的 AstrBot 控制台中,可在“AI 配置” -> “Agent Computer Use”中选择: - `Computer Use Runtime` = `sandbox` -- `沙箱环境驱动器` = `Shipyard Neo` 或 `Shipyard` +- `沙箱环境驱动器` = `Shipyard Neo`、`Shipyard` 或 `CUA` 其中,`Shipyard Neo` 是当前默认驱动器。它由 Bay、Ship、Gull 三部分组成: @@ -30,6 +31,109 @@ > [!TIP] > `Shipyard Neo` 下浏览器能力并不是所有 profile 都有。只有 profile 支持 `browser` capability 时,AstrBot 才会挂载浏览器相关工具。典型 profile 如 `browser-python`。 +## CUA 运行时 + +`CUA` 是一个面向电脑使用(Computer Use)的沙盒运行时。它可以通过统一的 Python SDK 创建 Linux、macOS、Windows、Android 等不同类型的沙盒,并暴露 Shell、截图、鼠标、键盘、文件系统等接口。 + +在 AstrBot 中选择 `CUA` 驱动器后,Agent 可以在 CUA sandbox 中使用: + +- Shell 工具 +- Python 工具 +- 文件读取、写入、编辑和搜索工具 +- 截图工具 +- 鼠标点击工具 +- 键盘输入工具 +- 沙盒文件上传与下载工具 + +> [!NOTE] +> CUA 是可选运行时,AstrBot 默认安装不会强制安装它。如果选择了 `CUA` 但当前 Python 环境没有安装 `cua` 包,启动沙盒时会提示安装缺失。 + +### 安装 CUA 依赖 + +如果您通过源码或虚拟环境运行 AstrBot,请在 AstrBot 使用的 Python 环境中安装 CUA: + +```bash +pip install cua +``` + +如果您使用 `uv` 管理 AstrBot 环境,可在 AstrBot 项目目录中执行: + +```bash +uv pip install cua +``` + +CUA 本身还依赖具体运行方式: + +- 本地 Linux 容器通常需要 Docker 可用。 +- 本地 Linux/Windows VM 通常需要 QEMU 或 CUA 对应的本地运行时。 +- macOS VM 通常依赖 CUA/Lume 相关运行时。 +- 云端 CUA 需要可用的 CUA API Key。 + +具体宿主机要求、镜像支持情况和本地运行时安装方式,请参考 [Cua 官方文档](https://cua.ai/docs)。 + +### 在 AstrBot 中配置 CUA + +进入 WebUI: + +- `配置 -> 普通配置 -> 使用电脑能力` + +然后设置: + +- `Computer Use Runtime` = `sandbox` +- `沙箱环境驱动器` = `CUA` + +CUA 相关配置项包括: + +- `CUA Image`:要启动的 CUA 镜像。常见值为 `linux`、`macos`、`windows`、`android`。默认 `linux`。 +- `CUA OS Type`:镜像的操作系统类型。默认 `linux`。它会影响 AstrBot 对 POSIX Shell fallback 的判断。 +- `CUA Sandbox TTL`:沙盒生命周期,单位为秒。默认 `3600`。 +- `CUA Telemetry Enabled`:是否启用 CUA 侧遥测。默认关闭。 +- `CUA Local Runtime`:是否使用本地运行时。默认开启。关闭后会按 CUA SDK 的云端方式创建沙盒。 +- `CUA API Key`:云端 CUA 所需的 API Key。仅在使用云端运行时时填写。 + +一个最小本地 Linux 容器配置通常是: + +```text +Computer Use Runtime = sandbox +沙箱环境驱动器 = CUA +CUA Image = linux +CUA OS Type = linux +CUA Local Runtime = true +CUA Sandbox TTL = 3600 +``` + +如果使用云端 CUA,可改为: + +```text +Computer Use Runtime = sandbox +沙箱环境驱动器 = CUA +CUA Image = linux +CUA OS Type = linux +CUA Local Runtime = false +CUA API Key = +``` + +> [!WARNING] +> 不要把 CUA API Key 写入公开日志、截图或 issue。AstrBot 的运行日志不会输出该字段,但部署平台、Shell 历史和容器环境变量仍需自行保护。 + +### 使用 CUA 时的注意事项 + +- `linux` 镜像通常适合 Shell、Python、文件系统和桌面自动化测试。 +- 非 POSIX 镜像(如 `windows`、`android`)不一定支持 `sh`、`cat`、`ls`、`rm`、`base64` 等命令。AstrBot 对需要这些命令的 fallback 操作会返回明确错误。 +- 如果需要在 CUA sandbox 中打开浏览器或 GUI 程序,通常应使用 Shell 后台执行,例如显式传入 `background=true`,避免命令阻塞后续工具调用。 +- 直接把 sandbox 内的文件路径发送给用户通常不可行。应优先使用 AstrBot 的沙盒下载工具,将文件下载到 AstrBot 临时目录后再发送。 +- CUA 与 Shipyard Neo 的 workspace 语义不同。Shipyard Neo 固定使用 `/workspace`;CUA 的工作目录和文件路径取决于镜像与运行时。 + +### 何时选择 CUA + +建议在以下场景选择 `CUA`: + +- 需要桌面截图、鼠标点击、键盘输入等 GUI 自动化能力。 +- 需要测试不同 OS 镜像中的行为,例如 Linux、Windows、Android。 +- 已经在本机或云端部署好 CUA 运行环境。 + +如果只是需要稳定的 Python/Shell/文件系统沙盒,且不需要桌面 GUI 操作,通常优先选择 `Shipyard Neo`。它与 AstrBot 的 workspace、Skills 同步和长期运行模式更贴合。 + ## 性能要求 AstrBot 给每个沙盒环境限制最高 1 CPU 和 512 MB 内存。 @@ -388,4 +492,4 @@ Shipyard 会自动将沙盒环境中的 /home 目录挂载到宿主机的 `${PWD ### luosheng520qaq/astrobot_plugin_code_executor -如果您资源有限,不希望使用沙盒环境来执行代码,可以尝试 luosheng520qaq 开发的 [astrobot_plugin_code_executor](https://github.com/luosheng520qaq/astrobot_plugin_code_executor) 插件。该插件会直接在宿主机上执行代码。插件已经尽力提升安全性,但仍需留意代码安全性问题。 \ No newline at end of file +如果您资源有限,不希望使用沙盒环境来执行代码,可以尝试 luosheng520qaq 开发的 [astrobot_plugin_code_executor](https://github.com/luosheng520qaq/astrobot_plugin_code_executor) 插件。该插件会直接在宿主机上执行代码。插件已经尽力提升安全性,但仍需留意代码安全性问题。 diff --git a/docs/zh/use/computer.md b/docs/zh/use/computer.md index bf0c6ecfc0..a9a4690da4 100644 --- a/docs/zh/use/computer.md +++ b/docs/zh/use/computer.md @@ -97,7 +97,12 @@ data/workspaces/{normalized_umo}/notes/todo.txt 在沙盒中,Agent 仍然可以使用 Shell、Python、文件系统工具;如果所选沙盒 profile 支持 `browser` capability,还会挂载浏览器自动化工具。 -使用 Shipyard Neo 时,沙盒 workspace 根目录通常是: +沙盒环境驱动器可在 `配置 -> 普通配置 -> 使用电脑能力` 的沙箱配置中选择。当前常用选项包括: + +- `Shipyard Neo`:AstrBot 推荐的远程/独立部署沙盒服务,适合长期运行和多人使用。 +- `CUA`:基于 [Cua](https://github.com/trycua/cua) 的本地或云端电脑使用沙盒,可提供桌面截图、鼠标、键盘、Shell、Python 和文件系统能力。 + +使用 `Shipyard Neo` 时,沙盒 workspace 根目录通常是: ```text /workspace @@ -115,7 +120,9 @@ result.txt /workspace/result.txt ``` -沙盒部署、profile、TTL、数据持久化、浏览器能力等内容请参考:[Agent 沙盒环境](/use/astrbot-agent-sandbox)。 +使用 `CUA` 时,工作目录和可用命令取决于所选 CUA image 与运行方式。Linux CUA 容器通常提供类 Unix Shell;Windows、Android 等非 POSIX 镜像不保证支持 `sh`、`ls`、`rm`、`base64` 等命令,AstrBot 会对部分 shell fallback 操作返回明确错误。 + +沙盒部署、驱动器选择、CUA 配置、profile、TTL、数据持久化、浏览器能力等内容请参考:[Agent 沙盒环境](/use/astrbot-agent-sandbox)。 > [!NOTE] > 即使在 `sandbox` 模式下,“需要 AstrBot 管理员权限”仍会影响 Shell、Python、浏览器、上传下载等工具的调用权限。具体权限取决于你的配置。 From 598a59dbfe774409669a3292489a1ef3c4ac79d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 01:13:19 +0900 Subject: [PATCH 30/33] test: cover CUA GUI tool registration --- docs/zh/use/computer.md | 2 +- tests/unit/test_astr_main_agent.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/zh/use/computer.md b/docs/zh/use/computer.md index a9a4690da4..2b98420813 100644 --- a/docs/zh/use/computer.md +++ b/docs/zh/use/computer.md @@ -100,7 +100,7 @@ data/workspaces/{normalized_umo}/notes/todo.txt 沙盒环境驱动器可在 `配置 -> 普通配置 -> 使用电脑能力` 的沙箱配置中选择。当前常用选项包括: - `Shipyard Neo`:AstrBot 推荐的远程/独立部署沙盒服务,适合长期运行和多人使用。 -- `CUA`:基于 [Cua](https://github.com/trycua/cua) 的本地或云端电脑使用沙盒,可提供桌面截图、鼠标、键盘、Shell、Python 和文件系统能力。 +- `CUA`:基于 [CUA](https://github.com/trycua/cua) 的本地或云端电脑使用沙盒,可提供桌面截图、鼠标、键盘、Shell、Python 和文件系统能力。 使用 `Shipyard Neo` 时,沙盒 workspace 根目录通常是: diff --git a/tests/unit/test_astr_main_agent.py b/tests/unit/test_astr_main_agent.py index 025ae86109..faae767345 100644 --- a/tests/unit/test_astr_main_agent.py +++ b/tests/unit/test_astr_main_agent.py @@ -1573,6 +1573,13 @@ def test_apply_sandbox_tools_with_cua_adds_gui_guidance(self, mock_context): module._apply_sandbox_tools(config, req, "session-123") + assert req.func_tool is not None + tool_names = req.func_tool.names() + assert "astrbot_cua_screenshot" in tool_names + assert "astrbot_cua_mouse_click" in tool_names + assert "astrbot_cua_keyboard_type" in tool_names + assert "astrbot_cua_key_press" not in tool_names + assert "Firefox" in req.system_prompt assert "background=true" in req.system_prompt assert 'firefox "https://example.com"' in req.system_prompt From e25715810496330a0c06a586a39aaf9f871b64f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 01:19:10 +0900 Subject: [PATCH 31/33] refactor: simplify CUA fallback handling --- astrbot/core/computer/booters/cua.py | 186 ++++++++++++++++++++------- docs/zh/use/astrbot-agent-sandbox.md | 2 +- 2 files changed, 139 insertions(+), 49 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index dc01c62c48..3684229e68 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -158,19 +158,47 @@ def _python3_requirement_error(operation: str, stderr: str) -> str: return f"CUA {operation} requires python3 in the sandbox image: {stderr}" +def _process_result_with_python3_error(raw: Any, operation: str) -> ProcessResult: + proc = _normalize_process_result(raw) + if proc.stderr and _is_missing_python3_error(proc.stderr): + return ProcessResult( + stdout=proc.stdout, + stderr=_python3_requirement_error(operation, proc.stderr), + exit_code=proc.exit_code, + success=proc.success, + ) + return proc + + +async def _exec_python3_or_error( + shell: ShellComponent, + code: str, + *, + operation: str, + timeout: int | None = 30, +) -> ProcessResult: + result = await shell.exec(f"python3 - <<'PY'\n{code}\nPY", timeout=timeout) + return _process_result_with_python3_error(result, operation) + + def _is_posix_os_type(os_type: str) -> bool: return os_type.lower() in _POSIX_OS_TYPES +def _non_posix_filesystem_error(os_type: str) -> str: + return ( + "CUA filesystem shell fallback is only supported for POSIX images; " + f"os_type={os_type!r} does not support the required shell commands." + ) + + def _non_posix_filesystem_result(path: str, os_type: str) -> dict[str, Any]: - return { - "success": False, - "path": path, - "error": ( - "CUA filesystem shell fallback is only supported for POSIX images; " - f"os_type={os_type!r} does not support the required shell commands." - ), - } + error = _non_posix_filesystem_error(os_type) + return {"success": False, "path": path, "error": error, "message": error} + + +def _raise_non_posix_filesystem_error(os_type: str) -> None: + raise RuntimeError(_non_posix_filesystem_error(os_type)) def _split_listing_entries(output: str) -> list[str]: @@ -252,13 +280,14 @@ async def exec( command = _build_cua_background_command(command) result = await _maybe_await(self._exec_raw(command, **kwargs)) - proc = _normalize_process_result(result) - stderr = proc.stderr - if background and stderr and _is_missing_python3_error(stderr): - stderr = _python3_requirement_error("background execution", stderr) + proc = ( + _process_result_with_python3_error(result, "background execution") + if background + else _normalize_process_result(result) + ) response = { "stdout": proc.stdout, - "stderr": stderr, + "stderr": proc.stderr, "exit_code": proc.exit_code, "success": proc.success, } @@ -298,15 +327,11 @@ async def exec( proc = _normalize_process_result(result) else: shell = CuaShellComponent(self._sandbox, os_type=self._os_type) - result = await shell.exec(f"python3 - <<'PY'\n{code}\nPY", timeout=timeout) - error = result.get("stderr", "") - if error and _is_missing_python3_error(error): - error = _python3_requirement_error("Python execution fallback", error) - proc = ProcessResult( - stdout=result.get("stdout", ""), - stderr=error, - exit_code=result.get("exit_code"), - success=bool(result.get("success", False)), + proc = await _exec_python3_or_error( + shell, + code, + operation="Python execution fallback", + timeout=timeout, ) output_text = "" if silent else proc.stdout @@ -341,6 +366,7 @@ def __init__( self._shell = CuaShellComponent(sandbox, os_type=os_type) self._fs = getattr(sandbox, "filesystem", None) self._os_type = os_type.lower() + self._fallback = _PosixShellFileSystem(self._shell, self._os_type) async def create_file( self, @@ -362,12 +388,7 @@ async def read_file( ) -> dict[str, Any]: read_file = None if self._fs is None else getattr(self._fs, "read_file", None) if read_file is None: - if not _is_posix_os_type(self._os_type): - return _non_posix_filesystem_result(path, self._os_type) - result = await self._shell.exec(f"cat {shlex.quote(path)}") - if result.get("stderr"): - return {"success": False, "path": path, "error": result["stderr"]} - content = result.get("stdout", "") + return await self._fallback.read_file(path, encoding, offset, limit) else: content = await _maybe_await(read_file(path)) if isinstance(content, bytes): @@ -390,12 +411,7 @@ async def write_file( _ = mode write_file = None if self._fs is None else getattr(self._fs, "write_file", None) if write_file is None: - if not _is_posix_os_type(self._os_type): - return _non_posix_filesystem_result(path, self._os_type) - result = await _write_base64_via_shell( - self._shell, path, content.encode(encoding) - ) - return _write_result(path, result) + return await self._fallback.write_file(path, content, mode, encoding) else: await _maybe_await(write_file(path, content)) return {"success": True, "path": path} @@ -407,11 +423,7 @@ async def delete_file(self, path: str) -> dict[str, Any]: self._fs, "delete_file", None ) if delete is None: - if not _is_posix_os_type(self._os_type): - return _non_posix_filesystem_result(path, self._os_type) - result = await self._shell.exec(f"rm -rf {shlex.quote(path)}") - if result.get("stderr"): - return {"success": False, "path": path, "error": result["stderr"]} + return await self._fallback.delete_file(path) else: await _maybe_await(delete(path)) return {"success": True, "path": path} @@ -425,9 +437,7 @@ async def list_dir( if list_dir is not None: entries = await _maybe_await(list_dir(path)) return {"success": True, "path": path, "entries": entries} - if not _is_posix_os_type(self._os_type): - return _non_posix_filesystem_result(path, self._os_type) - return await _list_dir_via_shell(self._shell, path, show_hidden) + return await self._fallback.list_dir(path, show_hidden) async def search_files( self, @@ -437,10 +447,7 @@ async def search_files( after_context: int | None = None, before_context: int | None = None, ) -> dict[str, Any]: - if not _is_posix_os_type(self._os_type): - return _non_posix_filesystem_result(path or ".", self._os_type) - return await search_files_via_shell( - self._shell, + return await self._fallback.search_files( pattern=pattern, path=path, glob=glob, @@ -478,6 +485,90 @@ async def edit_file( } +class _PosixShellFileSystem(FileSystemComponent): + def __init__(self, shell: CuaShellComponent, os_type: str) -> None: + self._shell = shell + self._os_type = os_type.lower() + + def _ensure_posix(self, path: str) -> dict[str, Any] | None: + if _is_posix_os_type(self._os_type): + return None + return _non_posix_filesystem_result(path, self._os_type) + + async def read_file( + self, + path: str, + encoding: str = "utf-8", + offset: int | None = None, + limit: int | None = None, + ) -> dict[str, Any]: + _ = encoding + if error := self._ensure_posix(path): + return error + result = await self._shell.exec(f"cat {shlex.quote(path)}") + if result.get("stderr"): + return {"success": False, "path": path, "error": result["stderr"]} + return { + "success": True, + "path": path, + "content": _slice_content_by_lines( + str(result.get("stdout", "")), offset=offset, limit=limit + ), + } + + async def write_file( + self, + path: str, + content: str, + mode: str = "w", + encoding: str = "utf-8", + ) -> dict[str, Any]: + _ = mode + if error := self._ensure_posix(path): + return error + result = await _write_base64_via_shell( + self._shell, path, content.encode(encoding) + ) + return _write_result(path, result) + + async def delete_file(self, path: str) -> dict[str, Any]: + if error := self._ensure_posix(path): + return error + result = await self._shell.exec(f"rm -rf {shlex.quote(path)}") + if result.get("stderr"): + return {"success": False, "path": path, "error": result["stderr"]} + return {"success": True, "path": path} + + async def list_dir( + self, + path: str = ".", + show_hidden: bool = False, + ) -> dict[str, Any]: + if error := self._ensure_posix(path): + return error + return await _list_dir_via_shell(self._shell, path, show_hidden) + + async def search_files( + self, + pattern: str, + path: str | None = None, + glob: str | None = None, + after_context: int | None = None, + before_context: int | None = None, + ) -> dict[str, Any]: + search_path = path or "." + if error := self._ensure_posix(search_path): + return error + return await search_files_via_shell( + self._shell, + pattern=pattern, + path=path, + glob=glob, + after_context=after_context, + before_context=before_context, + ) + + async def _list_dir_via_shell( shell: CuaShellComponent, path: str, @@ -728,8 +819,7 @@ async def download_file(self, remote_path: str, local_path: str) -> None: await sandbox.download_file(remote_path, local_path) return if not _is_posix_os_type(self.os_type): - result = _non_posix_filesystem_result(remote_path, self.os_type) - raise RuntimeError(result["error"]) + _raise_non_posix_filesystem_error(self.os_type) result = await self.shell.exec(f"base64 {shlex.quote(remote_path)}") if result.get("stderr"): raise RuntimeError(result["stderr"]) diff --git a/docs/zh/use/astrbot-agent-sandbox.md b/docs/zh/use/astrbot-agent-sandbox.md index bcf805eba4..ff59c6a7bd 100644 --- a/docs/zh/use/astrbot-agent-sandbox.md +++ b/docs/zh/use/astrbot-agent-sandbox.md @@ -69,7 +69,7 @@ CUA 本身还依赖具体运行方式: - macOS VM 通常依赖 CUA/Lume 相关运行时。 - 云端 CUA 需要可用的 CUA API Key。 -具体宿主机要求、镜像支持情况和本地运行时安装方式,请参考 [Cua 官方文档](https://cua.ai/docs)。 +具体宿主机要求、镜像支持情况和本地运行时安装方式,请参考 [CUA 官方文档](https://cua.ai/docs)。 ### 在 AstrBot 中配置 CUA From 51c0525335fae6a0254f166ccb69197a0798194d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 01:29:19 +0900 Subject: [PATCH 32/33] refactor: simplify CUA shell helpers --- astrbot/core/computer/booters/cua.py | 58 +++++++++++----------- astrbot/core/tools/computer_tools/shell.py | 51 +++++++------------ tests/unit/test_func_tool_manager.py | 18 +++++++ 3 files changed, 66 insertions(+), 61 deletions(-) diff --git a/astrbot/core/computer/booters/cua.py b/astrbot/core/computer/booters/cua.py index 3684229e68..dd72a0aa8a 100644 --- a/astrbot/core/computer/booters/cua.py +++ b/astrbot/core/computer/booters/cua.py @@ -114,21 +114,21 @@ def _slice_content_by_lines( return "".join(selected) -def _result_text(payload: dict[str, Any], *keys: str) -> str: - for key in keys: - value = payload.get(key) - if value is not None: - return str(value) - return "" - - def _normalize_process_result(raw: Any) -> ProcessResult: + """Best-effort normalization for the process shapes returned by CUA SDKs.""" payload = _maybe_model_dump(raw) if not payload and isinstance(raw, str): payload = {"stdout": raw} - stdout = _result_text(payload, "stdout", "output") - stderr = _result_text(payload, "stderr", "error") + def first_text(*keys: str) -> str: + for key in keys: + value = payload.get(key) + if value is not None: + return str(value) + return "" + + stdout = first_text("stdout", "output") + stderr = first_text("stderr", "error") exit_code = payload.get("exit_code") if exit_code is None: exit_code = payload.get("returncode") @@ -158,7 +158,7 @@ def _python3_requirement_error(operation: str, stderr: str) -> str: return f"CUA {operation} requires python3 in the sandbox image: {stderr}" -def _process_result_with_python3_error(raw: Any, operation: str) -> ProcessResult: +def _normalize_with_python3_requirement(raw: Any, operation: str) -> ProcessResult: proc = _normalize_process_result(raw) if proc.stderr and _is_missing_python3_error(proc.stderr): return ProcessResult( @@ -178,14 +178,14 @@ async def _exec_python3_or_error( timeout: int | None = 30, ) -> ProcessResult: result = await shell.exec(f"python3 - <<'PY'\n{code}\nPY", timeout=timeout) - return _process_result_with_python3_error(result, operation) + return _normalize_with_python3_requirement(result, operation) def _is_posix_os_type(os_type: str) -> bool: return os_type.lower() in _POSIX_OS_TYPES -def _non_posix_filesystem_error(os_type: str) -> str: +def _posix_fs_error_message(os_type: str) -> str: return ( "CUA filesystem shell fallback is only supported for POSIX images; " f"os_type={os_type!r} does not support the required shell commands." @@ -193,16 +193,12 @@ def _non_posix_filesystem_error(os_type: str) -> str: def _non_posix_filesystem_result(path: str, os_type: str) -> dict[str, Any]: - error = _non_posix_filesystem_error(os_type) + error = _posix_fs_error_message(os_type) return {"success": False, "path": path, "error": error, "message": error} def _raise_non_posix_filesystem_error(os_type: str) -> None: - raise RuntimeError(_non_posix_filesystem_error(os_type)) - - -def _split_listing_entries(output: str) -> list[str]: - return [line for line in output.splitlines() if line.strip()] + raise RuntimeError(_posix_fs_error_message(os_type)) def _resolve_component_method( @@ -281,7 +277,7 @@ async def exec( result = await _maybe_await(self._exec_raw(command, **kwargs)) proc = ( - _process_result_with_python3_error(result, "background execution") + _normalize_with_python3_requirement(result, "background execution") if background else _normalize_process_result(result) ) @@ -576,10 +572,11 @@ async def _list_dir_via_shell( ) -> dict[str, Any]: flags = "-1A" if show_hidden else "-1" result = await shell.exec(f"ls {flags} {shlex.quote(path)}") + stdout = result.get("stdout", "") return { "success": not bool(result.get("stderr")), "path": path, - "entries": _split_listing_entries(result.get("stdout", "")), + "entries": [line for line in stdout.splitlines() if line.strip()], "error": result.get("stderr", ""), } @@ -633,18 +630,21 @@ async def press_key(self, key: str) -> dict[str, Any]: def _screenshot_to_bytes(raw: Any) -> bytes: - if isinstance(raw, (bytes, bytearray)): - return bytes(raw) - if isinstance(raw, str): - if raw.startswith("data:image"): - raw = raw.split(",", 1)[1] + def from_str(value: str) -> bytes: + if value.startswith("data:image"): + value = value.split(",", 1)[1] try: - return base64.b64decode(raw, validate=True) + return base64.b64decode(value, validate=True) except Exception: - candidate = Path(raw) + candidate = Path(value) if candidate.is_file(): return candidate.read_bytes() - return raw.encode("utf-8") + return value.encode("utf-8") + + if isinstance(raw, (bytes, bytearray)): + return bytes(raw) + if isinstance(raw, str): + return from_str(raw) if hasattr(raw, "save"): import io diff --git a/astrbot/core/tools/computer_tools/shell.py b/astrbot/core/tools/computer_tools/shell.py index d00f586d41..cdefe97a0e 100644 --- a/astrbot/core/tools/computer_tools/shell.py +++ b/astrbot/core/tools/computer_tools/shell.py @@ -1,5 +1,5 @@ import json -import re +import shlex from dataclasses import dataclass, field from typing import Any @@ -84,36 +84,23 @@ async def call( def _is_self_detached_command(command: str) -> bool: - stripped = _strip_shell_comment(command).strip() - lowered = stripped.lower() - return ( - lowered.startswith("nohup ") - or lowered.startswith("setsid ") - or lowered.startswith("disown ") - or lowered.startswith("start ") - or lowered.startswith("start-process ") - or re.search(r"(?:^|\s)&\s*$", stripped) is not None + lex = shlex.shlex(command, posix=False) + lex.whitespace_split = True + lex.commenters = "" + try: + tokens = list(lex) + except ValueError: + return False + comment_index = next( + (index for index, token in enumerate(tokens) if token.startswith("#")), + None, ) + if comment_index is not None: + tokens = tokens[:comment_index] + if not tokens: + return False - -def _strip_shell_comment(command: str) -> str: - in_single = False - in_double = False - escaped = False - for index, char in enumerate(command): - if escaped: - escaped = False - continue - if char == "\\" and not in_single: - escaped = True - continue - if char == "'" and not in_double: - in_single = not in_single - continue - if char == '"' and not in_single: - in_double = not in_double - continue - if char == "#" and not in_single and not in_double: - if index == 0 or command[index - 1].isspace(): - return command[:index] - return command + first = tokens[0].lower() + if first in {"nohup", "setsid", "disown", "start", "start-process"}: + return True + return tokens[-1] == "&" diff --git a/tests/unit/test_func_tool_manager.py b/tests/unit/test_func_tool_manager.py index 11bb9bcac2..4eae43b5ce 100644 --- a/tests/unit/test_func_tool_manager.py +++ b/tests/unit/test_func_tool_manager.py @@ -265,6 +265,24 @@ async def fake_get_booter(context, session_id): assert calls == [{"command": command, "background": False}] +@pytest.mark.parametrize( + ("command", "expected"), + [ + ("echo '#'", False), + ("echo '&'", False), + ("echo foo#bar &", True), + ("echo 'unterminated", False), + ("firefox & # already detached", True), + ("nohup firefox >/tmp/astrbot-firefox.log 2>&1 &", True), + ("firefox", False), + ], +) +def test_is_self_detached_command_handles_quotes_and_comments(command, expected): + from astrbot.core.tools.computer_tools.shell import _is_self_detached_command + + assert _is_self_detached_command(command) is expected + + @pytest.mark.asyncio async def test_execute_shell_reports_blank_exception_type(monkeypatch): from astrbot.core.tools.computer_tools import shell as shell_tools From 9f23aa8801c7c661f0cd94c8f870ad5eb3c813aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=B0=B8=E8=B5=AB?= <1259085392@qq.com> Date: Tue, 28 Apr 2026 01:39:33 +0900 Subject: [PATCH 33/33] test: cover CUA screenshot result shapes --- tests/unit/test_cua_computer_use.py | 88 +++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tests/unit/test_cua_computer_use.py b/tests/unit/test_cua_computer_use.py index 0e886b5634..dc8bb6aa3e 100644 --- a/tests/unit/test_cua_computer_use.py +++ b/tests/unit/test_cua_computer_use.py @@ -1042,6 +1042,11 @@ def test_cua_is_exposed_in_sandbox_config_metadata(): ) +_PNG_BYTES = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=" +) + + @pytest.mark.asyncio async def test_screenshot_tool_returns_image_and_sends_file(monkeypatch, tmp_path): from astrbot.core.tools.computer_tools import cua as cua_tools @@ -1102,6 +1107,89 @@ async def fake_get_booter(context, session_id): assert sent_messages +@pytest.mark.parametrize( + "screenshot_shape", + [ + "data_url", + "path_string", + "save_object", + "base64_dict", + ], +) +@pytest.mark.asyncio +async def test_screenshot_tool_normalizes_supported_screenshot_shapes( + monkeypatch, + tmp_path, + screenshot_shape, +): + from astrbot.core.computer.booters.cua import CuaGUIComponent + from astrbot.core.tools.computer_tools import cua as cua_tools + from astrbot.core.tools.computer_tools.cua import CuaScreenshotTool + + sent_messages = [] + + class FakeEvent: + unified_msg_origin = "umo" + role = "admin" + + async def send(self, message): + sent_messages.append(message) + + class FakeAstrContext: + event = FakeEvent() + context = FakeContext( + { + "provider_settings": { + "computer_use_runtime": "sandbox", + "computer_use_require_admin": True, + "sandbox": {"booter": "cua"}, + } + } + ) + + class FakeWrapper: + context = FakeAstrContext() + + class SaveObject: + def save(self, output, format): + assert format == "PNG" + output.write(_PNG_BYTES) + + class FakeSandbox: + async def screenshot(self): + if screenshot_shape == "data_url": + encoded = base64.b64encode(_PNG_BYTES).decode() + return f"data:image/png;base64,{encoded}" + if screenshot_shape == "path_string": + source_path = tmp_path / "source.png" + source_path.write_bytes(_PNG_BYTES) + return str(source_path) + if screenshot_shape == "save_object": + return SaveObject() + return {"base64": base64.b64encode(_PNG_BYTES).decode()} + + class FakeBooter: + gui = CuaGUIComponent(FakeSandbox()) + + async def fake_get_booter(context, session_id): + return FakeBooter() + + monkeypatch.setattr(cua_tools, "get_booter", fake_get_booter) + monkeypatch.setattr(cua_tools, "get_astrbot_temp_path", lambda: str(tmp_path)) + + result = await CuaScreenshotTool().call(FakeWrapper(), send_to_user=True) + + assert isinstance(result, mcp.types.CallToolResult) + image_parts = [part for part in result.content if part.type == "image"] + text_parts = [part for part in result.content if part.type == "text"] + payload = json.loads(text_parts[0].text) + assert "base64" not in payload + assert payload["mime_type"] == "image/png" + assert Path(payload["path"]).read_bytes() == _PNG_BYTES + assert base64.b64decode(image_parts[0].data) == _PNG_BYTES + assert sent_messages + + @pytest.mark.asyncio async def test_screenshot_tool_can_opt_in_to_llm_image_content(monkeypatch, tmp_path): from astrbot.core.tools.computer_tools import cua as cua_tools