Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion astrbot/core/computer/booters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ def gui(self) -> GUIComponent | None:

async def boot(self, session_id: str) -> None:
    """Boot the computer for the session identified by ``session_id``.

    The body here is only ``...``: this version performs no work and
    returns ``None``.
    """
    ...

async def shutdown(self) -> None:
    """Shut down the computer sandbox.

    The body here is only ``...``: this version takes no extra arguments,
    performs no work and returns ``None``.
    """
    ...
async def shutdown(self, **kwargs) -> None:
    """Shut down the computer sandbox.

    Subclasses may accept extra keyword arguments for type-specific
    cleanup (e.g. ``delete_sandbox`` for ShipyardNeoBooter).

    Args:
        **kwargs: Optional, subclass-specific cleanup options. The default
            implementation has an empty body, so any keyword arguments
            are accepted and ignored.
    """
    ...

async def upload_file(self, path: str, file_name: str) -> dict:
"""Upload file to the computer.
Expand Down
107 changes: 104 additions & 3 deletions astrbot/core/computer/booters/shipyard_neo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import asyncio
import os
import shlex
from typing import Any, cast
Expand Down Expand Up @@ -438,6 +439,9 @@ async def boot(self, session_id: str) -> None:
ttl=self._ttl,
)

# --- Readiness gate: wait until sandbox session is READY ---
await self._wait_until_ready(self._sandbox)

self._shell = NeoShellComponent(self._sandbox)
self._fs = NeoFileSystemComponent(self._sandbox, self._shell)
self._python = NeoPythonComponent(self._sandbox)
Expand All @@ -455,6 +459,78 @@ async def boot(self, session_id: str) -> None:
bool(self._bay_manager),
)

async def _wait_until_ready(
    self,
    sandbox: Sandbox,
    *,
    timeout: float = 180,
    poll_interval: float = 2,
) -> None:
    """Poll *sandbox* until it reports ``ready``; clean up and raise otherwise.

    Every iteration refreshes the sandbox and inspects its status:

    * ``ready`` -- log and return.
    * ``failed`` / ``expired`` -- delete the sandbox (best effort) and
      raise ``RuntimeError``.
    * anything else -- keep polling; once ``timeout`` seconds have elapsed
      delete the sandbox (best effort) and raise ``TimeoutError``.

    The sandbox is deleted before raising so that a sandbox which never
    became usable is not left behind.

    Args:
        sandbox: Sandbox handle. This method reads its ``id``, ``profile``
            and ``status`` attributes and awaits its ``refresh()`` and
            ``delete()`` methods.
        timeout: Maximum number of seconds to wait for readiness
            (default 180, the previously hard-coded value).
        poll_interval: Seconds to sleep between status checks (default 2,
            the previously hard-coded value).

    Raises:
        RuntimeError: The sandbox reached the ``failed`` or ``expired``
            state.
        TimeoutError: The sandbox was still not ready when the deadline
            passed.
    """
    loop = asyncio.get_running_loop()
    sandbox_id = sandbox.id
    deadline = loop.time() + timeout

    async def _discard(label: str) -> None:
        # Best-effort cleanup: a failing delete is only logged so that it
        # never masks the error raised right after this call.
        try:
            await sandbox.delete()
        except Exception as del_err:
            logger.warning(
                "[Computer] Failed to delete %s sandbox %s: %s",
                label,
                sandbox_id,
                del_err,
            )

    while True:
        await sandbox.refresh()
        # Status may be an enum-like object (use ``.value``) or a plain value.
        status = getattr(sandbox.status, "value", str(sandbox.status))

        if status == "ready":
            logger.info(
                "[Computer] Sandbox %s is ready (profile=%s)",
                sandbox_id,
                sandbox.profile,
            )
            return

        if status in {"failed", "expired"}:
            logger.error(
                "[Computer] Sandbox %s reached terminal state: %s",
                sandbox_id,
                status,
            )
            await _discard("failed")
            raise RuntimeError(
                f"Sandbox {sandbox_id} is in terminal state: {status}"
            )

        remaining = deadline - loop.time()
        if remaining <= 0:
            logger.error(
                "[Computer] Sandbox %s did not become ready within %ss "
                "(last status: %s)",
                sandbox_id,
                timeout,
                status,
            )
            await _discard("timed-out")
            raise TimeoutError(
                f"Sandbox {sandbox_id} did not become ready within "
                f"{timeout}s (last status: {status})"
            )

        logger.debug(
            "[Computer] Sandbox %s status=%s, waiting...",
            sandbox_id,
            status,
        )
        # Never sleep past the deadline: the final check then happens at
        # the deadline instead of up to one poll interval after it.
        await asyncio.sleep(min(poll_interval, remaining))

async def _resolve_profile(self, client: Any) -> str:
"""Pick the best profile for this session.

Expand Down Expand Up @@ -510,16 +586,41 @@ def _score(p: Any) -> tuple[int, int]:

return chosen

async def shutdown(self) -> None:
async def shutdown(self, *, delete_sandbox: bool = False) -> None:
if self._client is not None:
sandbox_id = getattr(self._sandbox, "id", "unknown")

# Delete sandbox on Bay BEFORE closing the HTTP client.
# This is critical for cleanup — calling delete after
# __aexit__ would fail because the httpx session is already
# torn down.
if delete_sandbox and self._sandbox is not None:
try:
logger.info(
"[Computer] Deleting Shipyard Neo sandbox: id=%s", sandbox_id
)
await self._sandbox.delete()
logger.info(
"[Computer] Shipyard Neo sandbox deleted: id=%s", sandbox_id
)
except Exception as e:
logger.warning(
"[Computer] Failed to delete sandbox %s (may already be "
"cleaned up by Bay GC): %s",
sandbox_id,
e,
)

logger.info(
"[Computer] Shutting down Shipyard Neo sandbox: id=%s", sandbox_id
"[Computer] Shutting down Shipyard Neo sandbox client: id=%s",
sandbox_id,
)
await self._client.__aexit__(None, None, None)
self._client = None
self._sandbox = None
logger.info("[Computer] Shipyard Neo sandbox shut down: id=%s", sandbox_id)
logger.info(
"[Computer] Shipyard Neo sandbox client shut down: id=%s", sandbox_id
)

# NOTE: We intentionally do NOT stop the Bay container here.
# It stays running for reuse by future sessions. The user can
Expand Down
22 changes: 20 additions & 2 deletions astrbot/core/computer/computer_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,22 @@ async def get_booter(
if session_id in session_booter:
booter = session_booter[session_id]
if not await booter.available():
# rebuild
# Clean up old booter before rebuilding so sandbox resources
# on Bay (containers, volumes, networks) are not leaked.
# Only ShipyardNeoBooter supports delete_sandbox; other booters
# (local, boxlite, cua, etc.) are not backed by a remote sandbox
# manager and don't need it.
try:
if booter_type == "shipyard_neo":
await booter.shutdown(delete_sandbox=True)
else:
await booter.shutdown()
except Exception as shutdown_err:
logger.warning(
"[Computer] Error shutting down stale booter for session %s: %s",
session_id,
shutdown_err,
)
session_booter.pop(session_id, None)
if session_id not in session_booter:
uuid_str = uuid.uuid5(uuid.NAMESPACE_DNS, session_id).hex
Expand Down Expand Up @@ -509,7 +524,10 @@ async def get_booter(
except Exception as e:
logger.error(f"Error booting sandbox for session {session_id}: {e}")
try:
await client.shutdown()
if booter_type == "shipyard_neo":
await client.shutdown(delete_sandbox=True)
else:
await client.shutdown()
except Exception as shutdown_error:
logger.warning(
"Failed to shutdown sandbox after boot error for session %s: %s",
Expand Down
44 changes: 44 additions & 0 deletions astrbot/core/core_lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def __init__(self, log_broker: LogBroker, db: BaseDatabase) -> None:
self.subagent_orchestrator: SubAgentOrchestrator | None = None
self.cron_manager: CronJobManager | None = None
self.temp_dir_cleaner: TempDirCleaner | None = None
self._default_chat_provider_warning_emitted = False

# 设置代理
proxy_config = self.astrbot_config.get("http_proxy", "")
Expand Down Expand Up @@ -97,6 +98,47 @@ async def _init_or_reload_subagent_orchestrator(self) -> None:
except Exception as e:
logger.error(f"Subagent orchestrator init failed: {e}", exc_info=True)

def _warn_about_unset_default_chat_provider(self) -> None:
    """Warn once when the default chat provider is unset or unresolvable.

    A warning is logged in two situations:

    * ``provider_settings.default_provider_id`` is empty while more than
      one chat provider is enabled, or
    * ``default_provider_id`` is set but matches no enabled provider.

    In both cases the message names the provider used as fallback: the
    provider manager's ``curr_provider_inst`` if set, otherwise the first
    entry of ``provider_insts``. After a warning has been logged,
    ``_default_chat_provider_warning_emitted`` is set so later calls return
    immediately until the flag is reset.

    Nothing is logged when there is no provider manager, no enabled
    provider, a single provider without a configured default, or a
    configured default that matches an enabled provider.
    """
    if self._default_chat_provider_warning_emitted:
        return

    pm = getattr(self, "provider_manager", None)
    if not pm:
        return

    providers = pm.provider_insts
    # Truthiness check also covers a ``None`` provider list.
    if not providers:
        return

    provider_settings = getattr(pm, "provider_settings", None) or {}
    default_id = provider_settings.get("default_provider_id")

    if default_id:
        if any(p.provider_config.get("id") == default_id for p in providers):
            return
    elif len(providers) <= 1:
        # A single provider without an explicit default is unambiguous.
        return

    # Resolve the fallback only now that a warning is certain, so the
    # silent paths above never touch provider configuration needlessly.
    fallback = getattr(pm, "curr_provider_inst", None) or providers[0]
    fallback_id = fallback.provider_config.get("id") or "unknown"
    self._default_chat_provider_warning_emitted = True

    if default_id:
        logger.warning(
            "Configured `default_provider_id` is `%s` but no enabled provider matches that ID. "
            "AstrBot will use `%s` as the fallback chat provider. "
            "Please check the WebUI configuration page.",
            default_id,
            fallback_id,
        )
    else:
        logger.warning(
            "Detected %d enabled chat providers but `provider_settings.default_provider_id` is empty. "
            "AstrBot will use `%s` as the startup fallback chat provider. "
            "Set a default chat model in the WebUI configuration page to avoid unexpected provider switching.",
            len(providers),
            fallback_id,
        )

async def initialize(self) -> None:
"""初始化 AstrBot 核心生命周期管理类.

Expand Down Expand Up @@ -201,7 +243,9 @@ async def initialize(self) -> None:
await self.plugin_manager.reload()

# 根据配置实例化各个 Provider
self._default_chat_provider_warning_emitted = False
await self.provider_manager.initialize()
self._warn_about_unset_default_chat_provider()

await self.kb_manager.initialize()

Expand Down
Loading
Loading